# HG changeset patch # User kvn # Date 1378404279 25200 # Node ID e2722a66aba7cfde6ec4e072a05c1105f9159811 # Parent bdd155477289345686f9afc4de1f5fdead2689ef# Parent 3f4392035ec70d44581c262f9d3083fd28423139 Merge diff -r bdd155477289 -r e2722a66aba7 .hgtags --- a/.hgtags Thu Aug 22 09:39:54 2013 -0700 +++ b/.hgtags Thu Sep 05 11:04:39 2013 -0700 @@ -368,3 +368,9 @@ c4697c1c448416108743b59118b4a2498b339d0c jdk8-b102 7f55137d6aa81efc6eb0035813709f2cb6a26b8b hs25-b45 6f9be7f87b9653e94fd8fb3070891a0cc91b15bf jdk8-b103 +580430d131ccd475e2f2ad4006531b8c4813d102 hs25-b46 +104743074675359cfbf7f4dcd9ab2a5974a16627 jdk8-b104 +c1604d5885a6f2adc0bcea2fa142a8f6bafad2f0 hs25-b47 +acac3bde66b2c22791c257a8d99611d6d08c6713 jdk8-b105 +18b4798adbc42c6fa16f5ecb7d5cd3ca130754bf hs25-b48 +aed585cafc0d9655726af6d1e1081d1c94cb3b5c jdk8-b106 diff -r bdd155477289 -r e2722a66aba7 agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java --- a/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java Thu Aug 22 09:39:54 2013 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java Thu Sep 05 11:04:39 2013 -0700 @@ -75,19 +75,19 @@ javaFieldsCount = new CIntField(type.getCIntegerField("_java_fields_count"), 0); constants = new MetadataField(type.getAddressField("_constants"), 0); classLoaderData = type.getAddressField("_class_loader_data"); - sourceFileName = type.getAddressField("_source_file_name"); sourceDebugExtension = type.getAddressField("_source_debug_extension"); innerClasses = type.getAddressField("_inner_classes"); + sourceFileNameIndex = new CIntField(type.getCIntegerField("_source_file_name_index"), 0); nonstaticFieldSize = new CIntField(type.getCIntegerField("_nonstatic_field_size"), 0); staticFieldSize = new CIntField(type.getCIntegerField("_static_field_size"), 0); - staticOopFieldCount = new CIntField(type.getCIntegerField("_static_oop_field_count"), 0); + staticOopFieldCount = new CIntField(type.getCIntegerField("_static_oop_field_count"), 0); nonstaticOopMapSize = new CIntField(type.getCIntegerField("_nonstatic_oop_map_size"), 0); isMarkedDependent = new CIntField(type.getCIntegerField("_is_marked_dependent"), 0); initState = new CIntField(type.getCIntegerField("_init_state"), 0); vtableLen = new CIntField(type.getCIntegerField("_vtable_len"), 0); itableLen = new CIntField(type.getCIntegerField("_itable_len"), 0); breakpoints = type.getAddressField("_breakpoints"); - genericSignature = type.getAddressField("_generic_signature"); + genericSignatureIndex = new CIntField(type.getCIntegerField("_generic_signature_index"), 0); majorVersion = new CIntField(type.getCIntegerField("_major_version"), 0); minorVersion = new CIntField(type.getCIntegerField("_minor_version"), 0); headerSize = Oop.alignObjectOffset(type.getSize()); @@ -134,9 +134,9 @@ private static CIntField javaFieldsCount; private static MetadataField constants; private static AddressField classLoaderData; - private static AddressField sourceFileName; private static AddressField sourceDebugExtension; private static AddressField innerClasses; + private static CIntField sourceFileNameIndex; private static CIntField nonstaticFieldSize; private static CIntField staticFieldSize; private static CIntField staticOopFieldCount; @@ -146,7 +146,7 @@ private static CIntField vtableLen; private static CIntField itableLen; private static AddressField breakpoints; - private static AddressField genericSignature; + private static CIntField genericSignatureIndex; private static CIntField majorVersion; private static CIntField minorVersion; @@ -346,7 +346,7 @@ public ConstantPool getConstants() { return (ConstantPool) constants.getValue(this); } public ClassLoaderData getClassLoaderData() { return ClassLoaderData.instantiateWrapperFor(classLoaderData.getValue(getAddress())); } public Oop getClassLoader() { return getClassLoaderData().getClassLoader(); } - public Symbol getSourceFileName() { return getSymbol(sourceFileName); } + public Symbol getSourceFileName() { return getConstants().getSymbolAt(sourceFileNameIndex.getValue(this)); } public String getSourceDebugExtension(){ return CStringUtilities.getString(sourceDebugExtension.getValue(getAddress())); } public long getNonstaticFieldSize() { return nonstaticFieldSize.getValue(this); } public long getStaticOopFieldCount() { return staticOopFieldCount.getValue(this); } @@ -354,7 +354,7 @@ public boolean getIsMarkedDependent() { return isMarkedDependent.getValue(this) != 0; } public long getVtableLen() { return vtableLen.getValue(this); } public long getItableLen() { return itableLen.getValue(this); } - public Symbol getGenericSignature() { return getSymbol(genericSignature); } + public Symbol getGenericSignature() { return getConstants().getSymbolAt(genericSignatureIndex.getValue(this)); } public long majorVersion() { return majorVersion.getValue(this); } public long minorVersion() { return minorVersion.getValue(this); } diff -r bdd155477289 -r e2722a66aba7 agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java --- a/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java Thu Aug 22 09:39:54 2013 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java Thu Sep 05 11:04:39 2013 -0700 @@ -44,7 +44,7 @@ Type type = db.lookupType("PhaseCFG"); numBlocksField = new CIntField(type.getCIntegerField("_num_blocks"), 0); blocksField = type.getAddressField("_blocks"); - bbsField = type.getAddressField("_bbs"); + bbsField = type.getAddressField("_node_to_block_mapping"); brootField = type.getAddressField("_broot"); } diff -r bdd155477289 -r e2722a66aba7 agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassDump.java --- a/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassDump.java Thu Aug 22 09:39:54 2013 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassDump.java Thu Sep 05 11:04:39 2013 -0700 @@ -92,8 +92,13 @@ System.err.println("Warning: Can not create class filter!"); } } - String outputDirectory = System.getProperty("sun.jvm.hotspot.tools.jcore.outputDir", "."); - setOutputDirectory(outputDirectory); + + // outputDirectory and jarStream are alternatives: setting one closes the other. + // If neither is set, use outputDirectory from the System property: + if (outputDirectory == null && jarStream == null) { + String dirName = System.getProperty("sun.jvm.hotspot.tools.jcore.outputDir", "."); + setOutputDirectory(dirName); + } // walk through the system dictionary SystemDictionary dict = VM.getVM().getSystemDictionary(); diff -r bdd155477289 -r e2722a66aba7 agent/src/share/classes/sun/jvm/hotspot/utilities/soql/sa.js --- a/agent/src/share/classes/sun/jvm/hotspot/utilities/soql/sa.js Thu Aug 22 09:39:54 2013 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/soql/sa.js Thu Sep 05 11:04:39 2013 -0700 @@ -35,8 +35,9 @@ sapkg.code = sapkg.hotspot.code; sapkg.compiler = sapkg.hotspot.compiler; -// 'debugger' is a JavaScript keyword :-( -// sapkg.debugger = sapkg.hotspot.debugger; +// 'debugger' is a JavaScript keyword, but ES5 relaxes the +// restriction of using keywords as property name +sapkg.debugger = sapkg.hotspot.debugger; sapkg.interpreter = sapkg.hotspot.interpreter; sapkg.jdi = sapkg.hotspot.jdi; @@ -116,27 +117,36 @@ return args; } + // Handle __has__ specially to avoid metacircularity problems + // when called from __get__. + // Calling + // this.__has__(name) + // will in turn call + // this.__call__('__has__', name) + // which is not handled below + function __has__(name) { + if (typeof(name) == 'number') { + return so["has(int)"](name); + } else { + if (name == '__wrapped__') { + return true; + } else if (so["has(java.lang.String)"](name)) { + return true; + } else if (name.equals('toString')) { + return true; + } else { + return false; + } + } + } + if (so instanceof sapkg.utilities.soql.ScriptObject) { return new JSAdapter() { - __getIds__: function() { - return so.getIds(); + __getIds__: function() { + return so.getIds(); }, - __has__ : function(name) { - if (typeof(name) == 'number') { - return so["has(int)"](name); - } else { - if (name == '__wrapped__') { - return true; - } else if (so["has(java.lang.String)"](name)) { - return true; - } else if (name.equals('toString')) { - return true; - } else { - return false; - } - } - }, + __has__ : __has__, __delete__ : function(name) { if (typeof(name) == 'number') { @@ -147,7 +157,8 @@ }, __get__ : function(name) { - if (! this.__has__(name)) { + // don't call this.__has__(name); see comments above function __has__ + if (! __has__.call(this, name)) { return undefined; } if (typeof(name) == 'number') { @@ -162,7 +173,7 @@ var args = prepareArgsArray(arguments); var r; try { - r = value.call(args); + r = value.call(Java.to(args, 'java.lang.Object[]')); } catch (e) { println("call to " + name + " failed!"); throw e; @@ -204,6 +215,18 @@ } // define "writeln" and "write" if not defined + if (typeof(println) == 'undefined') { + println = function (str) { + java.lang.System.out.println(String(str)); + } + } + + if (typeof(print) == 'undefined') { + print = function (str) { + java.lang.System.out.print(String(str)); + } + } + if (typeof(writeln) == 'undefined') { writeln = println; } @@ -235,7 +258,7 @@ this.jclasses = function() { forEachKlass(function (clazz) { - writeln(clazz.getName().asString() + " @" + clazz.getHandle().toString()); + writeln(clazz.getName().asString() + " @" + clazz.getAddress().toString()); }); } registerCommand("classes", "classes", "jclasses"); @@ -490,14 +513,14 @@ function forEachKlass(callback) { var VisitorClass = sapkg.memory.SystemDictionary.ClassVisitor; var visitor = new VisitorClass() { visit: callback }; - sa.sysDict["classesDo(sun.jvm.hotspot.memory.SystemDictionary$ClassVisitor)"](visitor); + sa.sysDict["classesDo(sun.jvm.hotspot.memory.SystemDictionary.ClassVisitor)"](visitor); } // iterate system dictionary for each 'Klass' and initiating loader function forEachKlassAndLoader(callback) { var VisitorClass = sapkg.memory.SystemDictionary.ClassAndLoaderVisitor; var visitor = new VisitorClass() { visit: callback }; - sa.sysDict["classesDo(sun.jvm.hotspot.memory.SystemDictionary$ClassAndLoaderVisitor)"](visitor); + sa.sysDict["classesDo(sun.jvm.hotspot.memory.SystemDictionary.ClassAndLoaderVisitor)"](visitor); } // iterate system dictionary for each primitive array klass @@ -522,7 +545,12 @@ // iterates Java heap for each Oop function forEachOop(callback) { - sa.objHeap.iterate(new sapkg.oops.HeapVisitor() { doObj: callback }); + function empty() { } + sa.objHeap.iterate(new sapkg.oops.HeapVisitor() { + prologue: empty, + doObj: callback, + epilogue: empty + }); } // iterates Java heap for each Oop of given 'klass'. @@ -536,8 +564,14 @@ if (includeSubtypes == undefined) { includeSubtypes = true; } + + function empty() { } sa.objHeap.iterateObjectsOfKlass( - new sapkg.oops.HeapVisitor() { doObj: callback }, + new sapkg.oops.HeapVisitor() { + prologue: empty, + doObj: callback, + epilogue: empty + }, klass, includeSubtypes); } @@ -746,9 +780,9 @@ // ignore; continue; } else { - // some type names have ':'. replace to make it as a + // some type names have ':', '<', '>', '*', ' '. replace to make it as a // JavaScript identifier - tmp.name = tmp.name.replace(':', '_').replace('<', '_').replace('>', '_').replace('*', '_').replace(' ', '_'); + tmp.name = ("" + tmp.name).replace(/[:<>* ]/g, '_'); eval("function read" + tmp.name + "(addr) {" + " return readVMType('" + tmp.name + "', addr);}"); eval("function print" + tmp.name + "(addr) {" + diff -r bdd155477289 -r e2722a66aba7 make/bsd/makefiles/adlc.make --- a/make/bsd/makefiles/adlc.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/bsd/makefiles/adlc.make Thu Sep 05 11:04:39 2013 -0700 @@ -41,13 +41,11 @@ ifeq ("${Platform_arch_model}", "${Platform_arch}") SOURCES.AD = \ - $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) else SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) endif EXEC = $(OUTDIR)/adlc diff -r bdd155477289 -r e2722a66aba7 make/bsd/makefiles/gcc.make --- a/make/bsd/makefiles/gcc.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/bsd/makefiles/gcc.make Thu Sep 05 11:04:39 2013 -0700 @@ -247,7 +247,7 @@ # Not yet supported by clang in Xcode 4.6.2 # WARNINGS_ARE_ERRORS += -Wno-tautological-constant-out-of-range-compare WARNINGS_ARE_ERRORS += -Wno-delete-non-virtual-dtor -Wno-deprecated -Wno-format -Wno-dynamic-class-memaccess - WARNINGS_ARE_ERRORS += -Wno-return-type -Wno-empty-body + WARNINGS_ARE_ERRORS += -Wno-empty-body endif WARNING_FLAGS = -Wpointer-arith -Wsign-compare -Wundef diff -r bdd155477289 -r e2722a66aba7 make/hotspot_version --- a/make/hotspot_version Thu Aug 22 09:39:54 2013 -0700 +++ b/make/hotspot_version Thu Sep 05 11:04:39 2013 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=25 HS_MINOR_VER=0 -HS_BUILD_NUMBER=45 +HS_BUILD_NUMBER=48 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r bdd155477289 -r e2722a66aba7 make/linux/makefiles/adlc.make --- a/make/linux/makefiles/adlc.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/linux/makefiles/adlc.make Thu Sep 05 11:04:39 2013 -0700 @@ -41,13 +41,11 @@ ifeq ("${Platform_arch_model}", "${Platform_arch}") SOURCES.AD = \ - $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) else SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) endif EXEC = $(OUTDIR)/adlc diff -r bdd155477289 -r e2722a66aba7 make/linux/makefiles/amd64.make --- a/make/linux/makefiles/amd64.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/linux/makefiles/amd64.make Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ # -# Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -31,9 +31,4 @@ CFLAGS += -D_LP64=1 -# The serviceability agent relies on frame pointer (%rbp) to walk thread stack -ifndef USE_SUNCC - CFLAGS += -fno-omit-frame-pointer -endif - OPT_CFLAGS/compactingPermGenGen.o = -O1 diff -r bdd155477289 -r e2722a66aba7 make/linux/makefiles/gcc.make --- a/make/linux/makefiles/gcc.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/linux/makefiles/gcc.make Thu Sep 05 11:04:39 2013 -0700 @@ -402,3 +402,10 @@ ifdef MINIMIZE_RAM_USAGE CFLAGS += -DMINIMIZE_RAM_USAGE endif + +# Stack walking in the JVM relies on frame pointer (%rbp) to walk thread stack. +# Explicitly specify -fno-omit-frame-pointer because it is off by default +# starting with gcc 4.6. +ifndef USE_SUNCC + CFLAGS += -fno-omit-frame-pointer +endif diff -r bdd155477289 -r e2722a66aba7 make/solaris/makefiles/adlc.make --- a/make/solaris/makefiles/adlc.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/solaris/makefiles/adlc.make Thu Sep 05 11:04:39 2013 -0700 @@ -42,13 +42,11 @@ ifeq ("${Platform_arch_model}", "${Platform_arch}") SOURCES.AD = \ - $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) else SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ - $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) endif EXEC = $(OUTDIR)/adlc diff -r bdd155477289 -r e2722a66aba7 make/solaris/makefiles/dtrace.make --- a/make/solaris/makefiles/dtrace.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/solaris/makefiles/dtrace.make Thu Sep 05 11:04:39 2013 -0700 @@ -283,9 +283,9 @@ $(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -G -xlazyload -o $@ -s $(DTRACE).d \ $(DTraced_Files) ||\ STATUS=$$?;\ - if [ x"$$STATUS" = x"1" -a \ - x`uname -r` = x"5.10" -a \ - x`uname -p` = x"sparc" ]; then\ + if [ x"$$STATUS" = x"1" ]; then \ + if [ x`uname -r` = x"5.10" -a \ + x`uname -p` = x"sparc" ]; then\ echo "*****************************************************************";\ echo "* If you are building server compiler, and the error message is ";\ echo "* \"incorrect ELF machine type...\", you have run into solaris bug ";\ @@ -294,6 +294,20 @@ echo "* environment variable HOTSPOT_DISABLE_DTRACE_PROBES to disable ";\ echo "* dtrace probes for this build.";\ echo "*****************************************************************";\ + elif [ x`uname -r` = x"5.10" ]; then\ + echo "*****************************************************************";\ + echo "* If you are seeing 'syntax error near \"umpiconninfo_t\"' on Solaris";\ + echo "* 10, try doing 'cd /usr/lib/dtrace && gzip mpi.d' as root, ";\ + echo "* or set the environment variable HOTSPOT_DISABLE_DTRACE_PROBES";\ + echo "* to disable dtrace probes for this build.";\ + echo "*****************************************************************";\ + else \ + echo "*****************************************************************";\ + echo "* If you cannot fix dtrace build issues, try to ";\ + echo "* set the environment variable HOTSPOT_DISABLE_DTRACE_PROBES";\ + echo "* to disable dtrace probes for this build.";\ + echo "*****************************************************************";\ + fi; \ fi;\ exit $$STATUS # Since some DTraced_Files are in LIBJVM.o and they are touched by this diff -r bdd155477289 -r e2722a66aba7 make/windows/build_vm_def.sh --- a/make/windows/build_vm_def.sh Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/build_vm_def.sh Thu Sep 05 11:04:39 2013 -0700 @@ -42,8 +42,6 @@ MKS_HOME=`dirname "$SH"` fi -echo "EXPORTS" > vm1.def - AWK="$MKS_HOME/awk.exe" if [ ! -e $AWK ]; then AWK="$MKS_HOME/gawk.exe" @@ -55,6 +53,22 @@ RM="$MKS_HOME/rm.exe" DUMPBIN="link.exe /dump" +if [ "$1" = "-nosa" ]; then + echo EXPORTS > vm.def + echo "" + echo "***" + echo "*** Not building SA: BUILD_WIN_SA != 1" + echo "*** C++ Vtables NOT included in vm.def" + echo "*** This jvm.dll will NOT work properly with SA." + echo "***" + echo "*** When in doubt, set BUILD_WIN_SA=1, clean and rebuild." + echo "***" + echo "" + exit +fi + +echo "EXPORTS" > vm1.def + # When called from IDE the first param should contain the link version, otherwise may be nill if [ "x$1" != "x" ]; then LD_VER="$1" diff -r bdd155477289 -r e2722a66aba7 make/windows/create.bat --- a/make/windows/create.bat Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/create.bat Thu Sep 05 11:04:39 2013 -0700 @@ -1,6 +1,6 @@ @echo off REM -REM Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved. +REM Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. REM DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. REM REM This code is free software; you can redistribute it and/or modify it @@ -148,7 +148,7 @@ REM This is now safe to do. :copyfiles -for /D %%i in (compiler1, compiler2, tiered, core) do ( +for /D %%i in (compiler1, compiler2, tiered ) do ( if NOT EXIST %HotSpotBuildSpace%\%%i\generated mkdir %HotSpotBuildSpace%\%%i\generated copy %HotSpotWorkSpace%\make\windows\projectfiles\%%i\* %HotSpotBuildSpace%\%%i\generated > NUL ) @@ -156,7 +156,7 @@ REM force regneration of ProjectFile if exist %ProjectFile% del %ProjectFile% -for /D %%i in (compiler1, compiler2, tiered, core) do ( +for /D %%i in (compiler1, compiler2, tiered ) do ( echo -- %%i -- echo # Generated file! > %HotSpotBuildSpace%\%%i\local.make echo # Changing a variable below and then deleting %ProjectFile% will cause >> %HotSpotBuildSpace%\%%i\local.make diff -r bdd155477289 -r e2722a66aba7 make/windows/create_obj_files.sh --- a/make/windows/create_obj_files.sh Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/create_obj_files.sh Thu Sep 05 11:04:39 2013 -0700 @@ -73,19 +73,17 @@ BASE_PATHS="${BASE_PATHS} ${GENERATED}/jvmtifiles ${GENERATED}/tracefiles" -if [ -d "${ALTSRC}/share/vm/jfr" ]; then - BASE_PATHS="${BASE_PATHS} ${ALTSRC}/share/vm/jfr" +if [ -d "${ALTSRC}/share/vm/jfr/buffers" ]; then BASE_PATHS="${BASE_PATHS} ${ALTSRC}/share/vm/jfr/buffers" fi BASE_PATHS="${BASE_PATHS} ${COMMONSRC}/share/vm/prims/wbtestmethods" -CORE_PATHS="${BASE_PATHS}" # shared is already in BASE_PATHS. Should add vm/memory but that one is also in BASE_PATHS. if [ -d "${ALTSRC}/share/vm/gc_implementation" ]; then - CORE_PATHS="${CORE_PATHS} `$FIND ${ALTSRC}/share/vm/gc_implementation ! -name gc_implementation -prune -type d \! -name shared`" + BASE_PATHS="${BASE_PATHS} `$FIND ${ALTSRC}/share/vm/gc_implementation ! -name gc_implementation -prune -type d \! -name shared`" fi -CORE_PATHS="${CORE_PATHS} `$FIND ${COMMONSRC}/share/vm/gc_implementation ! -name gc_implementation -prune -type d \! -name shared`" +BASE_PATHS="${BASE_PATHS} `$FIND ${COMMONSRC}/share/vm/gc_implementation ! -name gc_implementation -prune -type d \! -name shared`" if [ -d "${ALTSRC}/share/vm/c1" ]; then COMPILER1_PATHS="${ALTSRC}/share/vm/c1" @@ -104,12 +102,11 @@ # Include dirs per type. case "${TYPE}" in - "core") Src_Dirs="${CORE_PATHS}" ;; - "compiler1") Src_Dirs="${CORE_PATHS} ${COMPILER1_PATHS}" ;; - "compiler2") Src_Dirs="${CORE_PATHS} ${COMPILER2_PATHS}" ;; - "tiered") Src_Dirs="${CORE_PATHS} ${COMPILER1_PATHS} ${COMPILER2_PATHS}" ;; - "zero") Src_Dirs="${CORE_PATHS}" ;; - "shark") Src_Dirs="${CORE_PATHS}" ;; + "compiler1") Src_Dirs="${BASE_PATHS} ${COMPILER1_PATHS}" ;; + "compiler2") Src_Dirs="${BASE_PATHS} ${COMPILER2_PATHS}" ;; + "tiered") Src_Dirs="${BASE_PATHS} ${COMPILER1_PATHS} ${COMPILER2_PATHS}" ;; + "zero") Src_Dirs="${BASE_PATHS}" ;; + "shark") Src_Dirs="${BASE_PATHS}" ;; esac COMPILER2_SPECIFIC_FILES="opto libadt bcEscapeAnalyzer.cpp c2_* runtime_*" @@ -122,7 +119,6 @@ # Exclude per type. case "${TYPE}" in - "core") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;; "compiler1") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER2_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;; "compiler2") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES}" ;; "tiered") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES}" ;; @@ -149,9 +145,17 @@ Src_Files="${Src_Files}`findsrc ${e}` " done -Obj_Files= +Obj_Files=" " for e in ${Src_Files}; do - Obj_Files="${Obj_Files}${e%\.[!.]*}.obj " + o="${e%\.[!.]*}.obj" + set +e + chk=`expr "${Obj_Files}" : ".* $o"` + set -e + if [ "$chk" != 0 ]; then + echo "# INFO: skipping duplicate $o" + continue + fi + Obj_Files="${Obj_Files}$o " done echo Obj_Files=${Obj_Files} diff -r bdd155477289 -r e2722a66aba7 make/windows/makefiles/adlc.make --- a/make/windows/makefiles/adlc.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/makefiles/adlc.make Thu Sep 05 11:04:39 2013 -0700 @@ -55,13 +55,11 @@ !if "$(Platform_arch_model)" == "$(Platform_arch)" SOURCES_AD=\ - $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ - $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad !else SOURCES_AD=\ $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ - $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \ - $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad !endif # NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR diff -r bdd155477289 -r e2722a66aba7 make/windows/makefiles/debug.make --- a/make/windows/makefiles/debug.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/makefiles/debug.make Thu Sep 05 11:04:39 2013 -0700 @@ -49,9 +49,6 @@ # Force resources to be rebuilt every time $(Res_Files): FORCE -vm.def: $(Obj_Files) - sh $(WorkSpace)/make/windows/build_vm_def.sh - $(AOUT): $(Res_Files) $(Obj_Files) vm.def $(LD) @<< $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files) diff -r bdd155477289 -r e2722a66aba7 make/windows/makefiles/fastdebug.make --- a/make/windows/makefiles/fastdebug.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/makefiles/fastdebug.make Thu Sep 05 11:04:39 2013 -0700 @@ -48,9 +48,6 @@ # Force resources to be rebuilt every time $(Res_Files): FORCE -vm.def: $(Obj_Files) - sh $(WorkSpace)/make/windows/build_vm_def.sh - $(AOUT): $(Res_Files) $(Obj_Files) vm.def $(LD) @<< $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files) diff -r bdd155477289 -r e2722a66aba7 make/windows/makefiles/product.make --- a/make/windows/makefiles/product.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/makefiles/product.make Thu Sep 05 11:04:39 2013 -0700 @@ -51,9 +51,6 @@ # Force resources to be rebuilt every time $(Res_Files): FORCE -vm.def: $(Obj_Files) - sh $(WorkSpace)/make/windows/build_vm_def.sh - $(AOUT): $(Res_Files) $(Obj_Files) vm.def $(LD) @<< $(LD_FLAGS) /out:$@ /implib:$*.lib /def:vm.def $(Obj_Files) $(Res_Files) diff -r bdd155477289 -r e2722a66aba7 make/windows/makefiles/projectcreator.make --- a/make/windows/makefiles/projectcreator.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/makefiles/projectcreator.make Thu Sep 05 11:04:39 2013 -0700 @@ -44,10 +44,11 @@ # This is only used internally ProjectCreatorIncludesPRIVATE=\ - -relativeInclude src\closed\share\vm \ - -relativeInclude src\closed\os\windows\vm \ - -relativeInclude src\closed\os_cpu\windows_$(Platform_arch)\vm \ - -relativeInclude src\closed\cpu\$(Platform_arch)\vm \ + -relativeAltSrcInclude src\closed \ + -altRelativeInclude share\vm \ + -altRelativeInclude os\windows\vm \ + -altRelativeInclude os_cpu\windows_$(Platform_arch)\vm \ + -altRelativeInclude cpu\$(Platform_arch)\vm \ -relativeInclude src\share\vm \ -relativeInclude src\share\vm\precompiled \ -relativeInclude src\share\vm\prims\wbtestmethods \ @@ -91,7 +92,11 @@ -disablePch getThread_windows_$(Platform_arch).cpp \ -disablePch_compiler2 opcodes.cpp -# Common options for the IDE builds for core, c1, and c2 +!if "$(BUILD_WIN_SA)" != "1" +BUILD_VM_DEF_FLAG=-nosa +!endif + +# Common options for the IDE builds for c1, and c2 ProjectCreatorIDEOptions=\ $(ProjectCreatorIDEOptions) \ -sourceBase $(HOTSPOTWORKSPACE) \ @@ -103,7 +108,7 @@ -jdkTargetRoot $(HOTSPOTJDKDIST) \ -define ALIGN_STACK_FRAMES \ -define VM_LITTLE_ENDIAN \ - -prelink "" "Generating vm.def..." "cd $(HOTSPOTBUILDSPACE)\%f\%b set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME) set JAVA_HOME=$(HOTSPOTJDKDIST) $(HOTSPOTMKSHOME)\sh $(HOTSPOTWORKSPACE)\make\windows\build_vm_def.sh $(LD_VER)" \ + -prelink "" "Generating vm.def..." "cd $(HOTSPOTBUILDSPACE)\%f\%b set HOTSPOTMKSHOME=$(HOTSPOTMKSHOME) set JAVA_HOME=$(HOTSPOTJDKDIST) $(HOTSPOTMKSHOME)\sh $(HOTSPOTWORKSPACE)\make\windows\build_vm_def.sh $(BUILD_VM_DEF_FLAG) $(LD_VER)" \ -ignoreFile jsig.c \ -ignoreFile jvmtiEnvRecommended.cpp \ -ignoreFile jvmtiEnvStub.cpp \ @@ -158,18 +163,10 @@ -ignoreFile_TARGET $(Platform_arch_model).ad ################################################## -# Without compiler(core) specific options -################################################## -ProjectCreatorIDEOptions=$(ProjectCreatorIDEOptions) \ -$(ProjectCreatorIDEOptionsIgnoreCompiler1:TARGET=core) \ -$(ProjectCreatorIDEOptionsIgnoreCompiler2:TARGET=core) - -################################################## # Client(C1) compiler specific options ################################################## ProjectCreatorIDEOptions=$(ProjectCreatorIDEOptions) \ -define_compiler1 COMPILER1 \ - -ignorePath_compiler1 core \ $(ProjectCreatorIDEOptionsIgnoreCompiler2:TARGET=compiler1) ################################################## @@ -178,7 +175,6 @@ #NOTE! This list must be kept in sync with GENERATED_NAMES in adlc.make. ProjectCreatorIDEOptions=$(ProjectCreatorIDEOptions) \ -define_compiler2 COMPILER2 \ - -ignorePath_compiler2 core \ -additionalFile_compiler2 $(Platform_arch_model).ad \ -additionalFile_compiler2 ad_$(Platform_arch_model).cpp \ -additionalFile_compiler2 ad_$(Platform_arch_model).hpp \ diff -r bdd155477289 -r e2722a66aba7 make/windows/makefiles/trace.make --- a/make/windows/makefiles/trace.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/makefiles/trace.make Thu Sep 05 11:04:39 2013 -0700 @@ -90,25 +90,25 @@ !if "$(OPENJDK)" == "true" $(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceSrcDir)/traceEventClasses.xsl $(XML_DEPS) - @echo Generating $@ + @echo Generating OpenJDK $@ @$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceSrcDir)/traceEventClasses.xsl -OUT $(TraceOutDir)/traceEventClasses.hpp !else $(TraceOutDir)/traceEventClasses.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventClasses.xsl $(XML_DEPS) - @echo Generating $@ + @echo Generating AltSrc $@ @$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceEventClasses.xsl -OUT $(TraceOutDir)/traceEventClasses.hpp $(TraceOutDir)/traceProducer.cpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceProducer.xsl $(XML_DEPS) - @echo Generating $@ + @echo Generating AltSrc $@ @$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceProducer.xsl -OUT $(TraceOutDir)/traceProducer.cpp $(TraceOutDir)/traceRequestables.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceRequestables.xsl $(XML_DEPS) - @echo Generating $@ + @echo Generating AltSrc $@ @$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceRequestables.xsl -OUT $(TraceOutDir)/traceRequestables.hpp $(TraceOutDir)/traceEventControl.hpp: $(TraceSrcDir)/trace.xml $(TraceAltSrcDir)/traceEventControl.xsl $(XML_DEPS) - @echo Generating $@ + @echo Generating AltSrc $@ @$(XSLT) -IN $(TraceSrcDir)/trace.xml -XSL $(TraceAltSrcDir)/traceEventControl.xsl -OUT $(TraceOutDir)/traceEventControl.hpp !endif diff -r bdd155477289 -r e2722a66aba7 make/windows/makefiles/vm.make --- a/make/windows/makefiles/vm.make Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/makefiles/vm.make Thu Sep 05 11:04:39 2013 -0700 @@ -36,10 +36,6 @@ CXX_FLAGS=$(CXX_FLAGS) /D "ASSERT" !endif -!if "$(Variant)" == "core" -# No need to define anything, CORE is defined as !COMPILER1 && !COMPILER2 -!endif - !if "$(Variant)" == "compiler1" CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1" !endif @@ -397,3 +393,11 @@ _build_pch_file.obj: @echo #include "precompiled.hpp" > ../generated/_build_pch_file.cpp $(CXX) $(CXX_FLAGS) /Fp"vm.pch" /Yc"precompiled.hpp" /c ../generated/_build_pch_file.cpp + +!if "$(BUILD_WIN_SA)" != "1" +BUILD_VM_DEF_FLAG=-nosa +!endif + +vm.def: $(Obj_Files) + sh $(WorkSpace)/make/windows/build_vm_def.sh $(BUILD_VM_DEF_FLAG) + diff -r bdd155477289 -r e2722a66aba7 make/windows/projectfiles/common/Makefile --- a/make/windows/projectfiles/common/Makefile Thu Aug 22 09:39:54 2013 -0700 +++ b/make/windows/projectfiles/common/Makefile Thu Sep 05 11:04:39 2013 -0700 @@ -112,6 +112,7 @@ ProjectCreatorIDEOptions = $(ProjectCreatorIDEOptions) $(ReleaseOptions) $(HOTSPOTBUILDSPACE)/$(ProjectFile): $(HOTSPOTBUILDSPACE)/classes/ProjectCreator.class + @if "$(MSC_VER)"=="1500" echo Make sure you have VS2008 SP1 or later, or you may see 'expanded command line too long' @$(RUN_JAVA) -Djava.class.path="$(HOTSPOTBUILDSPACE)/classes" ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions) clean: diff -r bdd155477289 -r e2722a66aba7 src/cpu/sparc/vm/macroAssembler_sparc.cpp --- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ #include "interpreter/interpreter.hpp" #include "memory/cardTableModRefBS.hpp" #include "memory/resourceArea.hpp" +#include "memory/universe.hpp" #include "prims/methodHandles.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/interfaceSupport.hpp" @@ -1145,7 +1146,7 @@ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); int klass_index = oop_recorder()->find_index(k); RelocationHolder rspec = metadata_Relocation::spec(klass_index); - narrowOop encoded_k = oopDesc::encode_klass(k); + narrowOop encoded_k = Klass::encode_klass(k); assert_not_delayed(); // Relocation with special format (see relocInfo_sparc.hpp). @@ -1419,7 +1420,6 @@ load_klass(O0_obj, O0_obj); // assert((klass != NULL) br_null_short(O0_obj, pn, fail); - // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers wrccr( O5_save_flags ); // Restore CCR's @@ -4089,52 +4089,91 @@ } void MacroAssembler::encode_klass_not_null(Register r) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); assert (UseCompressedKlassPointers, "must be compressed"); - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - if (Universe::narrow_klass_base() != NULL) - sub(r, G6_heapbase, r); - srlx(r, LogKlassAlignmentInBytes, r); + assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized"); + assert(r != G6_heapbase, "bad register choice"); + set((intptr_t)Universe::narrow_klass_base(), G6_heapbase); + sub(r, G6_heapbase, r); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srlx(r, LogKlassAlignmentInBytes, r); + } + reinit_heapbase(); } void MacroAssembler::encode_klass_not_null(Register src, Register dst) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); - assert (UseCompressedKlassPointers, "must be compressed"); - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - if (Universe::narrow_klass_base() == NULL) { - srlx(src, LogKlassAlignmentInBytes, dst); + if (src == dst) { + encode_klass_not_null(src); } else { - sub(src, G6_heapbase, dst); - srlx(dst, LogKlassAlignmentInBytes, dst); + assert (UseCompressedKlassPointers, "must be compressed"); + assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized"); + set((intptr_t)Universe::narrow_klass_base(), dst); + sub(src, dst, dst); + if (Universe::narrow_klass_shift() != 0) { + srlx(dst, LogKlassAlignmentInBytes, dst); + } } } +// Function instr_size_for_decode_klass_not_null() counts the instructions +// generated by decode_klass_not_null() and reinit_heapbase(). Hence, if +// the instructions they generate change, then this method needs to be updated. +int MacroAssembler::instr_size_for_decode_klass_not_null() { + assert (UseCompressedKlassPointers, "only for compressed klass ptrs"); + // set + add + set + int num_instrs = insts_for_internal_set((intptr_t)Universe::narrow_klass_base()) + 1 + + insts_for_internal_set((intptr_t)Universe::narrow_ptrs_base()); + if (Universe::narrow_klass_shift() == 0) { + return num_instrs * BytesPerInstWord; + } else { // sllx + return (num_instrs + 1) * BytesPerInstWord; + } +} + +// !!! If the instructions that get generated here change then function +// instr_size_for_decode_klass_not_null() needs to get updated. void MacroAssembler::decode_klass_not_null(Register r) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); // Do not add assert code to this unless you change vtableStubs_sparc.cpp // pd_code_size_limit. assert (UseCompressedKlassPointers, "must be compressed"); - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - sllx(r, LogKlassAlignmentInBytes, r); - if (Universe::narrow_klass_base() != NULL) - add(r, G6_heapbase, r); + assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized"); + assert(r != G6_heapbase, "bad register choice"); + set((intptr_t)Universe::narrow_klass_base(), G6_heapbase); + if (Universe::narrow_klass_shift() != 0) + sllx(r, LogKlassAlignmentInBytes, r); + add(r, G6_heapbase, r); + reinit_heapbase(); } void MacroAssembler::decode_klass_not_null(Register src, Register dst) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); - // Do not add assert code to this unless you change vtableStubs_sparc.cpp - // pd_code_size_limit. - assert (UseCompressedKlassPointers, "must be compressed"); - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - sllx(src, LogKlassAlignmentInBytes, dst); - if (Universe::narrow_klass_base() != NULL) - add(dst, G6_heapbase, dst); + if (src == dst) { + decode_klass_not_null(src); + } else { + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. + assert (UseCompressedKlassPointers, "must be compressed"); + assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized"); + if (Universe::narrow_klass_shift() != 0) { + assert((src != G6_heapbase) && (dst != G6_heapbase), "bad register choice"); + set((intptr_t)Universe::narrow_klass_base(), G6_heapbase); + sllx(src, LogKlassAlignmentInBytes, dst); + add(dst, G6_heapbase, dst); + reinit_heapbase(); + } else { + set((intptr_t)Universe::narrow_klass_base(), dst); + add(src, dst, dst); + } + } } void MacroAssembler::reinit_heapbase() { if (UseCompressedOops || UseCompressedKlassPointers) { - AddressLiteral base(Universe::narrow_ptrs_base_addr()); - load_ptr_contents(base, G6_heapbase); + if (Universe::heap() != NULL) { + set((intptr_t)Universe::narrow_ptrs_base(), G6_heapbase); + } else { + AddressLiteral base(Universe::narrow_ptrs_base_addr()); + load_ptr_contents(base, G6_heapbase); + } } } diff -r bdd155477289 -r e2722a66aba7 src/cpu/sparc/vm/macroAssembler_sparc.hpp --- a/src/cpu/sparc/vm/macroAssembler_sparc.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/sparc/vm/macroAssembler_sparc.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -1177,6 +1177,9 @@ void push_CPU_state(); void pop_CPU_state(); + // Returns the byte size of the instructions generated by decode_klass_not_null(). + static int instr_size_for_decode_klass_not_null(); + // if heap base register is used - reinit it with the correct value void reinit_heapbase(); diff -r bdd155477289 -r e2722a66aba7 src/cpu/sparc/vm/relocInfo_sparc.cpp --- a/src/cpu/sparc/vm/relocInfo_sparc.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/sparc/vm/relocInfo_sparc.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -97,7 +97,7 @@ guarantee(Assembler::inv_op2(inst)==Assembler::sethi_op2, "must be sethi"); if (format() != 0) { assert(type() == relocInfo::oop_type || type() == relocInfo::metadata_type, "only narrow oops or klasses case"); - jint np = type() == relocInfo::oop_type ? oopDesc::encode_heap_oop((oop)x) : oopDesc::encode_klass((Klass*)x); + jint np = type() == relocInfo::oop_type ? oopDesc::encode_heap_oop((oop)x) : Klass::encode_klass((Klass*)x); inst &= ~Assembler::hi22(-1); inst |= Assembler::hi22((intptr_t)np); if (verify_only) { diff -r bdd155477289 -r e2722a66aba7 src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/sparc/vm/sparc.ad Thu Sep 05 11:04:39 2013 -0700 @@ -559,10 +559,7 @@ int klass_load_size; if (UseCompressedKlassPointers) { assert(Universe::heap() != NULL, "java heap should be initialized"); - if (Universe::narrow_klass_base() == NULL) - klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass() - else - klass_load_size = 3*BytesPerInstWord; + klass_load_size = MacroAssembler::instr_size_for_decode_klass_not_null() + 1*BytesPerInstWord; } else { klass_load_size = 1*BytesPerInstWord; } @@ -1663,9 +1660,12 @@ if (UseCompressedKlassPointers) { assert(Universe::heap() != NULL, "java heap should be initialized"); st->print_cr("\tLDUW [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass"); - st->print_cr("\tSLL R_G5,3,R_G5"); - if (Universe::narrow_klass_base() != NULL) - st->print_cr("\tADD R_G5,R_G6_heap_base,R_G5"); + st->print_cr("\tSET Universe::narrow_klass_base,R_G6_heap_base"); + if (Universe::narrow_klass_shift() != 0) { + st->print_cr("\tSLL R_G5,3,R_G5"); + } + st->print_cr("\tADD R_G5,R_G6_heap_base,R_G5"); + st->print_cr("\tSET Universe::narrow_ptrs_base,R_G6_heap_base"); } else { st->print_cr("\tLDX [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check"); } @@ -2563,10 +2563,7 @@ int klass_load_size; if (UseCompressedKlassPointers) { assert(Universe::heap() != NULL, "java heap should be initialized"); - if (Universe::narrow_klass_base() == NULL) - klass_load_size = 2*BytesPerInstWord; - else - klass_load_size = 3*BytesPerInstWord; + klass_load_size = MacroAssembler::instr_size_for_decode_klass_not_null() + 1*BytesPerInstWord; } else { klass_load_size = 1*BytesPerInstWord; } diff -r bdd155477289 -r e2722a66aba7 src/cpu/sparc/vm/templateInterpreter_sparc.cpp --- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1887,6 +1887,27 @@ if (ProfileInterpreter) { __ set_method_data_pointer_for_bcp(); } + +#if INCLUDE_JVMTI + if (EnableInvokeDynamic) { + Label L_done; + + __ ldub(Address(Lbcp, 0), G1_scratch); // Load current bytecode + __ cmp_and_br_short(G1_scratch, Bytecodes::_invokestatic, Assembler::notEqual, Assembler::pn, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ call_VM(G1_scratch, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), I0, Lmethod, Lbcp); + + __ br_null(G1_scratch, false, Assembler::pn, L_done); + __ delayed()->nop(); + + __ st_ptr(G1_scratch, Lesp, wordSize); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + // Resume bytecode interpretation at the current bcp __ dispatch_next(vtos); // end of JVMTI PopFrame support diff -r bdd155477289 -r e2722a66aba7 src/cpu/sparc/vm/vtableStubs_sparc.cpp --- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -219,13 +219,13 @@ const int basic = 5*BytesPerInstWord + // shift;add for load_klass (only shift with zero heap based) (UseCompressedKlassPointers ? - ((Universe::narrow_klass_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0); + MacroAssembler::instr_size_for_decode_klass_not_null() : 0); return basic + slop; } else { const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord + // shift;add for load_klass (only shift with zero heap based) (UseCompressedKlassPointers ? - ((Universe::narrow_klass_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0); + MacroAssembler::instr_size_for_decode_klass_not_null() : 0); return (basic + slop); } } diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -30,6 +30,7 @@ #include "interpreter/interpreter.hpp" #include "memory/cardTableModRefBS.hpp" #include "memory/resourceArea.hpp" +#include "memory/universe.hpp" #include "prims/methodHandles.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/interfaceSupport.hpp" @@ -4810,23 +4811,8 @@ } void MacroAssembler::load_prototype_header(Register dst, Register src) { -#ifdef _LP64 - if (UseCompressedKlassPointers) { - assert (Universe::heap() != NULL, "java heap should be initialized"); - movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); - if (Universe::narrow_klass_shift() != 0) { - assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); - movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); - } else { - movq(dst, Address(dst, Klass::prototype_header_offset())); - } - } else -#endif - { - movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); - movptr(dst, Address(dst, Klass::prototype_header_offset())); - } + load_klass(dst, src); + movptr(dst, Address(dst, Klass::prototype_header_offset())); } void MacroAssembler::store_klass(Register dst, Register src) { @@ -4914,7 +4900,7 @@ #ifdef ASSERT void MacroAssembler::verify_heapbase(const char* msg) { - assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed"); + assert (UseCompressedOops, "should be compressed"); assert (Universe::heap() != NULL, "java heap should be initialized"); if (CheckCompressedOops) { Label ok; @@ -5058,69 +5044,80 @@ } void MacroAssembler::encode_klass_not_null(Register r) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?"); -#endif - if (Universe::narrow_klass_base() != NULL) { - subq(r, r12_heapbase); - } + assert(Universe::narrow_klass_base() != NULL, "Base should be initialized"); + // Use r12 as a scratch register in which to temporarily load the narrow_klass_base. + assert(r != r12_heapbase, "Encoding a klass in r12"); + mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base()); + subq(r, r12_heapbase); if (Universe::narrow_klass_shift() != 0) { assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); shrq(r, LogKlassAlignmentInBytes); } + reinit_heapbase(); } void MacroAssembler::encode_klass_not_null(Register dst, Register src) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); -#endif - if (dst != src) { - movq(dst, src); - } - if (Universe::narrow_klass_base() != NULL) { - subq(dst, r12_heapbase); - } - if (Universe::narrow_klass_shift() != 0) { - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - shrq(dst, LogKlassAlignmentInBytes); - } -} - + if (dst == src) { + encode_klass_not_null(src); + } else { + mov64(dst, (int64_t)Universe::narrow_klass_base()); + negq(dst); + addq(dst, src); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shrq(dst, LogKlassAlignmentInBytes); + } + } +} + +// Function instr_size_for_decode_klass_not_null() counts the instructions +// generated by decode_klass_not_null(register r) and reinit_heapbase(), +// when (Universe::heap() != NULL). Hence, if the instructions they +// generate change, then this method needs to be updated. +int MacroAssembler::instr_size_for_decode_klass_not_null() { + assert (UseCompressedKlassPointers, "only for compressed klass ptrs"); + // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). + return (Universe::narrow_klass_shift() == 0 ? 20 : 24); +} + +// !!! If the instructions that get generated here change then function +// instr_size_for_decode_klass_not_null() needs to get updated. void MacroAssembler::decode_klass_not_null(Register r) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); // Note: it will change flags + assert(Universe::narrow_klass_base() != NULL, "Base should be initialized"); assert (UseCompressedKlassPointers, "should only be used for compressed headers"); + assert(r != r12_heapbase, "Decoding a klass in r12"); // Cannot assert, unverified entry point counts instructions (see .ad file) // vtableStubs also counts instructions in pd_code_size_limit. // Also do not verify_oop as this is called by verify_oop. if (Universe::narrow_klass_shift() != 0) { assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); shlq(r, LogKlassAlignmentInBytes); - if (Universe::narrow_klass_base() != NULL) { - addq(r, r12_heapbase); - } - } else { - assert (Universe::narrow_klass_base() == NULL, "sanity"); - } + } + // Use r12 as a scratch register in which to temporarily load the narrow_klass_base. + mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base()); + addq(r, r12_heapbase); + reinit_heapbase(); } void MacroAssembler::decode_klass_not_null(Register dst, Register src) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); // Note: it will change flags + assert(Universe::narrow_klass_base() != NULL, "Base should be initialized"); assert (UseCompressedKlassPointers, "should only be used for compressed headers"); - // Cannot assert, unverified entry point counts instructions (see .ad file) - // vtableStubs also counts instructions in pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. - if (Universe::narrow_klass_shift() != 0) { - assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); - leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); + if (dst == src) { + decode_klass_not_null(dst); } else { - assert (Universe::narrow_klass_base() == NULL, "sanity"); - if (dst != src) { - movq(dst, src); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + + mov64(dst, (int64_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + leaq(dst, Address(dst, src, Address::times_8, 0)); + } else { + addq(dst, src); } } } @@ -5148,7 +5145,7 @@ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); int klass_index = oop_recorder()->find_index(k); RelocationHolder rspec = metadata_Relocation::spec(klass_index); - mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); + mov_narrow_oop(dst, Klass::encode_klass(k), rspec); } void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { @@ -5156,7 +5153,7 @@ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); int klass_index = oop_recorder()->find_index(k); RelocationHolder rspec = metadata_Relocation::spec(klass_index); - mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); + mov_narrow_oop(dst, Klass::encode_klass(k), rspec); } void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { @@ -5182,7 +5179,7 @@ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); int klass_index = oop_recorder()->find_index(k); RelocationHolder rspec = metadata_Relocation::spec(klass_index); - Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); + Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec); } void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { @@ -5190,14 +5187,23 @@ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); int klass_index = oop_recorder()->find_index(k); RelocationHolder rspec = metadata_Relocation::spec(klass_index); - Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); + Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec); } void MacroAssembler::reinit_heapbase() { if (UseCompressedOops || UseCompressedKlassPointers) { - movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); - } -} + if (Universe::heap() != NULL) { + if (Universe::narrow_oop_base() == NULL) { + MacroAssembler::xorptr(r12_heapbase, r12_heapbase); + } else { + mov64(r12_heapbase, (int64_t)Universe::narrow_ptrs_base()); + } + } else { + movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); + } + } +} + #endif // _LP64 diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/macroAssembler_x86.hpp --- a/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -371,6 +371,10 @@ void cmp_narrow_klass(Register dst, Klass* k); void cmp_narrow_klass(Address dst, Klass* k); + // Returns the byte size of the instructions generated by decode_klass_not_null() + // when compressed klass pointers are being used. + static int instr_size_for_decode_klass_not_null(); + // if heap base register is used - reinit it with the correct value void reinit_heapbase(); diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/relocInfo_x86.cpp --- a/src/cpu/x86/vm/relocInfo_x86.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/relocInfo_x86.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -55,9 +55,9 @@ } } else { if (verify_only) { - assert(*(uint32_t*) disp == oopDesc::encode_klass((Klass*)x), "instructions must match"); + assert(*(uint32_t*) disp == Klass::encode_klass((Klass*)x), "instructions must match"); } else { - *(int32_t*) disp = oopDesc::encode_klass((Klass*)x); + *(int32_t*) disp = Klass::encode_klass((Klass*)x); } } } else { diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -675,7 +675,6 @@ __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass __ testptr(rax, rax); __ jcc(Assembler::zero, error); // if klass is NULL it is broken - // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers // return if everything seems ok __ bind(exit); diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1021,7 +1021,6 @@ __ load_klass(rax, rax); // get klass __ testptr(rax, rax); __ jcc(Assembler::zero, error); // if klass is NULL it is broken - // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers // return if everything seems ok __ bind(exit); diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1920,6 +1920,29 @@ __ get_thread(thread); __ movl(Address(thread, JavaThread::popframe_condition_offset()), JavaThread::popframe_inactive); +#if INCLUDE_JVMTI + if (EnableInvokeDynamic) { + Label L_done; + const Register local0 = rdi; + + __ cmpb(Address(rsi, 0), Bytecodes::_invokestatic); + __ jcc(Assembler::notEqual, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(rdx); + __ movptr(rax, Address(local0, 0)); + __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), rax, rdx, rsi); + + __ testptr(rax, rax); + __ jcc(Assembler::zero, L_done); + + __ movptr(Address(rbx, 0), rax); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + __ dispatch_next(vtos); // end of PopFrame support diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -849,9 +849,9 @@ address entry = __ pc(); // rbx,: Method* - // rsi: senderSP must preserved for slow path, set SP to it on fast path - // rdx: scratch - // rdi: scratch + // r13: senderSP must preserved for slow path, set SP to it on fast path + // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) + // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) Label slow_path; // If we need a safepoint check, generate full interpreter entry. @@ -865,8 +865,8 @@ // Load parameters const Register crc = rax; // crc - const Register val = rdx; // source java byte value - const Register tbl = rdi; // scratch + const Register val = c_rarg0; // source java byte value + const Register tbl = c_rarg1; // scratch // Arguments are reversed on java expression stack __ movl(val, Address(rsp, wordSize)); // byte value @@ -880,7 +880,7 @@ // _areturn __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp + __ mov(rsp, r13); // set sp to sender sp __ jmp(rdi); // generate a vanilla native entry as the slow path @@ -919,20 +919,24 @@ const Register crc = c_rarg0; // crc const Register buf = c_rarg1; // source java byte array address const Register len = c_rarg2; // length + const Register off = len; // offset (never overlaps with 'len') // Arguments are reversed on java expression stack - __ movl(len, Address(rsp, wordSize)); // Length // Calculate address of start element if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { __ movptr(buf, Address(rsp, 3*wordSize)); // long buf - __ addptr(buf, Address(rsp, 2*wordSize)); // + offset + __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset + __ addq(buf, off); // + offset __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC } else { __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ addptr(buf, Address(rsp, 2*wordSize)); // + offset + __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset + __ addq(buf, off); // + offset __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC } + // Can now load 'len' since we're finished with 'off' + __ movl(len, Address(rsp, wordSize)); // Length __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); // result in rax @@ -1929,6 +1933,29 @@ __ movl(Address(r15_thread, JavaThread::popframe_condition_offset()), JavaThread::popframe_inactive); +#if INCLUDE_JVMTI + if (EnableInvokeDynamic) { + Label L_done; + const Register local0 = r14; + + __ cmpb(Address(r13, 0), Bytecodes::_invokestatic); + __ jcc(Assembler::notEqual, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(rdx); + __ movptr(rax, Address(local0, 0)); + __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), rax, rdx, r13); + + __ testptr(rax, rax); + __ jcc(Assembler::zero, L_done); + + __ movptr(Address(rbx, 0), rax); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + __ dispatch_next(vtos); // end of PopFrame support diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/vtableStubs_x86_64.cpp --- a/src/cpu/x86/vm/vtableStubs_x86_64.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/vtableStubs_x86_64.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -211,11 +211,11 @@ if (is_vtable_stub) { // Vtable stub size return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0) + - (UseCompressedKlassPointers ? 16 : 0); // 1 leaq can be 3 bytes + 1 long + (UseCompressedKlassPointers ? MacroAssembler::instr_size_for_decode_klass_not_null() : 0); } else { // Itable stub size return (DebugVtables ? 512 : 74) + (CountCompiledCalls ? 13 : 0) + - (UseCompressedKlassPointers ? 32 : 0); // 2 leaqs + (UseCompressedKlassPointers ? MacroAssembler::instr_size_for_decode_klass_not_null() : 0); } // In order to tune these parameters, run the JVM with VM options // +PrintMiscellaneous and +WizardMode to see information about diff -r bdd155477289 -r e2722a66aba7 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Thu Sep 05 11:04:39 2013 -0700 @@ -1393,9 +1393,7 @@ { if (UseCompressedKlassPointers) { st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass"); - if (Universe::narrow_klass_shift() != 0) { - st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1"); - } + st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1"); st->print_cr("\tcmpq rax, rscratch1\t # Inline cache check"); } else { st->print_cr("\tcmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t" @@ -4035,146 +4033,6 @@ %} %} -operand indirectNarrowKlass(rRegN reg) -%{ - predicate(Universe::narrow_klass_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(DecodeNKlass reg); - - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp(0x0); - %} -%} - -operand indOffset8NarrowKlass(rRegN reg, immL8 off) -%{ - predicate(Universe::narrow_klass_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeNKlass reg) off); - - format %{ "[$reg + $off (8-bit)]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -operand indOffset32NarrowKlass(rRegN reg, immL32 off) -%{ - predicate(Universe::narrow_klass_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeNKlass reg) off); - - format %{ "[$reg + $off (32-bit)]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -operand indIndexOffsetNarrowKlass(rRegN reg, rRegL lreg, immL32 off) -%{ - predicate(Universe::narrow_klass_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeNKlass reg) lreg) off); - - op_cost(10); - format %{"[$reg + $off + $lreg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale(0x0); - disp($off); - %} -%} - -operand indIndexNarrowKlass(rRegN reg, rRegL lreg) -%{ - predicate(Universe::narrow_klass_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeNKlass reg) lreg); - - op_cost(10); - format %{"[$reg + $lreg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale(0x0); - disp(0x0); - %} -%} - -operand indIndexScaleNarrowKlass(rRegN reg, rRegL lreg, immI2 scale) -%{ - predicate(Universe::narrow_klass_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeNKlass reg) (LShiftL lreg scale)); - - op_cost(10); - format %{"[$reg + $lreg << $scale]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp(0x0); - %} -%} - -operand indIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegL lreg, immI2 scale) -%{ - predicate(Universe::narrow_klass_shift() == 0); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (AddP (DecodeNKlass reg) (LShiftL lreg scale)) off); - - op_cost(10); - format %{"[$reg + $off + $lreg << $scale]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($lreg); - scale($scale); - disp($off); - %} -%} - -operand indCompressedKlassOffset(rRegN reg, immL32 off) %{ - predicate(UseCompressedKlassPointers && (Universe::narrow_klass_shift() == Address::times_8)); - constraint(ALLOC_IN_RC(ptr_reg)); - match(AddP (DecodeNKlass reg) off); - - op_cost(10); - format %{"[R12 + $reg << 3 + $off] (compressed klass addressing)" %} - interface(MEMORY_INTER) %{ - base(0xc); // R12 - index($reg); - scale(0x3); - disp($off); - %} -%} - -operand indPosIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegI idx, immI2 scale) -%{ - constraint(ALLOC_IN_RC(ptr_reg)); - predicate(Universe::narrow_klass_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); - match(AddP (AddP (DecodeNKlass reg) (LShiftL (ConvI2L idx) scale)) off); - - op_cost(10); - format %{"[$reg + $off + $idx << $scale]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($idx); - scale($scale); - disp($off); - %} -%} - //----------Special Memory Operands-------------------------------------------- // Stack Slot Operand - This operand is used for loading and storing temporary // values on the stack where a match requires a value to @@ -4345,11 +4203,7 @@ indCompressedOopOffset, indirectNarrow, indOffset8Narrow, indOffset32Narrow, indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow, - indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow, - indCompressedKlassOffset, - indirectNarrowKlass, indOffset8NarrowKlass, indOffset32NarrowKlass, - indIndexOffsetNarrowKlass, indIndexNarrowKlass, indIndexScaleNarrowKlass, - indIndexScaleOffsetNarrowKlass, indPosIndexScaleOffsetNarrowKlass); + indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow); //----------PIPELINE----------------------------------------------------------- // Rules which define the behavior of the target architectures pipeline. @@ -6665,7 +6519,7 @@ instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{ match(Set dst (EncodePKlass src)); effect(KILL cr); - format %{ "encode_heap_oop_not_null $dst,$src" %} + format %{ "encode_klass_not_null $dst,$src" %} ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %} @@ -6675,7 +6529,7 @@ instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{ match(Set dst (DecodeNKlass src)); effect(KILL cr); - format %{ "decode_heap_oop_not_null $dst,$src" %} + format %{ "decode_klass_not_null $dst,$src" %} ins_encode %{ Register s = $src$$Register; Register d = $dst$$Register; diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/assembler_zero.cpp --- a/src/cpu/zero/vm/assembler_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/assembler_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -50,6 +50,7 @@ #ifdef ASSERT bool AbstractAssembler::pd_check_instruction_mark() { ShouldNotCallThis(); + return false; } #endif @@ -73,6 +74,7 @@ RegisterOrConstant MacroAssembler::delayed_value_impl( intptr_t* delayed_value_addr, Register tmpl, int offset) { ShouldNotCallThis(); + return RegisterOrConstant(); } void MacroAssembler::store_oop(jobject obj) { diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/cppInterpreter_zero.cpp --- a/src/cpu/zero/vm/cppInterpreter_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1008,6 +1008,7 @@ address CppInterpreter::return_entry(TosState state, int length) { ShouldNotCallThis(); + return NULL; } address CppInterpreter::deopt_entry(TosState state, int length) { diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/entryFrame_zero.hpp --- a/src/cpu/zero/vm/entryFrame_zero.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/entryFrame_zero.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -58,8 +58,8 @@ JavaCallWrapper* call_wrapper, TRAPS); public: - JavaCallWrapper *call_wrapper() const { - return (JavaCallWrapper *) value_of_word(call_wrapper_off); + JavaCallWrapper **call_wrapper() const { + return (JavaCallWrapper **) addr_of_word(call_wrapper_off); } public: diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/frame_zero.cpp --- a/src/cpu/zero/vm/frame_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/frame_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -116,6 +116,7 @@ bool frame::safe_for_sender(JavaThread *thread) { ShouldNotCallThis(); + return false; } void frame::pd_gc_epilog() { @@ -123,6 +124,7 @@ bool frame::is_interpreted_frame_valid(JavaThread *thread) const { ShouldNotCallThis(); + return false; } BasicType frame::interpreter_frame_result(oop* oop_result, @@ -184,9 +186,8 @@ int frame::frame_size(RegisterMap* map) const { #ifdef PRODUCT ShouldNotCallThis(); -#else +#endif // PRODUCT return 0; // make javaVFrame::print_value work -#endif // PRODUCT } intptr_t* frame::interpreter_frame_tos_at(jint offset) const { diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/frame_zero.inline.hpp --- a/src/cpu/zero/vm/frame_zero.inline.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/frame_zero.inline.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -36,7 +36,7 @@ _deopt_state = unknown; } -inline address frame::sender_pc() const { ShouldNotCallThis(); } +inline address frame::sender_pc() const { ShouldNotCallThis(); return NULL; } inline frame::frame(ZeroFrame* zf, intptr_t* sp) { _zeroframe = zf; @@ -89,6 +89,7 @@ inline intptr_t* frame::link() const { ShouldNotCallThis(); + return NULL; } #ifdef CC_INTERP @@ -141,7 +142,7 @@ return fp(); } -inline JavaCallWrapper* frame::entry_frame_call_wrapper() const { +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { return zero_entryframe()->call_wrapper(); } @@ -151,14 +152,17 @@ inline oop frame::saved_oop_result(RegisterMap* map) const { ShouldNotCallThis(); + return NULL; } inline bool frame::is_older(intptr_t* id) const { ShouldNotCallThis(); + return false; } inline intptr_t* frame::entry_frame_argument_at(int offset) const { ShouldNotCallThis(); + return NULL; } inline intptr_t* frame::unextended_sp() const { diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/icBuffer_zero.cpp --- a/src/cpu/zero/vm/icBuffer_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/icBuffer_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -49,8 +49,10 @@ address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { // NB ic_stub_code_size() must return the size of the code we generate ShouldNotCallThis(); + return NULL; } void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ShouldNotCallThis(); + return NULL; } diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/interp_masm_zero.hpp --- a/src/cpu/zero/vm/interp_masm_zero.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/interp_masm_zero.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -40,6 +40,7 @@ Register tmp, int offset) { ShouldNotCallThis(); + return RegisterOrConstant(); } }; diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/interpreter_zero.cpp --- a/src/cpu/zero/vm/interpreter_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/interpreter_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -64,6 +64,7 @@ return NULL; Unimplemented(); + return NULL; } address InterpreterGenerator::generate_abstract_entry() { diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/nativeInst_zero.hpp --- a/src/cpu/zero/vm/nativeInst_zero.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/nativeInst_zero.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -51,15 +51,18 @@ public: bool is_jump() { ShouldNotCallThis(); + return false; } bool is_safepoint_poll() { ShouldNotCallThis(); + return false; } }; inline NativeInstruction* nativeInstruction_at(address address) { ShouldNotCallThis(); + return NULL; } class NativeCall : public NativeInstruction { @@ -70,18 +73,22 @@ address instruction_address() const { ShouldNotCallThis(); + return NULL; } address next_instruction_address() const { ShouldNotCallThis(); + return NULL; } address return_address() const { ShouldNotCallThis(); + return NULL; } address destination() const { ShouldNotCallThis(); + return NULL; } void set_destination_mt_safe(address dest) { @@ -98,25 +105,30 @@ static bool is_call_before(address return_address) { ShouldNotCallThis(); + return false; } }; inline NativeCall* nativeCall_before(address return_address) { ShouldNotCallThis(); + return NULL; } inline NativeCall* nativeCall_at(address address) { ShouldNotCallThis(); + return NULL; } class NativeMovConstReg : public NativeInstruction { public: address next_instruction_address() const { ShouldNotCallThis(); + return NULL; } intptr_t data() const { ShouldNotCallThis(); + return 0; } void set_data(intptr_t x) { @@ -126,12 +138,14 @@ inline NativeMovConstReg* nativeMovConstReg_at(address address) { ShouldNotCallThis(); + return NULL; } class NativeMovRegMem : public NativeInstruction { public: int offset() const { ShouldNotCallThis(); + return 0; } void set_offset(intptr_t x) { @@ -145,6 +159,7 @@ inline NativeMovRegMem* nativeMovRegMem_at(address address) { ShouldNotCallThis(); + return NULL; } class NativeJump : public NativeInstruction { @@ -155,6 +170,7 @@ address jump_destination() const { ShouldNotCallThis(); + return NULL; } void set_jump_destination(address dest) { @@ -172,12 +188,14 @@ inline NativeJump* nativeJump_at(address address) { ShouldNotCallThis(); + return NULL; } class NativeGeneralJump : public NativeInstruction { public: address jump_destination() const { ShouldNotCallThis(); + return NULL; } static void insert_unconditional(address code_pos, address entry) { @@ -191,6 +209,7 @@ inline NativeGeneralJump* nativeGeneralJump_at(address address) { ShouldNotCallThis(); + return NULL; } #endif // CPU_ZERO_VM_NATIVEINST_ZERO_HPP diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/register_zero.cpp --- a/src/cpu/zero/vm/register_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/register_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -32,8 +32,10 @@ const char* RegisterImpl::name() const { ShouldNotCallThis(); + return NULL; } const char* FloatRegisterImpl::name() const { ShouldNotCallThis(); + return NULL; } diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/relocInfo_zero.cpp --- a/src/cpu/zero/vm/relocInfo_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/relocInfo_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -37,6 +37,7 @@ address Relocation::pd_call_destination(address orig_addr) { ShouldNotCallThis(); + return NULL; } void Relocation::pd_set_call_destination(address x) { @@ -45,6 +46,7 @@ address Relocation::pd_get_address_from_code() { ShouldNotCallThis(); + return NULL; } address* Relocation::pd_address_in_code() { diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/sharedRuntime_zero.cpp --- a/src/cpu/zero/vm/sharedRuntime_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/sharedRuntime_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -89,6 +89,7 @@ ret_type); #else ShouldNotCallThis(); + return NULL; #endif // SHARK } @@ -99,6 +100,7 @@ uint SharedRuntime::out_preserve_stack_slots() { ShouldNotCallThis(); + return 0; } JRT_LEAF(void, zero_stub()) @@ -135,4 +137,5 @@ VMRegPair *regs, int total_args_passed) { ShouldNotCallThis(); + return 0; } diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/stubGenerator_zero.cpp --- a/src/cpu/zero/vm/stubGenerator_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/stubGenerator_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -176,6 +176,19 @@ StubRoutines::_oop_arraycopy; } + static int SafeFetch32(int *adr, int errValue) { + int value = errValue; + value = *adr; + return value; + } + + static intptr_t SafeFetchN(intptr_t *adr, intptr_t errValue) { + intptr_t value = errValue; + value = *adr; + return value; + } + + void generate_initial() { // Generates all stubs and initializes the entry points @@ -225,6 +238,15 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); + + // Safefetch stubs. + StubRoutines::_safefetch32_entry = CAST_FROM_FN_PTR(address, StubGenerator::SafeFetch32); + StubRoutines::_safefetch32_fault_pc = NULL; + StubRoutines::_safefetch32_continuation_pc = NULL; + + StubRoutines::_safefetchN_entry = CAST_FROM_FN_PTR(address, StubGenerator::SafeFetchN); + StubRoutines::_safefetchN_fault_pc = NULL; + StubRoutines::_safefetchN_continuation_pc = NULL; } public: diff -r bdd155477289 -r e2722a66aba7 src/cpu/zero/vm/vtableStubs_zero.cpp --- a/src/cpu/zero/vm/vtableStubs_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/cpu/zero/vm/vtableStubs_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -39,16 +39,20 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ShouldNotCallThis(); + return NULL; } VtableStub* VtableStubs::create_itable_stub(int vtable_index) { ShouldNotCallThis(); + return NULL; } int VtableStub::pd_code_size_limit(bool is_vtable_stub) { ShouldNotCallThis(); + return 0; } int VtableStub::pd_code_alignment() { ShouldNotCallThis(); + return 0; } diff -r bdd155477289 -r e2722a66aba7 src/os/bsd/vm/attachListener_bsd.cpp --- a/src/os/bsd/vm/attachListener_bsd.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os/bsd/vm/attachListener_bsd.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -445,14 +445,14 @@ void AttachListener::vm_start() { char fn[UNIX_PATH_MAX]; - struct stat64 st; + struct stat st; int ret; int n = snprintf(fn, UNIX_PATH_MAX, "%s/.java_pid%d", os::get_temp_directory(), os::current_process_id()); assert(n < (int)UNIX_PATH_MAX, "java_pid file name buffer overflow"); - RESTARTABLE(::stat64(fn, &st), ret); + RESTARTABLE(::stat(fn, &st), ret); if (ret == 0) { ret = ::unlink(fn); if (ret == -1) { diff -r bdd155477289 -r e2722a66aba7 src/os/bsd/vm/os_bsd.cpp --- a/src/os/bsd/vm/os_bsd.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os/bsd/vm/os_bsd.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -642,13 +642,14 @@ #endif #ifdef __APPLE__ -static uint64_t locate_unique_thread_id() { +static uint64_t locate_unique_thread_id(mach_port_t mach_thread_port) { // Additional thread_id used to correlate threads in SA thread_identifier_info_data_t m_ident_info; mach_msg_type_number_t count = THREAD_IDENTIFIER_INFO_COUNT; - thread_info(::mach_thread_self(), THREAD_IDENTIFIER_INFO, + thread_info(mach_thread_port, THREAD_IDENTIFIER_INFO, (thread_info_t) &m_ident_info, &count); + return m_ident_info.thread_id; } #endif @@ -679,9 +680,14 @@ } #ifdef __APPLE__ - // thread_id is mach thread on macos - osthread->set_thread_id(::mach_thread_self()); - osthread->set_unique_thread_id(locate_unique_thread_id()); + // thread_id is mach thread on macos, which pthreads graciously caches and provides for us + mach_port_t thread_id = ::pthread_mach_thread_np(::pthread_self()); + guarantee(thread_id != 0, "thread id missing from pthreads"); + osthread->set_thread_id(thread_id); + + uint64_t unique_thread_id = locate_unique_thread_id(thread_id); + guarantee(unique_thread_id != 0, "unique thread id was not found"); + osthread->set_unique_thread_id(unique_thread_id); #else // thread_id is pthread_id on BSD osthread->set_thread_id(::pthread_self()); @@ -843,8 +849,14 @@ // Store pthread info into the OSThread #ifdef __APPLE__ - osthread->set_thread_id(::mach_thread_self()); - osthread->set_unique_thread_id(locate_unique_thread_id()); + // thread_id is mach thread on macos, which pthreads graciously caches and provides for us + mach_port_t thread_id = ::pthread_mach_thread_np(::pthread_self()); + guarantee(thread_id != 0, "just checking"); + osthread->set_thread_id(thread_id); + + uint64_t unique_thread_id = locate_unique_thread_id(thread_id); + guarantee(unique_thread_id != 0, "just checking"); + osthread->set_unique_thread_id(unique_thread_id); #else osthread->set_thread_id(::pthread_self()); #endif @@ -1115,7 +1127,7 @@ intx os::current_thread_id() { #ifdef __APPLE__ - return (intx)::mach_thread_self(); + return (intx)::pthread_mach_thread_np(::pthread_self()); #else return (intx)::pthread_self(); #endif @@ -2267,7 +2279,9 @@ } -char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) { +char* os::reserve_memory_special(size_t bytes, size_t alignment, char* req_addr, bool exec) { + fatal("This code is not used or maintained."); + // "exec" is passed in but not used. Creating the shared image for // the code cache doesn't have an SHM_X executable permission to check. assert(UseLargePages && UseSHM, "only for SHM large pages"); @@ -3229,11 +3243,15 @@ // and if UserSignalHandler is installed all bets are off if (CheckJNICalls) { if (libjsig_is_loaded) { - tty->print_cr("Info: libjsig is activated, all active signal checking is disabled"); + if (PrintJNIResolving) { + tty->print_cr("Info: libjsig is activated, all active signal checking is disabled"); + } check_signals = false; } if (AllowUserSignalHandlers) { - tty->print_cr("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled"); + if (PrintJNIResolving) { + tty->print_cr("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled"); + } check_signals = false; } } @@ -4692,3 +4710,8 @@ return n; } +#ifndef PRODUCT +void TestReserveMemorySpecial_test() { + // No tests available for this platform +} +#endif diff -r bdd155477289 -r e2722a66aba7 src/os/linux/vm/globals_linux.hpp --- a/src/os/linux/vm/globals_linux.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os/linux/vm/globals_linux.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -40,6 +40,9 @@ product(bool, UseHugeTLBFS, false, \ "Use MAP_HUGETLB for large pages") \ \ + product(bool, UseTransparentHugePages, false, \ + "Use MADV_HUGEPAGE for large pages") \ + \ product(bool, LoadExecStackDllInVMThread, true, \ "Load DLLs with executable-stack attribute in the VM Thread") \ \ diff -r bdd155477289 -r e2722a66aba7 src/os/linux/vm/os_linux.cpp --- a/src/os/linux/vm/os_linux.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os/linux/vm/os_linux.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -2676,36 +2676,7 @@ int os::Linux::commit_memory_impl(char* addr, size_t size, size_t alignment_hint, bool exec) { - int err; - if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { - int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; - uintptr_t res = - (uintptr_t) ::mmap(addr, size, prot, - MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB, - -1, 0); - if (res != (uintptr_t) MAP_FAILED) { - if (UseNUMAInterleaving) { - numa_make_global(addr, size); - } - return 0; - } - - err = errno; // save errno from mmap() call above - - if (!recoverable_mmap_error(err)) { - // However, it is not clear that this loss of our reserved mapping - // happens with large pages on Linux or that we cannot recover - // from the loss. For now, we just issue a warning and we don't - // call vm_exit_out_of_memory(). This issue is being tracked by - // JBS-8007074. - warn_fail_commit_memory(addr, size, alignment_hint, exec, err); -// vm_exit_out_of_memory(size, OOM_MMAP_ERROR, -// "committing reserved memory."); - } - // Fall through and try to use small pages - } - - err = os::Linux::commit_memory_impl(addr, size, exec); + int err = os::Linux::commit_memory_impl(addr, size, exec); if (err == 0) { realign_memory(addr, size, alignment_hint); } @@ -2730,7 +2701,7 @@ } void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) { - if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { + if (UseTransparentHugePages && alignment_hint > (size_t)vm_page_size()) { // We don't check the return value: madvise(MADV_HUGEPAGE) may not // be supported or the memory may already be backed by huge pages. ::madvise(addr, bytes, MADV_HUGEPAGE); @@ -2743,7 +2714,7 @@ // uncommitted at all. We don't do anything in this case to avoid creating a segment with // small pages on top of the SHM segment. This method always works for small pages, so we // allow that in any case. - if (alignment_hint <= (size_t)os::vm_page_size() || !UseSHM) { + if (alignment_hint <= (size_t)os::vm_page_size() || can_commit_large_page_memory()) { commit_memory(addr, bytes, alignment_hint, !ExecMem); } } @@ -3113,11 +3084,31 @@ return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); } +bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) { + bool result = false; + void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, + -1, 0); + if (p != MAP_FAILED) { + void *aligned_p = align_ptr_up(p, page_size); + + result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0; + + munmap(p, page_size * 2); + } + + if (warn && !result) { + warning("TransparentHugePages is not supported by the operating system."); + } + + return result; +} + bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { bool result = false; - void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, - -1, 0); + void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, + -1, 0); if (p != MAP_FAILED) { // We don't know if this really is a huge page or not. @@ -3138,12 +3129,10 @@ } fclose(fp); } - munmap (p, page_size); - if (result) - return true; - } - - if (warn) { + munmap(p, page_size); + } + + if (warn && !result) { warning("HugeTLBFS is not supported by the operating system."); } @@ -3191,82 +3180,114 @@ static size_t _large_page_size = 0; -void os::large_page_init() { - if (!UseLargePages) { - UseHugeTLBFS = false; - UseSHM = false; - return; - } - - if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) { - // If UseLargePages is specified on the command line try both methods, - // if it's default, then try only HugeTLBFS. - if (FLAG_IS_DEFAULT(UseLargePages)) { - UseHugeTLBFS = true; - } else { - UseHugeTLBFS = UseSHM = true; - } - } - - if (LargePageSizeInBytes) { - _large_page_size = LargePageSizeInBytes; - } else { - // large_page_size on Linux is used to round up heap size. x86 uses either - // 2M or 4M page, depending on whether PAE (Physical Address Extensions) - // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use - // page as large as 256M. - // - // Here we try to figure out page size by parsing /proc/meminfo and looking - // for a line with the following format: - // Hugepagesize: 2048 kB - // - // If we can't determine the value (e.g. /proc is not mounted, or the text - // format has been changed), we'll use the largest page size supported by - // the processor. +size_t os::Linux::find_large_page_size() { + size_t large_page_size = 0; + + // large_page_size on Linux is used to round up heap size. x86 uses either + // 2M or 4M page, depending on whether PAE (Physical Address Extensions) + // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use + // page as large as 256M. + // + // Here we try to figure out page size by parsing /proc/meminfo and looking + // for a line with the following format: + // Hugepagesize: 2048 kB + // + // If we can't determine the value (e.g. /proc is not mounted, or the text + // format has been changed), we'll use the largest page size supported by + // the processor. #ifndef ZERO - _large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) - ARM_ONLY(2 * M) PPC_ONLY(4 * M); + large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) + ARM_ONLY(2 * M) PPC_ONLY(4 * M); #endif // ZERO - FILE *fp = fopen("/proc/meminfo", "r"); - if (fp) { - while (!feof(fp)) { - int x = 0; - char buf[16]; - if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { - if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { - _large_page_size = x * K; - break; - } - } else { - // skip to next line - for (;;) { - int ch = fgetc(fp); - if (ch == EOF || ch == (int)'\n') break; - } + FILE *fp = fopen("/proc/meminfo", "r"); + if (fp) { + while (!feof(fp)) { + int x = 0; + char buf[16]; + if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { + if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { + large_page_size = x * K; + break; + } + } else { + // skip to next line + for (;;) { + int ch = fgetc(fp); + if (ch == EOF || ch == (int)'\n') break; } } - fclose(fp); } - } - - // print a warning if any large page related flag is specified on command line - bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); - + fclose(fp); + } + + if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) { + warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is " + SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size), + proper_unit_for_byte_size(large_page_size)); + } + + return large_page_size; +} + +size_t os::Linux::setup_large_page_size() { + _large_page_size = Linux::find_large_page_size(); const size_t default_page_size = (size_t)Linux::page_size(); if (_large_page_size > default_page_size) { _page_sizes[0] = _large_page_size; _page_sizes[1] = default_page_size; _page_sizes[2] = 0; } - UseHugeTLBFS = UseHugeTLBFS && - Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size); - - if (UseHugeTLBFS) + + return _large_page_size; +} + +bool os::Linux::setup_large_page_type(size_t page_size) { + if (FLAG_IS_DEFAULT(UseHugeTLBFS) && + FLAG_IS_DEFAULT(UseSHM) && + FLAG_IS_DEFAULT(UseTransparentHugePages)) { + // If UseLargePages is specified on the command line try all methods, + // if it's default, then try only UseTransparentHugePages. + if (FLAG_IS_DEFAULT(UseLargePages)) { + UseTransparentHugePages = true; + } else { + UseHugeTLBFS = UseTransparentHugePages = UseSHM = true; + } + } + + if (UseTransparentHugePages) { + bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages); + if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) { + UseHugeTLBFS = false; + UseSHM = false; + return true; + } + UseTransparentHugePages = false; + } + + if (UseHugeTLBFS) { + bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); + if (hugetlbfs_sanity_check(warn_on_failure, page_size)) { + UseSHM = false; + return true; + } + UseHugeTLBFS = false; + } + + return UseSHM; +} + +void os::large_page_init() { + if (!UseLargePages) { + UseHugeTLBFS = false; + UseTransparentHugePages = false; UseSHM = false; - - UseLargePages = UseHugeTLBFS || UseSHM; + return; + } + + size_t large_page_size = Linux::setup_large_page_size(); + UseLargePages = Linux::setup_large_page_type(large_page_size); set_coredump_filter(); } @@ -3275,16 +3296,22 @@ #define SHM_HUGETLB 04000 #endif -char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) { +char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) { // "exec" is passed in but not used. Creating the shared image for // the code cache doesn't have an SHM_X executable permission to check. assert(UseLargePages && UseSHM, "only for SHM large pages"); + assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); + + if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) { + return NULL; // Fallback to small pages. + } key_t key = IPC_PRIVATE; char *addr; bool warn_on_failure = UseLargePages && (!FLAG_IS_DEFAULT(UseLargePages) || + !FLAG_IS_DEFAULT(UseSHM) || !FLAG_IS_DEFAULT(LargePageSizeInBytes) ); char msg[128]; @@ -3332,42 +3359,219 @@ return NULL; } - if ((addr != NULL) && UseNUMAInterleaving) { - numa_make_global(addr, bytes); - } - - // The memory is committed - MemTracker::record_virtual_memory_reserve_and_commit((address)addr, bytes, mtNone, CALLER_PC); + return addr; +} + +static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) { + assert(error == ENOMEM, "Only expect to fail if no memory is available"); + + bool warn_on_failure = UseLargePages && + (!FLAG_IS_DEFAULT(UseLargePages) || + !FLAG_IS_DEFAULT(UseHugeTLBFS) || + !FLAG_IS_DEFAULT(LargePageSizeInBytes)); + + if (warn_on_failure) { + char msg[128]; + jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: " + PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error); + warning(msg); + } +} + +char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) { + assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); + assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size"); + assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); + + int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; + char* addr = (char*)::mmap(req_addr, bytes, prot, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, + -1, 0); + + if (addr == MAP_FAILED) { + warn_on_large_pages_failure(req_addr, bytes, errno); + return NULL; + } + + assert(is_ptr_aligned(addr, os::large_page_size()), "Must be"); return addr; } +char* os::Linux::reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec) { + size_t large_page_size = os::large_page_size(); + + assert(bytes >= large_page_size, "Shouldn't allocate large pages for small sizes"); + + // Allocate small pages. + + char* start; + if (req_addr != NULL) { + assert(is_ptr_aligned(req_addr, alignment), "Must be"); + assert(is_size_aligned(bytes, alignment), "Must be"); + start = os::reserve_memory(bytes, req_addr); + assert(start == NULL || start == req_addr, "Must be"); + } else { + start = os::reserve_memory_aligned(bytes, alignment); + } + + if (start == NULL) { + return NULL; + } + + assert(is_ptr_aligned(start, alignment), "Must be"); + + // os::reserve_memory_special will record this memory area. + // Need to release it here to prevent overlapping reservations. + MemTracker::record_virtual_memory_release((address)start, bytes); + + char* end = start + bytes; + + // Find the regions of the allocated chunk that can be promoted to large pages. + char* lp_start = (char*)align_ptr_up(start, large_page_size); + char* lp_end = (char*)align_ptr_down(end, large_page_size); + + size_t lp_bytes = lp_end - lp_start; + + assert(is_size_aligned(lp_bytes, large_page_size), "Must be"); + + if (lp_bytes == 0) { + // The mapped region doesn't even span the start and the end of a large page. + // Fall back to allocate a non-special area. + ::munmap(start, end - start); + return NULL; + } + + int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; + + + void* result; + + if (start != lp_start) { + result = ::mmap(start, lp_start - start, prot, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, + -1, 0); + if (result == MAP_FAILED) { + ::munmap(lp_start, end - lp_start); + return NULL; + } + } + + result = ::mmap(lp_start, lp_bytes, prot, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|MAP_HUGETLB, + -1, 0); + if (result == MAP_FAILED) { + warn_on_large_pages_failure(req_addr, bytes, errno); + // If the mmap above fails, the large pages region will be unmapped and we + // have regions before and after with small pages. Release these regions. + // + // | mapped | unmapped | mapped | + // ^ ^ ^ ^ + // start lp_start lp_end end + // + ::munmap(start, lp_start - start); + ::munmap(lp_end, end - lp_end); + return NULL; + } + + if (lp_end != end) { + result = ::mmap(lp_end, end - lp_end, prot, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, + -1, 0); + if (result == MAP_FAILED) { + ::munmap(start, lp_end - start); + return NULL; + } + } + + return start; +} + +char* os::Linux::reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec) { + assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); + assert(is_ptr_aligned(req_addr, alignment), "Must be"); + assert(is_power_of_2(alignment), "Must be"); + assert(is_power_of_2(os::large_page_size()), "Must be"); + assert(bytes >= os::large_page_size(), "Shouldn't allocate large pages for small sizes"); + + if (is_size_aligned(bytes, os::large_page_size()) && alignment <= os::large_page_size()) { + return reserve_memory_special_huge_tlbfs_only(bytes, req_addr, exec); + } else { + return reserve_memory_special_huge_tlbfs_mixed(bytes, alignment, req_addr, exec); + } +} + +char* os::reserve_memory_special(size_t bytes, size_t alignment, char* req_addr, bool exec) { + assert(UseLargePages, "only for large pages"); + + char* addr; + if (UseSHM) { + addr = os::Linux::reserve_memory_special_shm(bytes, alignment, req_addr, exec); + } else { + assert(UseHugeTLBFS, "must be"); + addr = os::Linux::reserve_memory_special_huge_tlbfs(bytes, alignment, req_addr, exec); + } + + if (addr != NULL) { + if (UseNUMAInterleaving) { + numa_make_global(addr, bytes); + } + + // The memory is committed + MemTracker::record_virtual_memory_reserve_and_commit((address)addr, bytes, mtNone, CALLER_PC); + } + + return addr; +} + +bool os::Linux::release_memory_special_shm(char* base, size_t bytes) { + // detaching the SHM segment will also delete it, see reserve_memory_special_shm() + return shmdt(base) == 0; +} + +bool os::Linux::release_memory_special_huge_tlbfs(char* base, size_t bytes) { + return pd_release_memory(base, bytes); +} + bool os::release_memory_special(char* base, size_t bytes) { + assert(UseLargePages, "only for large pages"); + MemTracker::Tracker tkr = MemTracker::get_virtual_memory_release_tracker(); - // detaching the SHM segment will also delete it, see reserve_memory_special() - int rslt = shmdt(base); - if (rslt == 0) { + + bool res; + if (UseSHM) { + res = os::Linux::release_memory_special_shm(base, bytes); + } else { + assert(UseHugeTLBFS, "must be"); + res = os::Linux::release_memory_special_huge_tlbfs(base, bytes); + } + + if (res) { tkr.record((address)base, bytes); - return true; } else { tkr.discard(); - return false; - } + } + + return res; } size_t os::large_page_size() { return _large_page_size; } -// HugeTLBFS allows application to commit large page memory on demand; -// with SysV SHM the entire memory region must be allocated as shared +// With SysV SHM the entire memory region must be allocated as shared // memory. +// HugeTLBFS allows application to commit large page memory on demand. +// However, when committing memory with HugeTLBFS fails, the region +// that was supposed to be committed will lose the old reservation +// and allow other threads to steal that memory region. Because of this +// behavior we can't commit HugeTLBFS memory. bool os::can_commit_large_page_memory() { - return UseHugeTLBFS; + return UseTransparentHugePages; } bool os::can_execute_large_page_memory() { - return UseHugeTLBFS; + return UseTransparentHugePages || UseHugeTLBFS; } // Reserve memory at an arbitrary address, only if that area is @@ -4521,21 +4725,23 @@ UseNUMA = false; } } - // With SHM large pages we cannot uncommit a page, so there's not way + // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way // we can make the adaptive lgrp chunk resizing work. If the user specified - // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and + // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and // disable adaptive resizing. - if (UseNUMA && UseLargePages && UseSHM) { - if (!FLAG_IS_DEFAULT(UseNUMA)) { - if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) { + if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) { + if (FLAG_IS_DEFAULT(UseNUMA)) { + UseNUMA = false; + } else { + if (FLAG_IS_DEFAULT(UseLargePages) && + FLAG_IS_DEFAULT(UseSHM) && + FLAG_IS_DEFAULT(UseHugeTLBFS)) { UseLargePages = false; } else { - warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing"); + warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, disabling adaptive resizing"); UseAdaptiveSizePolicy = false; UseAdaptiveNUMAChunkSizing = false; } - } else { - UseNUMA = false; } } if (!UseNUMA && ForceNUMA) { @@ -5805,3 +6011,149 @@ } #endif // JAVASE_EMBEDDED + + +/////////////// Unit tests /////////////// + +#ifndef PRODUCT + +#define test_log(...) \ + do {\ + if (VerboseInternalVMTests) { \ + tty->print_cr(__VA_ARGS__); \ + tty->flush(); \ + }\ + } while (false) + +class TestReserveMemorySpecial : AllStatic { + public: + static void small_page_write(void* addr, size_t size) { + size_t page_size = os::vm_page_size(); + + char* end = (char*)addr + size; + for (char* p = (char*)addr; p < end; p += page_size) { + *p = 1; + } + } + + static void test_reserve_memory_special_huge_tlbfs_only(size_t size) { + if (!UseHugeTLBFS) { + return; + } + + test_log("test_reserve_memory_special_huge_tlbfs_only(" SIZE_FORMAT ")", size); + + char* addr = os::Linux::reserve_memory_special_huge_tlbfs_only(size, NULL, false); + + if (addr != NULL) { + small_page_write(addr, size); + + os::Linux::release_memory_special_huge_tlbfs(addr, size); + } + } + + static void test_reserve_memory_special_huge_tlbfs_only() { + if (!UseHugeTLBFS) { + return; + } + + size_t lp = os::large_page_size(); + + for (size_t size = lp; size <= lp * 10; size += lp) { + test_reserve_memory_special_huge_tlbfs_only(size); + } + } + + static void test_reserve_memory_special_huge_tlbfs_mixed(size_t size, size_t alignment) { + if (!UseHugeTLBFS) { + return; + } + + test_log("test_reserve_memory_special_huge_tlbfs_mixed(" SIZE_FORMAT ", " SIZE_FORMAT ")", + size, alignment); + + assert(size >= os::large_page_size(), "Incorrect input to test"); + + char* addr = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false); + + if (addr != NULL) { + small_page_write(addr, size); + + os::Linux::release_memory_special_huge_tlbfs(addr, size); + } + } + + static void test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(size_t size) { + size_t lp = os::large_page_size(); + size_t ag = os::vm_allocation_granularity(); + + for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) { + test_reserve_memory_special_huge_tlbfs_mixed(size, alignment); + } + } + + static void test_reserve_memory_special_huge_tlbfs_mixed() { + size_t lp = os::large_page_size(); + size_t ag = os::vm_allocation_granularity(); + + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp); + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + ag); + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp + lp / 2); + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2); + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + ag); + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 - ag); + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 2 + lp / 2); + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10); + test_reserve_memory_special_huge_tlbfs_mixed_all_alignments(lp * 10 + lp / 2); + } + + static void test_reserve_memory_special_huge_tlbfs() { + if (!UseHugeTLBFS) { + return; + } + + test_reserve_memory_special_huge_tlbfs_only(); + test_reserve_memory_special_huge_tlbfs_mixed(); + } + + static void test_reserve_memory_special_shm(size_t size, size_t alignment) { + if (!UseSHM) { + return; + } + + test_log("test_reserve_memory_special_shm(" SIZE_FORMAT ", " SIZE_FORMAT ")", size, alignment); + + char* addr = os::Linux::reserve_memory_special_shm(size, alignment, NULL, false); + + if (addr != NULL) { + assert(is_ptr_aligned(addr, alignment), "Check"); + assert(is_ptr_aligned(addr, os::large_page_size()), "Check"); + + small_page_write(addr, size); + + os::Linux::release_memory_special_shm(addr, size); + } + } + + static void test_reserve_memory_special_shm() { + size_t lp = os::large_page_size(); + size_t ag = os::vm_allocation_granularity(); + + for (size_t size = ag; size < lp * 3; size += ag) { + for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) { + test_reserve_memory_special_shm(size, alignment); + } + } + } + + static void test() { + test_reserve_memory_special_huge_tlbfs(); + test_reserve_memory_special_shm(); + } +}; + +void TestReserveMemorySpecial_test() { + TestReserveMemorySpecial::test(); +} + +#endif diff -r bdd155477289 -r e2722a66aba7 src/os/linux/vm/os_linux.hpp --- a/src/os/linux/vm/os_linux.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os/linux/vm/os_linux.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -32,6 +32,7 @@ class Linux { friend class os; + friend class TestReserveMemorySpecial; // For signal-chaining #define MAXSIGNUM 32 @@ -92,8 +93,21 @@ static void rebuild_cpu_to_node_map(); static GrowableArray* cpu_to_node() { return _cpu_to_node; } + static size_t find_large_page_size(); + static size_t setup_large_page_size(); + + static bool setup_large_page_type(size_t page_size); + static bool transparent_huge_pages_sanity_check(bool warn, size_t pages_size); static bool hugetlbfs_sanity_check(bool warn, size_t page_size); + static char* reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec); + static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec); + static char* reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec); + static char* reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec); + + static bool release_memory_special_shm(char* base, size_t bytes); + static bool release_memory_special_huge_tlbfs(char* base, size_t bytes); + static void print_full_memory_info(outputStream* st); static void print_distro_info(outputStream* st); static void print_libversion_info(outputStream* st); diff -r bdd155477289 -r e2722a66aba7 src/os/posix/vm/os_posix.cpp --- a/src/os/posix/vm/os_posix.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os/posix/vm/os_posix.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. +* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -165,7 +165,7 @@ else st->print("%uk", rlim.rlim_cur >> 10); // Isn't there on solaris -#if! defined(TARGET_OS_FAMILY_solaris) && !defined(TARGET_OS_FAMILY_aix) +#if !defined(TARGET_OS_FAMILY_solaris) && !defined(TARGET_OS_FAMILY_aix) st->print(", NPROC "); getrlimit(RLIMIT_NPROC, &rlim); if (rlim.rlim_cur == RLIM_INFINITY) st->print("infinity"); @@ -268,6 +268,54 @@ return ::fdopen(fd, mode); } +void* os::get_default_process_handle() { + return (void*)::dlopen(NULL, RTLD_LAZY); +} + +// Builds a platform dependent Agent_OnLoad_ function name +// which is used to find statically linked in agents. +// Parameters: +// sym_name: Symbol in library we are looking for +// lib_name: Name of library to look in, NULL for shared libs. +// is_absolute_path == true if lib_name is absolute path to agent +// such as "/a/b/libL.so" +// == false if only the base name of the library is passed in +// such as "L" +char* os::build_agent_function_name(const char *sym_name, const char *lib_name, + bool is_absolute_path) { + char *agent_entry_name; + size_t len; + size_t name_len; + size_t prefix_len = strlen(JNI_LIB_PREFIX); + size_t suffix_len = strlen(JNI_LIB_SUFFIX); + const char *start; + + if (lib_name != NULL) { + len = name_len = strlen(lib_name); + if (is_absolute_path) { + // Need to strip path, prefix and suffix + if ((start = strrchr(lib_name, *os::file_separator())) != NULL) { + lib_name = ++start; + } + if (len <= (prefix_len + suffix_len)) { + return NULL; + } + lib_name += prefix_len; + name_len = strlen(lib_name) - suffix_len; + } + } + len = (lib_name != NULL ? name_len : 0) + strlen(sym_name) + 2; + agent_entry_name = NEW_C_HEAP_ARRAY_RETURN_NULL(char, len, mtThread); + if (agent_entry_name == NULL) { + return NULL; + } + strcpy(agent_entry_name, sym_name); + if (lib_name != NULL) { + strcat(agent_entry_name, "_"); + strncat(agent_entry_name, lib_name, name_len); + } + return agent_entry_name; +} // Returned string is a constant. For unknown signals "UNKNOWN" is returned. const char* os::Posix::get_signal_name(int sig, char* out, size_t outlen) { diff -r bdd155477289 -r e2722a66aba7 src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os/solaris/vm/os_solaris.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -3341,7 +3341,7 @@ return true; } -char* os::reserve_memory_special(size_t size, char* addr, bool exec) { +char* os::reserve_memory_special(size_t size, size_t alignment, char* addr, bool exec) { fatal("os::reserve_memory_special should not be called on Solaris."); return NULL; } @@ -6557,3 +6557,9 @@ return strlen(buffer); } + +#ifndef PRODUCT +void TestReserveMemorySpecial_test() { + // No tests available for this platform +} +#endif diff -r bdd155477289 -r e2722a66aba7 src/os/windows/vm/os_windows.cpp --- a/src/os/windows/vm/os_windows.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os/windows/vm/os_windows.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1642,6 +1642,8 @@ void os::win32::print_windows_version(outputStream* st) { OSVERSIONINFOEX osvi; + SYSTEM_INFO si; + ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX)); osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); @@ -1651,6 +1653,18 @@ } int os_vers = osvi.dwMajorVersion * 1000 + osvi.dwMinorVersion; + + ZeroMemory(&si, sizeof(SYSTEM_INFO)); + if (os_vers >= 5002) { + // Retrieve SYSTEM_INFO from GetNativeSystemInfo call so that we could + // find out whether we are running on 64 bit processor or not. + if (os::Kernel32Dll::GetNativeSystemInfoAvailable()) { + os::Kernel32Dll::GetNativeSystemInfo(&si); + } else { + GetSystemInfo(&si); + } + } + if (osvi.dwPlatformId == VER_PLATFORM_WIN32_NT) { switch (os_vers) { case 3051: st->print(" Windows NT 3.51"); break; @@ -1658,57 +1672,48 @@ case 5000: st->print(" Windows 2000"); break; case 5001: st->print(" Windows XP"); break; case 5002: - case 6000: - case 6001: - case 6002: { - // Retrieve SYSTEM_INFO from GetNativeSystemInfo call so that we could - // find out whether we are running on 64 bit processor or not. - SYSTEM_INFO si; - ZeroMemory(&si, sizeof(SYSTEM_INFO)); - if (!os::Kernel32Dll::GetNativeSystemInfoAvailable()){ - GetSystemInfo(&si); + if (osvi.wProductType == VER_NT_WORKSTATION && + si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) { + st->print(" Windows XP x64 Edition"); } else { - os::Kernel32Dll::GetNativeSystemInfo(&si); + st->print(" Windows Server 2003 family"); } - if (os_vers == 5002) { - if (osvi.wProductType == VER_NT_WORKSTATION && - si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) - st->print(" Windows XP x64 Edition"); - else - st->print(" Windows Server 2003 family"); - } else if (os_vers == 6000) { - if (osvi.wProductType == VER_NT_WORKSTATION) - st->print(" Windows Vista"); - else - st->print(" Windows Server 2008"); - if (si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) - st->print(" , 64 bit"); - } else if (os_vers == 6001) { - if (osvi.wProductType == VER_NT_WORKSTATION) { - st->print(" Windows 7"); - } else { - // Unrecognized windows, print out its major and minor versions - st->print(" Windows NT %d.%d", osvi.dwMajorVersion, osvi.dwMinorVersion); - } - if (si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) - st->print(" , 64 bit"); - } else if (os_vers == 6002) { - if (osvi.wProductType == VER_NT_WORKSTATION) { - st->print(" Windows 8"); - } else { - st->print(" Windows Server 2012"); - } - if (si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) - st->print(" , 64 bit"); - } else { // future os - // Unrecognized windows, print out its major and minor versions - st->print(" Windows NT %d.%d", osvi.dwMajorVersion, osvi.dwMinorVersion); - if (si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) - st->print(" , 64 bit"); + break; + + case 6000: + if (osvi.wProductType == VER_NT_WORKSTATION) { + st->print(" Windows Vista"); + } else { + st->print(" Windows Server 2008"); } break; - } - default: // future windows, print out its major and minor versions + + case 6001: + if (osvi.wProductType == VER_NT_WORKSTATION) { + st->print(" Windows 7"); + } else { + st->print(" Windows Server 2008 R2"); + } + break; + + case 6002: + if (osvi.wProductType == VER_NT_WORKSTATION) { + st->print(" Windows 8"); + } else { + st->print(" Windows Server 2012"); + } + break; + + case 6003: + if (osvi.wProductType == VER_NT_WORKSTATION) { + st->print(" Windows 8.1"); + } else { + st->print(" Windows Server 2012 R2"); + } + break; + + default: // future os + // Unrecognized windows, print out its major and minor versions st->print(" Windows NT %d.%d", osvi.dwMajorVersion, osvi.dwMinorVersion); } } else { @@ -1720,6 +1725,11 @@ st->print(" Windows %d.%d", osvi.dwMajorVersion, osvi.dwMinorVersion); } } + + if (os_vers >= 6000 && si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) { + st->print(" , 64 bit"); + } + st->print(" Build %d", osvi.dwBuildNumber); st->print(" %s", osvi.szCSDVersion); // service pack st->cr(); @@ -3146,7 +3156,12 @@ return true; } -char* os::reserve_memory_special(size_t bytes, char* addr, bool exec) { +char* os::reserve_memory_special(size_t bytes, size_t alignment, char* addr, bool exec) { + assert(UseLargePages, "only for large pages"); + + if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) { + return NULL; // Fallback to small pages. + } const DWORD prot = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; const DWORD flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES; @@ -5384,6 +5399,75 @@ return true; } +void* os::get_default_process_handle() { + return (void*)GetModuleHandle(NULL); +} + +// Builds a platform dependent Agent_OnLoad_ function name +// which is used to find statically linked in agents. +// Additionally for windows, takes into account __stdcall names. +// Parameters: +// sym_name: Symbol in library we are looking for +// lib_name: Name of library to look in, NULL for shared libs. +// is_absolute_path == true if lib_name is absolute path to agent +// such as "C:/a/b/L.dll" +// == false if only the base name of the library is passed in +// such as "L" +char* os::build_agent_function_name(const char *sym_name, const char *lib_name, + bool is_absolute_path) { + char *agent_entry_name; + size_t len; + size_t name_len; + size_t prefix_len = strlen(JNI_LIB_PREFIX); + size_t suffix_len = strlen(JNI_LIB_SUFFIX); + const char *start; + + if (lib_name != NULL) { + len = name_len = strlen(lib_name); + if (is_absolute_path) { + // Need to strip path, prefix and suffix + if ((start = strrchr(lib_name, *os::file_separator())) != NULL) { + lib_name = ++start; + } else { + // Need to check for C: + if ((start = strchr(lib_name, ':')) != NULL) { + lib_name = ++start; + } + } + if (len <= (prefix_len + suffix_len)) { + return NULL; + } + lib_name += prefix_len; + name_len = strlen(lib_name) - suffix_len; + } + } + len = (lib_name != NULL ? name_len : 0) + strlen(sym_name) + 2; + agent_entry_name = NEW_C_HEAP_ARRAY_RETURN_NULL(char, len, mtThread); + if (agent_entry_name == NULL) { + return NULL; + } + if (lib_name != NULL) { + const char *p = strrchr(sym_name, '@'); + if (p != NULL && p != sym_name) { + // sym_name == _Agent_OnLoad@XX + strncpy(agent_entry_name, sym_name, (p - sym_name)); + agent_entry_name[(p-sym_name)] = '\0'; + // agent_entry_name == _Agent_OnLoad + strcat(agent_entry_name, "_"); + strncat(agent_entry_name, lib_name, name_len); + strcat(agent_entry_name, p); + // agent_entry_name == _Agent_OnLoad_lib_name@XX + } else { + strcpy(agent_entry_name, sym_name); + strcat(agent_entry_name, "_"); + strncat(agent_entry_name, lib_name, name_len); + } + } else { + strcpy(agent_entry_name, sym_name); + } + return agent_entry_name; +} + #else // Kernel32 API typedef BOOL (WINAPI* SwitchToThread_Fn)(void); @@ -5628,3 +5712,9 @@ } #endif + +#ifndef PRODUCT +void TestReserveMemorySpecial_test() { + // No tests available for this platform +} +#endif diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/bsd_x86/vm/bsd_x86_32.ad --- a/src/os_cpu/bsd_x86/vm/bsd_x86_32.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -// -// Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// X86 Bsd Architecture Description File - diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/bsd_x86/vm/bsd_x86_64.ad --- a/src/os_cpu/bsd_x86/vm/bsd_x86_64.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -// -// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// AMD64 Bsd Architecture Description File - -//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- -// This block specifies the encoding classes used by the compiler to -// output byte streams. Encoding classes generate functions which are -// called by Machine Instruction Nodes in order to generate the bit -// encoding of the instruction. Operands specify their base encoding -// interface with the interface keyword. There are currently -// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & -// COND_INTER. REG_INTER causes an operand to generate a function -// which returns its register number when queried. CONST_INTER causes -// an operand to generate a function which returns the value of the -// constant when queried. MEMORY_INTER causes an operand to generate -// four functions which return the Base Register, the Index Register, -// the Scale Value, and the Offset Value of the operand when queried. -// COND_INTER causes an operand to generate six functions which return -// the encoding code (ie - encoding bits for the instruction) -// associated with each basic boolean condition for a conditional -// instruction. Instructions specify two basic values for encoding. -// They use the ins_encode keyword to specify their encoding class -// (which must be one of the class names specified in the encoding -// block), and they use the opcode keyword to specify, in order, their -// primary, secondary, and tertiary opcode. Only the opcode sections -// which a particular instruction needs for encoding need to be -// specified. -encode %{ - // Build emit functions for each basic byte or larger field in the intel - // encoding scheme (opcode, rm, sib, immediate), and call them from C++ - // code in the enc_class source block. Emit functions will live in the - // main source block for now. In future, we can generalize this by - // adding a syntax that specifies the sizes of fields in an order, - // so that the adlc can build the emit functions automagically - -%} - - -// Platform dependent source - -source %{ - -%} diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp --- a/src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -190,7 +190,7 @@ inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store_fence((volatile jlong*)p, (jlong)v); } inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jdouble_cast(v)); } +inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jlong_cast(v)); } inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { #ifdef AMD64 diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp --- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -715,6 +715,7 @@ err.report_and_die(); ShouldNotReachHere(); + return false; } // From solaris_i486.s ported to bsd_i486.s diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp --- a/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -66,6 +66,7 @@ frame os::get_sender_for_C_frame(frame* fr) { ShouldNotCallThis(); + return frame(); } frame os::current_frame() { @@ -103,16 +104,19 @@ address os::Bsd::ucontext_get_pc(ucontext_t* uc) { ShouldNotCallThis(); + return NULL; } ExtendedPC os::fetch_frame_from_context(void* ucVoid, intptr_t** ret_sp, intptr_t** ret_fp) { ShouldNotCallThis(); + return ExtendedPC(); } frame os::fetch_frame_from_context(void* ucVoid) { ShouldNotCallThis(); + return frame(); } extern "C" JNIEXPORT int @@ -240,6 +244,7 @@ sprintf(buf, fmt, sig, info->si_addr); fatal(buf); + return false; } void os::Bsd::init_thread_fpu_state(void) { @@ -373,17 +378,7 @@ extern "C" { int SpinPause() { - } - - int SafeFetch32(int *adr, int errValue) { - int value = errValue; - value = *adr; - return value; - } - intptr_t SafeFetchN(intptr_t *adr, intptr_t errValue) { - intptr_t value = errValue; - value = *adr; - return value; + return 1; } void _Copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/bsd_zero/vm/thread_bsd_zero.hpp --- a/src/os_cpu/bsd_zero/vm/thread_bsd_zero.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os_cpu/bsd_zero/vm/thread_bsd_zero.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -110,6 +110,7 @@ void* ucontext, bool isInJava) { ShouldNotCallThis(); + return false; } // These routines are only used on cpu architectures that diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/linux_x86/vm/linux_x86_32.ad --- a/src/os_cpu/linux_x86/vm/linux_x86_32.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -// -// Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// X86 Linux Architecture Description File - diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/linux_x86/vm/linux_x86_64.ad --- a/src/os_cpu/linux_x86/vm/linux_x86_64.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -// -// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// AMD64 Linux Architecture Description File - -//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- -// This block specifies the encoding classes used by the compiler to -// output byte streams. Encoding classes generate functions which are -// called by Machine Instruction Nodes in order to generate the bit -// encoding of the instruction. Operands specify their base encoding -// interface with the interface keyword. There are currently -// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & -// COND_INTER. REG_INTER causes an operand to generate a function -// which returns its register number when queried. CONST_INTER causes -// an operand to generate a function which returns the value of the -// constant when queried. MEMORY_INTER causes an operand to generate -// four functions which return the Base Register, the Index Register, -// the Scale Value, and the Offset Value of the operand when queried. -// COND_INTER causes an operand to generate six functions which return -// the encoding code (ie - encoding bits for the instruction) -// associated with each basic boolean condition for a conditional -// instruction. Instructions specify two basic values for encoding. -// They use the ins_encode keyword to specify their encoding class -// (which must be one of the class names specified in the encoding -// block), and they use the opcode keyword to specify, in order, their -// primary, secondary, and tertiary opcode. Only the opcode sections -// which a particular instruction needs for encoding need to be -// specified. -encode %{ - // Build emit functions for each basic byte or larger field in the intel - // encoding scheme (opcode, rm, sib, immediate), and call them from C++ - // code in the enc_class source block. Emit functions will live in the - // main source block for now. In future, we can generalize this by - // adding a syntax that specifies the sizes of fields in an order, - // so that the adlc can build the emit functions automagically - -%} - - -// Platform dependent source - -source %{ - -%} diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/linux_zero/vm/os_linux_zero.cpp --- a/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -410,16 +410,6 @@ int SpinPause() { } - int SafeFetch32(int *adr, int errValue) { - int value = errValue; - value = *adr; - return value; - } - intptr_t SafeFetchN(intptr_t *adr, intptr_t errValue) { - intptr_t value = errValue; - value = *adr; - return value; - } void _Copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { if (from > to) { diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/solaris_sparc/vm/solaris_sparc.ad --- a/src/os_cpu/solaris_sparc/vm/solaris_sparc.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -// -// Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// - -// -// - -// SPARC Solaris Architecture Description File diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/solaris_x86/vm/solaris_x86_32.ad --- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -// -// Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// X86 Solaris Architecture Description File - diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/solaris_x86/vm/solaris_x86_64.ad --- a/src/os_cpu/solaris_x86/vm/solaris_x86_64.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -// -// Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// AMD64 Solaris Architecture Description File - -//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- -// This block specifies the encoding classes used by the compiler to -// output byte streams. Encoding classes generate functions which are -// called by Machine Instruction Nodes in order to generate the bit -// encoding of the instruction. Operands specify their base encoding -// interface with the interface keyword. There are currently -// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & -// COND_INTER. REG_INTER causes an operand to generate a function -// which returns its register number when queried. CONST_INTER causes -// an operand to generate a function which returns the value of the -// constant when queried. MEMORY_INTER causes an operand to generate -// four functions which return the Base Register, the Index Register, -// the Scale Value, and the Offset Value of the operand when queried. -// COND_INTER causes an operand to generate six functions which return -// the encoding code (ie - encoding bits for the instruction) -// associated with each basic boolean condition for a conditional -// instruction. Instructions specify two basic values for encoding. -// They use the ins_encode keyword to specify their encoding class -// (which must be one of the class names specified in the encoding -// block), and they use the opcode keyword to specify, in order, their -// primary, secondary, and tertiary opcode. Only the opcode sections -// which a particular instruction needs for encoding need to be -// specified. -encode %{ - // Build emit functions for each basic byte or larger field in the intel - // encoding scheme (opcode, rm, sib, immediate), and call them from C++ - // code in the enc_class source block. Emit functions will live in the - // main source block for now. In future, we can generalize this by - // adding a syntax that specifies the sizes of fields in an order, - // so that the adlc can build the emit functions automagically -%} - - -// Platform dependent source - -source %{ -%} diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/windows_x86/vm/windows_x86_32.ad --- a/src/os_cpu/windows_x86/vm/windows_x86_32.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -// -// Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// X86 Win32 Architecture Description File - diff -r bdd155477289 -r e2722a66aba7 src/os_cpu/windows_x86/vm/windows_x86_64.ad --- a/src/os_cpu/windows_x86/vm/windows_x86_64.ad Thu Aug 22 09:39:54 2013 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -// -// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// AMD64 Win32 Architecture Description File - -//----------OS-DEPENDENT ENCODING BLOCK----------------------------------------------------- -// This block specifies the encoding classes used by the compiler to output -// byte streams. Encoding classes generate functions which are called by -// Machine Instruction Nodes in order to generate the bit encoding of the -// instruction. Operands specify their base encoding interface with the -// interface keyword. There are currently supported four interfaces, -// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an -// operand to generate a function which returns its register number when -// queried. CONST_INTER causes an operand to generate a function which -// returns the value of the constant when queried. MEMORY_INTER causes an -// operand to generate four functions which return the Base Register, the -// Index Register, the Scale Value, and the Offset Value of the operand when -// queried. COND_INTER causes an operand to generate six functions which -// return the encoding code (ie - encoding bits for the instruction) -// associated with each basic boolean condition for a conditional instruction. -// Instructions specify two basic values for encoding. They use the -// ins_encode keyword to specify their encoding class (which must be one of -// the class names specified in the encoding block), and they use the -// opcode keyword to specify, in order, their primary, secondary, and -// tertiary opcode. Only the opcode sections which a particular instruction -// needs for encoding need to be specified. -encode %{ - // Build emit functions for each basic byte or larger field in the intel - // encoding scheme (opcode, rm, sib, immediate), and call them from C++ - // code in the enc_class source block. Emit functions will live in the - // main source block for now. In future, we can generalize this by - // adding a syntax that specifies the sizes of fields in an order, - // so that the adlc can build the emit functions automagically - -%} - - -// Platform dependent source - -source %{ - -%} diff -r bdd155477289 -r e2722a66aba7 src/share/tools/ProjectCreator/BuildConfig.java --- a/src/share/tools/ProjectCreator/BuildConfig.java Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/tools/ProjectCreator/BuildConfig.java Thu Sep 05 11:04:39 2013 -0700 @@ -142,6 +142,69 @@ return rv; } + // Returns true if the specified path refers to a relative alternate + // source file. RelativeAltSrcInclude is usually "src\closed". + public static boolean matchesRelativeAltSrcInclude(String path) { + String relativeAltSrcInclude = + getFieldString(null, "RelativeAltSrcInclude"); + Vector v = getFieldVector(null, "AltRelativeInclude"); + for (String pathPart : v) { + if (path.contains(relativeAltSrcInclude + Util.sep + pathPart)) { + return true; + } + } + return false; + } + + // Returns the relative alternate source file for the specified path. + // Null is returned if the specified path does not have a matching + // alternate source file. + public static String getMatchingRelativeAltSrcFile(String path) { + Vector v = getFieldVector(null, "RelativeAltSrcFileList"); + if (v == null) { + return null; + } + for (String pathPart : v) { + if (path.endsWith(pathPart)) { + String relativeAltSrcInclude = + getFieldString(null, "RelativeAltSrcInclude"); + return relativeAltSrcInclude + Util.sep + pathPart; + } + } + return null; + } + + // Returns true if the specified path has a matching alternate + // source file. + public static boolean matchesRelativeAltSrcFile(String path) { + return getMatchingRelativeAltSrcFile(path) != null; + } + + // Track the specified alternate source file. The source file is + // tracked without the leading .* + // part to make matching regular source files easier. + public static void trackRelativeAltSrcFile(String path) { + String pattern = getFieldString(null, "RelativeAltSrcInclude") + + Util.sep; + int altSrcInd = path.indexOf(pattern); + if (altSrcInd == -1) { + // not an AltSrc path + return; + } + + altSrcInd += pattern.length(); + if (altSrcInd >= path.length()) { + // not a valid AltSrc path + return; + } + + String altSrcFile = path.substring(altSrcInd); + Vector v = getFieldVector(null, "RelativeAltSrcFileList"); + if (v == null || !v.contains(altSrcFile)) { + addFieldVector(null, "RelativeAltSrcFileList", altSrcFile); + } + } + void addTo(Hashtable ht, String key, String value) { ht.put(expandFormat(key), expandFormat(value)); } @@ -272,8 +335,19 @@ private Vector getSourceIncludes() { Vector rv = new Vector(); + String sourceBase = getFieldString(null, "SourceBase"); + + // add relative alternate source include values: + String relativeAltSrcInclude = + getFieldString(null, "RelativeAltSrcInclude"); + Vector asri = new Vector(); + collectRelevantVectors(asri, "AltRelativeInclude"); + for (String f : asri) { + rv.add(sourceBase + Util.sep + relativeAltSrcInclude + + Util.sep + f); + } + Vector ri = new Vector(); - String sourceBase = getFieldString(null, "SourceBase"); collectRelevantVectors(ri, "RelativeInclude"); for (String f : ri) { rv.add(sourceBase + Util.sep + f); @@ -541,35 +615,6 @@ } } -class CoreDebugConfig extends GenericDebugNonKernelConfig { - String getOptFlag() { - return getCI().getNoOptFlag(); - } - - CoreDebugConfig() { - initNames("core", "debug", "jvm.dll"); - init(getIncludes(), getDefines()); - } -} - -class CoreFastDebugConfig extends GenericDebugNonKernelConfig { - String getOptFlag() { - return getCI().getOptFlag(); - } - - CoreFastDebugConfig() { - initNames("core", "fastdebug", "jvm.dll"); - init(getIncludes(), getDefines()); - } -} - -class CoreProductConfig extends ProductConfig { - CoreProductConfig() { - initNames("core", "product", "jvm.dll"); - init(getIncludes(), getDefines()); - } -} - abstract class CompilerInterface { abstract Vector getBaseCompilerFlags(Vector defines, Vector includes, String outDir); diff -r bdd155477289 -r e2722a66aba7 src/share/tools/ProjectCreator/FileTreeCreator.java --- a/src/share/tools/ProjectCreator/FileTreeCreator.java Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/tools/ProjectCreator/FileTreeCreator.java Thu Sep 05 11:04:39 2013 -0700 @@ -12,11 +12,15 @@ final int startDirLength; Stack attributes = new Stack(); Vector allConfigs; - WinGammaPlatformVC10 wg; + WinGammaPlatform wg; + WinGammaPlatformVC10 wg10; - public FileTreeCreator(Path startDir, Vector allConfigs, WinGammaPlatformVC10 wg) { + public FileTreeCreator(Path startDir, Vector allConfigs, WinGammaPlatform wg) { super(); this.wg = wg; + if (wg instanceof WinGammaPlatformVC10) { + wg10 = (WinGammaPlatformVC10)wg; + } this.allConfigs = allConfigs; this.startDir = startDir; startDirLength = startDir.toAbsolutePath().toString().length(); diff -r bdd155477289 -r e2722a66aba7 src/share/tools/ProjectCreator/FileTreeCreatorVC10.java --- a/src/share/tools/ProjectCreator/FileTreeCreatorVC10.java Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/tools/ProjectCreator/FileTreeCreatorVC10.java Thu Sep 05 11:04:39 2013 -0700 @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + import static java.nio.file.FileVisitResult.CONTINUE; import java.io.IOException; @@ -21,6 +45,8 @@ boolean usePch = false; boolean disablePch = false; boolean useIgnore = false; + boolean isAltSrc = false; // only needed as a debugging crumb + boolean isReplacedByAltSrc = false; String fileName = file.getFileName().toString(); // TODO hideFile @@ -30,6 +56,26 @@ usePch = true; } + String fileLoc = vcProjLocation.relativize(file).toString(); + + // isAltSrc and isReplacedByAltSrc applies to all configs for a file + if (BuildConfig.matchesRelativeAltSrcInclude( + file.toAbsolutePath().toString())) { + // current file is an alternate source file so track it + isAltSrc = true; + BuildConfig.trackRelativeAltSrcFile( + file.toAbsolutePath().toString()); + } else if (BuildConfig.matchesRelativeAltSrcFile( + file.toAbsolutePath().toString())) { + // current file is a regular file that matches an alternate + // source file so yack about replacing the regular file + isReplacedByAltSrc = true; + System.out.println("INFO: alternate source file '" + + BuildConfig.getMatchingRelativeAltSrcFile( + file.toAbsolutePath().toString()) + + "' replaces '" + fileLoc + "'"); + } + for (BuildConfig cfg : allConfigs) { if (cfg.lookupHashFieldInContext("IgnoreFile", fileName) != null) { useIgnore = true; @@ -57,10 +103,9 @@ } } - String tagName = wg.getFileTagFromSuffix(fileName); - String fileLoc = vcProjLocation.relativize(file).toString(); + String tagName = wg10.getFileTagFromSuffix(fileName); - if (!useIgnore && !disablePch && !usePch) { + if (!useIgnore && !disablePch && !usePch && !isReplacedByAltSrc) { wg.tag(tagName, new String[] { "Include", fileLoc}); } else { wg.startTag( @@ -78,12 +123,17 @@ if (disablePch) { wg.tag("PrecompiledHeader", "Condition", "'$(Configuration)|$(Platform)'=='" + cfg.get("Name") + "'"); } + if (isReplacedByAltSrc) { + wg.tagData("ExcludedFromBuild", "true", "Condition", + "'$(Configuration)|$(Platform)'=='" + + cfg.get("Name") + "'"); + } } wg.endTag(); } String filter = startDir.relativize(file.getParent().toAbsolutePath()).toString(); - wg.addFilterDependency(fileLoc, filter); + wg10.addFilterDependency(fileLoc, filter); return CONTINUE; } @@ -112,7 +162,7 @@ if (!hide) { String name = startDir.relativize(path.toAbsolutePath()).toString(); if (!"".equals(name)) { - wg.addFilter(name); + wg10.addFilter(name); } attributes.push(newAttr); @@ -137,6 +187,4 @@ public void writeFileTree() throws IOException { Files.walkFileTree(this.startDir, this); } - - - } \ No newline at end of file +} diff -r bdd155477289 -r e2722a66aba7 src/share/tools/ProjectCreator/FileTreeCreatorVC7.java --- a/src/share/tools/ProjectCreator/FileTreeCreatorVC7.java Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/tools/ProjectCreator/FileTreeCreatorVC7.java Thu Sep 05 11:04:39 2013 -0700 @@ -12,7 +12,7 @@ public class FileTreeCreatorVC7 extends FileTreeCreator { public FileTreeCreatorVC7(Path startDir, Vector allConfigs, WinGammaPlatform wg) { - super(startDir, allConfigs, null); + super(startDir, allConfigs, wg); } @Override diff -r bdd155477289 -r e2722a66aba7 src/share/tools/ProjectCreator/ProjectCreator.java --- a/src/share/tools/ProjectCreator/ProjectCreator.java Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/tools/ProjectCreator/ProjectCreator.java Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,10 +39,15 @@ + "jvm.dll; no trailing slash>"); System.err.println(" If any of the above are specified, " + "they must all be."); + System.err.println(" Note: if '-altRelativeInclude' option below is " + + "used, then the '-relativeAltSrcInclude' option must be used " + + "to specify the alternate source dir, e.g., 'src\\closed'"); System.err.println(" Additional, optional arguments, which can be " + "specified multiple times:"); System.err.println(" -absoluteInclude "); + System.err.println(" -altRelativeInclude "); System.err.println(" -relativeInclude "); System.err.println(" -define "); System.err.println(" If any of the above are specified, "+ "they must all be."); + System.err.println(" Note: if '-altRelativeInclude' option below " + + "is used, then the '-relativeAltSrcInclude' " + + "option must be used to specify the alternate " + + "source dir, e.g., 'src\\closed'"); System.err.println(" Additional, optional arguments, which can be " + "specified multiple times:"); System.err.println(" -absoluteInclude "); + System.err.println(" -altRelativeInclude "); System.err.println(" -relativeInclude "); System.err.println(" -define allConfigs) throws IOException { System.out.println(); - System.out.print(" Writing .vcxproj file: " + projectFileName); + System.out.println(" Writing .vcxproj file: " + projectFileName); String projDir = Util.normalize(new File(projectFileName).getParent()); @@ -114,7 +138,7 @@ endTag(); printWriter.close(); - System.out.println(" Done."); + System.out.println(" Done writing .vcxproj file."); writeFilterFile(projectFileName, projectName, allConfigs, projDir); writeUserFile(projectFileName, allConfigs); diff -r bdd155477289 -r e2722a66aba7 src/share/tools/ProjectCreator/WinGammaPlatformVC7.java --- a/src/share/tools/ProjectCreator/WinGammaPlatformVC7.java Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/tools/ProjectCreator/WinGammaPlatformVC7.java Thu Sep 05 11:04:39 2013 -0700 @@ -139,19 +139,22 @@ tagV("Tool", cfg.getV("LinkerFlags")); - tag("Tool", - new String[] { - "Name", - "VCPostBuildEventTool", - "Description", - BuildConfig - .getFieldString(null, "PostbuildDescription"), - // Caution: String.replace(String,String) is available - // from JDK5 onwards only - "CommandLine", - cfg.expandFormat(BuildConfig.getFieldString(null, - "PostbuildCommand").replace("\t", - " ")) }); + String postBuildCmd = BuildConfig.getFieldString(null, + "PostbuildCommand"); + if (postBuildCmd != null) { + tag("Tool", + new String[] { + "Name", + "VCPostBuildEventTool", + "Description", + BuildConfig + .getFieldString(null, "PostbuildDescription"), + // Caution: String.replace(String,String) is available + // from JDK5 onwards only + "CommandLine", + cfg.expandFormat(postBuildCmd.replace("\t", + " ")) }); + } tag("Tool", new String[] { "Name", "VCPreBuildEventTool" }); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/c1/c1_Runtime1.cpp --- a/src/share/vm/c1/c1_Runtime1.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/c1/c1_Runtime1.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -915,16 +915,6 @@ // Return to the now deoptimized frame. } - // If we are patching in a non-perm oop, make sure the nmethod - // is on the right list. - if (ScavengeRootsInCode && mirror.not_null() && mirror()->is_scavengable()) { - MutexLockerEx ml_code (CodeCache_lock, Mutex::_no_safepoint_check_flag); - nmethod* nm = CodeCache::find_nmethod(caller_frame.pc()); - guarantee(nm != NULL, "only nmethods can contain non-perm oops"); - if (!nm->on_scavenge_root_list()) - CodeCache::add_scavenge_root_nmethod(nm); - } - // Now copy code back { @@ -1125,6 +1115,21 @@ } } } + + // If we are patching in a non-perm oop, make sure the nmethod + // is on the right list. + if (ScavengeRootsInCode && mirror.not_null() && mirror()->is_scavengable()) { + MutexLockerEx ml_code (CodeCache_lock, Mutex::_no_safepoint_check_flag); + nmethod* nm = CodeCache::find_nmethod(caller_frame.pc()); + guarantee(nm != NULL, "only nmethods can contain non-perm oops"); + if (!nm->on_scavenge_root_list()) { + CodeCache::add_scavenge_root_nmethod(nm); + } + + // Since we've patched some oops in the nmethod, + // (re)register it with the heap. + Universe::heap()->register_nmethod(nm); + } JRT_END // diff -r bdd155477289 -r e2722a66aba7 src/share/vm/classfile/classFileParser.cpp --- a/src/share/vm/classfile/classFileParser.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/classfile/classFileParser.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -2590,7 +2590,7 @@ valid_symbol_at(sourcefile_index), "Invalid SourceFile attribute at constant pool index %u in class file %s", sourcefile_index, CHECK); - set_class_sourcefile(_cp->symbol_at(sourcefile_index)); + set_class_sourcefile_index(sourcefile_index); } @@ -2728,7 +2728,7 @@ valid_symbol_at(signature_index), "Invalid constant pool index %u in Signature attribute in class file %s", signature_index, CHECK); - set_class_generic_signature(_cp->symbol_at(signature_index)); + set_class_generic_signature_index(signature_index); } void ClassFileParser::parse_classfile_bootstrap_methods_attribute(u4 attribute_byte_length, TRAPS) { @@ -2975,13 +2975,11 @@ void ClassFileParser::apply_parsed_class_attributes(instanceKlassHandle k) { if (_synthetic_flag) k->set_is_synthetic(); - if (_sourcefile != NULL) { - _sourcefile->increment_refcount(); - k->set_source_file_name(_sourcefile); + if (_sourcefile_index != 0) { + k->set_source_file_name_index(_sourcefile_index); } - if (_generic_signature != NULL) { - _generic_signature->increment_refcount(); - k->set_generic_signature(_generic_signature); + if (_generic_signature_index != 0) { + k->set_generic_signature_index(_generic_signature_index); } if (_sde_buffer != NULL) { k->set_source_debug_extension(_sde_buffer, _sde_length); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/classfile/classFileParser.hpp --- a/src/share/vm/classfile/classFileParser.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/classfile/classFileParser.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -62,8 +62,8 @@ bool _synthetic_flag; int _sde_length; char* _sde_buffer; - Symbol* _sourcefile; - Symbol* _generic_signature; + u2 _sourcefile_index; + u2 _generic_signature_index; // Metadata created before the instance klass is created. Must be deallocated // if not transferred to the InstanceKlass upon successful class loading @@ -81,16 +81,16 @@ Array* _fields_type_annotations; InstanceKlass* _klass; // InstanceKlass once created. - void set_class_synthetic_flag(bool x) { _synthetic_flag = x; } - void set_class_sourcefile(Symbol* x) { _sourcefile = x; } - void set_class_generic_signature(Symbol* x) { _generic_signature = x; } - void set_class_sde_buffer(char* x, int len) { _sde_buffer = x; _sde_length = len; } + void set_class_synthetic_flag(bool x) { _synthetic_flag = x; } + void set_class_sourcefile_index(u2 x) { _sourcefile_index = x; } + void set_class_generic_signature_index(u2 x) { _generic_signature_index = x; } + void set_class_sde_buffer(char* x, int len) { _sde_buffer = x; _sde_length = len; } void init_parsed_class_attributes(ClassLoaderData* loader_data) { _loader_data = loader_data; _synthetic_flag = false; - _sourcefile = NULL; - _generic_signature = NULL; + _sourcefile_index = 0; + _generic_signature_index = 0; _sde_buffer = NULL; _sde_length = 0; // initialize the other flags too: diff -r bdd155477289 -r e2722a66aba7 src/share/vm/classfile/javaClasses.cpp --- a/src/share/vm/classfile/javaClasses.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/classfile/javaClasses.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -2557,6 +2557,26 @@ *offset = value; } +// Support for java_lang_invoke_DirectMethodHandle + +int java_lang_invoke_DirectMethodHandle::_member_offset; + +oop java_lang_invoke_DirectMethodHandle::member(oop dmh) { + oop member_name = NULL; + bool is_dmh = dmh->is_oop() && java_lang_invoke_DirectMethodHandle::is_instance(dmh); + assert(is_dmh, "a DirectMethodHandle oop is expected"); + if (is_dmh) { + member_name = dmh->obj_field(member_offset_in_bytes()); + } + return member_name; +} + +void java_lang_invoke_DirectMethodHandle::compute_offsets() { + Klass* klass_oop = SystemDictionary::DirectMethodHandle_klass(); + if (klass_oop != NULL && EnableInvokeDynamic) { + compute_offset(_member_offset, klass_oop, vmSymbols::member_name(), vmSymbols::java_lang_invoke_MemberName_signature()); + } +} // Support for java_lang_invoke_MethodHandle @@ -3205,6 +3225,7 @@ java_lang_ThreadGroup::compute_offsets(); if (EnableInvokeDynamic) { java_lang_invoke_MethodHandle::compute_offsets(); + java_lang_invoke_DirectMethodHandle::compute_offsets(); java_lang_invoke_MemberName::compute_offsets(); java_lang_invoke_LambdaForm::compute_offsets(); java_lang_invoke_MethodType::compute_offsets(); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/classfile/javaClasses.hpp --- a/src/share/vm/classfile/javaClasses.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/classfile/javaClasses.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -976,6 +976,32 @@ static int form_offset_in_bytes() { return _form_offset; } }; +// Interface to java.lang.invoke.DirectMethodHandle objects + +class java_lang_invoke_DirectMethodHandle: AllStatic { + friend class JavaClasses; + + private: + static int _member_offset; // the MemberName of this DMH + + static void compute_offsets(); + + public: + // Accessors + static oop member(oop mh); + + // Testers + static bool is_subclass(Klass* klass) { + return klass->is_subclass_of(SystemDictionary::DirectMethodHandle_klass()); + } + static bool is_instance(oop obj) { + return obj != NULL && is_subclass(obj->klass()); + } + + // Accessors for code generation: + static int member_offset_in_bytes() { return _member_offset; } +}; + // Interface to java.lang.invoke.LambdaForm objects // (These are a private interface for managing adapter code generation.) diff -r bdd155477289 -r e2722a66aba7 src/share/vm/classfile/systemDictionary.hpp --- a/src/share/vm/classfile/systemDictionary.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/classfile/systemDictionary.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -151,6 +151,7 @@ do_klass(reflect_CallerSensitive_klass, sun_reflect_CallerSensitive, Opt ) \ \ /* support for dynamic typing; it's OK if these are NULL in earlier JDKs */ \ + do_klass(DirectMethodHandle_klass, java_lang_invoke_DirectMethodHandle, Opt ) \ do_klass(MethodHandle_klass, java_lang_invoke_MethodHandle, Pre_JSR292 ) \ do_klass(MemberName_klass, java_lang_invoke_MemberName, Pre_JSR292 ) \ do_klass(MethodHandleNatives_klass, java_lang_invoke_MethodHandleNatives, Pre_JSR292 ) \ diff -r bdd155477289 -r e2722a66aba7 src/share/vm/classfile/vmSymbols.hpp --- a/src/share/vm/classfile/vmSymbols.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/classfile/vmSymbols.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -255,6 +255,7 @@ /* Support for JSR 292 & invokedynamic (JDK 1.7 and above) */ \ template(java_lang_invoke_CallSite, "java/lang/invoke/CallSite") \ template(java_lang_invoke_ConstantCallSite, "java/lang/invoke/ConstantCallSite") \ + template(java_lang_invoke_DirectMethodHandle, "java/lang/invoke/DirectMethodHandle") \ template(java_lang_invoke_MutableCallSite, "java/lang/invoke/MutableCallSite") \ template(java_lang_invoke_VolatileCallSite, "java/lang/invoke/VolatileCallSite") \ template(java_lang_invoke_MethodHandle, "java/lang/invoke/MethodHandle") \ @@ -352,6 +353,7 @@ template(thread_id_name, "tid") \ template(newInstance0_name, "newInstance0") \ template(limit_name, "limit") \ + template(member_name, "member") \ template(forName_name, "forName") \ template(forName0_name, "forName0") \ template(isJavaIdentifierStart_name, "isJavaIdentifierStart") \ diff -r bdd155477289 -r e2722a66aba7 src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/code/nmethod.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -687,6 +687,7 @@ code_buffer->copy_values_to(this); if (ScavengeRootsInCode && detect_scavenge_root_oops()) { CodeCache::add_scavenge_root_nmethod(this); + Universe::heap()->register_nmethod(this); } debug_only(verify_scavenge_root_oops()); CodeCache::commit(this); @@ -881,6 +882,7 @@ dependencies->copy_to(this); if (ScavengeRootsInCode && detect_scavenge_root_oops()) { CodeCache::add_scavenge_root_nmethod(this); + Universe::heap()->register_nmethod(this); } debug_only(verify_scavenge_root_oops()); @@ -1300,6 +1302,13 @@ methodHandle the_method(method()); No_Safepoint_Verifier nsv; + // during patching, depending on the nmethod state we must notify the GC that + // code has been unloaded, unregistering it. We cannot do this right while + // holding the Patching_lock because we need to use the CodeCache_lock. This + // would be prone to deadlocks. + // This flag is used to remember whether we need to later lock and unregister. + bool nmethod_needs_unregister = false; + { // invalidate osr nmethod before acquiring the patching lock since // they both acquire leaf locks and we don't want a deadlock. @@ -1332,6 +1341,13 @@ inc_decompile_count(); } + // If the state is becoming a zombie, signal to unregister the nmethod with + // the heap. + // This nmethod may have already been unloaded during a full GC. + if ((state == zombie) && !is_unloaded()) { + nmethod_needs_unregister = true; + } + // Change state _state = state; @@ -1367,6 +1383,9 @@ // safepoint can sneak in, otherwise the oops used by the // dependency logic could have become stale. MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); + if (nmethod_needs_unregister) { + Universe::heap()->unregister_nmethod(this); + } flush_dependencies(NULL); } @@ -1817,21 +1836,10 @@ if (_method != NULL) f(_method); } - -// This method is called twice during GC -- once while -// tracing the "active" nmethods on thread stacks during -// the (strong) marking phase, and then again when walking -// the code cache contents during the weak roots processing -// phase. The two uses are distinguished by means of the -// 'do_strong_roots_only' flag, which is true in the first -// case. We want to walk the weak roots in the nmethod -// only in the second case. The weak roots in the nmethod -// are the oops in the ExceptionCache and the InlineCache -// oops. -void nmethod::oops_do(OopClosure* f, bool do_strong_roots_only) { +void nmethod::oops_do(OopClosure* f, bool allow_zombie) { // make sure the oops ready to receive visitors - assert(!is_zombie() && !is_unloaded(), - "should not call follow on zombie or unloaded nmethod"); + assert(allow_zombie || !is_zombie(), "should not call follow on zombie nmethod"); + assert(!is_unloaded(), "should not call follow on unloaded nmethod"); // If the method is not entrant or zombie then a JMP is plastered over the // first few bytes. If an oop in the old code was there, that oop diff -r bdd155477289 -r e2722a66aba7 src/share/vm/code/nmethod.hpp --- a/src/share/vm/code/nmethod.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/code/nmethod.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -566,7 +566,7 @@ void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map, OopClosure* f); void oops_do(OopClosure* f) { oops_do(f, false); } - void oops_do(OopClosure* f, bool do_strong_roots_only); + void oops_do(OopClosure* f, bool allow_zombie); bool detect_scavenge_root_oops(); void verify_scavenge_root_oops() PRODUCT_RETURN; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -50,6 +50,7 @@ #include "memory/genMarkSweep.hpp" #include "memory/genOopClosures.inline.hpp" #include "memory/iterator.hpp" +#include "memory/padded.hpp" #include "memory/referencePolicy.hpp" #include "memory/resourceArea.hpp" #include "memory/tenuredGeneration.hpp" @@ -3459,7 +3460,9 @@ void ConcurrentMarkSweepGeneration::shrink(size_t bytes) { assert_locked_or_safepoint(Heap_lock); size_t size = ReservedSpace::page_align_size_down(bytes); - if (size > 0) { + // Only shrink if a compaction was done so that all the free space + // in the generation is in a contiguous block at the end. + if (size > 0 && did_compact()) { shrink_by(size); } } @@ -5477,40 +5480,42 @@ HandleMark hm; SequentialSubTasksDone* pst = space->par_seq_tasks(); - assert(pst->valid(), "Uninitialized use?"); uint nth_task = 0; uint n_tasks = pst->n_tasks(); - HeapWord *start, *end; - while (!pst->is_task_claimed(/* reference */ nth_task)) { - // We claimed task # nth_task; compute its boundaries. - if (chunk_top == 0) { // no samples were taken - assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task"); - start = space->bottom(); - end = space->top(); - } else if (nth_task == 0) { - start = space->bottom(); - end = chunk_array[nth_task]; - } else if (nth_task < (uint)chunk_top) { - assert(nth_task >= 1, "Control point invariant"); - start = chunk_array[nth_task - 1]; - end = chunk_array[nth_task]; - } else { - assert(nth_task == (uint)chunk_top, "Control point invariant"); - start = chunk_array[chunk_top - 1]; - end = space->top(); - } - MemRegion mr(start, end); - // Verify that mr is in space - assert(mr.is_empty() || space->used_region().contains(mr), - "Should be in space"); - // Verify that "start" is an object boundary - assert(mr.is_empty() || oop(mr.start())->is_oop(), - "Should be an oop"); - space->par_oop_iterate(mr, cl); - } - pst->all_tasks_completed(); + if (n_tasks > 0) { + assert(pst->valid(), "Uninitialized use?"); + HeapWord *start, *end; + while (!pst->is_task_claimed(/* reference */ nth_task)) { + // We claimed task # nth_task; compute its boundaries. + if (chunk_top == 0) { // no samples were taken + assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task"); + start = space->bottom(); + end = space->top(); + } else if (nth_task == 0) { + start = space->bottom(); + end = chunk_array[nth_task]; + } else if (nth_task < (uint)chunk_top) { + assert(nth_task >= 1, "Control point invariant"); + start = chunk_array[nth_task - 1]; + end = chunk_array[nth_task]; + } else { + assert(nth_task == (uint)chunk_top, "Control point invariant"); + start = chunk_array[chunk_top - 1]; + end = space->top(); + } + MemRegion mr(start, end); + // Verify that mr is in space + assert(mr.is_empty() || space->used_region().contains(mr), + "Should be in space"); + // Verify that "start" is an object boundary + assert(mr.is_empty() || oop(mr.start())->is_oop(), + "Should be an oop"); + space->par_oop_iterate(mr, cl); + } + pst->all_tasks_completed(); + } } void @@ -5787,7 +5792,7 @@ DefNewGeneration* dng = (DefNewGeneration*)_young_gen; // Eden space - { + if (!dng->eden()->is_empty()) { SequentialSubTasksDone* pst = dng->eden()->par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); // Each valid entry in [0, _eden_chunk_index) represents a task. @@ -8693,9 +8698,10 @@ assert(inFreeRange(), "Should only be called if currently in a free range."); HeapWord* const eob = ((HeapWord*)fc) + chunk_size; assert(_sp->used_region().contains(eob - 1), - err_msg("eob = " PTR_FORMAT " out of bounds wrt _sp = [" PTR_FORMAT "," PTR_FORMAT ")" + err_msg("eob = " PTR_FORMAT " eob-1 = " PTR_FORMAT " _limit = " PTR_FORMAT + " out of bounds wrt _sp = [" PTR_FORMAT "," PTR_FORMAT ")" " when examining fc = " PTR_FORMAT "(" SIZE_FORMAT ")", - _limit, _sp->bottom(), _sp->end(), fc, chunk_size)); + eob, eob-1, _limit, _sp->bottom(), _sp->end(), fc, chunk_size)); if (eob >= _limit) { assert(eob == _limit || fc->is_free(), "Only a free chunk should allow us to cross over the limit"); if (CMSTraceSweeper) { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -4529,7 +4529,7 @@ _total_prev_live_bytes(0), _total_next_live_bytes(0), _hum_used_bytes(0), _hum_capacity_bytes(0), _hum_prev_live_bytes(0), _hum_next_live_bytes(0), - _total_remset_bytes(0) { + _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { G1CollectedHeap* g1h = G1CollectedHeap::heap(); MemRegion g1_committed = g1h->g1_committed(); MemRegion g1_reserved = g1h->g1_reserved(); @@ -4553,9 +4553,11 @@ G1PPRL_BYTE_H_FORMAT G1PPRL_BYTE_H_FORMAT G1PPRL_DOUBLE_H_FORMAT + G1PPRL_BYTE_H_FORMAT G1PPRL_BYTE_H_FORMAT, "type", "address-range", - "used", "prev-live", "next-live", "gc-eff", "remset"); + "used", "prev-live", "next-live", "gc-eff", + "remset", "code-roots"); _out->print_cr(G1PPRL_LINE_PREFIX G1PPRL_TYPE_H_FORMAT G1PPRL_ADDR_BASE_H_FORMAT @@ -4563,9 +4565,11 @@ G1PPRL_BYTE_H_FORMAT G1PPRL_BYTE_H_FORMAT G1PPRL_DOUBLE_H_FORMAT + G1PPRL_BYTE_H_FORMAT G1PPRL_BYTE_H_FORMAT, "", "", - "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", "(bytes)"); + "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", + "(bytes)", "(bytes)"); } // It takes as a parameter a reference to one of the _hum_* fields, it @@ -4608,6 +4612,8 @@ size_t next_live_bytes = r->next_live_bytes(); double gc_eff = r->gc_efficiency(); size_t remset_bytes = r->rem_set()->mem_size(); + size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size(); + if (r->used() == 0) { type = "FREE"; } else if (r->is_survivor()) { @@ -4642,6 +4648,7 @@ _total_prev_live_bytes += prev_live_bytes; _total_next_live_bytes += next_live_bytes; _total_remset_bytes += remset_bytes; + _total_strong_code_roots_bytes += strong_code_roots_bytes; // Print a line for this particular region. _out->print_cr(G1PPRL_LINE_PREFIX @@ -4651,9 +4658,11 @@ G1PPRL_BYTE_FORMAT G1PPRL_BYTE_FORMAT G1PPRL_DOUBLE_FORMAT + G1PPRL_BYTE_FORMAT G1PPRL_BYTE_FORMAT, type, bottom, end, - used_bytes, prev_live_bytes, next_live_bytes, gc_eff , remset_bytes); + used_bytes, prev_live_bytes, next_live_bytes, gc_eff, + remset_bytes, strong_code_roots_bytes); return false; } @@ -4669,7 +4678,8 @@ G1PPRL_SUM_MB_PERC_FORMAT("used") G1PPRL_SUM_MB_PERC_FORMAT("prev-live") G1PPRL_SUM_MB_PERC_FORMAT("next-live") - G1PPRL_SUM_MB_FORMAT("remset"), + G1PPRL_SUM_MB_FORMAT("remset") + G1PPRL_SUM_MB_FORMAT("code-roots"), bytes_to_mb(_total_capacity_bytes), bytes_to_mb(_total_used_bytes), perc(_total_used_bytes, _total_capacity_bytes), @@ -4677,6 +4687,7 @@ perc(_total_prev_live_bytes, _total_capacity_bytes), bytes_to_mb(_total_next_live_bytes), perc(_total_next_live_bytes, _total_capacity_bytes), - bytes_to_mb(_total_remset_bytes)); + bytes_to_mb(_total_remset_bytes), + bytes_to_mb(_total_strong_code_roots_bytes)); _out->cr(); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/concurrentMark.hpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -1257,6 +1257,9 @@ // Accumulator for the remembered set size size_t _total_remset_bytes; + // Accumulator for strong code roots memory size + size_t _total_strong_code_roots_bytes; + static double perc(size_t val, size_t total) { if (total == 0) { return 0.0; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "code/codeCache.hpp" #include "code/icBuffer.hpp" #include "gc_implementation/g1/bufferingOopClosure.hpp" #include "gc_implementation/g1/concurrentG1Refine.hpp" @@ -980,7 +981,8 @@ if (should_try_gc) { bool succeeded; - result = do_collection_pause(word_size, gc_count_before, &succeeded); + result = do_collection_pause(word_size, gc_count_before, &succeeded, + GCCause::_g1_inc_collection_pause); if (result != NULL) { assert(succeeded, "only way to get back a non-NULL result"); return result; @@ -1105,7 +1107,8 @@ // enough space for the allocation to succeed after the pause. bool succeeded; - result = do_collection_pause(word_size, gc_count_before, &succeeded); + result = do_collection_pause(word_size, gc_count_before, &succeeded, + GCCause::_g1_humongous_allocation); if (result != NULL) { assert(succeeded, "only way to get back a non-NULL result"); return result; @@ -1176,20 +1179,27 @@ ModRefBarrierSet* _mr_bs; public: PostMCRemSetClearClosure(G1CollectedHeap* g1h, ModRefBarrierSet* mr_bs) : - _g1h(g1h), _mr_bs(mr_bs) { } + _g1h(g1h), _mr_bs(mr_bs) {} + bool doHeapRegion(HeapRegion* r) { + HeapRegionRemSet* hrrs = r->rem_set(); + if (r->continuesHumongous()) { + // We'll assert that the strong code root list and RSet is empty + assert(hrrs->strong_code_roots_list_length() == 0, "sanity"); + assert(hrrs->occupied() == 0, "RSet should be empty"); return false; } + _g1h->reset_gc_time_stamps(r); - HeapRegionRemSet* hrrs = r->rem_set(); - if (hrrs != NULL) hrrs->clear(); + hrrs->clear(); // You might think here that we could clear just the cards // corresponding to the used region. But no: if we leave a dirty card // in a region we might allocate into, then it would prevent that card // from being enqueued, and cause it to be missed. // Re: the performance cost: we shouldn't be doing full GC anyway! _mr_bs->clear(MemRegion(r->bottom(), r->end())); + return false; } }; @@ -1269,30 +1279,6 @@ heap_region_iterate(&cl); } -double G1CollectedHeap::verify(bool guard, const char* msg) { - double verify_time_ms = 0.0; - - if (guard && total_collections() >= VerifyGCStartAt) { - double verify_start = os::elapsedTime(); - HandleMark hm; // Discard invalid handles created during verification - prepare_for_verify(); - Universe::verify(VerifyOption_G1UsePrevMarking, msg); - verify_time_ms = (os::elapsedTime() - verify_start) * 1000; - } - - return verify_time_ms; -} - -void G1CollectedHeap::verify_before_gc() { - double verify_time_ms = verify(VerifyBeforeGC, " VerifyBeforeGC:"); - g1_policy()->phase_times()->record_verify_before_time_ms(verify_time_ms); -} - -void G1CollectedHeap::verify_after_gc() { - double verify_time_ms = verify(VerifyAfterGC, " VerifyAfterGC:"); - g1_policy()->phase_times()->record_verify_after_time_ms(verify_time_ms); -} - bool G1CollectedHeap::do_collection(bool explicit_gc, bool clear_all_soft_refs, size_t word_size) { @@ -1433,7 +1419,7 @@ // Delete metaspaces for unloaded class loaders and clean up loader_data graph ClassLoaderDataGraph::purge(); - MetaspaceAux::verify_metrics(); + MetaspaceAux::verify_metrics(); // Note: since we've just done a full GC, concurrent // marking is no longer active. Therefore we need not @@ -1504,6 +1490,9 @@ heap_region_iterate(&rebuild_rs); } + // Rebuild the strong code root lists for each region + rebuild_strong_code_roots(); + if (true) { // FIXME MetaspaceGC::compute_new_size(); } @@ -2019,10 +2008,12 @@ size_t init_byte_size = collector_policy()->initial_heap_byte_size(); size_t max_byte_size = collector_policy()->max_heap_byte_size(); + size_t heap_alignment = collector_policy()->max_alignment(); // Ensure that the sizes are properly aligned. Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap"); Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap"); + Universe::check_alignment(max_byte_size, heap_alignment, "g1 heap"); _cg1r = new ConcurrentG1Refine(this); @@ -2039,12 +2030,8 @@ // If this happens then we could end up using a non-optimal // compressed oops mode. - // Since max_byte_size is aligned to the size of a heap region (checked - // above). - Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap"); - ReservedSpace heap_rs = Universe::reserve_heap(max_byte_size, - HeapRegion::GrainBytes); + heap_alignment); // It is important to do this in a way such that concurrent readers can't // temporarily think something is in the heap. (I've actually seen this @@ -3109,6 +3096,145 @@ return NULL; // keep some compilers happy } +// TODO: VerifyRootsClosure extends OopsInGenClosure so that we can +// pass it as the perm_blk to SharedHeap::process_strong_roots. +// When process_strong_roots stop calling perm_blk->younger_refs_iterate +// we can change this closure to extend the simpler OopClosure. +class VerifyRootsClosure: public OopsInGenClosure { +private: + G1CollectedHeap* _g1h; + VerifyOption _vo; + bool _failures; +public: + // _vo == UsePrevMarking -> use "prev" marking information, + // _vo == UseNextMarking -> use "next" marking information, + // _vo == UseMarkWord -> use mark word from object header. + VerifyRootsClosure(VerifyOption vo) : + _g1h(G1CollectedHeap::heap()), + _vo(vo), + _failures(false) { } + + bool failures() { return _failures; } + + template void do_oop_nv(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + if (_g1h->is_obj_dead_cond(obj, _vo)) { + gclog_or_tty->print_cr("Root location "PTR_FORMAT" " + "points to dead obj "PTR_FORMAT, p, (void*) obj); + if (_vo == VerifyOption_G1UseMarkWord) { + gclog_or_tty->print_cr(" Mark word: "PTR_FORMAT, (void*)(obj->mark())); + } + obj->print_on(gclog_or_tty); + _failures = true; + } + } + } + + void do_oop(oop* p) { do_oop_nv(p); } + void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + +class G1VerifyCodeRootOopClosure: public OopsInGenClosure { + G1CollectedHeap* _g1h; + OopClosure* _root_cl; + nmethod* _nm; + VerifyOption _vo; + bool _failures; + + template void do_oop_work(T* p) { + // First verify that this root is live + _root_cl->do_oop(p); + + if (!G1VerifyHeapRegionCodeRoots) { + // We're not verifying the code roots attached to heap region. + return; + } + + // Don't check the code roots during marking verification in a full GC + if (_vo == VerifyOption_G1UseMarkWord) { + return; + } + + // Now verify that the current nmethod (which contains p) is + // in the code root list of the heap region containing the + // object referenced by p. + + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + + // Now fetch the region containing the object + HeapRegion* hr = _g1h->heap_region_containing(obj); + HeapRegionRemSet* hrrs = hr->rem_set(); + // Verify that the strong code root list for this region + // contains the nmethod + if (!hrrs->strong_code_roots_list_contains(_nm)) { + gclog_or_tty->print_cr("Code root location "PTR_FORMAT" " + "from nmethod "PTR_FORMAT" not in strong " + "code roots for region ["PTR_FORMAT","PTR_FORMAT")", + p, _nm, hr->bottom(), hr->end()); + _failures = true; + } + } + } + +public: + G1VerifyCodeRootOopClosure(G1CollectedHeap* g1h, OopClosure* root_cl, VerifyOption vo): + _g1h(g1h), _root_cl(root_cl), _vo(vo), _nm(NULL), _failures(false) {} + + void do_oop(oop* p) { do_oop_work(p); } + void do_oop(narrowOop* p) { do_oop_work(p); } + + void set_nmethod(nmethod* nm) { _nm = nm; } + bool failures() { return _failures; } +}; + +class G1VerifyCodeRootBlobClosure: public CodeBlobClosure { + G1VerifyCodeRootOopClosure* _oop_cl; + +public: + G1VerifyCodeRootBlobClosure(G1VerifyCodeRootOopClosure* oop_cl): + _oop_cl(oop_cl) {} + + void do_code_blob(CodeBlob* cb) { + nmethod* nm = cb->as_nmethod_or_null(); + if (nm != NULL) { + _oop_cl->set_nmethod(nm); + nm->oops_do(_oop_cl); + } + } +}; + +class YoungRefCounterClosure : public OopClosure { + G1CollectedHeap* _g1h; + int _count; + public: + YoungRefCounterClosure(G1CollectedHeap* g1h) : _g1h(g1h), _count(0) {} + void do_oop(oop* p) { if (_g1h->is_in_young(*p)) { _count++; } } + void do_oop(narrowOop* p) { ShouldNotReachHere(); } + + int count() { return _count; } + void reset_count() { _count = 0; }; +}; + +class VerifyKlassClosure: public KlassClosure { + YoungRefCounterClosure _young_ref_counter_closure; + OopClosure *_oop_closure; + public: + VerifyKlassClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {} + void do_klass(Klass* k) { + k->oops_do(_oop_closure); + + _young_ref_counter_closure.reset_count(); + k->oops_do(&_young_ref_counter_closure); + if (_young_ref_counter_closure.count() > 0) { + guarantee(k->has_modified_oops(), err_msg("Klass %p, has young refs but is not dirty.", k)); + } + } +}; + class VerifyLivenessOopClosure: public OopClosure { G1CollectedHeap* _g1h; VerifyOption _vo; @@ -3242,75 +3368,7 @@ } }; -class YoungRefCounterClosure : public OopClosure { - G1CollectedHeap* _g1h; - int _count; - public: - YoungRefCounterClosure(G1CollectedHeap* g1h) : _g1h(g1h), _count(0) {} - void do_oop(oop* p) { if (_g1h->is_in_young(*p)) { _count++; } } - void do_oop(narrowOop* p) { ShouldNotReachHere(); } - - int count() { return _count; } - void reset_count() { _count = 0; }; -}; - -class VerifyKlassClosure: public KlassClosure { - YoungRefCounterClosure _young_ref_counter_closure; - OopClosure *_oop_closure; - public: - VerifyKlassClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {} - void do_klass(Klass* k) { - k->oops_do(_oop_closure); - - _young_ref_counter_closure.reset_count(); - k->oops_do(&_young_ref_counter_closure); - if (_young_ref_counter_closure.count() > 0) { - guarantee(k->has_modified_oops(), err_msg("Klass %p, has young refs but is not dirty.", k)); - } - } -}; - -// TODO: VerifyRootsClosure extends OopsInGenClosure so that we can -// pass it as the perm_blk to SharedHeap::process_strong_roots. -// When process_strong_roots stop calling perm_blk->younger_refs_iterate -// we can change this closure to extend the simpler OopClosure. -class VerifyRootsClosure: public OopsInGenClosure { -private: - G1CollectedHeap* _g1h; - VerifyOption _vo; - bool _failures; -public: - // _vo == UsePrevMarking -> use "prev" marking information, - // _vo == UseNextMarking -> use "next" marking information, - // _vo == UseMarkWord -> use mark word from object header. - VerifyRootsClosure(VerifyOption vo) : - _g1h(G1CollectedHeap::heap()), - _vo(vo), - _failures(false) { } - - bool failures() { return _failures; } - - template void do_oop_nv(T* p) { - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - if (_g1h->is_obj_dead_cond(obj, _vo)) { - gclog_or_tty->print_cr("Root location "PTR_FORMAT" " - "points to dead obj "PTR_FORMAT, p, (void*) obj); - if (_vo == VerifyOption_G1UseMarkWord) { - gclog_or_tty->print_cr(" Mark word: "PTR_FORMAT, (void*)(obj->mark())); - } - obj->print_on(gclog_or_tty); - _failures = true; - } - } - } - - void do_oop(oop* p) { do_oop_nv(p); } - void do_oop(narrowOop* p) { do_oop_nv(p); } -}; - -// This is the task used for parallel heap verification. +// This is the task used for parallel verification of the heap regions class G1ParVerifyTask: public AbstractGangTask { private: @@ -3344,20 +3402,15 @@ } }; -void G1CollectedHeap::verify(bool silent) { - verify(silent, VerifyOption_G1UsePrevMarking); -} - -void G1CollectedHeap::verify(bool silent, - VerifyOption vo) { +void G1CollectedHeap::verify(bool silent, VerifyOption vo) { if (SafepointSynchronize::is_at_safepoint()) { + assert(Thread::current()->is_VM_thread(), + "Expected to be executed serially by the VM thread at this point"); + if (!silent) { gclog_or_tty->print("Roots "); } VerifyRootsClosure rootsCl(vo); - - assert(Thread::current()->is_VM_thread(), - "Expected to be executed serially by the VM thread at this point"); - - CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false); + G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo); + G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl); VerifyKlassClosure klassCl(this, &rootsCl); // We apply the relevant closures to all the oops in the @@ -3376,7 +3429,7 @@ &klassCl ); - bool failures = rootsCl.failures(); + bool failures = rootsCl.failures() || codeRootsCl.failures(); if (vo != VerifyOption_G1UseMarkWord) { // If we're verifying during a full GC then the region sets @@ -3445,6 +3498,34 @@ } } +void G1CollectedHeap::verify(bool silent) { + verify(silent, VerifyOption_G1UsePrevMarking); +} + +double G1CollectedHeap::verify(bool guard, const char* msg) { + double verify_time_ms = 0.0; + + if (guard && total_collections() >= VerifyGCStartAt) { + double verify_start = os::elapsedTime(); + HandleMark hm; // Discard invalid handles created during verification + prepare_for_verify(); + Universe::verify(VerifyOption_G1UsePrevMarking, msg); + verify_time_ms = (os::elapsedTime() - verify_start) * 1000; + } + + return verify_time_ms; +} + +void G1CollectedHeap::verify_before_gc() { + double verify_time_ms = verify(VerifyBeforeGC, " VerifyBeforeGC:"); + g1_policy()->phase_times()->record_verify_before_time_ms(verify_time_ms); +} + +void G1CollectedHeap::verify_after_gc() { + double verify_time_ms = verify(VerifyAfterGC, " VerifyAfterGC:"); + g1_policy()->phase_times()->record_verify_after_time_ms(verify_time_ms); +} + class PrintRegionClosure: public HeapRegionClosure { outputStream* _st; public: @@ -3619,14 +3700,15 @@ HeapWord* G1CollectedHeap::do_collection_pause(size_t word_size, unsigned int gc_count_before, - bool* succeeded) { + bool* succeeded, + GCCause::Cause gc_cause) { assert_heap_not_locked_and_not_at_safepoint(); g1_policy()->record_stop_world_start(); VM_G1IncCollectionPause op(gc_count_before, word_size, false, /* should_initiate_conc_mark */ g1_policy()->max_pause_time_ms(), - GCCause::_g1_inc_collection_pause); + gc_cause); VMThread::execute(&op); HeapWord* result = op.result(); @@ -3866,8 +3948,9 @@ append_secondary_free_list_if_not_empty_with_lock(); } - assert(check_young_list_well_formed(), - "young list should be well formed"); + assert(check_young_list_well_formed(), "young list should be well formed"); + assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue), + "sanity check"); // Don't dynamically change the number of GC threads this early. A value of // 0 is used to indicate serial work. When parallel work is done, @@ -4987,7 +5070,11 @@ G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); - int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache; + // Don't scan the scavengable methods in the code cache as part + // of strong root scanning. The code roots that point into a + // region in the collection set are scanned when we scan the + // region's RSet. + int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings; pss.start_strong_roots(); _g1h->g1_process_strong_roots(/* is scavenging */ true, @@ -5029,67 +5116,6 @@ // *** Common G1 Evacuation Stuff -// Closures that support the filtering of CodeBlobs scanned during -// external root scanning. - -// Closure applied to reference fields in code blobs (specifically nmethods) -// to determine whether an nmethod contains references that point into -// the collection set. Used as a predicate when walking code roots so -// that only nmethods that point into the collection set are added to the -// 'marked' list. - -class G1FilteredCodeBlobToOopClosure : public CodeBlobToOopClosure { - - class G1PointsIntoCSOopClosure : public OopClosure { - G1CollectedHeap* _g1; - bool _points_into_cs; - public: - G1PointsIntoCSOopClosure(G1CollectedHeap* g1) : - _g1(g1), _points_into_cs(false) { } - - bool points_into_cs() const { return _points_into_cs; } - - template - void do_oop_nv(T* p) { - if (!_points_into_cs) { - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop) && - _g1->in_cset_fast_test(oopDesc::decode_heap_oop_not_null(heap_oop))) { - _points_into_cs = true; - } - } - } - - virtual void do_oop(oop* p) { do_oop_nv(p); } - virtual void do_oop(narrowOop* p) { do_oop_nv(p); } - }; - - G1CollectedHeap* _g1; - -public: - G1FilteredCodeBlobToOopClosure(G1CollectedHeap* g1, OopClosure* cl) : - CodeBlobToOopClosure(cl, true), _g1(g1) { } - - virtual void do_code_blob(CodeBlob* cb) { - nmethod* nm = cb->as_nmethod_or_null(); - if (nm != NULL && !(nm->test_oops_do_mark())) { - G1PointsIntoCSOopClosure predicate_cl(_g1); - nm->oops_do(&predicate_cl); - - if (predicate_cl.points_into_cs()) { - // At least one of the reference fields or the oop relocations - // in the nmethod points into the collection set. We have to - // 'mark' this nmethod. - // Note: Revisit the following if CodeBlobToOopClosure::do_code_blob() - // or MarkingCodeBlobClosure::do_code_blob() change. - if (!nm->test_set_oops_do_mark()) { - do_newly_marked_nmethod(nm); - } - } - } - } -}; - // This method is run in a GC worker. void @@ -5107,9 +5133,10 @@ BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots); - // Walk the code cache w/o buffering, because StarTask cannot handle - // unaligned oop locations. - G1FilteredCodeBlobToOopClosure eager_scan_code_roots(this, scan_non_heap_roots); + assert(so & SO_CodeCache || scan_rs != NULL, "must scan code roots somehow"); + // Walk the code cache/strong code roots w/o buffering, because StarTask + // cannot handle unaligned oop locations. + CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, true /* do_marking */); process_strong_roots(false, // no scoping; this is parallel code is_scavenging, so, @@ -5154,9 +5181,22 @@ } g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms); + // If this is an initial mark pause, and we're not scanning + // the entire code cache, we need to mark the oops in the + // strong code root lists for the regions that are not in + // the collection set. + // Note all threads participate in this set of root tasks. + double mark_strong_code_roots_ms = 0.0; + if (g1_policy()->during_initial_mark_pause() && !(so & SO_CodeCache)) { + double mark_strong_roots_start = os::elapsedTime(); + mark_strong_code_roots(worker_i); + mark_strong_code_roots_ms = (os::elapsedTime() - mark_strong_roots_start) * 1000.0; + } + g1_policy()->phase_times()->record_strong_code_root_mark_time(worker_i, mark_strong_code_roots_ms); + // Now scan the complement of the collection set. if (scan_rs != NULL) { - g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i); + g1_rem_set()->oops_into_collection_set_do(scan_rs, &eager_scan_code_roots, worker_i); } _process_strong_tasks->all_tasks_completed(); } @@ -5774,9 +5814,6 @@ process_discovered_references(n_workers); // Weak root processing. - // Note: when JSR 292 is enabled and code blobs can contain - // non-perm oops then we will need to process the code blobs - // here too. { G1STWIsAliveClosure is_alive(this); G1KeepAliveClosure keep_alive(this); @@ -5792,6 +5829,17 @@ hot_card_cache->reset_hot_cache(); hot_card_cache->set_use_cache(true); + // Migrate the strong code roots attached to each region in + // the collection set. Ideally we would like to do this + // after we have finished the scanning/evacuation of the + // strong code roots for a particular heap region. + migrate_strong_code_roots(); + + if (g1_policy()->during_initial_mark_pause()) { + // Reset the claim values set during marking the strong code roots + reset_heap_region_claim_values(); + } + finalize_for_evac_failure(); if (evacuation_failed()) { @@ -6588,3 +6636,208 @@ _humongous_set.verify_end(); _free_list.verify_end(); } + +// Optimized nmethod scanning + +class RegisterNMethodOopClosure: public OopClosure { + G1CollectedHeap* _g1h; + nmethod* _nm; + + template void do_oop_work(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + HeapRegion* hr = _g1h->heap_region_containing(obj); + assert(!hr->isHumongous(), "code root in humongous region?"); + + // HeapRegion::add_strong_code_root() avoids adding duplicate + // entries but having duplicates is OK since we "mark" nmethods + // as visited when we scan the strong code root lists during the GC. + hr->add_strong_code_root(_nm); + assert(hr->rem_set()->strong_code_roots_list_contains(_nm), "add failed?"); + } + } + +public: + RegisterNMethodOopClosure(G1CollectedHeap* g1h, nmethod* nm) : + _g1h(g1h), _nm(nm) {} + + void do_oop(oop* p) { do_oop_work(p); } + void do_oop(narrowOop* p) { do_oop_work(p); } +}; + +class UnregisterNMethodOopClosure: public OopClosure { + G1CollectedHeap* _g1h; + nmethod* _nm; + + template void do_oop_work(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + HeapRegion* hr = _g1h->heap_region_containing(obj); + assert(!hr->isHumongous(), "code root in humongous region?"); + hr->remove_strong_code_root(_nm); + assert(!hr->rem_set()->strong_code_roots_list_contains(_nm), "remove failed?"); + } + } + +public: + UnregisterNMethodOopClosure(G1CollectedHeap* g1h, nmethod* nm) : + _g1h(g1h), _nm(nm) {} + + void do_oop(oop* p) { do_oop_work(p); } + void do_oop(narrowOop* p) { do_oop_work(p); } +}; + +void G1CollectedHeap::register_nmethod(nmethod* nm) { + CollectedHeap::register_nmethod(nm); + + guarantee(nm != NULL, "sanity"); + RegisterNMethodOopClosure reg_cl(this, nm); + nm->oops_do(®_cl); +} + +void G1CollectedHeap::unregister_nmethod(nmethod* nm) { + CollectedHeap::unregister_nmethod(nm); + + guarantee(nm != NULL, "sanity"); + UnregisterNMethodOopClosure reg_cl(this, nm); + nm->oops_do(®_cl, true); +} + +class MigrateCodeRootsHeapRegionClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion *hr) { + assert(!hr->isHumongous(), "humongous region in collection set?"); + hr->migrate_strong_code_roots(); + return false; + } +}; + +void G1CollectedHeap::migrate_strong_code_roots() { + MigrateCodeRootsHeapRegionClosure cl; + double migrate_start = os::elapsedTime(); + collection_set_iterate(&cl); + double migration_time_ms = (os::elapsedTime() - migrate_start) * 1000.0; + g1_policy()->phase_times()->record_strong_code_root_migration_time(migration_time_ms); +} + +// Mark all the code roots that point into regions *not* in the +// collection set. +// +// Note we do not want to use a "marking" CodeBlobToOopClosure while +// walking the the code roots lists of regions not in the collection +// set. Suppose we have an nmethod (M) that points to objects in two +// separate regions - one in the collection set (R1) and one not (R2). +// Using a "marking" CodeBlobToOopClosure here would result in "marking" +// nmethod M when walking the code roots for R1. When we come to scan +// the code roots for R2, we would see that M is already marked and it +// would be skipped and the objects in R2 that are referenced from M +// would not be evacuated. + +class MarkStrongCodeRootCodeBlobClosure: public CodeBlobClosure { + + class MarkStrongCodeRootOopClosure: public OopClosure { + ConcurrentMark* _cm; + HeapRegion* _hr; + uint _worker_id; + + template void do_oop_work(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + // Only mark objects in the region (which is assumed + // to be not in the collection set). + if (_hr->is_in(obj)) { + _cm->grayRoot(obj, (size_t) obj->size(), _worker_id); + } + } + } + + public: + MarkStrongCodeRootOopClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id) : + _cm(cm), _hr(hr), _worker_id(worker_id) { + assert(!_hr->in_collection_set(), "sanity"); + } + + void do_oop(narrowOop* p) { do_oop_work(p); } + void do_oop(oop* p) { do_oop_work(p); } + }; + + MarkStrongCodeRootOopClosure _oop_cl; + +public: + MarkStrongCodeRootCodeBlobClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id): + _oop_cl(cm, hr, worker_id) {} + + void do_code_blob(CodeBlob* cb) { + nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null(); + if (nm != NULL) { + nm->oops_do(&_oop_cl); + } + } +}; + +class MarkStrongCodeRootsHRClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + uint _worker_id; + +public: + MarkStrongCodeRootsHRClosure(G1CollectedHeap* g1h, uint worker_id) : + _g1h(g1h), _worker_id(worker_id) {} + + bool doHeapRegion(HeapRegion *hr) { + HeapRegionRemSet* hrrs = hr->rem_set(); + if (hr->isHumongous()) { + // Code roots should never be attached to a humongous region + assert(hrrs->strong_code_roots_list_length() == 0, "sanity"); + return false; + } + + if (hr->in_collection_set()) { + // Don't mark code roots into regions in the collection set here. + // They will be marked when we scan them. + return false; + } + + MarkStrongCodeRootCodeBlobClosure cb_cl(_g1h->concurrent_mark(), hr, _worker_id); + hr->strong_code_roots_do(&cb_cl); + return false; + } +}; + +void G1CollectedHeap::mark_strong_code_roots(uint worker_id) { + MarkStrongCodeRootsHRClosure cl(this, worker_id); + if (G1CollectedHeap::use_parallel_gc_threads()) { + heap_region_par_iterate_chunked(&cl, + worker_id, + workers()->active_workers(), + HeapRegion::ParMarkRootClaimValue); + } else { + heap_region_iterate(&cl); + } +} + +class RebuildStrongCodeRootClosure: public CodeBlobClosure { + G1CollectedHeap* _g1h; + +public: + RebuildStrongCodeRootClosure(G1CollectedHeap* g1h) : + _g1h(g1h) {} + + void do_code_blob(CodeBlob* cb) { + nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL; + if (nm == NULL) { + return; + } + + if (ScavengeRootsInCode && nm->detect_scavenge_root_oops()) { + _g1h->register_nmethod(nm); + } + } +}; + +void G1CollectedHeap::rebuild_strong_code_roots() { + RebuildStrongCodeRootClosure blob_cl(this); + CodeCache::blobs_do(&blob_cl); +} diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -46,6 +46,7 @@ // may combine concurrent marking with parallel, incremental compaction of // heap subsets that will yield large amounts of garbage. +// Forward declarations class HeapRegion; class HRRSCleanupTask; class GenerationSpec; @@ -69,6 +70,7 @@ class G1NewTracer; class G1OldTracer; class EvacuationFailedInfo; +class nmethod; typedef OverflowTaskQueue RefToScanQueue; typedef GenericTaskQueueSet RefToScanQueueSet; @@ -163,18 +165,6 @@ : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { } }; -// The G1 STW is alive closure. -// An instance is embedded into the G1CH and used as the -// (optional) _is_alive_non_header closure in the STW -// reference processor. It is also extensively used during -// reference processing during STW evacuation pauses. -class G1STWIsAliveClosure: public BoolObjectClosure { - G1CollectedHeap* _g1; -public: - G1STWIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} - bool do_object_b(oop p); -}; - class SurvivorGCAllocRegion : public G1AllocRegion { protected: virtual HeapRegion* allocate_new_region(size_t word_size, bool force); @@ -193,6 +183,18 @@ : G1AllocRegion("Old GC Alloc Region", true /* bot_updates */) { } }; +// The G1 STW is alive closure. +// An instance is embedded into the G1CH and used as the +// (optional) _is_alive_non_header closure in the STW +// reference processor. It is also extensively used during +// reference processing during STW evacuation pauses. +class G1STWIsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; +public: + G1STWIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + bool do_object_b(oop p); +}; + class RefineCardTableEntryClosure; class G1CollectedHeap : public SharedHeap { @@ -774,9 +776,10 @@ // it has to be read while holding the Heap_lock. Currently, both // methods that call do_collection_pause() release the Heap_lock // before the call, so it's easy to read gc_count_before just before. - HeapWord* do_collection_pause(size_t word_size, - unsigned int gc_count_before, - bool* succeeded); + HeapWord* do_collection_pause(size_t word_size, + unsigned int gc_count_before, + bool* succeeded, + GCCause::Cause gc_cause); // The guts of the incremental collection pause, executed by the vm // thread. It returns false if it is unable to do the collection due @@ -1549,42 +1552,6 @@ virtual jlong millis_since_last_gc(); - // Perform any cleanup actions necessary before allowing a verification. - virtual void prepare_for_verify(); - - // Perform verification. - - // vo == UsePrevMarking -> use "prev" marking information, - // vo == UseNextMarking -> use "next" marking information - // vo == UseMarkWord -> use the mark word in the object header - // - // NOTE: Only the "prev" marking information is guaranteed to be - // consistent most of the time, so most calls to this should use - // vo == UsePrevMarking. - // Currently, there is only one case where this is called with - // vo == UseNextMarking, which is to verify the "next" marking - // information at the end of remark. - // Currently there is only one place where this is called with - // vo == UseMarkWord, which is to verify the marking during a - // full GC. - void verify(bool silent, VerifyOption vo); - - // Override; it uses the "prev" marking information - virtual void verify(bool silent); - - virtual void print_on(outputStream* st) const; - virtual void print_extended_on(outputStream* st) const; - virtual void print_on_error(outputStream* st) const; - - virtual void print_gc_threads_on(outputStream* st) const; - virtual void gc_threads_do(ThreadClosure* tc) const; - - // Override - void print_tracing_info() const; - - // The following two methods are helpful for debugging RSet issues. - void print_cset_rsets() PRODUCT_RETURN; - void print_all_rsets() PRODUCT_RETURN; // Convenience function to be used in situations where the heap type can be // asserted to be this type. @@ -1661,13 +1628,86 @@ else return is_obj_ill(obj, hr); } + bool allocated_since_marking(oop obj, HeapRegion* hr, VerifyOption vo); + HeapWord* top_at_mark_start(HeapRegion* hr, VerifyOption vo); + bool is_marked(oop obj, VerifyOption vo); + const char* top_at_mark_start_str(VerifyOption vo); + + ConcurrentMark* concurrent_mark() const { return _cm; } + + // Refinement + + ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; } + + // The dirty cards region list is used to record a subset of regions + // whose cards need clearing. The list if populated during the + // remembered set scanning and drained during the card table + // cleanup. Although the methods are reentrant, population/draining + // phases must not overlap. For synchronization purposes the last + // element on the list points to itself. + HeapRegion* _dirty_cards_region_list; + void push_dirty_cards_region(HeapRegion* hr); + HeapRegion* pop_dirty_cards_region(); + + // Optimized nmethod scanning support routines + + // Register the given nmethod with the G1 heap + virtual void register_nmethod(nmethod* nm); + + // Unregister the given nmethod from the G1 heap + virtual void unregister_nmethod(nmethod* nm); + + // Migrate the nmethods in the code root lists of the regions + // in the collection set to regions in to-space. In the event + // of an evacuation failure, nmethods that reference objects + // that were not successfullly evacuated are not migrated. + void migrate_strong_code_roots(); + + // During an initial mark pause, mark all the code roots that + // point into regions *not* in the collection set. + void mark_strong_code_roots(uint worker_id); + + // Rebuild the stong code root lists for each region + // after a full GC + void rebuild_strong_code_roots(); + + // Verification + + // The following is just to alert the verification code + // that a full collection has occurred and that the + // remembered sets are no longer up to date. + bool _full_collection; + void set_full_collection() { _full_collection = true;} + void clear_full_collection() {_full_collection = false;} + bool full_collection() {return _full_collection;} + + // Perform any cleanup actions necessary before allowing a verification. + virtual void prepare_for_verify(); + + // Perform verification. + + // vo == UsePrevMarking -> use "prev" marking information, + // vo == UseNextMarking -> use "next" marking information + // vo == UseMarkWord -> use the mark word in the object header + // + // NOTE: Only the "prev" marking information is guaranteed to be + // consistent most of the time, so most calls to this should use + // vo == UsePrevMarking. + // Currently, there is only one case where this is called with + // vo == UseNextMarking, which is to verify the "next" marking + // information at the end of remark. + // Currently there is only one place where this is called with + // vo == UseMarkWord, which is to verify the marking during a + // full GC. + void verify(bool silent, VerifyOption vo); + + // Override; it uses the "prev" marking information + virtual void verify(bool silent); + // The methods below are here for convenience and dispatch the // appropriate method depending on value of the given VerifyOption - // parameter. The options for that parameter are: - // - // vo == UsePrevMarking -> use "prev" marking information, - // vo == UseNextMarking -> use "next" marking information, - // vo == UseMarkWord -> use mark word from object header + // parameter. The values for that parameter, and their meanings, + // are the same as those above. bool is_obj_dead_cond(const oop obj, const HeapRegion* hr, @@ -1692,31 +1732,21 @@ return false; // keep some compilers happy } - bool allocated_since_marking(oop obj, HeapRegion* hr, VerifyOption vo); - HeapWord* top_at_mark_start(HeapRegion* hr, VerifyOption vo); - bool is_marked(oop obj, VerifyOption vo); - const char* top_at_mark_start_str(VerifyOption vo); + // Printing - // The following is just to alert the verification code - // that a full collection has occurred and that the - // remembered sets are no longer up to date. - bool _full_collection; - void set_full_collection() { _full_collection = true;} - void clear_full_collection() {_full_collection = false;} - bool full_collection() {return _full_collection;} + virtual void print_on(outputStream* st) const; + virtual void print_extended_on(outputStream* st) const; + virtual void print_on_error(outputStream* st) const; - ConcurrentMark* concurrent_mark() const { return _cm; } - ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; } + virtual void print_gc_threads_on(outputStream* st) const; + virtual void gc_threads_do(ThreadClosure* tc) const; - // The dirty cards region list is used to record a subset of regions - // whose cards need clearing. The list if populated during the - // remembered set scanning and drained during the card table - // cleanup. Although the methods are reentrant, population/draining - // phases must not overlap. For synchronization purposes the last - // element on the list points to itself. - HeapRegion* _dirty_cards_region_list; - void push_dirty_cards_region(HeapRegion* hr); - HeapRegion* pop_dirty_cards_region(); + // Override + void print_tracing_info() const; + + // The following two methods are helpful for debugging RSet issues. + void print_cset_rsets() PRODUCT_RETURN; + void print_all_rsets() PRODUCT_RETURN; public: void stop_conc_gc_threads(); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -313,7 +313,8 @@ void G1CollectorPolicy::initialize_flags() { set_min_alignment(HeapRegion::GrainBytes); size_t card_table_alignment = GenRemSet::max_alignment_constraint(rem_set_name()); - set_max_alignment(MAX2(card_table_alignment, min_alignment())); + size_t page_size = UseLargePages ? os::large_page_size() : os::vm_page_size(); + set_max_alignment(MAX3(card_table_alignment, min_alignment(), page_size)); if (SurvivorRatio < 1) { vm_exit_during_initialization("Invalid survivor ratio specified"); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -161,6 +161,8 @@ _last_update_rs_times_ms(_max_gc_threads, "%.1lf"), _last_update_rs_processed_buffers(_max_gc_threads, "%d"), _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"), + _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"), + _last_strong_code_root_mark_times_ms(_max_gc_threads, "%.1lf"), _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"), _last_termination_times_ms(_max_gc_threads, "%.1lf"), _last_termination_attempts(_max_gc_threads, SIZE_FORMAT), @@ -182,6 +184,8 @@ _last_update_rs_times_ms.reset(); _last_update_rs_processed_buffers.reset(); _last_scan_rs_times_ms.reset(); + _last_strong_code_root_scan_times_ms.reset(); + _last_strong_code_root_mark_times_ms.reset(); _last_obj_copy_times_ms.reset(); _last_termination_times_ms.reset(); _last_termination_attempts.reset(); @@ -197,6 +201,8 @@ _last_update_rs_times_ms.verify(); _last_update_rs_processed_buffers.verify(); _last_scan_rs_times_ms.verify(); + _last_strong_code_root_scan_times_ms.verify(); + _last_strong_code_root_mark_times_ms.verify(); _last_obj_copy_times_ms.verify(); _last_termination_times_ms.verify(); _last_termination_attempts.verify(); @@ -210,6 +216,8 @@ _last_satb_filtering_times_ms.get(i) + _last_update_rs_times_ms.get(i) + _last_scan_rs_times_ms.get(i) + + _last_strong_code_root_scan_times_ms.get(i) + + _last_strong_code_root_mark_times_ms.get(i) + _last_obj_copy_times_ms.get(i) + _last_termination_times_ms.get(i); @@ -239,6 +247,9 @@ // Now subtract the time taken to fix up roots in generated code misc_time_ms += _cur_collection_code_root_fixup_time_ms; + // Strong code root migration time + misc_time_ms += _cur_strong_code_root_migration_time_ms; + // Subtract the time taken to clean the card table from the // current value of "other time" misc_time_ms += _cur_clear_ct_time_ms; @@ -257,9 +268,13 @@ if (_last_satb_filtering_times_ms.sum() > 0.0) { _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)"); } + if (_last_strong_code_root_mark_times_ms.sum() > 0.0) { + _last_strong_code_root_mark_times_ms.print(2, "Code Root Marking (ms)"); + } _last_update_rs_times_ms.print(2, "Update RS (ms)"); _last_update_rs_processed_buffers.print(3, "Processed Buffers"); _last_scan_rs_times_ms.print(2, "Scan RS (ms)"); + _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)"); _last_obj_copy_times_ms.print(2, "Object Copy (ms)"); _last_termination_times_ms.print(2, "Termination (ms)"); if (G1Log::finest()) { @@ -273,12 +288,17 @@ if (_last_satb_filtering_times_ms.sum() > 0.0) { _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)"); } + if (_last_strong_code_root_mark_times_ms.sum() > 0.0) { + _last_strong_code_root_mark_times_ms.print(1, "Code Root Marking (ms)"); + } _last_update_rs_times_ms.print(1, "Update RS (ms)"); _last_update_rs_processed_buffers.print(2, "Processed Buffers"); _last_scan_rs_times_ms.print(1, "Scan RS (ms)"); + _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)"); _last_obj_copy_times_ms.print(1, "Object Copy (ms)"); } print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms); + print_stats(1, "Code Root Migration", _cur_strong_code_root_migration_time_ms); print_stats(1, "Clear CT", _cur_clear_ct_time_ms); double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms(); print_stats(1, "Other", misc_time_ms); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -119,6 +119,8 @@ WorkerDataArray _last_update_rs_times_ms; WorkerDataArray _last_update_rs_processed_buffers; WorkerDataArray _last_scan_rs_times_ms; + WorkerDataArray _last_strong_code_root_scan_times_ms; + WorkerDataArray _last_strong_code_root_mark_times_ms; WorkerDataArray _last_obj_copy_times_ms; WorkerDataArray _last_termination_times_ms; WorkerDataArray _last_termination_attempts; @@ -128,6 +130,7 @@ double _cur_collection_par_time_ms; double _cur_collection_code_root_fixup_time_ms; + double _cur_strong_code_root_migration_time_ms; double _cur_clear_ct_time_ms; double _cur_ref_proc_time_ms; @@ -179,6 +182,14 @@ _last_scan_rs_times_ms.set(worker_i, ms); } + void record_strong_code_root_scan_time(uint worker_i, double ms) { + _last_strong_code_root_scan_times_ms.set(worker_i, ms); + } + + void record_strong_code_root_mark_time(uint worker_i, double ms) { + _last_strong_code_root_mark_times_ms.set(worker_i, ms); + } + void record_obj_copy_time(uint worker_i, double ms) { _last_obj_copy_times_ms.set(worker_i, ms); } @@ -208,6 +219,10 @@ _cur_collection_code_root_fixup_time_ms = ms; } + void record_strong_code_root_migration_time(double ms) { + _cur_strong_code_root_migration_time_ms = ms; + } + void record_ref_proc_time(double ms) { _cur_ref_proc_time_ms = ms; } @@ -294,6 +309,14 @@ return _last_scan_rs_times_ms.average(); } + double average_last_strong_code_root_scan_time(){ + return _last_strong_code_root_scan_times_ms.average(); + } + + double average_last_strong_code_root_mark_time(){ + return _last_strong_code_root_mark_times_ms.average(); + } + double average_last_obj_copy_time() { return _last_obj_copy_times_ms.average(); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp --- a/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -262,6 +262,7 @@ old_collection_counters()->update_all(); young_collection_counters()->update_all(); MetaspaceCounters::update_performance_counters(); + CompressedClassSpaceCounters::update_performance_counters(); } } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -104,15 +104,25 @@ class ScanRSClosure : public HeapRegionClosure { size_t _cards_done, _cards; G1CollectedHeap* _g1h; + OopsInHeapRegionClosure* _oc; + CodeBlobToOopClosure* _code_root_cl; + G1BlockOffsetSharedArray* _bot_shared; CardTableModRefBS *_ct_bs; - int _worker_i; - int _block_size; - bool _try_claimed; + + double _strong_code_root_scan_time_sec; + int _worker_i; + int _block_size; + bool _try_claimed; + public: - ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) : + ScanRSClosure(OopsInHeapRegionClosure* oc, + CodeBlobToOopClosure* code_root_cl, + int worker_i) : _oc(oc), + _code_root_cl(code_root_cl), + _strong_code_root_scan_time_sec(0.0), _cards(0), _cards_done(0), _worker_i(worker_i), @@ -160,6 +170,12 @@ card_start, card_start + G1BlockOffsetSharedArray::N_words); } + void scan_strong_code_roots(HeapRegion* r) { + double scan_start = os::elapsedTime(); + r->strong_code_roots_do(_code_root_cl); + _strong_code_root_scan_time_sec += (os::elapsedTime() - scan_start); + } + bool doHeapRegion(HeapRegion* r) { assert(r->in_collection_set(), "should only be called on elements of CS."); HeapRegionRemSet* hrrs = r->rem_set(); @@ -173,6 +189,7 @@ // _try_claimed || r->claim_iter() // is true: either we're supposed to work on claimed-but-not-complete // regions, or we successfully claimed the region. + HeapRegionRemSetIterator iter(hrrs); size_t card_index; @@ -205,30 +222,43 @@ } } if (!_try_claimed) { + // Scan the strong code root list attached to the current region + scan_strong_code_roots(r); + hrrs->set_iter_complete(); } return false; } + + double strong_code_root_scan_time_sec() { + return _strong_code_root_scan_time_sec; + } + size_t cards_done() { return _cards_done;} size_t cards_looked_up() { return _cards;} }; -void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { +void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, + CodeBlobToOopClosure* code_root_cl, + int worker_i) { double rs_time_start = os::elapsedTime(); HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i); - ScanRSClosure scanRScl(oc, worker_i); + ScanRSClosure scanRScl(oc, code_root_cl, worker_i); _g1->collection_set_iterate_from(startRegion, &scanRScl); scanRScl.set_try_claimed(); _g1->collection_set_iterate_from(startRegion, &scanRScl); - double scan_rs_time_sec = os::elapsedTime() - rs_time_start; + double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) + - scanRScl.strong_code_root_scan_time_sec(); - assert( _cards_scanned != NULL, "invariant" ); + assert(_cards_scanned != NULL, "invariant"); _cards_scanned[worker_i] = scanRScl.cards_done(); _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); + _g1p->phase_times()->record_strong_code_root_scan_time(worker_i, + scanRScl.strong_code_root_scan_time_sec() * 1000.0); } // Closure used for updating RSets and recording references that @@ -288,7 +318,8 @@ } void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, - int worker_i) { + CodeBlobToOopClosure* code_root_cl, + int worker_i) { #if CARD_REPEAT_HISTO ct_freq_update_histo_and_reset(); #endif @@ -328,7 +359,7 @@ _g1p->phase_times()->record_update_rs_time(worker_i, 0.0); } if (G1UseParallelRSetScanning || (worker_i == 0)) { - scanRS(oc, worker_i); + scanRS(oc, code_root_cl, worker_i); } else { _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1RemSet.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -81,14 +81,23 @@ G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs); ~G1RemSet(); - // Invoke "blk->do_oop" on all pointers into the CS in objects in regions - // outside the CS (having invoked "blk->set_region" to set the "from" - // region correctly beforehand.) The "worker_i" param is for the - // parallel case where the number of the worker thread calling this - // function can be helpful in partitioning the work to be done. It - // should be the same as the "i" passed to the calling thread's - // work(i) function. In the sequential case this param will be ingored. - void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, int worker_i); + // Invoke "blk->do_oop" on all pointers into the collection set + // from objects in regions outside the collection set (having + // invoked "blk->set_region" to set the "from" region correctly + // beforehand.) + // + // Invoke code_root_cl->do_code_blob on the unmarked nmethods + // on the strong code roots list for each region in the + // collection set. + // + // The "worker_i" param is for the parallel case where the id + // of the worker thread calling this function can be helpful in + // partitioning the work to be done. It should be the same as + // the "i" passed to the calling thread's work(i) function. + // In the sequential case this param will be ignored. + void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, + CodeBlobToOopClosure* code_root_cl, + int worker_i); // Prepare for and cleanup after an oops_into_collection_set_do // call. Must call each of these once before and after (in sequential @@ -98,7 +107,10 @@ void prepare_for_oops_into_collection_set_do(); void cleanup_after_oops_into_collection_set_do(); - void scanRS(OopsInHeapRegionClosure* oc, int worker_i); + void scanRS(OopsInHeapRegionClosure* oc, + CodeBlobToOopClosure* code_root_cl, + int worker_i); + void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i); CardTableModRefBS* ct_bs() { return _ct_bs; } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -127,32 +127,55 @@ class HRRSStatsIter: public HeapRegionClosure { size_t _occupied; - size_t _total_mem_sz; - size_t _max_mem_sz; - HeapRegion* _max_mem_sz_region; + + size_t _total_rs_mem_sz; + size_t _max_rs_mem_sz; + HeapRegion* _max_rs_mem_sz_region; + + size_t _total_code_root_mem_sz; + size_t _max_code_root_mem_sz; + HeapRegion* _max_code_root_mem_sz_region; public: HRRSStatsIter() : _occupied(0), - _total_mem_sz(0), - _max_mem_sz(0), - _max_mem_sz_region(NULL) + _total_rs_mem_sz(0), + _max_rs_mem_sz(0), + _max_rs_mem_sz_region(NULL), + _total_code_root_mem_sz(0), + _max_code_root_mem_sz(0), + _max_code_root_mem_sz_region(NULL) {} bool doHeapRegion(HeapRegion* r) { - size_t mem_sz = r->rem_set()->mem_size(); - if (mem_sz > _max_mem_sz) { - _max_mem_sz = mem_sz; - _max_mem_sz_region = r; + HeapRegionRemSet* hrrs = r->rem_set(); + + // HeapRegionRemSet::mem_size() includes the + // size of the strong code roots + size_t rs_mem_sz = hrrs->mem_size(); + if (rs_mem_sz > _max_rs_mem_sz) { + _max_rs_mem_sz = rs_mem_sz; + _max_rs_mem_sz_region = r; } - _total_mem_sz += mem_sz; - size_t occ = r->rem_set()->occupied(); + _total_rs_mem_sz += rs_mem_sz; + + size_t code_root_mem_sz = hrrs->strong_code_roots_mem_size(); + if (code_root_mem_sz > _max_code_root_mem_sz) { + _max_code_root_mem_sz = code_root_mem_sz; + _max_code_root_mem_sz_region = r; + } + _total_code_root_mem_sz += code_root_mem_sz; + + size_t occ = hrrs->occupied(); _occupied += occ; return false; } - size_t total_mem_sz() { return _total_mem_sz; } - size_t max_mem_sz() { return _max_mem_sz; } + size_t total_rs_mem_sz() { return _total_rs_mem_sz; } + size_t max_rs_mem_sz() { return _max_rs_mem_sz; } + HeapRegion* max_rs_mem_sz_region() { return _max_rs_mem_sz_region; } + size_t total_code_root_mem_sz() { return _total_code_root_mem_sz; } + size_t max_code_root_mem_sz() { return _max_code_root_mem_sz; } + HeapRegion* max_code_root_mem_sz_region() { return _max_code_root_mem_sz_region; } size_t occupied() { return _occupied; } - HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } }; double calc_percentage(size_t numerator, size_t denominator) { @@ -184,22 +207,33 @@ HRRSStatsIter blk; G1CollectedHeap::heap()->heap_region_iterate(&blk); + // RemSet stats out->print_cr(" Total heap region rem set sizes = "SIZE_FORMAT"K." " Max = "SIZE_FORMAT"K.", - blk.total_mem_sz()/K, blk.max_mem_sz()/K); + blk.total_rs_mem_sz()/K, blk.max_rs_mem_sz()/K); out->print_cr(" Static structures = "SIZE_FORMAT"K," " free_lists = "SIZE_FORMAT"K.", HeapRegionRemSet::static_mem_size() / K, HeapRegionRemSet::fl_mem_size() / K); out->print_cr(" "SIZE_FORMAT" occupied cards represented.", blk.occupied()); - HeapRegion* max_mem_sz_region = blk.max_mem_sz_region(); - HeapRegionRemSet* rem_set = max_mem_sz_region->rem_set(); + HeapRegion* max_rs_mem_sz_region = blk.max_rs_mem_sz_region(); + HeapRegionRemSet* max_rs_rem_set = max_rs_mem_sz_region->rem_set(); out->print_cr(" Max size region = "HR_FORMAT", " "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.", - HR_FORMAT_PARAMS(max_mem_sz_region), - (rem_set->mem_size() + K - 1)/K, - (rem_set->occupied() + K - 1)/K); - + HR_FORMAT_PARAMS(max_rs_mem_sz_region), + (max_rs_rem_set->mem_size() + K - 1)/K, + (max_rs_rem_set->occupied() + K - 1)/K); out->print_cr(" Did %d coarsenings.", num_coarsenings()); + // Strong code root stats + out->print_cr(" Total heap region code-root set sizes = "SIZE_FORMAT"K." + " Max = "SIZE_FORMAT"K.", + blk.total_code_root_mem_sz()/K, blk.max_code_root_mem_sz()/K); + HeapRegion* max_code_root_mem_sz_region = blk.max_code_root_mem_sz_region(); + HeapRegionRemSet* max_code_root_rem_set = max_code_root_mem_sz_region->rem_set(); + out->print_cr(" Max size region = "HR_FORMAT", " + "size = "SIZE_FORMAT "K, num_elems = "SIZE_FORMAT".", + HR_FORMAT_PARAMS(max_code_root_mem_sz_region), + (max_code_root_rem_set->strong_code_roots_mem_size() + K - 1)/K, + (max_code_root_rem_set->strong_code_roots_list_length())); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -319,7 +319,10 @@ \ diagnostic(bool, G1VerifyRSetsDuringFullGC, false, \ "If true, perform verification of each heap region's " \ - "remembered set when verifying the heap during a full GC.") + "remembered set when verifying the heap during a full GC.") \ + \ + diagnostic(bool, G1VerifyHeapRegionCodeRoots, false, \ + "Verify the code root lists attached to each heap region.") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/heapRegion.cpp --- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "code/nmethod.hpp" #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1OopClosures.inline.hpp" @@ -50,144 +51,6 @@ OopClosure* oc) : _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { } -class VerifyLiveClosure: public OopClosure { -private: - G1CollectedHeap* _g1h; - CardTableModRefBS* _bs; - oop _containing_obj; - bool _failures; - int _n_failures; - VerifyOption _vo; -public: - // _vo == UsePrevMarking -> use "prev" marking information, - // _vo == UseNextMarking -> use "next" marking information, - // _vo == UseMarkWord -> use mark word from object header. - VerifyLiveClosure(G1CollectedHeap* g1h, VerifyOption vo) : - _g1h(g1h), _bs(NULL), _containing_obj(NULL), - _failures(false), _n_failures(0), _vo(vo) - { - BarrierSet* bs = _g1h->barrier_set(); - if (bs->is_a(BarrierSet::CardTableModRef)) - _bs = (CardTableModRefBS*)bs; - } - - void set_containing_obj(oop obj) { - _containing_obj = obj; - } - - bool failures() { return _failures; } - int n_failures() { return _n_failures; } - - virtual void do_oop(narrowOop* p) { do_oop_work(p); } - virtual void do_oop( oop* p) { do_oop_work(p); } - - void print_object(outputStream* out, oop obj) { -#ifdef PRODUCT - Klass* k = obj->klass(); - const char* class_name = InstanceKlass::cast(k)->external_name(); - out->print_cr("class name %s", class_name); -#else // PRODUCT - obj->print_on(out); -#endif // PRODUCT - } - - template - void do_oop_work(T* p) { - assert(_containing_obj != NULL, "Precondition"); - assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo), - "Precondition"); - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - bool failed = false; - if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) { - MutexLockerEx x(ParGCRareEvent_lock, - Mutex::_no_safepoint_check_flag); - - if (!_failures) { - gclog_or_tty->print_cr(""); - gclog_or_tty->print_cr("----------"); - } - if (!_g1h->is_in_closed_subset(obj)) { - HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p); - gclog_or_tty->print_cr("Field "PTR_FORMAT - " of live obj "PTR_FORMAT" in region " - "["PTR_FORMAT", "PTR_FORMAT")", - p, (void*) _containing_obj, - from->bottom(), from->end()); - print_object(gclog_or_tty, _containing_obj); - gclog_or_tty->print_cr("points to obj "PTR_FORMAT" not in the heap", - (void*) obj); - } else { - HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p); - HeapRegion* to = _g1h->heap_region_containing((HeapWord*)obj); - gclog_or_tty->print_cr("Field "PTR_FORMAT - " of live obj "PTR_FORMAT" in region " - "["PTR_FORMAT", "PTR_FORMAT")", - p, (void*) _containing_obj, - from->bottom(), from->end()); - print_object(gclog_or_tty, _containing_obj); - gclog_or_tty->print_cr("points to dead obj "PTR_FORMAT" in region " - "["PTR_FORMAT", "PTR_FORMAT")", - (void*) obj, to->bottom(), to->end()); - print_object(gclog_or_tty, obj); - } - gclog_or_tty->print_cr("----------"); - gclog_or_tty->flush(); - _failures = true; - failed = true; - _n_failures++; - } - - if (!_g1h->full_collection() || G1VerifyRSetsDuringFullGC) { - HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p); - HeapRegion* to = _g1h->heap_region_containing(obj); - if (from != NULL && to != NULL && - from != to && - !to->isHumongous()) { - jbyte cv_obj = *_bs->byte_for_const(_containing_obj); - jbyte cv_field = *_bs->byte_for_const(p); - const jbyte dirty = CardTableModRefBS::dirty_card_val(); - - bool is_bad = !(from->is_young() - || to->rem_set()->contains_reference(p) - || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed - (_containing_obj->is_objArray() ? - cv_field == dirty - : cv_obj == dirty || cv_field == dirty)); - if (is_bad) { - MutexLockerEx x(ParGCRareEvent_lock, - Mutex::_no_safepoint_check_flag); - - if (!_failures) { - gclog_or_tty->print_cr(""); - gclog_or_tty->print_cr("----------"); - } - gclog_or_tty->print_cr("Missing rem set entry:"); - gclog_or_tty->print_cr("Field "PTR_FORMAT" " - "of obj "PTR_FORMAT", " - "in region "HR_FORMAT, - p, (void*) _containing_obj, - HR_FORMAT_PARAMS(from)); - _containing_obj->print_on(gclog_or_tty); - gclog_or_tty->print_cr("points to obj "PTR_FORMAT" " - "in region "HR_FORMAT, - (void*) obj, - HR_FORMAT_PARAMS(to)); - obj->print_on(gclog_or_tty); - gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.", - cv_obj, cv_field); - gclog_or_tty->print_cr("----------"); - gclog_or_tty->flush(); - _failures = true; - if (!failed) _n_failures++; - } - } - } - } - } -}; - template HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h, HeapRegion* hr, @@ -368,7 +231,7 @@ if (!par) { // If this is parallel, this will be done later. HeapRegionRemSet* hrrs = rem_set(); - if (hrrs != NULL) hrrs->clear(); + hrrs->clear(); _claimed = InitialClaimValue; } zero_marked_bytes(); @@ -505,6 +368,7 @@ _rem_set(NULL), _recorded_rs_length(0), _predicted_elapsed_time_ms(0), _predicted_bytes_to_copy(0) { + _rem_set = new HeapRegionRemSet(sharedOffsetArray, this); _orig_end = mr.end(); // Note that initialize() will set the start of the unmarked area of the // region. @@ -512,8 +376,6 @@ set_top(bottom()); set_saved_mark(); - _rem_set = new HeapRegionRemSet(sharedOffsetArray, this); - assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant."); } @@ -733,6 +595,160 @@ return NULL; } +// Code roots support + +void HeapRegion::add_strong_code_root(nmethod* nm) { + HeapRegionRemSet* hrrs = rem_set(); + hrrs->add_strong_code_root(nm); +} + +void HeapRegion::remove_strong_code_root(nmethod* nm) { + HeapRegionRemSet* hrrs = rem_set(); + hrrs->remove_strong_code_root(nm); +} + +void HeapRegion::migrate_strong_code_roots() { + assert(in_collection_set(), "only collection set regions"); + assert(!isHumongous(), "not humongous regions"); + + HeapRegionRemSet* hrrs = rem_set(); + hrrs->migrate_strong_code_roots(); +} + +void HeapRegion::strong_code_roots_do(CodeBlobClosure* blk) const { + HeapRegionRemSet* hrrs = rem_set(); + hrrs->strong_code_roots_do(blk); +} + +class VerifyStrongCodeRootOopClosure: public OopClosure { + const HeapRegion* _hr; + nmethod* _nm; + bool _failures; + bool _has_oops_in_region; + + template void do_oop_work(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + + // Note: not all the oops embedded in the nmethod are in the + // current region. We only look at those which are. + if (_hr->is_in(obj)) { + // Object is in the region. Check that its less than top + if (_hr->top() <= (HeapWord*)obj) { + // Object is above top + gclog_or_tty->print_cr("Object "PTR_FORMAT" in region " + "["PTR_FORMAT", "PTR_FORMAT") is above " + "top "PTR_FORMAT, + obj, _hr->bottom(), _hr->end(), _hr->top()); + _failures = true; + return; + } + // Nmethod has at least one oop in the current region + _has_oops_in_region = true; + } + } + } + +public: + VerifyStrongCodeRootOopClosure(const HeapRegion* hr, nmethod* nm): + _hr(hr), _failures(false), _has_oops_in_region(false) {} + + void do_oop(narrowOop* p) { do_oop_work(p); } + void do_oop(oop* p) { do_oop_work(p); } + + bool failures() { return _failures; } + bool has_oops_in_region() { return _has_oops_in_region; } +}; + +class VerifyStrongCodeRootCodeBlobClosure: public CodeBlobClosure { + const HeapRegion* _hr; + bool _failures; +public: + VerifyStrongCodeRootCodeBlobClosure(const HeapRegion* hr) : + _hr(hr), _failures(false) {} + + void do_code_blob(CodeBlob* cb) { + nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null(); + if (nm != NULL) { + // Verify that the nemthod is live + if (!nm->is_alive()) { + gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has dead nmethod " + PTR_FORMAT" in its strong code roots", + _hr->bottom(), _hr->end(), nm); + _failures = true; + } else { + VerifyStrongCodeRootOopClosure oop_cl(_hr, nm); + nm->oops_do(&oop_cl); + if (!oop_cl.has_oops_in_region()) { + gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has nmethod " + PTR_FORMAT" in its strong code roots " + "with no pointers into region", + _hr->bottom(), _hr->end(), nm); + _failures = true; + } else if (oop_cl.failures()) { + gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has other " + "failures for nmethod "PTR_FORMAT, + _hr->bottom(), _hr->end(), nm); + _failures = true; + } + } + } + } + + bool failures() { return _failures; } +}; + +void HeapRegion::verify_strong_code_roots(VerifyOption vo, bool* failures) const { + if (!G1VerifyHeapRegionCodeRoots) { + // We're not verifying code roots. + return; + } + if (vo == VerifyOption_G1UseMarkWord) { + // Marking verification during a full GC is performed after class + // unloading, code cache unloading, etc so the strong code roots + // attached to each heap region are in an inconsistent state. They won't + // be consistent until the strong code roots are rebuilt after the + // actual GC. Skip verifying the strong code roots in this particular + // time. + assert(VerifyDuringGC, "only way to get here"); + return; + } + + HeapRegionRemSet* hrrs = rem_set(); + int strong_code_roots_length = hrrs->strong_code_roots_list_length(); + + // if this region is empty then there should be no entries + // on its strong code root list + if (is_empty()) { + if (strong_code_roots_length > 0) { + gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is empty " + "but has "INT32_FORMAT" code root entries", + bottom(), end(), strong_code_roots_length); + *failures = true; + } + return; + } + + // An H-region should have an empty strong code root list + if (isHumongous()) { + if (strong_code_roots_length > 0) { + gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is humongous " + "but has "INT32_FORMAT" code root entries", + bottom(), end(), strong_code_roots_length); + *failures = true; + } + return; + } + + VerifyStrongCodeRootCodeBlobClosure cb_cl(this); + strong_code_roots_do(&cb_cl); + + if (cb_cl.failures()) { + *failures = true; + } +} + void HeapRegion::print() const { print_on(gclog_or_tty); } void HeapRegion::print_on(outputStream* st) const { if (isHumongous()) { @@ -761,10 +777,143 @@ G1OffsetTableContigSpace::print_on(st); } -void HeapRegion::verify() const { - bool dummy = false; - verify(VerifyOption_G1UsePrevMarking, /* failures */ &dummy); -} +class VerifyLiveClosure: public OopClosure { +private: + G1CollectedHeap* _g1h; + CardTableModRefBS* _bs; + oop _containing_obj; + bool _failures; + int _n_failures; + VerifyOption _vo; +public: + // _vo == UsePrevMarking -> use "prev" marking information, + // _vo == UseNextMarking -> use "next" marking information, + // _vo == UseMarkWord -> use mark word from object header. + VerifyLiveClosure(G1CollectedHeap* g1h, VerifyOption vo) : + _g1h(g1h), _bs(NULL), _containing_obj(NULL), + _failures(false), _n_failures(0), _vo(vo) + { + BarrierSet* bs = _g1h->barrier_set(); + if (bs->is_a(BarrierSet::CardTableModRef)) + _bs = (CardTableModRefBS*)bs; + } + + void set_containing_obj(oop obj) { + _containing_obj = obj; + } + + bool failures() { return _failures; } + int n_failures() { return _n_failures; } + + virtual void do_oop(narrowOop* p) { do_oop_work(p); } + virtual void do_oop( oop* p) { do_oop_work(p); } + + void print_object(outputStream* out, oop obj) { +#ifdef PRODUCT + Klass* k = obj->klass(); + const char* class_name = InstanceKlass::cast(k)->external_name(); + out->print_cr("class name %s", class_name); +#else // PRODUCT + obj->print_on(out); +#endif // PRODUCT + } + + template + void do_oop_work(T* p) { + assert(_containing_obj != NULL, "Precondition"); + assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo), + "Precondition"); + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + bool failed = false; + if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) { + MutexLockerEx x(ParGCRareEvent_lock, + Mutex::_no_safepoint_check_flag); + + if (!_failures) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("----------"); + } + if (!_g1h->is_in_closed_subset(obj)) { + HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p); + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of live obj "PTR_FORMAT" in region " + "["PTR_FORMAT", "PTR_FORMAT")", + p, (void*) _containing_obj, + from->bottom(), from->end()); + print_object(gclog_or_tty, _containing_obj); + gclog_or_tty->print_cr("points to obj "PTR_FORMAT" not in the heap", + (void*) obj); + } else { + HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p); + HeapRegion* to = _g1h->heap_region_containing((HeapWord*)obj); + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of live obj "PTR_FORMAT" in region " + "["PTR_FORMAT", "PTR_FORMAT")", + p, (void*) _containing_obj, + from->bottom(), from->end()); + print_object(gclog_or_tty, _containing_obj); + gclog_or_tty->print_cr("points to dead obj "PTR_FORMAT" in region " + "["PTR_FORMAT", "PTR_FORMAT")", + (void*) obj, to->bottom(), to->end()); + print_object(gclog_or_tty, obj); + } + gclog_or_tty->print_cr("----------"); + gclog_or_tty->flush(); + _failures = true; + failed = true; + _n_failures++; + } + + if (!_g1h->full_collection() || G1VerifyRSetsDuringFullGC) { + HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p); + HeapRegion* to = _g1h->heap_region_containing(obj); + if (from != NULL && to != NULL && + from != to && + !to->isHumongous()) { + jbyte cv_obj = *_bs->byte_for_const(_containing_obj); + jbyte cv_field = *_bs->byte_for_const(p); + const jbyte dirty = CardTableModRefBS::dirty_card_val(); + + bool is_bad = !(from->is_young() + || to->rem_set()->contains_reference(p) + || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed + (_containing_obj->is_objArray() ? + cv_field == dirty + : cv_obj == dirty || cv_field == dirty)); + if (is_bad) { + MutexLockerEx x(ParGCRareEvent_lock, + Mutex::_no_safepoint_check_flag); + + if (!_failures) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("----------"); + } + gclog_or_tty->print_cr("Missing rem set entry:"); + gclog_or_tty->print_cr("Field "PTR_FORMAT" " + "of obj "PTR_FORMAT", " + "in region "HR_FORMAT, + p, (void*) _containing_obj, + HR_FORMAT_PARAMS(from)); + _containing_obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("points to obj "PTR_FORMAT" " + "in region "HR_FORMAT, + (void*) obj, + HR_FORMAT_PARAMS(to)); + obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.", + cv_obj, cv_field); + gclog_or_tty->print_cr("----------"); + gclog_or_tty->flush(); + _failures = true; + if (!failed) _n_failures++; + } + } + } + } + } +}; // This really ought to be commoned up into OffsetTableContigSpace somehow. // We would need a mechanism to make that code skip dead objects. @@ -904,6 +1053,13 @@ *failures = true; return; } + + verify_strong_code_roots(vo, failures); +} + +void HeapRegion::verify() const { + bool dummy = false; + verify(VerifyOption_G1UsePrevMarking, /* failures */ &dummy); } // G1OffsetTableContigSpace code; copied from space.cpp. Hope this can go diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/heapRegion.hpp --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -52,6 +52,7 @@ class HeapRegionRemSetIterator; class HeapRegion; class HeapRegionSetBase; +class nmethod; #define HR_FORMAT "%u:(%s)["PTR_FORMAT","PTR_FORMAT","PTR_FORMAT"]" #define HR_FORMAT_PARAMS(_hr_) \ @@ -371,7 +372,8 @@ RebuildRSClaimValue = 5, ParEvacFailureClaimValue = 6, AggregateCountClaimValue = 7, - VerifyCountClaimValue = 8 + VerifyCountClaimValue = 8, + ParMarkRootClaimValue = 9 }; inline HeapWord* par_allocate_no_bot_updates(size_t word_size) { @@ -796,6 +798,25 @@ virtual void reset_after_compaction(); + // Routines for managing a list of code roots (attached to the + // this region's RSet) that point into this heap region. + void add_strong_code_root(nmethod* nm); + void remove_strong_code_root(nmethod* nm); + + // During a collection, migrate the successfully evacuated + // strong code roots that referenced into this region to the + // new regions that they now point into. Unsuccessfully + // evacuated code roots are not migrated. + void migrate_strong_code_roots(); + + // Applies blk->do_code_blob() to each of the entries in + // the strong code roots list for this region + void strong_code_roots_do(CodeBlobClosure* blk) const; + + // Verify that the entries on the strong code root list for this + // region are live and include at least one pointer into this region. + void verify_strong_code_roots(VerifyOption vo, bool* failures) const; + void print() const; void print_on(outputStream* st) const; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -33,6 +33,7 @@ #include "oops/oop.inline.hpp" #include "utilities/bitMap.inline.hpp" #include "utilities/globalDefinitions.hpp" +#include "utilities/growableArray.hpp" class PerRegionTable: public CHeapObj { friend class OtherRegionsTable; @@ -849,7 +850,7 @@ HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr) - : _bosa(bosa), _other_regions(hr) { + : _bosa(bosa), _strong_code_roots_list(NULL), _other_regions(hr) { reset_for_par_iteration(); } @@ -908,6 +909,12 @@ } void HeapRegionRemSet::clear() { + if (_strong_code_roots_list != NULL) { + delete _strong_code_roots_list; + } + _strong_code_roots_list = new (ResourceObj::C_HEAP, mtGC) + GrowableArray(10, 0, NULL, true); + _other_regions.clear(); assert(occupied() == 0, "Should be clear."); reset_for_par_iteration(); @@ -925,6 +932,121 @@ _other_regions.scrub(ctbs, region_bm, card_bm); } + +// Code roots support + +void HeapRegionRemSet::add_strong_code_root(nmethod* nm) { + assert(nm != NULL, "sanity"); + // Search for the code blob from the RHS to avoid + // duplicate entries as much as possible + if (_strong_code_roots_list->find_from_end(nm) < 0) { + // Code blob isn't already in the list + _strong_code_roots_list->push(nm); + } +} + +void HeapRegionRemSet::remove_strong_code_root(nmethod* nm) { + assert(nm != NULL, "sanity"); + int idx = _strong_code_roots_list->find(nm); + if (idx >= 0) { + _strong_code_roots_list->remove_at(idx); + } + // Check that there were no duplicates + guarantee(_strong_code_roots_list->find(nm) < 0, "duplicate entry found"); +} + +class NMethodMigrationOopClosure : public OopClosure { + G1CollectedHeap* _g1h; + HeapRegion* _from; + nmethod* _nm; + + uint _num_self_forwarded; + + template void do_oop_work(T* p) { + T heap_oop = oopDesc::load_heap_oop(p); + if (!oopDesc::is_null(heap_oop)) { + oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); + if (_from->is_in(obj)) { + // Reference still points into the source region. + // Since roots are immediately evacuated this means that + // we must have self forwarded the object + assert(obj->is_forwarded(), + err_msg("code roots should be immediately evacuated. " + "Ref: "PTR_FORMAT", " + "Obj: "PTR_FORMAT", " + "Region: "HR_FORMAT, + p, (void*) obj, HR_FORMAT_PARAMS(_from))); + assert(obj->forwardee() == obj, + err_msg("not self forwarded? obj = "PTR_FORMAT, (void*)obj)); + + // The object has been self forwarded. + // Note, if we're during an initial mark pause, there is + // no need to explicitly mark object. It will be marked + // during the regular evacuation failure handling code. + _num_self_forwarded++; + } else { + // The reference points into a promotion or to-space region + HeapRegion* to = _g1h->heap_region_containing(obj); + to->rem_set()->add_strong_code_root(_nm); + } + } + } + +public: + NMethodMigrationOopClosure(G1CollectedHeap* g1h, HeapRegion* from, nmethod* nm): + _g1h(g1h), _from(from), _nm(nm), _num_self_forwarded(0) {} + + void do_oop(narrowOop* p) { do_oop_work(p); } + void do_oop(oop* p) { do_oop_work(p); } + + uint retain() { return _num_self_forwarded > 0; } +}; + +void HeapRegionRemSet::migrate_strong_code_roots() { + assert(hr()->in_collection_set(), "only collection set regions"); + assert(!hr()->isHumongous(), "not humongous regions"); + + ResourceMark rm; + + // List of code blobs to retain for this region + GrowableArray to_be_retained(10); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + while (_strong_code_roots_list->is_nonempty()) { + nmethod *nm = _strong_code_roots_list->pop(); + if (nm != NULL) { + NMethodMigrationOopClosure oop_cl(g1h, hr(), nm); + nm->oops_do(&oop_cl); + if (oop_cl.retain()) { + to_be_retained.push(nm); + } + } + } + + // Now push any code roots we need to retain + assert(to_be_retained.is_empty() || hr()->evacuation_failed(), + "Retained nmethod list must be empty or " + "evacuation of this region failed"); + + while (to_be_retained.is_nonempty()) { + nmethod* nm = to_be_retained.pop(); + assert(nm != NULL, "sanity"); + add_strong_code_root(nm); + } +} + +void HeapRegionRemSet::strong_code_roots_do(CodeBlobClosure* blk) const { + for (int i = 0; i < _strong_code_roots_list->length(); i += 1) { + nmethod* nm = _strong_code_roots_list->at(i); + blk->do_code_blob(nm); + } +} + +size_t HeapRegionRemSet::strong_code_roots_mem_size() { + return sizeof(GrowableArray) + + _strong_code_roots_list->max_length() * sizeof(nmethod*); +} + //-------------------- Iteration -------------------- HeapRegionRemSetIterator:: HeapRegionRemSetIterator(const HeapRegionRemSet* hrrs) : diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -37,6 +37,7 @@ class HeapRegionRemSetIterator; class PerRegionTable; class SparsePRT; +class nmethod; // Essentially a wrapper around SparsePRTCleanupTask. See // sparsePRT.hpp for more details. @@ -191,6 +192,10 @@ G1BlockOffsetSharedArray* _bosa; G1BlockOffsetSharedArray* bosa() const { return _bosa; } + // A list of code blobs (nmethods) whose code contains pointers into + // the region that owns this RSet. + GrowableArray* _strong_code_roots_list; + OtherRegionsTable _other_regions; enum ParIterState { Unclaimed, Claimed, Complete }; @@ -282,11 +287,13 @@ } // The actual # of bytes this hr_remset takes up. + // Note also includes the strong code root set. size_t mem_size() { return _other_regions.mem_size() // This correction is necessary because the above includes the second // part. - + sizeof(this) - sizeof(OtherRegionsTable); + + (sizeof(this) - sizeof(OtherRegionsTable)) + + strong_code_roots_mem_size(); } // Returns the memory occupancy of all static data structures associated @@ -304,6 +311,37 @@ bool contains_reference(OopOrNarrowOopStar from) const { return _other_regions.contains_reference(from); } + + // Routines for managing the list of code roots that point into + // the heap region that owns this RSet. + void add_strong_code_root(nmethod* nm); + void remove_strong_code_root(nmethod* nm); + + // During a collection, migrate the successfully evacuated strong + // code roots that referenced into the region that owns this RSet + // to the RSets of the new regions that they now point into. + // Unsuccessfully evacuated code roots are not migrated. + void migrate_strong_code_roots(); + + // Applies blk->do_code_blob() to each of the entries in + // the strong code roots list + void strong_code_roots_do(CodeBlobClosure* blk) const; + + // Returns the number of elements in the strong code roots list + int strong_code_roots_list_length() { + return _strong_code_roots_list->length(); + } + + // Returns true if the strong code roots contains the given + // nmethod. + bool strong_code_roots_list_contains(nmethod* nm) { + return _strong_code_roots_list->contains(nm); + } + + // Returns the amount of memory, in bytes, currently + // consumed by the strong code roots. + size_t strong_code_roots_mem_size(); + void print() const; // Called during a stop-world phase to perform any deferred cleanups. diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/g1/vm_operations_g1.cpp --- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -70,9 +70,6 @@ guarantee(target_pause_time_ms > 0.0, err_msg("target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms)); - guarantee(word_size == 0 || gc_cause == GCCause::_g1_inc_collection_pause, - "we can only request an allocation if the GC cause is for " - "an incremental GC pause"); _gc_cause = gc_cause; } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -927,11 +927,9 @@ workers->active_workers(), Threads::number_of_non_daemon_threads()); workers->set_active_workers(active_workers); - _next_gen = gch->next_gen(this); - assert(_next_gen != NULL, - "This must be the youngest gen, and not the only gen"); assert(gch->n_gens() == 2, "Par collection currently only works with single older gen."); + _next_gen = gch->next_gen(this); // Do we have to avoid promotion_undo? if (gch->collector_policy()->is_concurrent_mark_sweep_policy()) { set_avoid_promotion_undo(true); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/parNew/parNewGeneration.hpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ #include "gc_implementation/shared/parGCAllocBuffer.hpp" #include "gc_implementation/shared/copyFailedInfo.hpp" #include "memory/defNewGeneration.hpp" +#include "memory/padded.hpp" #include "utilities/taskqueue.hpp" class ChunkArray; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/parNew/parOopClosures.hpp --- a/src/share/vm/gc_implementation/parNew/parOopClosures.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/parNew/parOopClosures.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,7 @@ #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PAROOPCLOSURES_HPP #include "memory/genOopClosures.hpp" +#include "memory/padded.hpp" // Closures for ParNewGeneration diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -216,6 +216,7 @@ young_gen()->update_counters(); old_gen()->update_counters(); MetaspaceCounters::update_performance_counters(); + CompressedClassSpaceCounters::update_performance_counters(); } size_t ParallelScavengeHeap::capacity() const { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -29,14 +29,16 @@ #include "gc_implementation/parallelScavenge/psScavenge.inline.hpp" #include "gc_implementation/shared/gcTrace.hpp" #include "gc_implementation/shared/mutableSpace.hpp" +#include "memory/allocation.inline.hpp" #include "memory/memRegion.hpp" +#include "memory/padded.inline.hpp" #include "oops/oop.inline.hpp" #include "oops/oop.psgc.inline.hpp" -PSPromotionManager** PSPromotionManager::_manager_array = NULL; -OopStarTaskQueueSet* PSPromotionManager::_stack_array_depth = NULL; -PSOldGen* PSPromotionManager::_old_gen = NULL; -MutableSpace* PSPromotionManager::_young_space = NULL; +PaddedEnd* PSPromotionManager::_manager_array = NULL; +OopStarTaskQueueSet* PSPromotionManager::_stack_array_depth = NULL; +PSOldGen* PSPromotionManager::_old_gen = NULL; +MutableSpace* PSPromotionManager::_young_space = NULL; void PSPromotionManager::initialize() { ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); @@ -45,8 +47,10 @@ _old_gen = heap->old_gen(); _young_space = heap->young_gen()->to_space(); + // To prevent false sharing, we pad the PSPromotionManagers + // and make sure that the first instance starts at a cache line. assert(_manager_array == NULL, "Attempt to initialize twice"); - _manager_array = NEW_C_HEAP_ARRAY(PSPromotionManager*, ParallelGCThreads+1, mtGC); + _manager_array = PaddedArray::create_unfreeable(ParallelGCThreads + 1); guarantee(_manager_array != NULL, "Could not initialize promotion manager"); _stack_array_depth = new OopStarTaskQueueSet(ParallelGCThreads); @@ -54,26 +58,21 @@ // Create and register the PSPromotionManager(s) for the worker threads. for(uint i=0; iregister_queue(i, _manager_array[i]->claimed_stack_depth()); + stack_array_depth()->register_queue(i, _manager_array[i].claimed_stack_depth()); } - // The VMThread gets its own PSPromotionManager, which is not available // for work stealing. - _manager_array[ParallelGCThreads] = new PSPromotionManager(); - guarantee(_manager_array[ParallelGCThreads] != NULL, "Could not create PSPromotionManager"); } PSPromotionManager* PSPromotionManager::gc_thread_promotion_manager(int index) { assert(index >= 0 && index < (int)ParallelGCThreads, "index out of range"); assert(_manager_array != NULL, "Sanity"); - return _manager_array[index]; + return &_manager_array[index]; } PSPromotionManager* PSPromotionManager::vm_thread_promotion_manager() { assert(_manager_array != NULL, "Sanity"); - return _manager_array[ParallelGCThreads]; + return &_manager_array[ParallelGCThreads]; } void PSPromotionManager::pre_scavenge() { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -29,6 +29,8 @@ #include "gc_implementation/shared/gcTrace.hpp" #include "gc_implementation/shared/copyFailedInfo.hpp" #include "memory/allocation.hpp" +#include "memory/padded.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/taskqueue.hpp" // @@ -51,14 +53,14 @@ class PSOldGen; class ParCompactionManager; -class PSPromotionManager : public CHeapObj { +class PSPromotionManager VALUE_OBJ_CLASS_SPEC { friend class PSScavenge; friend class PSRefProcTaskExecutor; private: - static PSPromotionManager** _manager_array; - static OopStarTaskQueueSet* _stack_array_depth; - static PSOldGen* _old_gen; - static MutableSpace* _young_space; + static PaddedEnd* _manager_array; + static OopStarTaskQueueSet* _stack_array_depth; + static PSOldGen* _old_gen; + static MutableSpace* _young_space; #if TASKQUEUE_STATS size_t _masked_pushes; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -33,7 +33,7 @@ inline PSPromotionManager* PSPromotionManager::manager_array(int index) { assert(_manager_array != NULL, "access of NULL manager_array"); assert(index >= 0 && index <= (int)ParallelGCThreads, "out of range manager_array access"); - return _manager_array[index]; + return &_manager_array[index]; } template diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_implementation/shared/objectCountEventSender.cpp --- a/src/share/vm/gc_implementation/shared/objectCountEventSender.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_implementation/shared/objectCountEventSender.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -32,6 +32,7 @@ #if INCLUDE_SERVICES void ObjectCountEventSender::send(const KlassInfoEntry* entry, GCId gc_id, jlong timestamp) { +#if INCLUDE_TRACE assert(Tracing::is_event_enabled(EventObjectCountAfterGC::eventId), "Only call this method if the event is enabled"); @@ -42,6 +43,7 @@ event.set_totalSize(entry->words() * BytesPerWord); event.set_endtime(timestamp); event.commit(); +#endif // INCLUDE_TRACE } bool ObjectCountEventSender::should_send_event() { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -118,6 +118,14 @@ } } +void CollectedHeap::register_nmethod(nmethod* nm) { + assert_locked_or_safepoint(CodeCache_lock); +} + +void CollectedHeap::unregister_nmethod(nmethod* nm) { + assert_locked_or_safepoint(CodeCache_lock); +} + void CollectedHeap::trace_heap(GCWhen::Type when, GCTracer* gc_tracer) { const GCHeapSummary& heap_summary = create_heap_summary(); const MetaspaceSummary& metaspace_summary = create_metaspace_summary(); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -49,6 +49,7 @@ class Thread; class ThreadClosure; class VirtualSpaceSummary; +class nmethod; class GCMessage : public FormatBuffer<1024> { public: @@ -603,6 +604,11 @@ void print_heap_before_gc(); void print_heap_after_gc(); + // Registering and unregistering an nmethod (compiled code) with the heap. + // Override with specific mechanism for each specialized heap type. + virtual void register_nmethod(nmethod* nm); + virtual void unregister_nmethod(nmethod* nm); + void trace_heap_before_gc(GCTracer* gc_tracer); void trace_heap_after_gc(GCTracer* gc_tracer); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/interpreter/interpreterRuntime.cpp --- a/src/share/vm/interpreter/interpreterRuntime.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/interpreter/interpreterRuntime.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1209,3 +1209,26 @@ size_of_arguments * Interpreter::stackElementSize); IRT_END #endif + +#if INCLUDE_JVMTI +// This is a support of the JVMTI PopFrame interface. +// Make sure it is an invokestatic of a polymorphic intrinsic that has a member_name argument +// and return it as a vm_result so that it can be reloaded in the list of invokestatic parameters. +// The dmh argument is a reference to a DirectMethoHandle that has a member name field. +IRT_ENTRY(void, InterpreterRuntime::member_name_arg_or_null(JavaThread* thread, address dmh, + Method* method, address bcp)) + Bytecodes::Code code = Bytecodes::code_at(method, bcp); + if (code != Bytecodes::_invokestatic) { + return; + } + ConstantPool* cpool = method->constants(); + int cp_index = Bytes::get_native_u2(bcp + 1) + ConstantPool::CPCACHE_INDEX_TAG; + Symbol* cname = cpool->klass_name_at(cpool->klass_ref_index_at(cp_index)); + Symbol* mname = cpool->name_ref_at(cp_index); + + if (MethodHandles::has_member_arg(cname, mname)) { + oop member_name = java_lang_invoke_DirectMethodHandle::member((oop)dmh); + thread->set_vm_result(member_name); + } +IRT_END +#endif // INCLUDE_JVMTI diff -r bdd155477289 -r e2722a66aba7 src/share/vm/interpreter/interpreterRuntime.hpp --- a/src/share/vm/interpreter/interpreterRuntime.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/interpreter/interpreterRuntime.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -95,6 +95,9 @@ static void create_exception(JavaThread* thread, char* name, char* message); static void create_klass_exception(JavaThread* thread, char* name, oopDesc* obj); static address exception_handler_for_exception(JavaThread* thread, oopDesc* exception); +#if INCLUDE_JVMTI + static void member_name_arg_or_null(JavaThread* thread, address dmh, Method* m, address bcp); +#endif static void throw_pending_exception(JavaThread* thread); // Statics & fields diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/allocation.hpp --- a/src/share/vm/memory/allocation.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/allocation.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -669,7 +669,7 @@ NEW_RESOURCE_ARRAY_RETURN_NULL(type, 1) #define NEW_C_HEAP_ARRAY3(type, size, memflags, pc, allocfail)\ - (type*) AllocateHeap(size * sizeof(type), memflags, pc, allocfail) + (type*) AllocateHeap((size) * sizeof(type), memflags, pc, allocfail) #define NEW_C_HEAP_ARRAY2(type, size, memflags, pc)\ (type*) (AllocateHeap((size) * sizeof(type), memflags, pc)) @@ -678,16 +678,16 @@ (type*) (AllocateHeap((size) * sizeof(type), memflags)) #define NEW_C_HEAP_ARRAY2_RETURN_NULL(type, size, memflags, pc)\ - NEW_C_HEAP_ARRAY3(type, size, memflags, pc, AllocFailStrategy::RETURN_NULL) + NEW_C_HEAP_ARRAY3(type, (size), memflags, pc, AllocFailStrategy::RETURN_NULL) #define NEW_C_HEAP_ARRAY_RETURN_NULL(type, size, memflags)\ - NEW_C_HEAP_ARRAY3(type, size, memflags, (address)0, AllocFailStrategy::RETURN_NULL) + NEW_C_HEAP_ARRAY3(type, (size), memflags, (address)0, AllocFailStrategy::RETURN_NULL) #define REALLOC_C_HEAP_ARRAY(type, old, size, memflags)\ - (type*) (ReallocateHeap((char*)old, (size) * sizeof(type), memflags)) + (type*) (ReallocateHeap((char*)(old), (size) * sizeof(type), memflags)) #define REALLOC_C_HEAP_ARRAY_RETURN_NULL(type, old, size, memflags)\ - (type*) (ReallocateHeap((char*)old, (size) * sizeof(type), memflags, AllocFailStrategy::RETURN_NULL)) + (type*) (ReallocateHeap((char*)(old), (size) * sizeof(type), memflags, AllocFailStrategy::RETURN_NULL)) #define FREE_C_HEAP_ARRAY(type, old, memflags) \ FreeHeap((char*)(old), memflags) diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/cardTableRS.cpp --- a/src/share/vm/memory/cardTableRS.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/cardTableRS.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -310,46 +310,31 @@ _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, cl, this); } -void CardTableRS::clear_into_younger(Generation* gen) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - // Generations younger than gen have been evacuated. We can clear - // card table entries for gen (we know that it has no pointers - // to younger gens) and for those below. The card tables for - // the youngest gen need never be cleared. +void CardTableRS::clear_into_younger(Generation* old_gen) { + assert(old_gen->level() == 1, "Should only be called for the old generation"); + // The card tables for the youngest gen need never be cleared. // There's a bit of subtlety in the clear() and invalidate() // methods that we exploit here and in invalidate_or_clear() // below to avoid missing cards at the fringes. If clear() or // invalidate() are changed in the future, this code should // be revisited. 20040107.ysr - Generation* g = gen; - for(Generation* prev_gen = gch->prev_gen(g); - prev_gen != NULL; - g = prev_gen, prev_gen = gch->prev_gen(g)) { - MemRegion to_be_cleared_mr = g->prev_used_region(); - clear(to_be_cleared_mr); - } + clear(old_gen->prev_used_region()); } -void CardTableRS::invalidate_or_clear(Generation* gen, bool younger) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - // For each generation gen (and younger) - // invalidate the cards for the currently occupied part - // of that generation and clear the cards for the +void CardTableRS::invalidate_or_clear(Generation* old_gen) { + assert(old_gen->level() == 1, "Should only be called for the old generation"); + // Invalidate the cards for the currently occupied part of + // the old generation and clear the cards for the // unoccupied part of the generation (if any, making use // of that generation's prev_used_region to determine that // region). No need to do anything for the youngest // generation. Also see note#20040107.ysr above. - Generation* g = gen; - for(Generation* prev_gen = gch->prev_gen(g); prev_gen != NULL; - g = prev_gen, prev_gen = gch->prev_gen(g)) { - MemRegion used_mr = g->used_region(); - MemRegion to_be_cleared_mr = g->prev_used_region().minus(used_mr); - if (!to_be_cleared_mr.is_empty()) { - clear(to_be_cleared_mr); - } - invalidate(used_mr); - if (!younger) break; + MemRegion used_mr = old_gen->used_region(); + MemRegion to_be_cleared_mr = old_gen->prev_used_region().minus(used_mr); + if (!to_be_cleared_mr.is_empty()) { + clear(to_be_cleared_mr); } + invalidate(used_mr); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/cardTableRS.hpp --- a/src/share/vm/memory/cardTableRS.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/cardTableRS.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -142,12 +142,12 @@ void verify_aligned_region_empty(MemRegion mr); void clear(MemRegion mr) { _ct_bs->clear(mr); } - void clear_into_younger(Generation* gen); + void clear_into_younger(Generation* old_gen); void invalidate(MemRegion mr, bool whole_heap = false) { _ct_bs->invalidate(mr, whole_heap); } - void invalidate_or_clear(Generation* gen, bool younger); + void invalidate_or_clear(Generation* old_gen); static uintx ct_max_alignment_constraint() { return CardTableModRefBS::ct_max_alignment_constraint(); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/collectorPolicy.cpp --- a/src/share/vm/memory/collectorPolicy.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/collectorPolicy.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -193,6 +193,8 @@ alignment = lcm(os::large_page_size(), alignment); } + assert(alignment >= min_alignment(), "Must be"); + return alignment; } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/defNewGeneration.cpp --- a/src/share/vm/memory/defNewGeneration.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/defNewGeneration.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -567,8 +567,6 @@ gc_tracer.report_gc_start(gch->gc_cause(), _gc_timer->gc_start()); _next_gen = gch->next_gen(this); - assert(_next_gen != NULL, - "This must be the youngest gen, and not the only gen"); // If the next generation is too full to accommodate promotion // from this generation, pass on collection; let the next generation @@ -901,8 +899,6 @@ if (_next_gen == NULL) { GenCollectedHeap* gch = GenCollectedHeap::heap(); _next_gen = gch->next_gen(this); - assert(_next_gen != NULL, - "This must be the youngest gen, and not the only gen"); } return _next_gen->promotion_attempt_is_safe(used()); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/filemap.cpp --- a/src/share/vm/memory/filemap.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/filemap.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -362,15 +362,12 @@ ReservedSpace FileMapInfo::reserve_shared_memory() { struct FileMapInfo::FileMapHeader::space_info* si = &_header._space[0]; char* requested_addr = si->_base; - size_t alignment = os::vm_allocation_granularity(); - size_t size = align_size_up(SharedReadOnlySize + SharedReadWriteSize + - SharedMiscDataSize + SharedMiscCodeSize, - alignment); + size_t size = FileMapInfo::shared_spaces_size(); // Reserve the space first, then map otherwise map will go right over some // other reserved memory (like the code cache). - ReservedSpace rs(size, alignment, false, requested_addr); + ReservedSpace rs(size, os::vm_allocation_granularity(), false, requested_addr); if (!rs.is_reserved()) { fail_continue(err_msg("Unable to reserve shared space at required address " INTPTR_FORMAT, requested_addr)); return rs; @@ -559,3 +556,19 @@ si->_base, si->_base + si->_used); } } + +// Unmap mapped regions of shared space. +void FileMapInfo::stop_sharing_and_unmap(const char* msg) { + FileMapInfo *map_info = FileMapInfo::current_info(); + if (map_info) { + map_info->fail_continue(msg); + for (int i = 0; i < MetaspaceShared::n_regions; i++) { + if (map_info->_header._space[i]._base != NULL) { + map_info->unmap_region(i); + map_info->_header._space[i]._base = NULL; + } + } + } else if (DumpSharedSpaces) { + fail_stop(msg, NULL); + } +} diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/filemap.hpp --- a/src/share/vm/memory/filemap.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/filemap.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -150,6 +150,15 @@ // Return true if given address is in the mapped shared space. bool is_in_shared_space(const void* p) NOT_CDS_RETURN_(false); void print_shared_spaces() NOT_CDS_RETURN; + + static size_t shared_spaces_size() { + return align_size_up(SharedReadOnlySize + SharedReadWriteSize + + SharedMiscDataSize + SharedMiscCodeSize, + os::vm_allocation_granularity()); + } + + // Stop CDS sharing and unmap CDS regions. + static void stop_sharing_and_unmap(const char* msg); }; #endif // SHARE_VM_MEMORY_FILEMAP_HPP diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/genCollectedHeap.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -95,13 +95,13 @@ guarantee(HeapWordSize == wordSize, "HeapWordSize must equal wordSize"); // The heap must be at least as aligned as generations. - size_t alignment = Generation::GenGrain; + size_t gen_alignment = Generation::GenGrain; _gen_specs = gen_policy()->generations(); // Make sure the sizes are all aligned. for (i = 0; i < _n_gens; i++) { - _gen_specs[i]->align(alignment); + _gen_specs[i]->align(gen_alignment); } // Allocate space for the heap. @@ -109,9 +109,11 @@ char* heap_address; size_t total_reserved = 0; int n_covered_regions = 0; - ReservedSpace heap_rs(0); + ReservedSpace heap_rs; - heap_address = allocate(alignment, &total_reserved, + size_t heap_alignment = collector_policy()->max_alignment(); + + heap_address = allocate(heap_alignment, &total_reserved, &n_covered_regions, &heap_rs); if (!heap_rs.is_reserved()) { @@ -168,6 +170,8 @@ const size_t pageSize = UseLargePages ? os::large_page_size() : os::vm_page_size(); + assert(alignment % pageSize == 0, "Must be"); + for (int i = 0; i < _n_gens; i++) { total_reserved += _gen_specs[i]->max_size(); if (total_reserved < _gen_specs[i]->max_size()) { @@ -175,24 +179,17 @@ } n_covered_regions += _gen_specs[i]->n_covered_regions(); } - assert(total_reserved % pageSize == 0, - err_msg("Gen size; total_reserved=" SIZE_FORMAT ", pageSize=" - SIZE_FORMAT, total_reserved, pageSize)); + assert(total_reserved % alignment == 0, + err_msg("Gen size; total_reserved=" SIZE_FORMAT ", alignment=" + SIZE_FORMAT, total_reserved, alignment)); // Needed until the cardtable is fixed to have the right number // of covered regions. n_covered_regions += 2; - if (UseLargePages) { - assert(total_reserved != 0, "total_reserved cannot be 0"); - total_reserved = round_to(total_reserved, os::large_page_size()); - if (total_reserved < os::large_page_size()) { - vm_exit_during_initialization(overflow_msg); - } - } + *_total_reserved = total_reserved; + *_n_covered_regions = n_covered_regions; - *_total_reserved = total_reserved; - *_n_covered_regions = n_covered_regions; *heap_rs = Universe::reserve_heap(total_reserved, alignment); return heap_rs->base(); } @@ -1070,13 +1067,13 @@ void GenCollectedHeap::prepare_for_compaction() { - Generation* scanning_gen = _gens[_n_gens-1]; + guarantee(_n_gens = 2, "Wrong number of generations"); + Generation* old_gen = _gens[1]; // Start by compacting into same gen. - CompactPoint cp(scanning_gen, NULL, NULL); - while (scanning_gen != NULL) { - scanning_gen->prepare_for_compaction(&cp); - scanning_gen = prev_gen(scanning_gen); - } + CompactPoint cp(old_gen, NULL, NULL); + old_gen->prepare_for_compaction(&cp); + Generation* young_gen = _gens[0]; + young_gen->prepare_for_compaction(&cp); } GCStats* GenCollectedHeap::gc_stats(int level) const { @@ -1211,6 +1208,7 @@ } MetaspaceCounters::update_performance_counters(); + CompressedClassSpaceCounters::update_performance_counters(); always_do_update_barrier = UseConcMarkSweepGC; }; @@ -1245,27 +1243,14 @@ generation_iterate(&ep_cl, false); } -oop GenCollectedHeap::handle_failed_promotion(Generation* gen, +oop GenCollectedHeap::handle_failed_promotion(Generation* old_gen, oop obj, size_t obj_size) { + guarantee(old_gen->level() == 1, "We only get here with an old generation"); assert(obj_size == (size_t)obj->size(), "bad obj_size passed in"); HeapWord* result = NULL; - // First give each higher generation a chance to allocate the promoted object. - Generation* allocator = next_gen(gen); - if (allocator != NULL) { - do { - result = allocator->allocate(obj_size, false); - } while (result == NULL && (allocator = next_gen(allocator)) != NULL); - } - - if (result == NULL) { - // Then give gen and higher generations a chance to expand and allocate the - // object. - do { - result = gen->expand_and_allocate(obj_size, false); - } while (result == NULL && (gen = next_gen(gen)) != NULL); - } + result = old_gen->expand_and_allocate(obj_size, false); if (result != NULL) { Copy::aligned_disjoint_words((HeapWord*)obj, result, obj_size); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/genCollectedHeap.hpp --- a/src/share/vm/memory/genCollectedHeap.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/genCollectedHeap.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -368,25 +368,23 @@ // collection. virtual bool is_maximal_no_gc() const; - // Return the generation before "gen", or else NULL. + // Return the generation before "gen". Generation* prev_gen(Generation* gen) const { int l = gen->level(); - if (l == 0) return NULL; - else return _gens[l-1]; + guarantee(l > 0, "Out of bounds"); + return _gens[l-1]; } - // Return the generation after "gen", or else NULL. + // Return the generation after "gen". Generation* next_gen(Generation* gen) const { int l = gen->level() + 1; - if (l == _n_gens) return NULL; - else return _gens[l]; + guarantee(l < _n_gens, "Out of bounds"); + return _gens[l]; } Generation* get_gen(int i) const { - if (i >= 0 && i < _n_gens) - return _gens[i]; - else - return NULL; + guarantee(i >= 0 && i < _n_gens, "Out of bounds"); + return _gens[i]; } int n_gens() const { @@ -485,9 +483,9 @@ // Promotion of obj into gen failed. Try to promote obj to higher // gens in ascending order; return the new location of obj if successful. - // Otherwise, try expand-and-allocate for obj in each generation starting at - // gen; return the new location of obj if successful. Otherwise, return NULL. - oop handle_failed_promotion(Generation* gen, + // Otherwise, try expand-and-allocate for obj in both the young and old + // generation; return the new location of obj if successful. Otherwise, return NULL. + oop handle_failed_promotion(Generation* old_gen, oop obj, size_t obj_size); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/genMarkSweep.cpp --- a/src/share/vm/memory/genMarkSweep.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/genMarkSweep.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -52,8 +52,8 @@ #include "utilities/copy.hpp" #include "utilities/events.hpp" -void GenMarkSweep::invoke_at_safepoint(int level, ReferenceProcessor* rp, - bool clear_all_softrefs) { +void GenMarkSweep::invoke_at_safepoint(int level, ReferenceProcessor* rp, bool clear_all_softrefs) { + guarantee(level == 1, "We always collect both old and young."); assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); GenCollectedHeap* gch = GenCollectedHeap::heap(); @@ -84,11 +84,6 @@ // Capture heap size before collection for printing. size_t gch_prev_used = gch->used(); - // Some of the card table updates below assume that the perm gen is - // also being collected. - assert(level == gch->n_gens() - 1, - "All generations are being collected, ergo perm gen too."); - // Capture used regions for each generation that will be // subject to collection, so that card table adjustments can // be made intelligently (see clear / invalidate further below). @@ -126,17 +121,15 @@ all_empty = all_empty && gch->get_gen(i)->used() == 0; } GenRemSet* rs = gch->rem_set(); + Generation* old_gen = gch->get_gen(level); // Clear/invalidate below make use of the "prev_used_regions" saved earlier. if (all_empty) { // We've evacuated all generations below us. - Generation* g = gch->get_gen(level); - rs->clear_into_younger(g); + rs->clear_into_younger(old_gen); } else { // Invalidate the cards corresponding to the currently used - // region and clear those corresponding to the evacuated region - // of all generations just collected (i.e. level and younger). - rs->invalidate_or_clear(gch->get_gen(level), - true /* younger */); + // region and clear those corresponding to the evacuated region. + rs->invalidate_or_clear(old_gen); } Threads::gc_epilogue(); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/genRemSet.hpp --- a/src/share/vm/memory/genRemSet.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/genRemSet.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -135,7 +135,7 @@ // younger than gen from generations gen and older. // The parameter clear_perm indicates if the perm_gen's // remembered set should also be processed/cleared. - virtual void clear_into_younger(Generation* gen) = 0; + virtual void clear_into_younger(Generation* old_gen) = 0; // Informs the RS that refs in the given "mr" may have changed // arbitrarily, and therefore may contain old-to-young pointers. @@ -146,11 +146,8 @@ // Informs the RS that refs in this generation // may have changed arbitrarily, and therefore may contain - // old-to-young pointers in arbitrary locations. The parameter - // younger indicates if the same should be done for younger generations - // as well. The parameter perm indicates if the same should be done for - // perm gen as well. - virtual void invalidate_or_clear(Generation* gen, bool younger) = 0; + // old-to-young pointers in arbitrary locations. + virtual void invalidate_or_clear(Generation* old_gen) = 0; }; #endif // SHARE_VM_MEMORY_GENREMSET_HPP diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/heap.cpp --- a/src/share/vm/memory/heap.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/heap.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -118,9 +118,12 @@ _number_of_committed_segments = size_to_segments(_memory.committed_size()); _number_of_reserved_segments = size_to_segments(_memory.reserved_size()); assert(_number_of_reserved_segments >= _number_of_committed_segments, "just checking"); + const size_t reserved_segments_alignment = MAX2((size_t)os::vm_page_size(), granularity); + const size_t reserved_segments_size = align_size_up(_number_of_reserved_segments, reserved_segments_alignment); + const size_t committed_segments_size = align_to_page_size(_number_of_committed_segments); // reserve space for _segmap - if (!_segmap.initialize(align_to_page_size(_number_of_reserved_segments), align_to_page_size(_number_of_committed_segments))) { + if (!_segmap.initialize(reserved_segments_size, committed_segments_size)) { return false; } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/iterator.cpp --- a/src/share/vm/memory/iterator.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/iterator.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -64,7 +64,7 @@ } void CodeBlobToOopClosure::do_newly_marked_nmethod(nmethod* nm) { - nm->oops_do(_cl, /*do_strong_roots_only=*/ true); + nm->oops_do(_cl, /*allow_zombie=*/ false); } void CodeBlobToOopClosure::do_code_blob(CodeBlob* cb) { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/metaspace.cpp --- a/src/share/vm/memory/metaspace.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/metaspace.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -35,6 +35,7 @@ #include "memory/resourceArea.hpp" #include "memory/universe.hpp" #include "runtime/globals.hpp" +#include "runtime/java.hpp" #include "runtime/mutex.hpp" #include "runtime/orderAccess.hpp" #include "services/memTracker.hpp" @@ -54,6 +55,8 @@ MetaWord* last_allocated = 0; +size_t Metaspace::_class_metaspace_size; + // Used in declarations in SpaceManager and ChunkManager enum ChunkIndex { ZeroIndex = 0, @@ -261,10 +264,6 @@ // count of chunks contained in this VirtualSpace uintx _container_count; - // Convenience functions for logical bottom and end - MetaWord* bottom() const { return (MetaWord*) _virtual_space.low(); } - MetaWord* end() const { return (MetaWord*) _virtual_space.high(); } - // Convenience functions to access the _virtual_space char* low() const { return virtual_space()->low(); } char* high() const { return virtual_space()->high(); } @@ -284,6 +283,10 @@ VirtualSpaceNode(ReservedSpace rs) : _top(NULL), _next(NULL), _rs(rs), _container_count(0) {} ~VirtualSpaceNode(); + // Convenience functions for logical bottom and end + MetaWord* bottom() const { return (MetaWord*) _virtual_space.low(); } + MetaWord* end() const { return (MetaWord*) _virtual_space.high(); } + // address of next available space in _virtual_space; // Accessors VirtualSpaceNode* next() { return _next; } @@ -342,7 +345,7 @@ }; // byte_size is the size of the associated virtualspace. -VirtualSpaceNode::VirtualSpaceNode(size_t byte_size) : _top(NULL), _next(NULL), _rs(0), _container_count(0) { +VirtualSpaceNode::VirtualSpaceNode(size_t byte_size) : _top(NULL), _next(NULL), _rs(), _container_count(0) { // align up to vm allocation granularity byte_size = align_size_up(byte_size, os::vm_allocation_granularity()); @@ -1313,7 +1316,8 @@ // Class virtual space should always be expanded. Call GC for the other // metadata virtual space. - if (vsl == Metaspace::class_space_list()) return true; + if (Metaspace::using_class_space() && + (vsl == Metaspace::class_space_list())) return true; // If this is part of an allocation after a GC, expand // unconditionally. @@ -2257,7 +2261,7 @@ size_t raw_word_size = get_raw_word_size(word_size); size_t min_size = TreeChunk::min_size(); assert(raw_word_size >= min_size, - err_msg("Should not deallocate dark matter " SIZE_FORMAT, word_size)); + err_msg("Should not deallocate dark matter " SIZE_FORMAT "<" SIZE_FORMAT, word_size, min_size)); block_freelists()->return_block(p, raw_word_size); } @@ -2374,7 +2378,7 @@ if (result == NULL) { result = grow_and_allocate(word_size); } - if (result > 0) { + if (result != 0) { inc_used_metrics(word_size); assert(result != (MetaWord*) chunks_in_use(MediumIndex), "Head of the list is being allocated"); @@ -2476,15 +2480,13 @@ size_t MetaspaceAux::_allocated_capacity_words[] = {0, 0}; size_t MetaspaceAux::_allocated_used_words[] = {0, 0}; +size_t MetaspaceAux::free_bytes(Metaspace::MetadataType mdtype) { + VirtualSpaceList* list = Metaspace::get_space_list(mdtype); + return list == NULL ? 0 : list->free_bytes(); +} + size_t MetaspaceAux::free_bytes() { - size_t result = 0; - if (Metaspace::class_space_list() != NULL) { - result = result + Metaspace::class_space_list()->free_bytes(); - } - if (Metaspace::space_list() != NULL) { - result = result + Metaspace::space_list()->free_bytes(); - } - return result; + return free_bytes(Metaspace::ClassType) + free_bytes(Metaspace::NonClassType); } void MetaspaceAux::dec_capacity(Metaspace::MetadataType mdtype, size_t words) { @@ -2549,6 +2551,9 @@ } size_t MetaspaceAux::capacity_bytes_slow(Metaspace::MetadataType mdtype) { + if ((mdtype == Metaspace::ClassType) && !Metaspace::using_class_space()) { + return 0; + } // Don't count the space in the freelists. That space will be // added to the capacity calculation as needed. size_t capacity = 0; @@ -2563,18 +2568,18 @@ } size_t MetaspaceAux::reserved_in_bytes(Metaspace::MetadataType mdtype) { - size_t reserved = (mdtype == Metaspace::ClassType) ? - Metaspace::class_space_list()->virtual_space_total() : - Metaspace::space_list()->virtual_space_total(); - return reserved * BytesPerWord; + VirtualSpaceList* list = Metaspace::get_space_list(mdtype); + return list == NULL ? 0 : list->virtual_space_total(); } size_t MetaspaceAux::min_chunk_size() { return Metaspace::first_chunk_word_size(); } size_t MetaspaceAux::free_chunks_total(Metaspace::MetadataType mdtype) { - ChunkManager* chunk = (mdtype == Metaspace::ClassType) ? - Metaspace::class_space_list()->chunk_manager() : - Metaspace::space_list()->chunk_manager(); + VirtualSpaceList* list = Metaspace::get_space_list(mdtype); + if (list == NULL) { + return 0; + } + ChunkManager* chunk = list->chunk_manager(); chunk->slow_verify(); return chunk->free_chunks_total(); } @@ -2615,7 +2620,6 @@ // This is printed when PrintGCDetails void MetaspaceAux::print_on(outputStream* out) { - Metaspace::MetadataType ct = Metaspace::ClassType; Metaspace::MetadataType nct = Metaspace::NonClassType; out->print_cr(" Metaspace total " @@ -2629,12 +2633,15 @@ allocated_capacity_bytes(nct)/K, allocated_used_bytes(nct)/K, reserved_in_bytes(nct)/K); - out->print_cr(" class space " - SIZE_FORMAT "K, used " SIZE_FORMAT "K," - " reserved " SIZE_FORMAT "K", - allocated_capacity_bytes(ct)/K, - allocated_used_bytes(ct)/K, - reserved_in_bytes(ct)/K); + if (Metaspace::using_class_space()) { + Metaspace::MetadataType ct = Metaspace::ClassType; + out->print_cr(" class space " + SIZE_FORMAT "K, used " SIZE_FORMAT "K," + " reserved " SIZE_FORMAT "K", + allocated_capacity_bytes(ct)/K, + allocated_used_bytes(ct)/K, + reserved_in_bytes(ct)/K); + } } // Print information for class space and data space separately. @@ -2659,13 +2666,37 @@ assert(!SafepointSynchronize::is_at_safepoint() || used_and_free == capacity_bytes, "Accounting is wrong"); } -// Print total fragmentation for class and data metaspaces separately +// Print total fragmentation for class metaspaces +void MetaspaceAux::print_class_waste(outputStream* out) { + assert(Metaspace::using_class_space(), "class metaspace not used"); + size_t cls_specialized_waste = 0, cls_small_waste = 0, cls_medium_waste = 0; + size_t cls_specialized_count = 0, cls_small_count = 0, cls_medium_count = 0, cls_humongous_count = 0; + ClassLoaderDataGraphMetaspaceIterator iter; + while (iter.repeat()) { + Metaspace* msp = iter.get_next(); + if (msp != NULL) { + cls_specialized_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SpecializedIndex); + cls_specialized_count += msp->class_vsm()->sum_count_in_chunks_in_use(SpecializedIndex); + cls_small_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SmallIndex); + cls_small_count += msp->class_vsm()->sum_count_in_chunks_in_use(SmallIndex); + cls_medium_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(MediumIndex); + cls_medium_count += msp->class_vsm()->sum_count_in_chunks_in_use(MediumIndex); + cls_humongous_count += msp->class_vsm()->sum_count_in_chunks_in_use(HumongousIndex); + } + } + out->print_cr(" class: " SIZE_FORMAT " specialized(s) " SIZE_FORMAT ", " + SIZE_FORMAT " small(s) " SIZE_FORMAT ", " + SIZE_FORMAT " medium(s) " SIZE_FORMAT ", " + "large count " SIZE_FORMAT, + cls_specialized_count, cls_specialized_waste, + cls_small_count, cls_small_waste, + cls_medium_count, cls_medium_waste, cls_humongous_count); +} + +// Print total fragmentation for data and class metaspaces separately void MetaspaceAux::print_waste(outputStream* out) { - size_t specialized_waste = 0, small_waste = 0, medium_waste = 0; size_t specialized_count = 0, small_count = 0, medium_count = 0, humongous_count = 0; - size_t cls_specialized_waste = 0, cls_small_waste = 0, cls_medium_waste = 0; - size_t cls_specialized_count = 0, cls_small_count = 0, cls_medium_count = 0, cls_humongous_count = 0; ClassLoaderDataGraphMetaspaceIterator iter; while (iter.repeat()) { @@ -2678,14 +2709,6 @@ medium_waste += msp->vsm()->sum_waste_in_chunks_in_use(MediumIndex); medium_count += msp->vsm()->sum_count_in_chunks_in_use(MediumIndex); humongous_count += msp->vsm()->sum_count_in_chunks_in_use(HumongousIndex); - - cls_specialized_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SpecializedIndex); - cls_specialized_count += msp->class_vsm()->sum_count_in_chunks_in_use(SpecializedIndex); - cls_small_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SmallIndex); - cls_small_count += msp->class_vsm()->sum_count_in_chunks_in_use(SmallIndex); - cls_medium_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(MediumIndex); - cls_medium_count += msp->class_vsm()->sum_count_in_chunks_in_use(MediumIndex); - cls_humongous_count += msp->class_vsm()->sum_count_in_chunks_in_use(HumongousIndex); } } out->print_cr("Total fragmentation waste (words) doesn't count free space"); @@ -2695,13 +2718,9 @@ "large count " SIZE_FORMAT, specialized_count, specialized_waste, small_count, small_waste, medium_count, medium_waste, humongous_count); - out->print_cr(" class: " SIZE_FORMAT " specialized(s) " SIZE_FORMAT ", " - SIZE_FORMAT " small(s) " SIZE_FORMAT ", " - SIZE_FORMAT " medium(s) " SIZE_FORMAT ", " - "large count " SIZE_FORMAT, - cls_specialized_count, cls_specialized_waste, - cls_small_count, cls_small_waste, - cls_medium_count, cls_medium_waste, cls_humongous_count); + if (Metaspace::using_class_space()) { + print_class_waste(out); + } } // Dump global metaspace things from the end of ClassLoaderDataGraph @@ -2714,7 +2733,9 @@ void MetaspaceAux::verify_free_chunks() { Metaspace::space_list()->chunk_manager()->verify(); - Metaspace::class_space_list()->chunk_manager()->verify(); + if (Metaspace::using_class_space()) { + Metaspace::class_space_list()->chunk_manager()->verify(); + } } void MetaspaceAux::verify_capacity() { @@ -2776,7 +2797,9 @@ Metaspace::~Metaspace() { delete _vsm; - delete _class_vsm; + if (using_class_space()) { + delete _class_vsm; + } } VirtualSpaceList* Metaspace::_space_list = NULL; @@ -2784,9 +2807,123 @@ #define VIRTUALSPACEMULTIPLIER 2 +#ifdef _LP64 +void Metaspace::set_narrow_klass_base_and_shift(address metaspace_base, address cds_base) { + // Figure out the narrow_klass_base and the narrow_klass_shift. The + // narrow_klass_base is the lower of the metaspace base and the cds base + // (if cds is enabled). The narrow_klass_shift depends on the distance + // between the lower base and higher address. + address lower_base; + address higher_address; + if (UseSharedSpaces) { + higher_address = MAX2((address)(cds_base + FileMapInfo::shared_spaces_size()), + (address)(metaspace_base + class_metaspace_size())); + lower_base = MIN2(metaspace_base, cds_base); + } else { + higher_address = metaspace_base + class_metaspace_size(); + lower_base = metaspace_base; + } + Universe::set_narrow_klass_base(lower_base); + if ((uint64_t)(higher_address - lower_base) < (uint64_t)max_juint) { + Universe::set_narrow_klass_shift(0); + } else { + assert(!UseSharedSpaces, "Cannot shift with UseSharedSpaces"); + Universe::set_narrow_klass_shift(LogKlassAlignmentInBytes); + } +} + +// Return TRUE if the specified metaspace_base and cds_base are close enough +// to work with compressed klass pointers. +bool Metaspace::can_use_cds_with_metaspace_addr(char* metaspace_base, address cds_base) { + assert(cds_base != 0 && UseSharedSpaces, "Only use with CDS"); + assert(UseCompressedKlassPointers, "Only use with CompressedKlassPtrs"); + address lower_base = MIN2((address)metaspace_base, cds_base); + address higher_address = MAX2((address)(cds_base + FileMapInfo::shared_spaces_size()), + (address)(metaspace_base + class_metaspace_size())); + return ((uint64_t)(higher_address - lower_base) < (uint64_t)max_juint); +} + +// Try to allocate the metaspace at the requested addr. +void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, address cds_base) { + assert(using_class_space(), "called improperly"); + assert(UseCompressedKlassPointers, "Only use with CompressedKlassPtrs"); + assert(class_metaspace_size() < KlassEncodingMetaspaceMax, + "Metaspace size is too big"); + + ReservedSpace metaspace_rs = ReservedSpace(class_metaspace_size(), + os::vm_allocation_granularity(), + false, requested_addr, 0); + if (!metaspace_rs.is_reserved()) { + if (UseSharedSpaces) { + // Keep trying to allocate the metaspace, increasing the requested_addr + // by 1GB each time, until we reach an address that will no longer allow + // use of CDS with compressed klass pointers. + char *addr = requested_addr; + while (!metaspace_rs.is_reserved() && (addr + 1*G > addr) && + can_use_cds_with_metaspace_addr(addr + 1*G, cds_base)) { + addr = addr + 1*G; + metaspace_rs = ReservedSpace(class_metaspace_size(), + os::vm_allocation_granularity(), false, addr, 0); + } + } + + // If no successful allocation then try to allocate the space anywhere. If + // that fails then OOM doom. At this point we cannot try allocating the + // metaspace as if UseCompressedKlassPointers is off because too much + // initialization has happened that depends on UseCompressedKlassPointers. + // So, UseCompressedKlassPointers cannot be turned off at this point. + if (!metaspace_rs.is_reserved()) { + metaspace_rs = ReservedSpace(class_metaspace_size(), + os::vm_allocation_granularity(), false); + if (!metaspace_rs.is_reserved()) { + vm_exit_during_initialization(err_msg("Could not allocate metaspace: %d bytes", + class_metaspace_size())); + } + } + } + + // If we got here then the metaspace got allocated. + MemTracker::record_virtual_memory_type((address)metaspace_rs.base(), mtClass); + + // Verify that we can use shared spaces. Otherwise, turn off CDS. + if (UseSharedSpaces && !can_use_cds_with_metaspace_addr(metaspace_rs.base(), cds_base)) { + FileMapInfo::stop_sharing_and_unmap( + "Could not allocate metaspace at a compatible address"); + } + + set_narrow_klass_base_and_shift((address)metaspace_rs.base(), + UseSharedSpaces ? (address)cds_base : 0); + + initialize_class_space(metaspace_rs); + + if (PrintCompressedOopsMode || (PrintMiscellaneous && Verbose)) { + gclog_or_tty->print_cr("Narrow klass base: " PTR_FORMAT ", Narrow klass shift: " SIZE_FORMAT, + Universe::narrow_klass_base(), Universe::narrow_klass_shift()); + gclog_or_tty->print_cr("Metaspace Size: " SIZE_FORMAT " Address: " PTR_FORMAT " Req Addr: " PTR_FORMAT, + class_metaspace_size(), metaspace_rs.base(), requested_addr); + } +} + +// For UseCompressedKlassPointers the class space is reserved above the top of +// the Java heap. The argument passed in is at the base of the compressed space. +void Metaspace::initialize_class_space(ReservedSpace rs) { + // The reserved space size may be bigger because of alignment, esp with UseLargePages + assert(rs.size() >= ClassMetaspaceSize, + err_msg(SIZE_FORMAT " != " UINTX_FORMAT, rs.size(), ClassMetaspaceSize)); + assert(using_class_space(), "Must be using class space"); + _class_space_list = new VirtualSpaceList(rs); +} + +#endif + void Metaspace::global_initialize() { // Initialize the alignment for shared spaces. int max_alignment = os::vm_page_size(); + size_t cds_total = 0; + + set_class_metaspace_size(align_size_up(ClassMetaspaceSize, + os::vm_allocation_granularity())); + MetaspaceShared::set_max_alignment(max_alignment); if (DumpSharedSpaces) { @@ -2798,15 +2935,31 @@ // Initialize with the sum of the shared space sizes. The read-only // and read write metaspace chunks will be allocated out of this and the // remainder is the misc code and data chunks. - size_t total = align_size_up(SharedReadOnlySize + SharedReadWriteSize + - SharedMiscDataSize + SharedMiscCodeSize, - os::vm_allocation_granularity()); - size_t word_size = total/wordSize; - _space_list = new VirtualSpaceList(word_size); + cds_total = FileMapInfo::shared_spaces_size(); + _space_list = new VirtualSpaceList(cds_total/wordSize); + +#ifdef _LP64 + // Set the compressed klass pointer base so that decoding of these pointers works + // properly when creating the shared archive. + assert(UseCompressedOops && UseCompressedKlassPointers, + "UseCompressedOops and UseCompressedKlassPointers must be set"); + Universe::set_narrow_klass_base((address)_space_list->current_virtual_space()->bottom()); + if (TraceMetavirtualspaceAllocation && Verbose) { + gclog_or_tty->print_cr("Setting_narrow_klass_base to Address: " PTR_FORMAT, + _space_list->current_virtual_space()->bottom()); + } + + // Set the shift to zero. + assert(class_metaspace_size() < (uint64_t)(max_juint) - cds_total, + "CDS region is too large"); + Universe::set_narrow_klass_shift(0); +#endif + } else { // If using shared space, open the file that contains the shared space // and map in the memory before initializing the rest of metaspace (so // the addresses don't conflict) + address cds_address = NULL; if (UseSharedSpaces) { FileMapInfo* mapinfo = new FileMapInfo(); memset(mapinfo, 0, sizeof(FileMapInfo)); @@ -2821,8 +2974,22 @@ assert(!mapinfo->is_open() && !UseSharedSpaces, "archive file not closed or shared spaces not disabled."); } + cds_total = FileMapInfo::shared_spaces_size(); + cds_address = (address)mapinfo->region_base(0); } +#ifdef _LP64 + // If UseCompressedKlassPointers is set then allocate the metaspace area + // above the heap and above the CDS area (if it exists). + if (using_class_space()) { + if (UseSharedSpaces) { + allocate_metaspace_compressed_klass_ptrs((char *)(cds_address + cds_total), cds_address); + } else { + allocate_metaspace_compressed_klass_ptrs((char *)CompressedKlassPointersBase, 0); + } + } +#endif + // Initialize these before initializing the VirtualSpaceList _first_chunk_word_size = InitialBootClassLoaderMetaspaceSize / BytesPerWord; _first_chunk_word_size = align_word_size_up(_first_chunk_word_size); @@ -2840,39 +3007,28 @@ } } -// For UseCompressedKlassPointers the class space is reserved as a piece of the -// Java heap because the compression algorithm is the same for each. The -// argument passed in is at the top of the compressed space -void Metaspace::initialize_class_space(ReservedSpace rs) { - // The reserved space size may be bigger because of alignment, esp with UseLargePages - assert(rs.size() >= ClassMetaspaceSize, - err_msg(SIZE_FORMAT " != " UINTX_FORMAT, rs.size(), ClassMetaspaceSize)); - _class_space_list = new VirtualSpaceList(rs); -} - -void Metaspace::initialize(Mutex* lock, - MetaspaceType type) { +void Metaspace::initialize(Mutex* lock, MetaspaceType type) { assert(space_list() != NULL, "Metadata VirtualSpaceList has not been initialized"); - _vsm = new SpaceManager(Metaspace::NonClassType, lock, space_list()); + _vsm = new SpaceManager(NonClassType, lock, space_list()); if (_vsm == NULL) { return; } size_t word_size; size_t class_word_size; - vsm()->get_initial_chunk_sizes(type, - &word_size, - &class_word_size); - - assert(class_space_list() != NULL, - "Class VirtualSpaceList has not been initialized"); - - // Allocate SpaceManager for classes. - _class_vsm = new SpaceManager(Metaspace::ClassType, lock, class_space_list()); - if (_class_vsm == NULL) { - return; + vsm()->get_initial_chunk_sizes(type, &word_size, &class_word_size); + + if (using_class_space()) { + assert(class_space_list() != NULL, + "Class VirtualSpaceList has not been initialized"); + + // Allocate SpaceManager for classes. + _class_vsm = new SpaceManager(ClassType, lock, class_space_list()); + if (_class_vsm == NULL) { + return; + } } MutexLockerEx cl(SpaceManager::expand_lock(), Mutex::_no_safepoint_check_flag); @@ -2888,11 +3044,13 @@ } // Allocate chunk for class metadata objects - Metachunk* class_chunk = - class_space_list()->get_initialization_chunk(class_word_size, - class_vsm()->medium_chunk_bunch()); - if (class_chunk != NULL) { - class_vsm()->add_chunk(class_chunk, true); + if (using_class_space()) { + Metachunk* class_chunk = + class_space_list()->get_initialization_chunk(class_word_size, + class_vsm()->medium_chunk_bunch()); + if (class_chunk != NULL) { + class_vsm()->add_chunk(class_chunk, true); + } } _alloc_record_head = NULL; @@ -2906,7 +3064,8 @@ MetaWord* Metaspace::allocate(size_t word_size, MetadataType mdtype) { // DumpSharedSpaces doesn't use class metadata area (yet) - if (mdtype == ClassType && !DumpSharedSpaces) { + // Also, don't use class_vsm() unless UseCompressedKlassPointers is true. + if (mdtype == ClassType && using_class_space()) { return class_vsm()->allocate(word_size); } else { return vsm()->allocate(word_size); @@ -2937,14 +3096,19 @@ } size_t Metaspace::used_words_slow(MetadataType mdtype) const { - // return vsm()->allocated_used_words(); - return mdtype == ClassType ? class_vsm()->sum_used_in_chunks_in_use() : - vsm()->sum_used_in_chunks_in_use(); // includes overhead! + if (mdtype == ClassType) { + return using_class_space() ? class_vsm()->sum_used_in_chunks_in_use() : 0; + } else { + return vsm()->sum_used_in_chunks_in_use(); // includes overhead! + } } size_t Metaspace::free_words(MetadataType mdtype) const { - return mdtype == ClassType ? class_vsm()->sum_free_in_chunks_in_use() : - vsm()->sum_free_in_chunks_in_use(); + if (mdtype == ClassType) { + return using_class_space() ? class_vsm()->sum_free_in_chunks_in_use() : 0; + } else { + return vsm()->sum_free_in_chunks_in_use(); + } } // Space capacity in the Metaspace. It includes @@ -2953,8 +3117,11 @@ // in the space available in the dictionary which // is already counted in some chunk. size_t Metaspace::capacity_words_slow(MetadataType mdtype) const { - return mdtype == ClassType ? class_vsm()->sum_capacity_in_chunks_in_use() : - vsm()->sum_capacity_in_chunks_in_use(); + if (mdtype == ClassType) { + return using_class_space() ? class_vsm()->sum_capacity_in_chunks_in_use() : 0; + } else { + return vsm()->sum_capacity_in_chunks_in_use(); + } } size_t Metaspace::used_bytes_slow(MetadataType mdtype) const { @@ -2977,8 +3144,8 @@ #endif return; } - if (is_class) { - class_vsm()->deallocate(ptr, word_size); + if (is_class && using_class_space()) { + class_vsm()->deallocate(ptr, word_size); } else { vsm()->deallocate(ptr, word_size); } @@ -2992,7 +3159,7 @@ #endif return; } - if (is_class) { + if (is_class && using_class_space()) { class_vsm()->deallocate(ptr, word_size); } else { vsm()->deallocate(ptr, word_size); @@ -3101,14 +3268,18 @@ MutexLockerEx cl(SpaceManager::expand_lock(), Mutex::_no_safepoint_check_flag); space_list()->purge(); - class_space_list()->purge(); + if (using_class_space()) { + class_space_list()->purge(); + } } void Metaspace::print_on(outputStream* out) const { // Print both class virtual space counts and metaspace. if (Verbose) { - vsm()->print_on(out); + vsm()->print_on(out); + if (using_class_space()) { class_vsm()->print_on(out); + } } } @@ -3122,17 +3293,21 @@ // be needed. Note, locking this can cause inversion problems with the // caller in MetaspaceObj::is_metadata() function. return space_list()->contains(ptr) || - class_space_list()->contains(ptr); + (using_class_space() && class_space_list()->contains(ptr)); } void Metaspace::verify() { vsm()->verify(); - class_vsm()->verify(); + if (using_class_space()) { + class_vsm()->verify(); + } } void Metaspace::dump(outputStream* const out) const { out->print_cr("\nVirtual space manager: " INTPTR_FORMAT, vsm()); vsm()->dump(out); - out->print_cr("\nClass space manager: " INTPTR_FORMAT, class_vsm()); - class_vsm()->dump(out); + if (using_class_space()) { + out->print_cr("\nClass space manager: " INTPTR_FORMAT, class_vsm()); + class_vsm()->dump(out); + } } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/metaspace.hpp --- a/src/share/vm/memory/metaspace.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/metaspace.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -105,6 +105,16 @@ // Align up the word size to the allocation word size static size_t align_word_size_up(size_t); + // Aligned size of the metaspace. + static size_t _class_metaspace_size; + + static size_t class_metaspace_size() { + return _class_metaspace_size; + } + static void set_class_metaspace_size(size_t metaspace_size) { + _class_metaspace_size = metaspace_size; + } + static size_t _first_chunk_word_size; static size_t _first_class_chunk_word_size; @@ -126,11 +136,26 @@ static VirtualSpaceList* space_list() { return _space_list; } static VirtualSpaceList* class_space_list() { return _class_space_list; } + static VirtualSpaceList* get_space_list(MetadataType mdtype) { + assert(mdtype != MetadataTypeCount, "MetadaTypeCount can't be used as mdtype"); + return mdtype == ClassType ? class_space_list() : space_list(); + } // This is used by DumpSharedSpaces only, where only _vsm is used. So we will // maintain a single list for now. void record_allocation(void* ptr, MetaspaceObj::Type type, size_t word_size); +#ifdef _LP64 + static void set_narrow_klass_base_and_shift(address metaspace_base, address cds_base); + + // Returns true if can use CDS with metaspace allocated as specified address. + static bool can_use_cds_with_metaspace_addr(char* metaspace_base, address cds_base); + + static void allocate_metaspace_compressed_klass_ptrs(char* requested_addr, address cds_base); + + static void initialize_class_space(ReservedSpace rs); +#endif + class AllocRecord : public CHeapObj { public: AllocRecord(address ptr, MetaspaceObj::Type type, int byte_size) @@ -151,7 +176,6 @@ // Initialize globals for Metaspace static void global_initialize(); - static void initialize_class_space(ReservedSpace rs); static size_t first_chunk_word_size() { return _first_chunk_word_size; } static size_t first_class_chunk_word_size() { return _first_class_chunk_word_size; } @@ -172,8 +196,6 @@ MetaWord* expand_and_allocate(size_t size, MetadataType mdtype); - static bool is_initialized() { return _class_space_list != NULL; } - static bool contains(const void *ptr); void dump(outputStream* const out) const; @@ -190,11 +212,16 @@ }; void iterate(AllocRecordClosure *closure); + + // Return TRUE only if UseCompressedKlassPointers is True and DumpSharedSpaces is False. + static bool using_class_space() { + return NOT_LP64(false) LP64_ONLY(UseCompressedKlassPointers && !DumpSharedSpaces); + } + }; class MetaspaceAux : AllStatic { static size_t free_chunks_total(Metaspace::MetadataType mdtype); - static size_t free_chunks_total_in_bytes(Metaspace::MetadataType mdtype); public: // Statistics for class space and data space in metaspace. @@ -238,13 +265,15 @@ // Used by MetaspaceCounters static size_t free_chunks_total(); static size_t free_chunks_total_in_bytes(); + static size_t free_chunks_total_in_bytes(Metaspace::MetadataType mdtype); static size_t allocated_capacity_words(Metaspace::MetadataType mdtype) { return _allocated_capacity_words[mdtype]; } static size_t allocated_capacity_words() { - return _allocated_capacity_words[Metaspace::ClassType] + - _allocated_capacity_words[Metaspace::NonClassType]; + return _allocated_capacity_words[Metaspace::NonClassType] + + (Metaspace::using_class_space() ? + _allocated_capacity_words[Metaspace::ClassType] : 0); } static size_t allocated_capacity_bytes(Metaspace::MetadataType mdtype) { return allocated_capacity_words(mdtype) * BytesPerWord; @@ -257,8 +286,9 @@ return _allocated_used_words[mdtype]; } static size_t allocated_used_words() { - return _allocated_used_words[Metaspace::ClassType] + - _allocated_used_words[Metaspace::NonClassType]; + return _allocated_used_words[Metaspace::NonClassType] + + (Metaspace::using_class_space() ? + _allocated_used_words[Metaspace::ClassType] : 0); } static size_t allocated_used_bytes(Metaspace::MetadataType mdtype) { return allocated_used_words(mdtype) * BytesPerWord; @@ -268,6 +298,7 @@ } static size_t free_bytes(); + static size_t free_bytes(Metaspace::MetadataType mdtype); // Total capacity in all Metaspaces static size_t capacity_bytes_slow() { @@ -300,6 +331,7 @@ static void print_on(outputStream * out); static void print_on(outputStream * out, Metaspace::MetadataType mdtype); + static void print_class_waste(outputStream* out); static void print_waste(outputStream* out); static void dump(outputStream* out); static void verify_free_chunks(); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/metaspaceCounters.cpp --- a/src/share/vm/memory/metaspaceCounters.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/metaspaceCounters.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -25,11 +25,47 @@ #include "precompiled.hpp" #include "memory/metaspaceCounters.hpp" #include "memory/resourceArea.hpp" +#include "runtime/globals.hpp" +#include "runtime/perfData.hpp" #include "utilities/exceptions.hpp" -MetaspaceCounters* MetaspaceCounters::_metaspace_counters = NULL; +class MetaspacePerfCounters: public CHeapObj { + friend class VMStructs; + PerfVariable* _capacity; + PerfVariable* _used; + PerfVariable* _max_capacity; + + PerfVariable* create_variable(const char *ns, const char *name, size_t value, TRAPS) { + const char *path = PerfDataManager::counter_name(ns, name); + return PerfDataManager::create_variable(SUN_GC, path, PerfData::U_Bytes, value, THREAD); + } + + void create_constant(const char *ns, const char *name, size_t value, TRAPS) { + const char *path = PerfDataManager::counter_name(ns, name); + PerfDataManager::create_constant(SUN_GC, path, PerfData::U_Bytes, value, THREAD); + } -size_t MetaspaceCounters::calc_total_capacity() { + public: + MetaspacePerfCounters(const char* ns, size_t min_capacity, size_t curr_capacity, size_t max_capacity, size_t used) { + EXCEPTION_MARK; + ResourceMark rm; + + create_constant(ns, "minCapacity", min_capacity, THREAD); + _capacity = create_variable(ns, "capacity", curr_capacity, THREAD); + _max_capacity = create_variable(ns, "maxCapacity", max_capacity, THREAD); + _used = create_variable(ns, "used", used, THREAD); + } + + void update(size_t capacity, size_t max_capacity, size_t used) { + _capacity->set_value(capacity); + _max_capacity->set_value(max_capacity); + _used->set_value(used); + } +}; + +MetaspacePerfCounters* MetaspaceCounters::_perf_counters = NULL; + +size_t MetaspaceCounters::calculate_capacity() { // The total capacity is the sum of // 1) capacity of Metachunks in use by all Metaspaces // 2) unused space at the end of each Metachunk @@ -39,95 +75,65 @@ return total_capacity; } -MetaspaceCounters::MetaspaceCounters() : - _capacity(NULL), - _used(NULL), - _max_capacity(NULL) { +void MetaspaceCounters::initialize_performance_counters() { if (UsePerfData) { + assert(_perf_counters == NULL, "Should only be initialized once"); + size_t min_capacity = MetaspaceAux::min_chunk_size(); + size_t capacity = calculate_capacity(); size_t max_capacity = MetaspaceAux::reserved_in_bytes(); - size_t curr_capacity = calc_total_capacity(); size_t used = MetaspaceAux::allocated_used_bytes(); - initialize(min_capacity, max_capacity, curr_capacity, used); - } -} - -static PerfVariable* create_ms_variable(const char *ns, - const char *name, - size_t value, - TRAPS) { - const char *path = PerfDataManager::counter_name(ns, name); - PerfVariable *result = - PerfDataManager::create_variable(SUN_GC, path, PerfData::U_Bytes, value, - CHECK_NULL); - return result; -} - -static void create_ms_constant(const char *ns, - const char *name, - size_t value, - TRAPS) { - const char *path = PerfDataManager::counter_name(ns, name); - PerfDataManager::create_constant(SUN_GC, path, PerfData::U_Bytes, value, CHECK); -} - -void MetaspaceCounters::initialize(size_t min_capacity, - size_t max_capacity, - size_t curr_capacity, - size_t used) { - - if (UsePerfData) { - EXCEPTION_MARK; - ResourceMark rm; - - const char *ms = "metaspace"; - - create_ms_constant(ms, "minCapacity", min_capacity, CHECK); - _max_capacity = create_ms_variable(ms, "maxCapacity", max_capacity, CHECK); - _capacity = create_ms_variable(ms, "capacity", curr_capacity, CHECK); - _used = create_ms_variable(ms, "used", used, CHECK); - } -} - -void MetaspaceCounters::update_capacity() { - assert(UsePerfData, "Should not be called unless being used"); - size_t total_capacity = calc_total_capacity(); - _capacity->set_value(total_capacity); -} - -void MetaspaceCounters::update_used() { - assert(UsePerfData, "Should not be called unless being used"); - size_t used_in_bytes = MetaspaceAux::allocated_used_bytes(); - _used->set_value(used_in_bytes); -} - -void MetaspaceCounters::update_max_capacity() { - assert(UsePerfData, "Should not be called unless being used"); - assert(_max_capacity != NULL, "Should be initialized"); - size_t reserved_in_bytes = MetaspaceAux::reserved_in_bytes(); - _max_capacity->set_value(reserved_in_bytes); -} - -void MetaspaceCounters::update_all() { - if (UsePerfData) { - update_used(); - update_capacity(); - update_max_capacity(); - } -} - -void MetaspaceCounters::initialize_performance_counters() { - if (UsePerfData) { - assert(_metaspace_counters == NULL, "Should only be initialized once"); - _metaspace_counters = new MetaspaceCounters(); + _perf_counters = new MetaspacePerfCounters("metaspace", min_capacity, capacity, max_capacity, used); } } void MetaspaceCounters::update_performance_counters() { if (UsePerfData) { - assert(_metaspace_counters != NULL, "Should be initialized"); - _metaspace_counters->update_all(); + assert(_perf_counters != NULL, "Should be initialized"); + + size_t capacity = calculate_capacity(); + size_t max_capacity = MetaspaceAux::reserved_in_bytes(); + size_t used = MetaspaceAux::allocated_used_bytes(); + + _perf_counters->update(capacity, max_capacity, used); } } +MetaspacePerfCounters* CompressedClassSpaceCounters::_perf_counters = NULL; + +size_t CompressedClassSpaceCounters::calculate_capacity() { + return MetaspaceAux::allocated_capacity_bytes(_class_type) + + MetaspaceAux::free_bytes(_class_type) + + MetaspaceAux::free_chunks_total_in_bytes(_class_type); +} + +void CompressedClassSpaceCounters::update_performance_counters() { + if (UsePerfData && UseCompressedKlassPointers) { + assert(_perf_counters != NULL, "Should be initialized"); + + size_t capacity = calculate_capacity(); + size_t max_capacity = MetaspaceAux::reserved_in_bytes(_class_type); + size_t used = MetaspaceAux::allocated_used_bytes(_class_type); + + _perf_counters->update(capacity, max_capacity, used); + } +} + +void CompressedClassSpaceCounters::initialize_performance_counters() { + if (UsePerfData) { + assert(_perf_counters == NULL, "Should only be initialized once"); + const char* ns = "compressedclassspace"; + + if (UseCompressedKlassPointers) { + size_t min_capacity = MetaspaceAux::min_chunk_size(); + size_t capacity = calculate_capacity(); + size_t max_capacity = MetaspaceAux::reserved_in_bytes(_class_type); + size_t used = MetaspaceAux::allocated_used_bytes(_class_type); + + _perf_counters = new MetaspacePerfCounters(ns, min_capacity, capacity, max_capacity, used); + } else { + _perf_counters = new MetaspacePerfCounters(ns, 0, 0, 0, 0); + } + } +} diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/metaspaceCounters.hpp --- a/src/share/vm/memory/metaspaceCounters.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/metaspaceCounters.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -25,31 +25,27 @@ #ifndef SHARE_VM_MEMORY_METASPACECOUNTERS_HPP #define SHARE_VM_MEMORY_METASPACECOUNTERS_HPP -#include "runtime/perfData.hpp" +#include "memory/metaspace.hpp" + +class MetaspacePerfCounters; -class MetaspaceCounters: public CHeapObj { - friend class VMStructs; - PerfVariable* _capacity; - PerfVariable* _used; - PerfVariable* _max_capacity; - static MetaspaceCounters* _metaspace_counters; - void initialize(size_t min_capacity, - size_t max_capacity, - size_t curr_capacity, - size_t used); - size_t calc_total_capacity(); +class MetaspaceCounters: public AllStatic { + static MetaspacePerfCounters* _perf_counters; + static size_t calculate_capacity(); + public: - MetaspaceCounters(); - ~MetaspaceCounters(); - - void update_capacity(); - void update_used(); - void update_max_capacity(); - - void update_all(); - static void initialize_performance_counters(); static void update_performance_counters(); - }; + +class CompressedClassSpaceCounters: public AllStatic { + static MetaspacePerfCounters* _perf_counters; + static size_t calculate_capacity(); + static const Metaspace::MetadataType _class_type = Metaspace::ClassType; + + public: + static void initialize_performance_counters(); + static void update_performance_counters(); +}; + #endif // SHARE_VM_MEMORY_METASPACECOUNTERS_HPP diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/metaspaceShared.cpp --- a/src/share/vm/memory/metaspaceShared.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/metaspaceShared.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -52,7 +52,6 @@ int tag = 0; soc->do_tag(--tag); - assert(!UseCompressedOops, "UseCompressedOops doesn't work with shared archive"); // Verify the sizes of various metadata in the system. soc->do_tag(sizeof(Method)); soc->do_tag(sizeof(ConstMethod)); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/padded.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/padded.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_MEMORY_PADDED_HPP +#define SHARE_VM_MEMORY_PADDED_HPP + +#include "memory/allocation.hpp" +#include "utilities/globalDefinitions.hpp" + +// Bytes needed to pad type to avoid cache-line sharing; alignment should be the +// expected cache line size (a power of two). The first addend avoids sharing +// when the start address is not a multiple of alignment; the second maintains +// alignment of starting addresses that happen to be a multiple. +#define PADDING_SIZE(type, alignment) \ + ((alignment) + align_size_up_(sizeof(type), alignment)) + +// Templates to create a subclass padded to avoid cache line sharing. These are +// effective only when applied to derived-most (leaf) classes. + +// When no args are passed to the base ctor. +template +class Padded : public T { + private: + char _pad_buf_[PADDING_SIZE(T, alignment)]; +}; + +// When either 0 or 1 args may be passed to the base ctor. +template +class Padded01 : public T { + public: + Padded01(): T() { } + Padded01(Arg1T arg1): T(arg1) { } + private: + char _pad_buf_[PADDING_SIZE(T, alignment)]; +}; + +// Super class of PaddedEnd when pad_size != 0. +template +class PaddedEndImpl : public T { + private: + char _pad_buf[pad_size]; +}; + +// Super class of PaddedEnd when pad_size == 0. +template +class PaddedEndImpl : public T { + // No padding. +}; + +#define PADDED_END_SIZE(type, alignment) (align_size_up_(sizeof(type), alignment) - sizeof(type)) + +// More memory conservative implementation of Padded. The subclass adds the +// minimal amount of padding needed to make the size of the objects be aligned. +// This will help reducing false sharing, +// if the start address is a multiple of alignment. +template +class PaddedEnd : public PaddedEndImpl { + // C++ don't allow zero-length arrays. The padding is put in a + // super class that is specialized for the pad_size == 0 case. +}; + +// Helper class to create an array of PaddedEnd objects. All elements will +// start at a multiple of alignment and the size will be aligned to alignment. +template +class PaddedArray { + public: + // Creates an aligned padded array. + // The memory can't be deleted since the raw memory chunk is not returned. + static PaddedEnd* create_unfreeable(uint length); +}; + +#endif // SHARE_VM_MEMORY_PADDED_HPP diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/padded.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/padded.inline.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "memory/allocation.inline.hpp" +#include "memory/padded.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" + +// Creates an aligned padded array. +// The memory can't be deleted since the raw memory chunk is not returned. +template +PaddedEnd* PaddedArray::create_unfreeable(uint length) { + // Check that the PaddedEnd class works as intended. + STATIC_ASSERT(is_size_aligned_(sizeof(PaddedEnd), alignment)); + + // Allocate a chunk of memory large enough to allow for some alignment. + void* chunk = AllocateHeap(length * sizeof(PaddedEnd) + alignment, flags); + + // Make the initial alignment. + PaddedEnd* aligned_padded_array = (PaddedEnd*)align_pointer_up(chunk, alignment); + + // Call the default constructor for each element. + for (uint i = 0; i < length; i++) { + ::new (&aligned_padded_array[i]) T(); + } + + return aligned_padded_array; +} diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/universe.cpp --- a/src/share/vm/memory/universe.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/universe.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -105,10 +105,9 @@ Array* Universe::_the_array_interfaces_array = NULL; oop Universe::_the_null_string = NULL; oop Universe::_the_min_jint_string = NULL; -LatestMethodOopCache* Universe::_finalizer_register_cache = NULL; -LatestMethodOopCache* Universe::_loader_addClass_cache = NULL; -LatestMethodOopCache* Universe::_pd_implies_cache = NULL; -ActiveMethodOopsCache* Universe::_reflect_invoke_cache = NULL; +LatestMethodCache* Universe::_finalizer_register_cache = NULL; +LatestMethodCache* Universe::_loader_addClass_cache = NULL; +LatestMethodCache* Universe::_pd_implies_cache = NULL; oop Universe::_out_of_memory_error_java_heap = NULL; oop Universe::_out_of_memory_error_metaspace = NULL; oop Universe::_out_of_memory_error_class_metaspace = NULL; @@ -146,8 +145,6 @@ NarrowPtrStruct Universe::_narrow_klass = { NULL, 0, true }; address Universe::_narrow_ptrs_base; -size_t Universe::_class_metaspace_size; - void Universe::basic_type_classes_do(void f(Klass*)) { f(boolArrayKlassObj()); f(byteArrayKlassObj()); @@ -225,7 +222,6 @@ f->do_ptr((void**)&_the_empty_klass_array); _finalizer_register_cache->serialize(f); _loader_addClass_cache->serialize(f); - _reflect_invoke_cache->serialize(f); _pd_implies_cache->serialize(f); } @@ -643,16 +639,17 @@ return status; } + Metaspace::global_initialize(); + // Create memory for metadata. Must be after initializing heap for // DumpSharedSpaces. ClassLoaderData::init_null_class_loader_data(); // We have a heap so create the Method* caches before // Metaspace::initialize_shared_spaces() tries to populate them. - Universe::_finalizer_register_cache = new LatestMethodOopCache(); - Universe::_loader_addClass_cache = new LatestMethodOopCache(); - Universe::_pd_implies_cache = new LatestMethodOopCache(); - Universe::_reflect_invoke_cache = new ActiveMethodOopsCache(); + Universe::_finalizer_register_cache = new LatestMethodCache(); + Universe::_loader_addClass_cache = new LatestMethodCache(); + Universe::_pd_implies_cache = new LatestMethodCache(); if (UseSharedSpaces) { // Read the data structures supporting the shared spaces (shared @@ -684,25 +681,27 @@ // 32Gb // OopEncodingHeapMax == NarrowOopHeapMax << LogMinObjAlignmentInBytes; -char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) { +char* Universe::preferred_heap_base(size_t heap_size, size_t alignment, NARROW_OOP_MODE mode) { + assert(is_size_aligned((size_t)OopEncodingHeapMax, alignment), "Must be"); + assert(is_size_aligned((size_t)NarrowOopHeapMax, alignment), "Must be"); + assert(is_size_aligned(heap_size, alignment), "Must be"); + + uintx heap_base_min_address_aligned = align_size_up(HeapBaseMinAddress, alignment); + size_t base = 0; #ifdef _LP64 if (UseCompressedOops) { assert(mode == UnscaledNarrowOop || mode == ZeroBasedNarrowOop || mode == HeapBasedNarrowOop, "mode is invalid"); - const size_t total_size = heap_size + HeapBaseMinAddress; + const size_t total_size = heap_size + heap_base_min_address_aligned; // Return specified base for the first request. if (!FLAG_IS_DEFAULT(HeapBaseMinAddress) && (mode == UnscaledNarrowOop)) { - base = HeapBaseMinAddress; + base = heap_base_min_address_aligned; - // If the total size and the metaspace size are small enough to allow - // UnscaledNarrowOop then just use UnscaledNarrowOop. - } else if ((total_size <= OopEncodingHeapMax) && (mode != HeapBasedNarrowOop) && - (!UseCompressedKlassPointers || - (((OopEncodingHeapMax - heap_size) + Universe::class_metaspace_size()) <= KlassEncodingMetaspaceMax))) { - // We don't need to check the metaspace size here because it is always smaller - // than total_size. + // If the total size is small enough to allow UnscaledNarrowOop then + // just use UnscaledNarrowOop. + } else if ((total_size <= OopEncodingHeapMax) && (mode != HeapBasedNarrowOop)) { if ((total_size <= NarrowOopHeapMax) && (mode == UnscaledNarrowOop) && (Universe::narrow_oop_shift() == 0)) { // Use 32-bits oops without encoding and @@ -719,13 +718,6 @@ base = (OopEncodingHeapMax - heap_size); } } - - // See if ZeroBaseNarrowOop encoding will work for a heap based at - // (KlassEncodingMetaspaceMax - class_metaspace_size()). - } else if (UseCompressedKlassPointers && (mode != HeapBasedNarrowOop) && - (Universe::class_metaspace_size() + HeapBaseMinAddress <= KlassEncodingMetaspaceMax) && - (KlassEncodingMetaspaceMax + heap_size - Universe::class_metaspace_size() <= OopEncodingHeapMax)) { - base = (KlassEncodingMetaspaceMax - Universe::class_metaspace_size()); } else { // UnscaledNarrowOop encoding didn't work, and no base was found for ZeroBasedOops or // HeapBasedNarrowOop encoding was requested. So, can't reserve below 32Gb. @@ -735,8 +727,7 @@ // Set narrow_oop_base and narrow_oop_use_implicit_null_checks // used in ReservedHeapSpace() constructors. // The final values will be set in initialize_heap() below. - if ((base != 0) && ((base + heap_size) <= OopEncodingHeapMax) && - (!UseCompressedKlassPointers || (base + Universe::class_metaspace_size()) <= KlassEncodingMetaspaceMax)) { + if ((base != 0) && ((base + heap_size) <= OopEncodingHeapMax)) { // Use zero based compressed oops Universe::set_narrow_oop_base(NULL); // Don't need guard page for implicit checks in indexed @@ -757,6 +748,8 @@ } } #endif + + assert(is_ptr_aligned((char*)base, alignment), "Must be"); return (char*)base; // also return NULL (don't care) for 32-bit VM } @@ -819,9 +812,7 @@ tty->print("heap address: " PTR_FORMAT ", size: " SIZE_FORMAT " MB", Universe::heap()->base(), Universe::heap()->reserved_region().byte_size()/M); } - if (((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) || - (UseCompressedKlassPointers && - ((uint64_t)Universe::heap()->base() + Universe::class_metaspace_size() > KlassEncodingMetaspaceMax))) { + if (((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax)) { // Can't reserve heap below 32Gb. // keep the Universe::narrow_oop_base() set in Universe::reserve_heap() Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes); @@ -857,20 +848,16 @@ } } } + if (verbose) { tty->cr(); tty->cr(); } - if (UseCompressedKlassPointers) { - Universe::set_narrow_klass_base(Universe::narrow_oop_base()); - Universe::set_narrow_klass_shift(MIN2(Universe::narrow_oop_shift(), LogKlassAlignmentInBytes)); - } Universe::set_narrow_ptrs_base(Universe::narrow_oop_base()); } - // Universe::narrow_oop_base() is one page below the metaspace - // base. The actual metaspace base depends on alignment constraints - // so we don't know its exact location here. - assert((intptr_t)Universe::narrow_oop_base() <= (intptr_t)(Universe::heap()->base() - os::vm_page_size() - ClassMetaspaceSize) || + // Universe::narrow_oop_base() is one page below the heap. + assert((intptr_t)Universe::narrow_oop_base() <= (intptr_t)(Universe::heap()->base() - + os::vm_page_size()) || Universe::narrow_oop_base() == NULL, "invalid value"); assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes || Universe::narrow_oop_shift() == 0, "invalid value"); @@ -890,35 +877,36 @@ // Reserve the Java heap, which is now the same for all GCs. ReservedSpace Universe::reserve_heap(size_t heap_size, size_t alignment) { - // Add in the class metaspace area so the classes in the headers can - // be compressed the same as instances. - // Need to round class space size up because it's below the heap and - // the actual alignment depends on its size. - Universe::set_class_metaspace_size(align_size_up(ClassMetaspaceSize, alignment)); - size_t total_reserved = align_size_up(heap_size + Universe::class_metaspace_size(), alignment); + size_t total_reserved = align_size_up(heap_size, alignment); assert(!UseCompressedOops || (total_reserved <= (OopEncodingHeapMax - os::vm_page_size())), "heap size is too big for compressed oops"); - char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop); - ReservedHeapSpace total_rs(total_reserved, alignment, UseLargePages, addr); + bool use_large_pages = UseLargePages && is_size_aligned(alignment, os::large_page_size()); + assert(!UseLargePages + || UseParallelOldGC + || use_large_pages, "Wrong alignment to use large pages"); + + char* addr = Universe::preferred_heap_base(total_reserved, alignment, Universe::UnscaledNarrowOop); + + ReservedHeapSpace total_rs(total_reserved, alignment, use_large_pages, addr); if (UseCompressedOops) { if (addr != NULL && !total_rs.is_reserved()) { // Failed to reserve at specified address - the requested memory // region is taken already, for example, by 'java' launcher. // Try again to reserver heap higher. - addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop); + addr = Universe::preferred_heap_base(total_reserved, alignment, Universe::ZeroBasedNarrowOop); ReservedHeapSpace total_rs0(total_reserved, alignment, - UseLargePages, addr); + use_large_pages, addr); if (addr != NULL && !total_rs0.is_reserved()) { // Failed to reserve at specified address again - give up. - addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop); + addr = Universe::preferred_heap_base(total_reserved, alignment, Universe::HeapBasedNarrowOop); assert(addr == NULL, ""); ReservedHeapSpace total_rs1(total_reserved, alignment, - UseLargePages, addr); + use_large_pages, addr); total_rs = total_rs1; } else { total_rs = total_rs0; @@ -931,28 +919,17 @@ return total_rs; } - // Split the reserved space into main Java heap and a space for - // classes so that they can be compressed using the same algorithm - // as compressed oops. If compress oops and compress klass ptrs are - // used we need the meta space first: if the alignment used for - // compressed oops is greater than the one used for compressed klass - // ptrs, a metadata space on top of the heap could become - // unreachable. - ReservedSpace class_rs = total_rs.first_part(Universe::class_metaspace_size()); - ReservedSpace heap_rs = total_rs.last_part(Universe::class_metaspace_size(), alignment); - Metaspace::initialize_class_space(class_rs); - if (UseCompressedOops) { // Universe::initialize_heap() will reset this to NULL if unscaled // or zero-based narrow oops are actually used. address base = (address)(total_rs.base() - os::vm_page_size()); Universe::set_narrow_oop_base(base); } - return heap_rs; + return total_rs; } -// It's the caller's repsonsibility to ensure glitch-freedom +// It's the caller's responsibility to ensure glitch-freedom // (if required). void Universe::update_heap_info_at_gc() { _heap_capacity_at_last_gc = heap()->capacity(); @@ -1093,35 +1070,21 @@ vmSymbols::register_method_name(), vmSymbols::register_method_signature()); if (m == NULL || !m->is_static()) { - THROW_MSG_(vmSymbols::java_lang_NoSuchMethodException(), - "java.lang.ref.Finalizer.register", false); + tty->print_cr("Unable to link/verify Finalizer.register method"); + return false; // initialization failed (cannot throw exception yet) } Universe::_finalizer_register_cache->init( - SystemDictionary::Finalizer_klass(), m, CHECK_false); - - // Resolve on first use and initialize class. - // Note: No race-condition here, since a resolve will always return the same result - - // Setup method for security checks - k = SystemDictionary::resolve_or_fail(vmSymbols::java_lang_reflect_Method(), true, CHECK_false); - k_h = instanceKlassHandle(THREAD, k); - k_h->link_class(CHECK_false); - m = k_h->find_method(vmSymbols::invoke_name(), vmSymbols::object_object_array_object_signature()); - if (m == NULL || m->is_static()) { - THROW_MSG_(vmSymbols::java_lang_NoSuchMethodException(), - "java.lang.reflect.Method.invoke", false); - } - Universe::_reflect_invoke_cache->init(k_h(), m, CHECK_false); + SystemDictionary::Finalizer_klass(), m); // Setup method for registering loaded classes in class loader vector InstanceKlass::cast(SystemDictionary::ClassLoader_klass())->link_class(CHECK_false); m = InstanceKlass::cast(SystemDictionary::ClassLoader_klass())->find_method(vmSymbols::addClass_name(), vmSymbols::class_void_signature()); if (m == NULL || m->is_static()) { - THROW_MSG_(vmSymbols::java_lang_NoSuchMethodException(), - "java.lang.ClassLoader.addClass", false); + tty->print_cr("Unable to link/verify ClassLoader.addClass method"); + return false; // initialization failed (cannot throw exception yet) } Universe::_loader_addClass_cache->init( - SystemDictionary::ClassLoader_klass(), m, CHECK_false); + SystemDictionary::ClassLoader_klass(), m); // Setup method for checking protection domain InstanceKlass::cast(SystemDictionary::ProtectionDomain_klass())->link_class(CHECK_false); @@ -1137,7 +1100,7 @@ return false; // initialization failed } Universe::_pd_implies_cache->init( - SystemDictionary::ProtectionDomain_klass(), m, CHECK_false);; + SystemDictionary::ProtectionDomain_klass(), m);; } // The folowing is initializing converter functions for serialization in @@ -1157,6 +1120,8 @@ // Initialize performance counters for metaspaces MetaspaceCounters::initialize_performance_counters(); + CompressedClassSpaceCounters::initialize_performance_counters(); + MemoryService::add_metaspace_memory_pools(); GC_locker::unlock(); // allow gc after bootstrapping @@ -1460,7 +1425,7 @@ } -void CommonMethodOopCache::init(Klass* k, Method* m, TRAPS) { +void LatestMethodCache::init(Klass* k, Method* m) { if (!UseSharedSpaces) { _klass = k; } @@ -1476,88 +1441,7 @@ } -ActiveMethodOopsCache::~ActiveMethodOopsCache() { - if (_prev_methods != NULL) { - delete _prev_methods; - _prev_methods = NULL; - } -} - - -void ActiveMethodOopsCache::add_previous_version(Method* method) { - assert(Thread::current()->is_VM_thread(), - "only VMThread can add previous versions"); - - // Only append the previous method if it is executing on the stack. - if (method->on_stack()) { - - if (_prev_methods == NULL) { - // This is the first previous version so make some space. - // Start with 2 elements under the assumption that the class - // won't be redefined much. - _prev_methods = new (ResourceObj::C_HEAP, mtClass) GrowableArray(2, true); - } - - // RC_TRACE macro has an embedded ResourceMark - RC_TRACE(0x00000100, - ("add: %s(%s): adding prev version ref for cached method @%d", - method->name()->as_C_string(), method->signature()->as_C_string(), - _prev_methods->length())); - - _prev_methods->append(method); - } - - - // Since the caller is the VMThread and we are at a safepoint, this is a good - // time to clear out unused method references. - - if (_prev_methods == NULL) return; - - for (int i = _prev_methods->length() - 1; i >= 0; i--) { - Method* method = _prev_methods->at(i); - assert(method != NULL, "weak method ref was unexpectedly cleared"); - - if (!method->on_stack()) { - // This method isn't running anymore so remove it - _prev_methods->remove_at(i); - MetadataFactory::free_metadata(method->method_holder()->class_loader_data(), method); - } else { - // RC_TRACE macro has an embedded ResourceMark - RC_TRACE(0x00000400, - ("add: %s(%s): previous cached method @%d is alive", - method->name()->as_C_string(), method->signature()->as_C_string(), i)); - } - } -} // end add_previous_version() - - -bool ActiveMethodOopsCache::is_same_method(const Method* method) const { - InstanceKlass* ik = InstanceKlass::cast(klass()); - const Method* check_method = ik->method_with_idnum(method_idnum()); - assert(check_method != NULL, "sanity check"); - if (check_method == method) { - // done with the easy case - return true; - } - - if (_prev_methods != NULL) { - // The cached method has been redefined at least once so search - // the previous versions for a match. - for (int i = 0; i < _prev_methods->length(); i++) { - check_method = _prev_methods->at(i); - if (check_method == method) { - // a previous version matches - return true; - } - } - } - - // either no previous versions or no previous version matched - return false; -} - - -Method* LatestMethodOopCache::get_Method() { +Method* LatestMethodCache::get_method() { if (klass() == NULL) return NULL; InstanceKlass* ik = InstanceKlass::cast(klass()); Method* m = ik->method_with_idnum(method_idnum()); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/memory/universe.hpp --- a/src/share/vm/memory/universe.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/memory/universe.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -41,10 +41,11 @@ class DeferredObjAllocEvent; -// Common parts of a Method* cache. This cache safely interacts with -// the RedefineClasses API. -// -class CommonMethodOopCache : public CHeapObj { +// A helper class for caching a Method* when the user of the cache +// only cares about the latest version of the Method*. This cache safely +// interacts with the RedefineClasses API. + +class LatestMethodCache : public CHeapObj { // We save the Klass* and the idnum of Method* in order to get // the current cached Method*. private: @@ -52,12 +53,14 @@ int _method_idnum; public: - CommonMethodOopCache() { _klass = NULL; _method_idnum = -1; } - ~CommonMethodOopCache() { _klass = NULL; _method_idnum = -1; } + LatestMethodCache() { _klass = NULL; _method_idnum = -1; } + ~LatestMethodCache() { _klass = NULL; _method_idnum = -1; } - void init(Klass* k, Method* m, TRAPS); - Klass* klass() const { return _klass; } - int method_idnum() const { return _method_idnum; } + void init(Klass* k, Method* m); + Klass* klass() const { return _klass; } + int method_idnum() const { return _method_idnum; } + + Method* get_method(); // Enhanced Class Redefinition support void classes_do(void f(Klass*)) { @@ -72,43 +75,10 @@ }; -// A helper class for caching a Method* when the user of the cache -// cares about all versions of the Method*. -// -class ActiveMethodOopsCache : public CommonMethodOopCache { - // This subclass adds weak references to older versions of the - // Method* and a query method for a Method*. - - private: - // If the cached Method* has not been redefined, then - // _prev_methods will be NULL. If all of the previous - // versions of the method have been collected, then - // _prev_methods can have a length of zero. - GrowableArray* _prev_methods; - - public: - ActiveMethodOopsCache() { _prev_methods = NULL; } - ~ActiveMethodOopsCache(); - - void add_previous_version(Method* method); - bool is_same_method(const Method* method) const; -}; - - -// A helper class for caching a Method* when the user of the cache -// only cares about the latest version of the Method*. -// -class LatestMethodOopCache : public CommonMethodOopCache { - // This subclass adds a getter method for the latest Method*. - - public: - Method* get_Method(); -}; - -// For UseCompressedOops and UseCompressedKlassPointers. +// For UseCompressedOops. struct NarrowPtrStruct { - // Base address for oop/klass-within-java-object materialization. - // NULL if using wide oops/klasses or zero based narrow oops/klasses. + // Base address for oop-within-java-object materialization. + // NULL if using wide oops or zero based narrow oops. address _base; // Number of shift bits for encoding/decoding narrow ptrs. // 0 if using wide ptrs or zero based unscaled narrow ptrs, @@ -136,6 +106,7 @@ friend class SystemDictionary; friend class VMStructs; friend class VM_PopulateDumpSharedSpace; + friend class Metaspace; friend jint universe_init(); friend void universe2_init(); @@ -174,10 +145,10 @@ static objArrayOop _the_empty_class_klass_array; // Canonicalized obj array of type java.lang.Class static oop _the_null_string; // A cache of "null" as a Java string static oop _the_min_jint_string; // A cache of "-2147483648" as a Java string - static LatestMethodOopCache* _finalizer_register_cache; // static method for registering finalizable objects - static LatestMethodOopCache* _loader_addClass_cache; // method for registering loaded classes in class loader vector - static LatestMethodOopCache* _pd_implies_cache; // method for checking protection domain attributes - static ActiveMethodOopsCache* _reflect_invoke_cache; // method for security checks + static LatestMethodCache* _finalizer_register_cache; // static method for registering finalizable objects + static LatestMethodCache* _loader_addClass_cache; // method for registering loaded classes in class loader vector + static LatestMethodCache* _pd_implies_cache; // method for checking protection domain attributes + // preallocated error objects (no backtrace) static oop _out_of_memory_error_java_heap; static oop _out_of_memory_error_metaspace; @@ -214,9 +185,6 @@ static struct NarrowPtrStruct _narrow_klass; static address _narrow_ptrs_base; - // Aligned size of the metaspace. - static size_t _class_metaspace_size; - // array of dummy objects used with +FullGCAlot debug_only(static objArrayOop _fullgc_alot_dummy_array;) // index of next entry to clear @@ -268,15 +236,6 @@ assert(UseCompressedOops, "no compressed ptrs?"); _narrow_oop._use_implicit_null_checks = use; } - static bool reserve_metaspace_helper(bool with_base = false); - static ReservedHeapSpace reserve_heap_metaspace(size_t heap_size, size_t alignment, bool& contiguous); - - static size_t class_metaspace_size() { - return _class_metaspace_size; - } - static void set_class_metaspace_size(size_t metaspace_size) { - _class_metaspace_size = metaspace_size; - } // Debugging static int _verify_count; // number of verifies done @@ -334,11 +293,11 @@ static Array* the_array_interfaces_array() { return _the_array_interfaces_array; } static oop the_null_string() { return _the_null_string; } static oop the_min_jint_string() { return _the_min_jint_string; } - static Method* finalizer_register_method() { return _finalizer_register_cache->get_Method(); } - static Method* loader_addClass_method() { return _loader_addClass_cache->get_Method(); } - static Method* protection_domain_implies_method() { return _pd_implies_cache->get_Method(); } - static ActiveMethodOopsCache* reflect_invoke_cache() { return _reflect_invoke_cache; } + static Method* finalizer_register_method() { return _finalizer_register_cache->get_method(); } + static Method* loader_addClass_method() { return _loader_addClass_cache->get_method(); } + + static Method* protection_domain_implies_method() { return _pd_implies_cache->get_method(); } static oop null_ptr_exception_instance() { return _null_ptr_exception_instance; } static oop arithmetic_exception_instance() { return _arithmetic_exception_instance; } @@ -387,7 +346,7 @@ }; static NARROW_OOP_MODE narrow_oop_mode(); static const char* narrow_oop_mode_to_string(NARROW_OOP_MODE mode); - static char* preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode); + static char* preferred_heap_base(size_t heap_size, size_t alignment, NARROW_OOP_MODE mode); static char* preferred_metaspace_base(size_t heap_size, NARROW_OOP_MODE mode); static address narrow_oop_base() { return _narrow_oop._base; } static bool is_narrow_oop_base(void* addr) { return (narrow_oop_base() == (address)addr); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/oops/instanceKlass.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -269,7 +269,7 @@ set_fields(NULL, 0); set_constants(NULL); set_class_loader_data(NULL); - set_source_file_name(NULL); + set_source_file_name_index(0); set_source_debug_extension(NULL, 0); set_array_name(NULL); set_inner_classes(NULL); @@ -284,7 +284,7 @@ set_osr_nmethods_head(NULL); set_breakpoints(NULL); init_previous_versions(); - set_generic_signature(NULL); + set_generic_signature_index(0); release_set_methods_jmethod_ids(NULL); release_set_methods_cached_itable_indices(NULL); set_annotations(NULL); @@ -2368,18 +2368,12 @@ // unreference array name derived from this class name (arrays of an unloaded // class can't be referenced anymore). if (_array_name != NULL) _array_name->decrement_refcount(); - if (_source_file_name != NULL) _source_file_name->decrement_refcount(); if (_source_debug_extension != NULL) FREE_C_HEAP_ARRAY(char, _source_debug_extension, mtClass); assert(_total_instanceKlass_count >= 1, "Sanity check"); Atomic::dec(&_total_instanceKlass_count); } -void InstanceKlass::set_source_file_name(Symbol* n) { - _source_file_name = n; - if (_source_file_name != NULL) _source_file_name->increment_refcount(); -} - void InstanceKlass::set_source_debug_extension(char* array, int length) { if (array == NULL) { _source_debug_extension = NULL; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/oops/instanceKlass.hpp --- a/src/share/vm/oops/instanceKlass.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/oops/instanceKlass.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -201,14 +201,10 @@ // number_of_inner_classes * 4 + enclosing_method_attribute_size. Array* _inner_classes; - // Name of source file containing this klass, NULL if not specified. - Symbol* _source_file_name; // the source debug extension for this klass, NULL if not specified. // Specified as UTF-8 string without terminating zero byte in the classfile, // it is stored in the instanceklass as a NULL-terminated UTF-8 string char* _source_debug_extension; - // Generic signature, or null if none. - Symbol* _generic_signature; // Array name derived from this class which needs unreferencing // if this class is unloaded. Symbol* _array_name; @@ -217,6 +213,12 @@ // (including inherited fields but after header_size()). int _nonstatic_field_size; int _static_field_size; // number words used by static fields (oop and non-oop) in this klass + // Constant pool index to the utf8 entry of the Generic signature, + // or 0 if none. + u2 _generic_signature_index; + // Constant pool index to the utf8 entry for the name of source file + // containing this klass, 0 if not specified. + u2 _source_file_name_index; u2 _static_oop_field_count;// number of static oop fields in this klass u2 _java_fields_count; // The number of declared Java fields int _nonstatic_oop_map_size;// size in words of nonstatic oop map blocks @@ -570,8 +572,16 @@ } // source file name - Symbol* source_file_name() const { return _source_file_name; } - void set_source_file_name(Symbol* n); + Symbol* source_file_name() const { + return (_source_file_name_index == 0) ? + (Symbol*)NULL : _constants->symbol_at(_source_file_name_index); + } + u2 source_file_name_index() const { + return _source_file_name_index; + } + void set_source_file_name_index(u2 sourcefile_index) { + _source_file_name_index = sourcefile_index; + } // minor and major version numbers of class file u2 minor_version() const { return _minor_version; } @@ -648,8 +658,16 @@ void set_initial_method_idnum(u2 value) { _idnum_allocated_count = value; } // generics support - Symbol* generic_signature() const { return _generic_signature; } - void set_generic_signature(Symbol* sig) { _generic_signature = sig; } + Symbol* generic_signature() const { + return (_generic_signature_index == 0) ? + (Symbol*)NULL : _constants->symbol_at(_generic_signature_index); + } + u2 generic_signature_index() const { + return _generic_signature_index; + } + void set_generic_signature_index(u2 sig_index) { + _generic_signature_index = sig_index; + } u2 enclosing_method_data(int offset); u2 enclosing_method_class_index() { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/oops/klass.hpp --- a/src/share/vm/oops/klass.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/oops/klass.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -352,7 +352,8 @@ static int layout_helper_log2_element_size(jint lh) { assert(lh < (jint)_lh_neutral_value, "must be array"); int l2esz = (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask; - assert(l2esz <= LogBitsPerLong, "sanity"); + assert(l2esz <= LogBitsPerLong, + err_msg("sanity. l2esz: 0x%x for lh: 0x%x", (uint)l2esz, (uint)lh)); return l2esz; } static jint array_layout_helper(jint tag, int hsize, BasicType etype, int log2_esize) { @@ -703,6 +704,16 @@ virtual void oop_verify_on(oop obj, outputStream* st); + static bool is_null(narrowKlass obj); + static bool is_null(Klass* obj); + + // klass encoding for klass pointer in objects. + static narrowKlass encode_klass_not_null(Klass* v); + static narrowKlass encode_klass(Klass* v); + + static Klass* decode_klass_not_null(narrowKlass v); + static Klass* decode_klass(narrowKlass v); + private: // barriers used by klass_oop_store void klass_update_barrier_set(oop v); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/oops/klass.inline.hpp --- a/src/share/vm/oops/klass.inline.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/oops/klass.inline.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #ifndef SHARE_VM_OOPS_KLASS_INLINE_HPP #define SHARE_VM_OOPS_KLASS_INLINE_HPP +#include "memory/universe.hpp" #include "oops/klass.hpp" #include "oops/markOop.hpp" @@ -33,4 +34,41 @@ _prototype_header = header; } +inline bool Klass::is_null(Klass* obj) { return obj == NULL; } +inline bool Klass::is_null(narrowKlass obj) { return obj == 0; } + +// Encoding and decoding for klass field. + +inline bool check_klass_alignment(Klass* obj) { + return (intptr_t)obj % KlassAlignmentInBytes == 0; +} + +inline narrowKlass Klass::encode_klass_not_null(Klass* v) { + assert(!is_null(v), "klass value can never be zero"); + assert(check_klass_alignment(v), "Address not aligned"); + int shift = Universe::narrow_klass_shift(); + uint64_t pd = (uint64_t)(pointer_delta((void*)v, Universe::narrow_klass_base(), 1)); + assert(KlassEncodingMetaspaceMax > pd, "change encoding max if new encoding"); + uint64_t result = pd >> shift; + assert((result & CONST64(0xffffffff00000000)) == 0, "narrow klass pointer overflow"); + assert(decode_klass(result) == v, "reversibility"); + return (narrowKlass)result; +} + +inline narrowKlass Klass::encode_klass(Klass* v) { + return is_null(v) ? (narrowKlass)0 : encode_klass_not_null(v); +} + +inline Klass* Klass::decode_klass_not_null(narrowKlass v) { + assert(!is_null(v), "narrow klass value can never be zero"); + int shift = Universe::narrow_klass_shift(); + Klass* result = (Klass*)(void*)((uintptr_t)Universe::narrow_klass_base() + ((uintptr_t)v << shift)); + assert(check_klass_alignment(result), err_msg("address not aligned: " PTR_FORMAT, (void*) result)); + return result; +} + +inline Klass* Klass::decode_klass(narrowKlass v) { + return is_null(v) ? (Klass*)NULL : decode_klass_not_null(v); +} + #endif // SHARE_VM_OOPS_KLASS_INLINE_HPP diff -r bdd155477289 -r e2722a66aba7 src/share/vm/oops/method.cpp --- a/src/share/vm/oops/method.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/oops/method.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -747,6 +747,7 @@ set_not_c2_compilable(); } CompilationPolicy::policy()->disable_compilation(this); + assert(!CompilationPolicy::can_be_compiled(this, comp_level), "sanity check"); } bool Method::is_not_osr_compilable(int comp_level) const { @@ -773,6 +774,7 @@ set_not_c2_osr_compilable(); } CompilationPolicy::policy()->disable_compilation(this); + assert(!CompilationPolicy::can_be_osr_compiled(this, comp_level), "sanity check"); } // Revert to using the interpreter and clear out the nmethod @@ -981,7 +983,6 @@ bool Method::is_ignored_by_security_stack_walk() const { const bool use_new_reflection = JDK_Version::is_gte_jdk14x_version() && UseNewReflection; - assert(intrinsic_id() != vmIntrinsics::_invoke || Universe::reflect_invoke_cache()->is_same_method((Method*)this), "sanity"); if (intrinsic_id() == vmIntrinsics::_invoke) { // This is Method.invoke() -- ignore it return true; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/oops/oop.hpp --- a/src/share/vm/oops/oop.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/oops/oop.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -62,7 +62,7 @@ volatile markOop _mark; union _metadata { Klass* _klass; - narrowOop _compressed_klass; + narrowKlass _compressed_klass; } _metadata; // Fast access to barrier set. Must be initialized. @@ -84,7 +84,7 @@ Klass* klass() const; Klass* klass_or_null() const volatile; Klass** klass_addr(); - narrowOop* compressed_klass_addr(); + narrowKlass* compressed_klass_addr(); void set_klass(Klass* k); @@ -189,13 +189,6 @@ oop compare_value, bool prebarrier = false); - // klass encoding for klass pointer in objects. - static narrowOop encode_klass_not_null(Klass* v); - static narrowOop encode_klass(Klass* v); - - static Klass* decode_klass_not_null(narrowOop v); - static Klass* decode_klass(narrowOop v); - // Access to fields in a instanceOop through these methods. oop obj_field(int offset) const; volatile oop obj_field_volatile(int offset) const; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/oops/oop.inline.hpp --- a/src/share/vm/oops/oop.inline.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/oops/oop.inline.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -35,7 +35,7 @@ #include "memory/specialized_oop_closures.hpp" #include "oops/arrayKlass.hpp" #include "oops/arrayOop.hpp" -#include "oops/klass.hpp" +#include "oops/klass.inline.hpp" #include "oops/markOop.inline.hpp" #include "oops/oop.hpp" #include "runtime/atomic.hpp" @@ -70,7 +70,7 @@ inline Klass* oopDesc::klass() const { if (UseCompressedKlassPointers) { - return decode_klass_not_null(_metadata._compressed_klass); + return Klass::decode_klass_not_null(_metadata._compressed_klass); } else { return _metadata._klass; } @@ -79,7 +79,7 @@ inline Klass* oopDesc::klass_or_null() const volatile { // can be NULL in CMS if (UseCompressedKlassPointers) { - return decode_klass(_metadata._compressed_klass); + return Klass::decode_klass(_metadata._compressed_klass); } else { return _metadata._klass; } @@ -87,7 +87,7 @@ inline int oopDesc::klass_gap_offset_in_bytes() { assert(UseCompressedKlassPointers, "only applicable to compressed klass pointers"); - return oopDesc::klass_offset_in_bytes() + sizeof(narrowOop); + return oopDesc::klass_offset_in_bytes() + sizeof(narrowKlass); } inline Klass** oopDesc::klass_addr() { @@ -97,9 +97,9 @@ return (Klass**) &_metadata._klass; } -inline narrowOop* oopDesc::compressed_klass_addr() { +inline narrowKlass* oopDesc::compressed_klass_addr() { assert(UseCompressedKlassPointers, "only called by compressed klass pointers"); - return (narrowOop*) &_metadata._compressed_klass; + return &_metadata._compressed_klass; } inline void oopDesc::set_klass(Klass* k) { @@ -107,7 +107,7 @@ assert(Universe::is_bootstrapping() || k != NULL, "must be a real Klass*"); assert(Universe::is_bootstrapping() || k->is_klass(), "not a Klass*"); if (UseCompressedKlassPointers) { - *compressed_klass_addr() = encode_klass_not_null(k); + *compressed_klass_addr() = Klass::encode_klass_not_null(k); } else { *klass_addr() = k; } @@ -127,7 +127,7 @@ // This is only to be used during GC, for from-space objects, so no // barrier is needed. if (UseCompressedKlassPointers) { - _metadata._compressed_klass = encode_heap_oop(k); // may be null (parnew overflow handling) + _metadata._compressed_klass = (narrowKlass)encode_heap_oop(k); // may be null (parnew overflow handling) } else { _metadata._klass = (Klass*)(address)k; } @@ -136,7 +136,7 @@ inline oop oopDesc::list_ptr_from_klass() { // This is only to be used during GC, for from-space objects. if (UseCompressedKlassPointers) { - return decode_heap_oop(_metadata._compressed_klass); + return decode_heap_oop((narrowOop)_metadata._compressed_klass); } else { // Special case for GC return (oop)(address)_metadata._klass; @@ -176,7 +176,6 @@ // the right type and inlines the appopriate code). inline bool oopDesc::is_null(oop obj) { return obj == NULL; } -inline bool oopDesc::is_null(Klass* obj) { return obj == NULL; } inline bool oopDesc::is_null(narrowOop obj) { return obj == 0; } // Algorithm for encoding and decoding oops from 64 bit pointers to 32 bit @@ -186,9 +185,6 @@ inline bool check_obj_alignment(oop obj) { return (intptr_t)obj % MinObjAlignmentInBytes == 0; } -inline bool check_klass_alignment(Klass* obj) { - return (intptr_t)obj % KlassAlignmentInBytes == 0; -} inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) { assert(!is_null(v), "oop value can never be zero"); @@ -224,39 +220,6 @@ inline oop oopDesc::decode_heap_oop_not_null(oop v) { return v; } inline oop oopDesc::decode_heap_oop(oop v) { return v; } -// Encoding and decoding for klass field. It is copied code, but someday -// might not be the same as oop. - -inline narrowOop oopDesc::encode_klass_not_null(Klass* v) { - assert(!is_null(v), "klass value can never be zero"); - assert(check_klass_alignment(v), "Address not aligned"); - address base = Universe::narrow_klass_base(); - int shift = Universe::narrow_klass_shift(); - uint64_t pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1)); - assert(KlassEncodingMetaspaceMax > pd, "change encoding max if new encoding"); - uint64_t result = pd >> shift; - assert((result & CONST64(0xffffffff00000000)) == 0, "narrow klass pointer overflow"); - assert(decode_klass(result) == v, "reversibility"); - return (narrowOop)result; -} - -inline narrowOop oopDesc::encode_klass(Klass* v) { - return (is_null(v)) ? (narrowOop)0 : encode_klass_not_null(v); -} - -inline Klass* oopDesc::decode_klass_not_null(narrowOop v) { - assert(!is_null(v), "narrow oop value can never be zero"); - address base = Universe::narrow_klass_base(); - int shift = Universe::narrow_klass_shift(); - Klass* result = (Klass*)(void*)((uintptr_t)base + ((uintptr_t)v << shift)); - assert(check_klass_alignment(result), err_msg("address not aligned: " PTR_FORMAT, (void*) result)); - return result; -} - -inline Klass* oopDesc::decode_klass(narrowOop v) { - return is_null(v) ? (Klass*)NULL : decode_klass_not_null(v); -} - // Load an oop out of the Java heap as is without decoding. // Called by GC to check for null before decoding. inline oop oopDesc::load_heap_oop(oop* p) { return *p; } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/oops/oopsHierarchy.hpp --- a/src/share/vm/oops/oopsHierarchy.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/oops/oopsHierarchy.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,6 +33,10 @@ // of B, A's representation is a prefix of B's representation. typedef juint narrowOop; // Offset instead of address for an oop within a java object + +// If compressed klass pointers then use narrowKlass. +typedef juint narrowKlass; + typedef void* OopOrNarrowOopStar; typedef class markOopDesc* markOop; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/block.cpp --- a/src/share/vm/opto/block.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/block.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -35,10 +35,6 @@ #include "opto/rootnode.hpp" #include "utilities/copy.hpp" -// Optimization - Graph Style - - -//----------------------------------------------------------------------------- void Block_Array::grow( uint i ) { assert(i >= Max(), "must be an overflow"); debug_only(_limit = i+1); @@ -54,7 +50,6 @@ Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) ); } -//============================================================================= void Block_List::remove(uint i) { assert(i < _cnt, "index out of bounds"); Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*))); @@ -76,8 +71,6 @@ } #endif -//============================================================================= - uint Block::code_alignment() { // Check for Root block if (_pre_order == 0) return CodeEntryAlignment; @@ -113,7 +106,6 @@ return unit_sz; // no particular alignment } -//----------------------------------------------------------------------------- // Compute the size of first 'inst_cnt' instructions in this block. // Return the number of instructions left to compute if the block has // less then 'inst_cnt' instructions. Stop, and return 0 if sum_size @@ -138,7 +130,6 @@ return inst_cnt; } -//----------------------------------------------------------------------------- uint Block::find_node( const Node *n ) const { for( uint i = 0; i < _nodes.size(); i++ ) { if( _nodes[i] == n ) @@ -153,7 +144,6 @@ _nodes.remove(find_node(n)); } -//------------------------------is_Empty--------------------------------------- // Return empty status of a block. Empty blocks contain only the head, other // ideal nodes, and an optional trailing goto. int Block::is_Empty() const { @@ -192,7 +182,6 @@ return not_empty; } -//------------------------------has_uncommon_code------------------------------ // Return true if the block's code implies that it is likely to be // executed infrequently. Check to see if the block ends in a Halt or // a low probability call. @@ -218,10 +207,9 @@ return op == Op_Halt; } -//------------------------------is_uncommon------------------------------------ // True if block is low enough frequency or guarded by a test which // mostly does not go here. -bool Block::is_uncommon( Block_Array &bbs ) const { +bool Block::is_uncommon(PhaseCFG* cfg) const { // Initial blocks must never be moved, so are never uncommon. if (head()->is_Root() || head()->is_Start()) return false; @@ -238,7 +226,7 @@ uint uncommon_for_freq_preds = 0; for( uint i=1; i_idx]; + Block* guard = cfg->get_block_for_node(pred(i)); // Check to see if this block follows its guard 1 time out of 10000 // or less. // @@ -271,7 +259,6 @@ return false; } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void Block::dump_bidx(const Block* orig, outputStream* st) const { if (_pre_order) st->print("B%d",_pre_order); @@ -285,11 +272,11 @@ } } -void Block::dump_pred(const Block_Array *bbs, Block* orig, outputStream* st) const { +void Block::dump_pred(const PhaseCFG* cfg, Block* orig, outputStream* st) const { if (is_connector()) { for (uint i=1; i_idx]); - p->dump_pred(bbs, orig, st); + Block *p = cfg->get_block_for_node(pred(i)); + p->dump_pred(cfg, orig, st); } } else { dump_bidx(orig, st); @@ -297,7 +284,7 @@ } } -void Block::dump_head( const Block_Array *bbs, outputStream* st ) const { +void Block::dump_head(const PhaseCFG* cfg, outputStream* st) const { // Print the basic block dump_bidx(this, st); st->print(": #\t"); @@ -311,26 +298,28 @@ if( head()->is_block_start() ) { for (uint i=1; i_idx]; - p->dump_pred(bbs, p, st); + if (cfg != NULL) { + Block *p = cfg->get_block_for_node(s); + p->dump_pred(cfg, p, st); } else { while (!s->is_block_start()) s = s->in(0); st->print("N%d ", s->_idx ); } } - } else + } else { st->print("BLOCK HEAD IS JUNK "); + } // Print loop, if any const Block *bhead = this; // Head of self-loop Node *bh = bhead->head(); - if( bbs && bh->is_Loop() && !head()->is_Root() ) { + + if ((cfg != NULL) && bh->is_Loop() && !head()->is_Root()) { LoopNode *loop = bh->as_Loop(); - const Block *bx = (*bbs)[loop->in(LoopNode::LoopBackControl)->_idx]; + const Block *bx = cfg->get_block_for_node(loop->in(LoopNode::LoopBackControl)); while (bx->is_connector()) { - bx = (*bbs)[bx->pred(1)->_idx]; + bx = cfg->get_block_for_node(bx->pred(1)); } st->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order); // Dump any loop-specific bits, especially for CountedLoops. @@ -349,29 +338,31 @@ st->print_cr(""); } -void Block::dump() const { dump(NULL); } +void Block::dump() const { + dump(NULL); +} -void Block::dump( const Block_Array *bbs ) const { - dump_head(bbs); - uint cnt = _nodes.size(); - for( uint i=0; idump(); + } tty->print("\n"); } #endif -//============================================================================= -//------------------------------PhaseCFG--------------------------------------- -PhaseCFG::PhaseCFG( Arena *a, RootNode *r, Matcher &m ) : - Phase(CFG), - _bbs(a), - _root(r), - _node_latency(NULL) +PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher) +: Phase(CFG) +, _block_arena(arena) +, _root(root) +, _matcher(matcher) +, _node_to_block_mapping(arena) +, _node_latency(NULL) #ifndef PRODUCT - , _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining")) +, _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining")) #endif #ifdef ASSERT - , _raw_oops(a) +, _raw_oops(arena) #endif { ResourceMark rm; @@ -380,16 +371,15 @@ // Node on demand. Node *x = new (C) GotoNode(NULL); x->init_req(0, x); - _goto = m.match_tree(x); + _goto = matcher.match_tree(x); assert(_goto != NULL, ""); _goto->set_req(0,_goto); // Build the CFG in Reverse Post Order - _num_blocks = build_cfg(); - _broot = _bbs[_root->_idx]; + _number_of_blocks = build_cfg(); + _root_block = get_block_for_node(_root); } -//------------------------------build_cfg-------------------------------------- // Build a proper looking CFG. Make every block begin with either a StartNode // or a RegionNode. Make every block end with either a Goto, If or Return. // The RootNode both starts and ends it's own block. Do this with a recursive @@ -440,9 +430,9 @@ // 'p' now points to the start of this basic block // Put self in array of basic blocks - Block *bb = new (_bbs._arena) Block(_bbs._arena,p); - _bbs.map(p->_idx,bb); - _bbs.map(x->_idx,bb); + Block *bb = new (_block_arena) Block(_block_arena, p); + map_node_to_block(p, bb); + map_node_to_block(x, bb); if( x != p ) { // Only for root is x == p bb->_nodes.push((Node*)x); } @@ -473,16 +463,16 @@ // Check if it the fist node pushed on stack at the beginning. if (idx == 0) break; // end of the build // Find predecessor basic block - Block *pb = _bbs[x->_idx]; + Block *pb = get_block_for_node(x); // Insert into nodes array, if not already there - if( !_bbs.lookup(proj->_idx) ) { + if (!has_block(proj)) { assert( x != proj, "" ); // Map basic block of projection - _bbs.map(proj->_idx,pb); + map_node_to_block(proj, pb); pb->_nodes.push(proj); } // Insert self as a child of my predecessor block - pb->_succs.map(pb->_num_succs++, _bbs[np->_idx]); + pb->_succs.map(pb->_num_succs++, get_block_for_node(np)); assert( pb->_nodes[ pb->_nodes.size() - pb->_num_succs ]->is_block_proj(), "too many control users, not a CFG?" ); } @@ -491,13 +481,12 @@ return sum; } -//------------------------------insert_goto_at--------------------------------- // Inserts a goto & corresponding basic block between // block[block_no] and its succ_no'th successor block void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) { // get block with block_no - assert(block_no < _num_blocks, "illegal block number"); - Block* in = _blocks[block_no]; + assert(block_no < number_of_blocks(), "illegal block number"); + Block* in = get_block(block_no); // get successor block succ_no assert(succ_no < in->_num_succs, "illegal successor number"); Block* out = in->_succs[succ_no]; @@ -511,15 +500,15 @@ RegionNode* region = new (C) RegionNode(2); region->init_req(1, proj); // setup corresponding basic block - Block* block = new (_bbs._arena) Block(_bbs._arena, region); - _bbs.map(region->_idx, block); + Block* block = new (_block_arena) Block(_block_arena, region); + map_node_to_block(region, block); C->regalloc()->set_bad(region->_idx); // add a goto node Node* gto = _goto->clone(); // get a new goto node gto->set_req(0, region); // add it to the basic block block->_nodes.push(gto); - _bbs.map(gto->_idx, block); + map_node_to_block(gto, block); C->regalloc()->set_bad(gto->_idx); // hook up successor block block->_succs.map(block->_num_succs++, out); @@ -532,11 +521,9 @@ // Set the frequency of the new block block->_freq = freq; // add new basic block to basic block list - _blocks.insert(block_no + 1, block); - _num_blocks++; + add_block_at(block_no + 1, block); } -//------------------------------no_flip_branch--------------------------------- // Does this block end in a multiway branch that cannot have the default case // flipped for another case? static bool no_flip_branch( Block *b ) { @@ -555,7 +542,6 @@ return false; } -//------------------------------convert_NeverBranch_to_Goto-------------------- // Check for NeverBranch at block end. This needs to become a GOTO to the // true target. NeverBranch are treated as a conditional branch that always // goes the same direction for most of the optimizer and are used to give a @@ -570,7 +556,7 @@ gto->set_req(0, b->head()); Node *bp = b->_nodes[end_idx]; b->_nodes.map(end_idx,gto); // Slam over NeverBranch - _bbs.map(gto->_idx, b); + map_node_to_block(gto, b); C->regalloc()->set_bad(gto->_idx); b->_nodes.pop(); // Yank projections b->_nodes.pop(); // Yank projections @@ -593,7 +579,6 @@ dead->_nodes[k]->del_req(j); } -//------------------------------move_to_next----------------------------------- // Helper function to move block bx to the slot following b_index. Return // true if the move is successful, otherwise false bool PhaseCFG::move_to_next(Block* bx, uint b_index) { @@ -601,20 +586,22 @@ // Return false if bx is already scheduled. uint bx_index = bx->_pre_order; - if ((bx_index <= b_index) && (_blocks[bx_index] == bx)) { + if ((bx_index <= b_index) && (get_block(bx_index) == bx)) { return false; } // Find the current index of block bx on the block list bx_index = b_index + 1; - while( bx_index < _num_blocks && _blocks[bx_index] != bx ) bx_index++; - assert(_blocks[bx_index] == bx, "block not found"); + while (bx_index < number_of_blocks() && get_block(bx_index) != bx) { + bx_index++; + } + assert(get_block(bx_index) == bx, "block not found"); // If the previous block conditionally falls into bx, return false, // because moving bx will create an extra jump. for(uint k = 1; k < bx->num_preds(); k++ ) { - Block* pred = _bbs[bx->pred(k)->_idx]; - if (pred == _blocks[bx_index-1]) { + Block* pred = get_block_for_node(bx->pred(k)); + if (pred == get_block(bx_index - 1)) { if (pred->_num_succs != 1) { return false; } @@ -627,7 +614,6 @@ return true; } -//------------------------------move_to_end------------------------------------ // Move empty and uncommon blocks to the end. void PhaseCFG::move_to_end(Block *b, uint i) { int e = b->is_Empty(); @@ -645,31 +631,31 @@ _blocks.push(b); } -//---------------------------set_loop_alignment-------------------------------- // Set loop alignment for every block void PhaseCFG::set_loop_alignment() { - uint last = _num_blocks; - assert( _blocks[0] == _broot, "" ); + uint last = number_of_blocks(); + assert(get_block(0) == get_root_block(), ""); - for (uint i = 1; i < last; i++ ) { - Block *b = _blocks[i]; - if (b->head()->is_Loop()) { - b->set_loop_alignment(b); + for (uint i = 1; i < last; i++) { + Block* block = get_block(i); + if (block->head()->is_Loop()) { + block->set_loop_alignment(block); } } } -//-----------------------------remove_empty------------------------------------ // Make empty basic blocks to be "connector" blocks, Move uncommon blocks // to the end. -void PhaseCFG::remove_empty() { +void PhaseCFG::remove_empty_blocks() { // Move uncommon blocks to the end - uint last = _num_blocks; - assert( _blocks[0] == _broot, "" ); + uint last = number_of_blocks(); + assert(get_block(0) == get_root_block(), ""); for (uint i = 1; i < last; i++) { - Block *b = _blocks[i]; - if (b->is_connector()) break; + Block* block = get_block(i); + if (block->is_connector()) { + break; + } // Check for NeverBranch at block end. This needs to become a GOTO to the // true target. NeverBranch are treated as a conditional branch that @@ -677,124 +663,127 @@ // to give a fake exit path to infinite loops. At this late stage they // need to turn into Goto's so that when you enter the infinite loop you // indeed hang. - if( b->_nodes[b->end_idx()]->Opcode() == Op_NeverBranch ) - convert_NeverBranch_to_Goto(b); + if (block->_nodes[block->end_idx()]->Opcode() == Op_NeverBranch) { + convert_NeverBranch_to_Goto(block); + } // Look for uncommon blocks and move to end. if (!C->do_freq_based_layout()) { - if( b->is_uncommon(_bbs) ) { - move_to_end(b, i); + if (block->is_uncommon(this)) { + move_to_end(block, i); last--; // No longer check for being uncommon! - if( no_flip_branch(b) ) { // Fall-thru case must follow? - b = _blocks[i]; // Find the fall-thru block - move_to_end(b, i); + if (no_flip_branch(block)) { // Fall-thru case must follow? + // Find the fall-thru block + block = get_block(i); + move_to_end(block, i); last--; } - i--; // backup block counter post-increment + // backup block counter post-increment + i--; } } } // Move empty blocks to the end - last = _num_blocks; + last = number_of_blocks(); for (uint i = 1; i < last; i++) { - Block *b = _blocks[i]; - if (b->is_Empty() != Block::not_empty) { - move_to_end(b, i); + Block* block = get_block(i); + if (block->is_Empty() != Block::not_empty) { + move_to_end(block, i); last--; i--; } } // End of for all blocks } -//-----------------------------fixup_flow-------------------------------------- // Fix up the final control flow for basic blocks. void PhaseCFG::fixup_flow() { // Fixup final control flow for the blocks. Remove jump-to-next // block. If neither arm of a IF follows the conditional branch, we // have to add a second jump after the conditional. We place the // TRUE branch target in succs[0] for both GOTOs and IFs. - for (uint i=0; i < _num_blocks; i++) { - Block *b = _blocks[i]; - b->_pre_order = i; // turn pre-order into block-index + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + block->_pre_order = i; // turn pre-order into block-index // Connector blocks need no further processing. - if (b->is_connector()) { - assert((i+1) == _num_blocks || _blocks[i+1]->is_connector(), - "All connector blocks should sink to the end"); + if (block->is_connector()) { + assert((i+1) == number_of_blocks() || get_block(i + 1)->is_connector(), "All connector blocks should sink to the end"); continue; } - assert(b->is_Empty() != Block::completely_empty, - "Empty blocks should be connectors"); + assert(block->is_Empty() != Block::completely_empty, "Empty blocks should be connectors"); - Block *bnext = (i < _num_blocks-1) ? _blocks[i+1] : NULL; - Block *bs0 = b->non_connector_successor(0); + Block* bnext = (i < number_of_blocks() - 1) ? get_block(i + 1) : NULL; + Block* bs0 = block->non_connector_successor(0); // Check for multi-way branches where I cannot negate the test to // exchange the true and false targets. - if( no_flip_branch( b ) ) { + if (no_flip_branch(block)) { // Find fall through case - if must fall into its target - int branch_idx = b->_nodes.size() - b->_num_succs; - for (uint j2 = 0; j2 < b->_num_succs; j2++) { - const ProjNode* p = b->_nodes[branch_idx + j2]->as_Proj(); + int branch_idx = block->_nodes.size() - block->_num_succs; + for (uint j2 = 0; j2 < block->_num_succs; j2++) { + const ProjNode* p = block->_nodes[branch_idx + j2]->as_Proj(); if (p->_con == 0) { // successor j2 is fall through case - if (b->non_connector_successor(j2) != bnext) { + if (block->non_connector_successor(j2) != bnext) { // but it is not the next block => insert a goto insert_goto_at(i, j2); } // Put taken branch in slot 0 - if( j2 == 0 && b->_num_succs == 2) { + if (j2 == 0 && block->_num_succs == 2) { // Flip targets in succs map - Block *tbs0 = b->_succs[0]; - Block *tbs1 = b->_succs[1]; - b->_succs.map( 0, tbs1 ); - b->_succs.map( 1, tbs0 ); + Block *tbs0 = block->_succs[0]; + Block *tbs1 = block->_succs[1]; + block->_succs.map(0, tbs1); + block->_succs.map(1, tbs0); } break; } } + // Remove all CatchProjs - for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop(); + for (uint j = 0; j < block->_num_succs; j++) { + block->_nodes.pop(); + } - } else if (b->_num_succs == 1) { + } else if (block->_num_succs == 1) { // Block ends in a Goto? if (bnext == bs0) { // We fall into next block; remove the Goto - b->_nodes.pop(); + block->_nodes.pop(); } - } else if( b->_num_succs == 2 ) { // Block ends in a If? + } else if(block->_num_succs == 2) { // Block ends in a If? // Get opcode of 1st projection (matches _succs[0]) // Note: Since this basic block has 2 exits, the last 2 nodes must // be projections (in any order), the 3rd last node must be // the IfNode (we have excluded other 2-way exits such as // CatchNodes already). - MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach(); - ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj(); - ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj(); + MachNode* iff = block->_nodes[block->_nodes.size() - 3]->as_Mach(); + ProjNode* proj0 = block->_nodes[block->_nodes.size() - 2]->as_Proj(); + ProjNode* proj1 = block->_nodes[block->_nodes.size() - 1]->as_Proj(); // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1]. - assert(proj0->raw_out(0) == b->_succs[0]->head(), "Mismatch successor 0"); - assert(proj1->raw_out(0) == b->_succs[1]->head(), "Mismatch successor 1"); + assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0"); + assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1"); - Block *bs1 = b->non_connector_successor(1); + Block* bs1 = block->non_connector_successor(1); // Check for neither successor block following the current // block ending in a conditional. If so, move one of the // successors after the current one, provided that the // successor was previously unscheduled, but moveable // (i.e., all paths to it involve a branch). - if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) { + if (!C->do_freq_based_layout() && bnext != bs0 && bnext != bs1) { // Choose the more common successor based on the probability // of the conditional branch. - Block *bx = bs0; - Block *by = bs1; + Block* bx = bs0; + Block* by = bs1; // _prob is the probability of taking the true path. Make // p the probability of taking successor #1. float p = iff->as_MachIf()->_prob; - if( proj0->Opcode() == Op_IfTrue ) { + if (proj0->Opcode() == Op_IfTrue) { p = 1.0 - p; } @@ -821,14 +810,16 @@ // succs[1]. if (bnext == bs0) { // Fall-thru case in succs[0], so flip targets in succs map - Block *tbs0 = b->_succs[0]; - Block *tbs1 = b->_succs[1]; - b->_succs.map( 0, tbs1 ); - b->_succs.map( 1, tbs0 ); + Block* tbs0 = block->_succs[0]; + Block* tbs1 = block->_succs[1]; + block->_succs.map(0, tbs1); + block->_succs.map(1, tbs0); // Flip projection for each target - { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; } + ProjNode* tmp = proj0; + proj0 = proj1; + proj1 = tmp; - } else if( bnext != bs1 ) { + } else if(bnext != bs1) { // Need a double-branch // The existing conditional branch need not change. // Add a unconditional branch to the false target. @@ -838,12 +829,12 @@ } // Make sure we TRUE branch to the target - if( proj0->Opcode() == Op_IfFalse ) { + if (proj0->Opcode() == Op_IfFalse) { iff->as_MachIf()->negate(); } - b->_nodes.pop(); // Remove IfFalse & IfTrue projections - b->_nodes.pop(); + block->_nodes.pop(); // Remove IfFalse & IfTrue projections + block->_nodes.pop(); } else { // Multi-exit block, e.g. a switch statement @@ -853,7 +844,6 @@ } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const { const Node *x = end->is_block_proj(); @@ -870,57 +860,58 @@ } while( !p->is_block_start() ); // Recursively visit - for( uint i=1; ireq(); i++ ) - _dump_cfg(p->in(i),visited); + for (uint i = 1; i < p->req(); i++) { + _dump_cfg(p->in(i), visited); + } // Dump the block - _bbs[p->_idx]->dump(&_bbs); + get_block_for_node(p)->dump(this); } void PhaseCFG::dump( ) const { - tty->print("\n--- CFG --- %d BBs\n",_num_blocks); - if( _blocks.size() ) { // Did we do basic-block layout? - for( uint i=0; i<_num_blocks; i++ ) - _blocks[i]->dump(&_bbs); + tty->print("\n--- CFG --- %d BBs\n", number_of_blocks()); + if (_blocks.size()) { // Did we do basic-block layout? + for (uint i = 0; i < number_of_blocks(); i++) { + const Block* block = get_block(i); + block->dump(this); + } } else { // Else do it with a DFS - VectorSet visited(_bbs._arena); + VectorSet visited(_block_arena); _dump_cfg(_root,visited); } } void PhaseCFG::dump_headers() { - for( uint i = 0; i < _num_blocks; i++ ) { - if( _blocks[i] == NULL ) continue; - _blocks[i]->dump_head(&_bbs); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + if (block != NULL) { + block->dump_head(this); + } } } -void PhaseCFG::verify( ) const { +void PhaseCFG::verify() const { #ifdef ASSERT // Verify sane CFG - for (uint i = 0; i < _num_blocks; i++) { - Block *b = _blocks[i]; - uint cnt = b->_nodes.size(); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + uint cnt = block->_nodes.size(); uint j; for (j = 0; j < cnt; j++) { - Node *n = b->_nodes[j]; - assert( _bbs[n->_idx] == b, "" ); - if (j >= 1 && n->is_Mach() && - n->as_Mach()->ideal_Opcode() == Op_CreateEx) { - assert(j == 1 || b->_nodes[j-1]->is_Phi(), - "CreateEx must be first instruction in block"); + Node *n = block->_nodes[j]; + assert(get_block_for_node(n) == block, ""); + if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) { + assert(j == 1 || block->_nodes[j-1]->is_Phi(), "CreateEx must be first instruction in block"); } for (uint k = 0; k < n->req(); k++) { Node *def = n->in(k); if (def && def != n) { - assert(_bbs[def->_idx] || def->is_Con(), - "must have block; constants for debug info ok"); + assert(get_block_for_node(def) || def->is_Con(), "must have block; constants for debug info ok"); // Verify that instructions in the block is in correct order. // Uses must follow their definition if they are at the same block. // Mostly done to check that MachSpillCopy nodes are placed correctly // when CreateEx node is moved in build_ifg_physical(). - if (_bbs[def->_idx] == b && - !(b->head()->is_Loop() && n->is_Phi()) && + if (get_block_for_node(def) == block && !(block->head()->is_Loop() && n->is_Phi()) && // See (+++) comment in reg_split.cpp !(n->jvms() != NULL && n->jvms()->is_monitor_use(k))) { bool is_loop = false; @@ -932,29 +923,29 @@ } } } - assert(is_loop || b->find_node(def) < j, "uses must follow definitions"); + assert(is_loop || block->find_node(def) < j, "uses must follow definitions"); } } } } - j = b->end_idx(); - Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj(); - assert( bp, "last instruction must be a block proj" ); - assert( bp == b->_nodes[j], "wrong number of successors for this block" ); + j = block->end_idx(); + Node* bp = (Node*)block->_nodes[block->_nodes.size() - 1]->is_block_proj(); + assert(bp, "last instruction must be a block proj"); + assert(bp == block->_nodes[j], "wrong number of successors for this block"); if (bp->is_Catch()) { - while (b->_nodes[--j]->is_MachProj()) ; - assert(b->_nodes[j]->is_MachCall(), "CatchProj must follow call"); + while (block->_nodes[--j]->is_MachProj()) { + ; + } + assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call"); } else if (bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If) { - assert(b->_num_succs == 2, "Conditional branch must have two targets"); + assert(block->_num_succs == 2, "Conditional branch must have two targets"); } } #endif } #endif -//============================================================================= -//------------------------------UnionFind-------------------------------------- UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) { Copy::zero_to_bytes( _indices, sizeof(uint)*max ); } @@ -979,7 +970,6 @@ for( uint i=0; ifreq(); @@ -1080,7 +1065,6 @@ return dist1 - dist0; } -//------------------------------trace_frequency_order-------------------------- // Comparison function for edges extern "C" int trace_frequency_order(const void *p0, const void *p1) { Trace *tr0 = *(Trace **) p0; @@ -1106,17 +1090,15 @@ return diff; } -//------------------------------find_edges------------------------------------- // Find edges of interest, i.e, those which can fall through. Presumes that // edges which don't fall through are of low frequency and can be generally // ignored. Initialize the list of traces. -void PhaseBlockLayout::find_edges() -{ +void PhaseBlockLayout::find_edges() { // Walk the blocks, creating edges and Traces uint i; Trace *tr = NULL; - for (i = 0; i < _cfg._num_blocks; i++) { - Block *b = _cfg._blocks[i]; + for (i = 0; i < _cfg.number_of_blocks(); i++) { + Block* b = _cfg.get_block(i); tr = new Trace(b, next, prev); traces[tr->id()] = tr; @@ -1140,7 +1122,7 @@ if (n->num_preds() != 1) break; i++; - assert(n = _cfg._blocks[i], "expecting next block"); + assert(n = _cfg.get_block(i), "expecting next block"); tr->append(n); uf->map(n->_pre_order, tr->id()); traces[n->_pre_order] = NULL; @@ -1164,8 +1146,8 @@ } // Group connector blocks into one trace - for (i++; i < _cfg._num_blocks; i++) { - Block *b = _cfg._blocks[i]; + for (i++; i < _cfg.number_of_blocks(); i++) { + Block *b = _cfg.get_block(i); assert(b->is_connector(), "connector blocks at the end"); tr->append(b); uf->map(b->_pre_order, tr->id()); @@ -1173,10 +1155,8 @@ } } -//------------------------------union_traces---------------------------------- // Union two traces together in uf, and null out the trace in the list -void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) -{ +void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) { uint old_id = old_trace->id(); uint updated_id = updated_trace->id(); @@ -1200,10 +1180,8 @@ traces[hi_id] = NULL; } -//------------------------------grow_traces------------------------------------- // Append traces together via the most frequently executed edges -void PhaseBlockLayout::grow_traces() -{ +void PhaseBlockLayout::grow_traces() { // Order the edges, and drive the growth of Traces via the most // frequently executed edges. edges->sort(edge_order); @@ -1245,11 +1223,9 @@ } } -//------------------------------merge_traces----------------------------------- // Embed one trace into another, if the fork or join points are sufficiently // balanced. -void PhaseBlockLayout::merge_traces(bool fall_thru_only) -{ +void PhaseBlockLayout::merge_traces(bool fall_thru_only) { // Walk the edge list a another time, looking at unprocessed edges. // Fold in diamonds for (int i = 0; i < edges->length(); i++) { @@ -1303,7 +1279,7 @@ src_trace->insert_after(src_block, targ_trace); union_traces(src_trace, targ_trace); } else if (src_at_tail) { - if (src_trace != trace(_cfg._broot)) { + if (src_trace != trace(_cfg.get_root_block())) { e->set_state(CFGEdge::connected); targ_trace->insert_before(targ_block, src_trace); union_traces(targ_trace, src_trace); @@ -1312,7 +1288,7 @@ } else if (e->state() == CFGEdge::open) { // Append traces, even without a fall-thru connection. // But leave root entry at the beginning of the block list. - if (targ_trace != trace(_cfg._broot)) { + if (targ_trace != trace(_cfg.get_root_block())) { e->set_state(CFGEdge::connected); src_trace->append(targ_trace); union_traces(src_trace, targ_trace); @@ -1321,11 +1297,9 @@ } } -//----------------------------reorder_traces----------------------------------- // Order the sequence of the traces in some desirable way, and fixup the // jumps at the end of each block. -void PhaseBlockLayout::reorder_traces(int count) -{ +void PhaseBlockLayout::reorder_traces(int count) { ResourceArea *area = Thread::current()->resource_area(); Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count); Block_List worklist; @@ -1340,15 +1314,14 @@ } // The entry block should be first on the new trace list. - Trace *tr = trace(_cfg._broot); + Trace *tr = trace(_cfg.get_root_block()); assert(tr == new_traces[0], "entry trace misplaced"); // Sort the new trace list by frequency qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order); // Patch up the successor blocks - _cfg._blocks.reset(); - _cfg._num_blocks = 0; + _cfg.clear_blocks(); for (int i = 0; i < new_count; i++) { Trace *tr = new_traces[i]; if (tr != NULL) { @@ -1357,17 +1330,15 @@ } } -//------------------------------PhaseBlockLayout------------------------------- // Order basic blocks based on frequency -PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) : - Phase(BlockLayout), - _cfg(cfg) -{ +PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) +: Phase(BlockLayout) +, _cfg(cfg) { ResourceMark rm; ResourceArea *area = Thread::current()->resource_area(); // List of traces - int size = _cfg._num_blocks + 1; + int size = _cfg.number_of_blocks() + 1; traces = NEW_ARENA_ARRAY(area, Trace *, size); memset(traces, 0, size*sizeof(Trace*)); next = NEW_ARENA_ARRAY(area, Block *, size); @@ -1400,11 +1371,10 @@ // Re-order all the remaining traces by frequency reorder_traces(size); - assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink"); + assert(_cfg.number_of_blocks() >= (uint) (size - 1), "number of blocks can not shrink"); } -//------------------------------backedge--------------------------------------- // Edge e completes a loop in a trace. If the target block is head of the // loop, rotate the loop block so that the loop ends in a conditional branch. bool Trace::backedge(CFGEdge *e) { @@ -1456,14 +1426,12 @@ return loop_rotated; } -//------------------------------fixup_blocks----------------------------------- // push blocks onto the CFG list // ensure that blocks have the correct two-way branch sense void Trace::fixup_blocks(PhaseCFG &cfg) { Block *last = last_block(); for (Block *b = first_block(); b != NULL; b = next(b)) { - cfg._blocks.push(b); - cfg._num_blocks++; + cfg.add_block(b); if (!b->is_connector()) { int nfallthru = b->num_fall_throughs(); if (b != last) { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/block.hpp --- a/src/share/vm/opto/block.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/block.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -48,13 +48,12 @@ friend class VMStructs; uint _size; // allocated size, as opposed to formal limit debug_only(uint _limit;) // limit to formal domain + Arena *_arena; // Arena to allocate in protected: Block **_blocks; void grow( uint i ); // Grow array node to fit public: - Arena *_arena; // Arena to allocate in - Block_Array(Arena *a) : _arena(a), _size(OptoBlockListSize) { debug_only(_limit=0); _blocks = NEW_ARENA_ARRAY( a, Block *, OptoBlockListSize ); @@ -77,7 +76,7 @@ public: uint _cnt; Block_List() : Block_Array(Thread::current()->resource_area()), _cnt(0) {} - void push( Block *b ) { map(_cnt++,b); } + void push( Block *b ) { map(_cnt++,b); } Block *pop() { return _blocks[--_cnt]; } Block *rpop() { Block *b = _blocks[0]; _blocks[0]=_blocks[--_cnt]; return b;} void remove( uint i ); @@ -284,15 +283,15 @@ // helper function that adds caller save registers to MachProjNode void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe); // Schedule a call next in the block - uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray &ready_cnt, MachCallNode *mcall, VectorSet &next_call); + uint sched_call(Matcher &matcher, PhaseCFG* cfg, uint node_cnt, Node_List &worklist, GrowableArray &ready_cnt, MachCallNode *mcall, VectorSet &next_call); // Perform basic-block local scheduling Node *select(PhaseCFG *cfg, Node_List &worklist, GrowableArray &ready_cnt, VectorSet &next_call, uint sched_slot); - void set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ); - void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs); + void set_next_call( Node *n, VectorSet &next_call, PhaseCFG* cfg); + void needed_for_next_call(Node *this_call, VectorSet &next_call, PhaseCFG* cfg); bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray &ready_cnt, VectorSet &next_call); // Cleanup if any code lands between a Call and his Catch - void call_catch_cleanup(Block_Array &bbs, Compile *C); + void call_catch_cleanup(PhaseCFG* cfg, Compile *C); // Detect implicit-null-check opportunities. Basically, find NULL checks // with suitable memory ops nearby. Use the memory op to do the NULL check. // I can generate a memory op if there is not one nearby. @@ -331,15 +330,15 @@ // Use frequency calculations and code shape to predict if the block // is uncommon. - bool is_uncommon( Block_Array &bbs ) const; + bool is_uncommon(PhaseCFG* cfg) const; #ifndef PRODUCT // Debugging print of basic block void dump_bidx(const Block* orig, outputStream* st = tty) const; - void dump_pred(const Block_Array *bbs, Block* orig, outputStream* st = tty) const; - void dump_head( const Block_Array *bbs, outputStream* st = tty ) const; + void dump_pred(const PhaseCFG* cfg, Block* orig, outputStream* st = tty) const; + void dump_head(const PhaseCFG* cfg, outputStream* st = tty) const; void dump() const; - void dump( const Block_Array *bbs ) const; + void dump(const PhaseCFG* cfg) const; #endif }; @@ -349,14 +348,77 @@ class PhaseCFG : public Phase { friend class VMStructs; private: + + // Root of whole program + RootNode* _root; + + // The block containing the root node + Block* _root_block; + + // List of basic blocks that are created during CFG creation + Block_List _blocks; + + // Count of basic blocks + uint _number_of_blocks; + + // Arena for the blocks to be stored in + Arena* _block_arena; + + // The matcher for this compilation + Matcher& _matcher; + + // Map nodes to owning basic block + Block_Array _node_to_block_mapping; + + // Loop from the root + CFGLoop* _root_loop; + + // Outmost loop frequency + float _outer_loop_frequency; + + // Per node latency estimation, valid only during GCM + GrowableArray* _node_latency; + // Build a proper looking cfg. Return count of basic blocks uint build_cfg(); - // Perform DFS search. + // Build the dominator tree so that we know where we can move instructions + void build_dominator_tree(); + + // Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions + void estimate_block_frequency(); + + // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific + // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block. + // Move nodes to ensure correctness from GVN and also try to move nodes out of loops. + void global_code_motion(); + + // Schedule Nodes early in their basic blocks. + bool schedule_early(VectorSet &visited, Node_List &roots); + + // For each node, find the latest block it can be scheduled into + // and then select the cheapest block between the latest and earliest + // block to place the node. + void schedule_late(VectorSet &visited, Node_List &stack); + + // Compute the (backwards) latency of a node from a single use + int latency_from_use(Node *n, const Node *def, Node *use); + + // Compute the (backwards) latency of a node from the uses of this instruction + void partial_latency_of_defs(Node *n); + + // Compute the instruction global latency with a backwards walk + void compute_latencies_backwards(VectorSet &visited, Node_List &stack); + + // Pick a block between early and late that is a cheaper alternative + // to late. Helper for schedule_late. + Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self); + + // Perform a Depth First Search (DFS). // Setup 'vertex' as DFS to vertex mapping. // Setup 'semi' as vertex to DFS mapping. // Set 'parent' to DFS parent. - uint DFS( Tarjan *tarjan ); + uint do_DFS(Tarjan* tarjan, uint rpo_counter); // Helper function to insert a node into a block void schedule_node_into_block( Node *n, Block *b ); @@ -367,79 +429,18 @@ void schedule_pinned_nodes( VectorSet &visited ); // I'll need a few machine-specific GotoNodes. Clone from this one. - MachNode *_goto; + // Used when building the CFG and creating end nodes for blocks. + MachNode* _goto; Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false); void verify_anti_dependences(Block* LCA, Node* load) { - assert(LCA == _bbs[load->_idx], "should already be scheduled"); + assert(LCA == get_block_for_node(load), "should already be scheduled"); insert_anti_dependences(LCA, load, true); } - public: - PhaseCFG( Arena *a, RootNode *r, Matcher &m ); - - uint _num_blocks; // Count of basic blocks - Block_List _blocks; // List of basic blocks - RootNode *_root; // Root of whole program - Block_Array _bbs; // Map Nodes to owning Basic Block - Block *_broot; // Basic block of root - uint _rpo_ctr; - CFGLoop* _root_loop; - float _outer_loop_freq; // Outmost loop frequency - - // Per node latency estimation, valid only during GCM - GrowableArray *_node_latency; - -#ifndef PRODUCT - bool _trace_opto_pipelining; // tracing flag -#endif - -#ifdef ASSERT - Unique_Node_List _raw_oops; -#endif - - // Build dominators - void Dominators(); - - // Estimate block frequencies based on IfNode probabilities - void Estimate_Block_Frequency(); - - // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific - // basic blocks; i.e. _bbs now maps _idx for all Nodes to some Block. - void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list ); - - // Compute the (backwards) latency of a node from the uses - void latency_from_uses(Node *n); - - // Compute the (backwards) latency of a node from a single use - int latency_from_use(Node *n, const Node *def, Node *use); - - // Compute the (backwards) latency of a node from the uses of this instruction - void partial_latency_of_defs(Node *n); - - // Schedule Nodes early in their basic blocks. - bool schedule_early(VectorSet &visited, Node_List &roots); - - // For each node, find the latest block it can be scheduled into - // and then select the cheapest block between the latest and earliest - // block to place the node. - void schedule_late(VectorSet &visited, Node_List &stack); - - // Pick a block between early and late that is a cheaper alternative - // to late. Helper for schedule_late. - Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self); - - // Compute the instruction global latency with a backwards walk - void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack); - - // Set loop alignment - void set_loop_alignment(); - - // Remove empty basic blocks - void remove_empty(); - void fixup_flow(); bool move_to_next(Block* bx, uint b_index); void move_to_end(Block* bx, uint b_index); + void insert_goto_at(uint block_no, uint succ_no); // Check for NeverBranch at block end. This needs to become a GOTO to the @@ -451,10 +452,106 @@ CFGLoop* create_loop_tree(); - // Insert a node into a block, and update the _bbs - void insert( Block *b, uint idx, Node *n ) { + #ifndef PRODUCT + bool _trace_opto_pipelining; // tracing flag + #endif + + public: + PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher); + + void set_latency_for_node(Node* node, int latency) { + _node_latency->at_put_grow(node->_idx, latency); + } + + uint get_latency_for_node(Node* node) { + return _node_latency->at_grow(node->_idx); + } + + // Get the outer most frequency + float get_outer_loop_frequency() const { + return _outer_loop_frequency; + } + + // Get the root node of the CFG + RootNode* get_root_node() const { + return _root; + } + + // Get the block of the root node + Block* get_root_block() const { + return _root_block; + } + + // Add a block at a position and moves the later ones one step + void add_block_at(uint pos, Block* block) { + _blocks.insert(pos, block); + _number_of_blocks++; + } + + // Adds a block to the top of the block list + void add_block(Block* block) { + _blocks.push(block); + _number_of_blocks++; + } + + // Clear the list of blocks + void clear_blocks() { + _blocks.reset(); + _number_of_blocks = 0; + } + + // Get the block at position pos in _blocks + Block* get_block(uint pos) const { + return _blocks[pos]; + } + + // Number of blocks + uint number_of_blocks() const { + return _number_of_blocks; + } + + // set which block this node should reside in + void map_node_to_block(const Node* node, Block* block) { + _node_to_block_mapping.map(node->_idx, block); + } + + // removes the mapping from a node to a block + void unmap_node_from_block(const Node* node) { + _node_to_block_mapping.map(node->_idx, NULL); + } + + // get the block in which this node resides + Block* get_block_for_node(const Node* node) const { + return _node_to_block_mapping[node->_idx]; + } + + // does this node reside in a block; return true + bool has_block(const Node* node) const { + return (_node_to_block_mapping.lookup(node->_idx) != NULL); + } + +#ifdef ASSERT + Unique_Node_List _raw_oops; +#endif + + // Do global code motion by first building dominator tree and estimate block frequency + // Returns true on success + bool do_global_code_motion(); + + // Compute the (backwards) latency of a node from the uses + void latency_from_uses(Node *n); + + // Set loop alignment + void set_loop_alignment(); + + // Remove empty basic blocks + void remove_empty_blocks(); + void fixup_flow(); + + // Insert a node into a block at index and map the node to the block + void insert(Block *b, uint idx, Node *n) { b->_nodes.insert( idx, n ); - _bbs.map( n->_idx, b ); + map_node_to_block(n, b); } #ifndef PRODUCT @@ -543,7 +640,7 @@ _child(NULL), _exit_prob(1.0f) {} CFGLoop* parent() { return _parent; } - void push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk); + void push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg); void add_member(CFGElement *s) { _members.push(s); } void add_nested_loop(CFGLoop* cl); Block* head() { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/buildOopMap.cpp --- a/src/share/vm/opto/buildOopMap.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/buildOopMap.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -87,7 +87,6 @@ // OptoReg::Bad for not-callee-saved. -//------------------------------OopFlow---------------------------------------- // Structure to pass around struct OopFlow : public ResourceObj { short *_callees; // Array mapping register to callee-saved @@ -119,7 +118,6 @@ OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ); }; -//------------------------------compute_reach---------------------------------- // Given reaching-defs for this block start, compute it for this block end void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) { @@ -177,7 +175,6 @@ } } -//------------------------------merge------------------------------------------ // Merge the given flow into the 'this' flow void OopFlow::merge( OopFlow *flow, int max_reg ) { assert( _b == NULL, "merging into a happy flow" ); @@ -197,14 +194,12 @@ } -//------------------------------clone------------------------------------------ void OopFlow::clone( OopFlow *flow, int max_size ) { _b = flow->_b; memcpy( _callees, flow->_callees, sizeof(short)*max_size); memcpy( _defs , flow->_defs , sizeof(Node*)*max_size); } -//------------------------------make------------------------------------------- OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) { short *callees = NEW_ARENA_ARRAY(A,short,max_size+1); Node **defs = NEW_ARENA_ARRAY(A,Node*,max_size+1); @@ -215,7 +210,6 @@ return flow; } -//------------------------------bit twiddlers---------------------------------- static int get_live_bit( int *live, int reg ) { return live[reg>>LogBitsPerInt] & (1<<(reg&(BitsPerInt-1))); } static void set_live_bit( int *live, int reg ) { @@ -223,7 +217,6 @@ static void clr_live_bit( int *live, int reg ) { live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); } -//------------------------------build_oop_map---------------------------------- // Build an oopmap from the current flow info OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) { int framesize = regalloc->_framesize; @@ -412,28 +405,29 @@ return omap; } -//------------------------------do_liveness------------------------------------ // Compute backwards liveness on registers -static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) { - int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints); - int *tmp_live = &live[cfg->_num_blocks * max_reg_ints]; - Node *root = cfg->C->root(); +static void do_liveness(PhaseRegAlloc* regalloc, PhaseCFG* cfg, Block_List* worklist, int max_reg_ints, Arena* A, Dict* safehash) { + int* live = NEW_ARENA_ARRAY(A, int, (cfg->number_of_blocks() + 1) * max_reg_ints); + int* tmp_live = &live[cfg->number_of_blocks() * max_reg_ints]; + Node* root = cfg->get_root_node(); // On CISC platforms, get the node representing the stack pointer that regalloc // used for spills Node *fp = NodeSentinel; if (UseCISCSpill && root->req() > 1) { fp = root->in(1)->in(TypeFunc::FramePtr); } - memset( live, 0, cfg->_num_blocks * (max_reg_ints<number_of_blocks() * (max_reg_ints << LogBytesPerInt)); // Push preds onto worklist - for( uint i=1; ireq(); i++ ) - worklist->push(cfg->_bbs[root->in(i)->_idx]); + for (uint i = 1; i < root->req(); i++) { + Block* block = cfg->get_block_for_node(root->in(i)); + worklist->push(block); + } // ZKM.jar includes tiny infinite loops which are unreached from below. // If we missed any blocks, we'll retry here after pushing all missed // blocks on the worklist. Normally this outer loop never trips more // than once. - while( 1 ) { + while (1) { while( worklist->size() ) { // Standard worklist algorithm Block *b = worklist->rpop(); @@ -537,37 +531,42 @@ for( l=0; lnum_preds(); l++ ) - worklist->push(cfg->_bbs[b->pred(l)->_idx]); + for (l = 1; l < (int)b->num_preds(); l++) { + Block* block = cfg->get_block_for_node(b->pred(l)); + worklist->push(block); + } } } // Scan for any missing safepoints. Happens to infinite loops // ala ZKM.jar uint i; - for( i=1; i_num_blocks; i++ ) { - Block *b = cfg->_blocks[i]; + for (i = 1; i < cfg->number_of_blocks(); i++) { + Block* block = cfg->get_block(i); uint j; - for( j=1; j_nodes.size(); j++ ) - if( b->_nodes[j]->jvms() && - (*safehash)[b->_nodes[j]] == NULL ) + for (j = 1; j < block->_nodes.size(); j++) { + if (block->_nodes[j]->jvms() && (*safehash)[block->_nodes[j]] == NULL) { break; - if( j_nodes.size() ) break; + } + } + if (j < block->_nodes.size()) { + break; + } } - if( i == cfg->_num_blocks ) + if (i == cfg->number_of_blocks()) { break; // Got 'em all + } #ifndef PRODUCT if( PrintOpto && Verbose ) tty->print_cr("retripping live calc"); #endif // Force the issue (expensively): recheck everybody - for( i=1; i_num_blocks; i++ ) - worklist->push(cfg->_blocks[i]); + for (i = 1; i < cfg->number_of_blocks(); i++) { + worklist->push(cfg->get_block(i)); + } } - } -//------------------------------BuildOopMaps----------------------------------- // Collect GC mask info - where are all the OOPs? void Compile::BuildOopMaps() { NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); ) @@ -588,12 +587,12 @@ OopFlow *free_list = NULL; // Free, unused // Array mapping blocks to completed oopflows - OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks); - memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) ); + OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->number_of_blocks()); + memset( flows, 0, _cfg->number_of_blocks() * sizeof(OopFlow*) ); // Do the first block 'by hand' to prime the worklist - Block *entry = _cfg->_blocks[1]; + Block *entry = _cfg->get_block(1); OopFlow *rootflow = OopFlow::make(A,max_reg,this); // Initialize to 'bottom' (not 'top') memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) ); @@ -619,7 +618,9 @@ Block *b = worklist.pop(); // Ignore root block - if( b == _cfg->_broot ) continue; + if (b == _cfg->get_root_block()) { + continue; + } // Block is already done? Happens if block has several predecessors, // he can get on the worklist more than once. if( flows[b->_pre_order] ) continue; @@ -629,10 +630,9 @@ // pred to this block. Otherwise we have to grab a new OopFlow. OopFlow *flow = NULL; // Flag for finding optimized flow Block *pred = (Block*)0xdeadbeef; - uint j; // Scan this block's preds to find a done predecessor - for( j=1; jnum_preds(); j++ ) { - Block *p = _cfg->_bbs[b->pred(j)->_idx]; + for (uint j = 1; j < b->num_preds(); j++) { + Block* p = _cfg->get_block_for_node(b->pred(j)); OopFlow *p_flow = flows[p->_pre_order]; if( p_flow ) { // Predecessor is done assert( p_flow->_b == p, "cross check" ); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/c2_globals.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -185,6 +185,9 @@ product_pd(intx, LoopUnrollLimit, \ "Unroll loop bodies with node count less than this") \ \ + product(intx, LoopMaxUnroll, 16, \ + "Maximum number of unrolls for main loop") \ + \ product(intx, LoopUnrollMin, 4, \ "Minimum number of unroll loop bodies before checking progress" \ "of rounds of unroll,optimize,..") \ diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/chaitin.cpp --- a/src/share/vm/opto/chaitin.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/chaitin.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -40,10 +40,8 @@ #include "opto/opcodes.hpp" #include "opto/rootnode.hpp" -//============================================================================= - #ifndef PRODUCT -void LRG::dump( ) const { +void LRG::dump() const { ttyLocker ttyl; tty->print("%d ",num_regs()); _mask.dump(); @@ -94,7 +92,6 @@ } #endif -//------------------------------score------------------------------------------ // Compute score from cost and area. Low score is best to spill. static double raw_score( double cost, double area ) { return cost - (area*RegisterCostAreaRatio) * 1.52588e-5; @@ -125,7 +122,6 @@ return score; } -//------------------------------LRG_List--------------------------------------- LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) { memset( _lidxs, 0, sizeof(uint)*max ); } @@ -211,7 +207,6 @@ return next; } -//------------------------------Chaitin---------------------------------------- PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher) : PhaseRegAlloc(unique, cfg, matcher, #ifndef PRODUCT @@ -232,31 +227,31 @@ { NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); ) - _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq); + _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency()); // Build a list of basic blocks, sorted by frequency - _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); + _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks()); // Experiment with sorting strategies to speed compilation double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket Block **buckets[NUMBUCKS]; // Array of buckets uint buckcnt[NUMBUCKS]; // Array of bucket counters double buckval[NUMBUCKS]; // Array of bucket value cutoffs for (uint i = 0; i < NUMBUCKS; i++) { - buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg._num_blocks); + buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks()); buckcnt[i] = 0; // Bump by three orders of magnitude each time cutoff *= 0.001; buckval[i] = cutoff; - for (uint j = 0; j < _cfg._num_blocks; j++) { + for (uint j = 0; j < _cfg.number_of_blocks(); j++) { buckets[i][j] = NULL; } } // Sort blocks into buckets - for (uint i = 0; i < _cfg._num_blocks; i++) { + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { for (uint j = 0; j < NUMBUCKS; j++) { - if ((j == NUMBUCKS - 1) || (_cfg._blocks[i]->_freq > buckval[j])) { + if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) { // Assign block to end of list for appropriate bucket - buckets[j][buckcnt[j]++] = _cfg._blocks[i]; + buckets[j][buckcnt[j]++] = _cfg.get_block(i); break; // kick out of inner loop } } @@ -269,10 +264,9 @@ } } - assert(blkcnt == _cfg._num_blocks, "Block array not totally filled"); + assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled"); } -//------------------------------Union------------------------------------------ // union 2 sets together. void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) { uint src = _lrg_map.find(src_n); @@ -285,7 +279,6 @@ _lrg_map.uf_map(dst, src); } -//------------------------------new_lrg---------------------------------------- void PhaseChaitin::new_lrg(const Node *x, uint lrg) { // Make the Node->LRG mapping _lrg_map.extend(x->_idx,lrg); @@ -294,24 +287,28 @@ } -bool PhaseChaitin::clone_projs_shared(Block *b, uint idx, Node *con, Node *copy, uint max_lrg_id) { - Block *bcon = _cfg._bbs[con->_idx]; - uint cindex = bcon->find_node(con); - Node *con_next = bcon->_nodes[cindex+1]; - if (con_next->in(0) != con || !con_next->is_MachProj()) { - return false; // No MachProj's follow +int PhaseChaitin::clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id) { + assert(b->find_node(copy) == (idx - 1), "incorrect insert index for copy kill projections"); + DEBUG_ONLY( Block* borig = _cfg.get_block_for_node(orig); ) + int found_projs = 0; + uint cnt = orig->outcnt(); + for (uint i = 0; i < cnt; i++) { + Node* proj = orig->raw_out(i); + if (proj->is_MachProj()) { + assert(proj->outcnt() == 0, "only kill projections are expected here"); + assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections"); + found_projs++; + // Copy kill projections after the cloned node + Node* kills = proj->clone(); + kills->set_req(0, copy); + b->_nodes.insert(idx++, kills); + _cfg.map_node_to_block(kills, b); + new_lrg(kills, max_lrg_id++); + } } - - // Copy kills after the cloned constant - Node *kills = con_next->clone(); - kills->set_req(0, copy); - b->_nodes.insert(idx, kills); - _cfg._bbs.map(kills->_idx, b); - new_lrg(kills, max_lrg_id); - return true; + return found_projs; } -//------------------------------compact---------------------------------------- // Renumber the live ranges to compact them. Makes the IFG smaller. void PhaseChaitin::compact() { // Current the _uf_map contains a series of short chains which are headed @@ -677,20 +674,19 @@ C->set_indexSet_arena(NULL); // ResourceArea is at end of scope } -//------------------------------de_ssa----------------------------------------- void PhaseChaitin::de_ssa() { // Set initial Names for all Nodes. Most Nodes get the virtual register // number. A few get the ZERO live range number. These do not // get allocated, but instead rely on correct scheduling to ensure that // only one instance is simultaneously live at a time. uint lr_counter = 1; - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - uint cnt = b->_nodes.size(); + for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) { + Block* block = _cfg.get_block(i); + uint cnt = block->_nodes.size(); // Handle all the normal Nodes in the block for( uint j = 0; j < cnt; j++ ) { - Node *n = b->_nodes[j]; + Node *n = block->_nodes[j]; // Pre-color to the zero live range, or pick virtual register const RegMask &rm = n->out_RegMask(); _lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0); @@ -701,52 +697,55 @@ } -//------------------------------gather_lrg_masks------------------------------- // Gather LiveRanGe information, including register masks. Modification of // cisc spillable in_RegMasks should not be done before AggressiveCoalesce. void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { // Nail down the frame pointer live range - uint fp_lrg = _lrg_map.live_range_id(_cfg._root->in(1)->in(TypeFunc::FramePtr)); + uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr)); lrgs(fp_lrg)._cost += 1e12; // Cost is infinite // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // For all instructions - for( uint j = 1; j < b->_nodes.size(); j++ ) { - Node *n = b->_nodes[j]; + for (uint j = 1; j < block->_nodes.size(); j++) { + Node* n = block->_nodes[j]; uint input_edge_start =1; // Skip control most nodes - if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base(); + if (n->is_Mach()) { + input_edge_start = n->as_Mach()->oper_input_base(); + } uint idx = n->is_Copy(); // Get virtual register number, same as LiveRanGe index uint vreg = _lrg_map.live_range_id(n); - LRG &lrg = lrgs(vreg); - if( vreg ) { // No vreg means un-allocable (e.g. memory) + LRG& lrg = lrgs(vreg); + if (vreg) { // No vreg means un-allocable (e.g. memory) // Collect has-copy bit - if( idx ) { + if (idx) { lrg._has_copy = 1; uint clidx = _lrg_map.live_range_id(n->in(idx)); - LRG ©_src = lrgs(clidx); + LRG& copy_src = lrgs(clidx); copy_src._has_copy = 1; } // Check for float-vs-int live range (used in register-pressure // calculations) const Type *n_type = n->bottom_type(); - if (n_type->is_floatingpoint()) + if (n_type->is_floatingpoint()) { lrg._is_float = 1; + } // Check for twice prior spilling. Once prior spilling might have // spilled 'soft', 2nd prior spill should have spilled 'hard' and // further spilling is unlikely to make progress. - if( _spilled_once.test(n->_idx) ) { + if (_spilled_once.test(n->_idx)) { lrg._was_spilled1 = 1; - if( _spilled_twice.test(n->_idx) ) + if (_spilled_twice.test(n->_idx)) { lrg._was_spilled2 = 1; + } } #ifndef PRODUCT @@ -783,16 +782,18 @@ // Check for bound register masks const RegMask &lrgmask = lrg.mask(); - if (lrgmask.is_bound(ireg)) + if (lrgmask.is_bound(ireg)) { lrg._is_bound = 1; + } // Check for maximum frequency value - if (lrg._maxfreq < b->_freq) - lrg._maxfreq = b->_freq; + if (lrg._maxfreq < block->_freq) { + lrg._maxfreq = block->_freq; + } // Check for oop-iness, or long/double // Check for multi-kill projection - switch( ireg ) { + switch (ireg) { case MachProjNode::fat_proj: // Fat projections have size equal to number of registers killed lrg.set_num_regs(rm.Size()); @@ -962,8 +963,7 @@ // AggressiveCoalesce. This effectively pre-virtual-splits // around uncommon uses of common defs. const RegMask &rm = n->in_RegMask(k); - if( !after_aggressive && - _cfg._bbs[n->in(k)->_idx]->_freq > 1000*b->_freq ) { + if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) { // Since we are BEFORE aggressive coalesce, leave the register // mask untrimmed by the call. This encourages more coalescing. // Later, AFTER aggressive, this live range will have to spill @@ -1007,8 +1007,9 @@ } // Check for maximum frequency value - if( lrg._maxfreq < b->_freq ) - lrg._maxfreq = b->_freq; + if (lrg._maxfreq < block->_freq) { + lrg._maxfreq = block->_freq; + } } // End for all allocated inputs } // end for all instructions @@ -1030,7 +1031,6 @@ } } -//------------------------------set_was_low------------------------------------ // Set the was-lo-degree bit. Conservative coalescing should not change the // colorability of the graph. If any live range was of low-degree before // coalescing, it should Simplify. This call sets the was-lo-degree bit. @@ -1067,7 +1067,6 @@ #define REGISTER_CONSTRAINED 16 -//------------------------------cache_lrg_info--------------------------------- // Compute cost/area ratio, in case we spill. Build the lo-degree list. void PhaseChaitin::cache_lrg_info( ) { @@ -1101,7 +1100,6 @@ } } -//------------------------------Pre-Simplify----------------------------------- // Simplify the IFG by removing LRGs of low degree that have NO copies void PhaseChaitin::Pre_Simplify( ) { @@ -1152,7 +1150,6 @@ // No more lo-degree no-copy live ranges to simplify } -//------------------------------Simplify--------------------------------------- // Simplify the IFG by removing LRGs of low degree. void PhaseChaitin::Simplify( ) { @@ -1289,7 +1286,6 @@ } -//------------------------------is_legal_reg----------------------------------- // Is 'reg' register legal for 'lrg'? static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) && @@ -1316,7 +1312,6 @@ return false; } -//------------------------------bias_color------------------------------------- // Choose a color using the biasing heuristic OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { @@ -1378,7 +1373,6 @@ return OptoReg::add( reg, chunk ); } -//------------------------------choose_color----------------------------------- // Choose a color in the current chunk OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) { assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)"); @@ -1400,7 +1394,6 @@ return lrg.mask().find_last_elem(); } -//------------------------------Select----------------------------------------- // Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted // in reverse order of removal. As long as nothing of hi-degree was yanked, // everything going back is guaranteed a color. Select that color. If some @@ -1575,8 +1568,6 @@ return spill_reg-LRG::SPILL_REG; // Return number of spills } - -//------------------------------copy_was_spilled------------------------------- // Copy 'was_spilled'-edness from the source Node to the dst Node. void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) { if( _spilled_once.test(src->_idx) ) { @@ -1589,14 +1580,12 @@ } } -//------------------------------set_was_spilled-------------------------------- // Set the 'spilled_once' or 'spilled_twice' flag on a node. void PhaseChaitin::set_was_spilled( Node *n ) { if( _spilled_once.test_set(n->_idx) ) _spilled_twice.set(n->_idx); } -//------------------------------fixup_spills----------------------------------- // Convert Ideal spill instructions into proper FramePtr + offset Loads and // Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are. void PhaseChaitin::fixup_spills() { @@ -1606,16 +1595,16 @@ NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); ) // Grab the Frame Pointer - Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr); + Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr); // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // For all instructions in block - uint last_inst = b->end_idx(); - for( uint j = 1; j <= last_inst; j++ ) { - Node *n = b->_nodes[j]; + uint last_inst = block->end_idx(); + for (uint j = 1; j <= last_inst; j++) { + Node* n = block->_nodes[j]; // Dead instruction??? assert( n->outcnt() != 0 ||// Nothing dead after post alloc @@ -1652,7 +1641,7 @@ assert( cisc->oper_input_base() == 2, "Only adding one edge"); cisc->ins_req(1,src); // Requires a memory edge } - b->_nodes.map(j,cisc); // Insert into basic block + block->_nodes.map(j,cisc); // Insert into basic block n->subsume_by(cisc, C); // Correct graph // ++_used_cisc_instructions; @@ -1678,7 +1667,6 @@ } // End of for all blocks } -//------------------------------find_base_for_derived-------------------------- // Helper to stretch above; recursively discover the base Node for a // given derived Node. Easy for AddP-related machine nodes, but needs // to be recursive for derived Phis. @@ -1708,17 +1696,16 @@ // Initialize it once and make it shared: // set control to _root and place it into Start block // (where top() node is placed). - base->init_req(0, _cfg._root); - Block *startb = _cfg._bbs[C->top()->_idx]; + base->init_req(0, _cfg.get_root_node()); + Block *startb = _cfg.get_block_for_node(C->top()); startb->_nodes.insert(startb->find_node(C->top()), base ); - _cfg._bbs.map( base->_idx, startb ); + _cfg.map_node_to_block(base, startb); assert(_lrg_map.live_range_id(base) == 0, "should not have LRG yet"); } if (_lrg_map.live_range_id(base) == 0) { new_lrg(base, maxlrg++); } - assert(base->in(0) == _cfg._root && - _cfg._bbs[base->_idx] == _cfg._bbs[C->top()->_idx], "base NULL should be shared"); + assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared"); derived_base_map[derived->_idx] = base; return base; } @@ -1754,12 +1741,12 @@ base->as_Phi()->set_type(t); // Search the current block for an existing base-Phi - Block *b = _cfg._bbs[derived->_idx]; + Block *b = _cfg.get_block_for_node(derived); for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi Node *phi = b->_nodes[i]; if( !phi->is_Phi() ) { // Found end of Phis with no match? b->_nodes.insert( i, base ); // Must insert created Phi here as base - _cfg._bbs.map( base->_idx, b ); + _cfg.map_node_to_block(base, b); new_lrg(base,maxlrg++); break; } @@ -1781,8 +1768,6 @@ return base; } - -//------------------------------stretch_base_pointer_live_ranges--------------- // At each Safepoint, insert extra debug edges for each pair of derived value/ // base pointer that is live across the Safepoint for oopmap building. The // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the @@ -1794,14 +1779,14 @@ memset( derived_base_map, 0, sizeof(Node*)*C->unique() ); // For all blocks in RPO do... - for( uint i=0; i<_cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // Note use of deep-copy constructor. I cannot hammer the original // liveout bits, because they are needed by the following coalesce pass. - IndexSet liveout(_live->live(b)); + IndexSet liveout(_live->live(block)); - for( uint j = b->end_idx() + 1; j > 1; j-- ) { - Node *n = b->_nodes[j-1]; + for (uint j = block->end_idx() + 1; j > 1; j--) { + Node* n = block->_nodes[j - 1]; // Pre-split compares of loop-phis. Loop-phis form a cycle we would // like to see in the same register. Compare uses the loop-phi and so @@ -1815,8 +1800,8 @@ if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) { Node *phi = n->in(1); if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) { - Block *phi_block = _cfg._bbs[phi->_idx]; - if( _cfg._bbs[phi_block->pred(2)->_idx] == b ) { + Block *phi_block = _cfg.get_block_for_node(phi); + if (_cfg.get_block_for_node(phi_block->pred(2)) == block) { const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI]; Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask ); insert_proj( phi_block, 1, spill, maxlrg++ ); @@ -1870,7 +1855,7 @@ if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or !liveout.member(_lrg_map.live_range_id(base))) && // not live) AND (_lrg_map.live_range_id(base) > 0) && // not a constant - _cfg._bbs[base->_idx] != b) { // base not def'd in blk) + _cfg.get_block_for_node(base) != block) { // base not def'd in blk) // Base pointer is not currently live. Since I stretched // the base pointer to here and it crosses basic-block // boundaries, the global live info is now incorrect. @@ -1905,15 +1890,12 @@ return must_recompute_live != 0; } - -//------------------------------add_reference---------------------------------- // Extend the node to LRG mapping void PhaseChaitin::add_reference(const Node *node, const Node *old_node) { _lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node)); } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void PhaseChaitin::dump(const Node *n) const { uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0; @@ -1993,8 +1975,8 @@ tty->print("\n"); } -void PhaseChaitin::dump( const Block * b ) const { - b->dump_head( &_cfg._bbs ); +void PhaseChaitin::dump(const Block *b) const { + b->dump_head(&_cfg); // For all instructions for( uint j = 0; j < b->_nodes.size(); j++ ) @@ -2019,8 +2001,9 @@ _matcher._new_SP, _framesize ); // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) - dump(_cfg._blocks[i]); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + dump(_cfg.get_block(i)); + } // End of per-block dump tty->print("\n"); @@ -2061,7 +2044,6 @@ tty->print_cr(""); } -//------------------------------dump_degree_lists------------------------------ void PhaseChaitin::dump_degree_lists() const { // Dump lo-degree list tty->print("Lo degree: "); @@ -2082,7 +2064,6 @@ tty->print_cr(""); } -//------------------------------dump_simplified-------------------------------- void PhaseChaitin::dump_simplified() const { tty->print("Simplified: "); for( uint i = _simplified; i; i = lrgs(i)._next ) @@ -2101,7 +2082,6 @@ return buf+strlen(buf); } -//------------------------------dump_register---------------------------------- // Dump a register name into a buffer. Be intelligent if we get called // before allocation is complete. char *PhaseChaitin::dump_register( const Node *n, char *buf ) const { @@ -2135,7 +2115,6 @@ return buf+strlen(buf); } -//----------------------dump_for_spill_split_recycle-------------------------- void PhaseChaitin::dump_for_spill_split_recycle() const { if( WizardMode && (PrintCompilation || PrintOpto) ) { // Display which live ranges need to be split and the allocator's state @@ -2151,7 +2130,6 @@ } } -//------------------------------dump_frame------------------------------------ void PhaseChaitin::dump_frame() const { const char *fp = OptoReg::regname(OptoReg::c_frame_pointer); const TypeTuple *domain = C->tf()->domain(); @@ -2257,17 +2235,16 @@ tty->print_cr("#"); } -//------------------------------dump_bb---------------------------------------- void PhaseChaitin::dump_bb( uint pre_order ) const { tty->print_cr("---dump of B%d---",pre_order); - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - if( b->_pre_order == pre_order ) - dump(b); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); + if (block->_pre_order == pre_order) { + dump(block); + } } } -//------------------------------dump_lrg--------------------------------------- void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const { tty->print_cr("---dump of L%d---",lidx); @@ -2289,17 +2266,17 @@ tty->cr(); } // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); int dump_once = 0; // For all instructions - for( uint j = 0; j < b->_nodes.size(); j++ ) { - Node *n = b->_nodes[j]; + for( uint j = 0; j < block->_nodes.size(); j++ ) { + Node *n = block->_nodes[j]; if (_lrg_map.find_const(n) == lidx) { if (!dump_once++) { tty->cr(); - b->dump_head( &_cfg._bbs ); + block->dump_head(&_cfg); } dump(n); continue; @@ -2314,7 +2291,7 @@ if (_lrg_map.find_const(m) == lidx) { if (!dump_once++) { tty->cr(); - b->dump_head(&_cfg._bbs); + block->dump_head(&_cfg); } dump(n); } @@ -2326,7 +2303,6 @@ } #endif // not PRODUCT -//------------------------------print_chaitin_statistics------------------------------- int PhaseChaitin::_final_loads = 0; int PhaseChaitin::_final_stores = 0; int PhaseChaitin::_final_memoves= 0; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/chaitin.hpp --- a/src/share/vm/opto/chaitin.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/chaitin.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -412,33 +412,22 @@ uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray splits, int slidx ); uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray splits, int slidx ); - bool clone_projs(Block *b, uint idx, Node *con, Node *copy, LiveRangeMap &lrg_map) { - bool found_projs = clone_projs_shared(b, idx, con, copy, lrg_map.max_lrg_id()); - - if(found_projs) { - uint max_lrg_id = lrg_map.max_lrg_id(); - lrg_map.set_max_lrg_id(max_lrg_id + 1); - } - - return found_projs; - } - //------------------------------clone_projs------------------------------------ // After cloning some rematerialized instruction, clone any MachProj's that // follow it. Example: Intel zero is XOR, kills flags. Sparc FP constants // use G3 as an address temp. - bool clone_projs(Block *b, uint idx, Node *con, Node *copy, uint &max_lrg_id) { - bool found_projs = clone_projs_shared(b, idx, con, copy, max_lrg_id); + int clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id); - if(found_projs) { - max_lrg_id++; + int clone_projs(Block* b, uint idx, Node* orig, Node* copy, LiveRangeMap& lrg_map) { + uint max_lrg_id = lrg_map.max_lrg_id(); + int found_projs = clone_projs(b, idx, orig, copy, max_lrg_id); + if (found_projs > 0) { + // max_lrg_id is updated during call above + lrg_map.set_max_lrg_id(max_lrg_id); } - return found_projs; } - bool clone_projs_shared(Block *b, uint idx, Node *con, Node *copy, uint max_lrg_id); - Node *split_Rematerialize(Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru); // True if lidx is used before any real register is def'd in the block diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/coalesce.cpp --- a/src/share/vm/opto/coalesce.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/coalesce.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -34,8 +34,6 @@ #include "opto/matcher.hpp" #include "opto/regmask.hpp" -//============================================================================= -//------------------------------Dump------------------------------------------- #ifndef PRODUCT void PhaseCoalesce::dump(Node *n) const { // Being a const function means I cannot use 'Find' @@ -43,16 +41,15 @@ tty->print("L%d/N%d ",r,n->_idx); } -//------------------------------dump------------------------------------------- void PhaseCoalesce::dump() const { // I know I have a block layout now, so I can print blocks in a loop - for( uint i=0; i<_phc._cfg._num_blocks; i++ ) { + for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) { uint j; - Block *b = _phc._cfg._blocks[i]; + Block* b = _phc._cfg.get_block(i); // Print a nice block header tty->print("B%d: ",b->_pre_order); for( j=1; jnum_preds(); j++ ) - tty->print("B%d ", _phc._cfg._bbs[b->pred(j)->_idx]->_pre_order); + tty->print("B%d ", _phc._cfg.get_block_for_node(b->pred(j))->_pre_order); tty->print("-> "); for( j=0; j_num_succs; j++ ) tty->print("B%d ",b->_succs[j]->_pre_order); @@ -85,7 +82,6 @@ } #endif -//------------------------------combine_these_two------------------------------ // Combine the live ranges def'd by these 2 Nodes. N2 is an input to N1. void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) { uint lr1 = _phc._lrg_map.find(n1); @@ -127,18 +123,15 @@ } } -//------------------------------coalesce_driver-------------------------------- // Copy coalescing -void PhaseCoalesce::coalesce_driver( ) { - +void PhaseCoalesce::coalesce_driver() { verify(); // Coalesce from high frequency to low - for( uint i=0; i<_phc._cfg._num_blocks; i++ ) - coalesce( _phc._blks[i] ); - + for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) { + coalesce(_phc._blks[i]); + } } -//------------------------------insert_copy_with_overlap----------------------- // I am inserting copies to come out of SSA form. In the general case, I am // doing a parallel renaming. I'm in the Named world now, so I can't do a // general parallel renaming. All the copies now use "names" (live-ranges) @@ -208,7 +201,7 @@ copy->set_req(idx,tmp); // Save source in temp early, before source is killed b->_nodes.insert(kill_src_idx,tmp); - _phc._cfg._bbs.map( tmp->_idx, b ); + _phc._cfg.map_node_to_block(tmp, b); last_use_idx++; } @@ -216,7 +209,6 @@ b->_nodes.insert(last_use_idx+1,copy); } -//------------------------------insert_copies---------------------------------- void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) { // We do LRGs compressing and fix a liveout data only here since the other // place in Split() is guarded by the assert which we never hit. @@ -225,8 +217,8 @@ for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) { uint compressed_lrg = _phc._lrg_map.find(lrg); if (lrg != compressed_lrg) { - for (uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++) { - IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]); + for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) { + IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx)); if (liveout->member(lrg)) { liveout->remove(lrg); liveout->insert(compressed_lrg); @@ -239,10 +231,10 @@ // Nodes with index less than '_unique' are original, non-virtual Nodes. _unique = C->unique(); - for( uint i=0; i<_phc._cfg._num_blocks; i++ ) { + for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) { C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce"); if (C->failing()) return; - Block *b = _phc._cfg._blocks[i]; + Block *b = _phc._cfg.get_block(i); uint cnt = b->num_preds(); // Number of inputs to the Phi for( uint l = 1; l_nodes.size(); l++ ) { @@ -286,7 +278,7 @@ Node *m = n->in(j); uint src_name = _phc._lrg_map.find(m); if (src_name != phi_name) { - Block *pred = _phc._cfg._bbs[b->pred(j)->_idx]; + Block *pred = _phc._cfg.get_block_for_node(b->pred(j)); Node *copy; assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach"); // Rematerialize constants instead of copying them @@ -305,7 +297,7 @@ } // Insert the copy in the use-def chain n->set_req(j, copy); - _phc._cfg._bbs.map( copy->_idx, pred ); + _phc._cfg.map_node_to_block(copy, pred); // Extend ("register allocate") the names array for the copy. _phc._lrg_map.extend(copy->_idx, phi_name); } // End of if Phi names do not match @@ -330,9 +322,7 @@ copy = m->clone(); // Insert the copy in the basic block, just before us b->_nodes.insert(l++, copy); - if(_phc.clone_projs(b, l, m, copy, _phc._lrg_map)) { - l++; - } + l += _phc.clone_projs(b, l, m, copy, _phc._lrg_map); } else { const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()]; copy = new (C) MachSpillCopyNode(m, *rm, *rm); @@ -343,13 +333,13 @@ n->set_req(idx, copy); // Extend ("register allocate") the names array for the copy. _phc._lrg_map.extend(copy->_idx, name); - _phc._cfg._bbs.map( copy->_idx, b ); + _phc._cfg.map_node_to_block(copy, b); } } // End of is two-adr // Insert a copy at a debug use for a lrg which has high frequency - if (b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs)) { + if (b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(&_phc._cfg)) { // Walk the debug inputs to the node and check for lrg freq JVMState* jvms = n->jvms(); uint debug_start = jvms ? jvms->debug_start() : 999999; @@ -391,7 +381,7 @@ uint max_lrg_id = _phc._lrg_map.max_lrg_id(); _phc.new_lrg(copy, max_lrg_id); _phc._lrg_map.set_max_lrg_id(max_lrg_id + 1); - _phc._cfg._bbs.map(copy->_idx, b); + _phc._cfg.map_node_to_block(copy, b); //tty->print_cr("Split a debug use in Aggressive Coalesce"); } // End of if high frequency use/def } // End of for all debug inputs @@ -403,8 +393,7 @@ } // End of for all blocks } -//============================================================================= -//------------------------------coalesce--------------------------------------- + // Aggressive (but pessimistic) copy coalescing of a single block // The following coalesce pass represents a single round of aggressive @@ -437,7 +426,10 @@ Block *bs = b->_succs[i]; // Find index of 'b' in 'bs' predecessors uint j=1; - while( _phc._cfg._bbs[bs->pred(j)->_idx] != b ) j++; + while (_phc._cfg.get_block_for_node(bs->pred(j)) != b) { + j++; + } + // Visit all the Phis in successor block for( uint k = 1; k_nodes.size(); k++ ) { Node *n = bs->_nodes[k]; @@ -461,20 +453,16 @@ } // End of for all instructions in block } -//============================================================================= -//------------------------------PhaseConservativeCoalesce---------------------- PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) { _ulr.initialize(_phc._lrg_map.max_lrg_id()); } -//------------------------------verify----------------------------------------- void PhaseConservativeCoalesce::verify() { #ifdef ASSERT _phc.set_was_low(); #endif } -//------------------------------union_helper----------------------------------- void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) { // Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the // union-find tree @@ -510,14 +498,13 @@ if( bindex < b->_fhrp_index ) b->_fhrp_index--; // Stretched lr1; add it to liveness of intermediate blocks - Block *b2 = _phc._cfg._bbs[src_copy->_idx]; + Block *b2 = _phc._cfg.get_block_for_node(src_copy); while( b != b2 ) { - b = _phc._cfg._bbs[b->pred(1)->_idx]; + b = _phc._cfg.get_block_for_node(b->pred(1)); _phc._live->live(b)->insert(lr1); } } -//------------------------------compute_separating_interferences--------------- // Factored code from copy_copy that computes extra interferences from // lengthening a live range by double-coalescing. uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) { @@ -532,7 +519,7 @@ bindex2--; // Chain backwards 1 instruction while( bindex2 == 0 ) { // At block start, find prior block assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" ); - b2 = _phc._cfg._bbs[b2->pred(1)->_idx]; + b2 = _phc._cfg.get_block_for_node(b2->pred(1)); bindex2 = b2->end_idx()-1; } // Get prior instruction @@ -583,7 +570,6 @@ return reg_degree; } -//------------------------------update_ifg------------------------------------- void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) { // Some original neighbors of lr1 might have gone away // because the constrained register mask prevented them. @@ -613,7 +599,6 @@ lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) ); } -//------------------------------record_bias------------------------------------ static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) { // Tag copy bias here if( !ifg->lrgs(lr1)._copy_bias ) @@ -622,7 +607,6 @@ ifg->lrgs(lr2)._copy_bias = lr1; } -//------------------------------copy_copy-------------------------------------- // See if I can coalesce a series of multiple copies together. I need the // final dest copy and the original src copy. They can be the same Node. // Compute the compatible register masks. @@ -676,8 +660,8 @@ if (UseFPUForSpilling && rm.is_AllStack() ) { // Don't coalesce when frequency difference is large - Block *dst_b = _phc._cfg._bbs[dst_copy->_idx]; - Block *src_def_b = _phc._cfg._bbs[src_def->_idx]; + Block *dst_b = _phc._cfg.get_block_for_node(dst_copy); + Block *src_def_b = _phc._cfg.get_block_for_node(src_def); if (src_def_b->_freq > 10*dst_b->_freq ) return false; } @@ -690,7 +674,7 @@ // Another early bail-out test is when we are double-coalescing and the // 2 copies are separated by some control flow. if( dst_copy != src_copy ) { - Block *src_b = _phc._cfg._bbs[src_copy->_idx]; + Block *src_b = _phc._cfg.get_block_for_node(src_copy); Block *b2 = b; while( b2 != src_b ) { if( b2->num_preds() > 2 ){// Found merge-point @@ -701,7 +685,7 @@ //record_bias( _phc._lrgs, lr1, lr2 ); return false; // To hard to find all interferences } - b2 = _phc._cfg._bbs[b2->pred(1)->_idx]; + b2 = _phc._cfg.get_block_for_node(b2->pred(1)); } } @@ -782,12 +766,12 @@ return true; } -//------------------------------coalesce--------------------------------------- // Conservative (but pessimistic) copy coalescing of a single block void PhaseConservativeCoalesce::coalesce( Block *b ) { // Bail out on infrequent blocks - if( b->is_uncommon(_phc._cfg._bbs) ) + if (b->is_uncommon(&_phc._cfg)) { return; + } // Check this block for copies. for( uint i = 1; iend_idx(); i++ ) { // Check for actual copies on inputs. Coalesce a copy into its diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/compile.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -2139,7 +2139,9 @@ //------------------------------Code_Gen--------------------------------------- // Given a graph, generate code for it void Compile::Code_Gen() { - if (failing()) return; + if (failing()) { + return; + } // Perform instruction selection. You might think we could reclaim Matcher // memory PDQ, but actually the Matcher is used in generating spill code. @@ -2151,12 +2153,11 @@ // nodes. Mapping is only valid at the root of each matched subtree. NOT_PRODUCT( verify_graph_edges(); ) - Node_List proj_list; - Matcher m(proj_list); - _matcher = &m; + Matcher matcher; + _matcher = &matcher; { TracePhase t2("matcher", &_t_matcher, true); - m.match(); + matcher.match(); } // In debug mode can dump m._nodes.dump() for mapping of ideal to machine // nodes. Mapping is only valid at the root of each matched subtree. @@ -2164,31 +2165,26 @@ // If you have too many nodes, or if matching has failed, bail out check_node_count(0, "out of nodes matching instructions"); - if (failing()) return; + if (failing()) { + return; + } // Build a proper-looking CFG - PhaseCFG cfg(node_arena(), root(), m); + PhaseCFG cfg(node_arena(), root(), matcher); _cfg = &cfg; { NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); ) - cfg.Dominators(); - if (failing()) return; - - NOT_PRODUCT( verify_graph_edges(); ) - - cfg.Estimate_Block_Frequency(); - cfg.GlobalCodeMotion(m,unique(),proj_list); - if (failing()) return; + bool success = cfg.do_global_code_motion(); + if (!success) { + return; + } print_method(PHASE_GLOBAL_CODE_MOTION, 2); - NOT_PRODUCT( verify_graph_edges(); ) - debug_only( cfg.verify(); ) } - NOT_PRODUCT( verify_graph_edges(); ) - - PhaseChaitin regalloc(unique(), cfg, m); + + PhaseChaitin regalloc(unique(), cfg, matcher); _regalloc = ®alloc; { TracePhase t2("regalloc", &_t_registerAllocation, true); @@ -2209,7 +2205,7 @@ // can now safely remove it. { NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); ) - cfg.remove_empty(); + cfg.remove_empty_blocks(); if (do_freq_based_layout()) { PhaseBlockLayout layout(cfg); } else { @@ -2256,38 +2252,50 @@ _regalloc->dump_frame(); Node *n = NULL; - for( uint i=0; i<_cfg->_num_blocks; i++ ) { - if (VMThread::should_terminate()) { cut_short = true; break; } - Block *b = _cfg->_blocks[i]; - if (b->is_connector() && !Verbose) continue; - n = b->_nodes[0]; - if (pcs && n->_idx < pc_limit) + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + if (VMThread::should_terminate()) { + cut_short = true; + break; + } + Block* block = _cfg->get_block(i); + if (block->is_connector() && !Verbose) { + continue; + } + n = block->_nodes[0]; + if (pcs && n->_idx < pc_limit) { tty->print("%3.3x ", pcs[n->_idx]); - else + } else { tty->print(" "); - b->dump_head( &_cfg->_bbs ); - if (b->is_connector()) { + } + block->dump_head(_cfg); + if (block->is_connector()) { tty->print_cr(" # Empty connector block"); - } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) { + } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) { tty->print_cr(" # Block is sole successor of call"); } // For all instructions Node *delay = NULL; - for( uint j = 0; j_nodes.size(); j++ ) { - if (VMThread::should_terminate()) { cut_short = true; break; } - n = b->_nodes[j]; + for (uint j = 0; j < block->_nodes.size(); j++) { + if (VMThread::should_terminate()) { + cut_short = true; + break; + } + n = block->_nodes[j]; if (valid_bundle_info(n)) { - Bundle *bundle = node_bundling(n); + Bundle* bundle = node_bundling(n); if (bundle->used_in_unconditional_delay()) { delay = n; continue; } - if (bundle->starts_bundle()) + if (bundle->starts_bundle()) { starts_bundle = '+'; + } } - if (WizardMode) n->dump(); + if (WizardMode) { + n->dump(); + } if( !n->is_Region() && // Dont print in the Assembly !n->is_Phi() && // a few noisely useless nodes @@ -3528,7 +3536,7 @@ } Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, BasicType type, jvalue value) { - Block* b = Compile::current()->cfg()->_bbs[n->_idx]; + Block* b = Compile::current()->cfg()->get_block_for_node(n); Constant con(type, value, b->_freq); add(con); return con; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/domgraph.cpp --- a/src/share/vm/opto/domgraph.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/domgraph.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -32,9 +32,6 @@ // Portions of code courtesy of Clifford Click -// Optimization - Graph Style - -//------------------------------Tarjan----------------------------------------- // A data structure that holds all the information needed to find dominators. struct Tarjan { Block *_block; // Basic block for this info @@ -60,23 +57,21 @@ }; -//------------------------------Dominator-------------------------------------- // Compute the dominator tree of the CFG. The CFG must already have been // constructed. This is the Lengauer & Tarjan O(E-alpha(E,V)) algorithm. -void PhaseCFG::Dominators( ) { +void PhaseCFG::build_dominator_tree() { // Pre-grow the blocks array, prior to the ResourceMark kicking in - _blocks.map(_num_blocks,0); + _blocks.map(number_of_blocks(), 0); ResourceMark rm; // Setup mappings from my Graph to Tarjan's stuff and back // Note: Tarjan uses 1-based arrays - Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1); + Tarjan* tarjan = NEW_RESOURCE_ARRAY(Tarjan, number_of_blocks() + 1); // Tarjan's algorithm, almost verbatim: // Step 1: - _rpo_ctr = _num_blocks; - uint dfsnum = DFS( tarjan ); - if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops! + uint dfsnum = do_DFS(tarjan, number_of_blocks()); + if (dfsnum - 1 != number_of_blocks()) { // Check for unreachable loops! // If the returned dfsnum does not match the number of blocks, then we // must have some unreachable loops. These can be made at any time by // IterGVN. They are cleaned up by CCP or the loop opts, but the last @@ -93,20 +88,19 @@ C->record_method_not_compilable("unreachable loop"); return; } - _blocks._cnt = _num_blocks; + _blocks._cnt = number_of_blocks(); // Tarjan is using 1-based arrays, so these are some initialize flags tarjan[0]._size = tarjan[0]._semi = 0; tarjan[0]._label = &tarjan[0]; - uint i; - for( i=_num_blocks; i>=2; i-- ) { // For all vertices in DFS order + for (uint i = number_of_blocks(); i >= 2; i--) { // For all vertices in DFS order Tarjan *w = &tarjan[i]; // Get vertex from DFS // Step 2: Node *whead = w->_block->head(); - for( uint j=1; j < whead->req(); j++ ) { - Block *b = _bbs[whead->in(j)->_idx]; + for (uint j = 1; j < whead->req(); j++) { + Block* b = get_block_for_node(whead->in(j)); Tarjan *vx = &tarjan[b->_pre_order]; Tarjan *u = vx->EVAL(); if( u->_semi < w->_semi ) @@ -130,19 +124,19 @@ } // Step 4: - for( i=2; i <= _num_blocks; i++ ) { + for (uint i = 2; i <= number_of_blocks(); i++) { Tarjan *w = &tarjan[i]; if( w->_dom != &tarjan[w->_semi] ) w->_dom = w->_dom->_dom; w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later } // No immediate dominator for the root - Tarjan *w = &tarjan[_broot->_pre_order]; + Tarjan *w = &tarjan[get_root_block()->_pre_order]; w->_dom = NULL; w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later // Convert the dominator tree array into my kind of graph - for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices + for(uint i = 1; i <= number_of_blocks(); i++){ // For all Tarjan vertices Tarjan *t = &tarjan[i]; // Handy access Tarjan *tdom = t->_dom; // Handy access to immediate dominator if( tdom ) { // Root has no immediate dominator @@ -152,11 +146,10 @@ } else t->_block->_idom = NULL; // Root } - w->setdepth( _num_blocks+1 ); // Set depth in dominator tree + w->setdepth(number_of_blocks() + 1); // Set depth in dominator tree } -//----------------------------Block_Stack-------------------------------------- class Block_Stack { private: struct Block_Descr { @@ -214,7 +207,6 @@ } }; -//-------------------------most_frequent_successor----------------------------- // Find the index into the b->succs[] array of the most frequent successor. uint Block_Stack::most_frequent_successor( Block *b ) { uint freq_idx = 0; @@ -258,40 +250,38 @@ return freq_idx; } -//------------------------------DFS-------------------------------------------- // Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup // 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent. -uint PhaseCFG::DFS( Tarjan *tarjan ) { - Block *b = _broot; +uint PhaseCFG::do_DFS(Tarjan *tarjan, uint rpo_counter) { + Block* root_block = get_root_block(); uint pre_order = 1; - // Allocate stack of size _num_blocks+1 to avoid frequent realloc - Block_Stack bstack(tarjan, _num_blocks+1); + // Allocate stack of size number_of_blocks() + 1 to avoid frequent realloc + Block_Stack bstack(tarjan, number_of_blocks() + 1); // Push on stack the state for the first block - bstack.push(pre_order, b); + bstack.push(pre_order, root_block); ++pre_order; while (bstack.is_nonempty()) { if (!bstack.last_successor()) { // Walk over all successors in pre-order (DFS). - Block *s = bstack.next_successor(); - if (s->_pre_order == 0) { // Check for no-pre-order, not-visited + Block* next_block = bstack.next_successor(); + if (next_block->_pre_order == 0) { // Check for no-pre-order, not-visited // Push on stack the state of successor - bstack.push(pre_order, s); + bstack.push(pre_order, next_block); ++pre_order; } } else { // Build a reverse post-order in the CFG _blocks array Block *stack_top = bstack.pop(); - stack_top->_rpo = --_rpo_ctr; + stack_top->_rpo = --rpo_counter; _blocks.map(stack_top->_rpo, stack_top); } } return pre_order; } -//------------------------------COMPRESS--------------------------------------- void Tarjan::COMPRESS() { assert( _ancestor != 0, "" ); @@ -303,14 +293,12 @@ } } -//------------------------------EVAL------------------------------------------- Tarjan *Tarjan::EVAL() { if( !_ancestor ) return _label; COMPRESS(); return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label; } -//------------------------------LINK------------------------------------------- void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) { Tarjan *s = w; while( w->_label->_semi < s->_child->_label->_semi ) { @@ -333,7 +321,6 @@ } } -//------------------------------setdepth--------------------------------------- void Tarjan::setdepth( uint stack_size ) { Tarjan **top = NEW_RESOURCE_ARRAY(Tarjan*, stack_size); Tarjan **next = top; @@ -362,8 +349,7 @@ } while (last < top); } -//*********************** DOMINATORS ON THE SEA OF NODES*********************** -//------------------------------NTarjan---------------------------------------- +// Compute dominators on the Sea of Nodes form // A data structure that holds all the information needed to find dominators. struct NTarjan { Node *_control; // Control node associated with this info @@ -396,7 +382,6 @@ #endif }; -//------------------------------Dominator-------------------------------------- // Compute the dominator tree of the sea of nodes. This version walks all CFG // nodes (using the is_CFG() call) and places them in a dominator tree. Thus, // it needs a count of the CFG nodes for the mapping table. This is the @@ -517,7 +502,6 @@ } } -//------------------------------DFS-------------------------------------------- // Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup // 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent. int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) { @@ -560,7 +544,6 @@ return dfsnum; } -//------------------------------COMPRESS--------------------------------------- void NTarjan::COMPRESS() { assert( _ancestor != 0, "" ); @@ -572,14 +555,12 @@ } } -//------------------------------EVAL------------------------------------------- NTarjan *NTarjan::EVAL() { if( !_ancestor ) return _label; COMPRESS(); return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label; } -//------------------------------LINK------------------------------------------- void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) { NTarjan *s = w; while( w->_label->_semi < s->_child->_label->_semi ) { @@ -602,7 +583,6 @@ } } -//------------------------------setdepth--------------------------------------- void NTarjan::setdepth( uint stack_size, uint *dom_depth ) { NTarjan **top = NEW_RESOURCE_ARRAY(NTarjan*, stack_size); NTarjan **next = top; @@ -631,7 +611,6 @@ } while (last < top); } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void NTarjan::dump(int offset) const { // Dump the data from this node diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/gcm.cpp --- a/src/share/vm/opto/gcm.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/gcm.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -70,7 +70,7 @@ // are in b also. void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) { // Set basic block of n, Add n to b, - _bbs.map(n->_idx, b); + map_node_to_block(n, b); b->add_inst(n); // After Matching, nearly any old Node may have projections trailing it. @@ -79,11 +79,12 @@ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node* use = n->fast_out(i); if (use->is_Proj()) { - Block* buse = _bbs[use->_idx]; + Block* buse = get_block_for_node(use); if (buse != b) { // In wrong block? - if (buse != NULL) + if (buse != NULL) { buse->find_remove(use); // Remove from wrong block - _bbs.map(use->_idx, b); // Re-insert in this block + } + map_node_to_block(use, b); b->add_inst(use); } } @@ -101,7 +102,7 @@ if (p != NULL && p != n) { // Control from a block projection? assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here"); // Find trailing Region - Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block + Block *pb = get_block_for_node(in0); // Block-projection already has basic block uint j = 0; if (pb->_num_succs != 1) { // More then 1 successor? // Search for successor @@ -124,26 +125,30 @@ //------------------------------schedule_pinned_nodes-------------------------- // Set the basic block for Nodes pinned into blocks -void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) { +void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) { // Allocate node stack of size C->unique()+8 to avoid frequent realloc - GrowableArray spstack(C->unique()+8); + GrowableArray spstack(C->unique() + 8); spstack.push(_root); - while ( spstack.is_nonempty() ) { - Node *n = spstack.pop(); - if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited - if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down! - assert( n->in(0), "pinned Node must have Control" ); + while (spstack.is_nonempty()) { + Node* node = spstack.pop(); + if (!visited.test_set(node->_idx)) { // Test node and flag it as visited + if (node->pinned() && !has_block(node)) { // Pinned? Nail it down! + assert(node->in(0), "pinned Node must have Control"); // Before setting block replace block_proj control edge - replace_block_proj_ctrl(n); - Node *input = n->in(0); - while( !input->is_block_start() ) + replace_block_proj_ctrl(node); + Node* input = node->in(0); + while (!input->is_block_start()) { input = input->in(0); - Block *b = _bbs[input->_idx]; // Basic block of controlling input - schedule_node_into_block(n, b); + } + Block* block = get_block_for_node(input); // Basic block of controlling input + schedule_node_into_block(node, block); } - for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs - if( n->in(i) != NULL ) - spstack.push(n->in(i)); + + // process all inputs that are non NULL + for (int i = node->req() - 1; i >= 0; --i) { + if (node->in(i) != NULL) { + spstack.push(node->in(i)); + } } } } @@ -153,7 +158,7 @@ // Assert that new input b2 is dominated by all previous inputs. // Check this by by seeing that it is dominated by b1, the deepest // input observed until b2. -static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) { +static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) { if (b1 == NULL) return; assert(b1->_dom_depth < b2->_dom_depth, "sanity"); Block* tmp = b2; @@ -166,7 +171,7 @@ for (uint j=0; jlen(); j++) { // For all inputs Node* inn = n->in(j); // Get input if (inn == NULL) continue; // Ignore NULL, missing inputs - Block* inb = bbs[inn->_idx]; + Block* inb = cfg->get_block_for_node(inn); tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order, inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth); inn->dump(); @@ -178,20 +183,20 @@ } #endif -static Block* find_deepest_input(Node* n, Block_Array &bbs) { +static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) { // Find the last input dominated by all other inputs. Block* deepb = NULL; // Deepest block so far int deepb_dom_depth = 0; for (uint k = 0; k < n->len(); k++) { // For all inputs Node* inn = n->in(k); // Get input if (inn == NULL) continue; // Ignore NULL, missing inputs - Block* inb = bbs[inn->_idx]; + Block* inb = cfg->get_block_for_node(inn); assert(inb != NULL, "must already have scheduled this input"); if (deepb_dom_depth < (int) inb->_dom_depth) { // The new inb must be dominated by the previous deepb. // The various inputs must be linearly ordered in the dom // tree, or else there will not be a unique deepest block. - DEBUG_ONLY(assert_dom(deepb, inb, n, bbs)); + DEBUG_ONLY(assert_dom(deepb, inb, n, cfg)); deepb = inb; // Save deepest block deepb_dom_depth = deepb->_dom_depth; } @@ -207,32 +212,29 @@ // which all their inputs occur. bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) { // Allocate stack with enough space to avoid frequent realloc - Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats - // roots.push(_root); _root will be processed among C->top() inputs + Node_Stack nstack(roots.Size() + 8); + // _root will be processed among C->top() inputs roots.push(C->top()); visited.set(C->top()->_idx); while (roots.size() != 0) { // Use local variables nstack_top_n & nstack_top_i to cache values // on stack's top. - Node *nstack_top_n = roots.pop(); - uint nstack_top_i = 0; -//while_nstack_nonempty: + Node* parent_node = roots.pop(); + uint input_index = 0; + while (true) { - // Get parent node and next input's index from stack's top. - Node *n = nstack_top_n; - uint i = nstack_top_i; - - if (i == 0) { + if (input_index == 0) { // Fixup some control. Constants without control get attached // to root and nodes that use is_block_proj() nodes should be attached // to the region that starts their block. - const Node *in0 = n->in(0); - if (in0 != NULL) { // Control-dependent? - replace_block_proj_ctrl(n); - } else { // n->in(0) == NULL - if (n->req() == 1) { // This guy is a constant with NO inputs? - n->set_req(0, _root); + const Node* control_input = parent_node->in(0); + if (control_input != NULL) { + replace_block_proj_ctrl(parent_node); + } else { + // Is a constant with NO inputs? + if (parent_node->req() == 1) { + parent_node->set_req(0, _root); } } } @@ -241,37 +243,47 @@ // input is already in a block we quit following inputs (to avoid // cycles). Instead we put that Node on a worklist to be handled // later (since IT'S inputs may not have a block yet). - bool done = true; // Assume all n's inputs will be processed - while (i < n->len()) { // For all inputs - Node *in = n->in(i); // Get input - ++i; - if (in == NULL) continue; // Ignore NULL, missing inputs + + // Assume all n's inputs will be processed + bool done = true; + + while (input_index < parent_node->len()) { + Node* in = parent_node->in(input_index++); + if (in == NULL) { + continue; + } + int is_visited = visited.test_set(in->_idx); - if (!_bbs.lookup(in->_idx)) { // Missing block selection? + if (!has_block(in)) { if (is_visited) { - // assert( !visited.test(in->_idx), "did not schedule early" ); return false; } - nstack.push(n, i); // Save parent node and next input's index. - nstack_top_n = in; // Process current input now. - nstack_top_i = 0; - done = false; // Not all n's inputs processed. - break; // continue while_nstack_nonempty; - } else if (!is_visited) { // Input not yet visited? - roots.push(in); // Visit this guy later, using worklist + // Save parent node and next input's index. + nstack.push(parent_node, input_index); + // Process current input now. + parent_node = in; + input_index = 0; + // Not all n's inputs processed. + done = false; + break; + } else if (!is_visited) { + // Visit this guy later, using worklist + roots.push(in); } } + if (done) { // All of n's inputs have been processed, complete post-processing. // Some instructions are pinned into a block. These include Region, // Phi, Start, Return, and other control-dependent instructions and // any projections which depend on them. - if (!n->pinned()) { + if (!parent_node->pinned()) { // Set earliest legal block. - _bbs.map(n->_idx, find_deepest_input(n, _bbs)); + Block* earliest_block = find_deepest_input(parent_node, this); + map_node_to_block(parent_node, earliest_block); } else { - assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge"); + assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge"); } if (nstack.is_empty()) { @@ -280,12 +292,12 @@ break; } // Get saved parent node and next input's index. - nstack_top_n = nstack.node(); - nstack_top_i = nstack.index(); + parent_node = nstack.node(); + input_index = nstack.index(); nstack.pop(); - } // if (done) - } // while (true) - } // while (roots.size() != 0) + } + } + } return true; } @@ -317,8 +329,8 @@ // The definition must dominate the use, so move the LCA upward in the // dominator tree to dominate the use. If the use is a phi, adjust // the LCA only with the phi input paths which actually use this def. -static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) { - Block* buse = bbs[use->_idx]; +static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) { + Block* buse = cfg->get_block_for_node(use); if (buse == NULL) return LCA; // Unused killing Projs have no use block if (!use->is_Phi()) return buse->dom_lca(LCA); uint pmax = use->req(); // Number of Phi inputs @@ -333,7 +345,7 @@ // more than once. for (uint j=1; jin(j) == def) { // Found matching input? - Block* pred = bbs[buse->pred(j)->_idx]; + Block* pred = cfg->get_block_for_node(buse->pred(j)); LCA = pred->dom_lca(LCA); } } @@ -346,8 +358,7 @@ // which are marked with the given index. Return the LCA (in the dom tree) // of all marked blocks. If there are none marked, return the original // LCA. -static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, - Block* early, Block_Array &bbs) { +static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) { Block_List worklist; worklist.push(LCA); while (worklist.size() > 0) { @@ -370,7 +381,7 @@ } else { // Keep searching through this block's predecessors. for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) { - Block* mid_parent = bbs[ mid->pred(j)->_idx ]; + Block* mid_parent = cfg->get_block_for_node(mid->pred(j)); worklist.push(mid_parent); } } @@ -388,7 +399,7 @@ // be earlier (at a shallower dom_depth) than the true schedule_early // point of the node. We compute this earlier block as a more permissive // site for anti-dependency insertion, but only if subsume_loads is enabled. -static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) { +static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) { Node* base; Node* index; Node* store = load->in(MemNode::Memory); @@ -416,12 +427,12 @@ Block* deepb = NULL; // Deepest block so far int deepb_dom_depth = 0; for (int i = 0; i < mem_inputs_length; i++) { - Block* inb = bbs[mem_inputs[i]->_idx]; + Block* inb = cfg->get_block_for_node(mem_inputs[i]); if (deepb_dom_depth < (int) inb->_dom_depth) { // The new inb must be dominated by the previous deepb. // The various inputs must be linearly ordered in the dom // tree, or else there will not be a unique deepest block. - DEBUG_ONLY(assert_dom(deepb, inb, load, bbs)); + DEBUG_ONLY(assert_dom(deepb, inb, load, cfg)); deepb = inb; // Save deepest block deepb_dom_depth = deepb->_dom_depth; } @@ -492,14 +503,14 @@ // and other inputs are first available. (Computed by schedule_early.) // For normal loads, 'early' is the shallowest place (dom graph wise) // to look for anti-deps between this load and any store. - Block* early = _bbs[load_index]; + Block* early = get_block_for_node(load); // If we are subsuming loads, compute an "early" block that only considers // memory or address inputs. This block may be different than the // schedule_early block in that it could be at an even shallower depth in the // dominator tree, and allow for a broader discovery of anti-dependences. if (C->subsume_loads()) { - early = memory_early_block(load, early, _bbs); + early = memory_early_block(load, early, this); } ResourceArea *area = Thread::current()->resource_area(); @@ -623,7 +634,7 @@ // or else observe that 'store' is all the way up in the // earliest legal block for 'load'. In the latter case, // immediately insert an anti-dependence edge. - Block* store_block = _bbs[store->_idx]; + Block* store_block = get_block_for_node(store); assert(store_block != NULL, "unused killing projections skipped above"); if (store->is_Phi()) { @@ -641,7 +652,7 @@ for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) { if (store->in(j) == mem) { // Found matching input? DEBUG_ONLY(found_match = true); - Block* pred_block = _bbs[store_block->pred(j)->_idx]; + Block* pred_block = get_block_for_node(store_block->pred(j)); if (pred_block != early) { // If any predecessor of the Phi matches the load's "early block", // we do not need a precedence edge between the Phi and 'load' @@ -715,7 +726,7 @@ // preventing the load from sinking past any block containing // a store that may invalidate the memory state required by 'load'. if (must_raise_LCA) - LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs); + LCA = raise_LCA_above_marks(LCA, load->_idx, early, this); if (LCA == early) return LCA; // Insert anti-dependence edges from 'load' to each store @@ -724,7 +735,7 @@ if (LCA->raise_LCA_mark() == load_index) { while (non_early_stores.size() > 0) { Node* store = non_early_stores.pop(); - Block* store_block = _bbs[store->_idx]; + Block* store_block = get_block_for_node(store); if (store_block == LCA) { // add anti_dependence from store to load in its own block assert(store != load->in(0), "dependence cycle found"); @@ -758,7 +769,7 @@ public: // Constructor for the iterator - Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs); + Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg); // Postincrement operator to iterate over the nodes Node *next(); @@ -766,12 +777,12 @@ private: VectorSet &_visited; Node_List &_stack; - Block_Array &_bbs; + PhaseCFG &_cfg; }; // Constructor for the Node_Backward_Iterator -Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs ) - : _visited(visited), _stack(stack), _bbs(bbs) { +Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg) + : _visited(visited), _stack(stack), _cfg(cfg) { // The stack should contain exactly the root stack.clear(); stack.push(root); @@ -801,8 +812,8 @@ _visited.set(self->_idx); // Now schedule all uses as late as possible. - uint src = self->is_Proj() ? self->in(0)->_idx : self->_idx; - uint src_rpo = _bbs[src]->_rpo; + const Node* src = self->is_Proj() ? self->in(0) : self; + uint src_rpo = _cfg.get_block_for_node(src)->_rpo; // Schedule all nodes in a post-order visit Node *unvisited = NULL; // Unvisited anti-dependent Node, if any @@ -818,7 +829,7 @@ // do not traverse backward control edges Node *use = n->is_Proj() ? n->in(0) : n; - uint use_rpo = _bbs[use->_idx]->_rpo; + uint use_rpo = _cfg.get_block_for_node(use)->_rpo; if ( use_rpo < src_rpo ) continue; @@ -850,13 +861,13 @@ //------------------------------ComputeLatenciesBackwards---------------------- // Compute the latency of all the instructions. -void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) { +void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) { #ifndef PRODUCT if (trace_opto_pipelining()) tty->print("\n#---- ComputeLatenciesBackwards ----\n"); #endif - Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs); + Node_Backward_Iterator iter((Node *)_root, visited, stack, *this); Node *n; // Walk over all the nodes from last to first @@ -873,31 +884,34 @@ // Set the latency for this instruction #ifndef PRODUCT if (trace_opto_pipelining()) { - tty->print("# latency_to_inputs: node_latency[%d] = %d for node", - n->_idx, _node_latency->at_grow(n->_idx)); + tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); dump(); } #endif - if (n->is_Proj()) + if (n->is_Proj()) { n = n->in(0); + } - if (n->is_Root()) + if (n->is_Root()) { return; + } uint nlen = n->len(); - uint use_latency = _node_latency->at_grow(n->_idx); - uint use_pre_order = _bbs[n->_idx]->_pre_order; + uint use_latency = get_latency_for_node(n); + uint use_pre_order = get_block_for_node(n)->_pre_order; - for ( uint j=0; jin(j); - if (!def || def == n) + if (!def || def == n) { continue; + } // Walk backwards thru projections - if (def->is_Proj()) + if (def->is_Proj()) { def = def->in(0); + } #ifndef PRODUCT if (trace_opto_pipelining()) { @@ -907,25 +921,23 @@ #endif // If the defining block is not known, assume it is ok - Block *def_block = _bbs[def->_idx]; + Block *def_block = get_block_for_node(def); uint def_pre_order = def_block ? def_block->_pre_order : 0; - if ( (use_pre_order < def_pre_order) || - (use_pre_order == def_pre_order && n->is_Phi()) ) + if ((use_pre_order < def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) { continue; + } uint delta_latency = n->latency(j); uint current_latency = delta_latency + use_latency; - if (_node_latency->at_grow(def->_idx) < current_latency) { - _node_latency->at_put_grow(def->_idx, current_latency); + if (get_latency_for_node(def) < current_latency) { + set_latency_for_node(def, current_latency); } #ifndef PRODUCT if (trace_opto_pipelining()) { - tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", - use_latency, j, delta_latency, current_latency, def->_idx, - _node_latency->at_grow(def->_idx)); + tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def)); } #endif } @@ -935,10 +947,11 @@ // Compute the latency of a specific use int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) { // If self-reference, return no latency - if (use == n || use->is_Root()) + if (use == n || use->is_Root()) { return 0; + } - uint def_pre_order = _bbs[def->_idx]->_pre_order; + uint def_pre_order = get_block_for_node(def)->_pre_order; uint latency = 0; // If the use is not a projection, then it is simple... @@ -950,7 +963,7 @@ } #endif - uint use_pre_order = _bbs[use->_idx]->_pre_order; + uint use_pre_order = get_block_for_node(use)->_pre_order; if (use_pre_order < def_pre_order) return 0; @@ -959,7 +972,7 @@ return 0; uint nlen = use->len(); - uint nl = _node_latency->at_grow(use->_idx); + uint nl = get_latency_for_node(use); for ( uint j=0; jin(j) == n) { @@ -994,8 +1007,7 @@ // Set the latency for this instruction #ifndef PRODUCT if (trace_opto_pipelining()) { - tty->print("# latency_from_outputs: node_latency[%d] = %d for node", - n->_idx, _node_latency->at_grow(n->_idx)); + tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n)); dump(); } #endif @@ -1008,7 +1020,7 @@ if (latency < l) latency = l; } - _node_latency->at_put_grow(n->_idx, latency); + set_latency_for_node(n, latency); } //------------------------------hoist_to_cheaper_block------------------------- @@ -1018,11 +1030,11 @@ const double delta = 1+PROB_UNLIKELY_MAG(4); Block* least = LCA; double least_freq = least->_freq; - uint target = _node_latency->at_grow(self->_idx); - uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx); - uint end_latency = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx); + uint target = get_latency_for_node(self); + uint start_latency = get_latency_for_node(LCA->_nodes[0]); + uint end_latency = get_latency_for_node(LCA->_nodes[LCA->end_idx()]); bool in_latency = (target <= start_latency); - const Block* root_block = _bbs[_root->_idx]; + const Block* root_block = get_block_for_node(_root); // Turn off latency scheduling if scheduling is just plain off if (!C->do_scheduling()) @@ -1037,8 +1049,7 @@ #ifndef PRODUCT if (trace_opto_pipelining()) { - tty->print("# Find cheaper block for latency %d: ", - _node_latency->at_grow(self->_idx)); + tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self)); self->dump(); tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g", LCA->_pre_order, @@ -1067,9 +1078,9 @@ if (mach && LCA == root_block) break; - uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx); + uint start_lat = get_latency_for_node(LCA->_nodes[0]); uint end_idx = LCA->end_idx(); - uint end_lat = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx); + uint end_lat = get_latency_for_node(LCA->_nodes[end_idx]); double LCA_freq = LCA->_freq; #ifndef PRODUCT if (trace_opto_pipelining()) { @@ -1111,7 +1122,7 @@ tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency); } #endif - _node_latency->at_put_grow(self->_idx, end_latency); + set_latency_for_node(self, end_latency); partial_latency_of_defs(self); } @@ -1130,12 +1141,12 @@ tty->print("\n#---- schedule_late ----\n"); #endif - Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs); + Node_Backward_Iterator iter((Node *)_root, visited, stack, *this); Node *self; // Walk over all the nodes from last to first while (self = iter.next()) { - Block* early = _bbs[self->_idx]; // Earliest legal placement + Block* early = get_block_for_node(self); // Earliest legal placement if (self->is_top()) { // Top node goes in bb #2 with other constants. @@ -1183,7 +1194,7 @@ for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) { // For all uses, find LCA Node* use = self->fast_out(i); - LCA = raise_LCA_above_use(LCA, use, self, _bbs); + LCA = raise_LCA_above_use(LCA, use, self, this); } } // (Hide defs of imax, i from rest of block.) @@ -1191,7 +1202,7 @@ // requirement for correctness but it reduces useless // interference between temps and other nodes. if (mach != NULL && mach->is_MachTemp()) { - _bbs.map(self->_idx, LCA); + map_node_to_block(self, LCA); LCA->add_inst(self); continue; } @@ -1257,7 +1268,7 @@ } // end ScheduleLate //------------------------------GlobalCodeMotion------------------------------- -void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) { +void PhaseCFG::global_code_motion() { ResourceMark rm; #ifndef PRODUCT @@ -1266,22 +1277,23 @@ } #endif - // Initialize the bbs.map for things on the proj_list - uint i; - for( i=0; i < proj_list.size(); i++ ) - _bbs.map(proj_list[i]->_idx, NULL); + // Initialize the node to block mapping for things on the proj_list + for (uint i = 0; i < _matcher.number_of_projections(); i++) { + unmap_node_from_block(_matcher.get_projection(i)); + } // Set the basic block for Nodes pinned into blocks - Arena *a = Thread::current()->resource_area(); - VectorSet visited(a); - schedule_pinned_nodes( visited ); + Arena* arena = Thread::current()->resource_area(); + VectorSet visited(arena); + schedule_pinned_nodes(visited); // Find the earliest Block any instruction can be placed in. Some // instructions are pinned into Blocks. Unpinned instructions can // appear in last block in which all their inputs occur. visited.Clear(); - Node_List stack(a); - stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list + Node_List stack(arena); + // Pre-grow the list + stack.map((C->unique() >> 1) + 16, NULL); if (!schedule_early(visited, stack)) { // Bailout without retry C->record_method_not_compilable("early schedule failed"); @@ -1289,29 +1301,25 @@ } // Build Def-Use edges. - proj_list.push(_root); // Add real root as another root - proj_list.pop(); - // Compute the latency information (via backwards walk) for all the // instructions in the graph _node_latency = new GrowableArray(); // resource_area allocation - if( C->do_scheduling() ) - ComputeLatenciesBackwards(visited, stack); + if (C->do_scheduling()) { + compute_latencies_backwards(visited, stack); + } // Now schedule all codes as LATE as possible. This is the LCA in the // dominator tree of all USES of a value. Pick the block with the least // loop nesting depth that is lowest in the dominator tree. // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() ) schedule_late(visited, stack); - if( C->failing() ) { + if (C->failing()) { // schedule_late fails only when graph is incorrect. assert(!VerifyGraphEdges, "verification should have failed"); return; } - unique = C->unique(); - #ifndef PRODUCT if (trace_opto_pipelining()) { tty->print("\n---- Detect implicit null checks ----\n"); @@ -1334,10 +1342,11 @@ // By reversing the loop direction we get a very minor gain on mpegaudio. // Feel free to revert to a forward loop for clarity. // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) { - for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) { - Node *proj = matcher._null_check_tests[i ]; - Node *val = matcher._null_check_tests[i+1]; - _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons); + for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) { + Node* proj = _matcher._null_check_tests[i]; + Node* val = _matcher._null_check_tests[i + 1]; + Block* block = get_block_for_node(proj); + block->implicit_null_check(this, proj, val, allowed_reasons); // The implicit_null_check will only perform the transformation // if the null branch is truly uncommon, *and* it leads to an // uncommon trap. Combined with the too_many_traps guards @@ -1354,11 +1363,11 @@ // Schedule locally. Right now a simple topological sort. // Later, do a real latency aware scheduler. - uint max_idx = C->unique(); - GrowableArray ready_cnt(max_idx, max_idx, -1); + GrowableArray ready_cnt(C->unique(), C->unique(), -1); visited.Clear(); - for (i = 0; i < _num_blocks; i++) { - if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) { + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + if (!block->schedule_local(this, _matcher, ready_cnt, visited)) { if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { C->record_method_not_compilable("local schedule failed"); } @@ -1368,14 +1377,17 @@ // If we inserted any instructions between a Call and his CatchNode, // clone the instructions on all paths below the Catch. - for( i=0; i < _num_blocks; i++ ) - _blocks[i]->call_catch_cleanup(_bbs, C); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + block->call_catch_cleanup(this, C); + } #ifndef PRODUCT if (trace_opto_pipelining()) { tty->print("\n---- After GlobalCodeMotion ----\n"); - for (uint i = 0; i < _num_blocks; i++) { - _blocks[i]->dump(); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + block->dump(); } } #endif @@ -1383,10 +1395,29 @@ _node_latency = (GrowableArray *)0xdeadbeef; } +bool PhaseCFG::do_global_code_motion() { + + build_dominator_tree(); + if (C->failing()) { + return false; + } + + NOT_PRODUCT( C->verify_graph_edges(); ) + + estimate_block_frequency(); + + global_code_motion(); + + if (C->failing()) { + return false; + } + + return true; +} //------------------------------Estimate_Block_Frequency----------------------- // Estimate block frequencies based on IfNode probabilities. -void PhaseCFG::Estimate_Block_Frequency() { +void PhaseCFG::estimate_block_frequency() { // Force conditional branches leading to uncommon traps to be unlikely, // not because we get to the uncommon_trap with less relative frequency, @@ -1394,18 +1425,20 @@ // there once. if (C->do_freq_based_layout()) { Block_List worklist; - Block* root_blk = _blocks[0]; + Block* root_blk = get_block(0); for (uint i = 1; i < root_blk->num_preds(); i++) { - Block *pb = _bbs[root_blk->pred(i)->_idx]; + Block *pb = get_block_for_node(root_blk->pred(i)); if (pb->has_uncommon_code()) { worklist.push(pb); } } while (worklist.size() > 0) { Block* uct = worklist.pop(); - if (uct == _broot) continue; + if (uct == get_root_block()) { + continue; + } for (uint i = 1; i < uct->num_preds(); i++) { - Block *pb = _bbs[uct->pred(i)->_idx]; + Block *pb = get_block_for_node(uct->pred(i)); if (pb->_num_succs == 1) { worklist.push(pb); } else if (pb->num_fall_throughs() == 2) { @@ -1427,14 +1460,14 @@ _root_loop->scale_freq(); // Save outmost loop frequency for LRG frequency threshold - _outer_loop_freq = _root_loop->outer_loop_freq(); + _outer_loop_frequency = _root_loop->outer_loop_freq(); // force paths ending at uncommon traps to be infrequent if (!C->do_freq_based_layout()) { Block_List worklist; - Block* root_blk = _blocks[0]; + Block* root_blk = get_block(0); for (uint i = 1; i < root_blk->num_preds(); i++) { - Block *pb = _bbs[root_blk->pred(i)->_idx]; + Block *pb = get_block_for_node(root_blk->pred(i)); if (pb->has_uncommon_code()) { worklist.push(pb); } @@ -1443,7 +1476,7 @@ Block* uct = worklist.pop(); uct->_freq = PROB_MIN; for (uint i = 1; i < uct->num_preds(); i++) { - Block *pb = _bbs[uct->pred(i)->_idx]; + Block *pb = get_block_for_node(uct->pred(i)); if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) { worklist.push(pb); } @@ -1452,8 +1485,8 @@ } #ifdef ASSERT - for (uint i = 0; i < _num_blocks; i++ ) { - Block *b = _blocks[i]; + for (uint i = 0; i < number_of_blocks(); i++) { + Block* b = get_block(i); assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); } #endif @@ -1477,16 +1510,16 @@ CFGLoop* PhaseCFG::create_loop_tree() { #ifdef ASSERT - assert( _blocks[0] == _broot, "" ); - for (uint i = 0; i < _num_blocks; i++ ) { - Block *b = _blocks[i]; + assert(get_block(0) == get_root_block(), "first block should be root block"); + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); // Check that _loop field are clear...we could clear them if not. - assert(b->_loop == NULL, "clear _loop expected"); + assert(block->_loop == NULL, "clear _loop expected"); // Sanity check that the RPO numbering is reflected in the _blocks array. // It doesn't have to be for the loop tree to be built, but if it is not, // then the blocks have been reordered since dom graph building...which // may question the RPO numbering - assert(b->_rpo == i, "unexpected reverse post order number"); + assert(block->_rpo == i, "unexpected reverse post order number"); } #endif @@ -1496,14 +1529,14 @@ Block_List worklist; // Assign blocks to loops - for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block - Block *b = _blocks[i]; + for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block + Block* block = get_block(i); - if (b->head()->is_Loop()) { - Block* loop_head = b; + if (block->head()->is_Loop()) { + Block* loop_head = block; assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); Node* tail_n = loop_head->pred(LoopNode::LoopBackControl); - Block* tail = _bbs[tail_n->_idx]; + Block* tail = get_block_for_node(tail_n); // Defensively filter out Loop nodes for non-single-entry loops. // For all reasonable loops, the head occurs before the tail in RPO. @@ -1518,13 +1551,13 @@ loop_head->_loop = nloop; // Add to nloop so push_pred() will skip over inner loops nloop->add_member(loop_head); - nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs); + nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this); while (worklist.size() > 0) { Block* member = worklist.pop(); if (member != loop_head) { for (uint j = 1; j < member->num_preds(); j++) { - nloop->push_pred(member, j, worklist, _bbs); + nloop->push_pred(member, j, worklist, this); } } } @@ -1534,23 +1567,23 @@ // Create a member list for each loop consisting // of both blocks and (immediate child) loops. - for (uint i = 0; i < _num_blocks; i++) { - Block *b = _blocks[i]; - CFGLoop* lp = b->_loop; + for (uint i = 0; i < number_of_blocks(); i++) { + Block* block = get_block(i); + CFGLoop* lp = block->_loop; if (lp == NULL) { // Not assigned to a loop. Add it to the method's pseudo loop. - b->_loop = root_loop; + block->_loop = root_loop; lp = root_loop; } - if (lp == root_loop || b != lp->head()) { // loop heads are already members - lp->add_member(b); + if (lp == root_loop || block != lp->head()) { // loop heads are already members + lp->add_member(block); } if (lp != root_loop) { if (lp->parent() == NULL) { // Not a nested loop. Make it a child of the method's pseudo loop. root_loop->add_nested_loop(lp); } - if (b == lp->head()) { + if (block == lp->head()) { // Add nested loop to member list of parent loop. lp->parent()->add_member(lp); } @@ -1561,9 +1594,9 @@ } //------------------------------push_pred-------------------------------------- -void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) { +void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) { Node* pred_n = blk->pred(i); - Block* pred = node_to_blk[pred_n->_idx]; + Block* pred = cfg->get_block_for_node(pred_n); CFGLoop *pred_loop = pred->_loop; if (pred_loop == NULL) { // Filter out blocks for non-single-entry loops. @@ -1584,7 +1617,7 @@ Block* pred_head = pred_loop->head(); assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors"); assert(pred_head != head(), "loop head in only one loop"); - push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk); + push_pred(pred_head, LoopNode::EntryControl, worklist, cfg); } else { assert(pred_loop->_parent == this && _parent == NULL, "just checking"); } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/idealGraphPrinter.cpp --- a/src/share/vm/opto/idealGraphPrinter.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/idealGraphPrinter.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -413,10 +413,10 @@ print_prop("debug_idx", node->_debug_idx); #endif - if(C->cfg() != NULL) { - Block *block = C->cfg()->_bbs[node->_idx]; - if(block == NULL) { - print_prop("block", C->cfg()->_blocks[0]->_pre_order); + if (C->cfg() != NULL) { + Block* block = C->cfg()->get_block_for_node(node); + if (block == NULL) { + print_prop("block", C->cfg()->get_block(0)->_pre_order); } else { print_prop("block", block->_pre_order); } @@ -637,10 +637,10 @@ if (C->cfg() != NULL) { // once we have a CFG there are some nodes that aren't really // reachable but are in the CFG so add them here. - for (uint i = 0; i < C->cfg()->_blocks.size(); i++) { - Block *b = C->cfg()->_blocks[i]; - for (uint s = 0; s < b->_nodes.size(); s++) { - nodeStack.push(b->_nodes[s]); + for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) { + Block* block = C->cfg()->get_block(i); + for (uint s = 0; s < block->_nodes.size(); s++) { + nodeStack.push(block->_nodes[s]); } } } @@ -698,24 +698,24 @@ tail(EDGES_ELEMENT); if (C->cfg() != NULL) { head(CONTROL_FLOW_ELEMENT); - for (uint i = 0; i < C->cfg()->_blocks.size(); i++) { - Block *b = C->cfg()->_blocks[i]; + for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) { + Block* block = C->cfg()->get_block(i); begin_head(BLOCK_ELEMENT); - print_attr(BLOCK_NAME_PROPERTY, b->_pre_order); + print_attr(BLOCK_NAME_PROPERTY, block->_pre_order); end_head(); head(SUCCESSORS_ELEMENT); - for (uint s = 0; s < b->_num_succs; s++) { + for (uint s = 0; s < block->_num_succs; s++) { begin_elem(SUCCESSOR_ELEMENT); - print_attr(BLOCK_NAME_PROPERTY, b->_succs[s]->_pre_order); + print_attr(BLOCK_NAME_PROPERTY, block->_succs[s]->_pre_order); end_elem(); } tail(SUCCESSORS_ELEMENT); head(NODES_ELEMENT); - for (uint s = 0; s < b->_nodes.size(); s++) { + for (uint s = 0; s < block->_nodes.size(); s++) { begin_elem(NODE_ELEMENT); - print_attr(NODE_ID_PROPERTY, get_node_id(b->_nodes[s])); + print_attr(NODE_ID_PROPERTY, get_node_id(block->_nodes[s])); end_elem(); } tail(NODES_ELEMENT); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/ifg.cpp --- a/src/share/vm/opto/ifg.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/ifg.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -37,12 +37,9 @@ #include "opto/memnode.hpp" #include "opto/opcodes.hpp" -//============================================================================= -//------------------------------IFG-------------------------------------------- PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) { } -//------------------------------init------------------------------------------- void PhaseIFG::init( uint maxlrg ) { _maxlrg = maxlrg; _yanked = new (_arena) VectorSet(_arena); @@ -59,7 +56,6 @@ } } -//------------------------------add-------------------------------------------- // Add edge between vertices a & b. These are sorted (triangular matrix), // then the smaller number is inserted in the larger numbered array. int PhaseIFG::add_edge( uint a, uint b ) { @@ -71,7 +67,6 @@ return _adjs[a].insert( b ); } -//------------------------------add_vector------------------------------------- // Add an edge between 'a' and everything in the vector. void PhaseIFG::add_vector( uint a, IndexSet *vec ) { // IFG is triangular, so do the inserts where 'a' < 'b'. @@ -86,7 +81,6 @@ } } -//------------------------------test------------------------------------------- // Is there an edge between a and b? int PhaseIFG::test_edge( uint a, uint b ) const { // Sort a and b, so that a is larger @@ -95,7 +89,6 @@ return _adjs[a].member(b); } -//------------------------------SquareUp--------------------------------------- // Convert triangular matrix to square matrix void PhaseIFG::SquareUp() { assert( !_is_square, "only on triangular" ); @@ -111,7 +104,6 @@ _is_square = true; } -//------------------------------Compute_Effective_Degree----------------------- // Compute effective degree in bulk void PhaseIFG::Compute_Effective_Degree() { assert( _is_square, "only on square" ); @@ -120,7 +112,6 @@ lrgs(i).set_degree(effective_degree(i)); } -//------------------------------test_edge_sq----------------------------------- int PhaseIFG::test_edge_sq( uint a, uint b ) const { assert( _is_square, "only on square" ); // Swap, so that 'a' has the lesser count. Then binary search is on @@ -130,7 +121,6 @@ return _adjs[a].member(b); } -//------------------------------Union------------------------------------------ // Union edges of B into A void PhaseIFG::Union( uint a, uint b ) { assert( _is_square, "only on square" ); @@ -146,7 +136,6 @@ } } -//------------------------------remove_node------------------------------------ // Yank a Node and all connected edges from the IFG. Return a // list of neighbors (edges) yanked. IndexSet *PhaseIFG::remove_node( uint a ) { @@ -165,7 +154,6 @@ return neighbors(a); } -//------------------------------re_insert-------------------------------------- // Re-insert a yanked Node. void PhaseIFG::re_insert( uint a ) { assert( _is_square, "only on square" ); @@ -180,7 +168,6 @@ } } -//------------------------------compute_degree--------------------------------- // Compute the degree between 2 live ranges. If both live ranges are // aligned-adjacent powers-of-2 then we use the MAX size. If either is // mis-aligned (or for Fat-Projections, not-adjacent) then we have to @@ -196,7 +183,6 @@ return tmp; } -//------------------------------effective_degree------------------------------- // Compute effective degree for this live range. If both live ranges are // aligned-adjacent powers-of-2 then we use the MAX size. If either is // mis-aligned (or for Fat-Projections, not-adjacent) then we have to @@ -221,7 +207,6 @@ #ifndef PRODUCT -//------------------------------dump------------------------------------------- void PhaseIFG::dump() const { tty->print_cr("-- Interference Graph --%s--", _is_square ? "square" : "triangular" ); @@ -260,7 +245,6 @@ tty->print("\n"); } -//------------------------------stats------------------------------------------ void PhaseIFG::stats() const { ResourceMark rm; int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2); @@ -276,7 +260,6 @@ tty->print_cr(""); } -//------------------------------verify----------------------------------------- void PhaseIFG::verify( const PhaseChaitin *pc ) const { // IFG is square, sorted and no need for Find for( uint i = 0; i < _maxlrg; i++ ) { @@ -298,7 +281,6 @@ } #endif -//------------------------------interfere_with_live---------------------------- // Interfere this register with everything currently live. Use the RegMasks // to trim the set of possible interferences. Return a count of register-only // interferences as an estimate of register pressure. @@ -315,7 +297,6 @@ _ifg->add_edge( r, l ); } -//------------------------------build_ifg_virtual------------------------------ // Actually build the interference graph. Uses virtual registers only, no // physical register masks. This allows me to be very aggressive when // coalescing copies. Some of this aggressiveness will have to be undone @@ -325,9 +306,9 @@ void PhaseChaitin::build_ifg_virtual( ) { // For all blocks (in any order) do... - for( uint i=0; i<_cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - IndexSet *liveout = _live->live(b); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); + IndexSet* liveout = _live->live(block); // The IFG is built by a single reverse pass over each basic block. // Starting with the known live-out set, we remove things that get @@ -337,8 +318,8 @@ // The defined value interferes with everything currently live. The // value is then removed from the live-ness set and it's inputs are // added to the live-ness set. - for( uint j = b->end_idx() + 1; j > 1; j-- ) { - Node *n = b->_nodes[j-1]; + for (uint j = block->end_idx() + 1; j > 1; j--) { + Node* n = block->_nodes[j - 1]; // Get value being defined uint r = _lrg_map.live_range_id(n); @@ -408,7 +389,6 @@ } // End of forall blocks } -//------------------------------count_int_pressure----------------------------- uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) { IndexSetIterator elements(liveout); uint lidx; @@ -424,7 +404,6 @@ return cnt; } -//------------------------------count_float_pressure--------------------------- uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) { IndexSetIterator elements(liveout); uint lidx; @@ -438,7 +417,6 @@ return cnt; } -//------------------------------lower_pressure--------------------------------- // Adjust register pressure down by 1. Capture last hi-to-low transition, static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) { if (lrg->mask().is_UP() && lrg->mask_size()) { @@ -460,40 +438,41 @@ } } -//------------------------------build_ifg_physical----------------------------- // Build the interference graph using physical registers when available. // That is, if 2 live ranges are simultaneously alive but in their acceptable // register sets do not overlap, then they do not interfere. uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); ) - uint spill_reg = LRG::SPILL_REG; uint must_spill = 0; // For all blocks (in any order) do... - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // Clone (rather than smash in place) the liveout info, so it is alive // for the "collect_gc_info" phase later. - IndexSet liveout(_live->live(b)); - uint last_inst = b->end_idx(); + IndexSet liveout(_live->live(block)); + uint last_inst = block->end_idx(); // Compute first nonphi node index uint first_inst; - for( first_inst = 1; first_inst < last_inst; first_inst++ ) - if( !b->_nodes[first_inst]->is_Phi() ) + for (first_inst = 1; first_inst < last_inst; first_inst++) { + if (!block->_nodes[first_inst]->is_Phi()) { break; + } + } // Spills could be inserted before CreateEx node which should be // first instruction in block after Phis. Move CreateEx up. - for( uint insidx = first_inst; insidx < last_inst; insidx++ ) { - Node *ex = b->_nodes[insidx]; - if( ex->is_SpillCopy() ) continue; - if( insidx > first_inst && ex->is_Mach() && - ex->as_Mach()->ideal_Opcode() == Op_CreateEx ) { + for (uint insidx = first_inst; insidx < last_inst; insidx++) { + Node *ex = block->_nodes[insidx]; + if (ex->is_SpillCopy()) { + continue; + } + if (insidx > first_inst && ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) { // If the CreateEx isn't above all the MachSpillCopies // then move it to the top. - b->_nodes.remove(insidx); - b->_nodes.insert(first_inst, ex); + block->_nodes.remove(insidx); + block->_nodes.insert(first_inst, ex); } // Stop once a CreateEx or any other node is found break; @@ -503,12 +482,12 @@ uint pressure[2], hrp_index[2]; pressure[0] = pressure[1] = 0; hrp_index[0] = hrp_index[1] = last_inst+1; - b->_reg_pressure = b->_freg_pressure = 0; + block->_reg_pressure = block->_freg_pressure = 0; // Liveout things are presumed live for the whole block. We accumulate // 'area' accordingly. If they get killed in the block, we'll subtract // the unused part of the block from the area. int inst_count = last_inst - first_inst; - double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count); + double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count); assert(!(cost < 0.0), "negative spill cost" ); IndexSetIterator elements(&liveout); uint lidx; @@ -519,13 +498,15 @@ if (lrg.mask().is_UP() && lrg.mask_size()) { if (lrg._is_float || lrg._is_vector) { // Count float pressure pressure[1] += lrg.reg_pressure(); - if( pressure[1] > b->_freg_pressure ) - b->_freg_pressure = pressure[1]; + if (pressure[1] > block->_freg_pressure) { + block->_freg_pressure = pressure[1]; + } // Count int pressure, but do not count the SP, flags - } else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) { + } else if(lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { pressure[0] += lrg.reg_pressure(); - if( pressure[0] > b->_reg_pressure ) - b->_reg_pressure = pressure[0]; + if (pressure[0] > block->_reg_pressure) { + block->_reg_pressure = pressure[0]; + } } } } @@ -541,8 +522,8 @@ // value is then removed from the live-ness set and it's inputs are added // to the live-ness set. uint j; - for( j = last_inst + 1; j > 1; j-- ) { - Node *n = b->_nodes[j - 1]; + for (j = last_inst + 1; j > 1; j--) { + Node* n = block->_nodes[j - 1]; // Get value being defined uint r = _lrg_map.live_range_id(n); @@ -551,7 +532,7 @@ if(r) { // A DEF normally costs block frequency; rematerialized values are // removed from the DEF sight, so LOWER costs here. - lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq; + lrgs(r)._cost += n->rematerialize() ? 0 : block->_freq; // If it is not live, then this instruction is dead. Probably caused // by spilling and rematerialization. Who cares why, yank this baby. @@ -560,12 +541,12 @@ if( !n->is_Proj() || // Could also be a flags-projection of a dead ADD or such. (_lrg_map.live_range_id(def) && !liveout.member(_lrg_map.live_range_id(def)))) { - b->_nodes.remove(j - 1); + block->_nodes.remove(j - 1); if (lrgs(r)._def == n) { lrgs(r)._def = 0; } n->disconnect_inputs(NULL, C); - _cfg._bbs.map(n->_idx,NULL); + _cfg.unmap_node_from_block(n); n->replace_by(C->top()); // Since yanking a Node from block, high pressure moves up one hrp_index[0]--; @@ -580,21 +561,21 @@ RegMask itmp = lrgs(r).mask(); itmp.AND(*Matcher::idealreg2regmask[Op_RegI]); int iregs = itmp.Size(); - if( pressure[0]+iregs > b->_reg_pressure ) - b->_reg_pressure = pressure[0]+iregs; - if( pressure[0] <= (uint)INTPRESSURE && - pressure[0]+iregs > (uint)INTPRESSURE ) { - hrp_index[0] = j-1; + if (pressure[0]+iregs > block->_reg_pressure) { + block->_reg_pressure = pressure[0] + iregs; + } + if (pressure[0] <= (uint)INTPRESSURE && pressure[0] + iregs > (uint)INTPRESSURE) { + hrp_index[0] = j - 1; } // Count the float-only registers RegMask ftmp = lrgs(r).mask(); ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]); int fregs = ftmp.Size(); - if( pressure[1]+fregs > b->_freg_pressure ) - b->_freg_pressure = pressure[1]+fregs; - if( pressure[1] <= (uint)FLOATPRESSURE && - pressure[1]+fregs > (uint)FLOATPRESSURE ) { - hrp_index[1] = j-1; + if (pressure[1] + fregs > block->_freg_pressure) { + block->_freg_pressure = pressure[1] + fregs; + } + if(pressure[1] <= (uint)FLOATPRESSURE && pressure[1]+fregs > (uint)FLOATPRESSURE) { + hrp_index[1] = j - 1; } } @@ -607,7 +588,7 @@ if( n->is_SpillCopy() && lrgs(r).is_singledef() // MultiDef live range can still split && n->outcnt() == 1 // and use must be in this block - && _cfg._bbs[n->unique_out()->_idx] == b ) { + && _cfg.get_block_for_node(n->unique_out()) == block) { // All single-use MachSpillCopy(s) that immediately precede their // use must color early. If a longer live range steals their // color, the spill copy will split and may push another spill copy @@ -617,14 +598,16 @@ // Node *single_use = n->unique_out(); - assert( b->find_node(single_use) >= j, "Use must be later in block"); + assert(block->find_node(single_use) >= j, "Use must be later in block"); // Use can be earlier in block if it is a Phi, but then I should be a MultiDef // Find first non SpillCopy 'm' that follows the current instruction // (j - 1) is index for current instruction 'n' Node *m = n; - for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; } - if( m == single_use ) { + for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) { + m = block->_nodes[i]; + } + if (m == single_use) { lrgs(r)._area = 0.0; } } @@ -633,7 +616,7 @@ if( liveout.remove(r) ) { // Adjust register pressure. // Capture last hi-to-lo pressure transition - lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index ); + lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index); assert( pressure[0] == count_int_pressure (&liveout), "" ); assert( pressure[1] == count_float_pressure(&liveout), "" ); } @@ -646,7 +629,7 @@ if (liveout.remove(x)) { lrgs(x)._area -= cost; // Adjust register pressure. - lower_pressure(&lrgs(x), j-1, b, pressure, hrp_index); + lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index); assert( pressure[0] == count_int_pressure (&liveout), "" ); assert( pressure[1] == count_float_pressure(&liveout), "" ); } @@ -718,7 +701,7 @@ // Area remaining in the block inst_count--; - cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count); + cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count); // Make all inputs live if( !n->is_Phi() ) { // Phi function uses come from prior block @@ -743,7 +726,7 @@ if (k < debug_start) { // A USE costs twice block frequency (once for the Load, once // for a Load-delay). Rematerialized uses only cost once. - lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq)); + lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq)); } // It is live now if (liveout.insert(x)) { @@ -753,12 +736,14 @@ if (lrg.mask().is_UP() && lrg.mask_size()) { if (lrg._is_float || lrg._is_vector) { pressure[1] += lrg.reg_pressure(); - if( pressure[1] > b->_freg_pressure ) - b->_freg_pressure = pressure[1]; + if (pressure[1] > block->_freg_pressure) { + block->_freg_pressure = pressure[1]; + } } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) { pressure[0] += lrg.reg_pressure(); - if( pressure[0] > b->_reg_pressure ) - b->_reg_pressure = pressure[0]; + if (pressure[0] > block->_reg_pressure) { + block->_reg_pressure = pressure[0]; + } } } assert( pressure[0] == count_int_pressure (&liveout), "" ); @@ -772,44 +757,47 @@ // If we run off the top of the block with high pressure and // never see a hi-to-low pressure transition, just record that // the whole block is high pressure. - if( pressure[0] > (uint)INTPRESSURE ) { + if (pressure[0] > (uint)INTPRESSURE) { hrp_index[0] = 0; - if( pressure[0] > b->_reg_pressure ) - b->_reg_pressure = pressure[0]; + if (pressure[0] > block->_reg_pressure) { + block->_reg_pressure = pressure[0]; + } } - if( pressure[1] > (uint)FLOATPRESSURE ) { + if (pressure[1] > (uint)FLOATPRESSURE) { hrp_index[1] = 0; - if( pressure[1] > b->_freg_pressure ) - b->_freg_pressure = pressure[1]; + if (pressure[1] > block->_freg_pressure) { + block->_freg_pressure = pressure[1]; + } } // Compute high pressure indice; avoid landing in the middle of projnodes j = hrp_index[0]; - if( j < b->_nodes.size() && j < b->end_idx()+1 ) { - Node *cur = b->_nodes[j]; - while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) { + if (j < block->_nodes.size() && j < block->end_idx() + 1) { + Node* cur = block->_nodes[j]; + while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { j--; - cur = b->_nodes[j]; + cur = block->_nodes[j]; } } - b->_ihrp_index = j; + block->_ihrp_index = j; j = hrp_index[1]; - if( j < b->_nodes.size() && j < b->end_idx()+1 ) { - Node *cur = b->_nodes[j]; - while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) { + if (j < block->_nodes.size() && j < block->end_idx() + 1) { + Node* cur = block->_nodes[j]; + while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { j--; - cur = b->_nodes[j]; + cur = block->_nodes[j]; } } - b->_fhrp_index = j; + block->_fhrp_index = j; #ifndef PRODUCT // Gather Register Pressure Statistics if( PrintOptoStatistics ) { - if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE ) + if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) { _high_pressure++; - else + } else { _low_pressure++; + } } #endif } // End of for all blocks diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/lcm.cpp --- a/src/share/vm/opto/lcm.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/lcm.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -240,7 +240,7 @@ } // Check ctrl input to see if the null-check dominates the memory op - Block *cb = cfg->_bbs[mach->_idx]; + Block *cb = cfg->get_block_for_node(mach); cb = cb->_idom; // Always hoist at least 1 block if( !was_store ) { // Stores can be hoisted only one block while( cb->_dom_depth > (_dom_depth + 1)) @@ -265,7 +265,7 @@ if( is_decoden ) continue; } // Block of memory-op input - Block *inb = cfg->_bbs[mach->in(j)->_idx]; + Block *inb = cfg->get_block_for_node(mach->in(j)); Block *b = this; // Start from nul check while( b != inb && b->_dom_depth > inb->_dom_depth ) b = b->_idom; // search upwards for input @@ -275,7 +275,7 @@ } if( j > 0 ) continue; - Block *mb = cfg->_bbs[mach->_idx]; + Block *mb = cfg->get_block_for_node(mach); // Hoisting stores requires more checks for the anti-dependence case. // Give up hoisting if we have to move the store past any load. if( was_store ) { @@ -294,7 +294,7 @@ break; // Found anti-dependent load // Make sure control does not do a merge (would have to check allpaths) if( b->num_preds() != 2 ) break; - b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block + b = cfg->get_block_for_node(b->pred(1)); // Move up to predecessor block } if( b != this ) continue; } @@ -306,15 +306,15 @@ // Found a candidate! Pick one with least dom depth - the highest // in the dom tree should be closest to the null check. - if( !best || - cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) { + if (best == NULL || cfg->get_block_for_node(mach)->_dom_depth < cfg->get_block_for_node(best)->_dom_depth) { best = mach; bidx = vidx; - } } // No candidate! - if( !best ) return; + if (best == NULL) { + return; + } // ---- Found an implicit null check extern int implicit_null_checks; @@ -322,29 +322,29 @@ if( is_decoden ) { // Check if we need to hoist decodeHeapOop_not_null first. - Block *valb = cfg->_bbs[val->_idx]; + Block *valb = cfg->get_block_for_node(val); if( this != valb && this->_dom_depth < valb->_dom_depth ) { // Hoist it up to the end of the test block. valb->find_remove(val); this->add_inst(val); - cfg->_bbs.map(val->_idx,this); + cfg->map_node_to_block(val, this); // DecodeN on x86 may kill flags. Check for flag-killing projections // that also need to be hoisted. for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) { Node* n = val->fast_out(j); if( n->is_MachProj() ) { - cfg->_bbs[n->_idx]->find_remove(n); + cfg->get_block_for_node(n)->find_remove(n); this->add_inst(n); - cfg->_bbs.map(n->_idx,this); + cfg->map_node_to_block(n, this); } } } } // Hoist the memory candidate up to the end of the test block. - Block *old_block = cfg->_bbs[best->_idx]; + Block *old_block = cfg->get_block_for_node(best); old_block->find_remove(best); add_inst(best); - cfg->_bbs.map(best->_idx,this); + cfg->map_node_to_block(best, this); // Move the control dependence if (best->in(0) && best->in(0) == old_block->_nodes[0]) @@ -355,9 +355,9 @@ for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { Node* n = best->fast_out(j); if( n->is_MachProj() ) { - cfg->_bbs[n->_idx]->find_remove(n); + cfg->get_block_for_node(n)->find_remove(n); add_inst(n); - cfg->_bbs.map(n->_idx,this); + cfg->map_node_to_block(n, this); } } @@ -388,7 +388,7 @@ Node *old_tst = proj->in(0); MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx); _nodes.map(end_idx(),nul_chk); - cfg->_bbs.map(nul_chk->_idx,this); + cfg->map_node_to_block(nul_chk, this); // Redirect users of old_test to nul_chk for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) old_tst->last_out(i2)->set_req(0, nul_chk); @@ -471,7 +471,7 @@ Node* use = n->fast_out(j); // The use is a conditional branch, make them adjacent - if (use->is_MachIf() && cfg->_bbs[use->_idx]==this ) { + if (use->is_MachIf() && cfg->get_block_for_node(use) == this) { found_machif = true; break; } @@ -504,7 +504,7 @@ n_choice = 1; } - uint n_latency = cfg->_node_latency->at_grow(n->_idx); + uint n_latency = cfg->get_latency_for_node(n); uint n_score = n->req(); // Many inputs get high score to break ties // Keep best latency found @@ -532,13 +532,14 @@ //------------------------------set_next_call---------------------------------- -void Block::set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ) { +void Block::set_next_call( Node *n, VectorSet &next_call, PhaseCFG* cfg) { if( next_call.test_set(n->_idx) ) return; for( uint i=0; ilen(); i++ ) { Node *m = n->in(i); if( !m ) continue; // must see all nodes in block that precede call - if( bbs[m->_idx] == this ) - set_next_call( m, next_call, bbs ); + if (cfg->get_block_for_node(m) == this) { + set_next_call(m, next_call, cfg); + } } } @@ -548,12 +549,12 @@ // next subroutine call get priority - basically it moves things NOT needed // for the next call till after the call. This prevents me from trying to // carry lots of stuff live across a call. -void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs) { +void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, PhaseCFG* cfg) { // Find the next control-defining Node in this block Node* call = NULL; for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) { Node* m = this_call->fast_out(i); - if( bbs[m->_idx] == this && // Local-block user + if(cfg->get_block_for_node(m) == this && // Local-block user m != this_call && // Not self-start node m->is_MachCall() ) call = m; @@ -561,7 +562,7 @@ } if (call == NULL) return; // No next call (e.g., block end is near) // Set next-call for all inputs to this call - set_next_call(call, next_call, bbs); + set_next_call(call, next_call, cfg); } //------------------------------add_call_kills------------------------------------- @@ -581,7 +582,7 @@ //------------------------------sched_call------------------------------------- -uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { +uint Block::sched_call( Matcher &matcher, PhaseCFG* cfg, uint node_cnt, Node_List &worklist, GrowableArray &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { RegMask regs; // Schedule all the users of the call right now. All the users are @@ -600,12 +601,14 @@ // Check for scheduling the next control-definer if( n->bottom_type() == Type::CONTROL ) // Warm up next pile of heuristic bits - needed_for_next_call(n, next_call, bbs); + needed_for_next_call(n, next_call, cfg); // Children of projections are now all ready for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { Node* m = n->fast_out(j); // Get user - if( bbs[m->_idx] != this ) continue; + if(cfg->get_block_for_node(m) != this) { + continue; + } if( m->is_Phi() ) continue; int m_cnt = ready_cnt.at(m->_idx)-1; ready_cnt.at_put(m->_idx, m_cnt); @@ -623,7 +626,7 @@ uint r_cnt = mcall->tf()->range()->cnt(); int op = mcall->ideal_Opcode(); MachProjNode *proj = new (matcher.C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); - bbs.map(proj->_idx,this); + cfg->map_node_to_block(proj, this); _nodes.insert(node_cnt++, proj); // Select the right register save policy. @@ -711,7 +714,7 @@ uint local = 0; for( uint j=0; jin(j); - if( m && cfg->_bbs[m->_idx] == this && !m->is_top() ) + if( m && cfg->get_block_for_node(m) == this && !m->is_top() ) local++; // One more block-local input } ready_cnt.at_put(n->_idx, local); // Count em up @@ -723,7 +726,7 @@ for (uint prec = n->req(); prec < n->len(); prec++) { Node* oop_store = n->in(prec); if (oop_store != NULL) { - assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark"); + assert(cfg->get_block_for_node(oop_store)->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark"); } } } @@ -756,7 +759,7 @@ Node *n = _nodes[i3]; // Get pre-scheduled for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { Node* m = n->fast_out(j); - if( cfg->_bbs[m->_idx] ==this ) { // Local-block user + if (cfg->get_block_for_node(m) == this) { // Local-block user int m_cnt = ready_cnt.at(m->_idx)-1; ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count } @@ -789,7 +792,7 @@ } // Warm up the 'next_call' heuristic bits - needed_for_next_call(_nodes[0], next_call, cfg->_bbs); + needed_for_next_call(_nodes[0], next_call, cfg); #ifndef PRODUCT if (cfg->trace_opto_pipelining()) { @@ -797,7 +800,7 @@ Node *n = _nodes[j]; int idx = n->_idx; tty->print("# ready cnt:%3d ", ready_cnt.at(idx)); - tty->print("latency:%3d ", cfg->_node_latency->at_grow(idx)); + tty->print("latency:%3d ", cfg->get_latency_for_node(n)); tty->print("%4d: %s\n", idx, n->Name()); } } @@ -825,7 +828,7 @@ #ifndef PRODUCT if (cfg->trace_opto_pipelining()) { tty->print("# select %d: %s", n->_idx, n->Name()); - tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx)); + tty->print(", latency:%d", cfg->get_latency_for_node(n)); n->dump(); if (Verbose) { tty->print("# ready list:"); @@ -840,7 +843,7 @@ #endif if( n->is_MachCall() ) { MachCallNode *mcall = n->as_MachCall(); - phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call); + phi_cnt = sched_call(matcher, cfg, phi_cnt, worklist, ready_cnt, mcall, next_call); continue; } @@ -850,7 +853,7 @@ regs.OR(n->out_RegMask()); MachProjNode *proj = new (matcher.C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj ); - cfg->_bbs.map(proj->_idx,this); + cfg->map_node_to_block(proj, this); _nodes.insert(phi_cnt++, proj); add_call_kills(proj, regs, matcher._c_reg_save_policy, false); @@ -859,7 +862,9 @@ // Children are now all ready for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { Node* m = n->fast_out(i5); // Get user - if( cfg->_bbs[m->_idx] != this ) continue; + if (cfg->get_block_for_node(m) != this) { + continue; + } if( m->is_Phi() ) continue; if (m->_idx >= max_idx) { // new node, skip it assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types"); @@ -917,7 +922,7 @@ } //------------------------------catch_cleanup_find_cloned_def------------------ -static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) { +static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, PhaseCFG* cfg, int n_clone_idx) { assert( use_blk != def_blk, "Inter-block cleanup only"); // The use is some block below the Catch. Find and return the clone of the def @@ -943,7 +948,8 @@ // PhiNode, the PhiNode uses from the def and IT's uses need fixup. Node_Array inputs = new Node_List(Thread::current()->resource_area()); for(uint k = 1; k < use_blk->num_preds(); k++) { - inputs.map(k, catch_cleanup_find_cloned_def(bbs[use_blk->pred(k)->_idx], def, def_blk, bbs, n_clone_idx)); + Block* block = cfg->get_block_for_node(use_blk->pred(k)); + inputs.map(k, catch_cleanup_find_cloned_def(block, def, def_blk, cfg, n_clone_idx)); } // Check to see if the use_blk already has an identical phi inserted. @@ -965,7 +971,7 @@ if (fixup == NULL) { Node *new_phi = PhiNode::make(use_blk->head(), def); use_blk->_nodes.insert(1, new_phi); - bbs.map(new_phi->_idx, use_blk); + cfg->map_node_to_block(new_phi, use_blk); for (uint k = 1; k < use_blk->num_preds(); k++) { new_phi->set_req(k, inputs[k]); } @@ -1005,17 +1011,17 @@ //------------------------------catch_cleanup_inter_block--------------------- // Fix all input edges in use that reference "def". The use is in a different // block than the def. -static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) { +static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, PhaseCFG* cfg, int n_clone_idx) { if( !use_blk ) return; // Can happen if the use is a precedence edge - Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, bbs, n_clone_idx); + Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, cfg, n_clone_idx); catch_cleanup_fix_all_inputs(use, def, new_def); } //------------------------------call_catch_cleanup----------------------------- // If we inserted any instructions between a Call and his CatchNode, // clone the instructions on all paths below the Catch. -void Block::call_catch_cleanup(Block_Array &bbs, Compile* C) { +void Block::call_catch_cleanup(PhaseCFG* cfg, Compile* C) { // End of region to clone uint end = end_idx(); @@ -1040,7 +1046,7 @@ // since clones dominate on each path. Node *clone = _nodes[j-1]->clone(); sb->_nodes.insert( 1, clone ); - bbs.map(clone->_idx,sb); + cfg->map_node_to_block(clone, sb); } } @@ -1057,18 +1063,19 @@ uint max = out->size(); for (uint j = 0; j < max; j++) {// For all users Node *use = out->pop(); - Block *buse = bbs[use->_idx]; + Block *buse = cfg->get_block_for_node(use); if( use->is_Phi() ) { for( uint k = 1; k < use->req(); k++ ) if( use->in(k) == n ) { - Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx); + Block* block = cfg->get_block_for_node(buse->pred(k)); + Node *fixup = catch_cleanup_find_cloned_def(block, n, this, cfg, n_clone_idx); use->set_req(k, fixup); } } else { if (this == buse) { catch_cleanup_intra_block(use, n, this, beg, n_clone_idx); } else { - catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx); + catch_cleanup_inter_block(use, buse, n, this, cfg, n_clone_idx); } } } // End for all users diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/library_call.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -213,6 +213,7 @@ void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar); bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile); bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static); + static bool klass_needs_init_guard(Node* kls); bool inline_unsafe_allocate(); bool inline_unsafe_copyMemory(); bool inline_native_currentThread(); @@ -2892,8 +2893,21 @@ } } +bool LibraryCallKit::klass_needs_init_guard(Node* kls) { + if (!kls->is_Con()) { + return true; + } + const TypeKlassPtr* klsptr = kls->bottom_type()->isa_klassptr(); + if (klsptr == NULL) { + return true; + } + ciInstanceKlass* ik = klsptr->klass()->as_instance_klass(); + // don't need a guard for a klass that is already initialized + return !ik->is_initialized(); +} + //----------------------------inline_unsafe_allocate--------------------------- -// public native Object sun.mics.Unsafe.allocateInstance(Class cls); +// public native Object sun.misc.Unsafe.allocateInstance(Class cls); bool LibraryCallKit::inline_unsafe_allocate() { if (callee()->is_static()) return false; // caller must have the capability! @@ -2905,16 +2919,19 @@ kls = null_check(kls); if (stopped()) return true; // argument was like int.class - // Note: The argument might still be an illegal value like - // Serializable.class or Object[].class. The runtime will handle it. - // But we must make an explicit check for initialization. - Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset())); - // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler - // can generate code to load it as unsigned byte. - Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN); - Node* bits = intcon(InstanceKlass::fully_initialized); - Node* test = _gvn.transform(new (C) SubINode(inst, bits)); - // The 'test' is non-zero if we need to take a slow path. + Node* test = NULL; + if (LibraryCallKit::klass_needs_init_guard(kls)) { + // Note: The argument might still be an illegal value like + // Serializable.class or Object[].class. The runtime will handle it. + // But we must make an explicit check for initialization. + Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset())); + // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler + // can generate code to load it as unsigned byte. + Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN); + Node* bits = intcon(InstanceKlass::fully_initialized); + test = _gvn.transform(new (C) SubINode(inst, bits)); + // The 'test' is non-zero if we need to take a slow path. + } Node* obj = new_instance(kls, test); set_result(obj); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/live.cpp --- a/src/share/vm/opto/live.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/live.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -30,9 +30,6 @@ #include "opto/machnode.hpp" - -//============================================================================= -//------------------------------PhaseLive-------------------------------------- // Compute live-in/live-out. We use a totally incremental algorithm. The LIVE // problem is monotonic. The steady-state solution looks like this: pull a // block from the worklist. It has a set of delta's - values which are newly @@ -53,9 +50,9 @@ // Init the sparse live arrays. This data is live on exit from here! // The _live info is the live-out info. - _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks); + _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet) * _cfg.number_of_blocks()); uint i; - for( i=0; i<_cfg._num_blocks; i++ ) { + for (i = 0; i < _cfg.number_of_blocks(); i++) { _live[i].initialize(_maxlrg); } @@ -65,14 +62,14 @@ // Does the memory used by _defs and _deltas get reclaimed? Does it matter? TT // Array of values defined locally in blocks - _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks); - for( i=0; i<_cfg._num_blocks; i++ ) { + _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg.number_of_blocks()); + for (i = 0; i < _cfg.number_of_blocks(); i++) { _defs[i].initialize(_maxlrg); } // Array of delta-set pointers, indexed by block pre_order-1. - _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks); - memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks); + _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks()); + memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks()); _free_IndexSet = NULL; @@ -80,31 +77,32 @@ VectorSet first_pass(Thread::current()->resource_area()); // Outer loop: must compute local live-in sets and push into predecessors. - uint iters = _cfg._num_blocks; // stat counters - for( uint j=_cfg._num_blocks; j>0; j-- ) { - Block *b = _cfg._blocks[j-1]; + for (uint j = _cfg.number_of_blocks(); j > 0; j--) { + Block* block = _cfg.get_block(j - 1); // Compute the local live-in set. Start with any new live-out bits. - IndexSet *use = getset( b ); - IndexSet *def = &_defs[b->_pre_order-1]; + IndexSet* use = getset(block); + IndexSet* def = &_defs[block->_pre_order-1]; DEBUG_ONLY(IndexSet *def_outside = getfreeset();) uint i; - for( i=b->_nodes.size(); i>1; i-- ) { - Node *n = b->_nodes[i-1]; - if( n->is_Phi() ) break; + for (i = block->_nodes.size(); i > 1; i--) { + Node* n = block->_nodes[i-1]; + if (n->is_Phi()) { + break; + } uint r = _names[n->_idx]; assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block"); def->insert( r ); use->remove( r ); uint cnt = n->req(); - for( uint k=1; kin(k); uint nkidx = nk->_idx; - if( _cfg._bbs[nkidx] != b ) { + if (_cfg.get_block_for_node(nk) != block) { uint u = _names[nkidx]; - use->insert( u ); - DEBUG_ONLY(def_outside->insert( u );) + use->insert(u); + DEBUG_ONLY(def_outside->insert(u);) } } } @@ -113,39 +111,38 @@ _free_IndexSet = def_outside; // Drop onto free list #endif // Remove anything defined by Phis and the block start instruction - for( uint k=i; k>0; k-- ) { - uint r = _names[b->_nodes[k-1]->_idx]; - def->insert( r ); - use->remove( r ); + for (uint k = i; k > 0; k--) { + uint r = _names[block->_nodes[k - 1]->_idx]; + def->insert(r); + use->remove(r); } // Push these live-in things to predecessors - for( uint l=1; lnum_preds(); l++ ) { - Block *p = _cfg._bbs[b->pred(l)->_idx]; - add_liveout( p, use, first_pass ); + for (uint l = 1; l < block->num_preds(); l++) { + Block* p = _cfg.get_block_for_node(block->pred(l)); + add_liveout(p, use, first_pass); // PhiNode uses go in the live-out set of prior blocks. - for( uint k=i; k>0; k-- ) - add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass ); + for (uint k = i; k > 0; k--) { + add_liveout(p, _names[block->_nodes[k-1]->in(l)->_idx], first_pass); + } } - freeset( b ); - first_pass.set(b->_pre_order); + freeset(block); + first_pass.set(block->_pre_order); // Inner loop: blocks that picked up new live-out values to be propagated - while( _worklist->size() ) { - // !!!!! -// #ifdef ASSERT - iters++; -// #endif - Block *b = _worklist->pop(); - IndexSet *delta = getset(b); + while (_worklist->size()) { + Block* block = _worklist->pop(); + IndexSet *delta = getset(block); assert( delta->count(), "missing delta set" ); // Add new-live-in to predecessors live-out sets - for( uint l=1; lnum_preds(); l++ ) - add_liveout( _cfg._bbs[b->pred(l)->_idx], delta, first_pass ); + for (uint l = 1; l < block->num_preds(); l++) { + Block* predecessor = _cfg.get_block_for_node(block->pred(l)); + add_liveout(predecessor, delta, first_pass); + } - freeset(b); + freeset(block); } // End of while-worklist-not-empty } // End of for-all-blocks-outer-loop @@ -153,7 +150,7 @@ // We explicitly clear all of the IndexSets which we are about to release. // This allows us to recycle their internal memory into IndexSet's free list. - for( i=0; i<_cfg._num_blocks; i++ ) { + for (i = 0; i < _cfg.number_of_blocks(); i++) { _defs[i].clear(); if (_deltas[i]) { // Is this always true? @@ -169,13 +166,11 @@ } -//------------------------------stats------------------------------------------ #ifndef PRODUCT void PhaseLive::stats(uint iters) const { } #endif -//------------------------------getset----------------------------------------- // Get an IndexSet for a block. Return existing one, if any. Make a new // empty one if a prior one does not exist. IndexSet *PhaseLive::getset( Block *p ) { @@ -186,7 +181,6 @@ return delta; // Return set of new live-out items } -//------------------------------getfreeset------------------------------------- // Pull from free list, or allocate. Internal allocation on the returned set // is always from thread local storage. IndexSet *PhaseLive::getfreeset( ) { @@ -205,7 +199,6 @@ return f; } -//------------------------------freeset---------------------------------------- // Free an IndexSet from a block. void PhaseLive::freeset( const Block *p ) { IndexSet *f = _deltas[p->_pre_order-1]; @@ -214,7 +207,6 @@ _deltas[p->_pre_order-1] = NULL; } -//------------------------------add_liveout------------------------------------ // Add a live-out value to a given blocks live-out set. If it is new, then // also add it to the delta set and stick the block on the worklist. void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) { @@ -231,8 +223,6 @@ } } - -//------------------------------add_liveout------------------------------------ // Add a vector of live-out values to a given blocks live-out set. void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) { IndexSet *live = &_live[p->_pre_order-1]; @@ -260,7 +250,6 @@ } #ifndef PRODUCT -//------------------------------dump------------------------------------------- // Dump the live-out set for a block void PhaseLive::dump( const Block *b ) const { tty->print("Block %d: ",b->_pre_order); @@ -273,18 +262,19 @@ tty->print("\n"); } -//------------------------------verify_base_ptrs------------------------------- // Verify that base pointers and derived pointers are still sane. void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { #ifdef ASSERT Unique_Node_List worklist(a); - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - for( uint j = b->end_idx() + 1; j > 1; j-- ) { - Node *n = b->_nodes[j-1]; - if( n->is_Phi() ) break; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); + for (uint j = block->end_idx() + 1; j > 1; j--) { + Node* n = block->_nodes[j-1]; + if (n->is_Phi()) { + break; + } // Found a safepoint? - if( n->is_MachSafePoint() ) { + if (n->is_MachSafePoint()) { MachSafePointNode *sfpt = n->as_MachSafePoint(); JVMState* jvms = sfpt->jvms(); if (jvms != NULL) { @@ -356,7 +346,6 @@ #endif } -//------------------------------verify------------------------------------- // Verify that graphs and base pointers are still sane. void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const { #ifdef ASSERT diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/loopTransform.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -624,8 +624,6 @@ } -#define MAX_UNROLL 16 // maximum number of unrolls for main loop - //------------------------------policy_unroll---------------------------------- // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if // the loop is a CountedLoop and the body is small enough. @@ -642,7 +640,7 @@ if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false; int future_unroll_ct = cl->unrolled_count() * 2; - if (future_unroll_ct > MAX_UNROLL) return false; + if (future_unroll_ct > LoopMaxUnroll) return false; // Check for initial stride being a small enough constant if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/matcher.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -70,8 +70,8 @@ const uint Matcher::_end_rematerialize = _END_REMATERIALIZE; //---------------------------Matcher------------------------------------------- -Matcher::Matcher( Node_List &proj_list ) : - PhaseTransform( Phase::Ins_Select ), +Matcher::Matcher() +: PhaseTransform( Phase::Ins_Select ), #ifdef ASSERT _old2new_map(C->comp_arena()), _new2old_map(C->comp_arena()), @@ -81,7 +81,7 @@ _swallowed(swallowed), _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE), _end_inst_chain_rule(_END_INST_CHAIN_RULE), - _must_clone(must_clone), _proj_list(proj_list), + _must_clone(must_clone), _register_save_policy(register_save_policy), _c_reg_save_policy(c_reg_save_policy), _register_save_type(register_save_type), @@ -1307,8 +1307,9 @@ for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++) proj->_rout.Insert(OptoReg::Name(i)); } - if( proj->_rout.is_NotEmpty() ) - _proj_list.push(proj); + if (proj->_rout.is_NotEmpty()) { + push_projection(proj); + } } // Transfer the safepoint information from the call to the mcall // Move the JVMState list @@ -1688,14 +1689,15 @@ } // If the _leaf is an AddP, insert the base edge - if( leaf->is_AddP() ) + if (leaf->is_AddP()) { mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base)); + } - uint num_proj = _proj_list.size(); + uint number_of_projections_prior = number_of_projections(); // Perform any 1-to-many expansions required - MachNode *ex = mach->Expand(s,_proj_list, mem); - if( ex != mach ) { + MachNode *ex = mach->Expand(s, _projection_list, mem); + if (ex != mach) { assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match"); if( ex->in(1)->is_Con() ) ex->in(1)->set_req(0, C->root()); @@ -1716,7 +1718,7 @@ // generated belatedly during spill code generation. if (_allocation_started) { guarantee(ex == mach, "no expand rules during spill generation"); - guarantee(_proj_list.size() == num_proj, "no allocation during spill generation"); + guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation"); } if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/matcher.hpp --- a/src/share/vm/opto/matcher.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/matcher.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -88,7 +88,7 @@ Node *transform( Node *dummy ); - Node_List &_proj_list; // For Machine nodes killing many values + Node_List _projection_list; // For Machine nodes killing many values Node_Array _shared_nodes; @@ -183,10 +183,30 @@ void collect_null_checks( Node *proj, Node *orig_proj ); void validate_null_checks( ); - Matcher( Node_List &proj_list ); + Matcher(); + + // Get a projection node at position pos + Node* get_projection(uint pos) { + return _projection_list[pos]; + } + + // Push a projection node onto the projection list + void push_projection(Node* node) { + _projection_list.push(node); + } + + Node* pop_projection() { + return _projection_list.pop(); + } + + // Number of nodes in the projection list + uint number_of_projections() const { + return _projection_list.size(); + } // Select instructions for entire method - void match( ); + void match(); + // Helper for match OptoReg::Name warp_incoming_stk_arg( VMReg reg ); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/node.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -42,7 +42,6 @@ class AllocateArrayNode; class AllocateNode; class Block; -class Block_Array; class BoolNode; class BoxLockNode; class CMoveNode; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/output.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -54,11 +54,10 @@ extern int emit_exception_handler(CodeBuffer &cbuf); extern int emit_deopt_handler(CodeBuffer &cbuf); -//------------------------------Output----------------------------------------- // Convert Nodes to instruction bits and pass off to the VM void Compile::Output() { // RootNode goes - assert( _cfg->_broot->_nodes.size() == 0, "" ); + assert( _cfg->get_root_block()->_nodes.size() == 0, "" ); // The number of new nodes (mostly MachNop) is proportional to // the number of java calls and inner loops which are aligned. @@ -68,17 +67,16 @@ return; } // Make sure I can find the Start Node - Block_Array& bbs = _cfg->_bbs; - Block *entry = _cfg->_blocks[1]; - Block *broot = _cfg->_broot; + Block *entry = _cfg->get_block(1); + Block *broot = _cfg->get_root_block(); const StartNode *start = entry->_nodes[0]->as_Start(); // Replace StartNode with prolog MachPrologNode *prolog = new (this) MachPrologNode(); entry->_nodes.map( 0, prolog ); - bbs.map( prolog->_idx, entry ); - bbs.map( start->_idx, NULL ); // start is no longer in any block + _cfg->map_node_to_block(prolog, entry); + _cfg->unmap_node_from_block(start); // start is no longer in any block // Virtual methods need an unverified entry point @@ -110,41 +108,44 @@ } // Insert epilogs before every return - for( uint i=0; i<_cfg->_num_blocks; i++ ) { - Block *b = _cfg->_blocks[i]; - if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point? - Node *m = b->end(); - if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) { - MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); - b->add_inst( epilog ); - bbs.map(epilog->_idx, b); - //_regalloc->set_bad(epilog->_idx); // Already initialized this way. + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + Block* block = _cfg->get_block(i); + if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point? + Node* m = block->end(); + if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) { + MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return); + block->add_inst(epilog); + _cfg->map_node_to_block(epilog, block); } } } # ifdef ENABLE_ZAP_DEAD_LOCALS - if ( ZapDeadCompiledLocals ) Insert_zap_nodes(); + if (ZapDeadCompiledLocals) { + Insert_zap_nodes(); + } # endif - uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1); - blk_starts[0] = 0; + uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1); + blk_starts[0] = 0; // Initialize code buffer and process short branches. CodeBuffer* cb = init_buffer(blk_starts); - if (cb == NULL || failing()) return; + if (cb == NULL || failing()) { + return; + } ScheduleAndBundle(); #ifndef PRODUCT if (trace_opto_output()) { tty->print("\n---- After ScheduleAndBundle ----\n"); - for (uint i = 0; i < _cfg->_num_blocks; i++) { + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { tty->print("\nBB#%03d:\n", i); - Block *bb = _cfg->_blocks[i]; - for (uint j = 0; j < bb->_nodes.size(); j++) { - Node *n = bb->_nodes[j]; + Block* block = _cfg->get_block(i); + for (uint j = 0; j < block->_nodes.size(); j++) { + Node* n = block->_nodes[j]; OptoReg::Name reg = _regalloc->get_reg_first(n); tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); n->dump(); @@ -153,11 +154,15 @@ } #endif - if (failing()) return; + if (failing()) { + return; + } BuildOopMaps(); - if (failing()) return; + if (failing()) { + return; + } fill_buffer(cb, blk_starts); } @@ -219,8 +224,8 @@ return; // no safepoints/oopmaps emitted for calls in stubs,so we don't care // Insert call to zap runtime stub before every node with an oop map - for( uint i=0; i<_cfg->_num_blocks; i++ ) { - Block *b = _cfg->_blocks[i]; + for( uint i=0; i<_cfg->number_of_blocks(); i++ ) { + Block *b = _cfg->get_block(i); for ( uint j = 0; j < b->_nodes.size(); ++j ) { Node *n = b->_nodes[j]; @@ -252,7 +257,7 @@ if (insert) { Node *zap = call_zap_node(n->as_MachSafePoint(), i); b->_nodes.insert( j, zap ); - _cfg->_bbs.map( zap->_idx, b ); + _cfg->map_node_to_block(zap, b); ++j; } } @@ -277,7 +282,6 @@ return _matcher->match_sfpt(ideal_node); } -//------------------------------is_node_getting_a_safepoint-------------------- bool Compile::is_node_getting_a_safepoint( Node* n) { // This code duplicates the logic prior to the call of add_safepoint // below in this file. @@ -287,7 +291,6 @@ # endif // ENABLE_ZAP_DEAD_LOCALS -//------------------------------compute_loop_first_inst_sizes------------------ // Compute the size of first NumberOfLoopInstrToAlign instructions at the top // of a loop. When aligning a loop we need to provide enough instructions // in cpu's fetch buffer to feed decoders. The loop alignment could be @@ -304,42 +307,39 @@ // or alignment padding is larger then MaxLoopPad. By default, MaxLoopPad // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is // equal to 11 bytes which is the largest address NOP instruction. - if( MaxLoopPad < OptoLoopAlignment-1 ) { - uint last_block = _cfg->_num_blocks-1; - for( uint i=1; i <= last_block; i++ ) { - Block *b = _cfg->_blocks[i]; + if (MaxLoopPad < OptoLoopAlignment - 1) { + uint last_block = _cfg->number_of_blocks() - 1; + for (uint i = 1; i <= last_block; i++) { + Block* block = _cfg->get_block(i); // Check the first loop's block which requires an alignment. - if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) { + if (block->loop_alignment() > (uint)relocInfo::addr_unit()) { uint sum_size = 0; uint inst_cnt = NumberOfLoopInstrToAlign; - inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc); + inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc); // Check subsequent fallthrough blocks if the loop's first // block(s) does not have enough instructions. - Block *nb = b; - while( inst_cnt > 0 && - i < last_block && - !_cfg->_blocks[i+1]->has_loop_alignment() && - !nb->has_successor(b) ) { + Block *nb = block; + while(inst_cnt > 0 && + i < last_block && + !_cfg->get_block(i + 1)->has_loop_alignment() && + !nb->has_successor(block)) { i++; - nb = _cfg->_blocks[i]; + nb = _cfg->get_block(i); inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc); } // while( inst_cnt > 0 && i < last_block ) - b->set_first_inst_size(sum_size); + block->set_first_inst_size(sum_size); } // f( b->head()->is_Loop() ) } // for( i <= last_block ) } // if( MaxLoopPad < OptoLoopAlignment-1 ) } -//----------------------shorten_branches--------------------------------------- // The architecture description provides short branch variants for some long // branch instructions. Replace eligible long branches with short branches. void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { - - // ------------------ // Compute size of each block, method size, and relocation information size - uint nblocks = _cfg->_num_blocks; + uint nblocks = _cfg->number_of_blocks(); uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks); uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks); @@ -366,7 +366,7 @@ uint last_avoid_back_to_back_adr = max_uint; uint nop_size = (new (this) MachNopNode())->size(_regalloc); for (uint i = 0; i < nblocks; i++) { // For all blocks - Block *b = _cfg->_blocks[i]; + Block* block = _cfg->get_block(i); // During short branch replacement, we store the relative (to blk_starts) // offset of jump in jmp_offset, rather than the absolute offset of jump. @@ -379,10 +379,10 @@ DEBUG_ONLY( jmp_rule[i] = 0; ) // Sum all instruction sizes to compute block size - uint last_inst = b->_nodes.size(); + uint last_inst = block->_nodes.size(); uint blk_size = 0; for (uint j = 0; j < last_inst; j++) { - Node* nj = b->_nodes[j]; + Node* nj = block->_nodes[j]; // Handle machine instruction nodes if (nj->is_Mach()) { MachNode *mach = nj->as_Mach(); @@ -443,8 +443,8 @@ // When the next block starts a loop, we may insert pad NOP // instructions. Since we cannot know our future alignment, // assume the worst. - if (i< nblocks-1) { - Block *nb = _cfg->_blocks[i+1]; + if (i < nblocks - 1) { + Block* nb = _cfg->get_block(i + 1); int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit(); if (max_loop_pad > 0) { assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), ""); @@ -475,26 +475,26 @@ has_short_branch_candidate = false; int adjust_block_start = 0; for (uint i = 0; i < nblocks; i++) { - Block *b = _cfg->_blocks[i]; + Block* block = _cfg->get_block(i); int idx = jmp_nidx[i]; - MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach(); + MachNode* mach = (idx == -1) ? NULL: block->_nodes[idx]->as_Mach(); if (mach != NULL && mach->may_be_short_branch()) { #ifdef ASSERT assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity"); int j; // Find the branch; ignore trailing NOPs. - for (j = b->_nodes.size()-1; j>=0; j--) { - Node* n = b->_nodes[j]; + for (j = block->_nodes.size()-1; j>=0; j--) { + Node* n = block->_nodes[j]; if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) break; } - assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity"); + assert(j >= 0 && j == idx && block->_nodes[j] == (Node*)mach, "sanity"); #endif int br_size = jmp_size[i]; int br_offs = blk_starts[i] + jmp_offset[i]; // This requires the TRUE branch target be in succs[0] - uint bnum = b->non_connector_successor(0)->_pre_order; + uint bnum = block->non_connector_successor(0)->_pre_order; int offset = blk_starts[bnum] - br_offs; if (bnum > i) { // adjust following block's offset offset -= adjust_block_start; @@ -522,7 +522,7 @@ diff -= nop_size; } adjust_block_start += diff; - b->_nodes.map(idx, replacement); + block->_nodes.map(idx, replacement); mach->subsume_by(replacement, C); mach = replacement; progress = true; @@ -1085,8 +1085,8 @@ if (has_mach_constant_base_node()) { // Fill the constant table. // Note: This must happen before shorten_branches. - for (uint i = 0; i < _cfg->_num_blocks; i++) { - Block* b = _cfg->_blocks[i]; + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + Block* b = _cfg->get_block(i); for (uint j = 0; j < b->_nodes.size(); j++) { Node* n = b->_nodes[j]; @@ -1172,7 +1172,7 @@ // !!!!! This preserves old handling of oopmaps for now debug_info()->set_oopmaps(_oop_map_set); - uint nblocks = _cfg->_num_blocks; + uint nblocks = _cfg->number_of_blocks(); // Count and start of implicit null check instructions uint inct_cnt = 0; uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1); @@ -1220,21 +1220,21 @@ // Now fill in the code buffer Node *delay_slot = NULL; - for (uint i=0; i < nblocks; i++) { - Block *b = _cfg->_blocks[i]; - - Node *head = b->head(); + for (uint i = 0; i < nblocks; i++) { + Block* block = _cfg->get_block(i); + Node* head = block->head(); // If this block needs to start aligned (i.e, can be reached other // than by falling-thru from the previous block), then force the // start of a new bundle. - if (Pipeline::requires_bundling() && starts_bundle(head)) + if (Pipeline::requires_bundling() && starts_bundle(head)) { cb->flush_bundle(true); + } #ifdef ASSERT - if (!b->is_connector()) { + if (!block->is_connector()) { stringStream st; - b->dump_head(&_cfg->_bbs, &st); + block->dump_head(_cfg, &st); MacroAssembler(cb).block_comment(st.as_string()); } jmp_target[i] = 0; @@ -1245,16 +1245,16 @@ int blk_offset = current_offset; // Define the label at the beginning of the basic block - MacroAssembler(cb).bind(blk_labels[b->_pre_order]); - - uint last_inst = b->_nodes.size(); + MacroAssembler(cb).bind(blk_labels[block->_pre_order]); + + uint last_inst = block->_nodes.size(); // Emit block normally, except for last instruction. // Emit means "dump code bits into code buffer". for (uint j = 0; j_nodes[j]; + Node* n = block->_nodes[j]; // See if delay slots are supported if (valid_bundle_info(n) && @@ -1308,9 +1308,9 @@ assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); int nops_cnt = padding / nop_size; MachNode *nop = new (this) MachNopNode(nops_cnt); - b->_nodes.insert(j++, nop); + block->_nodes.insert(j++, nop); last_inst++; - _cfg->_bbs.map( nop->_idx, b ); + _cfg->map_node_to_block(nop, block); nop->emit(*cb, _regalloc); cb->flush_bundle(true); current_offset = cb->insts_size(); @@ -1324,7 +1324,7 @@ mcall->method_set((intptr_t)mcall->entry_point()); // Save the return address - call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset(); + call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset(); if (mcall->is_MachCallLeaf()) { is_mcall = false; @@ -1361,7 +1361,7 @@ // If this is a branch, then fill in the label with the target BB's label else if (mach->is_MachBranch()) { // This requires the TRUE branch target be in succs[0] - uint block_num = b->non_connector_successor(0)->_pre_order; + uint block_num = block->non_connector_successor(0)->_pre_order; // Try to replace long branch if delay slot is not used, // it is mostly for back branches since forward branch's @@ -1394,8 +1394,8 @@ // Insert padding between avoid_back_to_back branches. if (needs_padding && replacement->avoid_back_to_back()) { MachNode *nop = new (this) MachNopNode(); - b->_nodes.insert(j++, nop); - _cfg->_bbs.map(nop->_idx, b); + block->_nodes.insert(j++, nop); + _cfg->map_node_to_block(nop, block); last_inst++; nop->emit(*cb, _regalloc); cb->flush_bundle(true); @@ -1407,7 +1407,7 @@ jmp_size[i] = new_size; jmp_rule[i] = mach->rule(); #endif - b->_nodes.map(j, replacement); + block->_nodes.map(j, replacement); mach->subsume_by(replacement, C); n = replacement; mach = replacement; @@ -1415,8 +1415,8 @@ } mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num ); } else if (mach->ideal_Opcode() == Op_Jump) { - for (uint h = 0; h < b->_num_succs; h++) { - Block* succs_block = b->_succs[h]; + for (uint h = 0; h < block->_num_succs; h++) { + Block* succs_block = block->_succs[h]; for (uint j = 1; j < succs_block->num_preds(); j++) { Node* jpn = succs_block->pred(j); if (jpn->is_JumpProj() && jpn->in(0) == mach) { @@ -1427,7 +1427,6 @@ } } } - #ifdef ASSERT // Check that oop-store precedes the card-mark else if (mach->ideal_Opcode() == Op_StoreCM) { @@ -1438,17 +1437,18 @@ if (oop_store == NULL) continue; count++; uint i4; - for( i4 = 0; i4 < last_inst; ++i4 ) { - if( b->_nodes[i4] == oop_store ) break; + for (i4 = 0; i4 < last_inst; ++i4) { + if (block->_nodes[i4] == oop_store) { + break; + } } // Note: This test can provide a false failure if other precedence // edges have been added to the storeCMNode. - assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store"); + assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store"); } assert(count > 0, "storeCM expects at least one precedence edge"); } #endif - else if (!n->is_Proj()) { // Remember the beginning of the previous instruction, in case // it's followed by a flag-kill and a null-check. Happens on @@ -1544,12 +1544,12 @@ // If the next block is the top of a loop, pad this block out to align // the loop top a little. Helps prevent pipe stalls at loop back branches. if (i < nblocks-1) { - Block *nb = _cfg->_blocks[i+1]; + Block *nb = _cfg->get_block(i + 1); int padding = nb->alignment_padding(current_offset); if( padding > 0 ) { MachNode *nop = new (this) MachNopNode(padding / nop_size); - b->_nodes.insert( b->_nodes.size(), nop ); - _cfg->_bbs.map( nop->_idx, b ); + block->_nodes.insert(block->_nodes.size(), nop); + _cfg->map_node_to_block(nop, block); nop->emit(*cb, _regalloc); current_offset = cb->insts_size(); } @@ -1589,8 +1589,6 @@ } #endif - // ------------------ - #ifndef PRODUCT // Information on the size of the method, without the extraneous code Scheduling::increment_method_size(cb->insts_size()); @@ -1651,52 +1649,55 @@ _inc_table.set_size(cnt); uint inct_cnt = 0; - for( uint i=0; i<_cfg->_num_blocks; i++ ) { - Block *b = _cfg->_blocks[i]; + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + Block* block = _cfg->get_block(i); Node *n = NULL; int j; // Find the branch; ignore trailing NOPs. - for( j = b->_nodes.size()-1; j>=0; j-- ) { - n = b->_nodes[j]; - if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con ) + for (j = block->_nodes.size() - 1; j >= 0; j--) { + n = block->_nodes[j]; + if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) { break; + } } // If we didn't find anything, continue - if( j < 0 ) continue; + if (j < 0) { + continue; + } // Compute ExceptionHandlerTable subtable entry and add it // (skip empty blocks) - if( n->is_Catch() ) { + if (n->is_Catch()) { // Get the offset of the return from the call - uint call_return = call_returns[b->_pre_order]; + uint call_return = call_returns[block->_pre_order]; #ifdef ASSERT assert( call_return > 0, "no call seen for this basic block" ); - while( b->_nodes[--j]->is_MachProj() ) ; - assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" ); + while (block->_nodes[--j]->is_MachProj()) ; + assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call"); #endif // last instruction is a CatchNode, find it's CatchProjNodes - int nof_succs = b->_num_succs; + int nof_succs = block->_num_succs; // allocate space GrowableArray handler_bcis(nof_succs); GrowableArray handler_pcos(nof_succs); // iterate through all successors for (int j = 0; j < nof_succs; j++) { - Block* s = b->_succs[j]; + Block* s = block->_succs[j]; bool found_p = false; - for( uint k = 1; k < s->num_preds(); k++ ) { - Node *pk = s->pred(k); - if( pk->is_CatchProj() && pk->in(0) == n ) { + for (uint k = 1; k < s->num_preds(); k++) { + Node* pk = s->pred(k); + if (pk->is_CatchProj() && pk->in(0) == n) { const CatchProjNode* p = pk->as_CatchProj(); found_p = true; // add the corresponding handler bci & pco information - if( p->_con != CatchProjNode::fall_through_index ) { + if (p->_con != CatchProjNode::fall_through_index) { // p leads to an exception handler (and is not fall through) - assert(s == _cfg->_blocks[s->_pre_order],"bad numbering"); + assert(s == _cfg->get_block(s->_pre_order), "bad numbering"); // no duplicates, please - if( !handler_bcis.contains(p->handler_bci()) ) { + if (!handler_bcis.contains(p->handler_bci())) { uint block_num = s->non_connector()->_pre_order; handler_bcis.append(p->handler_bci()); handler_pcos.append(blk_labels[block_num].loc_pos()); @@ -1715,9 +1716,9 @@ } // Handle implicit null exception table updates - if( n->is_MachNullCheck() ) { - uint block_num = b->non_connector_successor(0)->_pre_order; - _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() ); + if (n->is_MachNullCheck()) { + uint block_num = block->non_connector_successor(0)->_pre_order; + _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos()); continue; } } // End of for all blocks fill in exception table entries @@ -1737,7 +1738,6 @@ Scheduling::Scheduling(Arena *arena, Compile &compile) : _arena(arena), _cfg(compile.cfg()), - _bbs(compile.cfg()->_bbs), _regalloc(compile.regalloc()), _reg_node(arena), _bundle_instr_count(0), @@ -1777,14 +1777,12 @@ memset(_current_latency, 0, node_max * sizeof(unsigned short)); // Clear the bundling information - memcpy(_bundle_use_elements, - Pipeline_Use::elaborated_elements, - sizeof(Pipeline_Use::elaborated_elements)); + memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements)); // Get the last node - Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1]; - - _next_node = bb->_nodes[bb->_nodes.size()-1]; + Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1); + + _next_node = block->_nodes[block->_nodes.size() - 1]; } #ifndef PRODUCT @@ -1834,7 +1832,6 @@ sizeof(Pipeline_Use::elaborated_elements)); } -//------------------------------ScheduleAndBundle------------------------------ // Perform instruction scheduling and bundling over the sequence of // instructions in backwards order. void Compile::ScheduleAndBundle() { @@ -1861,7 +1858,6 @@ scheduling.DoScheduling(); } -//------------------------------ComputeLocalLatenciesForward------------------- // Compute the latency of all the instructions. This is fairly simple, // because we already have a legal ordering. Walk over the instructions // from first to last, and compute the latency of the instruction based @@ -2031,7 +2027,6 @@ return _available[0]; } -//------------------------------AddNodeToAvailableList------------------------- void Scheduling::AddNodeToAvailableList(Node *n) { assert( !n->is_Proj(), "projections never directly made available" ); #ifndef PRODUCT @@ -2077,7 +2072,6 @@ #endif } -//------------------------------DecrementUseCounts----------------------------- void Scheduling::DecrementUseCounts(Node *n, const Block *bb) { for ( uint i=0; i < n->len(); i++ ) { Node *def = n->in(i); @@ -2085,8 +2079,9 @@ if( def->is_Proj() ) // If this is a machine projection, then def = def->in(0); // propagate usage thru to the base instruction - if( _bbs[def->_idx] != bb ) // Ignore if not block-local + if(_cfg->get_block_for_node(def) != bb) { // Ignore if not block-local continue; + } // Compute the latency uint l = _bundle_cycle_number + n->latency(i); @@ -2099,7 +2094,6 @@ } } -//------------------------------AddNodeToBundle-------------------------------- void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { #ifndef PRODUCT if (_cfg->C->trace_opto_output()) { @@ -2314,7 +2308,6 @@ DecrementUseCounts(n,bb); } -//------------------------------ComputeUseCount-------------------------------- // This method sets the use count within a basic block. We will ignore all // uses outside the current basic block. As we are doing a backwards walk, // any node we reach that has a use count of 0 may be scheduled. This also @@ -2358,9 +2351,10 @@ Node *inp = n->in(k); if (!inp) continue; assert(inp != n, "no cycles allowed" ); - if( _bbs[inp->_idx] == bb ) { // Block-local use? - if( inp->is_Proj() ) // Skip through Proj's + if (_cfg->get_block_for_node(inp) == bb) { // Block-local use? + if (inp->is_Proj()) { // Skip through Proj's inp = inp->in(0); + } ++_uses[inp->_idx]; // Count 1 block-local use } } @@ -2398,20 +2392,22 @@ Block *bb; // Walk over all the basic blocks in reverse order - for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) { - bb = _cfg->_blocks[i]; + for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) { + bb = _cfg->get_block(i); #ifndef PRODUCT if (_cfg->C->trace_opto_output()) { tty->print("# Schedule BB#%03d (initial)\n", i); - for (uint j = 0; j < bb->_nodes.size(); j++) + for (uint j = 0; j < bb->_nodes.size(); j++) { bb->_nodes[j]->dump(); + } } #endif // On the head node, skip processing - if( bb == _cfg->_broot ) + if (bb == _cfg->get_root_block()) { continue; + } // Skip empty, connector blocks if (bb->is_connector()) @@ -2548,7 +2544,6 @@ } // end DoScheduling -//------------------------------verify_good_schedule--------------------------- // Verify that no live-range used in the block is killed in the block by a // wrong DEF. This doesn't verify live-ranges that span blocks. @@ -2561,7 +2556,6 @@ } #ifdef ASSERT -//------------------------------verify_do_def---------------------------------- void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) { // Check for bad kills if( OptoReg::is_valid(def) ) { // Ignore stores & control flow @@ -2577,7 +2571,6 @@ } } -//------------------------------verify_good_schedule--------------------------- void Scheduling::verify_good_schedule( Block *b, const char *msg ) { // Zap to something reasonable for the verify code @@ -2637,13 +2630,12 @@ from->add_prec(to); } -//------------------------------anti_do_def------------------------------------ void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) { if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow return; Node *pinch = _reg_node[def_reg]; // Get pinch point - if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet? + if ((pinch == NULL) || _cfg->get_block_for_node(pinch) != b || // No pinch-point yet? is_def ) { // Check for a true def (not a kill) _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point return; @@ -2669,7 +2661,7 @@ _cfg->C->record_method_not_compilable("too many D-U pinch points"); return; } - _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init) + _cfg->map_node_to_block(pinch, b); // Pretend it's valid in this block (lazy init) _reg_node.map(def_reg,pinch); // Record pinch-point //_regalloc->set_bad(pinch->_idx); // Already initialized this way. if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill @@ -2707,15 +2699,14 @@ add_prec_edge_from_to(kill,pinch); } -//------------------------------anti_do_use------------------------------------ void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) { if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow return; Node *pinch = _reg_node[use_reg]; // Get pinch point // Check for no later def_reg/kill in block - if( pinch && _bbs[pinch->_idx] == b && + if ((pinch != NULL) && _cfg->get_block_for_node(pinch) == b && // Use has to be block-local as well - _bbs[use->_idx] == b ) { + _cfg->get_block_for_node(use) == b) { if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?) pinch->req() == 1 ) { // pinch not yet in block? pinch->del_req(0); // yank pointer to later-def, also set flag @@ -2728,7 +2719,6 @@ } } -//------------------------------ComputeRegisterAntidependences----------------- // We insert antidependences between the reads and following write of // allocated registers to prevent illegal code motion. Hopefully, the // number of added references should be fairly small, especially as we @@ -2862,8 +2852,6 @@ } } -//------------------------------garbage_collect_pinch_nodes------------------------------- - // Garbage collect pinch nodes for reuse by other blocks. // // The block scheduler's insertion of anti-dependence @@ -2895,7 +2883,7 @@ int trace_cnt = 0; for (uint k = 0; k < _reg_node.Size(); k++) { Node* pinch = _reg_node[k]; - if (pinch != NULL && pinch->Opcode() == Op_Node && + if ((pinch != NULL) && pinch->Opcode() == Op_Node && // no predecence input edges (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { cleanup_pinch(pinch); @@ -2938,7 +2926,6 @@ pinch->set_req(0, NULL); } -//------------------------------print_statistics------------------------------- #ifndef PRODUCT void Scheduling::dump_available() const { diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/output.hpp --- a/src/share/vm/opto/output.hpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/output.hpp Thu Sep 05 11:04:39 2013 -0700 @@ -99,9 +99,6 @@ // List of nodes currently available for choosing for scheduling Node_List _available; - // Mapping from node (index) to basic block - Block_Array& _bbs; - // For each instruction beginning a bundle, the number of following // nodes to be bundled with it. Bundle *_node_bundling_base; diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/phaseX.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -1643,8 +1643,8 @@ bool method_name_not_printed = true; // Examine each basic block - for( uint block_number = 1; block_number < _cfg._num_blocks; ++block_number ) { - Block *block = _cfg._blocks[block_number]; + for (uint block_number = 1; block_number < _cfg.number_of_blocks(); ++block_number) { + Block* block = _cfg.get_block(block_number); bool block_not_printed = true; // and each instruction within a block diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/postaloc.cpp --- a/src/share/vm/opto/postaloc.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/postaloc.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -78,11 +78,13 @@ // Helper function for yank_if_dead int PhaseChaitin::yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) { int blk_adjust=0; - Block *oldb = _cfg._bbs[old->_idx]; + Block *oldb = _cfg.get_block_for_node(old); oldb->find_remove(old); // Count 1 if deleting an instruction from the current block - if( oldb == current_block ) blk_adjust++; - _cfg._bbs.map(old->_idx,NULL); + if (oldb == current_block) { + blk_adjust++; + } + _cfg.unmap_node_from_block(old); OptoReg::Name old_reg = lrgs(_lrg_map.live_range_id(old)).reg(); if( regnd && (*regnd)[old_reg]==old ) { // Instruction is currently available? value->map(old_reg,NULL); // Yank from value/regnd maps @@ -403,28 +405,29 @@ // Need a mapping from basic block Node_Lists. We need a Node_List to // map from register number to value-producing Node. - Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1); - memset( blk2value, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) ); + Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1); + memset(blk2value, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1)); // Need a mapping from basic block Node_Lists. We need a Node_List to // map from register number to register-defining Node. - Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1); - memset( blk2regnd, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) ); + Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg.number_of_blocks() + 1); + memset(blk2regnd, 0, sizeof(Node_List*) * (_cfg.number_of_blocks() + 1)); // We keep unused Node_Lists on a free_list to avoid wasting // memory. GrowableArray free_list = GrowableArray(16); // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { uint j; - Block *b = _cfg._blocks[i]; + Block* block = _cfg.get_block(i); // Count of Phis in block uint phi_dex; - for( phi_dex = 1; phi_dex < b->_nodes.size(); phi_dex++ ) { - Node *phi = b->_nodes[phi_dex]; - if( !phi->is_Phi() ) + for (phi_dex = 1; phi_dex < block->_nodes.size(); phi_dex++) { + Node* phi = block->_nodes[phi_dex]; + if (!phi->is_Phi()) { break; + } } // If any predecessor has not been visited, we do not know the state @@ -432,21 +435,23 @@ // along Phi input edges bool missing_some_inputs = false; Block *freed = NULL; - for( j = 1; j < b->num_preds(); j++ ) { - Block *pb = _cfg._bbs[b->pred(j)->_idx]; + for (j = 1; j < block->num_preds(); j++) { + Block* pb = _cfg.get_block_for_node(block->pred(j)); // Remove copies along phi edges - for( uint k=1; k_nodes[k], j, b, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false ); - if( blk2value[pb->_pre_order] ) { // Have a mapping on this edge? + for (uint k = 1; k < phi_dex; k++) { + elide_copy(block->_nodes[k], j, block, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false); + } + if (blk2value[pb->_pre_order]) { // Have a mapping on this edge? // See if this predecessor's mappings have been used by everybody // who wants them. If so, free 'em. uint k; - for( k=0; k_num_succs; k++ ) { - Block *pbsucc = pb->_succs[k]; - if( !blk2value[pbsucc->_pre_order] && pbsucc != b ) + for (k = 0; k < pb->_num_succs; k++) { + Block* pbsucc = pb->_succs[k]; + if (!blk2value[pbsucc->_pre_order] && pbsucc != block) { break; // Found a future user + } } - if( k >= pb->_num_succs ) { // No more uses, free! + if (k >= pb->_num_succs) { // No more uses, free! freed = pb; // Record last block freed free_list.push(blk2value[pb->_pre_order]); free_list.push(blk2regnd[pb->_pre_order]); @@ -465,20 +470,20 @@ value.map(_max_reg,NULL); regnd.map(_max_reg,NULL); // Set mappings as OUR mappings - blk2value[b->_pre_order] = &value; - blk2regnd[b->_pre_order] = ®nd; + blk2value[block->_pre_order] = &value; + blk2regnd[block->_pre_order] = ®nd; // Initialize value & regnd for this block - if( missing_some_inputs ) { + if (missing_some_inputs) { // Some predecessor has not yet been visited; zap map to empty - for( uint k = 0; k < (uint)_max_reg; k++ ) { + for (uint k = 0; k < (uint)_max_reg; k++) { value.map(k,NULL); regnd.map(k,NULL); } } else { if( !freed ) { // Didn't get a freebie prior block // Must clone some data - freed = _cfg._bbs[b->pred(1)->_idx]; + freed = _cfg.get_block_for_node(block->pred(1)); Node_List &f_value = *blk2value[freed->_pre_order]; Node_List &f_regnd = *blk2regnd[freed->_pre_order]; for( uint k = 0; k < (uint)_max_reg; k++ ) { @@ -487,9 +492,11 @@ } } // Merge all inputs together, setting to NULL any conflicts. - for( j = 1; j < b->num_preds(); j++ ) { - Block *pb = _cfg._bbs[b->pred(j)->_idx]; - if( pb == freed ) continue; // Did self already via freelist + for (j = 1; j < block->num_preds(); j++) { + Block* pb = _cfg.get_block_for_node(block->pred(j)); + if (pb == freed) { + continue; // Did self already via freelist + } Node_List &p_regnd = *blk2regnd[pb->_pre_order]; for( uint k = 0; k < (uint)_max_reg; k++ ) { if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs? @@ -501,9 +508,9 @@ } // For all Phi's - for( j = 1; j < phi_dex; j++ ) { + for (j = 1; j < phi_dex; j++) { uint k; - Node *phi = b->_nodes[j]; + Node *phi = block->_nodes[j]; uint pidx = _lrg_map.live_range_id(phi); OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); @@ -514,9 +521,10 @@ if( phi != x && u != x ) // Found a different input u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input } - if( u != NodeSentinel ) { // Junk Phi. Remove - b->_nodes.remove(j--); phi_dex--; - _cfg._bbs.map(phi->_idx,NULL); + if (u != NodeSentinel) { // Junk Phi. Remove + block->_nodes.remove(j--); + phi_dex--; + _cfg.unmap_node_from_block(phi); phi->replace_by(u); phi->disconnect_inputs(NULL, C); continue; @@ -544,13 +552,13 @@ } // For all remaining instructions - for( j = phi_dex; j < b->_nodes.size(); j++ ) { - Node *n = b->_nodes[j]; + for (j = phi_dex; j < block->_nodes.size(); j++) { + Node* n = block->_nodes[j]; - if( n->outcnt() == 0 && // Dead? - n != C->top() && // (ignore TOP, it has no du info) - !n->is_Proj() ) { // fat-proj kills - j -= yank_if_dead(n,b,&value,®nd); + if(n->outcnt() == 0 && // Dead? + n != C->top() && // (ignore TOP, it has no du info) + !n->is_Proj() ) { // fat-proj kills + j -= yank_if_dead(n, block, &value, ®nd); continue; } @@ -595,8 +603,9 @@ const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0; // Remove copies along input edges - for( k = 1; k < n->req(); k++ ) - j -= elide_copy( n, k, b, value, regnd, two_adr!=k ); + for (k = 1; k < n->req(); k++) { + j -= elide_copy(n, k, block, value, regnd, two_adr != k); + } // Unallocated Nodes define no registers uint lidx = _lrg_map.live_range_id(n); @@ -627,8 +636,8 @@ // then 'n' is a useless copy. Do not update the register->node // mapping so 'n' will go dead. if( value[nreg] != val ) { - if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, OptoReg::Bad)) { - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, OptoReg::Bad)) { + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } else { // Update the mapping: record new Node defined by the register regnd.map(nreg,n); @@ -637,8 +646,8 @@ value.map(nreg,val); } } else if( !may_be_copy_of_callee(n) ) { - assert( n->is_Copy(), "" ); - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + assert(n->is_Copy(), ""); + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } } else if (RegMask::is_vector(n_ideal_reg)) { // If Node 'n' does not change the value mapped by the register, @@ -657,7 +666,7 @@ } } else if (n->is_Copy()) { // Note: vector can't be constant and can't be copy of calee. - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } } else { // If the value occupies a register pair, record same info @@ -671,18 +680,18 @@ tmp.Remove(nreg); nreg_lo = tmp.find_first_elem(); } - if( value[nreg] != val || value[nreg_lo] != val ) { - if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, nreg_lo)) { - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + if (value[nreg] != val || value[nreg_lo] != val) { + if (eliminate_copy_of_constant(val, n, block, value, regnd, nreg, nreg_lo)) { + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } else { regnd.map(nreg , n ); regnd.map(nreg_lo, n ); value.map(nreg ,val); value.map(nreg_lo,val); } - } else if( !may_be_copy_of_callee(n) ) { - assert( n->is_Copy(), "" ); - j -= replace_and_yank_if_dead(n, nreg, b, value, regnd); + } else if (!may_be_copy_of_callee(n)) { + assert(n->is_Copy(), ""); + j -= replace_and_yank_if_dead(n, nreg, block, value, regnd); } } diff -r bdd155477289 -r e2722a66aba7 src/share/vm/opto/reg_split.cpp --- a/src/share/vm/opto/reg_split.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/opto/reg_split.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -132,7 +132,7 @@ } b->_nodes.insert(i,spill); // Insert node in block - _cfg._bbs.map(spill->_idx,b); // Update node->block mapping to reflect + _cfg.map_node_to_block(spill, b); // Update node->block mapping to reflect // Adjust the point where we go hi-pressure if( i <= b->_ihrp_index ) b->_ihrp_index++; if( i <= b->_fhrp_index ) b->_fhrp_index++; @@ -219,7 +219,7 @@ use->set_req(useidx, def); } else { // Block and index where the use occurs. - Block *b = _cfg._bbs[use->_idx]; + Block *b = _cfg.get_block_for_node(use); // Put the clone just prior to use int bindex = b->find_node(use); // DEF is UP, so must copy it DOWN and hook in USE @@ -270,7 +270,7 @@ int bindex; // Phi input spill-copys belong at the end of the prior block if( use->is_Phi() ) { - b = _cfg._bbs[b->pred(useidx)->_idx]; + b = _cfg.get_block_for_node(b->pred(useidx)); bindex = b->end_idx(); } else { // Put the clone just prior to use @@ -335,7 +335,7 @@ continue; } - Block *b_def = _cfg._bbs[def->_idx]; + Block *b_def = _cfg.get_block_for_node(def); int idx_def = b_def->find_node(def); Node *in_spill = get_spillcopy_wide( in, def, i ); if( !in_spill ) return 0; // Bailed out @@ -397,10 +397,15 @@ #endif // See if the cloned def kills any flags, and copy those kills as well uint i = insidx+1; - if( clone_projs( b, i, def, spill, maxlrg) ) { + int found_projs = clone_projs( b, i, def, spill, maxlrg); + if (found_projs > 0) { // Adjust the point where we go hi-pressure - if( i <= b->_ihrp_index ) b->_ihrp_index++; - if( i <= b->_fhrp_index ) b->_fhrp_index++; + if (i <= b->_ihrp_index) { + b->_ihrp_index += found_projs; + } + if (i <= b->_fhrp_index) { + b->_fhrp_index += found_projs; + } } return spill; @@ -529,13 +534,13 @@ // a Def is UP or DOWN. UP means that it should get a register (ie - // it is always in LRP regions), and DOWN means that it is probably // on the stack (ie - it crosses HRP regions). - Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg._num_blocks+1 ); - bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg._num_blocks+1 ); + Node ***Reaches = NEW_SPLIT_ARRAY( Node**, _cfg.number_of_blocks() + 1); + bool **UP = NEW_SPLIT_ARRAY( bool*, _cfg.number_of_blocks() + 1); Node **debug_defs = NEW_SPLIT_ARRAY( Node*, spill_cnt ); VectorSet **UP_entry= NEW_SPLIT_ARRAY( VectorSet*, spill_cnt ); // Initialize Reaches & UP - for( bidx = 0; bidx < _cfg._num_blocks+1; bidx++ ) { + for (bidx = 0; bidx < _cfg.number_of_blocks() + 1; bidx++) { Reaches[bidx] = NEW_SPLIT_ARRAY( Node*, spill_cnt ); UP[bidx] = NEW_SPLIT_ARRAY( bool, spill_cnt ); Node **Reachblock = Reaches[bidx]; @@ -555,13 +560,13 @@ //----------PASS 1---------- //----------Propagation & Node Insertion Code---------- // Walk the Blocks in RPO for DEF & USE info - for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) { + for( bidx = 0; bidx < _cfg.number_of_blocks(); bidx++ ) { if (C->check_node_count(spill_cnt, out_of_nodes)) { return 0; } - b = _cfg._blocks[bidx]; + b = _cfg.get_block(bidx); // Reaches & UP arrays for this block Reachblock = Reaches[b->_pre_order]; UPblock = UP[b->_pre_order]; @@ -589,7 +594,7 @@ UPblock[slidx] = true; // Record following instruction in case 'n' rematerializes and // kills flags - Block *pred1 = _cfg._bbs[b->pred(1)->_idx]; + Block *pred1 = _cfg.get_block_for_node(b->pred(1)); continue; } @@ -601,7 +606,7 @@ // Grab predecessor block header n1 = b->pred(1); // Grab the appropriate reaching def info for inpidx - pred = _cfg._bbs[n1->_idx]; + pred = _cfg.get_block_for_node(n1); pidx = pred->_pre_order; Node **Ltmp = Reaches[pidx]; bool *Utmp = UP[pidx]; @@ -616,7 +621,7 @@ // Grab predecessor block headers n2 = b->pred(inpidx); // Grab the appropriate reaching def info for inpidx - pred = _cfg._bbs[n2->_idx]; + pred = _cfg.get_block_for_node(n2); pidx = pred->_pre_order; Ltmp = Reaches[pidx]; Utmp = UP[pidx]; @@ -701,7 +706,7 @@ // Grab predecessor block header n1 = b->pred(1); // Grab the appropriate reaching def info for k - pred = _cfg._bbs[n1->_idx]; + pred = _cfg.get_block_for_node(n1); pidx = pred->_pre_order; Node **Ltmp = Reaches[pidx]; bool *Utmp = UP[pidx]; @@ -919,7 +924,7 @@ return 0; } _lrg_map.extend(def->_idx, 0); - _cfg._bbs.map(def->_idx,b); + _cfg.map_node_to_block(def, b); n->set_req(inpidx, def); continue; } @@ -1291,7 +1296,7 @@ for( insidx = 0; insidx < phis->size(); insidx++ ) { Node *phi = phis->at(insidx); assert(phi->is_Phi(),"This list must only contain Phi Nodes"); - Block *b = _cfg._bbs[phi->_idx]; + Block *b = _cfg.get_block_for_node(phi); // Grab the live range number uint lidx = _lrg_map.find_id(phi); uint slidx = lrg2reach[lidx]; @@ -1315,7 +1320,7 @@ // DEF has the wrong UP/DOWN value. for( uint i = 1; i < b->num_preds(); i++ ) { // Get predecessor block pre-order number - Block *pred = _cfg._bbs[b->pred(i)->_idx]; + Block *pred = _cfg.get_block_for_node(b->pred(i)); pidx = pred->_pre_order; // Grab reaching def Node *def = Reaches[pidx][slidx]; @@ -1394,8 +1399,8 @@ // DEBUG #ifdef ASSERT // Validate all live range index assignments - for (bidx = 0; bidx < _cfg._num_blocks; bidx++) { - b = _cfg._blocks[bidx]; + for (bidx = 0; bidx < _cfg.number_of_blocks(); bidx++) { + b = _cfg.get_block(bidx); for (insidx = 0; insidx <= b->end_idx(); insidx++) { Node *n = b->_nodes[insidx]; uint defidx = _lrg_map.find(n); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/prims/jni.cpp --- a/src/share/vm/prims/jni.cpp Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/prims/jni.cpp Thu Sep 05 11:04:39 2013 -0700 @@ -3234,19 +3234,22 @@ HOTSPOT_JNI_GETSTRINGCHARS_ENTRY( env, string, (uintptr_t *) isCopy); #endif /* USDT2 */ - //%note jni_5 - if (isCopy != NULL) { - *isCopy = JNI_TRUE; - } oop s = JNIHandles::resolve_non_null(string); int s_len = java_lang_String::length(s); typeArrayOop s_value = java_lang_String::value(s); int s_offset = java_lang_String::offset(s); - jchar* buf = NEW_C_HEAP_ARRAY(jchar, s_len + 1, mtInternal); // add one for zero termination - if (s_len > 0) { - memcpy(buf, s_value->char_at_addr(s_offset), sizeof(jchar)*s_len); + jchar* buf = NEW_C_HEAP_ARRAY_RETURN_NULL(jchar, s_len + 1, mtInternal); // add one for zero termination + /* JNI Specification states return NULL on OOM */ + if (buf != NULL) { + if (s_len > 0) { + memcpy(buf, s_value->char_at_addr(s_offset), sizeof(jchar)*s_len); + } + buf[s_len] = 0; + //%note jni_5 + if (isCopy != NULL) { + *isCopy = JNI_TRUE; + } } - buf[s_len] = 0; #ifndef USDT2 DTRACE_PROBE1(hotspot_jni, GetStringChars__return, buf); #else /* USDT2 */ @@ -3335,9 +3338,14 @@ #endif /* USDT2 */ oop java_string = JNIHandles::resolve_non_null(string); size_t length = java_lang_String::utf8_length(java_string); - char* result = AllocateHeap(length + 1, mtInternal); - java_lang_String::as_utf8_string(java_string, result, (int) length + 1); - if (isCopy != NULL) *isCopy = JNI_TRUE; + /* JNI Specification states return NULL on OOM */ + char* result = AllocateHeap(length + 1, mtInternal, 0, AllocFailStrategy::RETURN_NULL); + if (result != NULL) { + java_lang_String::as_utf8_string(java_string, result, (int) length + 1); + if (isCopy != NULL) { + *isCopy = JNI_TRUE; + } + } #ifndef USDT2 DTRACE_PROBE1(hotspot_jni, GetStringUTFChars__return, result); #else /* USDT2 */ @@ -3591,11 +3599,16 @@ * Avoid asserts in typeArrayOop. */ \ result = (ElementType*)get_bad_address(); \ } else { \ - result = NEW_C_HEAP_ARRAY(ElementType, len, mtInternal); \ - /* copy the array to the c chunk */ \ - memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len); \ + /* JNI Specification states return NULL on OOM */ \ + result = NEW_C_HEAP_ARRAY_RETURN_NULL(ElementType, len, mtInternal); \ + if (result != NULL) { \ + /* copy the array to the c chunk */ \ + memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len); \ + if (isCopy) { \ + *isCopy = JNI_TRUE; \ + } \ + } \ } \ - if (isCopy) *isCopy = JNI_TRUE; \ DTRACE_PROBE1(hotspot_jni, Get##Result##ArrayElements__return, result);\ return result; \ JNI_END @@ -3628,11 +3641,16 @@ * Avoid asserts in typeArrayOop. */ \ result = (ElementType*)get_bad_address(); \ } else { \ - result = NEW_C_HEAP_ARRAY(ElementType, len, mtInternal); \ - /* copy the array to the c chunk */ \ - memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len); \ + /* JNI Specification states return NULL on OOM */ \ + result = NEW_C_HEAP_ARRAY_RETURN_NULL(ElementType, len, mtInternal); \ + if (result != NULL) { \ + /* copy the array to the c chunk */ \ + memcpy(result, a->Tag##_at_addr(0), sizeof(ElementType)*len); \ + if (isCopy) { \ + *isCopy = JNI_TRUE; \ + } \ + } \ } \ - if (isCopy) *isCopy = JNI_TRUE; \ ReturnProbe; \ return result; \ JNI_END @@ -5027,9 +5045,15 @@ tty->print_cr("Running test: " #unit_test_function_call); \ unit_test_function_call +// Forward declaration +void TestReservedSpace_test(); +void TestReserveMemorySpecial_test(); + void execute_internal_vm_tests() { if (ExecuteInternalVMTests) { tty->print_cr("Running internal VM tests"); + run_unit_test(TestReservedSpace_test()); + run_unit_test(TestReserveMemorySpecial_test()); run_unit_test(GlobalDefinitions::test_globals()); run_unit_test(GCTimerAllTest::all()); run_unit_test(arrayOopDesc::test_max_array_length()); diff -r bdd155477289 -r e2722a66aba7 src/share/vm/prims/jvmti.xml --- a/src/share/vm/prims/jvmti.xml Thu Aug 22 09:39:54 2013 -0700 +++ b/src/share/vm/prims/jvmti.xml Thu Sep 05 11:04:39 2013 -0700 @@ -1,7 +1,7 @@