# HG changeset patch # User kvn # Date 1395792456 25200 # Node ID 62c54fcc0a352e1fef3c5c89f32adc71d7c6c238 # Parent a433eb716ce19c00315b9909d5382b5467959b73# Parent eb6b3ac64f0eb6c182e41b8051e5eb0ec347e284 Merge diff -r a433eb716ce1 -r 62c54fcc0a35 .hgtags --- a/.hgtags Tue Mar 25 12:54:21 2014 -0700 +++ b/.hgtags Tue Mar 25 17:07:36 2014 -0700 @@ -432,3 +432,8 @@ ecf3678d5736a645aea893b525a9eb5fa1a8e072 hs25.20-b04 51e1bb81df8680bd237630323de5e0704fb25607 jdk8u20-b03 54436d3b2a915ff50a8d6b34f61d5afb45be7bb6 hs25.20-b05 +d4e18f0633c662588cc0875be7759721c7d85af4 jdk8u20-b04 +57eb3e69397e9d5818c5fdaef65b47d9b03f7f88 jdk8u20-b05 +804f89b6ff46728d60a69e9a338e63f362f7ac68 hs25.20-b06 +c3d92e04873788275eeebec6bcd2948cdbd143a7 jdk8u20-b06 +39eae002499704438142e78f5e0e24d46d0b266f hs25.20-b07 diff -r a433eb716ce1 -r 62c54fcc0a35 agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java --- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java Tue Mar 25 12:54:21 2014 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java Tue Mar 25 17:07:36 2014 -0700 @@ -51,9 +51,9 @@ static private CIntegerField summaryBytesUsedField; // G1MonitoringSupport* _g1mm; static private AddressField g1mmField; - // MasterOldRegionSet _old_set; + // HeapRegionSet _old_set; static private long oldSetFieldOffset; - // MasterHumongousRegionSet _humongous_set; + // HeapRegionSet _humongous_set; static private long humongousSetFieldOffset; static { diff -r a433eb716ce1 -r 62c54fcc0a35 agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java --- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java Tue Mar 25 12:54:21 2014 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetBase.java Tue Mar 25 17:07:36 2014 -0700 @@ -40,12 +40,8 @@ // Mirror class for HeapRegionSetBase. Represents a group of regions. public class HeapRegionSetBase extends VMObject { - // uint _length; - static private CIntegerField lengthField; - // uint _region_num; - static private CIntegerField regionNumField; - // size_t _total_used_bytes; - static private CIntegerField totalUsedBytesField; + + static private long countField; static { VM.registerVMInitializedObserver(new Observer() { @@ -58,21 +54,13 @@ static private synchronized void initialize(TypeDataBase db) { Type type = db.lookupType("HeapRegionSetBase"); - lengthField = type.getCIntegerField("_length"); - regionNumField = type.getCIntegerField("_region_num"); - totalUsedBytesField = type.getCIntegerField("_total_used_bytes"); + countField = type.getField("_count").getOffset(); } - public long length() { - return lengthField.getValue(addr); - } - public long regionNum() { - return regionNumField.getValue(addr); - } - - public long totalUsedBytes() { - return totalUsedBytesField.getValue(addr); + public HeapRegionSetCount count() { + Address countFieldAddr = addr.addOffsetTo(countField); + return (HeapRegionSetCount) VMObjectFactory.newObject(HeapRegionSetCount.class, countFieldAddr); } public HeapRegionSetBase(Address addr) { diff -r a433eb716ce1 -r 62c54fcc0a35 agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetCount.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegionSetCount.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.gc_implementation.g1; + +import java.util.Iterator; +import java.util.Observable; +import java.util.Observer; + +import sun.jvm.hotspot.debugger.Address; +import sun.jvm.hotspot.runtime.VM; +import sun.jvm.hotspot.runtime.VMObject; +import sun.jvm.hotspot.runtime.VMObjectFactory; +import sun.jvm.hotspot.types.AddressField; +import sun.jvm.hotspot.types.CIntegerField; +import sun.jvm.hotspot.types.Type; +import sun.jvm.hotspot.types.TypeDataBase; + +// Mirror class for HeapRegionSetCount. Represents a group of regions. + +public class HeapRegionSetCount extends VMObject { + + static private CIntegerField lengthField; + static private CIntegerField capacityField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + static private synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("HeapRegionSetCount"); + + lengthField = type.getCIntegerField("_length"); + capacityField = type.getCIntegerField("_capacity"); + } + + public long length() { + return lengthField.getValue(addr); + } + + public long capacity() { + return capacityField.getValue(addr); + } + + public HeapRegionSetCount(Address addr) { + super(addr); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java --- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java Tue Mar 25 12:54:21 2014 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java Tue Mar 25 17:07:36 2014 -0700 @@ -114,7 +114,8 @@ long survivorRegionNum = g1mm.survivorRegionNum(); HeapRegionSetBase oldSet = g1h.oldSet(); HeapRegionSetBase humongousSet = g1h.humongousSet(); - long oldRegionNum = oldSet.regionNum() + humongousSet.regionNum(); + long oldRegionNum = oldSet.count().length() + + humongousSet.count().capacity() / HeapRegion.grainBytes(); printG1Space("G1 Heap:", g1h.n_regions(), g1h.used(), g1h.capacity()); System.out.println("G1 Young Generation:"); diff -r a433eb716ce1 -r 62c54fcc0a35 make/excludeSrc.make --- a/make/excludeSrc.make Tue Mar 25 12:54:21 2014 -0700 +++ b/make/excludeSrc.make Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ # -# Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -87,9 +87,10 @@ g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \ g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \ g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp g1OopClosures.cpp \ - g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \ + g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1StringDedup.cpp g1StringDedupStat.cpp \ + g1StringDedupTable.cpp g1StringDedupThread.cpp g1StringDedupQueue.cpp g1_globals.cpp heapRegion.cpp \ g1BiasedArray.cpp heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \ - ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp \ + ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp g1CodeCacheRemSet.cpp \ adjoiningGenerations.cpp adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp \ cardTableExtension.cpp gcTaskManager.cpp gcTaskThread.cpp objectStartArray.cpp \ parallelScavengeHeap.cpp parMarkBitMap.cpp pcTasks.cpp psAdaptiveSizePolicy.cpp \ diff -r a433eb716ce1 -r 62c54fcc0a35 make/hotspot_version --- a/make/hotspot_version Tue Mar 25 12:54:21 2014 -0700 +++ b/make/hotspot_version Tue Mar 25 17:07:36 2014 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=25 HS_MINOR_VER=20 -HS_BUILD_NUMBER=05 +HS_BUILD_NUMBER=08 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/sparc/vm/sparc.ad Tue Mar 25 17:07:36 2014 -0700 @@ -2071,19 +2071,6 @@ return L7_REGP_mask(); } -const RegMask Matcher::mathExactI_result_proj_mask() { - return G1_REGI_mask(); -} - -const RegMask Matcher::mathExactL_result_proj_mask() { - return G1_REGL_mask(); -} - -const RegMask Matcher::mathExactI_flags_proj_mask() { - return INT_FLAGS_mask(); -} - - %} diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1089,6 +1089,21 @@ emit_arith(0x23, 0xC0, dst, src); } +void Assembler::andnl(Register dst, Register src1, Register src2) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode(dst, src1, src2); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::andnl(Register dst, Register src1, Address src2) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38(dst, src1, src2); + emit_int8((unsigned char)0xF2); + emit_operand(dst, src2); +} + void Assembler::bsfl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_int8(0x0F); @@ -1097,7 +1112,6 @@ } void Assembler::bsrl(Register dst, Register src) { - assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_int8(0x0F); emit_int8((unsigned char)0xBD); @@ -1110,6 +1124,51 @@ emit_int8((unsigned char)(0xC8 | encode)); } +void Assembler::blsil(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode(rbx, dst, src); + emit_int8((unsigned char)0xF3); + 
emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsil(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38(rbx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rbx, src); +} + +void Assembler::blsmskl(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode(rdx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsmskl(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38(rdx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rdx, src); +} + +void Assembler::blsrl(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode(rcx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsrl(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38(rcx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rcx, src); +} + void Assembler::call(Label& L, relocInfo::relocType rtype) { // suspect disp32 is always good int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand); @@ -2283,6 +2342,11 @@ emit_int8(imm8); } +void Assembler::pause() { + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)0x90); +} + void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); InstructionMark im(this); @@ -2607,6 +2671,11 @@ } } +void Assembler::rdtsc() { + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)0x31); +} + // copies data from [esi] to [edi] using rcx pointer sized words // generic void Assembler::rep_mov() { @@ -2878,6 +2947,24 @@ emit_operand(dst, src); } +void Assembler::tzcntl(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported"); + emit_int8((unsigned char)0xF3); + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_int8(0x0F); + emit_int8((unsigned char)0xBC); + emit_int8((unsigned char)0xC0 | encode); +} + +void Assembler::tzcntq(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported"); + emit_int8((unsigned char)0xF3); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_int8(0x0F); + emit_int8((unsigned char)0xBC); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::ucomisd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); @@ -2898,6 +2985,11 @@ emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); } +void Assembler::xabort(int8_t imm8) { + emit_int8((unsigned char)0xC6); + emit_int8((unsigned char)0xF8); + emit_int8((unsigned char)(imm8 & 0xFF)); +} void Assembler::xaddl(Address dst, Register src) { InstructionMark im(this); @@ -2907,6 +2999,24 @@ emit_operand(src, dst); } +void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) { + InstructionMark im(this); + relocate(rtype); + if (abort.is_bound()) { + address entry = target(abort); + assert(entry != NULL, "abort entry NULL"); + intptr_t offset = entry - 
pc(); + emit_int8((unsigned char)0xC7); + emit_int8((unsigned char)0xF8); + emit_int32(offset - 6); // 2 opcode + 4 address + } else { + abort.add_patch_at(code(), locator()); + emit_int8((unsigned char)0xC7); + emit_int8((unsigned char)0xF8); + emit_int32(0); + } +} + void Assembler::xchgl(Register dst, Address src) { // xchg InstructionMark im(this); prefix(src, dst); @@ -2920,6 +3030,12 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::xend() { + emit_int8((unsigned char)0x0F); + emit_int8((unsigned char)0x01); + emit_int8((unsigned char)0xD5); +} + void Assembler::xgetbv() { emit_int8(0x0F); emit_int8(0x01); @@ -4837,6 +4953,21 @@ emit_arith(0x23, 0xC0, dst, src); } +void Assembler::andnq(Register dst, Register src1, Register src2) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2); + emit_int8((unsigned char)0xF2); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::andnq(Register dst, Register src1, Address src2) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38_q(dst, src1, src2); + emit_int8((unsigned char)0xF2); + emit_operand(dst, src2); +} + void Assembler::bsfq(Register dst, Register src) { int encode = prefixq_and_encode(dst->encoding(), src->encoding()); emit_int8(0x0F); @@ -4845,7 +4976,6 @@ } void Assembler::bsrq(Register dst, Register src) { - assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); int encode = prefixq_and_encode(dst->encoding(), src->encoding()); emit_int8(0x0F); emit_int8((unsigned char)0xBD); @@ -4858,6 +4988,51 @@ emit_int8((unsigned char)(0xC8 | encode)); } +void Assembler::blsiq(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsiq(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38_q(rbx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rbx, src); +} + +void Assembler::blsmskq(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsmskq(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38_q(rdx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rdx, src); +} + +void Assembler::blsrq(Register dst, Register src) { + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src); + emit_int8((unsigned char)0xF3); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::blsrq(Register dst, Address src) { + InstructionMark im(this); + assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); + vex_prefix_0F38_q(rcx, dst, src); + emit_int8((unsigned char)0xF3); + emit_operand(rcx, src); +} + void Assembler::cdqq() { prefix(REX_W); emit_int8((unsigned char)0x99); diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/assembler_x86.hpp --- 
a/src/cpu/x86/vm/assembler_x86.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/assembler_x86.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -590,10 +590,35 @@ vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256); } + void vex_prefix_0F38(Register dst, Register nds, Address src) { + bool vex_w = false; + bool vector256 = false; + vex_prefix(src, nds->encoding(), dst->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); + } + + void vex_prefix_0F38_q(Register dst, Register nds, Address src) { + bool vex_w = true; + bool vector256 = false; + vex_prefix(src, nds->encoding(), dst->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); + } int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256); + int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) { + bool vex_w = false; + bool vector256 = false; + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); + } + int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) { + bool vex_w = true; + bool vector256 = false; + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256); + } int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, bool vector256 = false, VexOpcode opc = VEX_OPCODE_0F) { @@ -897,6 +922,27 @@ void andq(Register dst, Address src); void andq(Register dst, Register src); + // BMI instructions + void andnl(Register dst, Register src1, Register src2); + void andnl(Register dst, Register src1, Address src2); + void andnq(Register dst, Register src1, Register src2); + void andnq(Register dst, Register src1, Address src2); + + void blsil(Register dst, Register src); + void blsil(Register dst, Address src); + void blsiq(Register dst, Register src); + void blsiq(Register dst, Address src); + + void blsmskl(Register dst, Register src); + void blsmskl(Register dst, Address src); + void blsmskq(Register dst, Register src); + void blsmskq(Register dst, Address src); + + void blsrl(Register dst, Register src); + void blsrl(Register dst, Address src); + void blsrq(Register dst, Register src); + void blsrq(Register dst, Address src); + void bsfl(Register dst, Register src); void bsrl(Register dst, Register src); @@ -1405,6 +1451,8 @@ // Pemutation of 64bit words void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256); + void pause(); + // SSE4.2 string instructions void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); void pcmpestri(XMMRegister xmm1, Address src, int imm8); @@ -1489,6 +1537,8 @@ void rclq(Register dst, int imm8); + void rdtsc(); + void ret(int imm16); void sahf(); @@ -1574,6 +1624,9 @@ void testq(Register dst, int32_t imm32); void testq(Register dst, Register src); + // BMI - count trailing zeros + void tzcntl(Register dst, Register src); + void tzcntq(Register dst, Register src); // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS void ucomisd(XMMRegister dst, Address src); @@ -1583,16 +1636,22 @@ void ucomiss(XMMRegister dst, Address src); void ucomiss(XMMRegister dst, XMMRegister src); + void xabort(int8_t imm8); + void xaddl(Address dst, Register src); void xaddq(Address dst, Register src); + void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none); + void xchgl(Register reg, Address adr); void xchgl(Register dst, 
Register src); void xchgq(Register reg, Address adr); void xchgq(Register dst, Register src); + void xend(); + // Get Value of Extended Control Register void xgetbv(); diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/globals_x86.hpp --- a/src/cpu/x86/vm/globals_x86.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/globals_x86.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -129,11 +129,53 @@ product(bool, UseFastStosb, false, \ "Use fast-string operation for zeroing: rep stosb") \ \ + /* Use Restricted Transactional Memory for lock eliding */ \ + experimental(bool, UseRTMLocking, false, \ + "Enable RTM lock eliding for inflated locks in compiled code") \ + \ + experimental(bool, UseRTMForStackLocks, false, \ + "Enable RTM lock eliding for stack locks in compiled code") \ + \ + experimental(bool, UseRTMDeopt, false, \ + "Perform deopt and recompilation based on RTM abort ratio") \ + \ + experimental(uintx, RTMRetryCount, 5, \ + "Number of RTM retries on lock abort or busy") \ + \ + experimental(intx, RTMSpinLoopCount, 100, \ + "Spin count for lock to become free before RTM retry") \ + \ + experimental(intx, RTMAbortThreshold, 1000, \ + "Calculate abort ratio after this number of aborts") \ + \ + experimental(intx, RTMLockingThreshold, 10000, \ + "Lock count at which to do RTM lock eliding without " \ + "abort ratio calculation") \ + \ + experimental(intx, RTMAbortRatio, 50, \ + "Lock abort ratio at which to stop use RTM lock eliding") \ + \ + experimental(intx, RTMTotalCountIncrRate, 64, \ + "Increment total RTM attempted lock count once every n times") \ + \ + experimental(intx, RTMLockingCalculationDelay, 0, \ + "Number of milliseconds to wait before start calculating aborts " \ + "for RTM locking") \ + \ + experimental(bool, UseRTMXendForLockBusy, false, \ + "Use RTM Xend instead of Xabort when lock busy") \ + \ /* assembler */ \ product(bool, Use486InstrsOnly, false, \ "Use 80486 Compliant instruction subset") \ \ product(bool, UseCountLeadingZerosInstruction, false, \ "Use count leading zeros instruction") \ + \ + product(bool, UseCountTrailingZerosInstruction, false, \ + "Use count trailing zeros instruction") \ + \ + product(bool, UseBMI1Instructions, false, \ + "Use BMI instructions") #endif // CPU_X86_VM_GLOBALS_X86_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -98,217 +98,6 @@ return Address::make_array(adr); } -int MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); - assert_different_registers(lock_reg, obj_reg, swap_reg); - - if (PrintBiasedLockingStatistics && counters == NULL) - counters = BiasedLocking::counters(); - - bool need_tmp_reg = false; - if (tmp_reg == noreg) { - need_tmp_reg = true; - tmp_reg = lock_reg; - } else { - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); - } - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); - Address saved_mark_addr(lock_reg, 0); - - 
// Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - int null_check_offset = -1; - if (!swap_reg_contains_mark) { - null_check_offset = offset(); - movl(swap_reg, mark_addr); - } - if (need_tmp_reg) { - push(tmp_reg); - } - movl(tmp_reg, swap_reg); - andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); - cmpl(tmp_reg, markOopDesc::biased_lock_pattern); - if (need_tmp_reg) { - pop(tmp_reg); - } - jcc(Assembler::notEqual, cas_label); - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. - // Note that because there is no current thread register on x86 we - // need to store off the mark word we read out of the object to - // avoid reloading it and needing to recheck invariants below. This - // store is unfortunate but it makes the overall code shorter and - // simpler. - movl(saved_mark_addr, swap_reg); - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - xorl(swap_reg, tmp_reg); - if (swap_reg_contains_mark) { - null_check_offset = offset(); - } - movl(tmp_reg, klass_addr); - xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); - andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); - if (need_tmp_reg) { - pop(tmp_reg); - } - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->biased_lock_entry_count_addr())); - } - jcc(Assembler::equal, done); - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - testl(swap_reg, markOopDesc::biased_lock_mask_in_place); - jcc(Assembler::notZero, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - testl(swap_reg, markOopDesc::epoch_mask_in_place); - jcc(Assembler::notZero, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. 
- movl(swap_reg, saved_mark_addr); - andl(swap_reg, - markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - orl(tmp_reg, swap_reg); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - movl(swap_reg, klass_addr); - orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); - movl(swap_reg, saved_mark_addr); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - movl(swap_reg, saved_mark_addr); - if (need_tmp_reg) { - push(tmp_reg); - } - movl(tmp_reg, klass_addr); - movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. 
- if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->revoked_lock_entry_count_addr())); - } - - bind(cas_label); - - return null_check_offset; -} void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { call(RuntimeAddress(entry_point)); @@ -512,7 +301,9 @@ mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); } -void MacroAssembler::movptr(Register dst, AddressLiteral src) { +void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { + // scratch register is not used, + // it is defined to match parameters of 64-bit version of this method. if (src.is_lval()) { mov_literal32(dst, (intptr_t)src.target(), src.rspec()); } else { @@ -726,165 +517,6 @@ return array; } -int MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); - assert(tmp_reg != noreg, "tmp_reg must be supplied"); - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - Address saved_mark_addr(lock_reg, 0); - - if (PrintBiasedLockingStatistics && counters == NULL) - counters = BiasedLocking::counters(); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - int null_check_offset = -1; - if (!swap_reg_contains_mark) { - null_check_offset = offset(); - movq(swap_reg, mark_addr); - } - movq(tmp_reg, swap_reg); - andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); - cmpq(tmp_reg, markOopDesc::biased_lock_pattern); - jcc(Assembler::notEqual, cas_label); - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. - load_prototype_header(tmp_reg, obj_reg); - orq(tmp_reg, r15_thread); - xorq(tmp_reg, swap_reg); - andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); - } - jcc(Assembler::equal, done); - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); - jcc(Assembler::notZero, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. 
(Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - testq(tmp_reg, markOopDesc::epoch_mask_in_place); - jcc(Assembler::notZero, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - andq(swap_reg, - markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); - movq(tmp_reg, swap_reg); - orq(tmp_reg, r15_thread); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - load_prototype_header(tmp_reg, obj_reg); - orq(tmp_reg, r15_thread); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. 
- load_prototype_header(tmp_reg, obj_reg); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->revoked_lock_entry_count_addr())); - } - - bind(cas_label); - - return null_check_offset; -} - void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { Label L, E; @@ -983,6 +615,15 @@ /* else */ { subq(dst, value) ; return; } } +void MacroAssembler::incrementq(AddressLiteral dst) { + if (reachable(dst)) { + incrementq(as_Address(dst)); + } else { + lea(rscratch1, dst); + incrementq(Address(rscratch1, 0)); + } +} + void MacroAssembler::incrementq(Register reg, int value) { if (value == min_jint) { addq(reg, value); return; } if (value < 0) { decrementq(reg, -value); return; } @@ -1051,15 +692,15 @@ movq(dst, rscratch1); } -void MacroAssembler::movptr(Register dst, AddressLiteral src) { +void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { if (src.is_lval()) { mov_literal64(dst, (intptr_t)src.target(), src.rspec()); } else { if (reachable(src)) { movq(dst, as_Address(src)); } else { - lea(rscratch1, src); - movq(dst, Address(rscratch1,0)); + lea(scratch, src); + movq(dst, Address(scratch, 0)); } } } @@ -1358,13 +999,37 @@ LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); } -void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { - pushf(); +void MacroAssembler::atomic_incl(Address counter_addr) { if (os::is_MP()) lock(); incrementl(counter_addr); - popf(); -} +} + +void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) { + if (reachable(counter_addr)) { + atomic_incl(as_Address(counter_addr)); + } else { + lea(scr, counter_addr); + atomic_incl(Address(scr, 0)); + } +} + +#ifdef _LP64 +void MacroAssembler::atomic_incq(Address counter_addr) { + if (os::is_MP()) + lock(); + incrementq(counter_addr); +} + +void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) { + if (reachable(counter_addr)) { + atomic_incq(as_Address(counter_addr)); + } else { + lea(scr, counter_addr); + atomic_incq(Address(scr, 0)); + } +} +#endif // Writes to stack successive pages until offset reached to check for // stack overflow + shadow pages. This clobbers tmp. 
@@ -1393,6 +1058,234 @@ } } +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); + LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); ) + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = lock_reg; + assert_different_registers(lock_reg, obj_reg, swap_reg); + } else { + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); + } + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + if (PrintBiasedLockingStatistics && counters == NULL) { + counters = BiasedLocking::counters(); + } + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + movptr(swap_reg, mark_addr); + } + if (need_tmp_reg) { + push(tmp_reg); + } + movptr(tmp_reg, swap_reg); + andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place); + cmpptr(tmp_reg, markOopDesc::biased_lock_pattern); + if (need_tmp_reg) { + pop(tmp_reg); + } + jcc(Assembler::notEqual, cas_label); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. +#ifndef _LP64 + // Note that because there is no current thread register on x86_32 we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. + movptr(saved_mark_addr, swap_reg); +#endif + if (need_tmp_reg) { + push(tmp_reg); + } + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + load_prototype_header(tmp_reg, obj_reg); +#ifdef _LP64 + orptr(tmp_reg, r15_thread); + xorptr(tmp_reg, swap_reg); + Register header_reg = tmp_reg; +#else + xorptr(tmp_reg, swap_reg); + get_thread(swap_reg); + xorptr(swap_reg, tmp_reg); + Register header_reg = swap_reg; +#endif + andptr(header_reg, ~((int) markOopDesc::age_mask_in_place)); + if (need_tmp_reg) { + pop(tmp_reg); + } + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->biased_lock_entry_count_addr())); + } + jcc(Assembler::equal, done); + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. 
+ testptr(header_reg, markOopDesc::biased_lock_mask_in_place); + jccb(Assembler::notZero, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + testptr(header_reg, markOopDesc::epoch_mask_in_place); + jccb(Assembler::notZero, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + NOT_LP64( movptr(swap_reg, saved_mark_addr); ) + andptr(swap_reg, + markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + if (need_tmp_reg) { + push(tmp_reg); + } +#ifdef _LP64 + movptr(tmp_reg, swap_reg); + orptr(tmp_reg, r15_thread); +#else + get_thread(tmp_reg); + orptr(tmp_reg, swap_reg); +#endif + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); + } + if (slow_case != NULL) { + jcc(Assembler::notZero, *slow_case); + } + jmp(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); +#ifdef _LP64 + orptr(tmp_reg, r15_thread); +#else + get_thread(swap_reg); + orptr(tmp_reg, swap_reg); + movptr(swap_reg, saved_mark_addr); +#endif + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. 
+ if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); + } + if (slow_case != NULL) { + jcc(Assembler::notZero, *slow_case); + } + jmp(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + NOT_LP64( movptr(swap_reg, saved_mark_addr); ) + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->revoked_lock_entry_count_addr())); + } + + bind(cas_label); + + return null_check_offset; +} + void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { assert(UseBiasedLocking, "why call this otherwise?"); @@ -1408,6 +1301,996 @@ jcc(Assembler::equal, done); } +#ifdef COMPILER2 + +#if INCLUDE_RTM_OPT + +// Update rtm_counters based on abort status +// input: abort_status +// rtm_counters (RTMLockingCounters*) +// flags are killed +void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) { + + atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset())); + if (PrintPreciseRTMLockingStatistics) { + for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { + Label check_abort; + testl(abort_status, (1< 0) { + // Delay calculation + movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg); + testptr(tmpReg, tmpReg); + jccb(Assembler::equal, L_done); + } + // Abort ratio calculation only if abort_count > RTMAbortThreshold + // Aborted transactions = abort_count * 100 + // All transactions = total_count * RTMTotalCountIncrRate + // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio) + + movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset())); + cmpptr(tmpReg, RTMAbortThreshold); + jccb(Assembler::below, L_check_always_rtm2); + imulptr(tmpReg, tmpReg, 100); + + Register scrReg = rtm_counters_Reg; + movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset())); + imulptr(scrReg, scrReg, RTMTotalCountIncrRate); + imulptr(scrReg, scrReg, RTMAbortRatio); + cmpptr(tmpReg, scrReg); + jccb(Assembler::below, L_check_always_rtm1); + if (method_data != NULL) { + // set rtm_state to "no rtm" in MDO + mov_metadata(tmpReg, method_data); + if (os::is_MP()) { + lock(); + } + orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM); + } + jmpb(L_done); + bind(L_check_always_rtm1); + // Reload RTMLockingCounters* address + lea(rtm_counters_Reg, 
ExternalAddress((address)rtm_counters)); + bind(L_check_always_rtm2); + movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset())); + cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate); + jccb(Assembler::below, L_done); + if (method_data != NULL) { + // set rtm_state to "always rtm" in MDO + mov_metadata(tmpReg, method_data); + if (os::is_MP()) { + lock(); + } + orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM); + } + bind(L_done); +} + +// Update counters and perform abort ratio calculation +// input: abort_status_Reg +// rtm_counters_Reg, flags are killed +void MacroAssembler::rtm_profiling(Register abort_status_Reg, + Register rtm_counters_Reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data, + bool profile_rtm) { + + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + // update rtm counters based on rax value at abort + // reads abort_status_Reg, updates flags + lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters)); + rtm_counters_update(abort_status_Reg, rtm_counters_Reg); + if (profile_rtm) { + // Save abort status because abort_status_Reg is used by following code. + if (RTMRetryCount > 0) { + push(abort_status_Reg); + } + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data); + // restore abort status + if (RTMRetryCount > 0) { + pop(abort_status_Reg); + } + } +} + +// Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4) +// inputs: retry_count_Reg +// : abort_status_Reg +// output: retry_count_Reg decremented by 1 +// flags are killed +void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) { + Label doneRetry; + assert(abort_status_Reg == rax, ""); + // The abort reason bits are in eax (see all states in rtmLocking.hpp) + // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4) + // if reason is in 0x6 and retry count != 0 then retry + andptr(abort_status_Reg, 0x6); + jccb(Assembler::zero, doneRetry); + testl(retry_count_Reg, retry_count_Reg); + jccb(Assembler::zero, doneRetry); + pause(); + decrementl(retry_count_Reg); + jmp(retryLabel); + bind(doneRetry); +} + +// Spin and retry if lock is busy, +// inputs: box_Reg (monitor address) +// : retry_count_Reg +// output: retry_count_Reg decremented by 1 +// : clear z flag if retry count exceeded +// tmp_Reg, scr_Reg, flags are killed +void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg, + Register tmp_Reg, Register scr_Reg, Label& retryLabel) { + Label SpinLoop, SpinExit, doneRetry; + // Clean monitor_value bit to get valid pointer + int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; + + testl(retry_count_Reg, retry_count_Reg); + jccb(Assembler::zero, doneRetry); + decrementl(retry_count_Reg); + movptr(scr_Reg, RTMSpinLoopCount); + + bind(SpinLoop); + pause(); + decrementl(scr_Reg); + jccb(Assembler::lessEqual, SpinExit); + movptr(tmp_Reg, Address(box_Reg, owner_offset)); + testptr(tmp_Reg, tmp_Reg); + jccb(Assembler::notZero, SpinLoop); + + bind(SpinExit); + jmp(retryLabel); + bind(doneRetry); + incrementl(retry_count_Reg); // clear z flag +} + +// Use RTM for normal stack locks +// Input: objReg (object to lock) +void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg, + Register retry_on_abort_count_Reg, + RTMLockingCounters* 
stack_rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL, Label& IsInflated) { + assert(UseRTMForStackLocks, "why call this otherwise?"); + assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); + assert(tmpReg == rax, ""); + assert(scrReg == rdx, ""); + Label L_rtm_retry, L_decrement_retry, L_on_abort; + + if (RTMRetryCount > 0) { + movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort + bind(L_rtm_retry); + } + if (!UseRTMXendForLockBusy) { + movptr(tmpReg, Address(objReg, 0)); + testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased + jcc(Assembler::notZero, IsInflated); + } + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + Label L_noincrement; + if (RTMTotalCountIncrRate > 1) { + // tmpReg, scrReg and flags are killed + branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement); + } + assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM"); + atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg); + bind(L_noincrement); + } + xbegin(L_on_abort); + movptr(tmpReg, Address(objReg, 0)); // fetch markword + andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits + cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked + jcc(Assembler::equal, DONE_LABEL); // all done if unlocked + + Register abort_status_Reg = tmpReg; // status of abort is stored in RAX + if (UseRTMXendForLockBusy) { + xend(); + movptr(tmpReg, Address(objReg, 0)); + testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased + jcc(Assembler::notZero, IsInflated); + movptr(abort_status_Reg, 0x1); // Set the abort status to 1 (as xabort does) + jmp(L_decrement_retry); + } + else { + xabort(0); + } + bind(L_on_abort); + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm); + } + bind(L_decrement_retry); + if (RTMRetryCount > 0) { + // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4) + rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); + } +} + +// Use RTM for inflating locks +// inputs: objReg (object to lock) +// boxReg (on-stack box address (displaced header location) - KILLED) +// tmpReg (ObjectMonitor address + 2(monitor_value)) +void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg, + Register scrReg, Register retry_on_busy_count_Reg, + Register retry_on_abort_count_Reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL) { + assert(UseRTMLocking, "why call this otherwise?"); + assert(tmpReg == rax, ""); + assert(scrReg == rdx, ""); + Label L_rtm_retry, L_decrement_retry, L_on_abort; + // Clean monitor_value bit to get valid pointer + int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; + + // Without cast to int32_t a movptr will destroy r10 which is typically obj + movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); + movptr(boxReg, tmpReg); // Save ObjectMonitor address + + if (RTMRetryCount > 0) { + movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy + movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort + bind(L_rtm_retry); + } + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + Label L_noincrement; + if (RTMTotalCountIncrRate > 1) { + // 
tmpReg, scrReg and flags are killed + branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement); + } + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg); + bind(L_noincrement); + } + xbegin(L_on_abort); + movptr(tmpReg, Address(objReg, 0)); + movptr(tmpReg, Address(tmpReg, owner_offset)); + testptr(tmpReg, tmpReg); + jcc(Assembler::zero, DONE_LABEL); + if (UseRTMXendForLockBusy) { + xend(); + jmp(L_decrement_retry); + } + else { + xabort(0); + } + bind(L_on_abort); + Register abort_status_Reg = tmpReg; // status of abort is stored in RAX + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm); + } + if (RTMRetryCount > 0) { + // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4) + rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); + } + + movptr(tmpReg, Address(boxReg, owner_offset)) ; + testptr(tmpReg, tmpReg) ; + jccb(Assembler::notZero, L_decrement_retry) ; + + // Appears unlocked - try to swing _owner from null to non-null. + // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. +#ifdef _LP64 + Register threadReg = r15_thread; +#else + get_thread(scrReg); + Register threadReg = scrReg; +#endif + if (os::is_MP()) { + lock(); + } + cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg + + if (RTMRetryCount > 0) { + // success done else retry + jccb(Assembler::equal, DONE_LABEL) ; + bind(L_decrement_retry); + // Spin and retry if lock is busy. + rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry); + } + else { + bind(L_decrement_retry); + } +} + +#endif // INCLUDE_RTM_OPT + +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime +// monitorenter/exit helper stubs are so slow it's critical that +// we inline both the stack-locking fast-path and the inflated fast path. +// +// See also: cmpFastLock and cmpFastUnlock. +// +// What follows is a specialized inline transliteration of the code +// in slow_enter() and slow_exit(). If we're concerned about I$ bloat +// another option would be to emit TrySlowEnter and TrySlowExit methods +// at startup-time. These methods would accept arguments as +// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure +// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. +// In practice, however, the # of lock sites is bounded and is usually small. +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer +// if the processor uses simple bimodal branch predictors keyed by EIP +// Since the helper routines would be called from multiple synchronization +// sites. +// +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites +// to those specialized methods. That'd give us a mostly platform-independent +// implementation that the JITs could optimize and inline at their pleasure. +// Done correctly, the only time we'd need to cross to native could would be +// to park() or unpark() threads. 
We'd also need a few more unsafe operators +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and +// (b) explicit barriers or fence operations. +// +// TODO: +// +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. +// Given TLAB allocation, Self is usually manifested in a register, so passing it into +// the lock operators would typically be faster than reifying Self. +// +// * Ideally I'd define the primitives as: +// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. +// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED +// Unfortunately ADLC bugs prevent us from expressing the ideal form. +// Instead, we're stuck with a rather awkward and brittle register assignments below. +// Furthermore the register assignments are overconstrained, possibly resulting in +// sub-optimal code near the synchronization site. +// +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. +// Alternately, use a better sp-proximity test. +// +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. +// Either one is sufficient to uniquely identify a thread. +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. +// +// * Intrinsify notify() and notifyAll() for the common cases where the +// object is locked by the calling thread but the waitlist is empty. +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). +// +// * use jccb and jmpb instead of jcc and jmp to improve code density. +// But beware of excessive branch density on AMD Opterons. +// +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success +// or failure of the fast-path. If the fast-path fails then we pass +// control to the slow-path, typically in C. In Fast_Lock and +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 +// will emit a conditional branch immediately after the node. +// So we have branches to branches and lots of ICC.ZF games. +// Instead, it might be better to have C2 pass a "FailureLabel" +// into Fast_Lock and Fast_Unlock. In the case of success, control +// will drop through the node. ICC.ZF is undefined at exit. 
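To make the current ZF protocol concrete, here is a rough stand-alone C++ model of how a caller consumes the Fast_Lock result; try_fast_lock and monitor_enter_slow are invented placeholder names for the emitted fast path and the runtime helper, not functions defined in this patch:

struct oop;          // placeholder for the locked object
struct BasicLock;    // placeholder for the on-stack box
struct JavaThread;   // placeholder for the current thread

// Stand-ins for the emitted Fast_Lock code and the runtime monitorenter helper.
bool try_fast_lock(oop* obj, BasicLock* box, JavaThread* self);
void monitor_enter_slow(oop* obj, BasicLock* box, JavaThread* self);

void monitor_enter(oop* obj, BasicLock* box, JavaThread* self) {
  // Fast_Lock leaves ZF == 1 on success and ZF == 0 on failure;
  // that flag is modeled here as a boolean return value.
  if (!try_fast_lock(obj, box, self)) {
    // C2 emits the conditional branch to the slow path right after the node,
    // which is the branch-to-branch pattern discussed above.
    monitor_enter_slow(obj, box, self);
  }
}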
+// In the case of failure, the node will branch directly to the +// FailureLabel + + +// obj: object to lock +// box: on-stack box address (displaced header location) - KILLED +// rax,: tmp -- KILLED +// scr: tmp -- KILLED +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, + Register scrReg, Register cx1Reg, Register cx2Reg, + BiasedLockingCounters* counters, + RTMLockingCounters* rtm_counters, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, + bool use_rtm, bool profile_rtm) { + // Ensure the register assignents are disjoint + assert(tmpReg == rax, ""); + + if (use_rtm) { + assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg); + } else { + assert(cx1Reg == noreg, ""); + assert(cx2Reg == noreg, ""); + assert_different_registers(objReg, boxReg, tmpReg, scrReg); + } + + if (counters != NULL) { + atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg); + } + if (EmitSync & 1) { + // set box->dhw = unused_mark (3) + // Force all sync thru slow-path: slow_enter() and slow_exit() + movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); + cmpptr (rsp, (int32_t)NULL_WORD); + } else + if (EmitSync & 2) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters); + } + + movptr(tmpReg, Address(objReg, 0)); // fetch markword + orptr (tmpReg, 0x1); + movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS + if (os::is_MP()) { + lock(); + } + cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg + jccb(Assembler::equal, DONE_LABEL); + // Recursive locking + subptr(tmpReg, rsp); + andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); + movptr(Address(boxReg, 0), tmpReg); + bind(DONE_LABEL); + } else { + // Possible cases that we'll encounter in fast_lock + // ------------------------------------------------ + // * Inflated + // -- unlocked + // -- Locked + // = by self + // = by other + // * biased + // -- by Self + // -- by other + // * neutral + // * stack-locked + // -- by self + // = sp-proximity test hits + // = sp-proximity test generates false-negative + // -- by other + // + + Label IsInflated, DONE_LABEL; + + // it's stack-locked, biased or neutral + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage + // order to reduce the number of conditional branches in the most common cases. + // Beware -- there's a subtle invariant that fetch of the markword + // at [FETCH], below, will never observe a biased encoding (*101b). + // If this invariant is not held we risk exclusion (safety) failure. + if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters); + } + +#if INCLUDE_RTM_OPT + if (UseRTMForStackLocks && use_rtm) { + rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg, + stack_rtm_counters, method_data, profile_rtm, + DONE_LABEL, IsInflated); + } +#endif // INCLUDE_RTM_OPT + + movptr(tmpReg, Address(objReg, 0)); // [FETCH] + testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased + jccb(Assembler::notZero, IsInflated); + + // Attempt stack-locking ... 
+ orptr (tmpReg, markOopDesc::unlocked_value); + movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS + if (os::is_MP()) { + lock(); + } + cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg + if (counters != NULL) { + cond_inc32(Assembler::equal, + ExternalAddress((address)counters->fast_path_entry_count_addr())); + } + jcc(Assembler::equal, DONE_LABEL); // Success + + // Recursive locking. + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + subptr(tmpReg, rsp); + // Next instruction set ZFlag == 1 (Success) if difference is less then one page. + andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); + movptr(Address(boxReg, 0), tmpReg); + if (counters != NULL) { + cond_inc32(Assembler::equal, + ExternalAddress((address)counters->fast_path_entry_count_addr())); + } + jmp(DONE_LABEL); + + bind(IsInflated); + // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value) + +#if INCLUDE_RTM_OPT + // Use the same RTM locking code in 32- and 64-bit VM. + if (use_rtm) { + rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg, + rtm_counters, method_data, profile_rtm, DONE_LABEL); + } else { +#endif // INCLUDE_RTM_OPT + +#ifndef _LP64 + // The object is inflated. + // + // TODO-FIXME: eliminate the ugly use of manifest constants: + // Use markOopDesc::monitor_value instead of "2". + // use markOop::unused_mark() instead of "3". + // The tmpReg value is an objectMonitor reference ORed with + // markOopDesc::monitor_value (2). We can either convert tmpReg to an + // objectmonitor pointer by masking off the "2" bit or we can just + // use tmpReg as an objectmonitor pointer but bias the objectmonitor + // field offsets with "-2" to compensate for and annul the low-order tag bit. + // + // I use the latter as it avoids AGI stalls. + // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]" + // instead of "mov r, [tmpReg+OFFSETOF(Owner)]". + // + #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2) + + // boxReg refers to the on-stack BasicLock in the current frame. + // We'd like to write: + // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices. + // This is convenient but results a ST-before-CAS penalty. The following CAS suffers + // additional latency as we have another ST in the store buffer that must drain. 
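As a hedged sketch of the inflated-monitor fast path that the code below emits (ObjectMonitorModel and its field are simplified stand-ins, not the VM's real declarations), with the displaced-header store modeling the ST-before-CAS penalty just described:

#include <atomic>
#include <cstdint>

struct ObjectMonitorModel {
  std::atomic<void*> owner;   // NULL when unlocked; a thread or stack address when held
};

// Returns true (ZF == 1) if the monitor was acquired on the fast path.
bool inflated_enter_fast(ObjectMonitorModel* m, void* self, uintptr_t* box_dhw) {
  *box_dhw = 3;                                 // box->dhw = unused_mark(); any non-zero value suffices,
                                                // but this store must drain from the store buffer before the CAS
  if (m->owner.load(std::memory_order_relaxed) != nullptr) {
    return false;                               // already owned: fall through to retry / slow path
  }
  void* expected = nullptr;                     // EAX plays the comparand role in the emitted cmpxchg
  return m->owner.compare_exchange_strong(expected, self);   // swing _owner from null to non-null
}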
+ + if (EmitSync & 8192) { + movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty + get_thread (scrReg); + movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] + movptr(tmpReg, NULL_WORD); // consider: xor vs mov + if (os::is_MP()) { + lock(); + } + cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); + } else + if ((EmitSync & 128) == 0) { // avoid ST-before-CAS + movptr(scrReg, boxReg); + movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] + + // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes + if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { + // prefetchw [eax + Offset(_owner)-2] + prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + } + + if ((EmitSync & 64) == 0) { + // Optimistic form: consider XORL tmpReg,tmpReg + movptr(tmpReg, NULL_WORD); + } else { + // Can suffer RTS->RTO upgrades on shared or cold $ lines + // Test-And-CAS instead of CAS + movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner + testptr(tmpReg, tmpReg); // Locked ? + jccb (Assembler::notZero, DONE_LABEL); + } + + // Appears unlocked - try to swing _owner from null to non-null. + // Ideally, I'd manifest "Self" with get_thread and then attempt + // to CAS the register containing Self into m->Owner. + // But we don't have enough registers, so instead we can either try to CAS + // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds + // we later store "Self" into m->Owner. Transiently storing a stack address + // (rsp or the address of the box) into m->owner is harmless. + // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. + if (os::is_MP()) { + lock(); + } + cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); + movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3 + jccb (Assembler::notZero, DONE_LABEL); + get_thread (scrReg); // beware: clobbers ICCs + movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg); + xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success + + // If the CAS fails we can either retry or pass control to the slow-path. + // We use the latter tactic. + // Pass the CAS result in the icc.ZFlag into DONE_LABEL + // If the CAS was successful ... + // Self has acquired the lock + // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. + // Intentional fall-through into DONE_LABEL ... + } else { + movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty + movptr(boxReg, tmpReg); + + // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes + if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { + // prefetchw [eax + Offset(_owner)-2] + prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + } + + if ((EmitSync & 64) == 0) { + // Optimistic form + xorptr (tmpReg, tmpReg); + } else { + // Can suffer RTS->RTO upgrades on shared or cold $ lines + movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner + testptr(tmpReg, tmpReg); // Locked ? + jccb (Assembler::notZero, DONE_LABEL); + } + + // Appears unlocked - try to swing _owner from null to non-null. + // Use either "Self" (in scr) or rsp as thread identity in _owner. + // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. 
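Since the invariant above leans on cmpxchg using EAX as its implicit comparand, a small C++ rendering of the generic x86 semantics of lock cmpxchgptr may help; this is textbook instruction behavior, not code taken from the patch:

#include <cstdint>

// Semantics of LOCK CMPXCHG [mem], newval for a pointer-sized operand.
// rax is the implicit comparand; ZF reports success; the observed value ends up in rax.
uintptr_t cmpxchg_model(volatile uintptr_t* mem, uintptr_t newval,
                        uintptr_t rax /* comparand */, bool* zf) {
  // The real instruction performs this read-compare-write as one atomic step.
  uintptr_t old = *mem;
  if (old == rax) {
    *mem = newval;   // exchange happened
    *zf  = true;     // ZF = 1
  } else {
    *zf  = false;    // ZF = 0, and rax is reloaded with the observed value
  }
  return old;        // caller treats this as the new contents of rax
}

With tmpReg (rax) holding 0, a successful exchange therefore means _owner really was null, and ZF doubles as the fast-path success flag.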
+ get_thread (scrReg); + if (os::is_MP()) { + lock(); + } + cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); + + // If the CAS fails we can either retry or pass control to the slow-path. + // We use the latter tactic. + // Pass the CAS result in the icc.ZFlag into DONE_LABEL + // If the CAS was successful ... + // Self has acquired the lock + // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. + // Intentional fall-through into DONE_LABEL ... + } +#else // _LP64 + // It's inflated + + // TODO: someday avoid the ST-before-CAS penalty by + // relocating (deferring) the following ST. + // We should also think about trying a CAS without having + // fetched _owner. If the CAS is successful we may + // avoid an RTO->RTS upgrade on the $line. + + // Without cast to int32_t a movptr will destroy r10 which is typically obj + movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); + + movptr (boxReg, tmpReg); + movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); + testptr(tmpReg, tmpReg); + jccb (Assembler::notZero, DONE_LABEL); + + // It's inflated and appears unlocked + if (os::is_MP()) { + lock(); + } + cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); + // Intentional fall-through into DONE_LABEL ... +#endif // _LP64 + +#if INCLUDE_RTM_OPT + } // use_rtm() +#endif + // DONE_LABEL is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + bind(DONE_LABEL); + + // At DONE_LABEL the icc ZFlag is set as follows ... + // Fast_Unlock uses the same protocol. + // ZFlag == 1 -> Success + // ZFlag == 0 -> Failure - force control through the slow-path + } +} + +// obj: object to unlock +// box: box address (displaced header location), killed. Must be EAX. +// tmp: killed, cannot be obj nor box. +// +// Some commentary on balanced locking: +// +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. +// Methods that don't have provably balanced locking are forced to run in the +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. +// The interpreter provides two properties: +// I1: At return-time the interpreter automatically and quietly unlocks any +// objects acquired the current activation (frame). Recall that the +// interpreter maintains an on-stack list of locks currently held by +// a frame. +// I2: If a method attempts to unlock an object that is not held by the +// the frame the interpreter throws IMSX. +// +// Lets say A(), which has provably balanced locking, acquires O and then calls B(). +// B() doesn't have provably balanced locking so it runs in the interpreter. +// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O +// is still locked by A(). +// +// The only other source of unbalanced locking would be JNI. The "Java Native Interface: +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter +// should not be unlocked by "normal" java-level locking and vice-versa. The specification +// doesn't specify what will occur if a program engages in such mixed-mode locking, however. 
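Before the fast_unlock code itself, here is a rough decision-tree sketch of the triage it performs once the biased-locking exit is out of the way; the enum and helper below are invented for illustration, and the RTM and inflated 1-0 exit details are elided:

#include <cstdint>

const uintptr_t kMonitorBit = 2;   // markOopDesc::monitor_value

enum UnlockPath { kRecursiveStackLock, kStackLocked, kInflated };

UnlockPath classify_unlock(uintptr_t obj_mark, uintptr_t box_displaced_header) {
  if (box_displaced_header == 0) {
    return kRecursiveStackLock;    // 0 in box->dhw marks a recursive stack-lock: nothing to restore
  }
  if (obj_mark & kMonitorBit) {
    return kInflated;              // header carries a tagged ObjectMonitor pointer: take the 1-0 exit path
  }
  return kStackLocked;             // plain stack-lock: CAS the displaced header back into the object header
}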
+ +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { + assert(boxReg == rax, ""); + assert_different_registers(objReg, boxReg, tmpReg); + + if (EmitSync & 4) { + // Disable - inhibit all inlining. Force control through the slow-path + cmpptr (rsp, 0); + } else + if (EmitSync & 8) { + Label DONE_LABEL; + if (UseBiasedLocking) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + // Classic stack-locking code ... + // Check whether the displaced header is 0 + //(=> recursive unlock) + movptr(tmpReg, Address(boxReg, 0)); + testptr(tmpReg, tmpReg); + jccb(Assembler::zero, DONE_LABEL); + // If not recursive lock, reset the header to displaced header + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box + bind(DONE_LABEL); + } else { + Label DONE_LABEL, Stacked, CheckSucc; + + // Critically, the biased locking test must have precedence over + // and appear before the (box->dhw == 0) recursive stack-lock test. + if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + +#if INCLUDE_RTM_OPT + if (UseRTMForStackLocks && use_rtm) { + assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); + Label L_regular_unlock; + movptr(tmpReg, Address(objReg, 0)); // fetch markword + andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits + cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked + jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock + xend(); // otherwise end... + jmp(DONE_LABEL); // ... and we're done + bind(L_regular_unlock); + } +#endif + + cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header + jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock + movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword + testptr(tmpReg, markOopDesc::monitor_value); // Inflated? + jccb (Assembler::zero, Stacked); + + // It's inflated. +#if INCLUDE_RTM_OPT + if (use_rtm) { + Label L_regular_inflated_unlock; + // Clean monitor_value bit to get valid pointer + int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; + movptr(boxReg, Address(tmpReg, owner_offset)); + testptr(boxReg, boxReg); + jccb(Assembler::notZero, L_regular_inflated_unlock); + xend(); + jmpb(DONE_LABEL); + bind(L_regular_inflated_unlock); + } +#endif + + // Despite our balanced locking property we still check that m->_owner == Self + // as java routines or native JNI code called by this thread might + // have released the lock. + // Refer to the comments in synchronizer.cpp for how we might encode extra + // state in _succ so we can avoid fetching EntryList|cxq. + // + // I'd like to add more cases in fast_lock() and fast_unlock() -- + // such as recursive enter and exit -- but we have to be wary of + // I$ bloat, T$ effects and BP$ effects. + // + // If there's no contention try a 1-0 exit. That is, exit without + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how + // we detect and recover from the race that the 1-0 exit admits. + // + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier + // before it STs null into _owner, releasing the lock. Updates + // to data protected by the critical section must be visible before + // we drop the lock (and thus before any other thread could acquire + // the lock and observe the fields protected by the lock). 
+ // IA32's memory-model is SPO, so STs are ordered with respect to + // each other and there's no need for an explicit barrier (fence). + // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. +#ifndef _LP64 + get_thread (boxReg); + if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { + // prefetchw [ebx + Offset(_owner)-2] + prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + } + + // Note that we could employ various encoding schemes to reduce + // the number of loads below (currently 4) to just 2 or 3. + // Refer to the comments in synchronizer.cpp. + // In practice the chain of fetches doesn't seem to impact performance, however. + if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { + // Attempt to reduce branch density - AMD's branch predictor. + xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); + orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); + orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); + jccb (Assembler::notZero, DONE_LABEL); + movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD); + jmpb (DONE_LABEL); + } else { + xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); + jccb (Assembler::notZero, DONE_LABEL); + movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); + orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); + jccb (Assembler::notZero, CheckSucc); + movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD); + jmpb (DONE_LABEL); + } + + // The Following code fragment (EmitSync & 65536) improves the performance of + // contended applications and contended synchronization microbenchmarks. + // Unfortunately the emission of the code - even though not executed - causes regressions + // in scimark and jetstream, evidently because of $ effects. Replacing the code + // with an equal number of never-executed NOPs results in the same regression. + // We leave it off by default. + + if ((EmitSync & 65536) != 0) { + Label LSuccess, LGoSlowPath ; + + bind (CheckSucc); + + // Optional pre-test ... it's safe to elide this + if ((EmitSync & 16) == 0) { + cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD); + jccb (Assembler::zero, LGoSlowPath); + } + + // We have a classic Dekker-style idiom: + // ST m->_owner = 0 ; MEMBAR; LD m->_succ + // There are a number of ways to implement the barrier: + // (1) lock:andl &m->_owner, 0 + // is fast, but mask doesn't currently support the "ANDL M,IMM32" form. + // LOCK: ANDL [ebx+Offset(_Owner)-2], 0 + // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 + // (2) If supported, an explicit MFENCE is appealing. + // In older IA32 processors MFENCE is slower than lock:add or xchg + // particularly if the write-buffer is full as might be the case if + // if stores closely precede the fence or fence-equivalent instruction. + // In more modern implementations MFENCE appears faster, however. + // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack + // The $lines underlying the top-of-stack should be in M-state. + // The locked add instruction is serializing, of course. 
+ // (4) Use xchg, which is serializing + // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works + // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. + // The integer condition codes will tell us if succ was 0. + // Since _succ and _owner should reside in the same $line and + // we just stored into _owner, it's likely that the $line + // remains in M-state for the lock:orl. + // + // We currently use (3), although it's likely that switching to (2) + // is correct for the future. + + movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD); + if (os::is_MP()) { + if (VM_Version::supports_sse2() && 1 == FenceInstruction) { + mfence(); + } else { + lock (); addptr(Address(rsp, 0), 0); + } + } + // Ratify _succ remains non-null + cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0); + jccb (Assembler::notZero, LSuccess); + + xorptr(boxReg, boxReg); // box is really EAX + if (os::is_MP()) { lock(); } + cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + jccb (Assembler::notEqual, LSuccess); + // Since we're low on registers we installed rsp as a placeholding in _owner. + // Now install Self over rsp. This is safe as we're transitioning from + // non-null to non=null + get_thread (boxReg); + movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg); + // Intentional fall-through into LGoSlowPath ... + + bind (LGoSlowPath); + orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure + jmpb (DONE_LABEL); + + bind (LSuccess); + xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success + jmpb (DONE_LABEL); + } + + bind (Stacked); + // It's not inflated and it's not recursively stack-locked and it's not biased. + // It must be stack-locked. + // Try to reset the header to displaced header. + // The "box" value on the stack is stable, so we can reload + // and be assured we observe the same value as above. + movptr(tmpReg, Address(boxReg, 0)); + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box + // Intention fall-thru into DONE_LABEL + + // DONE_LABEL is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + if ((EmitSync & 65536) == 0) { + bind (CheckSucc); + } +#else // _LP64 + // It's inflated + movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + xorptr(boxReg, r15_thread); + orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); + jccb (Assembler::notZero, DONE_LABEL); + movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); + orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); + jccb (Assembler::notZero, CheckSucc); + movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD); + jmpb (DONE_LABEL); + + if ((EmitSync & 65536) == 0) { + Label LSuccess, LGoSlowPath ; + bind (CheckSucc); + cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD); + jccb (Assembler::zero, LGoSlowPath); + + // I'd much rather use lock:andl m->_owner, 0 as it's faster than the + // the explicit ST;MEMBAR combination, but masm doesn't currently support + // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc + // are all faster when the write buffer is populated. 
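The sequence chosen above (option 3) can be written out with std::atomic as a rough model of the 1-0 exit; ObjectMonitorModel, its field names, and the return convention are simplified stand-ins, and the EntryList/cxq handling that precedes this point is elided:

#include <atomic>

struct ObjectMonitorModel {
  std::atomic<void*> owner;   // owning thread (or a transient stack address); NULL when free
  std::atomic<void*> succ;    // heir presumptive nominated by a contending thread
};

// Returns true when the unlock completes on the fast path,
// false when the caller must continue into the slow path.
bool one_zero_exit(ObjectMonitorModel* m, void* self) {
  m->owner.store(nullptr, std::memory_order_release);     // ST m->_owner = 0 (the "1-0" release)
  std::atomic_thread_fence(std::memory_order_seq_cst);    // emitted as lock addl [rsp], 0
  if (m->succ.load(std::memory_order_relaxed) != nullptr) {
    return true;                                           // a successor is still there; it will re-acquire
  }
  // The successor vanished in the race window: try to retake the lock so we
  // can hand it off ourselves; if that CAS fails, another thread owns the
  // monitor now and inherits the wakeup responsibility.
  void* expected = nullptr;
  if (m->owner.compare_exchange_strong(expected, self)) {
    return false;                                          // we own it again and must take the slow path
  }
  return true;
}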
+ movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD); + if (os::is_MP()) { + lock (); addl (Address(rsp, 0), 0); + } + cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD); + jccb (Assembler::notZero, LSuccess); + + movptr (boxReg, (int32_t)NULL_WORD); // box is really EAX + if (os::is_MP()) { lock(); } + cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); + jccb (Assembler::notEqual, LSuccess); + // Intentional fall-through into slow-path + + bind (LGoSlowPath); + orl (boxReg, 1); // set ICC.ZF=0 to indicate failure + jmpb (DONE_LABEL); + + bind (LSuccess); + testl (boxReg, 0); // set ICC.ZF=1 to indicate success + jmpb (DONE_LABEL); + } + + bind (Stacked); + movptr(tmpReg, Address (boxReg, 0)); // re-fetch + if (os::is_MP()) { lock(); } + cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box + + if (EmitSync & 65536) { + bind (CheckSucc); + } +#endif + bind(DONE_LABEL); + // Avoid branch to branch on AMD processors + if (EmitSync & 32768) { + nop(); + } + } +} +#endif // COMPILER2 + void MacroAssembler::c2bool(Register x) { // implements x == 0 ? 0 : 1 // note: must only look at least-significant byte of x @@ -1969,7 +2852,9 @@ Condition negated_cond = negate_condition(cond); Label L; jcc(negated_cond, L); + pushf(); // Preserve flags atomic_incl(counter_addr); + popf(); bind(L); } diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/macroAssembler_x86.hpp --- a/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -27,6 +27,7 @@ #include "asm/assembler.hpp" #include "utilities/macros.hpp" +#include "runtime/rtmLocking.hpp" // MacroAssembler extends Assembler by frequently used macros. @@ -111,7 +112,8 @@ op == 0xE9 /* jmp */ || op == 0xEB /* short jmp */ || (op & 0xF0) == 0x70 /* short jcc */ || - op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */, + op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ || + op == 0xC7 && branch[1] == 0xF8 /* xbegin */, "Invalid opcode at patch point"); if (op == 0xEB || (op & 0xF0) == 0x70) { @@ -121,7 +123,7 @@ guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset"); *disp = imm8; } else { - int* disp = (int*) &branch[(op == 0x0F)? 2: 1]; + int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1]; int imm32 = target - (address) &disp[1]; *disp = imm32; } @@ -161,7 +163,6 @@ void incrementq(Register reg, int value = 1); void incrementq(Address dst, int value = 1); - // Support optimal SSE move instructions. void movflt(XMMRegister dst, XMMRegister src) { if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } @@ -187,6 +188,8 @@ void incrementl(AddressLiteral dst); void incrementl(ArrayAddress dst); + void incrementq(AddressLiteral dst); + // Alignment void align(int modulus); @@ -651,7 +654,40 @@ Label& done, Label* slow_case = NULL, BiasedLockingCounters* counters = NULL); void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); - +#ifdef COMPILER2 + // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. + // See full desription in macroAssembler_x86.cpp. 
+ void fast_lock(Register obj, Register box, Register tmp, + Register scr, Register cx1, Register cx2, + BiasedLockingCounters* counters, + RTMLockingCounters* rtm_counters, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, + bool use_rtm, bool profile_rtm); + void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); +#if INCLUDE_RTM_OPT + void rtm_counters_update(Register abort_status, Register rtm_counters); + void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel); + void rtm_abort_ratio_calculation(Register tmp, Register rtm_counters_reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data); + void rtm_profiling(Register abort_status_Reg, Register rtm_counters_Reg, + RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); + void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, Label& retryLabel); + void rtm_retry_lock_on_busy(Register retry_count, Register box, Register tmp, Register scr, Label& retryLabel); + void rtm_stack_locking(Register obj, Register tmp, Register scr, + Register retry_on_abort_count, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL, Label& IsInflated); + void rtm_inflated_locking(Register obj, Register box, Register tmp, + Register scr, Register retry_on_busy_count, + Register retry_on_abort_count, + RTMLockingCounters* rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL); +#endif +#endif Condition negate_condition(Condition cond); @@ -716,6 +752,7 @@ void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); } + void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); } void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); } @@ -757,7 +794,14 @@ // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. void cond_inc32(Condition cond, AddressLiteral counter_addr); // Unconditional atomic increment. - void atomic_incl(AddressLiteral counter_addr); + void atomic_incl(Address counter_addr); + void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1); +#ifdef _LP64 + void atomic_incq(Address counter_addr); + void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1); +#endif + void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; } + void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; } void lea(Register dst, AddressLiteral adr); void lea(Address dst, AddressLiteral adr); @@ -1069,7 +1113,11 @@ void movptr(Register dst, Address src); - void movptr(Register dst, AddressLiteral src); +#ifdef _LP64 + void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1); +#else + void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit +#endif void movptr(Register dst, intptr_t src); void movptr(Register dst, Register src); diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/rtmLocking.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/rtmLocking.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/task.hpp" +#include "runtime/rtmLocking.hpp" + +// One-shot PeriodicTask subclass for enabling RTM locking +uintx RTMLockingCounters::_calculation_flag = 0; + +class RTMLockingCalculationTask : public PeriodicTask { + public: + RTMLockingCalculationTask(size_t interval_time) : PeriodicTask(interval_time){ } + + virtual void task() { + RTMLockingCounters::_calculation_flag = 1; + // Reclaim our storage and disenroll ourself + delete this; + } +}; + +void RTMLockingCounters::init() { + if (UseRTMLocking && RTMLockingCalculationDelay > 0) { + RTMLockingCalculationTask* task = new RTMLockingCalculationTask(RTMLockingCalculationDelay); + task->enroll(); + } else { + _calculation_flag = 1; + } +} + +//------------------------------print_on------------------------------- +void RTMLockingCounters::print_on(outputStream* st) { + tty->print_cr("# rtm locks total (estimated): " UINTX_FORMAT, _total_count * RTMTotalCountIncrRate); + tty->print_cr("# rtm lock aborts : " UINTX_FORMAT, _abort_count); + for (int i = 0; i < ABORT_STATUS_LIMIT; i++) { + tty->print_cr("# rtm lock aborts %d: " UINTX_FORMAT, i, _abortX_count[i]); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/sharedRuntime_x86_32.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1817,6 +1817,13 @@ // Frame is now completed as far as size and linkage. int frame_complete = ((intptr_t)__ pc()) - start; + if (UseRTMLocking) { + // Abort RTM transaction before calling JNI + // because critical section will be large and will be + // aborted anyway. Also nmethod could be deoptimized. + __ xabort(0); + } + // Calculate the difference between rsp and rbp,. We need to know it // after the native call because on windows Java Natives will pop // the arguments and it is painful to do rsp relative addressing @@ -3170,6 +3177,12 @@ }; address start = __ pc(); + + if (UseRTMLocking) { + // Abort RTM transaction before possible nmethod deoptimization. + __ xabort(0); + } + // Push self-frame. __ subptr(rsp, return_off*wordSize); // Epilog! @@ -3355,6 +3368,14 @@ address call_pc = NULL; bool cause_return = (poll_type == POLL_AT_RETURN); bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); + + if (UseRTMLocking) { + // Abort RTM transaction before calling runtime + // because critical section will be large and will be + // aborted anyway. Also nmethod could be deoptimized. 
+ __ xabort(0); + } + // If cause_return is true we are at a poll_return and there is // the return address on the stack to the caller on the nmethod // that is safepoint. We can leave this return on the stack and diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -2012,6 +2012,13 @@ // Frame is now completed as far as size and linkage. int frame_complete = ((intptr_t)__ pc()) - start; + if (UseRTMLocking) { + // Abort RTM transaction before calling JNI + // because critical section will be large and will be + // aborted anyway. Also nmethod could be deoptimized. + __ xabort(0); + } + #ifdef ASSERT { Label L; @@ -3612,6 +3619,11 @@ address start = __ pc(); + if (UseRTMLocking) { + // Abort RTM transaction before possible nmethod deoptimization. + __ xabort(0); + } + // Push self-frame. We get here with a return address on the // stack, so rsp is 8-byte aligned until we allocate our frame. __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog! @@ -3792,6 +3804,13 @@ bool cause_return = (poll_type == POLL_AT_RETURN); bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); + if (UseRTMLocking) { + // Abort RTM transaction before calling runtime + // because critical section will be large and will be + // aborted anyway. Also nmethod could be deoptimized. + __ xabort(0); + } + // Make room for return address (or push it again) if (!cause_return) { __ push(rbx); diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/vm_version_x86.cpp --- a/src/cpu/x86/vm/vm_version_x86.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -50,8 +50,13 @@ const char* VM_Version::_features_str = ""; VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; +// Address of instruction which causes SEGV +address VM_Version::_cpuinfo_segv_addr = 0; +// Address of instruction after the one which causes SEGV +address VM_Version::_cpuinfo_cont_addr = 0; + static BufferBlob* stub_blob; -static const int stub_size = 550; +static const int stub_size = 600; extern "C" { typedef void (*getPsrInfo_stub_t)(void*); @@ -234,9 +239,9 @@ // Check if OS has enabled XGETBV instruction to access XCR0 // (OSXSAVE feature flag) and CPU supports AVX // - __ andl(rcx, 0x18000000); + __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx __ cmpl(rcx, 0x18000000); - __ jccb(Assembler::notEqual, sef_cpuid); + __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported // // XCR0, XFEATURE_ENABLED_MASK register @@ -247,6 +252,47 @@ __ movl(Address(rsi, 0), rax); __ movl(Address(rsi, 4), rdx); + __ andl(rax, 0x6); // xcr0 bits sse | ymm + __ cmpl(rax, 0x6); + __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported + + // + // Some OSs have a bug when upper 128bits of YMM + // registers are not restored after a signal processing. + // Generate SEGV here (reference through NULL) + // and check upper YMM bits after it. 
+ // + VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts + + // load value into all 32 bytes of ymm7 register + __ movl(rcx, VM_Version::ymm_test_value()); + + __ movdl(xmm0, rcx); + __ pshufd(xmm0, xmm0, 0x00); + __ vinsertf128h(xmm0, xmm0, xmm0); + __ vmovdqu(xmm7, xmm0); +#ifdef _LP64 + __ vmovdqu(xmm8, xmm0); + __ vmovdqu(xmm15, xmm0); +#endif + + __ xorl(rsi, rsi); + VM_Version::set_cpuinfo_segv_addr( __ pc() ); + // Generate SEGV + __ movl(rax, Address(rsi, 0)); + + VM_Version::set_cpuinfo_cont_addr( __ pc() ); + // Returns here after signal. Save xmm0 to check it later. + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); + __ vmovdqu(Address(rsi, 0), xmm0); + __ vmovdqu(Address(rsi, 32), xmm7); +#ifdef _LP64 + __ vmovdqu(Address(rsi, 64), xmm8); + __ vmovdqu(Address(rsi, 96), xmm15); +#endif + + VM_Version::clean_cpuFeatures(); + // // cpuid(0x7) Structured Extended Features // @@ -429,7 +475,7 @@ } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -446,8 +492,9 @@ (supports_avx() ? ", avx" : ""), (supports_avx2() ? ", avx2" : ""), (supports_aes() ? ", aes" : ""), - (supports_clmul() ? ", clmul" : ""), + (supports_clmul() ? ", clmul" : ""), (supports_erms() ? ", erms" : ""), + (supports_rtm() ? ", rtm" : ""), (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_lzcnt() ? ", lzcnt": ""), @@ -455,7 +502,9 @@ (supports_ht() ? ", ht": ""), (supports_tsc() ? ", tsc": ""), (supports_tscinv_bit() ? ", tscinvbit": ""), - (supports_tscinv() ? ", tscinv": "")); + (supports_tscinv() ? ", tscinv": ""), + (supports_bmi1() ? ", bmi1" : ""), + (supports_bmi2() ? ", bmi2" : "")); _features_str = strdup(buf); // UseSSE is set to the smaller of what hardware supports and what @@ -486,7 +535,7 @@ } } else if (UseAES) { if (!FLAG_IS_DEFAULT(UseAES)) - warning("AES instructions not available on this CPU"); + warning("AES instructions are not available on this CPU"); FLAG_SET_DEFAULT(UseAES, false); } @@ -519,10 +568,57 @@ } } else if (UseAESIntrinsics) { if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) - warning("AES intrinsics not available on this CPU"); + warning("AES intrinsics are not available on this CPU"); FLAG_SET_DEFAULT(UseAESIntrinsics, false); } + // Adjust RTM (Restricted Transactional Memory) flags + if (!supports_rtm() && UseRTMLocking) { + // Can't continue because UseRTMLocking affects UseBiasedLocking flag + // setting during arguments processing. See use_biased_locking(). + // VM_Version_init() is executed after UseBiasedLocking is used + // in Thread::allocate(). + vm_exit_during_initialization("RTM instructions are not available on this CPU"); + } + +#if INCLUDE_RTM_OPT + if (UseRTMLocking) { + if (!FLAG_IS_CMDLINE(UseRTMLocking)) { + // RTM locking should be used only for applications with + // high lock contention. For now we do not use it by default. 
+ vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); + } + if (!is_power_of_2(RTMTotalCountIncrRate)) { + warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64"); + FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64); + } + if (RTMAbortRatio < 0 || RTMAbortRatio > 100) { + warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50"); + FLAG_SET_DEFAULT(RTMAbortRatio, 50); + } + } else { // !UseRTMLocking + if (UseRTMForStackLocks) { + if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { + warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); + } + FLAG_SET_DEFAULT(UseRTMForStackLocks, false); + } + if (UseRTMDeopt) { + FLAG_SET_DEFAULT(UseRTMDeopt, false); + } + if (PrintPreciseRTMLockingStatistics) { + FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); + } + } +#else + if (UseRTMLocking) { + // Only C2 does RTM locking optimization. + // Can't continue because UseRTMLocking affects UseBiasedLocking flag + // setting during arguments processing. See use_biased_locking(). + vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); + } +#endif + #ifdef COMPILER2 if (UseFPUForSpilling) { if (UseSSE < 2) { @@ -538,14 +634,28 @@ if (MaxVectorSize > 32) { FLAG_SET_DEFAULT(MaxVectorSize, 32); } - if (MaxVectorSize > 16 && UseAVX == 0) { - // Only supported with AVX+ + if (MaxVectorSize > 16 && (UseAVX == 0 || !os_supports_avx_vectors())) { + // 32 bytes vectors (in YMM) are only supported with AVX+ FLAG_SET_DEFAULT(MaxVectorSize, 16); } if (UseSSE < 2) { - // Only supported with SSE2+ + // Vectors (in XMM) are only supported with SSE2+ FLAG_SET_DEFAULT(MaxVectorSize, 0); } +#ifdef ASSERT + if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { + tty->print_cr("State of YMM registers after signal handle:"); + int nreg = 2 LP64_ONLY(+2); + const char* ymm_name[4] = {"0", "7", "8", "15"}; + for (int i = 0; i < nreg; i++) { + tty->print("YMM%s:", ymm_name[i]); + for (int j = 7; j >=0; j--) { + tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); + } + tty->cr(); + } + } +#endif } #endif @@ -600,13 +710,6 @@ } } - // Use count leading zeros count instruction if available. - if (supports_lzcnt()) { - if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { - UseCountLeadingZerosInstruction = true; - } - } - // some defaults for AMD family 15h if ( cpu_family() == 0x15 ) { // On family 15h processors default is no sw prefetch @@ -683,14 +786,35 @@ } } } -#if defined(COMPILER2) && defined(_ALLBSD_SOURCE) - if (MaxVectorSize > 16) { - // Limit vectors size to 16 bytes on BSD until it fixes - // restoring upper 128bit of YMM registers on return - // from signal handler. - FLAG_SET_DEFAULT(MaxVectorSize, 16); + + // Use count leading zeros count instruction if available. 
+ if (supports_lzcnt()) { + if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { + UseCountLeadingZerosInstruction = true; + } + } else if (UseCountLeadingZerosInstruction) { + warning("lzcnt instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); + } + + if (supports_bmi1()) { + if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { + UseBMI1Instructions = true; } -#endif // COMPILER2 + } else if (UseBMI1Instructions) { + warning("BMI1 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseBMI1Instructions, false); + } + + // Use count trailing zeros instruction if available + if (supports_bmi1()) { + if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { + UseCountTrailingZerosInstruction = UseBMI1Instructions; + } + } else if (UseCountTrailingZerosInstruction) { + warning("tzcnt instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); + } // Use population count instruction if available. if (supports_popcnt()) { @@ -790,6 +914,11 @@ if (UseAES) { tty->print(" UseAES=1"); } +#ifdef COMPILER2 + if (MaxVectorSize > 0) { + tty->print(" MaxVectorSize=%d", MaxVectorSize); + } +#endif tty->cr(); tty->print("Allocation"); if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { @@ -832,6 +961,27 @@ #endif // !PRODUCT } +bool VM_Version::use_biased_locking() { +#if INCLUDE_RTM_OPT + // RTM locking is most useful when there is high lock contention and + // low data contention. With high lock contention the lock is usually + // inflated and biased locking is not suitable for that case. + // RTM locking code requires that biased locking is off. + // Note: we can't switch off UseBiasedLocking in get_processor_features() + // because it is used by Thread::allocate() which is called before + // VM_Version::initialize(). + if (UseRTMLocking && UseBiasedLocking) { + if (FLAG_IS_DEFAULT(UseBiasedLocking)) { + FLAG_SET_DEFAULT(UseBiasedLocking, false); + } else { + warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." 
); + UseBiasedLocking = false; + } + } +#endif + return UseBiasedLocking; +} + void VM_Version::initialize() { ResourceMark rm; // Making this stub must be FIRST use of assembler diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/vm_version_x86.hpp --- a/src/cpu/x86/vm/vm_version_x86.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -141,7 +141,8 @@ struct { uint32_t LahfSahf : 1, CmpLegacy : 1, - : 4, + : 3, + lzcnt_intel : 1, lzcnt : 1, sse4a : 1, misalignsse : 1, @@ -206,7 +207,9 @@ : 2, bmi2 : 1, erms : 1, - : 22; + : 1, + rtm : 1, + : 20; } bits; }; @@ -228,6 +231,9 @@ // 0 if this instruction is not available static const char* _features_str; + static address _cpuinfo_segv_addr; // address of instruction which causes SEGV + static address _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV + enum { CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) CPU_CMOV = (1 << 1), @@ -251,7 +257,10 @@ CPU_AVX2 = (1 << 18), CPU_AES = (1 << 19), CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions - CPU_CLMUL = (1 << 21) // carryless multiply for CRC + CPU_CLMUL = (1 << 21), // carryless multiply for CRC + CPU_BMI1 = (1 << 22), + CPU_BMI2 = (1 << 23), + CPU_RTM = (1 << 24) // Restricted Transactional Memory instructions } cpuFeatureFlags; enum { @@ -358,6 +367,9 @@ // extended control register XCR0 (the XFEATURE_ENABLED_MASK register) XemXcr0Eax xem_xcr0_eax; uint32_t xem_xcr0_edx; // reserved + + // Space to save ymm registers after signal handle + int ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15 }; // The actual cpuid info block @@ -423,6 +435,8 @@ if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) result |= CPU_AVX2; } + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0) + result |= CPU_BMI1; if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) result |= CPU_TSC; if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) @@ -433,6 +447,8 @@ result |= CPU_ERMS; if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0) result |= CPU_CLMUL; + if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0) + result |= CPU_RTM; // AMD features. if (is_amd()) { @@ -444,10 +460,32 @@ if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) result |= CPU_SSE4A; } + // Intel features. + if(is_intel()) { + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) + result |= CPU_BMI2; + if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0) + result |= CPU_LZCNT; + } return result; } + static bool os_supports_avx_vectors() { + if (!supports_avx()) { + return false; + } + // Verify that OS save/restore all bits of AVX registers + // during signal processing. 
+ int nreg = 2 LP64_ONLY(+2); + for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register + if (_cpuid_info.ymm_save[i] != ymm_test_value()) { + return false; + } + } + return true; + } + static void get_processor_features(); public: @@ -464,10 +502,26 @@ static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); } + static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); } + + // The value used to check ymm register after signal handle + static int ymm_test_value() { return 0xCAFEBABE; } + + static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; } + static bool is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; } + static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; } + static address cpuinfo_cont_addr() { return _cpuinfo_cont_addr; } + + static void clean_cpuFeatures() { _cpuFeatures = 0; } + static void set_avx_cpuFeatures() { _cpuFeatures = (CPU_SSE | CPU_SSE2 | CPU_AVX); } + // Initialization static void initialize(); + // Override Abstract_VM_Version implementation + static bool use_biased_locking(); + // Asserts static void assert_is_initialized() { assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); @@ -560,7 +614,9 @@ static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; } static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; } - + static bool supports_rtm() { return (_cpuFeatures & CPU_RTM) != 0; } + static bool supports_bmi1() { return (_cpuFeatures & CPU_BMI1) != 0; } + static bool supports_bmi2() { return (_cpuFeatures & CPU_BMI2) != 0; } // Intel features static bool is_intel_family_core() { return is_intel() && extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Tue Mar 25 17:07:36 2014 -0700 @@ -1489,19 +1489,6 @@ return EBP_REG_mask(); } -const RegMask Matcher::mathExactI_result_proj_mask() { - return EAX_REG_mask(); -} - -const RegMask Matcher::mathExactL_result_proj_mask() { - ShouldNotReachHere(); - return RegMask(); -} - -const RegMask Matcher::mathExactI_flags_proj_mask() { - return INT_FLAGS_mask(); -} - // Returns true if the high 32 bits of the value is known to be zero. bool is_operand_hi32_zero(Node* n) { int opc = n->Opcode(); @@ -2865,542 +2852,6 @@ emit_d8 (cbuf,0 ); %} - - // Because the transitions from emitted code to the runtime - // monitorenter/exit helper stubs are so slow it's critical that - // we inline both the stack-locking fast-path and the inflated fast path. - // - // See also: cmpFastLock and cmpFastUnlock. - // - // What follows is a specialized inline transliteration of the code - // in slow_enter() and slow_exit(). If we're concerned about I$ bloat - // another option would be to emit TrySlowEnter and TrySlowExit methods - // at startup-time. These methods would accept arguments as - // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure - // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply - // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. 
- // In practice, however, the # of lock sites is bounded and is usually small. - // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer - // if the processor uses simple bimodal branch predictors keyed by EIP - // Since the helper routines would be called from multiple synchronization - // sites. - // - // An even better approach would be write "MonitorEnter()" and "MonitorExit()" - // in java - using j.u.c and unsafe - and just bind the lock and unlock sites - // to those specialized methods. That'd give us a mostly platform-independent - // implementation that the JITs could optimize and inline at their pleasure. - // Done correctly, the only time we'd need to cross to native could would be - // to park() or unpark() threads. We'd also need a few more unsafe operators - // to (a) prevent compiler-JIT reordering of non-volatile accesses, and - // (b) explicit barriers or fence operations. - // - // TODO: - // - // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). - // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. - // Given TLAB allocation, Self is usually manifested in a register, so passing it into - // the lock operators would typically be faster than reifying Self. - // - // * Ideally I'd define the primitives as: - // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. - // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED - // Unfortunately ADLC bugs prevent us from expressing the ideal form. - // Instead, we're stuck with a rather awkward and brittle register assignments below. - // Furthermore the register assignments are overconstrained, possibly resulting in - // sub-optimal code near the synchronization site. - // - // * Eliminate the sp-proximity tests and just use "== Self" tests instead. - // Alternately, use a better sp-proximity test. - // - // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. - // Either one is sufficient to uniquely identify a thread. - // TODO: eliminate use of sp in _owner and use get_thread(tr) instead. - // - // * Intrinsify notify() and notifyAll() for the common cases where the - // object is locked by the calling thread but the waitlist is empty. - // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). - // - // * use jccb and jmpb instead of jcc and jmp to improve code density. - // But beware of excessive branch density on AMD Opterons. - // - // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success - // or failure of the fast-path. If the fast-path fails then we pass - // control to the slow-path, typically in C. In Fast_Lock and - // Fast_Unlock we often branch to DONE_LABEL, just to find that C2 - // will emit a conditional branch immediately after the node. - // So we have branches to branches and lots of ICC.ZF games. - // Instead, it might be better to have C2 pass a "FailureLabel" - // into Fast_Lock and Fast_Unlock. In the case of success, control - // will drop through the node. ICC.ZF is undefined at exit. 
- // In the case of failure, the node will branch directly to the - // FailureLabel - - - // obj: object to lock - // box: on-stack box address (displaced header location) - KILLED - // rax,: tmp -- KILLED - // scr: tmp -- KILLED - enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{ - - Register objReg = as_Register($obj$$reg); - Register boxReg = as_Register($box$$reg); - Register tmpReg = as_Register($tmp$$reg); - Register scrReg = as_Register($scr$$reg); - - // Ensure the register assignents are disjoint - guarantee (objReg != boxReg, "") ; - guarantee (objReg != tmpReg, "") ; - guarantee (objReg != scrReg, "") ; - guarantee (boxReg != tmpReg, "") ; - guarantee (boxReg != scrReg, "") ; - guarantee (tmpReg == as_Register(EAX_enc), "") ; - - MacroAssembler masm(&cbuf); - - if (_counters != NULL) { - masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr())); - } - if (EmitSync & 1) { - // set box->dhw = unused_mark (3) - // Force all sync thru slow-path: slow_enter() and slow_exit() - masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ; - masm.cmpptr (rsp, (int32_t)0) ; - } else - if (EmitSync & 2) { - Label DONE_LABEL ; - if (UseBiasedLocking) { - // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. - masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); - } - - masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword - masm.orptr (tmpReg, 0x1); - masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - masm.jcc(Assembler::equal, DONE_LABEL); - // Recursive locking - masm.subptr(tmpReg, rsp); - masm.andptr(tmpReg, (int32_t) 0xFFFFF003 ); - masm.movptr(Address(boxReg, 0), tmpReg); - masm.bind(DONE_LABEL) ; - } else { - // Possible cases that we'll encounter in fast_lock - // ------------------------------------------------ - // * Inflated - // -- unlocked - // -- Locked - // = by self - // = by other - // * biased - // -- by Self - // -- by other - // * neutral - // * stack-locked - // -- by self - // = sp-proximity test hits - // = sp-proximity test generates false-negative - // -- by other - // - - Label IsInflated, DONE_LABEL, PopDone ; - - // TODO: optimize away redundant LDs of obj->mark and improve the markword triage - // order to reduce the number of conditional branches in the most common cases. - // Beware -- there's a subtle invariant that fetch of the markword - // at [FETCH], below, will never observe a biased encoding (*101b). - // If this invariant is not held we risk exclusion (safety) failure. - if (UseBiasedLocking && !UseOptoBiasInlining) { - masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); - } - - masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH] - masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral) - masm.jccb (Assembler::notZero, IsInflated) ; - - // Attempt stack-locking ... 
- masm.orptr (tmpReg, 0x1); - masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - if (_counters != NULL) { - masm.cond_inc32(Assembler::equal, - ExternalAddress((address)_counters->fast_path_entry_count_addr())); - } - masm.jccb (Assembler::equal, DONE_LABEL); - - // Recursive locking - masm.subptr(tmpReg, rsp); - masm.andptr(tmpReg, 0xFFFFF003 ); - masm.movptr(Address(boxReg, 0), tmpReg); - if (_counters != NULL) { - masm.cond_inc32(Assembler::equal, - ExternalAddress((address)_counters->fast_path_entry_count_addr())); - } - masm.jmp (DONE_LABEL) ; - - masm.bind (IsInflated) ; - - // The object is inflated. - // - // TODO-FIXME: eliminate the ugly use of manifest constants: - // Use markOopDesc::monitor_value instead of "2". - // use markOop::unused_mark() instead of "3". - // The tmpReg value is an objectMonitor reference ORed with - // markOopDesc::monitor_value (2). We can either convert tmpReg to an - // objectmonitor pointer by masking off the "2" bit or we can just - // use tmpReg as an objectmonitor pointer but bias the objectmonitor - // field offsets with "-2" to compensate for and annul the low-order tag bit. - // - // I use the latter as it avoids AGI stalls. - // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]" - // instead of "mov r, [tmpReg+OFFSETOF(Owner)]". - // - #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2) - - // boxReg refers to the on-stack BasicLock in the current frame. - // We'd like to write: - // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices. - // This is convenient but results a ST-before-CAS penalty. The following CAS suffers - // additional latency as we have another ST in the store buffer that must drain. - - if (EmitSync & 8192) { - masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty - masm.get_thread (scrReg) ; - masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] - masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - } else - if ((EmitSync & 128) == 0) { // avoid ST-before-CAS - masm.movptr(scrReg, boxReg) ; - masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] - - // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes - if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [eax + Offset(_owner)-2] - masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - if ((EmitSync & 64) == 0) { - // Optimistic form: consider XORL tmpReg,tmpReg - masm.movptr(tmpReg, NULL_WORD) ; - } else { - // Can suffer RTS->RTO upgrades on shared or cold $ lines - // Test-And-CAS instead of CAS - masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner - masm.testptr(tmpReg, tmpReg) ; // Locked ? - masm.jccb (Assembler::notZero, DONE_LABEL) ; - } - - // Appears unlocked - try to swing _owner from null to non-null. - // Ideally, I'd manifest "Self" with get_thread and then attempt - // to CAS the register containing Self into m->Owner. - // But we don't have enough registers, so instead we can either try to CAS - // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds - // we later store "Self" into m->Owner. 
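The removed commentary and code above describe the shape of the stack-locking fast path: fetch the mark word, divert to the inflated path if the monitor bit is set, otherwise CAS the on-stack box into the object header and fall back to an sp-proximity test for recursive locking. A loose C++ paraphrase of that control flow, using placeholder types and a simplified proximity window rather than HotSpot's actual mark-word layout and masks:

    #include <atomic>
    #include <cstdint>

    // Stand-ins for the object header and the on-stack BasicLock "box" above.
    struct ObjectHeader { std::atomic<std::uintptr_t> mark; };

    bool fast_lock_sketch(ObjectHeader* obj, std::uintptr_t* box, std::uintptr_t sp) {
      std::uintptr_t mark = obj->mark.load(std::memory_order_relaxed);
      if (mark & 0x2) {
        return false;                              // inflated: take the monitor path
      }
      std::uintptr_t unlocked = mark | 0x1;        // anticipated unlocked mark word
      *box = unlocked;                             // displaced header into the box
      std::uintptr_t box_addr = reinterpret_cast<std::uintptr_t>(box);
      if (obj->mark.compare_exchange_strong(unlocked, box_addr)) {
        return true;                               // stack-locked by this frame
      }
      // On failure `unlocked` holds the observed mark; if it points into our own
      // frame the lock is held recursively (the real test masks with 0xFFFFF003).
      return (unlocked - sp) < 4096;
    }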
Transiently storing a stack address - // (rsp or the address of the box) into m->owner is harmless. - // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3 - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.get_thread (scrReg) ; // beware: clobbers ICCs - masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ; - masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success - - // If the CAS fails we can either retry or pass control to the slow-path. - // We use the latter tactic. - // Pass the CAS result in the icc.ZFlag into DONE_LABEL - // If the CAS was successful ... - // Self has acquired the lock - // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. - // Intentional fall-through into DONE_LABEL ... - } else { - masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty - masm.movptr(boxReg, tmpReg) ; - - // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes - if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [eax + Offset(_owner)-2] - masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - if ((EmitSync & 64) == 0) { - // Optimistic form - masm.xorptr (tmpReg, tmpReg) ; - } else { - // Can suffer RTS->RTO upgrades on shared or cold $ lines - masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner - masm.testptr(tmpReg, tmpReg) ; // Locked ? - masm.jccb (Assembler::notZero, DONE_LABEL) ; - } - - // Appears unlocked - try to swing _owner from null to non-null. - // Use either "Self" (in scr) or rsp as thread identity in _owner. - // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. - masm.get_thread (scrReg) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - - // If the CAS fails we can either retry or pass control to the slow-path. - // We use the latter tactic. - // Pass the CAS result in the icc.ZFlag into DONE_LABEL - // If the CAS was successful ... - // Self has acquired the lock - // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. - // Intentional fall-through into DONE_LABEL ... - } - - // DONE_LABEL is a hot target - we'd really like to place it at the - // start of cache line by padding with NOPs. - // See the AMD and Intel software optimization manuals for the - // most efficient "long" NOP encodings. - // Unfortunately none of our alignment mechanisms suffice. - masm.bind(DONE_LABEL); - - // Avoid branch-to-branch on AMD processors - // This appears to be superstition. - if (EmitSync & 32) masm.nop() ; - - - // At DONE_LABEL the icc ZFlag is set as follows ... - // Fast_Unlock uses the same protocol. - // ZFlag == 1 -> Success - // ZFlag == 0 -> Failure - force control through the slow-path - } - %} - - // obj: object to unlock - // box: box address (displaced header location), killed. Must be EAX. - // rbx,: killed tmp; cannot be obj nor box. - // - // Some commentary on balanced locking: - // - // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. 
- // Methods that don't have provably balanced locking are forced to run in the - // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. - // The interpreter provides two properties: - // I1: At return-time the interpreter automatically and quietly unlocks any - // objects acquired the current activation (frame). Recall that the - // interpreter maintains an on-stack list of locks currently held by - // a frame. - // I2: If a method attempts to unlock an object that is not held by the - // the frame the interpreter throws IMSX. - // - // Lets say A(), which has provably balanced locking, acquires O and then calls B(). - // B() doesn't have provably balanced locking so it runs in the interpreter. - // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O - // is still locked by A(). - // - // The only other source of unbalanced locking would be JNI. The "Java Native Interface: - // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter - // should not be unlocked by "normal" java-level locking and vice-versa. The specification - // doesn't specify what will occur if a program engages in such mixed-mode locking, however. - - enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{ - - Register objReg = as_Register($obj$$reg); - Register boxReg = as_Register($box$$reg); - Register tmpReg = as_Register($tmp$$reg); - - guarantee (objReg != boxReg, "") ; - guarantee (objReg != tmpReg, "") ; - guarantee (boxReg != tmpReg, "") ; - guarantee (boxReg == as_Register(EAX_enc), "") ; - MacroAssembler masm(&cbuf); - - if (EmitSync & 4) { - // Disable - inhibit all inlining. Force control through the slow-path - masm.cmpptr (rsp, 0) ; - } else - if (EmitSync & 8) { - Label DONE_LABEL ; - if (UseBiasedLocking) { - masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - // classic stack-locking code ... - masm.movptr(tmpReg, Address(boxReg, 0)) ; - masm.testptr(tmpReg, tmpReg) ; - masm.jcc (Assembler::zero, DONE_LABEL) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box - masm.bind(DONE_LABEL); - } else { - Label DONE_LABEL, Stacked, CheckSucc, Inflated ; - - // Critically, the biased locking test must have precedence over - // and appear before the (box->dhw == 0) recursive stack-lock test. - if (UseBiasedLocking && !UseOptoBiasInlining) { - masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - - masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header - masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword - masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock - - masm.testptr(tmpReg, 0x02) ; // Inflated? - masm.jccb (Assembler::zero, Stacked) ; - - masm.bind (Inflated) ; - // It's inflated. - // Despite our balanced locking property we still check that m->_owner == Self - // as java routines or native JNI code called by this thread might - // have released the lock. - // Refer to the comments in synchronizer.cpp for how we might encode extra - // state in _succ so we can avoid fetching EntryList|cxq. - // - // I'd like to add more cases in fast_lock() and fast_unlock() -- - // such as recursive enter and exit -- but we have to be wary of - // I$ bloat, T$ effects and BP$ effects. - // - // If there's no contention try a 1-0 exit. That is, exit without - // a costly MEMBAR or CAS. See synchronizer.cpp for details on how - // we detect and recover from the race that the 1-0 exit admits. 
- // - // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier - // before it STs null into _owner, releasing the lock. Updates - // to data protected by the critical section must be visible before - // we drop the lock (and thus before any other thread could acquire - // the lock and observe the fields protected by the lock). - // IA32's memory-model is SPO, so STs are ordered with respect to - // each other and there's no need for an explicit barrier (fence). - // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. - - masm.get_thread (boxReg) ; - if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { - // prefetchw [ebx + Offset(_owner)-2] - masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2)); - } - - // Note that we could employ various encoding schemes to reduce - // the number of loads below (currently 4) to just 2 or 3. - // Refer to the comments in synchronizer.cpp. - // In practice the chain of fetches doesn't seem to impact performance, however. - if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { - // Attempt to reduce branch density - AMD's branch predictor. - masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - masm.jmpb (DONE_LABEL) ; - } else { - masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, DONE_LABEL) ; - masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; - masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; - masm.jccb (Assembler::notZero, CheckSucc) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - masm.jmpb (DONE_LABEL) ; - } - - // The Following code fragment (EmitSync & 65536) improves the performance of - // contended applications and contended synchronization microbenchmarks. - // Unfortunately the emission of the code - even though not executed - causes regressions - // in scimark and jetstream, evidently because of $ effects. Replacing the code - // with an equal number of never-executed NOPs results in the same regression. - // We leave it off by default. - - if ((EmitSync & 65536) != 0) { - Label LSuccess, LGoSlowPath ; - - masm.bind (CheckSucc) ; - - // Optional pre-test ... it's safe to elide this - if ((EmitSync & 16) == 0) { - masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; - masm.jccb (Assembler::zero, LGoSlowPath) ; - } - - // We have a classic Dekker-style idiom: - // ST m->_owner = 0 ; MEMBAR; LD m->_succ - // There are a number of ways to implement the barrier: - // (1) lock:andl &m->_owner, 0 - // is fast, but mask doesn't currently support the "ANDL M,IMM32" form. - // LOCK: ANDL [ebx+Offset(_Owner)-2], 0 - // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 - // (2) If supported, an explicit MFENCE is appealing. 
- // In older IA32 processors MFENCE is slower than lock:add or xchg - // particularly if the write-buffer is full as might be the case if - // if stores closely precede the fence or fence-equivalent instruction. - // In more modern implementations MFENCE appears faster, however. - // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack - // The $lines underlying the top-of-stack should be in M-state. - // The locked add instruction is serializing, of course. - // (4) Use xchg, which is serializing - // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works - // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. - // The integer condition codes will tell us if succ was 0. - // Since _succ and _owner should reside in the same $line and - // we just stored into _owner, it's likely that the $line - // remains in M-state for the lock:orl. - // - // We currently use (3), although it's likely that switching to (2) - // is correct for the future. - - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; - if (os::is_MP()) { - if (VM_Version::supports_sse2() && 1 == FenceInstruction) { - masm.mfence(); - } else { - masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; - } - } - // Ratify _succ remains non-null - masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; - masm.jccb (Assembler::notZero, LSuccess) ; - - masm.xorptr(boxReg, boxReg) ; // box is really EAX - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); - masm.jccb (Assembler::notEqual, LSuccess) ; - // Since we're low on registers we installed rsp as a placeholding in _owner. - // Now install Self over rsp. This is safe as we're transitioning from - // non-null to non=null - masm.get_thread (boxReg) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ; - // Intentional fall-through into LGoSlowPath ... - - masm.bind (LGoSlowPath) ; - masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure - masm.jmpb (DONE_LABEL) ; - - masm.bind (LSuccess) ; - masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success - masm.jmpb (DONE_LABEL) ; - } - - masm.bind (Stacked) ; - // It's not inflated and it's not recursively stack-locked and it's not biased. - // It must be stack-locked. - // Try to reset the header to displaced header. - // The "box" value on the stack is stable, so we can reload - // and be assured we observe the same value as above. - masm.movptr(tmpReg, Address(boxReg, 0)) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box - // Intention fall-thru into DONE_LABEL - - - // DONE_LABEL is a hot target - we'd really like to place it at the - // start of cache line by padding with NOPs. - // See the AMD and Intel software optimization manuals for the - // most efficient "long" NOP encodings. - // Unfortunately none of our alignment mechanisms suffice. 
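The barrier discussion above boils down to a Dekker-style "store _owner; fence; load _succ" exit protocol, the 1-0 exit. A compact C++ rendering of that protocol under std::atomic, with a minimal stand-in monitor type (the real ObjectMonitor layout and the EmitSync variations are elided):

    #include <atomic>

    struct Monitor {                    // minimal stand-in for ObjectMonitor
      std::atomic<void*> owner;
      std::atomic<void*> succ;          // presumed heir apparent
    };

    // Returns true if the exiting thread may simply drop the lock, false if it
    // must run the slow path to wake a successor explicitly.
    bool fast_unlock_sketch(Monitor* m, void* self) {
      m->owner.store(nullptr, std::memory_order_release);
      std::atomic_thread_fence(std::memory_order_seq_cst);  // option (3)/(2) above
      if (m->succ.load(std::memory_order_acquire) != nullptr) {
        return true;                    // a successor exists and will re-try the lock
      }
      // No successor observed: try to retake the lock so we can wake a waiter.
      void* expected = nullptr;
      if (m->owner.compare_exchange_strong(expected, self)) {
        return false;                   // we own it again; run the slow-path wakeup
      }
      return true;                      // another thread already became the owner
    }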
- if ((EmitSync & 65536) == 0) { - masm.bind (CheckSucc) ; - } - masm.bind(DONE_LABEL); - - // Avoid branch to branch on AMD processors - if (EmitSync & 32768) { masm.nop() ; } - } - %} - - enc_class enc_pop_rdx() %{ emit_opcode(cbuf,0x5A); %} @@ -5659,6 +5110,19 @@ %} instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosI src)); + effect(KILL cr); + + format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + __ tzcntl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ + predicate(!UseCountTrailingZerosInstruction); match(Set dst (CountTrailingZerosI src)); effect(KILL cr); @@ -5678,6 +5142,30 @@ %} instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosL src)); + effect(TEMP dst, KILL cr); + + format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" + "JNC done\n\t" + "TZCNT $dst, $src.hi\n\t" + "ADD $dst, 32\n" + "done:" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Label done; + __ tzcntl(Rdst, Rsrc); + __ jccb(Assembler::carryClear, done); + __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); + __ addl(Rdst, BitsPerInt); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ + predicate(!UseCountTrailingZerosInstruction); match(Set dst (CountTrailingZerosL src)); effect(TEMP dst, KILL cr); @@ -7492,44 +6980,6 @@ //----------Arithmetic Instructions-------------------------------------------- //----------Addition Instructions---------------------------------------------- -instruct addExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr) -%{ - match(AddExactI dst src); - effect(DEF cr); - - format %{ "ADD $dst, $src\t# addExact int" %} - ins_encode %{ - __ addl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct addExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr) -%{ - match(AddExactI dst src); - effect(DEF cr); - - format %{ "ADD $dst, $src\t# addExact int" %} - ins_encode %{ - __ addl($dst$$Register, $src$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct addExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr) -%{ - match(AddExactI dst (LoadI src)); - effect(DEF cr); - - ins_cost(125); - format %{ "ADD $dst,$src\t# addExact int" %} - ins_encode %{ - __ addl($dst$$Register, $src$$Address); - %} - ins_pipe( ialu_reg_mem ); -%} - - // Integer Addition Instructions instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (AddI dst src)); @@ -7839,43 +7289,6 @@ //----------Subtraction Instructions------------------------------------------- -instruct subExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr) -%{ - match(SubExactI dst src); - effect(DEF cr); - - format %{ "SUB $dst, $src\t# subExact int" %} - ins_encode %{ - __ subl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct subExactI_eReg_imm(eAXRegI dst, immI src, eFlagsReg cr) -%{ - match(SubExactI dst src); - effect(DEF cr); - - format %{ "SUB $dst, $src\t# subExact int" %} - ins_encode %{ - __ subl($dst$$Register, $src$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct subExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr) -%{ - match(SubExactI dst (LoadI src)); - effect(DEF cr); - - ins_cost(125); - format %{ "SUB $dst,$src\t# subExact int" 
%} - ins_encode %{ - __ subl($dst$$Register, $src$$Address); - %} - ins_pipe( ialu_reg_mem ); -%} - // Integer Subtraction Instructions instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (SubI dst src)); @@ -7944,17 +7357,6 @@ ins_pipe( ialu_reg ); %} -instruct negExactI_eReg(eAXRegI dst, eFlagsReg cr) %{ - match(NegExactI dst); - effect(DEF cr); - - format %{ "NEG $dst\t# negExact int"%} - ins_encode %{ - __ negl($dst$$Register); - %} - ins_pipe(ialu_reg); -%} - //----------Multiplication/Division Instructions------------------------------- // Integer Multiplication Instructions // Multiply Register @@ -8166,46 +7568,6 @@ ins_pipe( pipe_slow ); %} -instruct mulExactI_eReg(eAXRegI dst, rRegI src, eFlagsReg cr) -%{ - match(MulExactI dst src); - effect(DEF cr); - - ins_cost(300); - format %{ "IMUL $dst, $src\t# mulExact int" %} - ins_encode %{ - __ imull($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -instruct mulExactI_eReg_imm(eAXRegI dst, rRegI src, immI imm, eFlagsReg cr) -%{ - match(MulExactI src imm); - effect(DEF cr); - - ins_cost(300); - format %{ "IMUL $dst, $src, $imm\t# mulExact int" %} - ins_encode %{ - __ imull($dst$$Register, $src$$Register, $imm$$constant); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -instruct mulExactI_eReg_mem(eAXRegI dst, memory src, eFlagsReg cr) -%{ - match(MulExactI dst (LoadI src)); - effect(DEF cr); - - ins_cost(350); - format %{ "IMUL $dst, $src\t# mulExact int" %} - ins_encode %{ - __ imull($dst$$Register, $src$$Address); - %} - ins_pipe(ialu_reg_mem_alu0); -%} - - // Integer DIV with Register instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ match(Set rax (DivI rax div)); @@ -8649,6 +8011,123 @@ ins_pipe( ialu_mem_imm ); %} +// BMI1 instructions +instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ + match(Set dst (AndI (XorI src1 minus_1) src2)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "ANDNL $dst, $src1, $src2" %} + + ins_encode %{ + __ andnl($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ + match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "ANDNL $dst, $src1, $src2" %} + + ins_encode %{ + __ andnl($dst$$Register, $src1$$Register, $src2$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ + match(Set dst (AndI (SubI imm_zero src) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "BLSIL $dst, $src" %} + + ins_encode %{ + __ blsil($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ + match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "BLSIL $dst, $src" %} + + ins_encode %{ + __ blsil($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (XorI (AddI src minus_1) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "BLSMSKL $dst, $src" %} + + ins_encode %{ + __ blsmskl($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct 
blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "BLSMSKL $dst, $src" %} + + ins_encode %{ + __ blsmskl($dst$$Register, $src$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (AndI (AddI src minus_1) src) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "BLSRL $dst, $src" %} + + ins_encode %{ + __ blsrl($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "BLSRL $dst, $src" %} + + ins_encode %{ + __ blsrl($dst$$Register, $src$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + // Or Instructions // Or Register with Register instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ @@ -9071,6 +8550,91 @@ instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); */ +//----------Overflow Math Instructions----------------------------------------- + +instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) +%{ + match(Set cr (OverflowAddI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "ADD $op1, $op2\t# overflow check int" %} + + ins_encode %{ + __ addl($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) +%{ + match(Set cr (OverflowAddI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "ADD $op1, $op2\t# overflow check int" %} + + ins_encode %{ + __ addl($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) +%{ + match(Set cr (OverflowSubI op1 op2)); + + format %{ "CMP $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ cmpl($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) +%{ + match(Set cr (OverflowSubI op1 op2)); + + format %{ "CMP $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ cmpl($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) +%{ + match(Set cr (OverflowSubI zero op2)); + effect(DEF cr, USE_KILL op2); + + format %{ "NEG $op2\t# overflow check int" %} + ins_encode %{ + __ negl($op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) +%{ + match(Set cr (OverflowMulI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "IMUL $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ imull($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) +%{ + match(Set cr (OverflowMulI op1 op2)); + effect(DEF cr, TEMP tmp, USE op1, USE op2); + + format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ imull($tmp$$Register, $op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg_alu0); +%} //----------Long Instructions------------------------------------------------ // Add Long 
Register with Register @@ -9186,6 +8750,210 @@ ins_pipe( ialu_reg_long_mem ); %} +// BMI1 instructions +instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ + match(Set dst (AndL (XorL src1 minus_1) src2)); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" + "ANDNL $dst.hi, $src1.hi, $src2.hi" + %} + + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + Register Rsrc2 = $src2$$Register; + __ andnl(Rdst, Rsrc1, Rsrc2); + __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); + %} + ins_pipe(ialu_reg_reg_long); +%} + +instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ + match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + ins_cost(125); + format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" + "ANDNL $dst.hi, $src1.hi, $src2+4" + %} + + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); + + __ andnl(Rdst, Rsrc1, $src2$$Address); + __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ + match(Set dst (AndL (SubL imm_zero src) src)); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + format %{ "MOVL $dst.hi, 0\n\t" + "BLSIL $dst.lo, $src.lo\n\t" + "JNZ done\n\t" + "BLSIL $dst.hi, $src.hi\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ movl(HIGH_FROM_LOW(Rdst), 0); + __ blsil(Rdst, Rsrc); + __ jccb(Assembler::notZero, done); + __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); + __ bind(done); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ + match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + ins_cost(125); + format %{ "MOVL $dst.hi, 0\n\t" + "BLSIL $dst.lo, $src\n\t" + "JNZ done\n\t" + "BLSIL $dst.hi, $src+4\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); + + __ movl(HIGH_FROM_LOW(Rdst), 0); + __ blsil(Rdst, $src$$Address); + __ jccb(Assembler::notZero, done); + __ blsil(HIGH_FROM_LOW(Rdst), src_hi); + __ bind(done); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (XorL (AddL src minus_1) src)); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + format %{ "MOVL $dst.hi, 0\n\t" + "BLSMSKL $dst.lo, $src.lo\n\t" + "JNC done\n\t" + "BLSMSKL $dst.hi, $src.hi\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ movl(HIGH_FROM_LOW(Rdst), 0); + __ blsmskl(Rdst, Rsrc); + __ jccb(Assembler::carryClear, done); + __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); + __ bind(done); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); + predicate(UseBMI1Instructions); + effect(KILL cr, 
TEMP dst); + + ins_cost(125); + format %{ "MOVL $dst.hi, 0\n\t" + "BLSMSKL $dst.lo, $src\n\t" + "JNC done\n\t" + "BLSMSKL $dst.hi, $src+4\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); + + __ movl(HIGH_FROM_LOW(Rdst), 0); + __ blsmskl(Rdst, $src$$Address); + __ jccb(Assembler::carryClear, done); + __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); + __ bind(done); + %} + + ins_pipe(ialu_reg_mem); +%} + +instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (AndL (AddL src minus_1) src) ); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + format %{ "MOVL $dst.hi, $src.hi\n\t" + "BLSRL $dst.lo, $src.lo\n\t" + "JNC done\n\t" + "BLSRL $dst.hi, $src.hi\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); + __ blsrl(Rdst, Rsrc); + __ jccb(Assembler::carryClear, done); + __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); + __ bind(done); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) +%{ + match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); + predicate(UseBMI1Instructions); + effect(KILL cr, TEMP dst); + + ins_cost(125); + format %{ "MOVL $dst.hi, $src+4\n\t" + "BLSRL $dst.lo, $src\n\t" + "JNC done\n\t" + "BLSRL $dst.hi, $src+4\n" + "done:" + %} + + ins_encode %{ + Label done; + Register Rdst = $dst$$Register; + Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); + __ movl(HIGH_FROM_LOW(Rdst), src_hi); + __ blsrl(Rdst, $src$$Address); + __ jccb(Assembler::carryClear, done); + __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); + __ bind(done); + %} + + ins_pipe(ialu_reg_mem); +%} + // Or Long Register with Register instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ match(Set dst (OrL dst src)); @@ -13104,23 +12872,44 @@ // inlined locking and unlocking - -instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ - match( Set cr (FastLock object box) ); - effect( TEMP tmp, TEMP scr, USE_KILL box ); +instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{ + predicate(Compile::current()->use_rtm()); + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); + ins_cost(300); + format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, + $scr$$Register, $cx1$$Register, $cx2$$Register, + _counters, _rtm_counters, _stack_rtm_counters, + ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), + true, ra_->C->profile_rtm()); + %} + ins_pipe(pipe_slow); +%} + +instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ + predicate(!Compile::current()->use_rtm()); + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP scr, USE_KILL box); ins_cost(300); format %{ "FASTLOCK $object,$box\t! 
kills $box,$tmp,$scr" %} - ins_encode( Fast_Lock(object,box,tmp,scr) ); - ins_pipe( pipe_slow ); -%} - -instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ - match( Set cr (FastUnlock object box) ); - effect( TEMP tmp, USE_KILL box ); + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, + $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false); + %} + ins_pipe(pipe_slow); +%} + +instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ + match(Set cr (FastUnlock object box)); + effect(TEMP tmp, USE_KILL box); ins_cost(300); format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} - ins_encode( Fast_Unlock(object,box,tmp) ); - ins_pipe( pipe_slow ); + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); + %} + ins_pipe(pipe_slow); %} diff -r a433eb716ce1 -r 62c54fcc0a35 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Tue Mar 25 12:54:21 2014 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Tue Mar 25 17:07:36 2014 -0700 @@ -1600,18 +1600,6 @@ return PTR_RBP_REG_mask(); } -const RegMask Matcher::mathExactI_result_proj_mask() { - return INT_RAX_REG_mask(); -} - -const RegMask Matcher::mathExactL_result_proj_mask() { - return LONG_RAX_REG_mask(); -} - -const RegMask Matcher::mathExactI_flags_proj_mask() { - return INT_FLAGS_mask(); -} - %} //----------ENCODING BLOCK----------------------------------------------------- @@ -2542,231 +2530,6 @@ %} - // obj: object to lock - // box: box address (header location) -- killed - // tmp: rax -- killed - // scr: rbx -- killed - // - // What follows is a direct transliteration of fast_lock() and fast_unlock() - // from i486.ad. See that file for comments. - // TODO: where possible switch from movq (r, 0) to movl(r,0) and - // use the shorter encoding. (Movl clears the high-order 32-bits). - - - enc_class Fast_Lock(rRegP obj, rRegP box, rax_RegI tmp, rRegP scr) - %{ - Register objReg = as_Register((int)$obj$$reg); - Register boxReg = as_Register((int)$box$$reg); - Register tmpReg = as_Register($tmp$$reg); - Register scrReg = as_Register($scr$$reg); - MacroAssembler masm(&cbuf); - - // Verify uniqueness of register assignments -- necessary but not sufficient - assert (objReg != boxReg && objReg != tmpReg && - objReg != scrReg && tmpReg != scrReg, "invariant") ; - - if (_counters != NULL) { - masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr())); - } - if (EmitSync & 1) { - // Without cast to int32_t a movptr will destroy r10 which is typically obj - masm.movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; - masm.cmpptr(rsp, (int32_t)NULL_WORD) ; - } else - if (EmitSync & 2) { - Label DONE_LABEL; - if (UseBiasedLocking) { - // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. - masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters); - } - // QQQ was movl... 
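The cmpFastLockRTM form above routes through MacroAssembler::fast_lock with RTM abort counters and optional profiling. The underlying lock-elision idea, shown as a rough C++ sketch assuming an RTM-capable CPU and compilation with -mrtm (the real code layers retry policy, abort-ratio profiling, and the stack-lock/inflated fallbacks on top of this):

    #include <immintrin.h>
    #include <atomic>

    void locked_increment(std::atomic<int>& lock_word, long& counter) {
      unsigned status = _xbegin();
      if (status == _XBEGIN_STARTED) {
        if (lock_word.load(std::memory_order_relaxed) == 0) {  // lock looks free
          ++counter;                                           // speculative body
          _xend();                                             // commit
          return;
        }
        _xabort(0xff);               // lock was held: abort, resume at _xbegin
      }
      // Fallback: take the lock pessimistically with a CAS, as the non-RTM
      // cmpFastLock path ultimately does.
      int expected = 0;
      while (!lock_word.compare_exchange_weak(expected, 1)) { expected = 0; }
      ++counter;
      lock_word.store(0, std::memory_order_release);
    }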
- masm.movptr(tmpReg, 0x1); - masm.orptr(tmpReg, Address(objReg, 0)); - masm.movptr(Address(boxReg, 0), tmpReg); - if (os::is_MP()) { - masm.lock(); - } - masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - masm.jcc(Assembler::equal, DONE_LABEL); - - // Recursive locking - masm.subptr(tmpReg, rsp); - masm.andptr(tmpReg, 7 - os::vm_page_size()); - masm.movptr(Address(boxReg, 0), tmpReg); - - masm.bind(DONE_LABEL); - masm.nop(); // avoid branch to branch - } else { - Label DONE_LABEL, IsInflated, Egress; - - masm.movptr(tmpReg, Address(objReg, 0)) ; - masm.testl (tmpReg, 0x02) ; // inflated vs stack-locked|neutral|biased - masm.jcc (Assembler::notZero, IsInflated) ; - - // it's stack-locked, biased or neutral - // TODO: optimize markword triage order to reduce the number of - // conditional branches in the most common cases. - // Beware -- there's a subtle invariant that fetch of the markword - // at [FETCH], below, will never observe a biased encoding (*101b). - // If this invariant is not held we'll suffer exclusion (safety) failure. - - if (UseBiasedLocking && !UseOptoBiasInlining) { - masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, _counters); - masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH] - } - - // was q will it destroy high? - masm.orl (tmpReg, 1) ; - masm.movptr(Address(boxReg, 0), tmpReg) ; - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg - if (_counters != NULL) { - masm.cond_inc32(Assembler::equal, - ExternalAddress((address) _counters->fast_path_entry_count_addr())); - } - masm.jcc (Assembler::equal, DONE_LABEL); - - // Recursive locking - masm.subptr(tmpReg, rsp); - masm.andptr(tmpReg, 7 - os::vm_page_size()); - masm.movptr(Address(boxReg, 0), tmpReg); - if (_counters != NULL) { - masm.cond_inc32(Assembler::equal, - ExternalAddress((address) _counters->fast_path_entry_count_addr())); - } - masm.jmp (DONE_LABEL) ; - - masm.bind (IsInflated) ; - // It's inflated - - // TODO: someday avoid the ST-before-CAS penalty by - // relocating (deferring) the following ST. - // We should also think about trying a CAS without having - // fetched _owner. If the CAS is successful we may - // avoid an RTO->RTS upgrade on the $line. - // Without cast to int32_t a movptr will destroy r10 which is typically obj - masm.movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())) ; - - masm.mov (boxReg, tmpReg) ; - masm.movptr (tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.testptr(tmpReg, tmpReg) ; - masm.jcc (Assembler::notZero, DONE_LABEL) ; - - // It's inflated and appears unlocked - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - // Intentional fall-through into DONE_LABEL ... 
- - masm.bind (DONE_LABEL) ; - masm.nop () ; // avoid jmp to jmp - } - %} - - // obj: object to unlock - // box: box address (displaced header location), killed - // RBX: killed tmp; cannot be obj nor box - enc_class Fast_Unlock(rRegP obj, rax_RegP box, rRegP tmp) - %{ - - Register objReg = as_Register($obj$$reg); - Register boxReg = as_Register($box$$reg); - Register tmpReg = as_Register($tmp$$reg); - MacroAssembler masm(&cbuf); - - if (EmitSync & 4) { - masm.cmpptr(rsp, 0) ; - } else - if (EmitSync & 8) { - Label DONE_LABEL; - if (UseBiasedLocking) { - masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - - // Check whether the displaced header is 0 - //(=> recursive unlock) - masm.movptr(tmpReg, Address(boxReg, 0)); - masm.testptr(tmpReg, tmpReg); - masm.jcc(Assembler::zero, DONE_LABEL); - - // If not recursive lock, reset the header to displaced header - if (os::is_MP()) { - masm.lock(); - } - masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box - masm.bind(DONE_LABEL); - masm.nop(); // avoid branch to branch - } else { - Label DONE_LABEL, Stacked, CheckSucc ; - - if (UseBiasedLocking && !UseOptoBiasInlining) { - masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - - masm.movptr(tmpReg, Address(objReg, 0)) ; - masm.cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD) ; - masm.jcc (Assembler::zero, DONE_LABEL) ; - masm.testl (tmpReg, 0x02) ; - masm.jcc (Assembler::zero, Stacked) ; - - // It's inflated - masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; - masm.xorptr(boxReg, r15_thread) ; - masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; - masm.jcc (Assembler::notZero, DONE_LABEL) ; - masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; - masm.orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; - masm.jcc (Assembler::notZero, CheckSucc) ; - masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; - masm.jmp (DONE_LABEL) ; - - if ((EmitSync & 65536) == 0) { - Label LSuccess, LGoSlowPath ; - masm.bind (CheckSucc) ; - masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ; - masm.jcc (Assembler::zero, LGoSlowPath) ; - - // I'd much rather use lock:andl m->_owner, 0 as it's faster than the - // the explicit ST;MEMBAR combination, but masm doesn't currently support - // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc - // are all faster when the write buffer is populated. 
- masm.movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD) ; - if (os::is_MP()) { - masm.lock () ; masm.addl (Address(rsp, 0), 0) ; - } - masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD) ; - masm.jcc (Assembler::notZero, LSuccess) ; - - masm.movptr (boxReg, (int32_t)NULL_WORD) ; // box is really EAX - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); - masm.jcc (Assembler::notEqual, LSuccess) ; - // Intentional fall-through into slow-path - - masm.bind (LGoSlowPath) ; - masm.orl (boxReg, 1) ; // set ICC.ZF=0 to indicate failure - masm.jmp (DONE_LABEL) ; - - masm.bind (LSuccess) ; - masm.testl (boxReg, 0) ; // set ICC.ZF=1 to indicate success - masm.jmp (DONE_LABEL) ; - } - - masm.bind (Stacked) ; - masm.movptr(tmpReg, Address (boxReg, 0)) ; // re-fetch - if (os::is_MP()) { masm.lock(); } - masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box - - if (EmitSync & 65536) { - masm.bind (CheckSucc) ; - } - masm.bind(DONE_LABEL); - if (EmitSync & 32768) { - masm.nop(); // avoid branch to branch - } - } - %} - - enc_class enc_rethrow() %{ cbuf.set_insts_mark(); @@ -6202,6 +5965,19 @@ %} instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosI src)); + effect(KILL cr); + + format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + __ tzcntl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{ + predicate(!UseCountTrailingZerosInstruction); match(Set dst (CountTrailingZerosI src)); effect(KILL cr); @@ -6221,6 +5997,19 @@ %} instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ + predicate(UseCountTrailingZerosInstruction); + match(Set dst (CountTrailingZerosL src)); + effect(KILL cr); + + format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %} + ins_encode %{ + __ tzcntq($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{ + predicate(!UseCountTrailingZerosInstruction); match(Set dst (CountTrailingZerosL src)); effect(KILL cr); @@ -6906,82 +6695,6 @@ //----------Arithmetic Instructions-------------------------------------------- //----------Addition Instructions---------------------------------------------- -instruct addExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr) -%{ - match(AddExactI dst src); - effect(DEF cr); - - format %{ "addl $dst, $src\t# addExact int" %} - ins_encode %{ - __ addl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct addExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr) -%{ - match(AddExactI dst src); - effect(DEF cr); - - format %{ "addl $dst, $src\t# addExact int" %} - ins_encode %{ - __ addl($dst$$Register, $src$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct addExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr) -%{ - match(AddExactI dst (LoadI src)); - effect(DEF cr); - - ins_cost(125); // XXX - format %{ "addl $dst, $src\t# addExact int" %} - ins_encode %{ - __ addl($dst$$Register, $src$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -instruct addExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr) -%{ - match(AddExactL dst src); - effect(DEF cr); - - format %{ "addq $dst, $src\t# addExact long" %} - ins_encode %{ - __ addq($dst$$Register, 
$src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct addExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr) -%{ - match(AddExactL dst src); - effect(DEF cr); - - format %{ "addq $dst, $src\t# addExact long" %} - ins_encode %{ - __ addq($dst$$Register, $src$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct addExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr) -%{ - match(AddExactL dst (LoadL src)); - effect(DEF cr); - - ins_cost(125); // XXX - format %{ "addq $dst, $src\t# addExact long" %} - ins_encode %{ - __ addq($dst$$Register, $src$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{ match(Set dst (AddI dst src)); @@ -7594,80 +7307,6 @@ ins_pipe(ialu_mem_imm); %} -instruct subExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr) -%{ - match(SubExactI dst src); - effect(DEF cr); - - format %{ "subl $dst, $src\t# subExact int" %} - ins_encode %{ - __ subl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct subExactI_rReg_imm(rax_RegI dst, immI src, rFlagsReg cr) -%{ - match(SubExactI dst src); - effect(DEF cr); - - format %{ "subl $dst, $src\t# subExact int" %} - ins_encode %{ - __ subl($dst$$Register, $src$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct subExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr) -%{ - match(SubExactI dst (LoadI src)); - effect(DEF cr); - - ins_cost(125); - format %{ "subl $dst, $src\t# subExact int" %} - ins_encode %{ - __ subl($dst$$Register, $src$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct subExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr) -%{ - match(SubExactL dst src); - effect(DEF cr); - - format %{ "subq $dst, $src\t# subExact long" %} - ins_encode %{ - __ subq($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct subExactL_rReg_imm(rax_RegL dst, immL32 src, rFlagsReg cr) -%{ - match(SubExactL dst (LoadL src)); - effect(DEF cr); - - format %{ "subq $dst, $src\t# subExact long" %} - ins_encode %{ - __ subq($dst$$Register, $src$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct subExactL_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr) -%{ - match(SubExactI dst src); - effect(DEF cr); - - ins_cost(125); - format %{ "subq $dst, $src\t# subExact long" %} - ins_encode %{ - __ subq($dst$$Register, $src$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{ match(Set dst (SubL dst src)); @@ -7784,31 +7423,6 @@ ins_pipe(ialu_reg); %} -instruct negExactI_rReg(rax_RegI dst, rFlagsReg cr) -%{ - match(NegExactI dst); - effect(KILL cr); - - format %{ "negl $dst\t# negExact int" %} - ins_encode %{ - __ negl($dst$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct negExactL_rReg(rax_RegL dst, rFlagsReg cr) -%{ - match(NegExactL dst); - effect(KILL cr); - - format %{ "negq $dst\t# negExact long" %} - ins_encode %{ - __ negq($dst$$Register); - %} - ins_pipe(ialu_reg); -%} - - //----------Multiplication/Division Instructions------------------------------- // Integer Multiplication Instructions // Multiply Register @@ -7925,86 +7539,6 @@ ins_pipe(ialu_reg_reg_alu0); %} - -instruct mulExactI_rReg(rax_RegI dst, rRegI src, rFlagsReg cr) -%{ - match(MulExactI dst src); - effect(DEF cr); - - ins_cost(300); - format %{ "imull $dst, $src\t# mulExact int" %} - ins_encode %{ - __ imull($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - - -instruct mulExactI_rReg_imm(rax_RegI dst, rRegI src, immI imm, rFlagsReg cr) -%{ - 
match(MulExactI src imm); - effect(DEF cr); - - ins_cost(300); - format %{ "imull $dst, $src, $imm\t# mulExact int" %} - ins_encode %{ - __ imull($dst$$Register, $src$$Register, $imm$$constant); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -instruct mulExactI_rReg_mem(rax_RegI dst, memory src, rFlagsReg cr) -%{ - match(MulExactI dst (LoadI src)); - effect(DEF cr); - - ins_cost(350); - format %{ "imull $dst, $src\t# mulExact int" %} - ins_encode %{ - __ imull($dst$$Register, $src$$Address); - %} - ins_pipe(ialu_reg_mem_alu0); -%} - -instruct mulExactL_rReg(rax_RegL dst, rRegL src, rFlagsReg cr) -%{ - match(MulExactL dst src); - effect(DEF cr); - - ins_cost(300); - format %{ "imulq $dst, $src\t# mulExact long" %} - ins_encode %{ - __ imulq($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -instruct mulExactL_rReg_imm(rax_RegL dst, rRegL src, immL32 imm, rFlagsReg cr) -%{ - match(MulExactL src imm); - effect(DEF cr); - - ins_cost(300); - format %{ "imulq $dst, $src, $imm\t# mulExact long" %} - ins_encode %{ - __ imulq($dst$$Register, $src$$Register, $imm$$constant); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -instruct mulExactL_rReg_mem(rax_RegL dst, memory src, rFlagsReg cr) -%{ - match(MulExactL dst (LoadL src)); - effect(DEF cr); - - ins_cost(350); - format %{ "imulq $dst, $src\t# mulExact long" %} - ins_encode %{ - __ imulq($dst$$Register, $src$$Address); - %} - ins_pipe(ialu_reg_mem_alu0); -%} - instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr) %{ @@ -9057,6 +8591,122 @@ ins_pipe(ialu_mem_imm); %} +// BMI1 instructions +instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{ + match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "andnl $dst, $src1, $src2" %} + + ins_encode %{ + __ andnl($dst$$Register, $src1$$Register, $src2$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{ + match(Set dst (AndI (XorI src1 minus_1) src2)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "andnl $dst, $src1, $src2" %} + + ins_encode %{ + __ andnl($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{ + match(Set dst (AndI (SubI imm_zero src) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsil $dst, $src" %} + + ins_encode %{ + __ blsil($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{ + match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsil $dst, $src" %} + + ins_encode %{ + __ blsil($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsmskl $dst, $src" %} + + ins_encode %{ + __ blsmskl($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (XorI (AddI src minus_1) src)); + predicate(UseBMI1Instructions); + effect(KILL 
cr); + + format %{ "blsmskl $dst, $src" %} + + ins_encode %{ + __ blsmskl($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (AndI (AddI src minus_1) src) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsrl $dst, $src" %} + + ins_encode %{ + __ blsrl($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg_mem); +%} + +instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsrl $dst, $src" %} + + ins_encode %{ + __ blsrl($dst$$Register, $src$$Address); + %} + + ins_pipe(ialu_reg); +%} + // Or Instructions // Or Register with Register instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) @@ -9288,6 +8938,122 @@ ins_pipe(ialu_mem_imm); %} +// BMI1 instructions +instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{ + match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "andnq $dst, $src1, $src2" %} + + ins_encode %{ + __ andnq($dst$$Register, $src1$$Register, $src2$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{ + match(Set dst (AndL (XorL src1 minus_1) src2)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "andnq $dst, $src1, $src2" %} + + ins_encode %{ + __ andnq($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{ + match(Set dst (AndL (SubL imm_zero src) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsiq $dst, $src" %} + + ins_encode %{ + __ blsiq($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{ + match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsiq $dst, $src" %} + + ins_encode %{ + __ blsiq($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsmskq $dst, $src" %} + + ins_encode %{ + __ blsmskq($dst$$Register, $src$$Address); + %} + ins_pipe(ialu_reg_mem); +%} + +instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (XorL (AddL src minus_1) src)); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsmskq $dst, $src" %} + + ins_encode %{ + __ blsmskq($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (AndL (AddL src minus_1) src) ); + predicate(UseBMI1Instructions); + effect(KILL cr); + + format %{ "blsrq $dst, $src" %} + + ins_encode %{ + __ blsrq($dst$$Register, $src$$Register); + %} + + ins_pipe(ialu_reg); +%} + +instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) +%{ + match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) 
); + predicate(UseBMI1Instructions); + effect(KILL cr); + + ins_cost(125); + format %{ "blsrq $dst, $src" %} + + ins_encode %{ + __ blsrq($dst$$Register, $src$$Address); + %} + + ins_pipe(ialu_reg); +%} + // Or Instructions // Or Register with Register instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) @@ -10613,6 +10379,174 @@ ins_pipe( pipe_slow ); %} +//----------Overflow Math Instructions----------------------------------------- + +instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) +%{ + match(Set cr (OverflowAddI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "addl $op1, $op2\t# overflow check int" %} + + ins_encode %{ + __ addl($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) +%{ + match(Set cr (OverflowAddI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "addl $op1, $op2\t# overflow check int" %} + + ins_encode %{ + __ addl($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) +%{ + match(Set cr (OverflowAddL op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "addq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ addq($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) +%{ + match(Set cr (OverflowAddL op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "addq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ addq($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) +%{ + match(Set cr (OverflowSubI op1 op2)); + + format %{ "cmpl $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ cmpl($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) +%{ + match(Set cr (OverflowSubI op1 op2)); + + format %{ "cmpl $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ cmpl($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) +%{ + match(Set cr (OverflowSubL op1 op2)); + + format %{ "cmpq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ cmpq($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) +%{ + match(Set cr (OverflowSubL op1 op2)); + + format %{ "cmpq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ cmpq($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2) +%{ + match(Set cr (OverflowSubI zero op2)); + effect(DEF cr, USE_KILL op2); + + format %{ "negl $op2\t# overflow check int" %} + ins_encode %{ + __ negl($op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) +%{ + match(Set cr (OverflowSubL zero op2)); + effect(DEF cr, USE_KILL op2); + + format %{ "negq $op2\t# overflow check long" %} + ins_encode %{ + __ negq($op2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) +%{ + match(Set cr (OverflowMulI op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "imull $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ imull($op1$$Register, $op2$$Register); + 
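
The BMI1 rules above (andnI/andnL, blsiI/blsiL, blsmskI/blsmskL, blsrI/blsrL) each match a fixed ideal-graph shape built from ordinary And/Xor/Add/Sub nodes combined with a -1 or 0 constant. A minimal sketch of the source-level idioms those shapes correspond to (illustrative only, not HotSpot code; unsigned types are used here just to keep the C++ well defined at the edges):

    #include <cstdint>
    // Idioms that the rules above reduce to single BMI1 instructions
    // when UseBMI1Instructions is set.
    static inline uint32_t andn_idiom  (uint32_t x, uint32_t y) { return ~x & y; }       // ANDN:   (XorI x -1) AndI y
    static inline uint32_t blsi_idiom  (uint32_t x)             { return x & (0 - x); }  // BLSI:   isolate lowest set bit
    static inline uint32_t blsmsk_idiom(uint32_t x)             { return x ^ (x - 1); }  // BLSMSK: mask through lowest set bit
    static inline uint32_t blsr_idiom  (uint32_t x)             { return x & (x - 1); }  // BLSR:   clear lowest set bit

Because the memory forms use the same Load twice in the match expression, the formssel.cpp hunk further down starts counting a repeated memory USE only once.
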
%} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) +%{ + match(Set cr (OverflowMulI op1 op2)); + effect(DEF cr, TEMP tmp, USE op1, USE op2); + + format %{ "imull $tmp, $op1, $op2\t# overflow check int" %} + ins_encode %{ + __ imull($tmp$$Register, $op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) +%{ + match(Set cr (OverflowMulL op1 op2)); + effect(DEF cr, USE_KILL op1, USE op2); + + format %{ "imulq $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ imulq($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + +instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) +%{ + match(Set cr (OverflowMulL op1 op2)); + effect(DEF cr, TEMP tmp, USE op1, USE op2); + + format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %} + ins_encode %{ + __ imulq($tmp$$Register, $op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_reg_reg_alu0); +%} + //----------Control Flow Instructions------------------------------------------ // Signed compare Instructions @@ -11396,27 +11330,43 @@ // ============================================================================ // inlined locking and unlocking -instruct cmpFastLock(rFlagsReg cr, - rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) -%{ +instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{ + predicate(Compile::current()->use_rtm()); + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); + ins_cost(300); + format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, + $scr$$Register, $cx1$$Register, $cx2$$Register, + _counters, _rtm_counters, _stack_rtm_counters, + ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), + true, ra_->C->profile_rtm()); + %} + ins_pipe(pipe_slow); +%} + +instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr) %{ + predicate(!Compile::current()->use_rtm()); match(Set cr (FastLock object box)); effect(TEMP tmp, TEMP scr, USE_KILL box); - ins_cost(300); format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %} - ins_encode(Fast_Lock(object, box, tmp, scr)); + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, + $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false); + %} ins_pipe(pipe_slow); %} -instruct cmpFastUnlock(rFlagsReg cr, - rRegP object, rax_RegP box, rRegP tmp) -%{ +instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{ match(Set cr (FastUnlock object box)); effect(TEMP tmp, USE_KILL box); - ins_cost(300); format %{ "fastunlock $object,$box\t! kills $box,$tmp" %} - ins_encode(Fast_Unlock(object, box, tmp)); + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); + %} ins_pipe(pipe_slow); %} diff -r a433eb716ce1 -r 62c54fcc0a35 src/os/bsd/vm/os_bsd.cpp --- a/src/os/bsd/vm/os_bsd.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os/bsd/vm/os_bsd.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -2590,9 +2590,21 @@ } } -int os::naked_sleep() { - // %% make the sleep time an integer flag. for now use 1 millisec. 
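
The Overflow* rules above take over from the removed *Exact rules (see the deleted mulExact instructions earlier and the must_clone list change in archDesc.cpp below): rather than one node that produced both a result and a FlagsProj, the overflow-checked math intrinsics are now expressed as an ordinary Add/Sub/Mul plus an OverflowAddI/L, OverflowSubI/L or OverflowMulI/L node whose only output is the condition flags consumed by the deoptimizing branch. A rough functional analogue of the check (a sketch using GCC/Clang builtins, not HotSpot code):

    #include <cstdint>
    // Perform the operation, then report whether it overflowed; the compiled
    // code branches to an uncommon trap on the overflow condition.
    bool add_overflows(int32_t a, int32_t b, int32_t* out) {
      return __builtin_add_overflow(a, b, out);   // true on signed 32-bit overflow
    }
    bool mul_overflows(int64_t a, int64_t b, int64_t* out) {
      return __builtin_mul_overflow(a, b, out);   // true on signed 64-bit overflow
    }
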
- return os::sleep(Thread::current(), 1, false); +void os::naked_short_sleep(jlong ms) { + struct timespec req; + + assert(ms < 1000, "Un-interruptable sleep, short time use only"); + req.tv_sec = 0; + if (ms > 0) { + req.tv_nsec = (ms % 1000) * 1000000; + } + else { + req.tv_nsec = 1; + } + + nanosleep(&req, NULL); + + return; } // Sleep forever; naked call to OS-specific sleep; use with CAUTION diff -r a433eb716ce1 -r 62c54fcc0a35 src/os/linux/vm/os_linux.cpp --- a/src/os/linux/vm/os_linux.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os/linux/vm/os_linux.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -109,6 +109,8 @@ #define MAX_PATH (2 * K) +#define MAX_SECS 100000000 + // for timer info max values which include all bits #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF) @@ -2433,7 +2435,6 @@ sem_t _semaphore; }; - Semaphore::Semaphore() { sem_init(&_semaphore, 0, 0); } @@ -2455,8 +2456,22 @@ } bool Semaphore::timedwait(unsigned int sec, int nsec) { + struct timespec ts; - unpackTime(&ts, false, (sec * NANOSECS_PER_SEC) + nsec); + // Semaphore's are always associated with CLOCK_REALTIME + os::Linux::clock_gettime(CLOCK_REALTIME, &ts); + // see unpackTime for discussion on overflow checking + if (sec >= MAX_SECS) { + ts.tv_sec += MAX_SECS; + ts.tv_nsec = 0; + } else { + ts.tv_sec += sec; + ts.tv_nsec += nsec; + if (ts.tv_nsec >= NANOSECS_PER_SEC) { + ts.tv_nsec -= NANOSECS_PER_SEC; + ++ts.tv_sec; // note: this must be <= max_secs + } + } while (1) { int result = sem_timedwait(&_semaphore, &ts); @@ -2961,7 +2976,9 @@ unsigned char vec[1]; unsigned imin = 1, imax = pages + 1, imid; - int mincore_return_value; + int mincore_return_value = 0; + + assert(imin <= imax, "Unexpected page size"); while (imin < imax) { imid = (imax + imin) / 2; @@ -3833,9 +3850,33 @@ } } -int os::naked_sleep() { - // %% make the sleep time an integer flag. for now use 1 millisec. - return os::sleep(Thread::current(), 1, false); +// +// Short sleep, direct OS call. +// +// Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee +// sched_yield(2) will actually give up the CPU: +// +// * Alone on this pariticular CPU, keeps running. +// * Before the introduction of "skip_buddy" with "compat_yield" disabled +// (pre 2.6.39). +// +// So calling this with 0 is an alternative. +// +void os::naked_short_sleep(jlong ms) { + struct timespec req; + + assert(ms < 1000, "Un-interruptable sleep, short time use only"); + req.tv_sec = 0; + if (ms > 0) { + req.tv_nsec = (ms % 1000) * 1000000; + } + else { + req.tv_nsec = 1; + } + + nanosleep(&req, NULL); + + return; } // Sleep forever; naked call to OS-specific sleep; use with CAUTION @@ -5752,7 +5793,6 @@ * is no need to track notifications. */ -#define MAX_SECS 100000000 /* * This code is common to linux and solaris and will be moved to a * common place in dolphin. diff -r a433eb716ce1 -r 62c54fcc0a35 src/os/linux/vm/perfMemory_linux.cpp --- a/src/os/linux/vm/perfMemory_linux.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os/linux/vm/perfMemory_linux.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -891,8 +891,16 @@ FREE_C_HEAP_ARRAY(char, filename, mtInternal); // open the shared memory file for the give vmid - fd = open_sharedmem_file(rfilename, file_flags, CHECK); - assert(fd != OS_ERR, "unexpected value"); + fd = open_sharedmem_file(rfilename, file_flags, THREAD); + + if (fd == OS_ERR) { + return; + } + + if (HAS_PENDING_EXCEPTION) { + ::close(fd); + return; + } if (*sizep == 0) { size = sharedmem_filesize(fd, CHECK); diff -r a433eb716ce1 -r 62c54fcc0a35 src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os/solaris/vm/os_solaris.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2232,8 +2232,8 @@ st->cr(); status = true; } - ::close(fd); } + ::close(fd); } return status; } @@ -2967,7 +2967,7 @@ char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info* page_found) { const uint_t info_types[] = { MEMINFO_VLGRP, MEMINFO_VPAGESIZE }; const size_t types = sizeof(info_types) / sizeof(info_types[0]); - uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT]; + uint64_t addrs[MAX_MEMINFO_CNT], outdata[types * MAX_MEMINFO_CNT + 1]; uint_t validity[MAX_MEMINFO_CNT]; size_t page_size = MAX2((size_t)os::vm_page_size(), page_expected->size); @@ -3006,7 +3006,7 @@ } } - if (i != addrs_count) { + if (i < addrs_count) { if ((validity[i] & 2) != 0) { page_found->lgrp_id = outdata[types * i]; } else { @@ -3496,9 +3496,14 @@ return os_sleep(millis, interruptible); } -int os::naked_sleep() { - // %% make the sleep time an integer flag. for now use 1 millisec. - return os_sleep(1, false); +void os::naked_short_sleep(jlong ms) { + assert(ms < 1000, "Un-interruptable sleep, short time use only"); + + // usleep is deprecated and removed from POSIX, in favour of nanosleep, but + // Solaris requires -lrt for this. + usleep((ms * 1000)); + + return; } // Sleep forever; naked call to OS-specific sleep; use with CAUTION diff -r a433eb716ce1 -r 62c54fcc0a35 src/os/solaris/vm/perfMemory_solaris.cpp --- a/src/os/solaris/vm/perfMemory_solaris.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os/solaris/vm/perfMemory_solaris.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -431,10 +431,12 @@ RESTARTABLE(::read(fd, addr, remaining), result); if (result == OS_ERR) { + ::close(fd); THROW_MSG_0(vmSymbols::java_io_IOException(), "Read error"); + } else { + remaining-=result; + addr+=result; } - remaining-=result; - addr+=result; } ::close(fd); @@ -906,8 +908,16 @@ FREE_C_HEAP_ARRAY(char, filename, mtInternal); // open the shared memory file for the give vmid - fd = open_sharedmem_file(rfilename, file_flags, CHECK); - assert(fd != OS_ERR, "unexpected value"); + fd = open_sharedmem_file(rfilename, file_flags, THREAD); + + if (fd == OS_ERR) { + return; + } + + if (HAS_PENDING_EXCEPTION) { + ::close(fd); + return; + } if (*sizep == 0) { size = sharedmem_filesize(fd, CHECK); diff -r a433eb716ce1 -r 62c54fcc0a35 src/os/windows/vm/os_windows.cpp --- a/src/os/windows/vm/os_windows.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os/windows/vm/os_windows.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -2437,6 +2437,12 @@ } } + if ((exception_code == EXCEPTION_ACCESS_VIOLATION) && + VM_Version::is_cpuinfo_segv_addr(pc)) { + // Verify that OS save/restore AVX registers. + return Handle_Exception(exceptionInfo, VM_Version::cpuinfo_cont_addr()); + } + if (t != NULL && t->is_Java_thread()) { JavaThread* thread = (JavaThread*) t; bool in_java = thread->thread_state() == _thread_in_Java; @@ -3496,6 +3502,16 @@ return result; } +// +// Short sleep, direct OS call. +// +// ms = 0, means allow others (if any) to run. +// +void os::naked_short_sleep(jlong ms) { + assert(ms < 1000, "Un-interruptable sleep, short time use only"); + Sleep(ms); +} + // Sleep forever; naked call to OS-specific sleep; use with CAUTION void os::infinite_sleep() { while (true) { // sleep forever ... @@ -3623,13 +3639,14 @@ "possibility of dangling Thread pointer"); OSThread* osthread = thread->osthread(); - bool interrupted = osthread->interrupted(); // There is no synchronization between the setting of the interrupt // and it being cleared here. It is critical - see 6535709 - that // we only clear the interrupt state, and reset the interrupt event, // if we are going to report that we were indeed interrupted - else // an interrupt can be "lost", leading to spurious wakeups or lost wakeups - // depending on the timing + // depending on the timing. By checking thread interrupt event to see + // if the thread gets real interrupt thus prevent spurious wakeup. + bool interrupted = osthread->interrupted() && (WaitForSingleObject(osthread->interrupt_event(), 0) == WAIT_OBJECT_0); if (interrupted && clear_interrupted) { osthread->set_interrupted(false); ResetEvent(osthread->interrupt_event()); diff -r a433eb716ce1 -r 62c54fcc0a35 src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp --- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -492,6 +492,11 @@ } } + if ((sig == SIGSEGV || sig == SIGBUS) && VM_Version::is_cpuinfo_segv_addr(pc)) { + // Verify that OS save/restore AVX registers. + stub = VM_Version::cpuinfo_cont_addr(); + } + // We test if stub is already set (by the stack overflow code // above) so it is not overwritten by the code that follows. 
This // check is not required on other platforms, because on other diff -r a433eb716ce1 -r 62c54fcc0a35 src/os_cpu/linux_x86/vm/os_linux_x86.cpp --- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -337,6 +337,11 @@ } } + if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) { + // Verify that OS save/restore AVX registers. + stub = VM_Version::cpuinfo_cont_addr(); + } + if (thread->thread_state() == _thread_in_Java) { // Java thread running in Java code => find exception handler if any // a fault inside compiled code, the interpreter, or a stub diff -r a433eb716ce1 -r 62c54fcc0a35 src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -459,6 +459,11 @@ } } + if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) { + // Verify that OS save/restore AVX registers. + stub = VM_Version::cpuinfo_cont_addr(); + } + if (thread->thread_state() == _thread_in_vm) { if (sig == SIGBUS && info->si_code == BUS_OBJERR && thread->doing_unsafe_access()) { stub = StubRoutines::handler_for_unsafe_access(); @@ -475,9 +480,11 @@ // here if the underlying file has been truncated. // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); - nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; - if (nm != NULL && nm->has_unsafe_access()) { - stub = StubRoutines::handler_for_unsafe_access(); + if (cb != NULL) { + nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; + if (nm != NULL && nm->has_unsafe_access()) { + stub = StubRoutines::handler_for_unsafe_access(); + } } } else @@ -724,6 +731,7 @@ err.report_and_die(); ShouldNotReachHere(); + return false; } void os::print_context(outputStream *st, void *context) { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/adlc/archDesc.cpp --- a/src/share/vm/adlc/archDesc.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/adlc/archDesc.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1193,15 +1193,12 @@ || strcmp(idealName,"CmpF") == 0 || strcmp(idealName,"FastLock") == 0 || strcmp(idealName,"FastUnlock") == 0 - || strcmp(idealName,"AddExactI") == 0 - || strcmp(idealName,"AddExactL") == 0 - || strcmp(idealName,"SubExactI") == 0 - || strcmp(idealName,"SubExactL") == 0 - || strcmp(idealName,"MulExactI") == 0 - || strcmp(idealName,"MulExactL") == 0 - || strcmp(idealName,"NegExactI") == 0 - || strcmp(idealName,"NegExactL") == 0 - || strcmp(idealName,"FlagsProj") == 0 + || strcmp(idealName,"OverflowAddI") == 0 + || strcmp(idealName,"OverflowAddL") == 0 + || strcmp(idealName,"OverflowSubI") == 0 + || strcmp(idealName,"OverflowSubL") == 0 + || strcmp(idealName,"OverflowMulI") == 0 + || strcmp(idealName,"OverflowMulL") == 0 || strcmp(idealName,"Bool") == 0 || strcmp(idealName,"Binary") == 0 ) { // Removed ConI from the must_clone list. 
CPUs that cannot use diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/adlc/formssel.cpp --- a/src/share/vm/adlc/formssel.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/adlc/formssel.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -660,6 +660,7 @@ int USE_of_memory = 0; int DEF_of_memory = 0; const char* last_memory_DEF = NULL; // to test DEF/USE pairing in asserts + const char* last_memory_USE = NULL; Component *unique = NULL; Component *comp = NULL; ComponentList &components = (ComponentList &)_components; @@ -681,7 +682,16 @@ assert(0 == strcmp(last_memory_DEF, comp->_name), "every memory DEF is followed by a USE of the same name"); last_memory_DEF = NULL; } - USE_of_memory++; + // Handles same memory being used multiple times in the case of BMI1 instructions. + if (last_memory_USE != NULL) { + if (strcmp(comp->_name, last_memory_USE) != 0) { + USE_of_memory++; + } + } else { + USE_of_memory++; + } + last_memory_USE = comp->_name; + if (DEF_of_memory == 0) // defs take precedence unique = comp; } else { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/adlc/output_c.cpp --- a/src/share/vm/adlc/output_c.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/adlc/output_c.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1599,6 +1599,8 @@ if( node->is_ideal_fastlock() && new_inst->is_ideal_fastlock() ) { fprintf(fp, " ((MachFastLockNode*)n%d)->_counters = _counters;\n",cnt); + fprintf(fp, " ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n",cnt); + fprintf(fp, " ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n",cnt); } // Fill in the bottom_type where requested @@ -3980,6 +3982,8 @@ } if( inst->is_ideal_fastlock() ) { fprintf(fp_cpp, "%s node->_counters = _leaf->as_FastLock()->counters();\n", indent); + fprintf(fp_cpp, "%s node->_rtm_counters = _leaf->as_FastLock()->rtm_counters();\n", indent); + fprintf(fp_cpp, "%s node->_stack_rtm_counters = _leaf->as_FastLock()->stack_rtm_counters();\n", indent); } } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/ci/ciClassList.hpp --- a/src/share/vm/ci/ciClassList.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/ci/ciClassList.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -103,6 +103,7 @@ friend class ciMethodType; \ friend class ciReceiverTypeData; \ friend class ciTypeEntries; \ +friend class ciSpeculativeTrapData; \ friend class ciSymbol; \ friend class ciArray; \ friend class ciObjArray; \ diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/ci/ciEnv.cpp --- a/src/share/vm/ci/ciEnv.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/ci/ciEnv.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -926,7 +926,8 @@ AbstractCompiler* compiler, int comp_level, bool has_unsafe_access, - bool has_wide_vectors) { + bool has_wide_vectors, + RTMState rtm_state) { VM_ENTRY_MARK; nmethod* nm = NULL; { @@ -973,6 +974,15 @@ methodHandle method(THREAD, target->get_Method()); +#if INCLUDE_RTM_OPT + if (!failing() && (rtm_state != NoRTM) && + (method()->method_data() != NULL) && + (method()->method_data()->rtm_state() != rtm_state)) { + // Preemptive decompile if rtm state was changed. + record_failure("RTM state change invalidated rtm code"); + } +#endif + if (failing()) { // While not a true deoptimization, it is a preemptive decompile. 
MethodData* mdo = method()->method_data(); @@ -999,13 +1009,15 @@ frame_words, oop_map_set, handler_table, inc_table, compiler, comp_level); - // Free codeBlobs code_buffer->free_blob(); if (nm != NULL) { nm->set_has_unsafe_access(has_unsafe_access); nm->set_has_wide_vectors(has_wide_vectors); +#if INCLUDE_RTM_OPT + nm->set_rtm_state(rtm_state); +#endif // Record successful registration. // (Put nm into the task handle *before* publishing to the Java heap.) diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/ci/ciEnv.hpp --- a/src/share/vm/ci/ciEnv.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/ci/ciEnv.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -363,7 +363,8 @@ AbstractCompiler* compiler, int comp_level, bool has_unsafe_access, - bool has_wide_vectors); + bool has_wide_vectors, + RTMState rtm_state = NoRTM); // Access to certain well known ciObjects. diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/ci/ciMethodData.cpp --- a/src/share/vm/ci/ciMethodData.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/ci/ciMethodData.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -78,6 +78,36 @@ _parameters = NULL; } +void ciMethodData::load_extra_data() { + MethodData* mdo = get_MethodData(); + + // speculative trap entries also hold a pointer to a Method so need to be translated + DataLayout* dp_src = mdo->extra_data_base(); + DataLayout* end_src = mdo->extra_data_limit(); + DataLayout* dp_dst = extra_data_base(); + for (;; dp_src = MethodData::next_extra(dp_src), dp_dst = MethodData::next_extra(dp_dst)) { + assert(dp_src < end_src, "moved past end of extra data"); + // New traps in the MDO can be added as we translate the copy so + // look at the entries in the copy. + switch(dp_dst->tag()) { + case DataLayout::speculative_trap_data_tag: { + ciSpeculativeTrapData* data_dst = new ciSpeculativeTrapData(dp_dst); + SpeculativeTrapData* data_src = new SpeculativeTrapData(dp_src); + data_dst->translate_from(data_src); + break; + } + case DataLayout::bit_data_tag: + break; + case DataLayout::no_tag: + case DataLayout::arg_info_data_tag: + // An empty slot or ArgInfoData entry marks the end of the trap data + return; + default: + fatal(err_msg("bad tag = %d", dp_dst->tag())); + } + } +} + void ciMethodData::load_data() { MethodData* mdo = get_MethodData(); if (mdo == NULL) { @@ -116,6 +146,8 @@ parameters->translate_from(mdo->parameters_type_data()); } + load_extra_data(); + // Note: Extra data are all BitData, and do not need translation. _current_mileage = MethodData::mileage_of(mdo->method()); _invocation_counter = mdo->invocation_count(); @@ -156,6 +188,12 @@ set_type(translate_klass(k)); } +void ciSpeculativeTrapData::translate_from(const ProfileData* data) { + Method* m = data->as_SpeculativeTrapData()->method(); + ciMethod* ci_m = CURRENT_ENV->get_method(m); + set_method(ci_m); +} + // Get the data at an arbitrary (sort of) data index. ciProfileData* ciMethodData::data_at(int data_index) { if (out_of_bounds(data_index)) { @@ -203,32 +241,64 @@ return next; } -// Translate a bci to its corresponding data, or NULL. -ciProfileData* ciMethodData::bci_to_data(int bci) { - ciProfileData* data = data_before(bci); - for ( ; is_valid(data); data = next_data(data)) { - if (data->bci() == bci) { - set_hint_di(dp_to_di(data->dp())); - return data; - } else if (data->bci() > bci) { - break; - } - } +ciProfileData* ciMethodData::bci_to_extra_data(int bci, ciMethod* m, bool& two_free_slots) { // bci_to_extra_data(bci) ... 
DataLayout* dp = data_layout_at(data_size()); DataLayout* end = data_layout_at(data_size() + extra_data_size()); - for (; dp < end; dp = MethodData::next_extra(dp)) { - if (dp->tag() == DataLayout::no_tag) { + two_free_slots = false; + for (;dp < end; dp = MethodData::next_extra(dp)) { + switch(dp->tag()) { + case DataLayout::no_tag: _saw_free_extra_data = true; // observed an empty slot (common case) + two_free_slots = (MethodData::next_extra(dp)->tag() == DataLayout::no_tag); return NULL; + case DataLayout::arg_info_data_tag: + return NULL; // ArgInfoData is at the end of extra data section. + case DataLayout::bit_data_tag: + if (m == NULL && dp->bci() == bci) { + return new ciBitData(dp); + } + break; + case DataLayout::speculative_trap_data_tag: { + ciSpeculativeTrapData* data = new ciSpeculativeTrapData(dp); + // data->method() might be null if the MDO is snapshotted + // concurrently with a trap + if (m != NULL && data->method() == m && dp->bci() == bci) { + return data; + } + break; + } + default: + fatal(err_msg("bad tag = %d", dp->tag())); } - if (dp->tag() == DataLayout::arg_info_data_tag) { - break; // ArgInfoData is at the end of extra data section. + } + return NULL; +} + +// Translate a bci to its corresponding data, or NULL. +ciProfileData* ciMethodData::bci_to_data(int bci, ciMethod* m) { + // If m is not NULL we look for a SpeculativeTrapData entry + if (m == NULL) { + ciProfileData* data = data_before(bci); + for ( ; is_valid(data); data = next_data(data)) { + if (data->bci() == bci) { + set_hint_di(dp_to_di(data->dp())); + return data; + } else if (data->bci() > bci) { + break; + } } - if (dp->bci() == bci) { - assert(dp->tag() == DataLayout::bit_data_tag, "sane"); - return new ciBitData(dp); - } + } + bool two_free_slots = false; + ciProfileData* result = bci_to_extra_data(bci, m, two_free_slots); + if (result != NULL) { + return result; + } + if (m != NULL && !two_free_slots) { + // We were looking for a SpeculativeTrapData entry we didn't + // find. Room is not available for more SpeculativeTrapData + // entries, look in the non SpeculativeTrapData entries. + return bci_to_data(bci, NULL); } return NULL; } @@ -525,18 +595,25 @@ st->print_cr("--- Extra data:"); DataLayout* dp = data_layout_at(data_size()); DataLayout* end = data_layout_at(data_size() + extra_data_size()); - for (; dp < end; dp = MethodData::next_extra(dp)) { - if (dp->tag() == DataLayout::no_tag) continue; - if (dp->tag() == DataLayout::bit_data_tag) { + for (;; dp = MethodData::next_extra(dp)) { + assert(dp < end, "moved past end of extra data"); + switch (dp->tag()) { + case DataLayout::no_tag: + continue; + case DataLayout::bit_data_tag: data = new BitData(dp); - } else { - assert(dp->tag() == DataLayout::arg_info_data_tag, "must be BitData or ArgInfo"); + break; + case DataLayout::arg_info_data_tag: data = new ciArgInfoData(dp); dp = end; // ArgInfoData is at the end of extra data section. 
+ break; + default: + fatal(err_msg("unexpected tag %d", dp->tag())); } st->print("%d", dp_to_di(data->dp())); st->fill_to(6); data->print_data_on(st); + if (dp >= end) return; } } @@ -569,8 +646,8 @@ st->cr(); } -void ciCallTypeData::print_data_on(outputStream* st) const { - print_shared(st, "ciCallTypeData"); +void ciCallTypeData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "ciCallTypeData", extra); if (has_arguments()) { tab(st, true); st->print("argument types"); @@ -599,18 +676,18 @@ } } -void ciReceiverTypeData::print_data_on(outputStream* st) const { - print_shared(st, "ciReceiverTypeData"); +void ciReceiverTypeData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "ciReceiverTypeData", extra); print_receiver_data_on(st); } -void ciVirtualCallData::print_data_on(outputStream* st) const { - print_shared(st, "ciVirtualCallData"); +void ciVirtualCallData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "ciVirtualCallData", extra); rtd_super()->print_receiver_data_on(st); } -void ciVirtualCallTypeData::print_data_on(outputStream* st) const { - print_shared(st, "ciVirtualCallTypeData"); +void ciVirtualCallTypeData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "ciVirtualCallTypeData", extra); rtd_super()->print_receiver_data_on(st); if (has_arguments()) { tab(st, true); @@ -624,8 +701,15 @@ } } -void ciParametersTypeData::print_data_on(outputStream* st) const { - st->print_cr("Parametertypes"); +void ciParametersTypeData::print_data_on(outputStream* st, const char* extra) const { + st->print_cr("ciParametersTypeData"); parameters()->print_data_on(st); } + +void ciSpeculativeTrapData::print_data_on(outputStream* st, const char* extra) const { + st->print_cr("ciSpeculativeTrapData"); + tab(st); + method()->print_short_name(st); + st->cr(); +} #endif diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/ci/ciMethodData.hpp --- a/src/share/vm/ci/ciMethodData.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/ci/ciMethodData.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -31,6 +31,7 @@ #include "ci/ciUtilities.hpp" #include "oops/methodData.hpp" #include "oops/oop.inline.hpp" +#include "runtime/deoptimization.hpp" class ciBitData; class ciCounterData; @@ -44,6 +45,7 @@ class ciCallTypeData; class ciVirtualCallTypeData; class ciParametersTypeData; +class ciSpeculativeTrapData;; typedef ProfileData ciProfileData; @@ -173,7 +175,7 @@ } #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra) const; #endif }; @@ -200,7 +202,7 @@ } void translate_receiver_data_from(const ProfileData* data); #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra) const; void print_receiver_data_on(outputStream* st) const; #endif }; @@ -225,7 +227,7 @@ rtd_super()->translate_receiver_data_from(data); } #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra) const; #endif }; @@ -287,7 +289,7 @@ } #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra) const; #endif }; @@ -336,7 +338,26 @@ } #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra) const; +#endif +}; + +class ciSpeculativeTrapData : public SpeculativeTrapData { +public: + ciSpeculativeTrapData(DataLayout* layout) : 
SpeculativeTrapData(layout) {} + + virtual void translate_from(const ProfileData* data); + + ciMethod* method() const { + return (ciMethod*)intptr_at(method_offset); + } + + void set_method(ciMethod* m) { + set_intptr_at(method_offset, (intptr_t)m); + } + +#ifndef PRODUCT + void print_data_on(outputStream* st, const char* extra) const; #endif }; @@ -436,6 +457,16 @@ ciArgInfoData *arg_info() const; + address data_base() const { + return (address) _data; + } + DataLayout* limit_data_position() const { + return (DataLayout*)((address)data_base() + _data_size); + } + + void load_extra_data(); + ciProfileData* bci_to_extra_data(int bci, ciMethod* m, bool& two_free_slots); + public: bool is_method_data() const { return true; } @@ -447,6 +478,18 @@ int invocation_count() { return _invocation_counter; } int backedge_count() { return _backedge_counter; } + +#if INCLUDE_RTM_OPT + // return cached value + int rtm_state() { + if (is_empty()) { + return NoRTM; + } else { + return get_MethodData()->rtm_state(); + } + } +#endif + // Transfer information about the method to MethodData*. // would_profile means we would like to profile this method, // meaning it's not trivial. @@ -475,9 +518,11 @@ ciProfileData* next_data(ciProfileData* current); bool is_valid(ciProfileData* current) { return current != NULL; } - // Get the data at an arbitrary bci, or NULL if there is none. - ciProfileData* bci_to_data(int bci); - ciProfileData* bci_to_extra_data(int bci, bool create_if_missing); + DataLayout* extra_data_base() const { return limit_data_position(); } + + // Get the data at an arbitrary bci, or NULL if there is none. If m + // is not NULL look for a SpeculativeTrapData if any first. + ciProfileData* bci_to_data(int bci, ciMethod* m = NULL); uint overflow_trap_count() const { return _orig.overflow_trap_count(); @@ -496,12 +541,13 @@ // Helpful query functions that decode trap_state. int has_trap_at(ciProfileData* data, int reason); - int has_trap_at(int bci, int reason) { - return has_trap_at(bci_to_data(bci), reason); + int has_trap_at(int bci, ciMethod* m, int reason) { + assert((m != NULL) == Deoptimization::reason_is_speculate(reason), "inconsistent method/reason"); + return has_trap_at(bci_to_data(bci, m), reason); } int trap_recompiled_at(ciProfileData* data); - int trap_recompiled_at(int bci) { - return trap_recompiled_at(bci_to_data(bci)); + int trap_recompiled_at(int bci, ciMethod* m) { + return trap_recompiled_at(bci_to_data(bci, m)); } void clear_escape_info(); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/classfile/altHashing.cpp --- a/src/share/vm/classfile/altHashing.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/classfile/altHashing.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,18 +39,18 @@ } // Seed value used for each alternative hash calculated. 
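
The ciMethodData changes above thread an optional ciMethod* through the profile queries: bci_to_data(bci, m) with a non-NULL m searches the extra-data section for a SpeculativeTrapData entry recorded against that method, and falls back to the ordinary per-bci data only when the extra-data section had no free slot left that could have held such an entry. A minimal caller-side sketch (all names below are invented for illustration; G1/ci headers assumed):

    // Hypothetical compiler code querying the translated MDO snapshot.
    ciMethodData* md   = profiled_method->method_data();
    ciMethod*     spec = speculated_callee;
    int           bci  = call_site_bci;
    ciProfileData* data = md->bci_to_data(bci);         // ordinary per-bci profile entry
    ciProfileData* trap = md->bci_to_data(bci, spec);   // SpeculativeTrapData for 'spec', if any
    if (trap != NULL) {
      // a speculative optimization at this site already deoptimized for 'spec'
    }
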
-jint AltHashing::compute_seed() { +juint AltHashing::compute_seed() { jlong nanos = os::javaTimeNanos(); jlong now = os::javaTimeMillis(); - jint SEED_MATERIAL[8] = { - (jint) object_hash(SystemDictionary::String_klass()), - (jint) object_hash(SystemDictionary::System_klass()), - (jint) os::random(), // current thread isn't a java thread - (jint) (((julong)nanos) >> 32), - (jint) nanos, - (jint) (((julong)now) >> 32), - (jint) now, - (jint) (os::javaTimeNanos() >> 2) + int SEED_MATERIAL[8] = { + (int) object_hash(SystemDictionary::String_klass()), + (int) object_hash(SystemDictionary::System_klass()), + (int) os::random(), // current thread isn't a java thread + (int) (((julong)nanos) >> 32), + (int) nanos, + (int) (((julong)now) >> 32), + (int) now, + (int) (os::javaTimeNanos() >> 2) }; return murmur3_32(SEED_MATERIAL, 8); @@ -58,14 +58,14 @@ // Murmur3 hashing for Symbol -jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) { - jint h1 = seed; +juint AltHashing::murmur3_32(juint seed, const jbyte* data, int len) { + juint h1 = seed; int count = len; int offset = 0; // body while (count >= 4) { - jint k1 = (data[offset] & 0x0FF) + juint k1 = (data[offset] & 0x0FF) | (data[offset + 1] & 0x0FF) << 8 | (data[offset + 2] & 0x0FF) << 16 | data[offset + 3] << 24; @@ -85,7 +85,7 @@ // tail if (count > 0) { - jint k1 = 0; + juint k1 = 0; switch (count) { case 3: @@ -109,18 +109,18 @@ h1 ^= len; // finalization mix force all bits of a hash block to avalanche - h1 ^= ((unsigned int)h1) >> 16; + h1 ^= h1 >> 16; h1 *= 0x85ebca6b; - h1 ^= ((unsigned int)h1) >> 13; + h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; - h1 ^= ((unsigned int)h1) >> 16; + h1 ^= h1 >> 16; return h1; } // Murmur3 hashing for Strings -jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) { - jint h1 = seed; +juint AltHashing::murmur3_32(juint seed, const jchar* data, int len) { + juint h1 = seed; int off = 0; int count = len; @@ -129,7 +129,7 @@ while (count >= 2) { jchar d1 = data[off++] & 0xFFFF; jchar d2 = data[off++]; - jint k1 = (d1 | d2 << 16); + juint k1 = (d1 | d2 << 16); count -= 2; @@ -145,7 +145,7 @@ // tail if (count > 0) { - int k1 = data[off]; + juint k1 = (juint)data[off]; k1 *= 0xcc9e2d51; k1 = Integer_rotateLeft(k1, 15); @@ -157,25 +157,25 @@ h1 ^= len * 2; // (Character.SIZE / Byte.SIZE); // finalization mix force all bits of a hash block to avalanche - h1 ^= ((unsigned int)h1) >> 16; + h1 ^= h1 >> 16; h1 *= 0x85ebca6b; - h1 ^= ((unsigned int)h1) >> 13; + h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; - h1 ^= ((unsigned int)h1) >> 16; + h1 ^= h1 >> 16; return h1; } // Hash used for the seed. -jint AltHashing::murmur3_32(jint seed, const int* data, int len) { - jint h1 = seed; +juint AltHashing::murmur3_32(juint seed, const int* data, int len) { + juint h1 = seed; int off = 0; int end = len; // body while (off < end) { - jint k1 = data[off++]; + juint k1 = (juint)data[off++]; k1 *= 0xcc9e2d51; k1 = Integer_rotateLeft(k1, 15); @@ -193,26 +193,26 @@ h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE); // finalization mix force all bits of a hash block to avalanche - h1 ^= ((juint)h1) >> 16; + h1 ^= h1 >> 16; h1 *= 0x85ebca6b; - h1 ^= ((juint)h1) >> 13; + h1 ^= h1 >> 13; h1 *= 0xc2b2ae35; - h1 ^= ((juint)h1) >> 16; + h1 ^= h1 >> 16; return h1; } -jint AltHashing::murmur3_32(const int* data, int len) { +juint AltHashing::murmur3_32(const int* data, int len) { return murmur3_32(0, data, len); } #ifndef PRODUCT // Overloaded versions for internal test. 
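
The jint-to-juint switch above is what lets the finalization mix drop its casts: on an unsigned type, >> is a logical shift and the repeated multiplies by 0x85ebca6b and 0xc2b2ae35 wrap modulo 2^32 instead of being signed overflow, so h1 ^= h1 >> 16 now does what Murmur3 intends without the ((unsigned int)h1) contortions. A small self-contained sketch of the shift difference (values are illustrative; signed shift behavior shown is the usual arithmetic shift on common compilers):

    #include <cstdint>
    #include <cstdio>
    int main() {
      int32_t  s = -2;            // bit pattern 0xFFFFFFFE
      uint32_t u = 0xFFFFFFFEu;
      std::printf("signed   >> 16 : 0x%08x\n", (uint32_t)(s >> 16)); // 0xffffffff (sign bit kept)
      std::printf("unsigned >> 16 : 0x%08x\n", u >> 16);             // 0x0000ffff (zero filled)
      return 0;
    }
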
-jint AltHashing::murmur3_32(const jbyte* data, int len) { +juint AltHashing::murmur3_32(const jbyte* data, int len) { return murmur3_32(0, data, len); } -jint AltHashing::murmur3_32(const jchar* data, int len) { +juint AltHashing::murmur3_32(const jchar* data, int len) { return murmur3_32(0, data, len); } @@ -251,11 +251,11 @@ // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255} for (int i = 0; i < 256; i++) { - jint hash = murmur3_32(256 - i, vector, i); + juint hash = murmur3_32(256 - i, vector, i); hashes[i * 4] = (jbyte) hash; - hashes[i * 4 + 1] = (jbyte) (((juint)hash) >> 8); - hashes[i * 4 + 2] = (jbyte) (((juint)hash) >> 16); - hashes[i * 4 + 3] = (jbyte) (((juint)hash) >> 24); + hashes[i * 4 + 1] = (jbyte)(hash >> 8); + hashes[i * 4 + 2] = (jbyte)(hash >> 16); + hashes[i * 4 + 3] = (jbyte)(hash >> 24); } // hash to get const result. @@ -269,7 +269,7 @@ } void AltHashing::testEquivalentHashes() { - jint jbytes, jchars, ints; + juint jbytes, jchars, ints; // printf("testEquivalentHashes\n"); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/classfile/altHashing.hpp --- a/src/share/vm/classfile/altHashing.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/classfile/altHashing.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,24 +39,24 @@ class AltHashing : AllStatic { // utility function copied from java/lang/Integer - static jint Integer_rotateLeft(jint i, int distance) { - return (i << distance) | (((juint)i) >> (32-distance)); + static juint Integer_rotateLeft(juint i, int distance) { + return (i << distance) | (i >> (32-distance)); } - static jint murmur3_32(const int* data, int len); - static jint murmur3_32(jint seed, const int* data, int len); + static juint murmur3_32(const int* data, int len); + static juint murmur3_32(juint seed, const int* data, int len); #ifndef PRODUCT // Hashing functions used for internal testing - static jint murmur3_32(const jbyte* data, int len); - static jint murmur3_32(const jchar* data, int len); + static juint murmur3_32(const jbyte* data, int len); + static juint murmur3_32(const jchar* data, int len); static void testMurmur3_32_ByteArray(); static void testEquivalentHashes(); #endif // PRODUCT public: - static jint compute_seed(); - static jint murmur3_32(jint seed, const jbyte* data, int len); - static jint murmur3_32(jint seed, const jchar* data, int len); + static juint compute_seed(); + static juint murmur3_32(juint seed, const jbyte* data, int len); + static juint murmur3_32(juint seed, const jchar* data, int len); NOT_PRODUCT(static void test_alt_hash();) }; #endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/classfile/javaClasses.hpp --- a/src/share/vm/classfile/javaClasses.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/classfile/javaClasses.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -61,10 +61,6 @@ static Handle basic_create(int length, TRAPS); - static void set_value( oop string, typeArrayOop buffer) { - assert(initialized, "Must be initialized"); - string->obj_field_put(value_offset, (oop)buffer); - } static void set_offset(oop string, int offset) { assert(initialized, "Must be initialized"); if (offset_offset > 0) { @@ -122,12 +118,26 @@ return hash_offset; } + static void set_value(oop string, typeArrayOop buffer) { + assert(initialized && (value_offset > 0), "Must be initialized"); + string->obj_field_put(value_offset, (oop)buffer); + } + static void set_hash(oop string, unsigned int hash) { + assert(initialized && (hash_offset > 0), "Must be initialized"); + string->int_field_put(hash_offset, hash); + } + // Accessors static typeArrayOop value(oop java_string) { assert(initialized && (value_offset > 0), "Must be initialized"); assert(is_instance(java_string), "must be java_string"); return (typeArrayOop) java_string->obj_field(value_offset); } + static unsigned int hash(oop java_string) { + assert(initialized && (hash_offset > 0), "Must be initialized"); + assert(is_instance(java_string), "must be java_string"); + return java_string->int_field(hash_offset); + } static int offset(oop java_string) { assert(initialized, "Must be initialized"); assert(is_instance(java_string), "must be java_string"); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/classfile/symbolTable.cpp --- a/src/share/vm/classfile/symbolTable.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/classfile/symbolTable.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,6 +35,9 @@ #include "oops/oop.inline2.hpp" #include "runtime/mutexLocker.hpp" #include "utilities/hashtable.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1StringDedup.hpp" +#endif // -------------------------------------------------------------------------- @@ -728,6 +731,15 @@ string = java_lang_String::create_from_unicode(name, len, CHECK_NULL); } +#if INCLUDE_ALL_GCS + if (G1StringDedup::is_enabled()) { + // Deduplicate the string before it is interned. Note that we should never + // deduplicate a string after it has been interned. Doing so will counteract + // compiler optimizations done on e.g. interned string literals. + G1StringDedup::deduplicate(string()); + } +#endif + // Grab the StringTable_lock before getting the_table() because it could // change at safepoint. MutexLocker ml(StringTable_lock, THREAD); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/code/nmethod.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -481,7 +481,9 @@ _scavenge_root_link = NULL; _scavenge_root_state = 0; _compiler = NULL; - +#if INCLUDE_RTM_OPT + _rtm_state = NoRTM; +#endif #ifdef HAVE_DTRACE_H _trap_offset = 0; #endif // def HAVE_DTRACE_H diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/code/nmethod.hpp --- a/src/share/vm/code/nmethod.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/code/nmethod.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -193,6 +193,12 @@ jbyte _scavenge_root_state; +#if INCLUDE_RTM_OPT + // RTM state at compile time. 
Used during deoptimization to decide + // whether to restart collecting RTM locking abort statistic again. + RTMState _rtm_state; +#endif + // Nmethod Flushing lock. If non-zero, then the nmethod is not removed // and is not made into a zombie. However, once the nmethod is made into // a zombie, it will be locked one final time if CompiledMethodUnload @@ -414,6 +420,12 @@ bool is_zombie() const { return _state == zombie; } bool is_unloaded() const { return _state == unloaded; } +#if INCLUDE_RTM_OPT + // rtm state accessing and manipulating + RTMState rtm_state() const { return _rtm_state; } + void set_rtm_state(RTMState state) { _rtm_state = state; } +#endif + // Make the nmethod non entrant. The nmethod will continue to be // alive. It is used when an uncommon trap happens. Returns true // if this thread changed the state of the nmethod or false if diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -2496,7 +2496,8 @@ } void CMSCollector::report_heap_summary(GCWhen::Type when) { - _gc_tracer_cm->report_gc_heap_summary(when, _last_heap_summary, _last_metaspace_summary); + _gc_tracer_cm->report_gc_heap_summary(when, _last_heap_summary); + _gc_tracer_cm->report_metaspace_summary(when, _last_metaspace_summary); } void CMSCollector::collect_in_foreground(bool clear_all_soft_refs, GCCause::Cause cause) { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1809,8 +1809,8 @@ uint _regions_claimed; size_t _freed_bytes; FreeRegionList* _local_cleanup_list; - OldRegionSet* _old_proxy_set; - HumongousRegionSet* _humongous_proxy_set; + HeapRegionSetCount _old_regions_removed; + HeapRegionSetCount _humongous_regions_removed; HRRSCleanupTask* _hrrs_cleanup_task; double _claimed_region_time; double _max_region_time; @@ -1819,19 +1819,19 @@ G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, int worker_num, FreeRegionList* local_cleanup_list, - OldRegionSet* old_proxy_set, - HumongousRegionSet* humongous_proxy_set, HRRSCleanupTask* hrrs_cleanup_task) : _g1(g1), _worker_num(worker_num), _max_live_bytes(0), _regions_claimed(0), _freed_bytes(0), _claimed_region_time(0.0), _max_region_time(0.0), _local_cleanup_list(local_cleanup_list), - _old_proxy_set(old_proxy_set), - _humongous_proxy_set(humongous_proxy_set), + _old_regions_removed(), + _humongous_regions_removed(), _hrrs_cleanup_task(hrrs_cleanup_task) { } size_t freed_bytes() { return _freed_bytes; } + const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } + const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } bool doHeapRegion(HeapRegion *hr) { if (hr->continuesHumongous()) { @@ -1844,13 +1844,22 @@ _regions_claimed++; hr->note_end_of_marking(); _max_live_bytes += hr->max_live_bytes(); - _g1->free_region_if_empty(hr, - &_freed_bytes, - _local_cleanup_list, - _old_proxy_set, - _humongous_proxy_set, - _hrrs_cleanup_task, - true /* par */); + + if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { + _freed_bytes += hr->used(); + 
hr->set_containing_set(NULL); + if (hr->isHumongous()) { + assert(hr->startsHumongous(), "we should only see starts humongous"); + _humongous_regions_removed.increment(1u, hr->capacity()); + _g1->free_humongous_region(hr, _local_cleanup_list, true); + } else { + _old_regions_removed.increment(1u, hr->capacity()); + _g1->free_region(hr, _local_cleanup_list, true); + } + } else { + hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); + } + double region_time = (os::elapsedTime() - start); _claimed_region_time += region_time; if (region_time > _max_region_time) { @@ -1883,12 +1892,8 @@ void work(uint worker_id) { double start = os::elapsedTime(); FreeRegionList local_cleanup_list("Local Cleanup List"); - OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); - HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); HRRSCleanupTask hrrs_cleanup_task; G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, - &old_proxy_set, - &humongous_proxy_set, &hrrs_cleanup_task); if (G1CollectedHeap::use_parallel_gc_threads()) { _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, @@ -1900,13 +1905,10 @@ assert(g1_note_end.complete(), "Shouldn't have yielded!"); // Now update the lists - _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), - NULL /* free_list */, - &old_proxy_set, - &humongous_proxy_set, - true /* par */); + _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); _max_live_bytes += g1_note_end.max_live_bytes(); _freed_bytes += g1_note_end.freed_bytes(); @@ -1920,14 +1922,14 @@ G1HRPrinter* hr_printer = _g1h->hr_printer(); if (hr_printer->is_active()) { - HeapRegionLinkedListIterator iter(&local_cleanup_list); + FreeRegionListIterator iter(&local_cleanup_list); while (iter.more_available()) { HeapRegion* hr = iter.get_next(); hr_printer->cleanup(hr); } } - _cleanup_list->add_as_tail(&local_cleanup_list); + _cleanup_list->add_ordered(&local_cleanup_list); assert(local_cleanup_list.is_empty(), "post-condition"); HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); @@ -1971,7 +1973,6 @@ return; } - HRSPhaseSetter x(HRSPhaseCleanup); g1h->verify_region_sets_optional(); if (VerifyDuringGC) { @@ -2144,7 +2145,7 @@ G1CollectedHeap* g1h = G1CollectedHeap::heap(); - _cleanup_list.verify_optional(); + _cleanup_list.verify_list(); FreeRegionList tmp_free_list("Tmp Free List"); if (G1ConcRegionFreeingVerbose) { @@ -2157,9 +2158,9 @@ // so it's not necessary to take any locks while (!_cleanup_list.is_empty()) { HeapRegion* hr = _cleanup_list.remove_head(); - assert(hr != NULL, "the list was not empty"); + assert(hr != NULL, "Got NULL from a non-empty list"); hr->par_clear(); - tmp_free_list.add_as_tail(hr); + tmp_free_list.add_ordered(hr); // Instead of adding one region at a time to the secondary_free_list, // we accumulate them in the local list and move them a few at a @@ -2179,7 +2180,7 @@ { MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); - g1h->secondary_free_list_add_as_tail(&tmp_free_list); + g1h->secondary_free_list_add(&tmp_free_list); SecondaryFreeList_lock->notify_all(); } @@ -2528,6 +2529,11 @@ assert(!rp->discovery_enabled(), "Post condition"); } + if (has_overflown()) { + // We can not trust g1_is_alive if the marking stack overflowed + return; + } + g1h->unlink_string_and_symbol_table(&g1_is_alive, /* 
process_strings */ false, // currently strings are always roots /* process_symbols */ true); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/concurrentMark.hpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -25,7 +25,7 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP -#include "gc_implementation/g1/heapRegionSets.hpp" +#include "gc_implementation/g1/heapRegionSet.hpp" #include "utilities/taskqueue.hpp" class G1CollectedHeap; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1BiasedArray.cpp --- a/src/share/vm/gc_implementation/g1/g1BiasedArray.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1BiasedArray.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -24,6 +24,14 @@ #include "precompiled.hpp" #include "gc_implementation/g1/g1BiasedArray.hpp" +#include "memory/padded.inline.hpp" + +// Allocate a new array, generic version. +address G1BiasedMappedArrayBase::create_new_base_array(size_t length, size_t elem_size) { + assert(length > 0, "just checking"); + assert(elem_size > 0, "just checking"); + return PaddedPrimitiveArray::create_unfreeable(length * elem_size); +} #ifndef PRODUCT void G1BiasedMappedArrayBase::verify_index(idx_t index) const { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1BiasedArray.hpp --- a/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1BiasedArray.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -25,8 +25,8 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1BIASEDARRAY_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_G1BIASEDARRAY_HPP +#include "memory/allocation.hpp" #include "utilities/debug.hpp" -#include "memory/allocation.inline.hpp" // Implements the common base functionality for arrays that contain provisions // for accessing its elements using a biased index. @@ -48,11 +48,7 @@ _bias(0), _shift_by(0) { } // Allocate a new array, generic version. - static address create_new_base_array(size_t length, size_t elem_size) { - assert(length > 0, "just checking"); - assert(elem_size > 0, "just checking"); - return NEW_C_HEAP_ARRAY(u_char, length * elem_size, mtGC); - } + static address create_new_base_array(size_t length, size_t elem_size); // Initialize the members of this class. The biased start address of this array // is the bias (in elements) multiplied by the element size. diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + + +#include "precompiled.hpp" +#include "code/nmethod.hpp" +#include "gc_implementation/g1/g1CodeCacheRemSet.hpp" +#include "memory/iterator.hpp" + +G1CodeRootChunk::G1CodeRootChunk() : _top(NULL), _next(NULL), _prev(NULL) { + _top = bottom(); +} + +void G1CodeRootChunk::reset() { + _next = _prev = NULL; + _top = bottom(); +} + +void G1CodeRootChunk::nmethods_do(CodeBlobClosure* cl) { + nmethod** cur = bottom(); + while (cur != _top) { + cl->do_code_blob(*cur); + cur++; + } +} + +FreeList G1CodeRootSet::_free_list; +size_t G1CodeRootSet::_num_chunks_handed_out = 0; + +G1CodeRootChunk* G1CodeRootSet::new_chunk() { + G1CodeRootChunk* result = _free_list.get_chunk_at_head(); + if (result == NULL) { + result = new G1CodeRootChunk(); + } + G1CodeRootSet::_num_chunks_handed_out++; + result->reset(); + return result; +} + +void G1CodeRootSet::free_chunk(G1CodeRootChunk* chunk) { + _free_list.return_chunk_at_head(chunk); + G1CodeRootSet::_num_chunks_handed_out--; +} + +void G1CodeRootSet::free_all_chunks(FreeList* list) { + G1CodeRootSet::_num_chunks_handed_out -= list->count(); + _free_list.prepend(list); +} + +void G1CodeRootSet::purge_chunks(size_t keep_ratio) { + size_t keep = G1CodeRootSet::_num_chunks_handed_out * keep_ratio / 100; + + if (keep >= (size_t)_free_list.count()) { + return; + } + + FreeList temp; + temp.initialize(); + temp.set_size(G1CodeRootChunk::word_size()); + + _free_list.getFirstNChunksFromList((size_t)_free_list.count() - keep, &temp); + + G1CodeRootChunk* cur = temp.get_chunk_at_head(); + while (cur != NULL) { + delete cur; + cur = temp.get_chunk_at_head(); + } +} + +size_t G1CodeRootSet::static_mem_size() { + return sizeof(_free_list) + sizeof(_num_chunks_handed_out); +} + +size_t G1CodeRootSet::fl_mem_size() { + return _free_list.count() * _free_list.size(); +} + +void G1CodeRootSet::initialize() { + _free_list.initialize(); + _free_list.set_size(G1CodeRootChunk::word_size()); +} + +G1CodeRootSet::G1CodeRootSet() : _list(), _length(0) { + _list.initialize(); + _list.set_size(G1CodeRootChunk::word_size()); +} + +G1CodeRootSet::~G1CodeRootSet() { + clear(); +} + +void G1CodeRootSet::add(nmethod* method) { + if (!contains(method)) { + // Try to add the nmethod. If there is not enough space, get a new chunk. 
+ if (_list.head() == NULL || _list.head()->is_full()) { + G1CodeRootChunk* cur = new_chunk(); + _list.return_chunk_at_head(cur); + } + bool result = _list.head()->add(method); + guarantee(result, err_msg("Not able to add nmethod "PTR_FORMAT" to newly allocated chunk.", method)); + _length++; + } +} + +void G1CodeRootSet::remove(nmethod* method) { + G1CodeRootChunk* found = find(method); + if (found != NULL) { + bool result = found->remove(method); + guarantee(result, err_msg("could not find nmethod "PTR_FORMAT" during removal although we previously found it", method)); + // eventually free completely emptied chunk + if (found->is_empty()) { + _list.remove_chunk(found); + free(found); + } + _length--; + } + assert(!contains(method), err_msg(PTR_FORMAT" still contains nmethod "PTR_FORMAT, this, method)); +} + +nmethod* G1CodeRootSet::pop() { + do { + G1CodeRootChunk* cur = _list.head(); + if (cur == NULL) { + assert(_length == 0, "when there are no chunks, there should be no elements"); + return NULL; + } + nmethod* result = cur->pop(); + if (result != NULL) { + _length--; + return result; + } else { + free(_list.get_chunk_at_head()); + } + } while (true); +} + +G1CodeRootChunk* G1CodeRootSet::find(nmethod* method) { + G1CodeRootChunk* cur = _list.head(); + while (cur != NULL) { + if (cur->contains(method)) { + return cur; + } + cur = (G1CodeRootChunk*)cur->next(); + } + return NULL; +} + +void G1CodeRootSet::free(G1CodeRootChunk* chunk) { + free_chunk(chunk); +} + +bool G1CodeRootSet::contains(nmethod* method) { + return find(method) != NULL; +} + +void G1CodeRootSet::clear() { + free_all_chunks(&_list); + _length = 0; +} + +void G1CodeRootSet::nmethods_do(CodeBlobClosure* blk) const { + G1CodeRootChunk* cur = _list.head(); + while (cur != NULL) { + cur->nmethods_do(blk); + cur = (G1CodeRootChunk*)cur->next(); + } +} + +size_t G1CodeRootSet::mem_size() { + return sizeof(this) + _list.count() * _list.size(); +} + +#ifndef PRODUCT + +void G1CodeRootSet::test() { + initialize(); + + assert(_free_list.count() == 0, "Free List must be empty"); + assert(_num_chunks_handed_out == 0, "No elements must have been handed out yet"); + + // The number of chunks that we allocate for purge testing. + size_t const num_chunks = 10; + { + G1CodeRootSet set1; + assert(set1.is_empty(), "Code root set must be initially empty but is not."); + + set1.add((nmethod*)1); + assert(_num_chunks_handed_out == 1, + err_msg("Must have allocated and handed out one chunk, but handed out " + SIZE_FORMAT" chunks", _num_chunks_handed_out)); + assert(set1.length() == 1, err_msg("Added exactly one element, but set contains " + SIZE_FORMAT" elements", set1.length())); + + // G1CodeRootChunk::word_size() is larger than G1CodeRootChunk::num_entries which + // we cannot access. 
+    for (uint i = 0; i < G1CodeRootChunk::word_size() + 1; i++) {
+      set1.add((nmethod*)1);
+    }
+    assert(_num_chunks_handed_out == 1,
+           err_msg("Duplicate detection must have prevented allocation of further "
+                   "chunks but contains "SIZE_FORMAT, _num_chunks_handed_out));
+    assert(set1.length() == 1,
+           err_msg("Duplicate detection should not have increased the set size but "
+                   "is "SIZE_FORMAT, set1.length()));
+
+    size_t num_total_after_add = G1CodeRootChunk::word_size() + 1;
+    for (size_t i = 0; i < num_total_after_add - 1; i++) {
+      set1.add((nmethod*)(2 + i));
+    }
+    assert(_num_chunks_handed_out > 1,
+           "After adding more code roots, more than one chunks should have been handed out");
+    assert(set1.length() == num_total_after_add,
+           err_msg("After adding in total "SIZE_FORMAT" distinct code roots, they "
+                   "need to be in the set, but there are only "SIZE_FORMAT,
+                   num_total_after_add, set1.length()));
+
+    size_t num_popped = 0;
+    while (set1.pop() != NULL) {
+      num_popped++;
+    }
+    assert(num_popped == num_total_after_add,
+           err_msg("Managed to pop "SIZE_FORMAT" code roots, but only "SIZE_FORMAT" "
+                   "were added", num_popped, num_total_after_add));
+    assert(_num_chunks_handed_out == 0,
+           err_msg("After popping all elements, all chunks must have been returned "
+                   "but are still "SIZE_FORMAT, _num_chunks_handed_out));
+
+    purge_chunks(0);
+    assert(_free_list.count() == 0,
+           err_msg("After purging everything, the free list must be empty but still "
+                   "contains "SIZE_FORMAT" chunks", _free_list.count()));
+
+    // Add some more handed out chunks.
+    size_t i = 0;
+    while (_num_chunks_handed_out < num_chunks) {
+      set1.add((nmethod*)i);
+      i++;
+    }
+
+    {
+      // Generate chunks on the free list.
+      G1CodeRootSet set2;
+      size_t i = 0;
+      while (_num_chunks_handed_out < num_chunks * 2) {
+        set2.add((nmethod*)i);
+        i++;
+      }
+      // Exit of the scope of the set2 object will call the destructor that generates
+      // num_chunks elements on the free list.
+    }
+
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Deletion of the second set must have resulted in giving back "
+                   "those, but there is still "SIZE_FORMAT" handed out, expecting "
+                   SIZE_FORMAT, _num_chunks_handed_out, num_chunks));
+    assert((size_t)_free_list.count() == num_chunks,
+           err_msg("After freeing "SIZE_FORMAT" chunks, they must be on the free list "
+                   "but there are only "SIZE_FORMAT, num_chunks, _free_list.count()));
+
+    size_t const test_percentage = 50;
+    purge_chunks(test_percentage);
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Purging must not hand out chunks but there are "SIZE_FORMAT,
+                   _num_chunks_handed_out));
+    assert((size_t)_free_list.count() == (ssize_t)(num_chunks * test_percentage / 100),
+           err_msg("Must have purged "SIZE_FORMAT" percent of "SIZE_FORMAT" chunks"
+                   "but there are "SSIZE_FORMAT, test_percentage, num_chunks,
+                   _free_list.count()));
+    // Purge the remainder of the chunks on the free list.
+    purge_chunks(0);
+    assert(_free_list.count() == 0, "Free List must be empty");
+    assert(_num_chunks_handed_out == num_chunks,
+           err_msg("Expected to be "SIZE_FORMAT" chunks handed out from the first set "
+                   "but there are "SIZE_FORMAT, num_chunks, _num_chunks_handed_out));
+
+    // Exit of the scope of the set1 object will call the destructor that generates
+    // num_chunks additional elements on the free list.
+  }
+
+  assert(_num_chunks_handed_out == 0,
+         err_msg("Deletion of the only set must have resulted in no chunks handed "
+                 "out, but there is still "SIZE_FORMAT" handed out", _num_chunks_handed_out));
+  assert((size_t)_free_list.count() == num_chunks,
+         err_msg("After freeing "SIZE_FORMAT" chunks, they must be on the free list "
+                 "but there are only "SSIZE_FORMAT, num_chunks, _free_list.count()));
+
+  // Restore initial state.
+  purge_chunks(0);
+  assert(_free_list.count() == 0, "Free List must be empty");
+  assert(_num_chunks_handed_out == 0, "No elements must have been handed out yet");
+}
+
+void TestCodeCacheRemSet_test() {
+  G1CodeRootSet::test();
+}
+#endif
diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp Tue Mar 25 17:07:36 2014 -0700
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP + +#include "memory/allocation.hpp" +#include "memory/freeList.hpp" +#include "runtime/globals.hpp" + +class CodeBlobClosure; + +class G1CodeRootChunk : public CHeapObj { + private: + static const int NUM_ENTRIES = 32; + public: + G1CodeRootChunk* _next; + G1CodeRootChunk* _prev; + + nmethod** _top; + + nmethod* _data[NUM_ENTRIES]; + + nmethod** bottom() const { + return (nmethod**) &(_data[0]); + } + + nmethod** end() const { + return (nmethod**) &(_data[NUM_ENTRIES]); + } + + public: + G1CodeRootChunk(); + ~G1CodeRootChunk() {} + + static size_t word_size() { return (size_t)(align_size_up_(sizeof(G1CodeRootChunk), HeapWordSize) / HeapWordSize); } + + // FreeList "interface" methods + + G1CodeRootChunk* next() const { return _next; } + G1CodeRootChunk* prev() const { return _prev; } + void set_next(G1CodeRootChunk* v) { _next = v; assert(v != this, "Boom");} + void set_prev(G1CodeRootChunk* v) { _prev = v; assert(v != this, "Boom");} + void clear_next() { set_next(NULL); } + void clear_prev() { set_prev(NULL); } + + size_t size() const { return word_size(); } + + void link_next(G1CodeRootChunk* ptr) { set_next(ptr); } + void link_prev(G1CodeRootChunk* ptr) { set_prev(ptr); } + void link_after(G1CodeRootChunk* ptr) { + link_next(ptr); + if (ptr != NULL) ptr->link_prev((G1CodeRootChunk*)this); + } + + bool is_free() { return true; } + + // New G1CodeRootChunk routines + + void reset(); + + bool is_empty() const { + return _top == bottom(); + } + + bool is_full() const { + return _top == (nmethod**)end(); + } + + bool contains(nmethod* method) { + nmethod** cur = bottom(); + while (cur != _top) { + if (*cur == method) return true; + cur++; + } + return false; + } + + bool add(nmethod* method) { + if (is_full()) return false; + *_top = method; + _top++; + return true; + } + + bool remove(nmethod* method) { + nmethod** cur = bottom(); + while (cur != _top) { + if (*cur == method) { + memmove(cur, cur + 1, (_top - (cur + 1)) * sizeof(nmethod**)); + _top--; + return true; + } + cur++; + } + return false; + } + + void nmethods_do(CodeBlobClosure* blk); + + nmethod* pop() { + if (is_empty()) { + return NULL; + } + _top--; + return *_top; + } +}; + +// Implements storage for a set of code roots. +// All methods that modify the set are not thread-safe except if otherwise noted. +class G1CodeRootSet VALUE_OBJ_CLASS_SPEC { + private: + // Global free chunk list management + static FreeList _free_list; + // Total number of chunks handed out + static size_t _num_chunks_handed_out; + + static G1CodeRootChunk* new_chunk(); + static void free_chunk(G1CodeRootChunk* chunk); + // Free all elements of the given list. + static void free_all_chunks(FreeList* list); + + // Return the chunk that contains the given nmethod, NULL otherwise. + // Scans the list of chunks backwards, as this method is used to add new + // entries, which are typically added in bulk for a single nmethod. + G1CodeRootChunk* find(nmethod* method); + void free(G1CodeRootChunk* chunk); + + size_t _length; + FreeList _list; + + public: + G1CodeRootSet(); + ~G1CodeRootSet(); + + static void initialize(); + static void purge_chunks(size_t keep_ratio); + + static size_t static_mem_size(); + static size_t fl_mem_size(); + + // Search for the code blob from the recently allocated ones to find duplicates more quickly, as this + // method is likely to be repeatedly called with the same nmethod. 
+ void add(nmethod* method); + + void remove(nmethod* method); + nmethod* pop(); + + bool contains(nmethod* method); + + void clear(); + + void nmethods_do(CodeBlobClosure* blk) const; + + bool is_empty() { return length() == 0; } + + // Length in elements + size_t length() const { return _length; } + + // Memory size in bytes taken by this set. + size_t mem_size(); + + static void test() PRODUCT_RETURN; +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1CODECACHEREMSET_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -39,6 +39,7 @@ #include "gc_implementation/g1/g1MarkSweep.hpp" #include "gc_implementation/g1/g1OopClosures.inline.hpp" #include "gc_implementation/g1/g1RemSet.inline.hpp" +#include "gc_implementation/g1/g1StringDedup.hpp" #include "gc_implementation/g1/g1YCTypes.hpp" #include "gc_implementation/g1/heapRegion.inline.hpp" #include "gc_implementation/g1/heapRegionRemSet.hpp" @@ -169,14 +170,6 @@ int calls() { return _calls; } }; -class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure { -public: - bool do_card_ptr(jbyte* card_ptr, int worker_i) { - *card_ptr = CardTableModRefBS::dirty_card_val(); - return true; - } -}; - YoungList::YoungList(G1CollectedHeap* g1h) : _g1h(g1h), _head(NULL), _length(0), _last_sampled_rs_lengths(0), _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0) { @@ -524,7 +517,7 @@ // Private methods. HeapRegion* -G1CollectedHeap::new_region_try_secondary_free_list() { +G1CollectedHeap::new_region_try_secondary_free_list(bool is_old) { MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); while (!_secondary_free_list.is_empty() || free_regions_coming()) { if (!_secondary_free_list.is_empty()) { @@ -540,7 +533,7 @@ assert(!_free_list.is_empty(), "if the secondary_free_list was not " "empty we should have moved at least one entry to the free_list"); - HeapRegion* res = _free_list.remove_head(); + HeapRegion* res = _free_list.remove_region(is_old); if (G1ConcRegionFreeingVerbose) { gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : " "allocated "HR_FORMAT" from secondary_free_list", @@ -562,7 +555,7 @@ return NULL; } -HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool do_expand) { +HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool is_old, bool do_expand) { assert(!isHumongous(word_size) || word_size <= HeapRegion::GrainWords, "the only time we use this to allocate a humongous region is " "when we are allocating a single humongous region"); @@ -574,19 +567,21 @@ gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : " "forced to look at the secondary_free_list"); } - res = new_region_try_secondary_free_list(); + res = new_region_try_secondary_free_list(is_old); if (res != NULL) { return res; } } } - res = _free_list.remove_head_or_null(); + + res = _free_list.remove_region(is_old); + if (res == NULL) { if (G1ConcRegionFreeingVerbose) { gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : " "res == NULL, trying the secondary_free_list"); } - res = new_region_try_secondary_free_list(); + res = new_region_try_secondary_free_list(is_old); } if (res == NULL && do_expand && _expand_heap_after_alloc_failure) { // Currently, only attempts to allocate GC alloc regions set @@ -603,12 +598,9 @@ if (expand(word_size * HeapWordSize)) { // Given that 
expand() succeeded in expanding the heap, and we // always expand the heap by an amount aligned to the heap - // region size, the free list should in theory not be empty. So - // it would probably be OK to use remove_head(). But the extra - // check for NULL is unlikely to be a performance issue here (we - // just expanded the heap!) so let's just be conservative and - // use remove_head_or_null(). - res = _free_list.remove_head_or_null(); + // region size, the free list should in theory not be empty. + // In either case remove_region() will check for NULL. + res = _free_list.remove_region(is_old); } else { _expand_heap_after_alloc_failure = false; } @@ -626,7 +618,7 @@ // Only one region to allocate, no need to go through the slower // path. The caller will attempt the expansion if this fails, so // let's not try to expand here too. - HeapRegion* hr = new_region(word_size, false /* do_expand */); + HeapRegion* hr = new_region(word_size, true /* is_old */, false /* do_expand */); if (hr != NULL) { first = hr->hrs_index(); } else { @@ -1298,7 +1290,6 @@ size_t metadata_prev_used = MetaspaceAux::allocated_used_bytes(); - HRSPhaseSetter x(HRSPhaseFullGC); verify_region_sets_optional(); const bool do_clear_all_soft_refs = clear_all_soft_refs || @@ -1928,10 +1919,10 @@ _g1mm(NULL), _refine_cte_cl(NULL), _full_collection(false), - _free_list("Master Free List"), - _secondary_free_list("Secondary Free List"), - _old_set("Old Set"), - _humongous_set("Master Humongous Set"), + _free_list("Master Free List", new MasterFreeRegionListMtSafeChecker()), + _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()), + _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()), + _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()), _free_regions_coming(false), _young_list(new YoungList(this)), _gc_time_stamp(0), @@ -1963,7 +1954,7 @@ int n_queues = MAX2((int)ParallelGCThreads, 1); _task_queues = new RefToScanQueueSet(n_queues); - int n_rem_sets = HeapRegionRemSet::num_par_rem_sets(); + uint n_rem_sets = HeapRegionRemSet::num_par_rem_sets(); assert(n_rem_sets > 0, "Invariant."); _worker_cset_start_region = NEW_C_HEAP_ARRAY(HeapRegion*, n_queues, mtGC); @@ -2079,7 +2070,7 @@ guarantee(HeapRegion::CardsPerRegion < max_cards_per_region, "too many cards per region"); - HeapRegionSet::set_unrealistically_long_length(max_regions() + 1); + FreeRegionList::set_unrealistically_long_length(max_regions() + 1); _bot_shared = new G1BlockOffsetSharedArray(_reserved, heap_word_size(init_byte_size)); @@ -2182,6 +2173,8 @@ // values in the heap have been properly initialized. _g1mm = new G1MonitoringSupport(this); + G1StringDedup::initialize(); + return JNI_OK; } @@ -2266,7 +2259,7 @@ // (for efficiency/performance) false); // Setting next fields of discovered - // lists requires a barrier. + // lists does not require a barrier. 
} size_t G1CollectedHeap::capacity() const { @@ -2369,8 +2362,12 @@ }; size_t G1CollectedHeap::recalculate_used() const { + double recalculate_used_start = os::elapsedTime(); + SumUsedClosure blk; heap_region_iterate(&blk); + + g1_policy()->phase_times()->record_evac_fail_recalc_used_time((os::elapsedTime() - recalculate_used_start) * 1000.0); return blk.result(); } @@ -3013,7 +3010,17 @@ } size_t G1CollectedHeap::tlab_capacity(Thread* ignored) const { - return HeapRegion::GrainBytes; + return (_g1_policy->young_list_target_length() - young_list()->survivor_length()) * HeapRegion::GrainBytes; +} + +size_t G1CollectedHeap::tlab_used(Thread* ignored) const { + return young_list()->eden_used_bytes(); +} + +// For G1 TLABs should not contain humongous objects, so the maximum TLAB size +// must be smaller than the humongous object limit. +size_t G1CollectedHeap::max_tlab_size() const { + return align_size_down(_humongous_object_threshold_in_words - 1, MinObjAlignment); } size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const { @@ -3025,11 +3032,11 @@ // humongous objects. HeapRegion* hr = _mutator_alloc_region.get(); - size_t max_tlab_size = _humongous_object_threshold_in_words * wordSize; + size_t max_tlab = max_tlab_size() * wordSize; if (hr == NULL) { - return max_tlab_size; + return max_tlab; } else { - return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab_size); + return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab); } } @@ -3470,6 +3477,11 @@ if (!silent) gclog_or_tty->print("RemSet "); rem_set()->verify(); + if (G1StringDedup::is_enabled()) { + if (!silent) gclog_or_tty->print("StrDedup "); + G1StringDedup::verify(); + } + if (failures) { gclog_or_tty->print_cr("Heap:"); // It helps to have the per-region information in the output to @@ -3487,8 +3499,13 @@ } guarantee(!failures, "there should not have been any failures"); } else { - if (!silent) - gclog_or_tty->print("(SKIPPING roots, heapRegionSets, heapRegions, remset) "); + if (!silent) { + gclog_or_tty->print("(SKIPPING Roots, HeapRegionSets, HeapRegions, RemSet"); + if (G1StringDedup::is_enabled()) { + gclog_or_tty->print(", StrDedup"); + } + gclog_or_tty->print(") "); + } } } @@ -3581,6 +3598,9 @@ st->cr(); _cm->print_worker_threads_on(st); _cg1r->print_worker_threads_on(st); + if (G1StringDedup::is_enabled()) { + G1StringDedup::print_worker_threads_on(st); + } } void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const { @@ -3589,6 +3609,9 @@ } tc->do_thread(_cmThread); _cg1r->threads_do(tc); + if (G1StringDedup::is_enabled()) { + G1StringDedup::threads_do(tc); + } } void G1CollectedHeap::print_tracing_info() const { @@ -3668,6 +3691,7 @@ // always_do_update_barrier = false; assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer"); // Fill TLAB's and such + accumulate_statistics_all_tlabs(); ensure_parsability(true); if (G1SummarizeRSetStats && (G1SummarizeRSetStatsPeriod > 0) && @@ -3692,6 +3716,8 @@ "derived pointer present")); // always_do_update_barrier = true; + resize_all_tlabs(); + // We have just completed a GC. 
Update the soft reference // policy with the new heap occupancy Universe::update_heap_info_at_gc(); @@ -3892,7 +3918,6 @@ print_heap_before_gc(); trace_heap_before_gc(_gc_tracer_stw); - HRSPhaseSetter x(HRSPhaseEvacuation); verify_region_sets_optional(); verify_dirty_young_regions(); @@ -4391,6 +4416,8 @@ void G1CollectedHeap::remove_self_forwarding_pointers() { assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity"); + double remove_self_forwards_start = os::elapsedTime(); + G1ParRemoveSelfForwardPtrsTask rsfp_task(this); if (G1CollectedHeap::use_parallel_gc_threads()) { @@ -4418,6 +4445,8 @@ } _objs_with_preserved_marks.clear(true); _preserved_marks_of_objs.clear(true); + + g1_policy()->phase_times()->record_evac_fail_remove_self_forwards((os::elapsedTime() - remove_self_forwards_start) * 1000.0); } void G1CollectedHeap::push_on_evac_failure_scan_stack(oop obj) { @@ -4639,9 +4668,7 @@ #endif // ASSERT void G1ParScanThreadState::trim_queue() { - assert(_evac_cl != NULL, "not set"); assert(_evac_failure_cl != NULL, "not set"); - assert(_partial_scan_cl != NULL, "not set"); StarTask ref; do { @@ -4732,6 +4759,12 @@ oop forward_ptr = old->forward_to_atomic(obj); if (forward_ptr == NULL) { Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz); + + // alloc_purpose is just a hint to allocate() above, recheck the type of region + // we actually allocated from and update alloc_purpose accordingly + HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr); + alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured; + if (g1p->track_object_age(alloc_purpose)) { // We could simply do obj->incr_age(). However, this causes a // performance issue. obj->incr_age() will first check whether @@ -4759,6 +4792,13 @@ obj->set_mark(m); } + if (G1StringDedup::is_enabled()) { + G1StringDedup::enqueue_from_evacuation(from_region->is_young(), + to_region->is_young(), + queue_num(), + obj); + } + size_t* surv_young_words = surviving_young_words(); surv_young_words[young_index] += word_sz; @@ -4837,55 +4877,6 @@ template void G1ParCopyClosure::do_oop_work(oop* p); template void G1ParCopyClosure::do_oop_work(narrowOop* p); -template void G1ParScanPartialArrayClosure::do_oop_nv(T* p) { - assert(has_partial_array_mask(p), "invariant"); - oop from_obj = clear_partial_array_mask(p); - - assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap."); - assert(from_obj->is_objArray(), "must be obj array"); - objArrayOop from_obj_array = objArrayOop(from_obj); - // The from-space object contains the real length. - int length = from_obj_array->length(); - - assert(from_obj->is_forwarded(), "must be forwarded"); - oop to_obj = from_obj->forwardee(); - assert(from_obj != to_obj, "should not be chunking self-forwarded objects"); - objArrayOop to_obj_array = objArrayOop(to_obj); - // We keep track of the next start index in the length field of the - // to-space object. - int next_index = to_obj_array->length(); - assert(0 <= next_index && next_index < length, - err_msg("invariant, next index: %d, length: %d", next_index, length)); - - int start = next_index; - int end = length; - int remainder = end - start; - // We'll try not to push a range that's smaller than ParGCArrayScanChunk. - if (remainder > 2 * ParGCArrayScanChunk) { - end = start + ParGCArrayScanChunk; - to_obj_array->set_length(end); - // Push the remainder before we process the range in case another - // worker has run out of things to do and can steal it. 
- oop* from_obj_p = set_partial_array_mask(from_obj); - _par_scan_state->push_on_queue(from_obj_p); - } else { - assert(length == end, "sanity"); - // We'll process the final range for this object. Restore the length - // so that the heap remains parsable in case of evacuation failure. - to_obj_array->set_length(end); - } - _scanner.set_region(_g1->heap_region_containing_raw(to_obj)); - // Process indexes [start,end). It will also process the header - // along with the first chunk (i.e., the chunk with start == 0). - // Note that at this point the length field of to_obj_array is not - // correct given that we are using it to keep track of the next - // start index. oop_iterate_range() (thankfully!) ignores the length - // field and only relies on the start / end parameters. It does - // however return the size of the object which will be incorrect. So - // we have to ignore it even if we wanted to use it. - to_obj_array->oop_iterate_range(&_scanner, start, end); -} - class G1ParEvacuateFollowersClosure : public VoidClosure { protected: G1CollectedHeap* _g1h; @@ -5027,13 +5018,9 @@ ReferenceProcessor* rp = _g1h->ref_processor_stw(); G1ParScanThreadState pss(_g1h, worker_id, rp); - G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss, rp); G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp); - G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss, rp); - - pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); - pss.set_partial_scan_closure(&partial_scan_cl); G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss, rp); G1ParScanMetadataClosure only_scan_metadata_cl(_g1h, &pss, rp); @@ -5287,6 +5274,33 @@ g1_unlink_task.strings_processed(), g1_unlink_task.strings_removed(), g1_unlink_task.symbols_processed(), g1_unlink_task.symbols_removed()); } + + if (G1StringDedup::is_enabled()) { + G1StringDedup::unlink(is_alive); + } +} + +class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure { +public: + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + *card_ptr = CardTableModRefBS::dirty_card_val(); + return true; + } +}; + +void G1CollectedHeap::redirty_logged_cards() { + guarantee(G1DeferredRSUpdate, "Must only be called when using deferred RS updates."); + double redirty_logged_cards_start = os::elapsedTime(); + + RedirtyLoggedCardTableEntryFastClosure redirty; + dirty_card_queue_set().set_closure(&redirty); + dirty_card_queue_set().apply_closure_to_all_completed_buffers(); + + DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set(); + dcq.merge_bufferlists(&dirty_card_queue_set()); + assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); + + g1_policy()->phase_times()->record_redirty_logged_cards_time_ms((os::elapsedTime() - redirty_logged_cards_start) * 1000.0); } // Weak Reference Processing support @@ -5470,14 +5484,9 @@ G1STWIsAliveClosure is_alive(_g1h); G1ParScanThreadState pss(_g1h, worker_id, NULL); - - G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss, NULL); G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL); - G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss, NULL); - - pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); - pss.set_partial_scan_closure(&partial_scan_cl); G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, &pss, NULL); G1ParScanMetadataClosure only_copy_metadata_cl(_g1h, &pss, NULL); @@ -5582,13 +5591,9 @@ HandleMark hm; G1ParScanThreadState pss(_g1h, worker_id, NULL); - G1ParScanHeapEvacClosure scan_evac_cl(_g1h, 
&pss, NULL); G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL); - G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss, NULL); - - pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); - pss.set_partial_scan_closure(&partial_scan_cl); assert(pss.refs()->is_empty(), "both queue and overflow should be empty"); @@ -5712,13 +5717,9 @@ // We do not embed a reference processor in the copying/scanning // closures while we're actually processing the discovered // reference objects. - G1ParScanHeapEvacClosure scan_evac_cl(this, &pss, NULL); G1ParScanHeapEvacFailureClosure evac_failure_cl(this, &pss, NULL); - G1ParScanPartialArrayClosure partial_scan_cl(this, &pss, NULL); - - pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); - pss.set_partial_scan_closure(&partial_scan_cl); assert(pss.refs()->is_empty(), "pre-condition"); @@ -5900,6 +5901,9 @@ G1STWIsAliveClosure is_alive(this); G1KeepAliveClosure keep_alive(this); JNIHandles::weak_oops_do(&is_alive, &keep_alive); + if (G1StringDedup::is_enabled()) { + G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive); + } } release_gc_alloc_regions(n_workers, evacuation_info); @@ -5917,6 +5921,8 @@ // strong code roots for a particular heap region. migrate_strong_code_roots(); + purge_code_root_memory(); + if (g1_policy()->during_initial_mark_pause()) { // Reset the claim values set during marking the strong code roots reset_heap_region_claim_values(); @@ -5943,41 +5949,15 @@ enqueue_discovered_references(n_workers); if (G1DeferredRSUpdate) { - RedirtyLoggedCardTableEntryFastClosure redirty; - dirty_card_queue_set().set_closure(&redirty); - dirty_card_queue_set().apply_closure_to_all_completed_buffers(); - - DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set(); - dcq.merge_bufferlists(&dirty_card_queue_set()); - assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); + redirty_logged_cards(); } COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } -void G1CollectedHeap::free_region_if_empty(HeapRegion* hr, - size_t* pre_used, - FreeRegionList* free_list, - OldRegionSet* old_proxy_set, - HumongousRegionSet* humongous_proxy_set, - HRRSCleanupTask* hrrs_cleanup_task, - bool par) { - if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { - if (hr->isHumongous()) { - assert(hr->startsHumongous(), "we should only see starts humongous"); - free_humongous_region(hr, pre_used, free_list, humongous_proxy_set, par); - } else { - _old_set.remove_with_proxy(hr, old_proxy_set); - free_region(hr, pre_used, free_list, par); - } - } else { - hr->rem_set()->do_cleanup_work(hrrs_cleanup_task); - } -} - void G1CollectedHeap::free_region(HeapRegion* hr, - size_t* pre_used, FreeRegionList* free_list, - bool par) { + bool par, + bool locked) { assert(!hr->isHumongous(), "this is only for non-humongous regions"); assert(!hr->is_empty(), "the region should not be empty"); assert(free_list != NULL, "pre-condition"); @@ -5988,70 +5968,56 @@ if (!hr->is_young()) { _cg1r->hot_card_cache()->reset_card_counts(hr); } - *pre_used += hr->used(); - hr->hr_clear(par, true /* clear_space */); - free_list->add_as_head(hr); + hr->hr_clear(par, true /* clear_space */, locked /* locked */); + free_list->add_ordered(hr); } void G1CollectedHeap::free_humongous_region(HeapRegion* hr, - size_t* pre_used, FreeRegionList* free_list, - HumongousRegionSet* humongous_proxy_set, bool par) { assert(hr->startsHumongous(), "this is only for starts humongous 
regions"); assert(free_list != NULL, "pre-condition"); - assert(humongous_proxy_set != NULL, "pre-condition"); - - size_t hr_used = hr->used(); + size_t hr_capacity = hr->capacity(); - size_t hr_pre_used = 0; - _humongous_set.remove_with_proxy(hr, humongous_proxy_set); // We need to read this before we make the region non-humongous, // otherwise the information will be gone. uint last_index = hr->last_hc_index(); hr->set_notHumongous(); - free_region(hr, &hr_pre_used, free_list, par); + free_region(hr, free_list, par); uint i = hr->hrs_index() + 1; while (i < last_index) { HeapRegion* curr_hr = region_at(i); assert(curr_hr->continuesHumongous(), "invariant"); curr_hr->set_notHumongous(); - free_region(curr_hr, &hr_pre_used, free_list, par); + free_region(curr_hr, free_list, par); i += 1; } - assert(hr_pre_used == hr_used, - err_msg("hr_pre_used: "SIZE_FORMAT" and hr_used: "SIZE_FORMAT" " - "should be the same", hr_pre_used, hr_used)); - *pre_used += hr_pre_used; -} - -void G1CollectedHeap::update_sets_after_freeing_regions(size_t pre_used, - FreeRegionList* free_list, - OldRegionSet* old_proxy_set, - HumongousRegionSet* humongous_proxy_set, - bool par) { - if (pre_used > 0) { - Mutex* lock = (par) ? ParGCRareEvent_lock : NULL; - MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag); - assert(_summary_bytes_used >= pre_used, - err_msg("invariant: _summary_bytes_used: "SIZE_FORMAT" " - "should be >= pre_used: "SIZE_FORMAT, - _summary_bytes_used, pre_used)); - _summary_bytes_used -= pre_used; - } - if (free_list != NULL && !free_list->is_empty()) { +} + +void G1CollectedHeap::remove_from_old_sets(const HeapRegionSetCount& old_regions_removed, + const HeapRegionSetCount& humongous_regions_removed) { + if (old_regions_removed.length() > 0 || humongous_regions_removed.length() > 0) { + MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag); + _old_set.bulk_remove(old_regions_removed); + _humongous_set.bulk_remove(humongous_regions_removed); + } + +} + +void G1CollectedHeap::prepend_to_freelist(FreeRegionList* list) { + assert(list != NULL, "list can't be null"); + if (!list->is_empty()) { MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag); - _free_list.add_as_head(free_list); - } - if (old_proxy_set != NULL && !old_proxy_set->is_empty()) { - MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag); - _old_set.update_from_proxy(old_proxy_set); - } - if (humongous_proxy_set != NULL && !humongous_proxy_set->is_empty()) { - MutexLockerEx x(OldSets_lock, Mutex::_no_safepoint_check_flag); - _humongous_set.update_from_proxy(humongous_proxy_set); - } + _free_list.add_ordered(list); + } +} + +void G1CollectedHeap::decrement_summary_bytes(size_t bytes) { + assert(_summary_bytes_used >= bytes, + err_msg("invariant: _summary_bytes_used: "SIZE_FORMAT" should be >= bytes: "SIZE_FORMAT, + _summary_bytes_used, bytes)); + _summary_bytes_used -= bytes; } class G1ParCleanupCTTask : public AbstractGangTask { @@ -6211,7 +6177,7 @@ } } - rs_lengths += cur->rem_set()->occupied(); + rs_lengths += cur->rem_set()->occupied_locked(); HeapRegion* next = cur->next_in_collection_set(); assert(cur->in_collection_set(), "bad CS"); @@ -6244,7 +6210,8 @@ // And the region is empty. 
assert(!used_mr.is_empty(), "Should not have empty regions in a CS."); - free_region(cur, &pre_used, &local_free_list, false /* par */); + pre_used += cur->used(); + free_region(cur, &local_free_list, false /* par */, true /* locked */); } else { cur->uninstall_surv_rate_group(); if (cur->is_young()) { @@ -6272,10 +6239,8 @@ young_time_ms += elapsed_ms; } - update_sets_after_freeing_regions(pre_used, &local_free_list, - NULL /* old_proxy_set */, - NULL /* humongous_proxy_set */, - false /* par */); + prepend_to_freelist(&local_free_list); + decrement_summary_bytes(pre_used); policy->phase_times()->record_young_free_cset_time_ms(young_time_ms); policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms); } @@ -6387,10 +6352,10 @@ class TearDownRegionSetsClosure : public HeapRegionClosure { private: - OldRegionSet *_old_set; + HeapRegionSet *_old_set; public: - TearDownRegionSetsClosure(OldRegionSet* old_set) : _old_set(old_set) { } + TearDownRegionSetsClosure(HeapRegionSet* old_set) : _old_set(old_set) { } bool doHeapRegion(HeapRegion* r) { if (r->is_empty()) { @@ -6419,9 +6384,10 @@ TearDownRegionSetsClosure cl(&_old_set); heap_region_iterate(&cl); - // Need to do this after the heap iteration to be able to - // recognize the young regions and ignore them during the iteration. - _young_list->empty_list(); + // Note that emptying the _young_list is postponed and instead done as + // the first step when rebuilding the regions sets again. The reason for + // this is that during a full GC string deduplication needs to know if + // a collected region was young or old when the full GC was initiated. } _free_list.remove_all(); } @@ -6429,13 +6395,13 @@ class RebuildRegionSetsClosure : public HeapRegionClosure { private: bool _free_list_only; - OldRegionSet* _old_set; + HeapRegionSet* _old_set; FreeRegionList* _free_list; size_t _total_used; public: RebuildRegionSetsClosure(bool free_list_only, - OldRegionSet* old_set, FreeRegionList* free_list) : + HeapRegionSet* old_set, FreeRegionList* free_list) : _free_list_only(free_list_only), _old_set(old_set), _free_list(free_list), _total_used(0) { assert(_free_list->is_empty(), "pre-condition"); @@ -6475,6 +6441,10 @@ void G1CollectedHeap::rebuild_region_sets(bool free_list_only) { assert_at_safepoint(true /* should_be_vm_thread */); + if (!free_list_only) { + _young_list->empty_list(); + } + RebuildRegionSetsClosure cl(free_list_only, &_old_set, &_free_list); heap_region_iterate(&cl); @@ -6510,6 +6480,7 @@ bool young_list_full = g1_policy()->is_young_list_full(); if (force || !young_list_full) { HeapRegion* new_alloc_region = new_region(word_size, + false /* is_old */, false /* do_expand */); if (new_alloc_region != NULL) { set_region_short_lived_locked(new_alloc_region); @@ -6568,14 +6539,16 @@ assert(FreeList_lock->owned_by_self(), "pre-condition"); if (count < g1_policy()->max_regions(ap)) { + bool survivor = (ap == GCAllocForSurvived); HeapRegion* new_alloc_region = new_region(word_size, + !survivor, true /* do_expand */); if (new_alloc_region != NULL) { // We really only need to do this for old regions given that we // should never scan survivors. But it doesn't hurt to do it // for survivors too. 
new_alloc_region->set_saved_mark(); - if (ap == GCAllocForSurvived) { + if (survivor) { new_alloc_region->set_survivor(); _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor); } else { @@ -6632,23 +6605,22 @@ class VerifyRegionListsClosure : public HeapRegionClosure { private: - FreeRegionList* _free_list; - OldRegionSet* _old_set; - HumongousRegionSet* _humongous_set; - uint _region_count; + HeapRegionSet* _old_set; + HeapRegionSet* _humongous_set; + FreeRegionList* _free_list; public: - VerifyRegionListsClosure(OldRegionSet* old_set, - HumongousRegionSet* humongous_set, + HeapRegionSetCount _old_count; + HeapRegionSetCount _humongous_count; + HeapRegionSetCount _free_count; + + VerifyRegionListsClosure(HeapRegionSet* old_set, + HeapRegionSet* humongous_set, FreeRegionList* free_list) : - _old_set(old_set), _humongous_set(humongous_set), - _free_list(free_list), _region_count(0) { } - - uint region_count() { return _region_count; } + _old_set(old_set), _humongous_set(humongous_set), _free_list(free_list), + _old_count(), _humongous_count(), _free_count(){ } bool doHeapRegion(HeapRegion* hr) { - _region_count += 1; - if (hr->continuesHumongous()) { return false; } @@ -6656,14 +6628,31 @@ if (hr->is_young()) { // TODO } else if (hr->startsHumongous()) { - _humongous_set->verify_next_region(hr); + assert(hr->containing_set() == _humongous_set, err_msg("Heap region %u is starts humongous but not in humongous set.", hr->region_num())); + _humongous_count.increment(1u, hr->capacity()); } else if (hr->is_empty()) { - _free_list->verify_next_region(hr); + assert(hr->containing_set() == _free_list, err_msg("Heap region %u is empty but not on the free list.", hr->region_num())); + _free_count.increment(1u, hr->capacity()); } else { - _old_set->verify_next_region(hr); + assert(hr->containing_set() == _old_set, err_msg("Heap region %u is old but not in the old set.", hr->region_num())); + _old_count.increment(1u, hr->capacity()); } return false; } + + void verify_counts(HeapRegionSet* old_set, HeapRegionSet* humongous_set, FreeRegionList* free_list) { + guarantee(old_set->length() == _old_count.length(), err_msg("Old set count mismatch. Expected %u, actual %u.", old_set->length(), _old_count.length())); + guarantee(old_set->total_capacity_bytes() == _old_count.capacity(), err_msg("Old set capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT, + old_set->total_capacity_bytes(), _old_count.capacity())); + + guarantee(humongous_set->length() == _humongous_count.length(), err_msg("Hum set count mismatch. Expected %u, actual %u.", humongous_set->length(), _humongous_count.length())); + guarantee(humongous_set->total_capacity_bytes() == _humongous_count.capacity(), err_msg("Hum set capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT, + humongous_set->total_capacity_bytes(), _humongous_count.capacity())); + + guarantee(free_list->length() == _free_count.length(), err_msg("Free list count mismatch. Expected %u, actual %u.", free_list->length(), _free_count.length())); + guarantee(free_list->total_capacity_bytes() == _free_count.capacity(), err_msg("Free list capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT, + free_list->total_capacity_bytes(), _free_count.capacity())); + } }; HeapRegion* G1CollectedHeap::new_heap_region(uint hrs_index, @@ -6679,16 +6668,14 @@ assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */); // First, check the explicit lists. 
- _free_list.verify(); + _free_list.verify_list(); { // Given that a concurrent operation might be adding regions to // the secondary free list we have to take the lock before // verifying it. MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); - _secondary_free_list.verify(); - } - _old_set.verify(); - _humongous_set.verify(); + _secondary_free_list.verify_list(); + } // If a concurrent region freeing operation is in progress it will // be difficult to correctly attributed any free regions we come @@ -6711,16 +6698,10 @@ // Finally, make sure that the region accounting in the lists is // consistent with what we see in the heap. - _old_set.verify_start(); - _humongous_set.verify_start(); - _free_list.verify_start(); VerifyRegionListsClosure cl(&_old_set, &_humongous_set, &_free_list); heap_region_iterate(&cl); - - _old_set.verify_end(); - _humongous_set.verify_end(); - _free_list.verify_end(); + cl.verify_counts(&_old_set, &_humongous_set, &_free_list); } // Optimized nmethod scanning @@ -6821,6 +6802,13 @@ g1_policy()->phase_times()->record_strong_code_root_migration_time(migration_time_ms); } +void G1CollectedHeap::purge_code_root_memory() { + double purge_start = os::elapsedTime(); + G1CodeRootSet::purge_chunks(G1CodeRootsChunkCacheKeepPercent); + double purge_time_ms = (os::elapsedTime() - purge_start) * 1000.0; + g1_policy()->phase_times()->record_strong_code_root_purge_time(purge_time_ms); +} + // Mark all the code roots that point into regions *not* in the // collection set. // @@ -6891,7 +6879,7 @@ // Code roots should never be attached to a continuation of a humongous region assert(hrrs->strong_code_roots_list_length() == 0, err_msg("code roots should never be attached to continuations of humongous region "HR_FORMAT - " starting at "HR_FORMAT", but has "INT32_FORMAT, + " starting at "HR_FORMAT", but has "SIZE_FORMAT, HR_FORMAT_PARAMS(hr), HR_FORMAT_PARAMS(hr->humongous_start_region()), hrrs->strong_code_roots_list_length())); return false; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,7 @@ #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" #include "gc_implementation/g1/g1YCTypes.hpp" #include "gc_implementation/g1/heapRegionSeq.hpp" -#include "gc_implementation/g1/heapRegionSets.hpp" +#include "gc_implementation/g1/heapRegionSet.hpp" #include "gc_implementation/shared/hSpaceCounters.hpp" #include "gc_implementation/shared/parGCAllocBuffer.hpp" #include "memory/barrierSet.hpp" @@ -243,18 +243,18 @@ MemRegion _g1_committed; // The master free list. It will satisfy all new region allocations. - MasterFreeRegionList _free_list; + FreeRegionList _free_list; // The secondary free list which contains regions that have been // freed up during the cleanup process. This will be appended to the // master free list when appropriate. - SecondaryFreeRegionList _secondary_free_list; + FreeRegionList _secondary_free_list; // It keeps track of the old regions. 
- MasterOldRegionSet _old_set; + HeapRegionSet _old_set; // It keeps track of the humongous regions. - MasterHumongousRegionSet _humongous_set; + HeapRegionSet _humongous_set; // The number of regions we could create by expansion. uint _expansion_regions; @@ -497,13 +497,14 @@ // check whether there's anything available on the // secondary_free_list and/or wait for more regions to appear on // that list, if _free_regions_coming is set. - HeapRegion* new_region_try_secondary_free_list(); + HeapRegion* new_region_try_secondary_free_list(bool is_old); // Try to allocate a single non-humongous HeapRegion sufficient for // an allocation of the given word_size. If do_expand is true, // attempt to expand the heap if necessary to satisfy the allocation - // request. - HeapRegion* new_region(size_t word_size, bool do_expand); + // request. If the region is to be used as an old region or for a + // humongous object, set is_old to true. If not, to false. + HeapRegion* new_region(size_t word_size, bool is_old, bool do_expand); // Attempt to satisfy a humongous allocation request of the given // size by finding a contiguous set of free regions of num_regions @@ -757,6 +758,29 @@ G1HRPrinter* hr_printer() { return &_hr_printer; } + // Frees a non-humongous region by initializing its contents and + // adding it to the free list that's passed as a parameter (this is + // usually a local list which will be appended to the master free + // list later). The used bytes of freed regions are accumulated in + // pre_used. If par is true, the region's RSet will not be freed + // up. The assumption is that this will be done later. + // The locked parameter indicates if the caller has already taken + // care of proper synchronization. This may allow some optimizations. + void free_region(HeapRegion* hr, + FreeRegionList* free_list, + bool par, + bool locked = false); + + // Frees a humongous region by collapsing it into individual regions + // and calling free_region() for each of them. The freed regions + // will be added to the free list that's passed as a parameter (this + // is usually a local list which will be appended to the master free + // list later). The used bytes of freed regions are accumulated in + // pre_used. If par is true, the region's RSet will not be freed + // up. The assumption is that this will be done later. + void free_humongous_region(HeapRegion* hr, + FreeRegionList* free_list, + bool par); protected: // Shrink the garbage-first heap by at most the given size (in bytes!). @@ -840,30 +864,6 @@ // string table, and referents of reachable weak refs. void g1_process_weak_roots(OopClosure* root_closure); - // Frees a non-humongous region by initializing its contents and - // adding it to the free list that's passed as a parameter (this is - // usually a local list which will be appended to the master free - // list later). The used bytes of freed regions are accumulated in - // pre_used. If par is true, the region's RSet will not be freed - // up. The assumption is that this will be done later. - void free_region(HeapRegion* hr, - size_t* pre_used, - FreeRegionList* free_list, - bool par); - - // Frees a humongous region by collapsing it into individual regions - // and calling free_region() for each of them. The freed regions - // will be added to the free list that's passed as a parameter (this - // is usually a local list which will be appended to the master free - // list later). The used bytes of freed regions are accumulated in - // pre_used. 
If par is true, the region's RSet will not be freed - // up. The assumption is that this will be done later. - void free_humongous_region(HeapRegion* hr, - size_t* pre_used, - FreeRegionList* free_list, - HumongousRegionSet* humongous_proxy_set, - bool par); - // Notifies all the necessary spaces that the committed space has // been updated (either expanded or shrunk). It should be called // after _g1_storage is updated. @@ -1242,21 +1242,17 @@ bool is_on_master_free_list(HeapRegion* hr) { return hr->containing_set() == &_free_list; } - - bool is_in_humongous_set(HeapRegion* hr) { - return hr->containing_set() == &_humongous_set; - } #endif // ASSERT // Wrapper for the region list operations that can be called from // methods outside this class. - void secondary_free_list_add_as_tail(FreeRegionList* list) { - _secondary_free_list.add_as_tail(list); + void secondary_free_list_add(FreeRegionList* list) { + _secondary_free_list.add_ordered(list); } void append_secondary_free_list() { - _free_list.add_as_head(&_secondary_free_list); + _free_list.add_ordered(&_secondary_free_list); } void append_secondary_free_list_if_not_empty_with_lock() { @@ -1298,27 +1294,9 @@ // True iff an evacuation has failed in the most-recent collection. bool evacuation_failed() { return _evacuation_failed; } - // It will free a region if it has allocated objects in it that are - // all dead. It calls either free_region() or - // free_humongous_region() depending on the type of the region that - // is passed to it. - void free_region_if_empty(HeapRegion* hr, - size_t* pre_used, - FreeRegionList* free_list, - OldRegionSet* old_proxy_set, - HumongousRegionSet* humongous_proxy_set, - HRRSCleanupTask* hrrs_cleanup_task, - bool par); - - // It appends the free list to the master free list and updates the - // master humongous list according to the contents of the proxy - // list. It also adjusts the total used bytes according to pre_used - // (if par is true, it will do so by taking the ParGCRareEvent_lock). - void update_sets_after_freeing_regions(size_t pre_used, - FreeRegionList* free_list, - OldRegionSet* old_proxy_set, - HumongousRegionSet* humongous_proxy_set, - bool par); + void remove_from_old_sets(const HeapRegionSetCount& old_regions_removed, const HeapRegionSetCount& humongous_regions_removed); + void prepend_to_freelist(FreeRegionList* list); + void decrement_summary_bytes(size_t bytes); // Returns "TRUE" iff "p" points into the committed areas of the heap. virtual bool is_in(const void* p) const; @@ -1481,9 +1459,11 @@ // Section on thread-local allocation buffers (TLABs) // See CollectedHeap for semantics. - virtual bool supports_tlab_allocation() const; - virtual size_t tlab_capacity(Thread* thr) const; - virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; + bool supports_tlab_allocation() const; + size_t tlab_capacity(Thread* ignored) const; + size_t tlab_used(Thread* ignored) const; + size_t max_tlab_size() const; + size_t unsafe_max_tlab_alloc(Thread* ignored) const; // Can a compiler initialize a new object without store barriers? // This permission only extends from the creation of a new object @@ -1568,7 +1548,7 @@ void set_region_short_lived_locked(HeapRegion* hr); // add appropriate methods for any other surv rate groups - YoungList* young_list() { return _young_list; } + YoungList* young_list() const { return _young_list; } // debugging bool check_young_list_well_formed() { @@ -1671,6 +1651,9 @@ // that were not successfullly evacuated are not migrated. 
void migrate_strong_code_roots(); + // Free up superfluous code root memory. + void purge_code_root_memory(); + // During an initial mark pause, mark all the code roots that // point into regions *not* in the collection set. void mark_strong_code_roots(uint worker_id); @@ -1683,6 +1666,8 @@ // in symbol table, possibly in parallel. void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true); + // Redirty logged cards in the refinement queue. + void redirty_logged_cards(); // Verification // The following is just to alert the verification code @@ -1809,8 +1794,6 @@ size_t _undo_waste; OopsInHeapRegionClosure* _evac_failure_cl; - G1ParScanHeapEvacClosure* _evac_cl; - G1ParScanPartialArrayClosure* _partial_scan_cl; int _hash_seed; uint _queue_num; @@ -1938,14 +1921,6 @@ return _evac_failure_cl; } - void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) { - _evac_cl = evac_cl; - } - - void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) { - _partial_scan_cl = partial_scan_cl; - } - int* hash_seed() { return &_hash_seed; } uint queue_num() { return _queue_num; } @@ -1993,19 +1968,121 @@ false /* retain */); } } +private: + #define G1_PARTIAL_ARRAY_MASK 0x2 + + inline bool has_partial_array_mask(oop* ref) const { + return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK; + } + + // We never encode partial array oops as narrowOop*, so return false immediately. + // This allows the compiler to create optimized code when popping references from + // the work queue. + inline bool has_partial_array_mask(narrowOop* ref) const { + assert(((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) != G1_PARTIAL_ARRAY_MASK, "Partial array oop reference encoded as narrowOop*"); + return false; + } + + // Only implement set_partial_array_mask() for regular oops, not for narrowOops. + // We always encode partial arrays as regular oop, to allow the + // specialization for has_partial_array_mask() for narrowOops above. + // This means that unintentional use of this method with narrowOops are caught + // by the compiler. + inline oop* set_partial_array_mask(oop obj) const { + assert(((uintptr_t)(void *)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!"); + return (oop*) ((uintptr_t)(void *)obj | G1_PARTIAL_ARRAY_MASK); + } + + inline oop clear_partial_array_mask(oop* ref) const { + return cast_to_oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK); + } + + void do_oop_partial_array(oop* p) { + assert(has_partial_array_mask(p), "invariant"); + oop from_obj = clear_partial_array_mask(p); + + assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap."); + assert(from_obj->is_objArray(), "must be obj array"); + objArrayOop from_obj_array = objArrayOop(from_obj); + // The from-space object contains the real length. + int length = from_obj_array->length(); + + assert(from_obj->is_forwarded(), "must be forwarded"); + oop to_obj = from_obj->forwardee(); + assert(from_obj != to_obj, "should not be chunking self-forwarded objects"); + objArrayOop to_obj_array = objArrayOop(to_obj); + // We keep track of the next start index in the length field of the + // to-space object. + int next_index = to_obj_array->length(); + assert(0 <= next_index && next_index < length, + err_msg("invariant, next index: %d, length: %d", next_index, length)); + + int start = next_index; + int end = length; + int remainder = end - start; + // We'll try not to push a range that's smaller than ParGCArrayScanChunk. 
+ if (remainder > 2 * ParGCArrayScanChunk) { + end = start + ParGCArrayScanChunk; + to_obj_array->set_length(end); + // Push the remainder before we process the range in case another + // worker has run out of things to do and can steal it. + oop* from_obj_p = set_partial_array_mask(from_obj); + push_on_queue(from_obj_p); + } else { + assert(length == end, "sanity"); + // We'll process the final range for this object. Restore the length + // so that the heap remains parsable in case of evacuation failure. + to_obj_array->set_length(end); + } + _scanner.set_region(_g1h->heap_region_containing_raw(to_obj)); + // Process indexes [start,end). It will also process the header + // along with the first chunk (i.e., the chunk with start == 0). + // Note that at this point the length field of to_obj_array is not + // correct given that we are using it to keep track of the next + // start index. oop_iterate_range() (thankfully!) ignores the length + // field and only relies on the start / end parameters. It does + // however return the size of the object which will be incorrect. So + // we have to ignore it even if we wanted to use it. + to_obj_array->oop_iterate_range(&_scanner, start, end); + } + + // This method is applied to the fields of the objects that have just been copied. + template <class T> void do_oop_evac(T* p, HeapRegion* from) { + assert(!oopDesc::is_null(oopDesc::load_decode_heap_oop(p)), + "Reference should not be NULL here as such are never pushed to the task queue."); + oop obj = oopDesc::load_decode_heap_oop_not_null(p); + + // Although we never intentionally push references outside of the collection + // set, due to (benign) races in the claim mechanism during RSet scanning more + // than one thread might claim the same card. So the same card may be + // processed multiple times. So redo this check. + if (_g1h->in_cset_fast_test(obj)) { + oop forwardee; + if (obj->is_forwarded()) { + forwardee = obj->forwardee(); + } else { + forwardee = copy_to_survivor_space(obj); + } + assert(forwardee != NULL, "forwardee should not be NULL"); + oopDesc::encode_store_heap_oop(p, forwardee); + } + + assert(obj != NULL, "Must be"); + update_rs(from, p, queue_num()); + } +public: oop copy_to_survivor_space(oop const obj); template <class T> void deal_with_reference(T* ref_to_scan) { - if (has_partial_array_mask(ref_to_scan)) { - _partial_scan_cl->do_oop_nv(ref_to_scan); - } else { + if (!has_partial_array_mask(ref_to_scan)) { // Note: we can use "raw" versions of "region_containing" because // "obj_to_scan" is definitely in the heap, and is not in a // humongous region.
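As an aside, the partial-array encoding used by the methods above is plain low-bit pointer tagging: heap addresses are word aligned, so bit 0x2 of a real reference is always clear and can carry the "partially scanned array" marker instead. A minimal, self-contained sketch of the same idea (hypothetical names, not the HotSpot types):

    #include <cassert>
    #include <cstdint>

    const uintptr_t kPartialArrayMask = 0x2;  // a low bit that aligned pointers never use

    // Tag an aligned object address so a queue entry can be recognized later as a
    // "partially scanned array" marker rather than an ordinary reference slot.
    inline void* set_partial_mask(void* obj) {
      assert(((uintptr_t)obj & kPartialArrayMask) == 0);  // alignment bit must be free
      return (void*)((uintptr_t)obj | kPartialArrayMask);
    }

    inline bool has_partial_mask(void* entry) {
      return ((uintptr_t)entry & kPartialArrayMask) != 0;
    }

    inline void* clear_partial_mask(void* entry) {
      return (void*)((uintptr_t)entry & ~kPartialArrayMask);
    }

A work-queue entry is thus either a real slot address or a tagged from-space array, and a single bit test in deal_with_reference() distinguishes the two cases.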
HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan); - _evac_cl->set_region(r); - _evac_cl->do_oop_nv(ref_to_scan); + do_oop_evac(ref_to_scan, r); + } else { + do_oop_partial_array((oop*)ref_to_scan); } } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -30,6 +30,7 @@ #include "gc_implementation/g1/g1AllocRegion.inline.hpp" #include "gc_implementation/g1/g1CollectorPolicy.hpp" #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegionSet.inline.hpp" #include "gc_implementation/g1/heapRegionSeq.inline.hpp" #include "utilities/taskqueue.hpp" diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -820,6 +820,8 @@ // do that for any other surv rate groups } + size_t young_list_target_length() const { return _young_list_target_length; } + bool is_young_list_full() { uint young_list_length = _g1->young_list()->length(); uint young_list_target_length = _young_list_target_length; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "gc_implementation/g1/g1GCPhaseTimes.hpp" #include "gc_implementation/g1/g1Log.hpp" +#include "gc_implementation/g1/g1StringDedup.hpp" // Helper class for avoiding interleaved logging class LineBuffer: public StackObj { @@ -168,7 +169,9 @@ _last_termination_attempts(_max_gc_threads, SIZE_FORMAT), _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false), _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"), - _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf") + _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"), + _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"), + _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf") { assert(max_gc_threads > 0, "Must have some GC threads"); } @@ -229,6 +232,16 @@ _last_gc_worker_other_times_ms.verify(); } +void G1GCPhaseTimes::note_string_dedup_fixup_start() { + _cur_string_dedup_queue_fixup_worker_times_ms.reset(); + _cur_string_dedup_table_fixup_worker_times_ms.reset(); +} + +void G1GCPhaseTimes::note_string_dedup_fixup_end() { + _cur_string_dedup_queue_fixup_worker_times_ms.verify(); + _cur_string_dedup_table_fixup_worker_times_ms.verify(); +} + void G1GCPhaseTimes::print_stats(int level, const char* str, double value) { LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value); } @@ -250,6 +263,14 @@ // Strong code root migration time misc_time_ms += _cur_strong_code_root_migration_time_ms; + // Strong code root purge time + misc_time_ms += _cur_strong_code_root_purge_time_ms; + + if (G1StringDedup::is_enabled()) { + // String dedup fixup time + misc_time_ms += _cur_string_dedup_fixup_time_ms; + } + // Subtract the time taken to clean the card table from the // current value of "other time" misc_time_ms += _cur_clear_ct_time_ms; @@ -299,20 +320,43 @@ } print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms); print_stats(1, "Code Root Migration", _cur_strong_code_root_migration_time_ms); + print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms); + if (G1StringDedup::is_enabled()) { + print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads); + _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)"); + _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)"); + } print_stats(1, "Clear CT", _cur_clear_ct_time_ms); double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms(); print_stats(1, "Other", misc_time_ms); if (_cur_verify_before_time_ms > 0.0) { print_stats(2, "Verify Before", _cur_verify_before_time_ms); } + if (G1CollectedHeap::heap()->evacuation_failed()) { + double evac_fail_handling = _cur_evac_fail_recalc_used + _cur_evac_fail_remove_self_forwards + + _cur_evac_fail_restore_remsets; + print_stats(2, "Evacuation Failure", evac_fail_handling); + if (G1Log::finest()) { + print_stats(3, "Recalculate Used", _cur_evac_fail_recalc_used); + print_stats(3, "Remove Self Forwards", _cur_evac_fail_remove_self_forwards); + print_stats(3, "Restore RemSet", _cur_evac_fail_restore_remsets); + } + } print_stats(2, "Choose CSet", (_recorded_young_cset_choice_time_ms + _recorded_non_young_cset_choice_time_ms)); print_stats(2, "Ref Proc", _cur_ref_proc_time_ms); print_stats(2, "Ref Enq", _cur_ref_enq_time_ms); + if (G1DeferredRSUpdate) { + print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms); + } 
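The bookkeeping behind these print_stats() calls follows one simple rule: every explicitly timed phase is summed into accounted_time_ms(), and whatever remains of the pause is printed as "Other". A simplified model of that relationship (illustrative names, not the actual G1GCPhaseTimes fields):

    // Simplified model: "Other" is the pause time not covered by any tracked phase.
    struct TrackedPhasesMs {
      double parallel_work;
      double code_root_fixup;
      double code_root_migration;
      double code_root_purge;      // newly tracked by this change
      double string_dedup_fixup;   // newly tracked by this change (when dedup is enabled)
      double clear_ct;
    };

    double other_time_ms(double pause_time_ms, const TrackedPhasesMs& t) {
      double accounted = t.parallel_work + t.code_root_fixup + t.code_root_migration +
                         t.code_root_purge + t.string_dedup_fixup + t.clear_ct;
      return pause_time_ms - accounted;  // printed as "Other"
    }

Adding the new code-root purge and string-dedup fixup timers to this sum is what keeps them from silently inflating the "Other" bucket; the evacuation-failure times, by contrast, are printed as a breakdown underneath "Other".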
print_stats(2, "Free CSet", (_recorded_young_free_cset_time_ms + _recorded_non_young_free_cset_time_ms)); + if (G1Log::finest()) { + print_stats(3, "Young Free CSet", _recorded_young_free_cset_time_ms); + print_stats(3, "Non-Young Free CSet", _recorded_non_young_free_cset_time_ms); + } if (_cur_verify_after_time_ms > 0.0) { print_stats(2, "Verify After", _cur_verify_after_time_ms); } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -131,6 +131,15 @@ double _cur_collection_par_time_ms; double _cur_collection_code_root_fixup_time_ms; double _cur_strong_code_root_migration_time_ms; + double _cur_strong_code_root_purge_time_ms; + + double _cur_evac_fail_recalc_used; + double _cur_evac_fail_restore_remsets; + double _cur_evac_fail_remove_self_forwards; + + double _cur_string_dedup_fixup_time_ms; + WorkerDataArray _cur_string_dedup_queue_fixup_worker_times_ms; + WorkerDataArray _cur_string_dedup_table_fixup_worker_times_ms; double _cur_clear_ct_time_ms; double _cur_ref_proc_time_ms; @@ -142,6 +151,8 @@ double _recorded_young_cset_choice_time_ms; double _recorded_non_young_cset_choice_time_ms; + double _recorded_redirty_logged_cards_time_ms; + double _recorded_young_free_cset_time_ms; double _recorded_non_young_free_cset_time_ms; @@ -223,6 +234,37 @@ _cur_strong_code_root_migration_time_ms = ms; } + void record_strong_code_root_purge_time(double ms) { + _cur_strong_code_root_purge_time_ms = ms; + } + + void record_evac_fail_recalc_used_time(double ms) { + _cur_evac_fail_recalc_used = ms; + } + + void record_evac_fail_restore_remsets(double ms) { + _cur_evac_fail_restore_remsets = ms; + } + + void record_evac_fail_remove_self_forwards(double ms) { + _cur_evac_fail_remove_self_forwards = ms; + } + + void note_string_dedup_fixup_start(); + void note_string_dedup_fixup_end(); + + void record_string_dedup_fixup_time(double ms) { + _cur_string_dedup_fixup_time_ms = ms; + } + + void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) { + _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms); + } + + void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) { + _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms); + } + void record_ref_proc_time(double ms) { _cur_ref_proc_time_ms = ms; } @@ -251,6 +293,10 @@ _recorded_non_young_cset_choice_time_ms = time_ms; } + void record_redirty_logged_cards_time_ms(double time_ms) { + _recorded_redirty_logged_cards_time_ms = time_ms; + } + void record_cur_collection_start_sec(double time_ms) { _cur_collection_start_sec = time_ms; } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1MarkSweep.cpp --- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,6 +31,7 @@ #include "code/icBuffer.hpp" #include "gc_implementation/g1/g1Log.hpp" #include "gc_implementation/g1/g1MarkSweep.hpp" +#include "gc_implementation/g1/g1StringDedup.hpp" #include "gc_implementation/shared/gcHeapSummary.hpp" #include "gc_implementation/shared/gcTimer.hpp" #include "gc_implementation/shared/gcTrace.hpp" @@ -196,17 +197,19 @@ G1CollectedHeap* _g1h; ModRefBarrierSet* _mrbs; CompactPoint _cp; - HumongousRegionSet _humongous_proxy_set; + HeapRegionSetCount _humongous_regions_removed; void free_humongous_region(HeapRegion* hr) { HeapWord* end = hr->end(); - size_t dummy_pre_used; FreeRegionList dummy_free_list("Dummy Free List for G1MarkSweep"); assert(hr->startsHumongous(), "Only the start of a humongous region should be freed."); - _g1h->free_humongous_region(hr, &dummy_pre_used, &dummy_free_list, - &_humongous_proxy_set, false /* par */); + + hr->set_containing_set(NULL); + _humongous_regions_removed.increment(1u, hr->capacity()); + + _g1h->free_humongous_region(hr, &dummy_free_list, false /* par */); hr->prepare_for_compaction(&_cp); // Also clear the part of the card table that will be unused after // compaction. @@ -219,16 +222,13 @@ : _g1h(G1CollectedHeap::heap()), _mrbs(_g1h->g1_barrier_set()), _cp(NULL, cs, cs->initialize_threshold()), - _humongous_proxy_set("G1MarkSweep Humongous Proxy Set") { } + _humongous_regions_removed() { } void update_sets() { // We'll recalculate total used bytes and recreate the free list // at the end of the GC, so no point in updating those values here. - _g1h->update_sets_after_freeing_regions(0, /* pre_used */ - NULL, /* free_list */ - NULL, /* old_proxy_set */ - &_humongous_proxy_set, - false /* par */); + HeapRegionSetCount empty_set; + _g1h->remove_from_old_sets(empty_set, _humongous_regions_removed); } bool doHeapRegion(HeapRegion* hr) { @@ -321,6 +321,10 @@ // have been cleared if they pointed to non-surviving objects.) g1h->g1_process_weak_roots(&GenMarkSweep::adjust_pointer_closure); + if (G1StringDedup::is_enabled()) { + G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure); + } + GenMarkSweep::adjust_marks(); G1AdjustPointersClosure blk; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1OopClosures.hpp --- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -80,53 +80,6 @@ virtual void do_oop(narrowOop* p) { do_oop_nv(p); } }; -#define G1_PARTIAL_ARRAY_MASK 0x2 - -inline bool has_partial_array_mask(oop* ref) { - return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK; -} - -// We never encode partial array oops as narrowOop*, so return false immediately. -// This allows the compiler to create optimized code when popping references from -// the work queue. -inline bool has_partial_array_mask(narrowOop* ref) { - assert(((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) != G1_PARTIAL_ARRAY_MASK, "Partial array oop reference encoded as narrowOop*"); - return false; -} - -// Only implement set_partial_array_mask() for regular oops, not for narrowOops. -// We always encode partial arrays as regular oop, to allow the -// specialization for has_partial_array_mask() for narrowOops above. -// This means that unintentional use of this method with narrowOops are caught -// by the compiler. 
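Stepping back to the g1MarkSweep.cpp hunk above: the old HumongousRegionSet proxy is replaced by a plain HeapRegionSetCount tally. Judging by the increment(1u, hr->capacity()) call, the counter simply records how many regions were removed and how many bytes they covered; a minimal model of that (illustrative only, not the real class):

    // Minimal stand-in for what HeapRegionSetCount appears to track.
    struct RegionSetCount {
      unsigned int regions;  // number of regions removed
      size_t       bytes;    // total capacity of those regions
      RegionSetCount() : regions(0), bytes(0) {}
      void increment(unsigned int n, size_t capacity) {
        regions += n;
        bytes   += capacity;
      }
    };

The closure accumulates one such tally per freed humongous region and hands the totals to remove_from_old_sets() in update_sets(), instead of maintaining a full proxy region set as before.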
-inline oop* set_partial_array_mask(oop obj) { - assert(((uintptr_t)(void *)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!"); - return (oop*) ((uintptr_t)(void *)obj | G1_PARTIAL_ARRAY_MASK); -} - -template inline oop clear_partial_array_mask(T* ref) { - return cast_to_oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK); -} - -class G1ParScanPartialArrayClosure : public G1ParClosureSuper { - G1ParScanClosure _scanner; - -public: - G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, ReferenceProcessor* rp) : - G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state, rp) - { - assert(_ref_processor == NULL, "sanity"); - } - - G1ParScanClosure* scanner() { - return &_scanner; - } - - template void do_oop_nv(T* p); - virtual void do_oop(oop* p) { do_oop_nv(p); } - virtual void do_oop(narrowOop* p) { do_oop_nv(p); } -}; - // Add back base class for metadata class G1ParCopyHelper : public G1ParClosureSuper { protected: @@ -173,15 +126,8 @@ typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure; typedef G1ParCopyClosure G1ParScanAndMarkMetadataClosure; -// The following closure type is defined in g1_specialized_oop_closures.hpp: -// -// typedef G1ParCopyClosure G1ParScanHeapEvacClosure; - // We use a separate closure to handle references during evacuation // failure processing. -// We could have used another instance of G1ParScanHeapEvacClosure -// (since that closure no longer assumes that the references it -// handles point into the collection set). typedef G1ParCopyClosure G1ParScanHeapEvacFailureClosure; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -463,8 +463,9 @@ int into_cset_n_buffers = into_cset_dcqs.completed_buffers_num(); if (_g1->evacuation_failed()) { + double restore_remembered_set_start = os::elapsedTime(); + // Restore remembered sets for the regions pointing into the collection set. - if (G1DeferredRSUpdate) { // If deferred RS updates are enabled then we just need to transfer // the completed buffers from (a) the DirtyCardQueueSet used to hold @@ -483,6 +484,8 @@ } assert(n_completed_buffers == into_cset_n_buffers, "missed some buffers"); } + + _g1->g1_policy()->phase_times()->record_evac_fail_restore_remsets((os::elapsedTime() - restore_remembered_set_start) * 1000.0); } // Free any completed buffers in the DirtyCardQueueSet used to hold cards diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedup.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "classfile/javaClasses.hpp" +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1GCPhaseTimes.hpp" +#include "gc_implementation/g1/g1StringDedup.hpp" +#include "gc_implementation/g1/g1StringDedupQueue.hpp" +#include "gc_implementation/g1/g1StringDedupStat.hpp" +#include "gc_implementation/g1/g1StringDedupTable.hpp" +#include "gc_implementation/g1/g1StringDedupThread.hpp" + +bool G1StringDedup::_enabled = false; + +void G1StringDedup::initialize() { + assert(UseG1GC, "String deduplication only available with G1"); + if (UseStringDeduplication) { + _enabled = true; + G1StringDedupQueue::create(); + G1StringDedupTable::create(); + G1StringDedupThread::create(); + } +} + +bool G1StringDedup::is_candidate_from_mark(oop obj) { + if (java_lang_String::is_instance(obj)) { + bool from_young = G1CollectedHeap::heap()->heap_region_containing_raw(obj)->is_young(); + if (from_young && obj->age() < StringDeduplicationAgeThreshold) { + // Candidate found. String is being evacuated from young to old but has not + // reached the deduplication age threshold, i.e. has not previously been a + // candidate during its life in the young generation. + return true; + } + } + + // Not a candidate + return false; +} + +void G1StringDedup::enqueue_from_mark(oop java_string) { + assert(is_enabled(), "String deduplication not enabled"); + if (is_candidate_from_mark(java_string)) { + G1StringDedupQueue::push(0 /* worker_id */, java_string); + } +} + +bool G1StringDedup::is_candidate_from_evacuation(bool from_young, bool to_young, oop obj) { + if (from_young && java_lang_String::is_instance(obj)) { + if (to_young && obj->age() == StringDeduplicationAgeThreshold) { + // Candidate found. String is being evacuated from young to young and just + // reached the deduplication age threshold. + return true; + } + if (!to_young && obj->age() < StringDeduplicationAgeThreshold) { + // Candidate found. String is being evacuated from young to old but has not + // reached the deduplication age threshold, i.e. has not previously been a + // candidate during its life in the young generation. 
+ return true; + } + } + + // Not a candidate + return false; +} + +void G1StringDedup::enqueue_from_evacuation(bool from_young, bool to_young, uint worker_id, oop java_string) { + assert(is_enabled(), "String deduplication not enabled"); + if (is_candidate_from_evacuation(from_young, to_young, java_string)) { + G1StringDedupQueue::push(worker_id, java_string); + } +} + +void G1StringDedup::deduplicate(oop java_string) { + assert(is_enabled(), "String deduplication not enabled"); + G1StringDedupStat dummy; // Statistics from this path is never used + G1StringDedupTable::deduplicate(java_string, dummy); +} + +void G1StringDedup::oops_do(OopClosure* keep_alive) { + assert(is_enabled(), "String deduplication not enabled"); + unlink_or_oops_do(NULL, keep_alive); +} + +void G1StringDedup::unlink(BoolObjectClosure* is_alive) { + assert(is_enabled(), "String deduplication not enabled"); + // Don't allow a potential resize or rehash during unlink, as the unlink + // operation itself might remove enough entries to invalidate such a decision. + unlink_or_oops_do(is_alive, NULL, false /* allow_resize_and_rehash */); +} + +// +// Task for parallel unlink_or_oops_do() operation on the deduplication queue +// and table. +// +class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask { +private: + G1StringDedupUnlinkOrOopsDoClosure _cl; + +public: + G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive, + OopClosure* keep_alive, + bool allow_resize_and_rehash) : + AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"), + _cl(is_alive, keep_alive, allow_resize_and_rehash) { + } + + virtual void work(uint worker_id) { + double queue_fixup_start = os::elapsedTime(); + G1StringDedupQueue::unlink_or_oops_do(&_cl); + + double table_fixup_start = os::elapsedTime(); + G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id); + + double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0; + double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0; + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms); + g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms); + } +}; + +void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) { + assert(is_enabled(), "String deduplication not enabled"); + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->phase_times()->note_string_dedup_fixup_start(); + double fixup_start = os::elapsedTime(); + + G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash); + if (G1CollectedHeap::use_parallel_gc_threads()) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + g1h->set_par_threads(); + g1h->workers()->run_task(&task); + g1h->set_par_threads(0); + } else { + task.work(0); + } + + double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0; + g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms); + g1p->phase_times()->note_string_dedup_fixup_end(); +} + +void G1StringDedup::threads_do(ThreadClosure* tc) { + assert(is_enabled(), "String deduplication not enabled"); + tc->do_thread(G1StringDedupThread::thread()); +} + +void G1StringDedup::print_worker_threads_on(outputStream* st) { + assert(is_enabled(), "String deduplication not enabled"); + G1StringDedupThread::thread()->print_on(st); + st->cr(); +} + +void G1StringDedup::verify() { + assert(is_enabled(), "String 
deduplication not enabled"); + G1StringDedupQueue::verify(); + G1StringDedupTable::verify(); +} + +G1StringDedupUnlinkOrOopsDoClosure::G1StringDedupUnlinkOrOopsDoClosure(BoolObjectClosure* is_alive, + OopClosure* keep_alive, + bool allow_resize_and_rehash) : + _is_alive(is_alive), + _keep_alive(keep_alive), + _resized_table(NULL), + _rehashed_table(NULL), + _next_queue(0), + _next_bucket(0) { + if (allow_resize_and_rehash) { + // If both resize and rehash are needed, only do resize. Rehash of + // the table will eventually happen if the situation persists. + _resized_table = G1StringDedupTable::prepare_resize(); + if (!is_resizing()) { + _rehashed_table = G1StringDedupTable::prepare_rehash(); + } + } +} + +G1StringDedupUnlinkOrOopsDoClosure::~G1StringDedupUnlinkOrOopsDoClosure() { + assert(!is_resizing() || !is_rehashing(), "Can not both resize and rehash"); + if (is_resizing()) { + G1StringDedupTable::finish_resize(_resized_table); + } else if (is_rehashing()) { + G1StringDedupTable::finish_rehash(_rehashed_table); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedup.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP + +// +// String Deduplication +// +// String deduplication aims to reduce the heap live-set by deduplicating identical +// instances of String so that they share the same backing character array. +// +// The deduplication process is divided into two main parts, 1) finding the objects to +// deduplicate, and 2) deduplicating those objects. The first part is done as part of +// a normal GC cycle when objects are marked or evacuated. At this time a check is +// applied to each object to determine whether it is a candidate for deduplication. If so, the +// object is placed on the deduplication queue for later processing. The second part, +// processing the objects on the deduplication queue, is a concurrent phase which +// starts right after the stop-the-world marking/evacuation phase. This phase is +// executed by the deduplication thread, which pulls deduplication candidates off the +// deduplication queue and tries to deduplicate them. +// +// A deduplication hashtable is used to keep track of all unique character arrays +// used by String objects.
When deduplicating, a lookup is made in this table to see +// if there is already an identical character array somewhere on the heap. If so, the +// String object is adjusted to point to that character array, releasing the reference +// to the original array, allowing it to eventually be garbage collected. If the lookup +// fails, the character array is instead inserted into the hashtable so that this array +// can be shared at some point in the future. +// +// Candidate selection +// +// An object is considered a deduplication candidate if all of the following +// statements are true: +// +// - The object is an instance of java.lang.String +// +// - The object is being evacuated from a young heap region +// +// - The object is being evacuated to a young/survivor heap region and the +// object's age is equal to the deduplication age threshold +// +// or +// +// The object is being evacuated to an old heap region and the object's age is +// less than the deduplication age threshold +// +// Once a string object has been promoted to an old region, or its age is higher +// than the deduplication age threshold, it will never become a candidate again. +// This approach avoids making the same object a candidate more than once. +// +// Interned strings are a bit special. They are explicitly deduplicated just before +// being inserted into the StringTable (to avoid counteracting C2 optimizations done +// on string literals), then they also become deduplication candidates if they reach +// the deduplication age threshold or are evacuated to an old heap region. The second +// attempt to deduplicate such strings will be in vain, but we have no fast way of +// filtering them out. This has not been shown to be a problem, as the number of interned +// strings is usually dwarfed by the number of normal (non-interned) strings. +// +// For additional information on string deduplication, please see JEP 192, +// http://openjdk.java.net/jeps/192 +// + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +class OopClosure; +class BoolObjectClosure; +class ThreadClosure; +class outputStream; +class G1StringDedupTable; + +// +// Main interface for interacting with string deduplication. +// +class G1StringDedup : public AllStatic { +private: + // Single state for checking if both G1 and string deduplication are enabled. + static bool _enabled; + + // Candidate selection policies, returns true if the given object is + // a candidate for string deduplication. + static bool is_candidate_from_mark(oop obj); + static bool is_candidate_from_evacuation(bool from_young, bool to_young, oop obj); + +public: + // Returns true if both G1 and string deduplication are enabled. + static bool is_enabled() { + return _enabled; + } + + static void initialize(); + + // Immediately deduplicates the given String object, bypassing + // the deduplication queue. + static void deduplicate(oop java_string); + + // Enqueues a deduplication candidate for later processing by the deduplication + // thread. Before enqueuing, these functions apply the appropriate candidate + // selection policy to filter out non-candidates.
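The candidate rules spelled out above, and implemented by the two is_candidate_from_*() policies declared just below, boil down to a small predicate: only Strings leaving a young region are considered, and the age test depends on whether the copy stays young or is promoted. A condensed sketch (plain types instead of the VM's oop model):

    // Condensed form of the evacuation-time candidate policy described above;
    // 'threshold' stands for StringDeduplicationAgeThreshold.
    bool is_dedup_candidate(bool is_string, bool from_young, bool to_young,
                            unsigned int age, unsigned int threshold) {
      if (!is_string || !from_young) {
        return false;              // only Strings evacuated out of young regions qualify
      }
      if (to_young) {
        return age == threshold;   // just reached the threshold: enqueue exactly once
      }
      return age < threshold;      // promoted to old before ever reaching the threshold
    }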
+ static void enqueue_from_mark(oop java_string); + static void enqueue_from_evacuation(bool from_young, bool to_young, + unsigned int queue, oop java_string); + + static void oops_do(OopClosure* keep_alive); + static void unlink(BoolObjectClosure* is_alive); + static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, + bool allow_resize_and_rehash = true); + + static void threads_do(ThreadClosure* tc); + static void print_worker_threads_on(outputStream* st); + static void verify(); +}; + +// +// This closure encapsulates the state and the closures needed when scanning +// the deduplication queue and table during the unlink_or_oops_do() operation. +// A single instance of this closure is created and then shared by all worker +// threads participating in the scan. The _next_queue and _next_bucket fields +// provide a simple mechanism for GC workers to claim exclusive access to a +// queue or a table partition. +// +class G1StringDedupUnlinkOrOopsDoClosure : public StackObj { +private: + BoolObjectClosure* _is_alive; + OopClosure* _keep_alive; + G1StringDedupTable* _resized_table; + G1StringDedupTable* _rehashed_table; + size_t _next_queue; + size_t _next_bucket; + +public: + G1StringDedupUnlinkOrOopsDoClosure(BoolObjectClosure* is_alive, + OopClosure* keep_alive, + bool allow_resize_and_rehash); + ~G1StringDedupUnlinkOrOopsDoClosure(); + + bool is_resizing() { + return _resized_table != NULL; + } + + G1StringDedupTable* resized_table() { + return _resized_table; + } + + bool is_rehashing() { + return _rehashed_table != NULL; + } + + // Atomically claims the next available queue for exclusive access by + // the current thread. Returns the queue number of the claimed queue. + size_t claim_queue() { + return (size_t)Atomic::add_ptr(1, &_next_queue) - 1; + } + + // Atomically claims the next available table partition for exclusive + // access by the current thread. Returns the table bucket number where + // the claimed partition starts. + size_t claim_table_partition(size_t partition_size) { + return (size_t)Atomic::add_ptr(partition_size, &_next_bucket) - partition_size; + } + + // Applies and returns the result from the is_alive closure, or + // returns true if no such closure was provided. + bool is_alive(oop o) { + if (_is_alive != NULL) { + return _is_alive->do_object_b(o); + } + return true; + } + + // Applies the keep_alive closure, or does nothing if no such + // closure was provided. + void keep_alive(oop* p) { + if (_keep_alive != NULL) { + _keep_alive->do_oop(p); + } + } +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUP_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedupQueue.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupQueue.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
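The claim_queue() and claim_table_partition() helpers above use a standard parallel-GC work-distribution idiom: a shared cursor advanced with an atomic add, where the value before the increment is the index (or starting bucket) the caller has claimed. The same pattern in portable C++ (std::atomic standing in for HotSpot's Atomic::add_ptr):

    #include <atomic>
    #include <cstddef>

    struct WorkClaimer {
      std::atomic<size_t> next{0};
      // Returns a unique index per call; every index is handed out exactly once.
      size_t claim() { return next.fetch_add(1); }
    };

    // Typical consumer loop, mirroring G1StringDedupQueue::unlink_or_oops_do():
    void scan_all_queues(WorkClaimer& claimer, size_t nqueues) {
      for (;;) {
        size_t q = claimer.claim();
        if (q >= nqueues) {
          break;  // every queue has been claimed by some worker
        }
        // ... scan queue q ...
      }
    }

claim_table_partition() is the same idea with a stride of partition_size buckets per claim instead of a single queue.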
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "classfile/javaClasses.hpp" +#include "gc_implementation/g1/g1StringDedupQueue.hpp" +#include "memory/gcLocker.hpp" +#include "runtime/mutexLocker.hpp" +#include "utilities/stack.inline.hpp" + +G1StringDedupQueue* G1StringDedupQueue::_queue = NULL; +const size_t G1StringDedupQueue::_max_size = 1000000; // Max number of elements per queue +const size_t G1StringDedupQueue::_max_cache_size = 0; // Max cache size per queue + +G1StringDedupQueue::G1StringDedupQueue() : + _cursor(0), + _empty(true), + _dropped(0) { + _nqueues = MAX2(ParallelGCThreads, (size_t)1); + _queues = NEW_C_HEAP_ARRAY(G1StringDedupWorkerQueue, _nqueues, mtGC); + for (size_t i = 0; i < _nqueues; i++) { + new (_queues + i) G1StringDedupWorkerQueue(G1StringDedupWorkerQueue::default_segment_size(), _max_cache_size, _max_size); + } +} + +G1StringDedupQueue::~G1StringDedupQueue() { + ShouldNotReachHere(); +} + +void G1StringDedupQueue::create() { + assert(_queue == NULL, "One string deduplication queue allowed"); + _queue = new G1StringDedupQueue(); +} + +void G1StringDedupQueue::wait() { + MonitorLockerEx ml(StringDedupQueue_lock, Mutex::_no_safepoint_check_flag); + while (_queue->_empty) { + ml.wait(Mutex::_no_safepoint_check_flag); + } +} + +void G1StringDedupQueue::push(uint worker_id, oop java_string) { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint"); + assert(worker_id < _queue->_nqueues, "Invalid queue"); + + // Push and notify waiter + G1StringDedupWorkerQueue& worker_queue = _queue->_queues[worker_id]; + if (!worker_queue.is_full()) { + worker_queue.push(java_string); + if (_queue->_empty) { + MonitorLockerEx ml(StringDedupQueue_lock, Mutex::_no_safepoint_check_flag); + if (_queue->_empty) { + // Mark non-empty and notify waiter + _queue->_empty = false; + ml.notify(); + } + } + } else { + // Queue is full, drop the string and update the statistics + Atomic::inc_ptr(&_queue->_dropped); + } +} + +oop G1StringDedupQueue::pop() { + assert(!SafepointSynchronize::is_at_safepoint(), "Must not be at safepoint"); + No_Safepoint_Verifier nsv; + + // Try all queues before giving up + for (size_t tries = 0; tries < _queue->_nqueues; tries++) { + // The cursor indicates where we left of last time + G1StringDedupWorkerQueue* queue = &_queue->_queues[_queue->_cursor]; + while (!queue->is_empty()) { + oop obj = queue->pop(); + // The oop we pop can be NULL if it was marked + // dead. Just ignore those and pop the next oop. + if (obj != NULL) { + return obj; + } + } + + // Try next queue + _queue->_cursor = (_queue->_cursor + 1) % _queue->_nqueues; + } + + // Mark empty + _queue->_empty = true; + + return NULL; +} + +void G1StringDedupQueue::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl) { + // A worker thread first claims a queue, which ensures exclusive + // access to that queue, then continues to process it. 
+ for (;;) { + // Grab next queue to scan + size_t queue = cl->claim_queue(); + if (queue >= _queue->_nqueues) { + // End of queues + break; + } + + // Scan the queue + unlink_or_oops_do(cl, queue); + } +} + +void G1StringDedupQueue::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, size_t queue) { + assert(queue < _queue->_nqueues, "Invalid queue"); + StackIterator iter(_queue->_queues[queue]); + while (!iter.is_empty()) { + oop* p = iter.next_addr(); + if (*p != NULL) { + if (cl->is_alive(*p)) { + cl->keep_alive(p); + } else { + // Clear dead reference + *p = NULL; + } + } + } +} + +void G1StringDedupQueue::print_statistics(outputStream* st) { + st->print_cr( + " [Queue]\n" + " [Dropped: "UINTX_FORMAT"]", _queue->_dropped); +} + +void G1StringDedupQueue::verify() { + for (size_t i = 0; i < _queue->_nqueues; i++) { + StackIterator iter(_queue->_queues[i]); + while (!iter.is_empty()) { + oop obj = iter.next(); + if (obj != NULL) { + guarantee(Universe::heap()->is_in_reserved(obj), "Object must be on the heap"); + guarantee(!obj->is_forwarded(), "Object must not be forwarded"); + guarantee(java_lang_String::is_instance(obj), "Object must be a String"); + } + } + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedupQueue.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupQueue.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "utilities/stack.hpp" + +class G1StringDedupUnlinkOrOopsDoClosure; + +// +// The deduplication queue acts as the communication channel between the stop-the-world +// mark/evacuation phase and the concurrent deduplication phase. Deduplication candidates +// found during mark/evacuation are placed on this queue for later processing in the +// deduplication thread. A queue entry is an oop pointing to a String object (as opposed +// to entries in the deduplication hashtable which points to character arrays). +// +// While users of the queue treat it as a single queue, it is implemented as a set of +// queues, one queue per GC worker thread, to allow lock-free and cache-friendly enqueue +// operations by the GC workers. 
+// +// The oops in the queue are treated as weak pointers, meaning the objects they point to +// can become unreachable and pruned (cleared) before being popped by the deduplication +// thread. +// +// Pushing to the queue is thread safe (this relies on each thread using a unique worker +// id), but only allowed during a safepoint. Popping from the queue is NOT thread safe +// and can only be done by the deduplication thread outside a safepoint. +// +// The StringDedupQueue_lock is only used for blocking and waking up the deduplication +// thread in case the queue is empty or becomes non-empty, respectively. This lock does +// not otherwise protect the queue content. +// +class G1StringDedupQueue : public CHeapObj { +private: + typedef Stack G1StringDedupWorkerQueue; + + static G1StringDedupQueue* _queue; + static const size_t _max_size; + static const size_t _max_cache_size; + + G1StringDedupWorkerQueue* _queues; + size_t _nqueues; + size_t _cursor; + volatile bool _empty; + + // Statistics counter, only used for logging. + uintx _dropped; + + G1StringDedupQueue(); + ~G1StringDedupQueue(); + + static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, size_t queue); + +public: + static void create(); + + // Blocks and waits for the queue to become non-empty. + static void wait(); + + // Pushes a deduplication candidate onto a specific GC worker queue. + static void push(uint worker_id, oop java_string); + + // Pops a deduplication candidate from any queue, returns NULL if + // all queues are empty. + static oop pop(); + + static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl); + + static void print_statistics(outputStream* st); + static void verify(); +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPQUEUE_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedupStat.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupStat.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
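Taken together, the queue API above describes a simple producer/consumer protocol: GC workers push candidates into per-worker sub-queues during a safepoint, and the single deduplication thread later blocks in wait() and drains with pop(). The consumer side presumably looks roughly like the sketch below; the real loop lives in G1StringDedupThread, which is not part of this hunk, and safepoint interaction, statistics printing and termination are omitted.

    // Illustrative sketch only; not the actual G1StringDedupThread body.
    void dedup_consumer_loop() {
      G1StringDedupStat total_stat;
      for (;;) {
        G1StringDedupQueue::wait();                      // block until a candidate arrives
        G1StringDedupStat stat;
        for (oop java_string = G1StringDedupQueue::pop();
             java_string != NULL;
             java_string = G1StringDedupQueue::pop()) {
          G1StringDedupTable::deduplicate(java_string, stat);
        }
        total_stat.add(stat);                            // accumulate per-batch statistics
      }
    }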
+ * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/g1StringDedupStat.hpp" + +G1StringDedupStat::G1StringDedupStat() : + _inspected(0), + _skipped(0), + _hashed(0), + _known(0), + _new(0), + _new_bytes(0), + _deduped(0), + _deduped_bytes(0), + _deduped_young(0), + _deduped_young_bytes(0), + _deduped_old(0), + _deduped_old_bytes(0), + _idle(0), + _exec(0), + _block(0), + _start(0.0), + _idle_elapsed(0.0), + _exec_elapsed(0.0), + _block_elapsed(0.0) { +} + +void G1StringDedupStat::add(const G1StringDedupStat& stat) { + _inspected += stat._inspected; + _skipped += stat._skipped; + _hashed += stat._hashed; + _known += stat._known; + _new += stat._new; + _new_bytes += stat._new_bytes; + _deduped += stat._deduped; + _deduped_bytes += stat._deduped_bytes; + _deduped_young += stat._deduped_young; + _deduped_young_bytes += stat._deduped_young_bytes; + _deduped_old += stat._deduped_old; + _deduped_old_bytes += stat._deduped_old_bytes; + _idle += stat._idle; + _exec += stat._exec; + _block += stat._block; + _idle_elapsed += stat._idle_elapsed; + _exec_elapsed += stat._exec_elapsed; + _block_elapsed += stat._block_elapsed; +} + +void G1StringDedupStat::print_summary(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat) { + double total_deduped_bytes_percent = 0.0; + + if (total_stat._new_bytes > 0) { + // Avoid division by zero + total_deduped_bytes_percent = (double)total_stat._deduped_bytes / (double)total_stat._new_bytes * 100.0; + } + + st->date_stamp(PrintGCDateStamps); + st->stamp(PrintGCTimeStamps); + st->print_cr( + "[GC concurrent-string-deduplication, " + G1_STRDEDUP_BYTES_FORMAT_NS"->"G1_STRDEDUP_BYTES_FORMAT_NS"("G1_STRDEDUP_BYTES_FORMAT_NS"), avg " + G1_STRDEDUP_PERCENT_FORMAT_NS", "G1_STRDEDUP_TIME_FORMAT"]", + G1_STRDEDUP_BYTES_PARAM(last_stat._new_bytes), + G1_STRDEDUP_BYTES_PARAM(last_stat._new_bytes - last_stat._deduped_bytes), + G1_STRDEDUP_BYTES_PARAM(last_stat._deduped_bytes), + total_deduped_bytes_percent, + last_stat._exec_elapsed); +} + +void G1StringDedupStat::print_statistics(outputStream* st, const G1StringDedupStat& stat, bool total) { + double young_percent = 0.0; + double old_percent = 0.0; + double skipped_percent = 0.0; + double hashed_percent = 0.0; + double known_percent = 0.0; + double new_percent = 0.0; + double deduped_percent = 0.0; + double deduped_bytes_percent = 0.0; + double deduped_young_percent = 0.0; + double deduped_young_bytes_percent = 0.0; + double deduped_old_percent = 0.0; + double deduped_old_bytes_percent = 0.0; + + if (stat._inspected > 0) { + // Avoid division by zero + skipped_percent = (double)stat._skipped / (double)stat._inspected * 100.0; + hashed_percent = (double)stat._hashed / (double)stat._inspected * 100.0; + known_percent = (double)stat._known / (double)stat._inspected * 100.0; + new_percent = (double)stat._new / (double)stat._inspected * 100.0; + } + + if (stat._new > 0) { + // Avoid division by zero + deduped_percent = (double)stat._deduped / (double)stat._new * 100.0; + } + + if (stat._deduped > 0) { + // Avoid division by zero + deduped_young_percent = (double)stat._deduped_young / (double)stat._deduped * 100.0; + deduped_old_percent = (double)stat._deduped_old / (double)stat._deduped * 100.0; + } + + if (stat._new_bytes > 0) { + // Avoid division by zero + deduped_bytes_percent = (double)stat._deduped_bytes / (double)stat._new_bytes * 100.0; + } + + if (stat._deduped_bytes > 0) { + // Avoid division by zero + deduped_young_bytes_percent = (double)stat._deduped_young_bytes / 
(double)stat._deduped_bytes * 100.0; + deduped_old_bytes_percent = (double)stat._deduped_old_bytes / (double)stat._deduped_bytes * 100.0; + } + + if (total) { + st->print_cr( + " [Total Exec: "UINTX_FORMAT"/"G1_STRDEDUP_TIME_FORMAT", Idle: "UINTX_FORMAT"/"G1_STRDEDUP_TIME_FORMAT", Blocked: "UINTX_FORMAT"/"G1_STRDEDUP_TIME_FORMAT"]", + stat._exec, stat._exec_elapsed, stat._idle, stat._idle_elapsed, stat._block, stat._block_elapsed); + } else { + st->print_cr( + " [Last Exec: "G1_STRDEDUP_TIME_FORMAT", Idle: "G1_STRDEDUP_TIME_FORMAT", Blocked: "UINTX_FORMAT"/"G1_STRDEDUP_TIME_FORMAT"]", + stat._exec_elapsed, stat._idle_elapsed, stat._block, stat._block_elapsed); + } + st->print_cr( + " [Inspected: "G1_STRDEDUP_OBJECTS_FORMAT"]\n" + " [Skipped: "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n" + " [Hashed: "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n" + " [Known: "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n" + " [New: "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT") "G1_STRDEDUP_BYTES_FORMAT"]\n" + " [Deduplicated: "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT") "G1_STRDEDUP_BYTES_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n" + " [Young: "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT") "G1_STRDEDUP_BYTES_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]\n" + " [Old: "G1_STRDEDUP_OBJECTS_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT") "G1_STRDEDUP_BYTES_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT")]", + stat._inspected, + stat._skipped, skipped_percent, + stat._hashed, hashed_percent, + stat._known, known_percent, + stat._new, new_percent, G1_STRDEDUP_BYTES_PARAM(stat._new_bytes), + stat._deduped, deduped_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_bytes), deduped_bytes_percent, + stat._deduped_young, deduped_young_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_young_bytes), deduped_young_bytes_percent, + stat._deduped_old, deduped_old_percent, G1_STRDEDUP_BYTES_PARAM(stat._deduped_old_bytes), deduped_old_bytes_percent); +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedupStat.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupStat.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP + +#include "memory/allocation.hpp" +#include "runtime/os.hpp" + +// Macros for GC log output formating +#define G1_STRDEDUP_OBJECTS_FORMAT UINTX_FORMAT_W(12) +#define G1_STRDEDUP_TIME_FORMAT "%1.7lf secs" +#define G1_STRDEDUP_PERCENT_FORMAT "%5.1lf%%" +#define G1_STRDEDUP_PERCENT_FORMAT_NS "%.1lf%%" +#define G1_STRDEDUP_BYTES_FORMAT "%8.1lf%s" +#define G1_STRDEDUP_BYTES_FORMAT_NS "%.1lf%s" +#define G1_STRDEDUP_BYTES_PARAM(bytes) byte_size_in_proper_unit((double)(bytes)), proper_unit_for_byte_size((bytes)) + +// +// Statistics gathered by the deduplication thread. +// +class G1StringDedupStat : public StackObj { +private: + // Counters + uintx _inspected; + uintx _skipped; + uintx _hashed; + uintx _known; + uintx _new; + uintx _new_bytes; + uintx _deduped; + uintx _deduped_bytes; + uintx _deduped_young; + uintx _deduped_young_bytes; + uintx _deduped_old; + uintx _deduped_old_bytes; + uintx _idle; + uintx _exec; + uintx _block; + + // Time spent by the deduplication thread in different phases + double _start; + double _idle_elapsed; + double _exec_elapsed; + double _block_elapsed; + +public: + G1StringDedupStat(); + + void inc_inspected() { + _inspected++; + } + + void inc_skipped() { + _skipped++; + } + + void inc_hashed() { + _hashed++; + } + + void inc_known() { + _known++; + } + + void inc_new(uintx bytes) { + _new++; + _new_bytes += bytes; + } + + void inc_deduped_young(uintx bytes) { + _deduped++; + _deduped_bytes += bytes; + _deduped_young++; + _deduped_young_bytes += bytes; + } + + void inc_deduped_old(uintx bytes) { + _deduped++; + _deduped_bytes += bytes; + _deduped_old++; + _deduped_old_bytes += bytes; + } + + void mark_idle() { + _start = os::elapsedTime(); + _idle++; + } + + void mark_exec() { + double now = os::elapsedTime(); + _idle_elapsed = now - _start; + _start = now; + _exec++; + } + + void mark_block() { + double now = os::elapsedTime(); + _exec_elapsed += now - _start; + _start = now; + _block++; + } + + void mark_unblock() { + double now = os::elapsedTime(); + _block_elapsed += now - _start; + _start = now; + } + + void mark_done() { + double now = os::elapsedTime(); + _exec_elapsed += now - _start; + } + + void add(const G1StringDedupStat& stat); + + static void print_summary(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat); + static void print_statistics(outputStream* st, const G1StringDedupStat& stat, bool total); +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPSTAT_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,569 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
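The mark_*() methods above form a small state machine that attributes the deduplication thread's wall-clock time to idle, executing and blocked buckets; each call closes the interval opened by the previous call. A sketch of the intended call order (the real call sites are in the deduplication thread and table code, not shown in this hunk):

    void stat_timing_example() {
      G1StringDedupStat stat;
      stat.mark_idle();       // start idle: about to wait for candidates
      // ... queue becomes non-empty ...
      stat.mark_exec();       // closes the idle interval, starts an execution interval
      // ... deduplicating; a contended lock must be taken ...
      stat.mark_block();      // closes the execution interval, starts a blocked interval
      stat.mark_unblock();    // closes the blocked interval, execution resumes
      stat.mark_done();       // closes the final execution interval for this batch
    }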
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "classfile/altHashing.hpp" +#include "classfile/javaClasses.hpp" +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/g1StringDedupTable.hpp" +#include "memory/gcLocker.hpp" +#include "memory/padded.inline.hpp" +#include "oops/typeArrayOop.hpp" +#include "runtime/mutexLocker.hpp" + +// +// Freelist in the deduplication table entry cache. Links table +// entries together using their _next fields. +// +class G1StringDedupEntryFreeList : public CHeapObj<mtGC> { +private: + G1StringDedupEntry* _list; + size_t _length; + +public: + G1StringDedupEntryFreeList() : + _list(NULL), + _length(0) { + } + + void add(G1StringDedupEntry* entry) { + entry->set_next(_list); + _list = entry; + _length++; + } + + G1StringDedupEntry* remove() { + G1StringDedupEntry* entry = _list; + if (entry != NULL) { + _list = entry->next(); + _length--; + } + return entry; + } + + size_t length() { + return _length; + } +}; + +// +// Cache of deduplication table entries. This cache provides fast allocation and +// reuse of table entries to lower the pressure on the underlying allocator. +// But more importantly, it provides fast/deferred freeing of table entries. This +// is important because freeing of table entries is done during stop-the-world +// phases and it is not uncommon for a large number of entries to be freed at once. +// Table entries that are freed during these phases are placed onto a freelist in +// the cache. The deduplication thread, which executes in a concurrent phase, will +// later reuse or free the underlying memory for these entries. +// +// The cache allows for single-threaded allocations and multi-threaded frees. +// Allocations are synchronized by StringDedupTable_lock as part of a table +// modification. +// +class G1StringDedupEntryCache : public CHeapObj<mtGC> { +private: + // One freelist per GC worker to allow lock-free freeing of + // entries while doing a parallel scan of the table. Using + // PaddedEnd to avoid false sharing. + PaddedEnd<G1StringDedupEntryFreeList>* _lists; + size_t _nlists; + +public: + G1StringDedupEntryCache(); + ~G1StringDedupEntryCache(); + + // Get a table entry from the cache freelist, or allocate a new + // entry if the cache is empty. + G1StringDedupEntry* alloc(); + + // Insert a table entry into the cache freelist. + void free(G1StringDedupEntry* entry, uint worker_id); + + // Returns current number of entries in the cache. + size_t size(); + + // If the cache has grown above the given max size, trim it down + // and deallocate the memory occupied by the trimmed-off entries.
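The PaddedEnd wrapper mentioned in the comment above exists so that each worker's freelist head sits on its own cache line; without the padding, frees performed concurrently by different workers would keep invalidating one another's cache lines even though they touch logically independent data. The same idea in portable C++ (illustrative only, not the HotSpot PaddedEnd/PaddedArray templates):

    #include <cstddef>

    // Pad each per-worker slot to a full cache line so concurrent writers on
    // different slots never share (and ping-pong) a cache line.
    struct alignas(64) PaddedWorkerFreeList {
      void*  head;    // worker-local freelist head
      size_t length;  // worker-local entry count
    };

    const size_t kMaxWorkers = 64;
    PaddedWorkerFreeList worker_lists[kMaxWorkers];  // worker i only ever writes worker_lists[i]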
+ void trim(size_t max_size); +}; + +G1StringDedupEntryCache::G1StringDedupEntryCache() { + _nlists = MAX2(ParallelGCThreads, (size_t)1); + _lists = PaddedArray::create_unfreeable((uint)_nlists); +} + +G1StringDedupEntryCache::~G1StringDedupEntryCache() { + ShouldNotReachHere(); +} + +G1StringDedupEntry* G1StringDedupEntryCache::alloc() { + for (size_t i = 0; i < _nlists; i++) { + G1StringDedupEntry* entry = _lists[i].remove(); + if (entry != NULL) { + return entry; + } + } + return new G1StringDedupEntry(); +} + +void G1StringDedupEntryCache::free(G1StringDedupEntry* entry, uint worker_id) { + assert(entry->obj() != NULL, "Double free"); + assert(worker_id < _nlists, "Invalid worker id"); + entry->set_obj(NULL); + entry->set_hash(0); + _lists[worker_id].add(entry); +} + +size_t G1StringDedupEntryCache::size() { + size_t size = 0; + for (size_t i = 0; i < _nlists; i++) { + size += _lists[i].length(); + } + return size; +} + +void G1StringDedupEntryCache::trim(size_t max_size) { + size_t cache_size = 0; + for (size_t i = 0; i < _nlists; i++) { + G1StringDedupEntryFreeList* list = &_lists[i]; + cache_size += list->length(); + while (cache_size > max_size) { + G1StringDedupEntry* entry = list->remove(); + assert(entry != NULL, "Should not be null"); + cache_size--; + delete entry; + } + } +} + +G1StringDedupTable* G1StringDedupTable::_table = NULL; +G1StringDedupEntryCache* G1StringDedupTable::_entry_cache = NULL; + +const size_t G1StringDedupTable::_min_size = (1 << 10); // 1024 +const size_t G1StringDedupTable::_max_size = (1 << 24); // 16777216 +const double G1StringDedupTable::_grow_load_factor = 2.0; // Grow table at 200% load +const double G1StringDedupTable::_shrink_load_factor = _grow_load_factor / 3.0; // Shrink table at 67% load +const double G1StringDedupTable::_max_cache_factor = 0.1; // Cache a maximum of 10% of the table size +const uintx G1StringDedupTable::_rehash_multiple = 60; // Hash bucket has 60 times more collisions than expected +const uintx G1StringDedupTable::_rehash_threshold = (uintx)(_rehash_multiple * _grow_load_factor); + +uintx G1StringDedupTable::_entries_added = 0; +uintx G1StringDedupTable::_entries_removed = 0; +uintx G1StringDedupTable::_resize_count = 0; +uintx G1StringDedupTable::_rehash_count = 0; + +G1StringDedupTable::G1StringDedupTable(size_t size, jint hash_seed) : + _size(size), + _entries(0), + _grow_threshold((uintx)(size * _grow_load_factor)), + _shrink_threshold((uintx)(size * _shrink_load_factor)), + _rehash_needed(false), + _hash_seed(hash_seed) { + assert(is_power_of_2(size), "Table size must be a power of 2"); + _buckets = NEW_C_HEAP_ARRAY(G1StringDedupEntry*, _size, mtGC); + memset(_buckets, 0, _size * sizeof(G1StringDedupEntry*)); +} + +G1StringDedupTable::~G1StringDedupTable() { + FREE_C_HEAP_ARRAY(G1StringDedupEntry*, _buckets, mtGC); +} + +void G1StringDedupTable::create() { + assert(_table == NULL, "One string deduplication table allowed"); + _entry_cache = new G1StringDedupEntryCache(); + _table = new G1StringDedupTable(_min_size); +} + +void G1StringDedupTable::add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list) { + G1StringDedupEntry* entry = _entry_cache->alloc(); + entry->set_obj(value); + entry->set_hash(hash); + entry->set_next(*list); + *list = entry; + _entries++; +} + +void G1StringDedupTable::remove(G1StringDedupEntry** pentry, uint worker_id) { + G1StringDedupEntry* entry = *pentry; + *pentry = entry->next(); + _entry_cache->free(entry, worker_id); +} + +void 
G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) { + G1StringDedupEntry* entry = *pentry; + *pentry = entry->next(); + unsigned int hash = entry->hash(); + size_t index = dest->hash_to_index(hash); + G1StringDedupEntry** list = dest->bucket(index); + entry->set_next(*list); + *list = entry; +} + +bool G1StringDedupTable::equals(typeArrayOop value1, typeArrayOop value2) { + return (value1 == value2 || + (value1->length() == value2->length() && + (!memcmp(value1->base(T_CHAR), + value2->base(T_CHAR), + value1->length() * sizeof(jchar))))); +} + +typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, unsigned int hash, + G1StringDedupEntry** list, uintx &count) { + for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) { + if (entry->hash() == hash) { + typeArrayOop existing_value = entry->obj(); + if (equals(value, existing_value)) { + // Match found + return existing_value; + } + } + count++; + } + + // Not found + return NULL; +} + +typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, unsigned int hash) { + size_t index = hash_to_index(hash); + G1StringDedupEntry** list = bucket(index); + uintx count = 0; + + // Lookup in list + typeArrayOop existing_value = lookup(value, hash, list, count); + + // Check if rehash is needed + if (count > _rehash_threshold) { + _rehash_needed = true; + } + + if (existing_value == NULL) { + // Not found, add new entry + add(value, hash, list); + + // Update statistics + _entries_added++; + } + + return existing_value; +} + +unsigned int G1StringDedupTable::hash_code(typeArrayOop value) { + unsigned int hash; + int length = value->length(); + const jchar* data = (jchar*)value->base(T_CHAR); + + if (use_java_hash()) { + hash = java_lang_String::hash_code(data, length); + } else { + hash = AltHashing::murmur3_32(_table->_hash_seed, data, length); + } + + return hash; +} + +void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) { + assert(java_lang_String::is_instance(java_string), "Must be a string"); + No_Safepoint_Verifier nsv; + + stat.inc_inspected(); + + typeArrayOop value = java_lang_String::value(java_string); + if (value == NULL) { + // String has no value + stat.inc_skipped(); + return; + } + + unsigned int hash = 0; + + if (use_java_hash()) { + // Get hash code from cache + hash = java_lang_String::hash(java_string); + } + + if (hash == 0) { + // Compute hash + hash = hash_code(value); + stat.inc_hashed(); + } + + if (use_java_hash() && hash != 0) { + // Store hash code in cache + java_lang_String::set_hash(java_string, hash); + } + + typeArrayOop existing_value = lookup_or_add(value, hash); + if (existing_value == value) { + // Same value, already known + stat.inc_known(); + return; + } + + // Get size of value array + uintx size_in_bytes = value->size() * HeapWordSize; + stat.inc_new(size_in_bytes); + + if (existing_value != NULL) { + // Enqueue the reference to make sure it is kept alive. Concurrent mark might + // otherwise declare it dead if there are no other strong references to this object. 
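A note on hash_code() defined above: with a zero seed the table uses the Java-compatible hash so the result can double as the String's cached hash value; with a non-zero seed the seeded murmur3_32 variant is used instead. The Java-compatible hash is the usual java.lang.String.hashCode() polynomial over the UTF-16 code units; a self-contained reference model of that computation (a sketch, not the HotSpot implementation):

// h = s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1], computed iteratively
unsigned int java_string_hash(const unsigned short* data, int length) {
  unsigned int h = 0;
  for (int i = 0; i < length; i++) {
    h = 31 * h + data[i];
  }
  return h;
}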
+ G1SATBCardTableModRefBS::enqueue(existing_value); + + // Existing value found, deduplicate string + java_lang_String::set_value(java_string, existing_value); + + if (G1CollectedHeap::heap()->is_in_young(value)) { + stat.inc_deduped_young(size_in_bytes); + } else { + stat.inc_deduped_old(size_in_bytes); + } + } +} + +G1StringDedupTable* G1StringDedupTable::prepare_resize() { + size_t size = _table->_size; + + // Check if the hashtable needs to be resized + if (_table->_entries > _table->_grow_threshold) { + // Grow table, double the size + size *= 2; + if (size > _max_size) { + // Too big, don't resize + return NULL; + } + } else if (_table->_entries < _table->_shrink_threshold) { + // Shrink table, half the size + size /= 2; + if (size < _min_size) { + // Too small, don't resize + return NULL; + } + } else if (StringDeduplicationResizeALot) { + // Force grow + size *= 2; + if (size > _max_size) { + // Too big, force shrink instead + size /= 4; + } + } else { + // Resize not needed + return NULL; + } + + // Update statistics + _resize_count++; + + // Allocate the new table. The new table will be populated by workers + // calling unlink_or_oops_do() and finally installed by finish_resize(). + return new G1StringDedupTable(size, _table->_hash_seed); +} + +void G1StringDedupTable::finish_resize(G1StringDedupTable* resized_table) { + assert(resized_table != NULL, "Invalid table"); + + resized_table->_entries = _table->_entries; + + // Free old table + delete _table; + + // Install new table + _table = resized_table; +} + +void G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id) { + // The table is divided into partitions to allow lock-less parallel processing by + // multiple worker threads. A worker thread first claims a partition, which ensures + // exclusive access to that part of the table, then continues to process it. To allow + // shrinking of the table in parallel we also need to make sure that the same worker + // thread processes all partitions where entries will hash to the same destination + // partition. Since the table size is always a power of two and we always shrink by + // dividing the table in half, we know that for a given partition there is only one + // other partition whoes entries will hash to the same destination partition. That + // other partition is always the sibling partition in the second half of the table. + // For example, if the table is divided into 8 partitions, the sibling of partition 0 + // is partition 4, the sibling of partition 1 is partition 5, etc. 
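The sibling-partition argument above can be checked with a few lines of stand-alone arithmetic: when a power-of-two table of size S is halved, entries in old bucket i (those with hash & (S - 1) == i) land in new bucket hash & (S/2 - 1), and old buckets i and i + S/2 both map to the same new bucket i. A minimal self-contained sketch of that index arithmetic (illustrative only):

#include <assert.h>
#include <stddef.h>

int main() {
  const size_t S = 8;           // old table size, a power of two
  const size_t half = S / 2;    // new size after shrinking
  for (size_t i = 0; i < half; i++) {
    // old buckets i and i + half both map to new bucket i
    assert((i & (half - 1)) == i);
    assert(((i + half) & (half - 1)) == i);
  }
  return 0;
}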
+ size_t table_half = _table->_size / 2; + + // Let each partition be one page worth of buckets + size_t partition_size = MIN2(table_half, os::vm_page_size() / sizeof(G1StringDedupEntry*)); + assert(table_half % partition_size == 0, "Invalid partition size"); + + // Number of entries removed during the scan + uintx removed = 0; + + for (;;) { + // Grab next partition to scan + size_t partition_begin = cl->claim_table_partition(partition_size); + size_t partition_end = partition_begin + partition_size; + if (partition_begin >= table_half) { + // End of table + break; + } + + // Scan the partition followed by the sibling partition in the second half of the table + removed += unlink_or_oops_do(cl, partition_begin, partition_end, worker_id); + removed += unlink_or_oops_do(cl, table_half + partition_begin, table_half + partition_end, worker_id); + } + + // Delayed update avoid contention on the table lock + if (removed > 0) { + MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag); + _table->_entries -= removed; + _entries_removed += removed; + } +} + +uintx G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, + size_t partition_begin, + size_t partition_end, + uint worker_id) { + uintx removed = 0; + for (size_t bucket = partition_begin; bucket < partition_end; bucket++) { + G1StringDedupEntry** entry = _table->bucket(bucket); + while (*entry != NULL) { + oop* p = (oop*)(*entry)->obj_addr(); + if (cl->is_alive(*p)) { + cl->keep_alive(p); + if (cl->is_resizing()) { + // We are resizing the table, transfer entry to the new table + _table->transfer(entry, cl->resized_table()); + } else { + if (cl->is_rehashing()) { + // We are rehashing the table, rehash the entry but keep it + // in the table. We can't transfer entries into the new table + // at this point since we don't have exclusive access to all + // destination partitions. finish_rehash() will do a single + // threaded transfer of all entries. 
+ typeArrayOop value = (typeArrayOop)*p; + unsigned int hash = hash_code(value); + (*entry)->set_hash(hash); + } + + // Move to next entry + entry = (*entry)->next_addr(); + } + } else { + // Not alive, remove entry from table + _table->remove(entry, worker_id); + removed++; + } + } + } + + return removed; +} + +G1StringDedupTable* G1StringDedupTable::prepare_rehash() { + if (!_table->_rehash_needed && !StringDeduplicationRehashALot) { + // Rehash not needed + return NULL; + } + + // Update statistics + _rehash_count++; + + // Compute new hash seed + _table->_hash_seed = AltHashing::compute_seed(); + + // Allocate the new table, same size and hash seed + return new G1StringDedupTable(_table->_size, _table->_hash_seed); +} + +void G1StringDedupTable::finish_rehash(G1StringDedupTable* rehashed_table) { + assert(rehashed_table != NULL, "Invalid table"); + + // Move all newly rehashed entries into the correct buckets in the new table + for (size_t bucket = 0; bucket < _table->_size; bucket++) { + G1StringDedupEntry** entry = _table->bucket(bucket); + while (*entry != NULL) { + _table->transfer(entry, rehashed_table); + } + } + + rehashed_table->_entries = _table->_entries; + + // Free old table + delete _table; + + // Install new table + _table = rehashed_table; +} + +void G1StringDedupTable::verify() { + for (size_t bucket = 0; bucket < _table->_size; bucket++) { + // Verify entries + G1StringDedupEntry** entry = _table->bucket(bucket); + while (*entry != NULL) { + typeArrayOop value = (*entry)->obj(); + guarantee(value != NULL, "Object must not be NULL"); + guarantee(Universe::heap()->is_in_reserved(value), "Object must be on the heap"); + guarantee(!value->is_forwarded(), "Object must not be forwarded"); + guarantee(value->is_typeArray(), "Object must be a typeArrayOop"); + unsigned int hash = hash_code(value); + guarantee((*entry)->hash() == hash, "Table entry has inorrect hash"); + guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index"); + entry = (*entry)->next_addr(); + } + + // Verify that we do not have entries with identical oops or identical arrays. + // We only need to compare entries in the same bucket. If the same oop or an + // identical array has been inserted more than once into different/incorrect + // buckets the verification step above will catch that. 
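Pulling the policy constants from earlier in this file together: with _grow_load_factor = 2.0 and _shrink_load_factor = 2.0/3, prepare_resize() doubles the table once it holds more than twice as many entries as buckets and halves it below roughly 67% occupancy, while a lookup that walks more than _rehash_multiple * _grow_load_factor = 120 links in a single chain flags the table for rehashing with a new seed. A stand-alone sketch of the resize decision (simplified; not the HotSpot code itself):

#include <stddef.h>

// Returns the new table size, or 0 when no resize is called for.
size_t resize_decision(size_t size, size_t entries) {
  const double grow_load   = 2.0;              // _grow_load_factor
  const double shrink_load = grow_load / 3.0;  // _shrink_load_factor
  const size_t min_size = (size_t)1 << 10;     // _min_size
  const size_t max_size = (size_t)1 << 24;     // _max_size
  if (entries > (size_t)(size * grow_load)) {
    return (size * 2 <= max_size) ? size * 2 : 0;   // grow, unless already at max
  }
  if (entries < (size_t)(size * shrink_load)) {
    return (size / 2 >= min_size) ? size / 2 : 0;   // shrink, unless already at min
  }
  return 0;
}

// A single chain longer than this marks the table for rehashing with a new seed.
const size_t rehash_chain_threshold = 60 * 2;  // _rehash_multiple * _grow_load_factor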
+ G1StringDedupEntry** entry1 = _table->bucket(bucket); + while (*entry1 != NULL) { + typeArrayOop value1 = (*entry1)->obj(); + G1StringDedupEntry** entry2 = (*entry1)->next_addr(); + while (*entry2 != NULL) { + typeArrayOop value2 = (*entry2)->obj(); + guarantee(!equals(value1, value2), "Table entries must not have identical arrays"); + entry2 = (*entry2)->next_addr(); + } + entry1 = (*entry1)->next_addr(); + } + } +} + +void G1StringDedupTable::trim_entry_cache() { + MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag); + size_t max_cache_size = (size_t)(_table->_size * _max_cache_factor); + _entry_cache->trim(max_cache_size); +} + +void G1StringDedupTable::print_statistics(outputStream* st) { + st->print_cr( + " [Table]\n" + " [Memory Usage: "G1_STRDEDUP_BYTES_FORMAT_NS"]\n" + " [Size: "SIZE_FORMAT", Min: "SIZE_FORMAT", Max: "SIZE_FORMAT"]\n" + " [Entries: "UINTX_FORMAT", Load: "G1_STRDEDUP_PERCENT_FORMAT_NS", Cached: " UINTX_FORMAT ", Added: "UINTX_FORMAT", Removed: "UINTX_FORMAT"]\n" + " [Resize Count: "UINTX_FORMAT", Shrink Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS"), Grow Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS")]\n" + " [Rehash Count: "UINTX_FORMAT", Rehash Threshold: "UINTX_FORMAT", Hash Seed: 0x%x]\n" + " [Age Threshold: "UINTX_FORMAT"]", + G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)), + _table->_size, _min_size, _max_size, + _table->_entries, (double)_table->_entries / (double)_table->_size * 100.0, _entry_cache->size(), _entries_added, _entries_removed, + _resize_count, _table->_shrink_threshold, _shrink_load_factor * 100.0, _table->_grow_threshold, _grow_load_factor * 100.0, + _rehash_count, _rehash_threshold, _table->_hash_seed, + StringDeduplicationAgeThreshold); +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupTable.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP + +#include "gc_implementation/g1/g1StringDedupStat.hpp" +#include "runtime/mutexLocker.hpp" + +class G1StringDedupEntryCache; + +// +// Table entry in the deduplication hashtable. Points weakly to the +// character array. 
Can be chained in a linked list in case of hash
+// collisions or when placed in a freelist in the entry cache.
+//
+class G1StringDedupEntry : public CHeapObj<mtGC> {
+private:
+  G1StringDedupEntry* _next;
+  unsigned int        _hash;
+  typeArrayOop        _obj;
+
+public:
+  G1StringDedupEntry() :
+    _next(NULL),
+    _hash(0),
+    _obj(NULL) {
+  }
+
+  G1StringDedupEntry* next() {
+    return _next;
+  }
+
+  G1StringDedupEntry** next_addr() {
+    return &_next;
+  }
+
+  void set_next(G1StringDedupEntry* next) {
+    _next = next;
+  }
+
+  unsigned int hash() {
+    return _hash;
+  }
+
+  void set_hash(unsigned int hash) {
+    _hash = hash;
+  }
+
+  typeArrayOop obj() {
+    return _obj;
+  }
+
+  typeArrayOop* obj_addr() {
+    return &_obj;
+  }
+
+  void set_obj(typeArrayOop obj) {
+    _obj = obj;
+  }
+};
+
+//
+// The deduplication hashtable keeps track of all unique character arrays used
+// by String objects. Each table entry weakly points to a character array, allowing
+// otherwise unreachable character arrays to be declared dead and pruned from the
+// table.
+//
+// The table is dynamically resized to accommodate the current number of table entries.
+// The table has hash buckets with chains for hash collisions. If the average chain
+// length goes above or below given thresholds the table grows or shrinks accordingly.
+//
+// The table is also dynamically rehashed (using a new hash seed) if it becomes severely
+// unbalanced, i.e., a hash chain is significantly longer than average.
+//
+// All access to the table is protected by the StringDedupTable_lock, except during
+// safepoints, in which case GC workers are allowed to access the table partitions they
+// have claimed without first acquiring the lock. Note, however, that this applies only
+// to the table partitions (i.e., ranges of elements in _buckets), not to other parts of
+// the table such as the _entries field, statistics counters, etc.
+//
+class G1StringDedupTable : public CHeapObj<mtGC> {
+private:
+  // The currently active hashtable instance. Only modified when
+  // the table is resized or rehashed.
+  static G1StringDedupTable* _table;
+
+  // Cache for reuse and fast alloc/free of table entries.
+  static G1StringDedupEntryCache* _entry_cache;
+
+  G1StringDedupEntry** _buckets;
+  size_t               _size;
+  uintx                _entries;
+  uintx                _shrink_threshold;
+  uintx                _grow_threshold;
+  bool                 _rehash_needed;
+
+  // The hash seed also dictates which hash function to use. A
+  // zero hash seed means we will use the Java compatible hash
+  // function (which doesn't use a seed), and a non-zero hash
+  // seed means we use the murmur3 hash function.
+  jint                 _hash_seed;
+
+  // Constants governing table resize/rehash/cache.
+  static const size_t _min_size;
+  static const size_t _max_size;
+  static const double _grow_load_factor;
+  static const double _shrink_load_factor;
+  static const uintx  _rehash_multiple;
+  static const uintx  _rehash_threshold;
+  static const double _max_cache_factor;
+
+  // Table statistics, only used for logging.
+  static uintx _entries_added;
+  static uintx _entries_removed;
+  static uintx _resize_count;
+  static uintx _rehash_count;
+
+  G1StringDedupTable(size_t size, jint hash_seed = 0);
+  ~G1StringDedupTable();
+
+  // Returns the hash bucket at the given index.
+  G1StringDedupEntry** bucket(size_t index) {
+    return _buckets + index;
+  }
+
+  // Returns the hash bucket index for the given hash code.
+  size_t hash_to_index(unsigned int hash) {
+    return (size_t)hash & (_size - 1);
+  }
+
+  // Adds a new table entry to the given hash bucket.
+ void add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list); + + // Removes the given table entry from the table. + void remove(G1StringDedupEntry** pentry, uint worker_id); + + // Transfers a table entry from the current table to the destination table. + void transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest); + + // Returns an existing character array in the given hash bucket, or NULL + // if no matching character array exists. + typeArrayOop lookup(typeArrayOop value, unsigned int hash, + G1StringDedupEntry** list, uintx &count); + + // Returns an existing character array in the table, or inserts a new + // table entry if no matching character array exists. + typeArrayOop lookup_or_add_inner(typeArrayOop value, unsigned int hash); + + // Thread safe lookup or add of table entry + static typeArrayOop lookup_or_add(typeArrayOop value, unsigned int hash) { + // Protect the table from concurrent access. Also note that this lock + // acts as a fence for _table, which could have been replaced by a new + // instance if the table was resized or rehashed. + MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag); + return _table->lookup_or_add_inner(value, hash); + } + + // Returns true if the hashtable is currently using a Java compatible + // hash function. + static bool use_java_hash() { + return _table->_hash_seed == 0; + } + + static bool equals(typeArrayOop value1, typeArrayOop value2); + + // Computes the hash code for the given character array, using the + // currently active hash function and hash seed. + static unsigned int hash_code(typeArrayOop value); + + static uintx unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, + size_t partition_begin, + size_t partition_end, + uint worker_id); + +public: + static void create(); + + // Deduplicates the given String object, or adds its backing + // character array to the deduplication hashtable. + static void deduplicate(oop java_string, G1StringDedupStat& stat); + + // If a table resize is needed, returns a newly allocated empty + // hashtable of the proper size. + static G1StringDedupTable* prepare_resize(); + + // Installs a newly resized table as the currently active table + // and deletes the previously active table. + static void finish_resize(G1StringDedupTable* resized_table); + + // If a table rehash is needed, returns a newly allocated empty + // hashtable and updates the hash seed. + static G1StringDedupTable* prepare_rehash(); + + // Transfers rehashed entries from the currently active table into + // the new table. Installs the new table as the currently active table + // and deletes the previously active table. + static void finish_rehash(G1StringDedupTable* rehashed_table); + + // If the table entry cache has grown too large, trim it down according to policy + static void trim_entry_cache(); + + static void unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id); + + static void print_statistics(outputStream* st); + static void verify(); +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTABLE_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedupThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupThread.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/g1/g1Log.hpp" +#include "gc_implementation/g1/g1StringDedup.hpp" +#include "gc_implementation/g1/g1StringDedupTable.hpp" +#include "gc_implementation/g1/g1StringDedupThread.hpp" +#include "gc_implementation/g1/g1StringDedupQueue.hpp" + +G1StringDedupThread* G1StringDedupThread::_thread = NULL; + +G1StringDedupThread::G1StringDedupThread() : + ConcurrentGCThread() { + set_name("String Deduplication Thread"); + create_and_start(); +} + +G1StringDedupThread::~G1StringDedupThread() { + ShouldNotReachHere(); +} + +void G1StringDedupThread::create() { + assert(G1StringDedup::is_enabled(), "String deduplication not enabled"); + assert(_thread == NULL, "One string deduplication thread allowed"); + _thread = new G1StringDedupThread(); +} + +G1StringDedupThread* G1StringDedupThread::thread() { + assert(G1StringDedup::is_enabled(), "String deduplication not enabled"); + assert(_thread != NULL, "String deduplication thread not created"); + return _thread; +} + +void G1StringDedupThread::print_on(outputStream* st) const { + st->print("\"%s\" ", name()); + Thread::print_on(st); + st->cr(); +} + +void G1StringDedupThread::run() { + G1StringDedupStat total_stat; + + initialize_in_thread(); + wait_for_universe_init(); + + // Main loop + for (;;) { + G1StringDedupStat stat; + + stat.mark_idle(); + + // Wait for the queue to become non-empty + G1StringDedupQueue::wait(); + + // Include this thread in safepoints + stsJoin(); + + stat.mark_exec(); + + // Process the queue + for (;;) { + oop java_string = G1StringDedupQueue::pop(); + if (java_string == NULL) { + break; + } + + G1StringDedupTable::deduplicate(java_string, stat); + + // Safepoint this thread if needed + if (stsShouldYield()) { + stat.mark_block(); + stsYield(NULL); + stat.mark_unblock(); + } + } + + G1StringDedupTable::trim_entry_cache(); + + stat.mark_done(); + + // Print statistics + total_stat.add(stat); + print(gclog_or_tty, stat, total_stat); + + // Exclude this thread from safepoints + stsLeave(); + } + + ShouldNotReachHere(); +} + +void G1StringDedupThread::print(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat) { + if (G1Log::fine() || PrintStringDeduplicationStatistics) { + G1StringDedupStat::print_summary(st, last_stat, total_stat); + if (PrintStringDeduplicationStatistics) { + G1StringDedupStat::print_statistics(st, last_stat, false); + G1StringDedupStat::print_statistics(st, total_stat, true); + G1StringDedupTable::print_statistics(st); + G1StringDedupQueue::print_statistics(st); + } + } +} diff -r a433eb716ce1 -r 
62c54fcc0a35 src/share/vm/gc_implementation/g1/g1StringDedupThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1StringDedupThread.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP +#define SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP + +#include "gc_implementation/g1/g1StringDedupStat.hpp" +#include "gc_implementation/shared/concurrentGCThread.hpp" + +// +// The deduplication thread is where the actual deduplication occurs. It waits for +// deduplication candidates to appear on the deduplication queue, removes them from +// the queue and tries to deduplicate them. It uses the deduplication hashtable to +// find identical, already existing, character arrays on the heap. The thread runs +// concurrently with the Java application but participates in safepoints to allow +// the GC to adjust and unlink oops from the deduplication queue and table. +// +class G1StringDedupThread: public ConcurrentGCThread { +private: + static G1StringDedupThread* _thread; + + G1StringDedupThread(); + ~G1StringDedupThread(); + + void print(outputStream* st, const G1StringDedupStat& last_stat, const G1StringDedupStat& total_stat); + +public: + static void create(); + static G1StringDedupThread* thread(); + + virtual void run(); + virtual void print_on(outputStream* st) const; +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1STRINGDEDUPTHREAD_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -285,6 +285,10 @@ product(uintx, G1MixedGCCountTarget, 8, \ "The target number of mixed GCs after a marking cycle.") \ \ + experimental(uintx, G1CodeRootsChunkCacheKeepPercent, 10, \ + "The amount of code root chunks that should be kept at most " \ + "as percentage of already allocated.") \ + \ experimental(uintx, G1OldCSetRegionThresholdPercent, 10, \ "An upper bound for the number of old CSet regions expressed " \ "as a percentage of the heap size.") \ diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp --- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -43,8 +43,6 @@ class G1ParScanClosure; class G1ParPushHeapRSClosure; -typedef G1ParCopyClosure G1ParScanHeapEvacClosure; - class FilterIntoCSClosure; class FilterOutOfRegionClosure; class G1CMOopClosure; @@ -61,7 +59,6 @@ #endif #define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ - f(G1ParScanHeapEvacClosure,_nv) \ f(G1ParScanClosure,_nv) \ f(G1ParPushHeapRSClosure,_nv) \ f(FilterIntoCSClosure,_nv) \ diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegion.cpp --- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -205,7 +205,7 @@ init_top_at_mark_start(); } -void HeapRegion::hr_clear(bool par, bool clear_space) { +void HeapRegion::hr_clear(bool par, bool clear_space, bool locked) { assert(_humongous_type == NotHumongous, "we should have already filtered out humongous regions"); assert(_humongous_start_region == NULL, @@ -223,7 +223,11 @@ if (!par) { // If this is parallel, this will be done later. 
HeapRegionRemSet* hrrs = rem_set(); - hrrs->clear(); + if (locked) { + hrrs->clear_locked(); + } else { + hrrs->clear(); + } _claimed = InitialClaimValue; } zero_marked_bytes(); @@ -352,7 +356,7 @@ _claimed(InitialClaimValue), _evacuation_failed(false), _prev_marked_bytes(0), _next_marked_bytes(0), _gc_efficiency(0.0), _young_type(NotYoung), _next_young_region(NULL), - _next_dirty_cards_region(NULL), _next(NULL), _pending_removal(false), + _next_dirty_cards_region(NULL), _next(NULL), _prev(NULL), _pending_removal(false), #ifdef ASSERT _containing_set(NULL), #endif // ASSERT @@ -710,14 +714,14 @@ } HeapRegionRemSet* hrrs = rem_set(); - int strong_code_roots_length = hrrs->strong_code_roots_list_length(); + size_t strong_code_roots_length = hrrs->strong_code_roots_list_length(); // if this region is empty then there should be no entries // on its strong code root list if (is_empty()) { if (strong_code_roots_length > 0) { gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is empty " - "but has "INT32_FORMAT" code root entries", + "but has "SIZE_FORMAT" code root entries", bottom(), end(), strong_code_roots_length); *failures = true; } @@ -727,7 +731,7 @@ if (continuesHumongous()) { if (strong_code_roots_length > 0) { gclog_or_tty->print_cr("region "HR_FORMAT" is a continuation of a humongous " - "region but has "INT32_FORMAT" code root entries", + "region but has "SIZE_FORMAT" code root entries", HR_FORMAT_PARAMS(this), strong_code_roots_length); *failures = true; } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegion.hpp --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -271,6 +271,7 @@ // Fields used by the HeapRegionSetBase class and subclasses. HeapRegion* _next; + HeapRegion* _prev; #ifdef ASSERT HeapRegionSetBase* _containing_set; #endif // ASSERT @@ -531,11 +532,13 @@ // Methods used by the HeapRegionSetBase class and subclasses. - // Getter and setter for the next field used to link regions into + // Getter and setter for the next and prev fields used to link regions into // linked lists. HeapRegion* next() { return _next; } + HeapRegion* prev() { return _prev; } void set_next(HeapRegion* next) { _next = next; } + void set_prev(HeapRegion* prev) { _prev = prev; } // Every region added to a set is tagged with a reference to that // set. This is used for doing consistency checking to make sure that @@ -596,7 +599,7 @@ void save_marks(); // Reset HR stuff to default values. - void hr_clear(bool par, bool clear_space); + void hr_clear(bool par, bool clear_space, bool locked = false); void par_clear(); // Get the start of the unmarked area in this region. diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ #include "gc_implementation/g1/heapRegionRemSet.hpp" #include "gc_implementation/g1/heapRegionSeq.inline.hpp" #include "memory/allocation.hpp" +#include "memory/padded.inline.hpp" #include "memory/space.inline.hpp" #include "oops/oop.inline.hpp" #include "utilities/bitMap.inline.hpp" @@ -259,10 +260,9 @@ size_t OtherRegionsTable::_fine_eviction_stride = 0; size_t OtherRegionsTable::_fine_eviction_sample_size = 0; -OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) : +OtherRegionsTable::OtherRegionsTable(HeapRegion* hr, Mutex* m) : _g1h(G1CollectedHeap::heap()), - _m(Mutex::leaf, "An OtherRegionsTable lock", true), - _hr(hr), + _hr(hr), _m(m), _coarse_map(G1CollectedHeap::heap()->max_regions(), false /* in-resource-area */), _fine_grain_regions(NULL), @@ -358,46 +358,66 @@ "just checking"); } -int** OtherRegionsTable::_from_card_cache = NULL; -size_t OtherRegionsTable::_from_card_cache_max_regions = 0; -size_t OtherRegionsTable::_from_card_cache_mem_size = 0; +int** FromCardCache::_cache = NULL; +uint FromCardCache::_max_regions = 0; +size_t FromCardCache::_static_mem_size = 0; -void OtherRegionsTable::init_from_card_cache(size_t max_regions) { - _from_card_cache_max_regions = max_regions; +void FromCardCache::initialize(uint n_par_rs, uint max_num_regions) { + guarantee(_cache == NULL, "Should not call this multiple times"); - int n_par_rs = HeapRegionRemSet::num_par_rem_sets(); - _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs, mtGC); - for (int i = 0; i < n_par_rs; i++) { - _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions, mtGC); - for (size_t j = 0; j < max_regions; j++) { - _from_card_cache[i][j] = -1; // An invalid value. + _max_regions = max_num_regions; + _cache = Padded2DArray::create_unfreeable(n_par_rs, + _max_regions, + &_static_mem_size); + + for (uint i = 0; i < n_par_rs; i++) { + for (uint j = 0; j < _max_regions; j++) { + set(i, j, InvalidCard); } } - _from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int); } -void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) { - for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { - assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max."); - for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) { - _from_card_cache[i][j] = -1; // An invalid value. 
+void FromCardCache::shrink(uint new_num_regions) { + for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + assert(new_num_regions <= _max_regions, "Must be within max."); + for (uint j = new_num_regions; j < _max_regions; j++) { + set(i, j, InvalidCard); } } } #ifndef PRODUCT -void OtherRegionsTable::print_from_card_cache() { - for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { - for (size_t j = 0; j < _from_card_cache_max_regions; j++) { - gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.", - i, j, _from_card_cache[i][j]); +void FromCardCache::print(outputStream* out) { + for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + for (uint j = 0; j < _max_regions; j++) { + out->print_cr("_from_card_cache["UINT32_FORMAT"]["UINT32_FORMAT"] = "INT32_FORMAT".", + i, j, at(i, j)); } } } #endif +void FromCardCache::clear(uint region_idx) { + uint num_par_remsets = HeapRegionRemSet::num_par_rem_sets(); + for (uint i = 0; i < num_par_remsets; i++) { + set(i, region_idx, InvalidCard); + } +} + +void OtherRegionsTable::init_from_card_cache(uint max_regions) { + FromCardCache::initialize(HeapRegionRemSet::num_par_rem_sets(), max_regions); +} + +void OtherRegionsTable::shrink_from_card_cache(uint new_num_regions) { + FromCardCache::shrink(new_num_regions); +} + +void OtherRegionsTable::print_from_card_cache() { + FromCardCache::print(); +} + void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) { - size_t cur_hrs_ind = (size_t) hr()->hrs_index(); + uint cur_hrs_ind = hr()->hrs_index(); if (G1TraceHeapRegionRememberedSet) { gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").", @@ -410,19 +430,17 @@ int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift); if (G1TraceHeapRegionRememberedSet) { - gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)", + gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = "INT32_FORMAT")", hr()->bottom(), from_card, - _from_card_cache[tid][cur_hrs_ind]); + FromCardCache::at((uint)tid, cur_hrs_ind)); } - if (from_card == _from_card_cache[tid][cur_hrs_ind]) { + if (FromCardCache::contains_or_replace((uint)tid, cur_hrs_ind, from_card)) { if (G1TraceHeapRegionRememberedSet) { gclog_or_tty->print_cr(" from-card cache hit."); } assert(contains_reference(from), "We just added it!"); return; - } else { - _from_card_cache[tid][cur_hrs_ind] = from_card; } // Note that this may be a continued H region. @@ -442,7 +460,7 @@ size_t ind = from_hrs_ind & _mod_max_fine_entries_mask; PerRegionTable* prt = find_region_table(ind, from_hr); if (prt == NULL) { - MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); // Confirm that it's really not there... prt = find_region_table(ind, from_hr); if (prt == NULL) { @@ -544,7 +562,7 @@ jint OtherRegionsTable::_n_coarsenings = 0; PerRegionTable* OtherRegionsTable::delete_region_table() { - assert(_m.owned_by_self(), "Precondition"); + assert(_m->owned_by_self(), "Precondition"); assert(_n_fine_entries == _max_fine_entries, "Precondition"); PerRegionTable* max = NULL; jint max_occ = 0; @@ -676,8 +694,6 @@ size_t OtherRegionsTable::occupied() const { - // Cast away const in this case. - MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); size_t sum = occ_fine(); sum += occ_sparse(); sum += occ_coarse(); @@ -707,8 +723,6 @@ } size_t OtherRegionsTable::mem_size() const { - // Cast away const in this case. 
- MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); size_t sum = 0; // all PRTs are of the same size so it is sufficient to query only one of them. if (_first_all_fine_prts != NULL) { @@ -724,7 +738,7 @@ } size_t OtherRegionsTable::static_mem_size() { - return _from_card_cache_mem_size; + return FromCardCache::static_mem_size(); } size_t OtherRegionsTable::fl_mem_size() { @@ -732,14 +746,10 @@ } void OtherRegionsTable::clear_fcc() { - size_t hrs_idx = hr()->hrs_index(); - for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { - _from_card_cache[i][hrs_idx] = -1; - } + FromCardCache::clear(hr()->hrs_index()); } void OtherRegionsTable::clear() { - MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); // if there are no entries, skip this step if (_first_all_fine_prts != NULL) { guarantee(_first_all_fine_prts != NULL && _last_all_fine_prts != NULL, "just checking"); @@ -759,7 +769,7 @@ } void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) { - MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); size_t hrs_ind = (size_t) from_hr->hrs_index(); size_t ind = hrs_ind & _mod_max_fine_entries_mask; if (del_single_region_table(ind, from_hr)) { @@ -768,15 +778,15 @@ _coarse_map.par_at_put(hrs_ind, 0); } // Check to see if any of the fcc entries come from here. - size_t hr_ind = (size_t) hr()->hrs_index(); - for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) { - int fcc_ent = _from_card_cache[tid][hr_ind]; - if (fcc_ent != -1) { + uint hr_ind = hr()->hrs_index(); + for (uint tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) { + int fcc_ent = FromCardCache::at(tid, hr_ind); + if (fcc_ent != FromCardCache::InvalidCard) { HeapWord* card_addr = (HeapWord*) (uintptr_t(fcc_ent) << CardTableModRefBS::card_shift); if (hr()->is_in_reserved(card_addr)) { // Clear the from card cache. - _from_card_cache[tid][hr_ind] = -1; + FromCardCache::set(tid, hr_ind, FromCardCache::InvalidCard); } } } @@ -805,7 +815,7 @@ bool OtherRegionsTable::contains_reference(OopOrNarrowOopStar from) const { // Cast away const in this case. - MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + MutexLockerEx x((Mutex*)_m, Mutex::_no_safepoint_check_flag); return contains_reference_locked(from); } @@ -832,8 +842,6 @@ "Must be in range."); return _sparse_table.contains_card(hr_ind, card_index); } - - } void @@ -844,13 +852,15 @@ // Determines how many threads can add records to an rset in parallel. // This can be done by either mutator threads together with the // concurrent refinement threads or GC threads. 
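The from-card cache refactoring in the hunks above keeps the existing filtering behaviour: each (worker, region) slot remembers the last card that was recorded, and a repeated reference from that same card skips the remembered set update. A stand-alone model of contains_or_replace() for a single slot (illustrative only; the real cache is the padded 2D array shown later in this patch):

#include <assert.h>

static int cached_card = -1;              // one (worker, region) slot; -1 plays the role of InvalidCard

static bool contains_or_replace(int card) {
  if (cached_card == card) {
    return true;                          // hit: same card as last time, caller skips the update
  }
  cached_card = card;                     // miss: remember the card and take the slow path
  return false;
}

int main() {
  assert(!contains_or_replace(42));       // first reference from card 42 goes to the slow path
  assert( contains_or_replace(42));       // a duplicate reference from card 42 is filtered
  assert(!contains_or_replace(43));       // a new card replaces the cached one
  return 0;
}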
-int HeapRegionRemSet::num_par_rem_sets() { - return (int)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads); +uint HeapRegionRemSet::num_par_rem_sets() { + return (uint)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads); } HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr) - : _bosa(bosa), _strong_code_roots_list(NULL), _other_regions(hr) { + : _bosa(bosa), + _m(Mutex::leaf, FormatBuffer<128>("HeapRegionRemSet lock #"UINT32_FORMAT, hr->hrs_index()), true), + _code_roots(), _other_regions(hr, &_m) { reset_for_par_iteration(); } @@ -883,7 +893,7 @@ } #ifndef PRODUCT -void HeapRegionRemSet::print() const { +void HeapRegionRemSet::print() { HeapRegionRemSetIterator iter(this); size_t card_index; while (iter.has_next(card_index)) { @@ -909,14 +919,14 @@ } void HeapRegionRemSet::clear() { - if (_strong_code_roots_list != NULL) { - delete _strong_code_roots_list; - } - _strong_code_roots_list = new (ResourceObj::C_HEAP, mtGC) - GrowableArray(10, 0, NULL, true); + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + clear_locked(); +} +void HeapRegionRemSet::clear_locked() { + _code_roots.clear(); _other_regions.clear(); - assert(occupied() == 0, "Should be clear."); + assert(occupied_locked() == 0, "Should be clear."); reset_for_par_iteration(); } @@ -932,27 +942,18 @@ _other_regions.scrub(ctbs, region_bm, card_bm); } - // Code roots support void HeapRegionRemSet::add_strong_code_root(nmethod* nm) { assert(nm != NULL, "sanity"); - // Search for the code blob from the RHS to avoid - // duplicate entries as much as possible - if (_strong_code_roots_list->find_from_end(nm) < 0) { - // Code blob isn't already in the list - _strong_code_roots_list->push(nm); - } + _code_roots.add(nm); } void HeapRegionRemSet::remove_strong_code_root(nmethod* nm) { assert(nm != NULL, "sanity"); - int idx = _strong_code_roots_list->find(nm); - if (idx >= 0) { - _strong_code_roots_list->remove_at(idx); - } + _code_roots.remove(nm); // Check that there were no duplicates - guarantee(_strong_code_roots_list->find(nm) < 0, "duplicate entry found"); + guarantee(!_code_roots.contains(nm), "duplicate entry found"); } class NMethodMigrationOopClosure : public OopClosure { @@ -1014,8 +1015,8 @@ GrowableArray to_be_retained(10); G1CollectedHeap* g1h = G1CollectedHeap::heap(); - while (_strong_code_roots_list->is_nonempty()) { - nmethod *nm = _strong_code_roots_list->pop(); + while (!_code_roots.is_empty()) { + nmethod *nm = _code_roots.pop(); if (nm != NULL) { NMethodMigrationOopClosure oop_cl(g1h, hr(), nm); nm->oops_do(&oop_cl); @@ -1038,20 +1039,16 @@ } void HeapRegionRemSet::strong_code_roots_do(CodeBlobClosure* blk) const { - for (int i = 0; i < _strong_code_roots_list->length(); i += 1) { - nmethod* nm = _strong_code_roots_list->at(i); - blk->do_code_blob(nm); - } + _code_roots.nmethods_do(blk); } size_t HeapRegionRemSet::strong_code_roots_mem_size() { - return sizeof(GrowableArray) + - _strong_code_roots_list->max_length() * sizeof(nmethod*); + return _code_roots.mem_size(); } //-------------------- Iteration -------------------- -HeapRegionRemSetIterator:: HeapRegionRemSetIterator(const HeapRegionRemSet* hrrs) : +HeapRegionRemSetIterator:: HeapRegionRemSetIterator(HeapRegionRemSet* hrrs) : _hrrs(hrrs), _g1h(G1CollectedHeap::heap()), _coarse_map(&hrrs->_other_regions._coarse_map), diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp --- 
a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP +#include "gc_implementation/g1/g1CodeCacheRemSet.hpp" #include "gc_implementation/g1/sparsePRT.hpp" // Remembered set for a heap region. Represent a set of "cards" that @@ -44,6 +45,54 @@ class HRRSCleanupTask : public SparsePRTCleanupTask { }; +// The FromCardCache remembers the most recently processed card on the heap on +// a per-region and per-thread basis. +class FromCardCache : public AllStatic { + private: + // Array of card indices. Indexed by thread X and heap region to minimize + // thread contention. + static int** _cache; + static uint _max_regions; + static size_t _static_mem_size; + + public: + enum { + InvalidCard = -1 // Card value of an invalid card, i.e. a card index not otherwise used. + }; + + static void clear(uint region_idx); + + // Returns true if the given card is in the cache at the given location, or + // replaces the card at that location and returns false. + static bool contains_or_replace(uint worker_id, uint region_idx, int card) { + int card_in_cache = at(worker_id, region_idx); + if (card_in_cache == card) { + return true; + } else { + set(worker_id, region_idx, card); + return false; + } + } + + static int at(uint worker_id, uint region_idx) { + return _cache[worker_id][region_idx]; + } + + static void set(uint worker_id, uint region_idx, int val) { + _cache[worker_id][region_idx] = val; + } + + static void initialize(uint n_par_rs, uint max_num_regions); + + static void shrink(uint new_num_regions); + + static void print(outputStream* out = gclog_or_tty) PRODUCT_RETURN; + + static size_t static_mem_size() { + return _static_mem_size; + } +}; + // The "_coarse_map" is a bitmap with one bit for each region, where set // bits indicate that the corresponding region may contain some pointer // into the owning region. @@ -72,7 +121,7 @@ friend class HeapRegionRemSetIterator; G1CollectedHeap* _g1h; - Mutex _m; + Mutex* _m; HeapRegion* _hr; // These are protected by "_m". @@ -118,18 +167,13 @@ // false. bool del_single_region_table(size_t ind, HeapRegion* hr); - // Indexed by thread X heap region, to minimize thread contention. - static int** _from_card_cache; - static size_t _from_card_cache_max_regions; - static size_t _from_card_cache_mem_size; - // link/add the given fine grain remembered set into the "all" list void link_to_all(PerRegionTable * prt); // unlink/remove the given fine grain remembered set into the "all" list void unlink_from_all(PerRegionTable * prt); public: - OtherRegionsTable(HeapRegion* hr); + OtherRegionsTable(HeapRegion* hr, Mutex* m); HeapRegion* hr() const { return _hr; } @@ -141,7 +185,6 @@ // objects. void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); - // Not const because it takes a lock. size_t occupied() const; size_t occ_fine() const; size_t occ_coarse() const; @@ -170,11 +213,11 @@ // Declare the heap size (in # of regions) to the OtherRegionsTable. 
// (Uses it to initialize from_card_cache). - static void init_from_card_cache(size_t max_regions); + static void init_from_card_cache(uint max_regions); // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. // Make sure any entries for higher regions are invalid. - static void shrink_from_card_cache(size_t new_n_regs); + static void shrink_from_card_cache(uint new_num_regions); static void print_from_card_cache(); }; @@ -192,9 +235,11 @@ G1BlockOffsetSharedArray* _bosa; G1BlockOffsetSharedArray* bosa() const { return _bosa; } - // A list of code blobs (nmethods) whose code contains pointers into + // A set of code blobs (nmethods) whose code contains pointers into // the region that owns this RSet. - GrowableArray* _strong_code_roots_list; + G1CodeRootSet _code_roots; + + Mutex _m; OtherRegionsTable _other_regions; @@ -218,17 +263,20 @@ static void print_event(outputStream* str, Event evnt); public: - HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, - HeapRegion* hr); + HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr); - static int num_par_rem_sets(); + static uint num_par_rem_sets(); static void setup_remset_size(); HeapRegion* hr() const { return _other_regions.hr(); } - size_t occupied() const { + size_t occupied() { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + return occupied_locked(); + } + size_t occupied_locked() { return _other_regions.occupied(); } size_t occ_fine() const { @@ -260,6 +308,7 @@ // The region is being reclaimed; clear its remset, and any mention of // entries for this region in other remsets. void clear(); + void clear_locked(); // Attempt to claim the region. Returns true iff this call caused an // atomic transition from Unclaimed to Claimed. @@ -289,6 +338,7 @@ // The actual # of bytes this hr_remset takes up. // Note also includes the strong code root set. size_t mem_size() { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); return _other_regions.mem_size() // This correction is necessary because the above includes the second // part. @@ -299,13 +349,13 @@ // Returns the memory occupancy of all static data structures associated // with remembered sets. static size_t static_mem_size() { - return OtherRegionsTable::static_mem_size(); + return OtherRegionsTable::static_mem_size() + G1CodeRootSet::static_mem_size(); } // Returns the memory occupancy of all free_list data structures associated // with remembered sets. static size_t fl_mem_size() { - return OtherRegionsTable::fl_mem_size(); + return OtherRegionsTable::fl_mem_size() + G1CodeRootSet::fl_mem_size(); } bool contains_reference(OopOrNarrowOopStar from) const { @@ -328,21 +378,21 @@ void strong_code_roots_do(CodeBlobClosure* blk) const; // Returns the number of elements in the strong code roots list - int strong_code_roots_list_length() { - return _strong_code_roots_list->length(); + size_t strong_code_roots_list_length() { + return _code_roots.length(); } // Returns true if the strong code roots contains the given // nmethod. bool strong_code_roots_list_contains(nmethod* nm) { - return _strong_code_roots_list->contains(nm); + return _code_roots.contains(nm); } // Returns the amount of memory, in bytes, currently // consumed by the strong code roots. size_t strong_code_roots_mem_size(); - void print() const; + void print() PRODUCT_RETURN; // Called during a stop-world phase to perform any deferred cleanups. static void cleanup(); @@ -350,12 +400,13 @@ // Declare the heap size (in # of regions) to the HeapRegionRemSet(s). 
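The occupied()/occupied_locked() and clear()/clear_locked() pairs introduced above follow a common wrapper idiom: the public method acquires the remembered set's lock and delegates to a _locked variant that assumes the caller is already serialized, for example because it already holds the lock or is running the locked hr_clear() path. A generic stand-alone sketch of the idiom (std::mutex stands in for the HotSpot Mutex; not the actual HeapRegionRemSet code):

#include <mutex>
#include <cstddef>

class Counter {
  std::mutex _m;
  size_t     _value;
public:
  Counter() : _value(0) {}
  size_t value() {                        // public entry point: takes the lock
    std::lock_guard<std::mutex> x(_m);
    return value_locked();
  }
  size_t value_locked() {                 // caller must already be serialized against mutation
    return _value;
  }
};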
// (Uses it to initialize from_card_cache). static void init_heap(uint max_regions) { - OtherRegionsTable::init_from_card_cache((size_t) max_regions); + G1CodeRootSet::initialize(); + OtherRegionsTable::init_from_card_cache(max_regions); } // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. static void shrink_heap(uint new_n_regs) { - OtherRegionsTable::shrink_from_card_cache((size_t) new_n_regs); + OtherRegionsTable::shrink_from_card_cache(new_n_regs); } #ifndef PRODUCT @@ -384,7 +435,7 @@ class HeapRegionRemSetIterator : public StackObj { // The region RSet over which we're iterating. - const HeapRegionRemSet* _hrrs; + HeapRegionRemSet* _hrrs; // Local caching of HRRS fields. const BitMap* _coarse_map; @@ -441,7 +492,7 @@ public: // We require an iterator to be initialized before use, so the // constructor does little. - HeapRegionRemSetIterator(const HeapRegionRemSet* hrrs); + HeapRegionRemSetIterator(HeapRegionRemSet* hrrs); // If there remains one or more cards to be yielded, returns true and // sets "card_index" to one of those cards (which is then considered diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegionSeq.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -25,7 +25,7 @@ #include "precompiled.hpp" #include "gc_implementation/g1/heapRegion.hpp" #include "gc_implementation/g1/heapRegionSeq.inline.hpp" -#include "gc_implementation/g1/heapRegionSets.hpp" +#include "gc_implementation/g1/heapRegionSet.hpp" #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" #include "memory/allocation.hpp" diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegionSet.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionSet.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionSet.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -23,171 +23,60 @@ */ #include "precompiled.hpp" +#include "gc_implementation/g1/heapRegionRemSet.hpp" #include "gc_implementation/g1/heapRegionSet.inline.hpp" -uint HeapRegionSetBase::_unrealistically_long_length = 0; -HRSPhase HeapRegionSetBase::_phase = HRSPhaseNone; - -//////////////////// HeapRegionSetBase //////////////////// - -void HeapRegionSetBase::set_unrealistically_long_length(uint len) { - guarantee(_unrealistically_long_length == 0, "should only be set once"); - _unrealistically_long_length = len; -} +uint FreeRegionList::_unrealistically_long_length = 0; void HeapRegionSetBase::fill_in_ext_msg(hrs_ext_msg* msg, const char* message) { - msg->append("[%s] %s ln: %u rn: %u cy: "SIZE_FORMAT" ud: "SIZE_FORMAT, - name(), message, length(), region_num(), - total_capacity_bytes(), total_used_bytes()); + msg->append("[%s] %s ln: %u cy: "SIZE_FORMAT, + name(), message, length(), total_capacity_bytes()); fill_in_ext_msg_extra(msg); } -bool HeapRegionSetBase::verify_region(HeapRegion* hr, - HeapRegionSetBase* expected_containing_set) { - const char* error_message = NULL; - - if (!regions_humongous()) { - if (hr->isHumongous()) { - error_message = "the region should not be humongous"; - } - } else { - if (!hr->isHumongous() || !hr->startsHumongous()) { - error_message = "the region should be 'starts humongous'"; - } - } - - if (!regions_empty()) { - if (hr->is_empty()) { - error_message = "the region should not be empty"; - } - } else { - if (!hr->is_empty()) { - error_message = "the region should be empty"; - } - } - -#ifdef ASSERT - // The _containing_set field is only available when ASSERT is defined. - if (hr->containing_set() != expected_containing_set) { - error_message = "inconsistent containing set found"; - } -#endif // ASSERT - - const char* extra_error_message = verify_region_extra(hr); - if (extra_error_message != NULL) { - error_message = extra_error_message; - } - - if (error_message != NULL) { - outputStream* out = tty; - out->cr(); - out->print_cr("## [%s] %s", name(), error_message); - out->print_cr("## Offending Region: "PTR_FORMAT, hr); - out->print_cr(" "HR_FORMAT, HR_FORMAT_PARAMS(hr)); -#ifdef ASSERT - out->print_cr(" containing set: "PTR_FORMAT, hr->containing_set()); -#endif // ASSERT - out->print_cr("## Offending Region Set: "PTR_FORMAT, this); - print_on(out); - return false; - } else { - return true; - } +#ifndef PRODUCT +void HeapRegionSetBase::verify_region(HeapRegion* hr) { + assert(hr->containing_set() == this, err_msg("Inconsistent containing set for %u", hr->hrs_index())); + assert(!hr->is_young(), err_msg("Adding young region %u", hr->hrs_index())); // currently we don't use these sets for young regions + assert(hr->isHumongous() == regions_humongous(), err_msg("Wrong humongous state for region %u and set %s", hr->hrs_index(), name())); + assert(hr->is_empty() == regions_empty(), err_msg("Wrong empty state for region %u and set %s", hr->hrs_index(), name())); + assert(hr->rem_set()->verify_ready_for_par_iteration(), err_msg("Wrong iteration state %u", hr->hrs_index())); } +#endif void HeapRegionSetBase::verify() { // It's important that we also observe the MT safety protocol even // for the verification calls. If we do verification without the // appropriate locks and the set changes underneath our feet // verification might fail and send us on a wild goose chase. 
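
Editorial note: the rewritten verify_region() above replaces the old per-subclass verify_region_extra() hooks with direct asserts against the set's configured humongous/empty expectations and the region's containing set. A simplified standalone sketch of that shape (hypothetical Region/RegionSet types, not HotSpot code):

#include <cassert>
#include <cstdio>

// Stand-ins for HeapRegion / HeapRegionSetBase, only to illustrate the
// "set stores its expectations, add() checks them" pattern.
struct Region {
  bool humongous;
  bool empty;
  const void* containing_set;  // which set currently owns this region
};

class RegionSet {
  const char* _name;
  bool _expect_humongous;
  bool _expect_empty;
public:
  RegionSet(const char* name, bool humongous, bool empty)
    : _name(name), _expect_humongous(humongous), _expect_empty(empty) {}

  // Every region handed to the set must match the set-wide humongous/empty
  // expectation and must already point back at this set.
  void verify_region(const Region* r) const {
    assert(r->containing_set == this && "inconsistent containing set");
    assert(r->humongous == _expect_humongous && "wrong humongous state");
    assert(r->empty == _expect_empty && "wrong empty state");
  }

  void add(Region* r) {
    r->containing_set = this;
    verify_region(r);
  }

  const char* name() const { return _name; }
};

int main() {
  RegionSet old_set("Old", /* humongous */ false, /* empty */ false);
  Region r{false, false, nullptr};
  old_set.add(&r);
  std::printf("region accepted by %s set\n", old_set.name());
  return 0;
}
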
- hrs_assert_mt_safety_ok(this); - - guarantee(( is_empty() && length() == 0 && region_num() == 0 && - total_used_bytes() == 0 && total_capacity_bytes() == 0) || - (!is_empty() && length() >= 0 && region_num() >= 0 && - total_used_bytes() >= 0 && total_capacity_bytes() >= 0), - hrs_ext_msg(this, "invariant")); + check_mt_safety(); - guarantee((!regions_humongous() && region_num() == length()) || - ( regions_humongous() && region_num() >= length()), - hrs_ext_msg(this, "invariant")); - - guarantee(!regions_empty() || total_used_bytes() == 0, - hrs_ext_msg(this, "invariant")); - - guarantee(total_used_bytes() <= total_capacity_bytes(), + guarantee(( is_empty() && length() == 0 && total_capacity_bytes() == 0) || + (!is_empty() && length() >= 0 && total_capacity_bytes() >= 0), hrs_ext_msg(this, "invariant")); } void HeapRegionSetBase::verify_start() { // See comment in verify() about MT safety and verification. - hrs_assert_mt_safety_ok(this); + check_mt_safety(); assert(!_verify_in_progress, hrs_ext_msg(this, "verification should not be in progress")); // Do the basic verification first before we do the checks over the regions. HeapRegionSetBase::verify(); - _calc_length = 0; - _calc_region_num = 0; - _calc_total_capacity_bytes = 0; - _calc_total_used_bytes = 0; _verify_in_progress = true; } -void HeapRegionSetBase::verify_next_region(HeapRegion* hr) { - // See comment in verify() about MT safety and verification. - hrs_assert_mt_safety_ok(this); - assert(_verify_in_progress, - hrs_ext_msg(this, "verification should be in progress")); - - guarantee(verify_region(hr, this), hrs_ext_msg(this, "region verification")); - - _calc_length += 1; - _calc_region_num += hr->region_num(); - _calc_total_capacity_bytes += hr->capacity(); - _calc_total_used_bytes += hr->used(); -} - void HeapRegionSetBase::verify_end() { // See comment in verify() about MT safety and verification. 
- hrs_assert_mt_safety_ok(this); + check_mt_safety(); assert(_verify_in_progress, hrs_ext_msg(this, "verification should be in progress")); - guarantee(length() == _calc_length, - hrs_err_msg("[%s] length: %u should be == calc length: %u", - name(), length(), _calc_length)); - - guarantee(region_num() == _calc_region_num, - hrs_err_msg("[%s] region num: %u should be == calc region num: %u", - name(), region_num(), _calc_region_num)); - - guarantee(total_capacity_bytes() == _calc_total_capacity_bytes, - hrs_err_msg("[%s] capacity bytes: "SIZE_FORMAT" should be == " - "calc capacity bytes: "SIZE_FORMAT, - name(), - total_capacity_bytes(), _calc_total_capacity_bytes)); - - guarantee(total_used_bytes() == _calc_total_used_bytes, - hrs_err_msg("[%s] used bytes: "SIZE_FORMAT" should be == " - "calc used bytes: "SIZE_FORMAT, - name(), total_used_bytes(), _calc_total_used_bytes)); - _verify_in_progress = false; } -void HeapRegionSetBase::clear_phase() { - assert(_phase != HRSPhaseNone, "pre-condition"); - _phase = HRSPhaseNone; -} - -void HeapRegionSetBase::set_phase(HRSPhase phase) { - assert(_phase == HRSPhaseNone, "pre-condition"); - assert(phase != HRSPhaseNone, "pre-condition"); - _phase = phase; -} - void HeapRegionSetBase::print_on(outputStream* out, bool print_contents) { out->cr(); out->print_cr("Set: %s ("PTR_FORMAT")", name(), this); @@ -196,76 +85,38 @@ out->print_cr(" empty : %s", BOOL_TO_STR(regions_empty())); out->print_cr(" Attributes"); out->print_cr(" length : %14u", length()); - out->print_cr(" region num : %14u", region_num()); out->print_cr(" total capacity : "SIZE_FORMAT_W(14)" bytes", total_capacity_bytes()); - out->print_cr(" total used : "SIZE_FORMAT_W(14)" bytes", - total_used_bytes()); -} - -void HeapRegionSetBase::clear() { - _length = 0; - _region_num = 0; - _total_used_bytes = 0; } -HeapRegionSetBase::HeapRegionSetBase(const char* name) +HeapRegionSetBase::HeapRegionSetBase(const char* name, bool humongous, bool empty, HRSMtSafeChecker* mt_safety_checker) : _name(name), _verify_in_progress(false), - _calc_length(0), _calc_region_num(0), - _calc_total_capacity_bytes(0), _calc_total_used_bytes(0) { } - -//////////////////// HeapRegionSet //////////////////// - -void HeapRegionSet::update_from_proxy(HeapRegionSet* proxy_set) { - hrs_assert_mt_safety_ok(this); - hrs_assert_mt_safety_ok(proxy_set); - hrs_assert_sets_match(this, proxy_set); - - verify_optional(); - proxy_set->verify_optional(); - - if (proxy_set->is_empty()) return; + _is_humongous(humongous), _is_empty(empty), _mt_safety_checker(mt_safety_checker), + _count() +{ } - assert(proxy_set->length() <= _length, - hrs_err_msg("[%s] proxy set length: %u should be <= length: %u", - name(), proxy_set->length(), _length)); - _length -= proxy_set->length(); - - assert(proxy_set->region_num() <= _region_num, - hrs_err_msg("[%s] proxy set region num: %u should be <= region num: %u", - name(), proxy_set->region_num(), _region_num)); - _region_num -= proxy_set->region_num(); - - assert(proxy_set->total_used_bytes() <= _total_used_bytes, - hrs_err_msg("[%s] proxy set used bytes: "SIZE_FORMAT" " - "should be <= used bytes: "SIZE_FORMAT, - name(), proxy_set->total_used_bytes(), - _total_used_bytes)); - _total_used_bytes -= proxy_set->total_used_bytes(); - - proxy_set->clear(); - - verify_optional(); - proxy_set->verify_optional(); +void FreeRegionList::set_unrealistically_long_length(uint len) { + guarantee(_unrealistically_long_length == 0, "should only be set once"); + _unrealistically_long_length = len; } 
-//////////////////// HeapRegionLinkedList //////////////////// - -void HeapRegionLinkedList::fill_in_ext_msg_extra(hrs_ext_msg* msg) { +void FreeRegionList::fill_in_ext_msg_extra(hrs_ext_msg* msg) { msg->append(" hd: "PTR_FORMAT" tl: "PTR_FORMAT, head(), tail()); } -void HeapRegionLinkedList::add_as_head(HeapRegionLinkedList* from_list) { - hrs_assert_mt_safety_ok(this); - hrs_assert_mt_safety_ok(from_list); +void FreeRegionList::add_as_head_or_tail(FreeRegionList* from_list, bool as_head) { + check_mt_safety(); + from_list->check_mt_safety(); verify_optional(); from_list->verify_optional(); - if (from_list->is_empty()) return; + if (from_list->is_empty()) { + return; + } #ifdef ASSERT - HeapRegionLinkedListIterator iter(from_list); + FreeRegionListIterator iter(from_list); while (iter.more_available()) { HeapRegion* hr = iter.get_next(); // In set_containing_set() we check that we either set the value @@ -276,35 +127,75 @@ } #endif // ASSERT - if (_head != NULL) { - assert(length() > 0 && _tail != NULL, hrs_ext_msg(this, "invariant")); - from_list->_tail->set_next(_head); - } else { + if (_head == NULL) { assert(length() == 0 && _tail == NULL, hrs_ext_msg(this, "invariant")); + _head = from_list->_head; _tail = from_list->_tail; + } else { + assert(length() > 0 && _tail != NULL, hrs_ext_msg(this, "invariant")); + if (as_head) { + from_list->_tail->set_next(_head); + _head->set_prev(from_list->_tail); + _head = from_list->_head; + } else { + _tail->set_next(from_list->_head); + from_list->_head->set_prev(_tail); + _tail = from_list->_tail; + } } - _head = from_list->_head; - _length += from_list->length(); - _region_num += from_list->region_num(); - _total_used_bytes += from_list->total_used_bytes(); + _count.increment(from_list->length(), from_list->total_capacity_bytes()); from_list->clear(); verify_optional(); from_list->verify_optional(); } -void HeapRegionLinkedList::add_as_tail(HeapRegionLinkedList* from_list) { - hrs_assert_mt_safety_ok(this); - hrs_assert_mt_safety_ok(from_list); +void FreeRegionList::add_as_head(FreeRegionList* from_list) { + add_as_head_or_tail(from_list, true /* as_head */); +} + +void FreeRegionList::add_as_tail(FreeRegionList* from_list) { + add_as_head_or_tail(from_list, false /* as_head */); +} + +void FreeRegionList::remove_all() { + check_mt_safety(); + verify_optional(); + + HeapRegion* curr = _head; + while (curr != NULL) { + verify_region(curr); + + HeapRegion* next = curr->next(); + curr->set_next(NULL); + curr->set_prev(NULL); + curr->set_containing_set(NULL); + curr = next; + } + clear(); + + verify_optional(); +} + +void FreeRegionList::add_ordered(FreeRegionList* from_list) { + check_mt_safety(); + from_list->check_mt_safety(); verify_optional(); from_list->verify_optional(); - if (from_list->is_empty()) return; + if (from_list->is_empty()) { + return; + } -#ifdef ASSERT - HeapRegionLinkedListIterator iter(from_list); + if (is_empty()) { + add_as_head(from_list); + return; + } + + #ifdef ASSERT + FreeRegionListIterator iter(from_list); while (iter.more_available()) { HeapRegion* hr = iter.get_next(); // In set_containing_set() we check that we either set the value @@ -313,46 +204,50 @@ hr->set_containing_set(NULL); hr->set_containing_set(this); } -#endif // ASSERT + #endif // ASSERT + + HeapRegion* curr_to = _head; + HeapRegion* curr_from = from_list->_head; + + while (curr_from != NULL) { + while (curr_to != NULL && curr_to->hrs_index() < curr_from->hrs_index()) { + curr_to = curr_to->next(); + } + + if (curr_to == NULL) { + // The rest of the 
from list should be added as tail + _tail->set_next(curr_from); + curr_from->set_prev(_tail); + curr_from = NULL; + } else { + HeapRegion* next_from = curr_from->next(); - if (_tail != NULL) { - assert(length() > 0 && _head != NULL, hrs_ext_msg(this, "invariant")); - _tail->set_next(from_list->_head); - } else { - assert(length() == 0 && _head == NULL, hrs_ext_msg(this, "invariant")); - _head = from_list->_head; + curr_from->set_next(curr_to); + curr_from->set_prev(curr_to->prev()); + if (curr_to->prev() == NULL) { + _head = curr_from; + } else { + curr_to->prev()->set_next(curr_from); + } + curr_to->set_prev(curr_from); + + curr_from = next_from; + } } - _tail = from_list->_tail; - _length += from_list->length(); - _region_num += from_list->region_num(); - _total_used_bytes += from_list->total_used_bytes(); + if (_tail->hrs_index() < from_list->_tail->hrs_index()) { + _tail = from_list->_tail; + } + + _count.increment(from_list->length(), from_list->total_capacity_bytes()); from_list->clear(); verify_optional(); from_list->verify_optional(); } -void HeapRegionLinkedList::remove_all() { - hrs_assert_mt_safety_ok(this); - verify_optional(); - - HeapRegion* curr = _head; - while (curr != NULL) { - hrs_assert_region_ok(this, curr, this); - - HeapRegion* next = curr->next(); - curr->set_next(NULL); - curr->set_containing_set(NULL); - curr = next; - } - clear(); - - verify_optional(); -} - -void HeapRegionLinkedList::remove_all_pending(uint target_count) { - hrs_assert_mt_safety_ok(this); +void FreeRegionList::remove_all_pending(uint target_count) { + check_mt_safety(); assert(target_count > 1, hrs_ext_msg(this, "pre-condition")); assert(!is_empty(), hrs_ext_msg(this, "pre-condition")); @@ -360,11 +255,11 @@ DEBUG_ONLY(uint old_length = length();) HeapRegion* curr = _head; - HeapRegion* prev = NULL; uint count = 0; while (curr != NULL) { - hrs_assert_region_ok(this, curr, this); + verify_region(curr); HeapRegion* next = curr->next(); + HeapRegion* prev = curr->prev(); if (curr->pending_removal()) { assert(count < target_count, @@ -384,10 +279,15 @@ _tail = prev; } else { assert(_tail != curr, hrs_ext_msg(this, "invariant")); + next->set_prev(prev); + } + if (_last = curr) { + _last = NULL; } curr->set_next(NULL); - remove_internal(curr); + curr->set_prev(NULL); + remove(curr); curr->set_pending_removal(false); count += 1; @@ -397,8 +297,6 @@ // carry on iterating to make sure there are not more regions // tagged with pending removal. DEBUG_ONLY(if (count == target_count) break;) - } else { - prev = curr; } curr = next; } @@ -414,46 +312,27 @@ verify_optional(); } -void HeapRegionLinkedList::verify() { +void FreeRegionList::verify() { // See comment in HeapRegionSetBase::verify() about MT safety and // verification. - hrs_assert_mt_safety_ok(this); + check_mt_safety(); // This will also do the basic verification too. verify_start(); - HeapRegion* curr = _head; - HeapRegion* prev1 = NULL; - HeapRegion* prev0 = NULL; - uint count = 0; - while (curr != NULL) { - verify_next_region(curr); - - count += 1; - guarantee(count < _unrealistically_long_length, - hrs_err_msg("[%s] the calculated length: %u " - "seems very long, is there maybe a cycle? 
" - "curr: "PTR_FORMAT" prev0: "PTR_FORMAT" " - "prev1: "PTR_FORMAT" length: %u", - name(), count, curr, prev0, prev1, length())); - - prev1 = prev0; - prev0 = curr; - curr = curr->next(); - } - - guarantee(_tail == prev0, hrs_ext_msg(this, "post-condition")); + verify_list(); verify_end(); } -void HeapRegionLinkedList::clear() { - HeapRegionSetBase::clear(); +void FreeRegionList::clear() { + _count = HeapRegionSetCount(); _head = NULL; _tail = NULL; + _last = NULL; } -void HeapRegionLinkedList::print_on(outputStream* out, bool print_contents) { +void FreeRegionList::print_on(outputStream* out, bool print_contents) { HeapRegionSetBase::print_on(out, print_contents); out->print_cr(" Linking"); out->print_cr(" head : "PTR_FORMAT, _head); @@ -461,10 +340,124 @@ if (print_contents) { out->print_cr(" Contents"); - HeapRegionLinkedListIterator iter(this); + FreeRegionListIterator iter(this); while (iter.more_available()) { HeapRegion* hr = iter.get_next(); hr->print_on(out); } } } + +void FreeRegionList::verify_list() { + HeapRegion* curr = head(); + HeapRegion* prev1 = NULL; + HeapRegion* prev0 = NULL; + uint count = 0; + size_t capacity = 0; + uint last_index = 0; + + guarantee(_head == NULL || _head->prev() == NULL, "_head should not have a prev"); + while (curr != NULL) { + verify_region(curr); + + count++; + guarantee(count < _unrealistically_long_length, + hrs_err_msg("[%s] the calculated length: %u seems very long, is there maybe a cycle? curr: "PTR_FORMAT" prev0: "PTR_FORMAT" " "prev1: "PTR_FORMAT" length: %u", name(), count, curr, prev0, prev1, length())); + + if (curr->next() != NULL) { + guarantee(curr->next()->prev() == curr, "Next or prev pointers messed up"); + } + guarantee(curr->hrs_index() == 0 || curr->hrs_index() > last_index, "List should be sorted"); + last_index = curr->hrs_index(); + + capacity += curr->capacity(); + + prev1 = prev0; + prev0 = curr; + curr = curr->next(); + } + + guarantee(tail() == prev0, err_msg("Expected %s to end with %u but it ended with %u.", name(), tail()->hrs_index(), prev0->hrs_index())); + guarantee(_tail == NULL || _tail->next() == NULL, "_tail should not have a next"); + guarantee(length() == count, err_msg("%s count mismatch. Expected %u, actual %u.", name(), length(), count)); + guarantee(total_capacity_bytes() == capacity, err_msg("%s capacity mismatch. Expected " SIZE_FORMAT ", actual " SIZE_FORMAT, + name(), total_capacity_bytes(), capacity)); +} + +// Note on the check_mt_safety() methods below: +// +// Verification of the "master" heap region sets / lists that are +// maintained by G1CollectedHeap is always done during a STW pause and +// by the VM thread at the start / end of the pause. The standard +// verification methods all assert check_mt_safety(). This is +// important as it ensures that verification is done without +// concurrent updates taking place at the same time. It follows, that, +// for the "master" heap region sets / lists, the check_mt_safety() +// method should include the VM thread / STW case. + +void MasterFreeRegionListMtSafeChecker::check() { + // Master Free List MT safety protocol: + // (a) If we're at a safepoint, operations on the master free list + // should be invoked by either the VM thread (which will serialize + // them) or by the GC workers while holding the + // FreeList_lock. + // (b) If we're not at a safepoint, operations on the master free + // list should be invoked while holding the Heap_lock. 
+ + if (SafepointSynchronize::is_at_safepoint()) { + guarantee(Thread::current()->is_VM_thread() || + FreeList_lock->owned_by_self(), "master free list MT safety protocol at a safepoint"); + } else { + guarantee(Heap_lock->owned_by_self(), "master free list MT safety protocol outside a safepoint"); + } +} + +void SecondaryFreeRegionListMtSafeChecker::check() { + // Secondary Free List MT safety protocol: + // Operations on the secondary free list should always be invoked + // while holding the SecondaryFreeList_lock. + + guarantee(SecondaryFreeList_lock->owned_by_self(), "secondary free list MT safety protocol"); +} + +void OldRegionSetMtSafeChecker::check() { + // Master Old Set MT safety protocol: + // (a) If we're at a safepoint, operations on the master old set + // should be invoked: + // - by the VM thread (which will serialize them), or + // - by the GC workers while holding the FreeList_lock, if we're + // at a safepoint for an evacuation pause (this lock is taken + // anyway when an GC alloc region is retired so that a new one + // is allocated from the free list), or + // - by the GC workers while holding the OldSets_lock, if we're at a + // safepoint for a cleanup pause. + // (b) If we're not at a safepoint, operations on the master old set + // should be invoked while holding the Heap_lock. + + if (SafepointSynchronize::is_at_safepoint()) { + guarantee(Thread::current()->is_VM_thread() + || FreeList_lock->owned_by_self() || OldSets_lock->owned_by_self(), + "master old set MT safety protocol at a safepoint"); + } else { + guarantee(Heap_lock->owned_by_self(), "master old set MT safety protocol outside a safepoint"); + } +} + +void HumongousRegionSetMtSafeChecker::check() { + // Humongous Set MT safety protocol: + // (a) If we're at a safepoint, operations on the master humongous + // set should be invoked by either the VM thread (which will + // serialize them) or by the GC workers while holding the + // OldSets_lock. + // (b) If we're not at a safepoint, operations on the master + // humongous set should be invoked while holding the Heap_lock. + + if (SafepointSynchronize::is_at_safepoint()) { + guarantee(Thread::current()->is_VM_thread() || + OldSets_lock->owned_by_self(), + "master humongous set MT safety protocol at a safepoint"); + } else { + guarantee(Heap_lock->owned_by_self(), + "master humongous set MT safety protocol outside a safepoint"); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegionSet.hpp --- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -38,135 +38,108 @@ #define HEAP_REGION_SET_FORCE_VERIFY defined(ASSERT) #endif // HEAP_REGION_SET_FORCE_VERIFY -//////////////////// HeapRegionSetBase //////////////////// +class hrs_ext_msg; + +class HRSMtSafeChecker : public CHeapObj { +public: + virtual void check() = 0; +}; + +class MasterFreeRegionListMtSafeChecker : public HRSMtSafeChecker { public: void check(); }; +class SecondaryFreeRegionListMtSafeChecker : public HRSMtSafeChecker { public: void check(); }; +class HumongousRegionSetMtSafeChecker : public HRSMtSafeChecker { public: void check(); }; +class OldRegionSetMtSafeChecker : public HRSMtSafeChecker { public: void check(); }; + +class HeapRegionSetCount VALUE_OBJ_CLASS_SPEC { + friend class VMStructs; + uint _length; + size_t _capacity; + +public: + HeapRegionSetCount() : _length(0), _capacity(0) { } + + const uint length() const { return _length; } + const size_t capacity() const { return _capacity; } + + void increment(uint length_to_add, size_t capacity_to_add) { + _length += length_to_add; + _capacity += capacity_to_add; + } + + void decrement(const uint length_to_remove, const size_t capacity_to_remove) { + _length -= length_to_remove; + _capacity -= capacity_to_remove; + } +}; // Base class for all the classes that represent heap region sets. It // contains the basic attributes that each set needs to maintain // (e.g., length, region num, used bytes sum) plus any shared // functionality (e.g., verification). -class hrs_ext_msg; - -typedef enum { - HRSPhaseNone, - HRSPhaseEvacuation, - HRSPhaseCleanup, - HRSPhaseFullGC -} HRSPhase; - -class HRSPhaseSetter; - class HeapRegionSetBase VALUE_OBJ_CLASS_SPEC { - friend class hrs_ext_msg; - friend class HRSPhaseSetter; friend class VMStructs; +private: + bool _is_humongous; + bool _is_empty; + HRSMtSafeChecker* _mt_safety_checker; protected: - static uint _unrealistically_long_length; - // The number of regions added to the set. If the set contains // only humongous regions, this reflects only 'starts humongous' // regions and does not include 'continues humongous' ones. - uint _length; - - // The total number of regions represented by the set. If the set - // does not contain humongous regions, this should be the same as - // _length. If the set contains only humongous regions, this will - // include the 'continues humongous' regions. - uint _region_num; - - // We don't keep track of the total capacity explicitly, we instead - // recalculate it based on _region_num and the heap region size. - - // The sum of used bytes in the all the regions in the set. - size_t _total_used_bytes; + HeapRegionSetCount _count; const char* _name; - bool _verify_in_progress; - uint _calc_length; - uint _calc_region_num; - size_t _calc_total_capacity_bytes; - size_t _calc_total_used_bytes; - - // This is here so that it can be used in the subclasses to assert - // something different depending on which phase the GC is in. This - // can be particularly helpful in the check_mt_safety() methods. - static HRSPhase _phase; - - // Only used by HRSPhaseSetter. - static void clear_phase(); - static void set_phase(HRSPhase phase); + bool _verify_in_progress; // verify_region() is used to ensure that the contents of a region - // added to / removed from a set are consistent. Different sets - // make different assumptions about the regions added to them. 
So - // each set can override verify_region_extra(), which is called - // from verify_region(), and do any extra verification it needs to - // perform in that. - virtual const char* verify_region_extra(HeapRegion* hr) { return NULL; } - bool verify_region(HeapRegion* hr, - HeapRegionSetBase* expected_containing_set); + // added to / removed from a set are consistent. + void verify_region(HeapRegion* hr) PRODUCT_RETURN; // Indicates whether all regions in the set should be humongous or // not. Only used during verification. - virtual bool regions_humongous() = 0; + bool regions_humongous() { return _is_humongous; } // Indicates whether all regions in the set should be empty or // not. Only used during verification. - virtual bool regions_empty() = 0; + bool regions_empty() { return _is_empty; } + + void check_mt_safety() { + if (_mt_safety_checker != NULL) { + _mt_safety_checker->check(); + } + } + + virtual void fill_in_ext_msg_extra(hrs_ext_msg* msg) { } + + HeapRegionSetBase(const char* name, bool humongous, bool empty, HRSMtSafeChecker* mt_safety_checker); + +public: + const char* name() { return _name; } - // Subclasses can optionally override this to do MT safety protocol - // checks. It is called in an assert from all methods that perform - // updates on the set (and subclasses should also call it too). - virtual bool check_mt_safety() { return true; } + uint length() { return _count.length(); } + + bool is_empty() { return _count.length() == 0; } + + size_t total_capacity_bytes() { + return _count.capacity(); + } + + // It updates the fields of the set to reflect hr being added to + // the set and tags the region appropriately. + inline void add(HeapRegion* hr); + + // It updates the fields of the set to reflect hr being removed + // from the set and tags the region appropriately. + inline void remove(HeapRegion* hr); // fill_in_ext_msg() writes the the values of the set's attributes // in the custom err_msg (hrs_ext_msg). fill_in_ext_msg_extra() // allows subclasses to append further information. - virtual void fill_in_ext_msg_extra(hrs_ext_msg* msg) { } void fill_in_ext_msg(hrs_ext_msg* msg, const char* message); - // It updates the fields of the set to reflect hr being added to - // the set. - inline void update_for_addition(HeapRegion* hr); - - // It updates the fields of the set to reflect hr being added to - // the set and tags the region appropriately. - inline void add_internal(HeapRegion* hr); - - // It updates the fields of the set to reflect hr being removed - // from the set. - inline void update_for_removal(HeapRegion* hr); - - // It updates the fields of the set to reflect hr being removed - // from the set and tags the region appropriately. - inline void remove_internal(HeapRegion* hr); - - // It clears all the fields of the sets. Note: it will not iterate - // over the set and remove regions from it. It assumes that the - // caller has already done so. It will literally just clear the fields. 
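
Editorial note: the three separate counters of the old base class (_length, _region_num, _total_used_bytes) are collapsed into the single HeapRegionSetCount value object shown above, which tracks only a length and a byte capacity and is updated by add()/remove(). A simplified standalone version of that bookkeeping (illustrative only, region size is a made-up constant):

#include <cassert>
#include <cstddef>
#include <cstdio>

// Simplified analogue of HeapRegionSetCount: a plain value object holding a
// region count and a byte capacity, updated in lock step by add()/remove().
class RegionSetCount {
  unsigned _length;
  std::size_t _capacity;
public:
  RegionSetCount() : _length(0), _capacity(0) {}
  unsigned length() const { return _length; }
  std::size_t capacity() const { return _capacity; }
  void increment(unsigned n, std::size_t bytes) { _length += n; _capacity += bytes; }
  void decrement(unsigned n, std::size_t bytes) {
    assert(n <= _length && bytes <= _capacity && "removing more than tracked");
    _length -= n;
    _capacity -= bytes;
  }
};

int main() {
  const std::size_t region_bytes = 1u << 20;  // pretend 1 MB regions
  RegionSetCount count;
  count.increment(1u, region_bytes);          // set.add(hr)
  count.increment(1u, region_bytes);
  count.decrement(1u, region_bytes);          // set.remove(hr)
  std::printf("length=%u capacity=%zu bytes\n", count.length(), count.capacity());
  return 0;
}
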
- virtual void clear(); - - HeapRegionSetBase(const char* name); - -public: - static void set_unrealistically_long_length(uint len); - - const char* name() { return _name; } - - uint length() { return _length; } - - bool is_empty() { return _length == 0; } - - uint region_num() { return _region_num; } - - size_t total_capacity_bytes() { - return (size_t) region_num() << HeapRegion::LogOfHRGrainBytes; - } - - size_t total_used_bytes() { return _total_used_bytes; } - virtual void verify(); void verify_start(); void verify_next_region(HeapRegion* hr); @@ -187,7 +160,6 @@ // assert/guarantee-specific message it also prints out the values of // the fields of the associated set. This can be very helpful in // diagnosing failures. - class hrs_ext_msg : public hrs_err_msg { public: hrs_ext_msg(HeapRegionSetBase* set, const char* message) : hrs_err_msg("") { @@ -195,32 +167,6 @@ } }; -class HRSPhaseSetter { -public: - HRSPhaseSetter(HRSPhase phase) { - HeapRegionSetBase::set_phase(phase); - } - ~HRSPhaseSetter() { - HeapRegionSetBase::clear_phase(); - } -}; - -// These two macros are provided for convenience, to keep the uses of -// these two asserts a bit more concise. - -#define hrs_assert_mt_safety_ok(_set_) \ - do { \ - assert((_set_)->check_mt_safety(), hrs_ext_msg((_set_), "MT safety")); \ - } while (0) - -#define hrs_assert_region_ok(_set_, _hr_, _expected_) \ - do { \ - assert((_set_)->verify_region((_hr_), (_expected_)), \ - hrs_ext_msg((_set_), "region verification")); \ - } while (0) - -//////////////////// HeapRegionSet //////////////////// - #define hrs_assert_sets_match(_set1_, _set2_) \ do { \ assert(((_set1_)->regions_humongous() == \ @@ -236,63 +182,41 @@ // the same interface (namely, the HeapRegionSetBase API). class HeapRegionSet : public HeapRegionSetBase { -protected: - virtual const char* verify_region_extra(HeapRegion* hr) { - if (hr->next() != NULL) { - return "next() should always be NULL as we do not link the regions"; - } - - return HeapRegionSetBase::verify_region_extra(hr); - } - - HeapRegionSet(const char* name) : HeapRegionSetBase(name) { - clear(); - } - public: - // It adds hr to the set. The region should not be a member of - // another set. - inline void add(HeapRegion* hr); + HeapRegionSet(const char* name, bool humongous, HRSMtSafeChecker* mt_safety_checker): + HeapRegionSetBase(name, humongous, false /* empty */, mt_safety_checker) { } - // It removes hr from the set. The region should be a member of - // this set. - inline void remove(HeapRegion* hr); - - // It removes a region from the set. Instead of updating the fields - // of the set to reflect this removal, it accumulates the updates - // in proxy_set. The idea is that proxy_set is thread-local to - // avoid multiple threads updating the fields of the set - // concurrently and having to synchronize. The method - // update_from_proxy() will update the fields of the set from the - // proxy_set. - inline void remove_with_proxy(HeapRegion* hr, HeapRegionSet* proxy_set); - - // After multiple calls to remove_with_proxy() the updates to the - // fields of the set are accumulated in proxy_set. This call - // updates the fields of the set from proxy_set. 
- void update_from_proxy(HeapRegionSet* proxy_set); + void bulk_remove(const HeapRegionSetCount& removed) { + _count.decrement(removed.length(), removed.capacity()); + } }; -//////////////////// HeapRegionLinkedList //////////////////// - -// A set that links all the regions added to it in a singly-linked +// A set that links all the regions added to it in a doubly-linked // list. We should try to avoid doing operations that iterate over // such lists in performance critical paths. Typically we should -// add / remove one region at a time or concatenate two lists. All -// those operations are done in constant time. +// add / remove one region at a time or concatenate two lists. There are +// two ways to treat your lists, ordered and un-ordered. All un-ordered +// operations are done in constant time. To keep a list ordered only use +// add_ordered() to add elements to the list. If a list is not ordered +// from start, there is no way to sort it later. -class HeapRegionLinkedListIterator; +class FreeRegionListIterator; -class HeapRegionLinkedList : public HeapRegionSetBase { - friend class HeapRegionLinkedListIterator; +class FreeRegionList : public HeapRegionSetBase { + friend class FreeRegionListIterator; private: HeapRegion* _head; HeapRegion* _tail; - // These are provided for use by the friend classes. - HeapRegion* head() { return _head; } - HeapRegion* tail() { return _tail; } + // _last is used to keep track of where we added an element the last + // time in ordered lists. It helps to improve performance when adding + // several ordered items in a row. + HeapRegion* _last; + + static uint _unrealistically_long_length; + + void add_as_head_or_tail(FreeRegionList* from_list, bool as_head); protected: virtual void fill_in_ext_msg_extra(hrs_ext_msg* msg); @@ -300,11 +224,24 @@ // See the comment for HeapRegionSetBase::clear() virtual void clear(); - HeapRegionLinkedList(const char* name) : HeapRegionSetBase(name) { +public: + FreeRegionList(const char* name, HRSMtSafeChecker* mt_safety_checker = NULL): + HeapRegionSetBase(name, false /* humongous */, true /* empty */, mt_safety_checker) { clear(); } -public: + void verify_list(); + + HeapRegion* head() { return _head; } + HeapRegion* tail() { return _tail; } + + static void set_unrealistically_long_length(uint len); + + // Add hr to the list. The region should not be a member of another set. + // Assumes that the list is ordered and will preserve that order. The order + // is determined by hrs_index. + inline void add_ordered(HeapRegion* hr); + // It adds hr to the list as the new head. The region should not be // a member of another set. inline void add_as_head(HeapRegion* hr); @@ -320,15 +257,29 @@ // Convenience method. inline HeapRegion* remove_head_or_null(); + // Removes and returns the last element (_tail) of the list. It assumes + // that the list isn't empty so that it can return a non-NULL value. + inline HeapRegion* remove_tail(); + + // Convenience method + inline HeapRegion* remove_tail_or_null(); + + // Removes from head or tail based on the given argument. + inline HeapRegion* remove_region(bool from_head); + + // Merge two ordered lists. The result is also ordered. The order is + // determined by hrs_index. + void add_ordered(FreeRegionList* from_list); + // It moves the regions from from_list to this list and empties // from_list. The new regions will appear in the same order as they // were in from_list and be linked in the beginning of this list. 
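
Editorial note: the class comment above explains that the free list is now doubly linked, that only add_ordered() keeps it sorted by region index, and that _last remembers the previous insertion point to speed up runs of ascending inserts. A toy standalone sketch of that ordered insert with a last-insert hint (a plain node type keyed by an index, not the HotSpot FreeRegionList):

#include <cstdio>

// Node stands in for HeapRegion and its hrs_index(); the list keeps nodes
// sorted by index and remembers the last inserted node as a search hint.
struct Node {
  unsigned index;
  Node* prev;
  Node* next;
  explicit Node(unsigned i) : index(i), prev(nullptr), next(nullptr) {}
};

class OrderedList {
  Node* _head = nullptr;
  Node* _tail = nullptr;
  Node* _last = nullptr;  // hint: last node inserted
public:
  void add_ordered(Node* n) {
    if (_head == nullptr) {            // empty list
      _head = _tail = n;
    } else {
      // Start from the hint when it is still behind the new node.
      Node* curr = (_last != nullptr && _last->index < n->index) ? _last : _head;
      while (curr != nullptr && curr->index < n->index) {
        curr = curr->next;
      }
      n->next = curr;
      if (curr == nullptr) {                // append at the tail
        n->prev = _tail;
        _tail->next = n;
        _tail = n;
      } else if (curr->prev == nullptr) {   // new head
        n->prev = nullptr;
        curr->prev = n;
        _head = n;
      } else {                              // splice in before curr
        n->prev = curr->prev;
        curr->prev->next = n;
        curr->prev = n;
      }
    }
    _last = n;
  }

  void print() const {
    for (Node* c = _head; c != nullptr; c = c->next) std::printf("%u ", c->index);
    std::printf("\n");
  }
};

int main() {
  Node a(5), b(1), c(9), d(7);
  OrderedList list;
  list.add_ordered(&a);
  list.add_ordered(&b);
  list.add_ordered(&c);
  list.add_ordered(&d);
  list.print();  // prints: 1 5 7 9
  return 0;
}
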
- void add_as_head(HeapRegionLinkedList* from_list); + void add_as_head(FreeRegionList* from_list); // It moves the regions from from_list to this list and empties // from_list. The new regions will appear in the same order as they // were in from_list and be linked in the end of this list. - void add_as_tail(HeapRegionLinkedList* from_list); + void add_as_tail(FreeRegionList* from_list); // It empties the list by removing all regions from it. void remove_all(); @@ -346,15 +297,13 @@ virtual void print_on(outputStream* out, bool print_contents = false); }; -//////////////////// HeapRegionLinkedListIterator //////////////////// - // Iterator class that provides a convenient way to iterate over the // regions of a HeapRegionLinkedList instance. -class HeapRegionLinkedListIterator : public StackObj { +class FreeRegionListIterator : public StackObj { private: - HeapRegionLinkedList* _list; - HeapRegion* _curr; + FreeRegionList* _list; + HeapRegion* _curr; public: bool more_available() { @@ -369,13 +318,12 @@ // do the "cycle" check. HeapRegion* hr = _curr; - assert(_list->verify_region(hr, _list), "region verification"); + _list->verify_region(hr); _curr = hr->next(); return hr; } - HeapRegionLinkedListIterator(HeapRegionLinkedList* list) - : _curr(NULL), _list(list) { + FreeRegionListIterator(FreeRegionList* list) : _curr(NULL), _list(list) { _curr = list->head(); } }; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp --- a/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,116 +27,110 @@ #include "gc_implementation/g1/heapRegionSet.hpp" -//////////////////// HeapRegionSetBase //////////////////// - -inline void HeapRegionSetBase::update_for_addition(HeapRegion* hr) { - // Assumes the caller has already verified the region. - - _length += 1; - _region_num += hr->region_num(); - _total_used_bytes += hr->used(); -} - -inline void HeapRegionSetBase::add_internal(HeapRegion* hr) { - hrs_assert_region_ok(this, hr, NULL); +inline void HeapRegionSetBase::add(HeapRegion* hr) { + check_mt_safety(); + assert(hr->containing_set() == NULL, hrs_ext_msg(this, "should not already have a containing set %u")); assert(hr->next() == NULL, hrs_ext_msg(this, "should not already be linked")); - update_for_addition(hr); + _count.increment(1u, hr->capacity()); hr->set_containing_set(this); + verify_region(hr); } -inline void HeapRegionSetBase::update_for_removal(HeapRegion* hr) { - // Assumes the caller has already verified the region. 
- assert(_length > 0, hrs_ext_msg(this, "pre-condition")); - _length -= 1; - - uint region_num_diff = hr->region_num(); - assert(region_num_diff <= _region_num, - hrs_err_msg("[%s] region's region num: %u " - "should be <= region num: %u", - name(), region_num_diff, _region_num)); - _region_num -= region_num_diff; - - size_t used_bytes = hr->used(); - assert(used_bytes <= _total_used_bytes, - hrs_err_msg("[%s] region's used bytes: "SIZE_FORMAT" " - "should be <= used bytes: "SIZE_FORMAT, - name(), used_bytes, _total_used_bytes)); - _total_used_bytes -= used_bytes; -} - -inline void HeapRegionSetBase::remove_internal(HeapRegion* hr) { - hrs_assert_region_ok(this, hr, this); +inline void HeapRegionSetBase::remove(HeapRegion* hr) { + check_mt_safety(); + verify_region(hr); assert(hr->next() == NULL, hrs_ext_msg(this, "should already be unlinked")); hr->set_containing_set(NULL); - update_for_removal(hr); -} - -//////////////////// HeapRegionSet //////////////////// - -inline void HeapRegionSet::add(HeapRegion* hr) { - hrs_assert_mt_safety_ok(this); - // add_internal() will verify the region. - add_internal(hr); -} - -inline void HeapRegionSet::remove(HeapRegion* hr) { - hrs_assert_mt_safety_ok(this); - // remove_internal() will verify the region. - remove_internal(hr); + assert(_count.length() > 0, hrs_ext_msg(this, "pre-condition")); + _count.decrement(1u, hr->capacity()); } -inline void HeapRegionSet::remove_with_proxy(HeapRegion* hr, - HeapRegionSet* proxy_set) { - // No need to fo the MT safety check here given that this method - // does not update the contents of the set but instead accumulates - // the changes in proxy_set which is assumed to be thread-local. - hrs_assert_sets_match(this, proxy_set); - hrs_assert_region_ok(this, hr, this); - - hr->set_containing_set(NULL); - proxy_set->update_for_addition(hr); -} - -//////////////////// HeapRegionLinkedList //////////////////// - -inline void HeapRegionLinkedList::add_as_head(HeapRegion* hr) { - hrs_assert_mt_safety_ok(this); +inline void FreeRegionList::add_ordered(HeapRegion* hr) { + check_mt_safety(); assert((length() == 0 && _head == NULL && _tail == NULL) || (length() > 0 && _head != NULL && _tail != NULL), hrs_ext_msg(this, "invariant")); - // add_internal() will verify the region. - add_internal(hr); + // add() will verify the region and check mt safety. + add(hr); + + // Now link the region + if (_head != NULL) { + HeapRegion* curr; + + if (_last != NULL && _last->hrs_index() < hr->hrs_index()) { + curr = _last; + } else { + curr = _head; + } + + // Find first entry with a Region Index larger than entry to insert. + while (curr != NULL && curr->hrs_index() < hr->hrs_index()) { + curr = curr->next(); + } + + hr->set_next(curr); + + if (curr == NULL) { + // Adding at the end + hr->set_prev(_tail); + _tail->set_next(hr); + _tail = hr; + } else if (curr->prev() == NULL) { + // Adding at the beginning + hr->set_prev(NULL); + _head = hr; + curr->set_prev(hr); + } else { + hr->set_prev(curr->prev()); + hr->prev()->set_next(hr); + curr->set_prev(hr); + } + } else { + // The list was empty + _tail = hr; + _head = hr; + } + _last = hr; +} + +inline void FreeRegionList::add_as_head(HeapRegion* hr) { + assert((length() == 0 && _head == NULL && _tail == NULL) || + (length() > 0 && _head != NULL && _tail != NULL), + hrs_ext_msg(this, "invariant")); + // add() will verify the region and check mt safety. + add(hr); // Now link the region. 
if (_head != NULL) { hr->set_next(_head); + _head->set_prev(hr); } else { _tail = hr; } _head = hr; } -inline void HeapRegionLinkedList::add_as_tail(HeapRegion* hr) { - hrs_assert_mt_safety_ok(this); +inline void FreeRegionList::add_as_tail(HeapRegion* hr) { + check_mt_safety(); assert((length() == 0 && _head == NULL && _tail == NULL) || (length() > 0 && _head != NULL && _tail != NULL), hrs_ext_msg(this, "invariant")); - // add_internal() will verify the region. - add_internal(hr); + // add() will verify the region and check mt safety. + add(hr); // Now link the region. if (_tail != NULL) { _tail->set_next(hr); + hr->set_prev(_tail); } else { _head = hr; } _tail = hr; } -inline HeapRegion* HeapRegionLinkedList::remove_head() { - hrs_assert_mt_safety_ok(this); +inline HeapRegion* FreeRegionList::remove_head() { assert(!is_empty(), hrs_ext_msg(this, "the list should not be empty")); assert(length() > 0 && _head != NULL && _tail != NULL, hrs_ext_msg(this, "invariant")); @@ -146,17 +140,22 @@ _head = hr->next(); if (_head == NULL) { _tail = NULL; + } else { + _head->set_prev(NULL); } hr->set_next(NULL); - // remove_internal() will verify the region. - remove_internal(hr); + if (_last == hr) { + _last = NULL; + } + + // remove() will verify the region and check mt safety. + remove(hr); return hr; } -inline HeapRegion* HeapRegionLinkedList::remove_head_or_null() { - hrs_assert_mt_safety_ok(this); - +inline HeapRegion* FreeRegionList::remove_head_or_null() { + check_mt_safety(); if (!is_empty()) { return remove_head(); } else { @@ -164,4 +163,47 @@ } } +inline HeapRegion* FreeRegionList::remove_tail() { + assert(!is_empty(), hrs_ext_msg(this, "The list should not be empty")); + assert(length() > 0 && _head != NULL && _tail != NULL, + hrs_ext_msg(this, "invariant")); + + // We need to unlink it first + HeapRegion* hr = _tail; + + _tail = hr->prev(); + if (_tail == NULL) { + _head = NULL; + } else { + _tail->set_next(NULL); + } + hr->set_prev(NULL); + + if (_last == hr) { + _last = NULL; + } + + // remove() will verify the region and check mt safety. + remove(hr); + return hr; +} + +inline HeapRegion* FreeRegionList::remove_tail_or_null() { + check_mt_safety(); + + if (!is_empty()) { + return remove_tail(); + } else { + return NULL; + } +} + +inline HeapRegion* FreeRegionList::remove_region(bool from_head) { + if (from_head) { + return remove_head_or_null(); + } else { + return remove_tail_or_null(); + } +} + #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSET_INLINE_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegionSets.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionSets.cpp Tue Mar 25 12:54:21 2014 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "gc_implementation/g1/heapRegionRemSet.hpp" -#include "gc_implementation/g1/heapRegionSets.hpp" - -// Note on the check_mt_safety() methods below: -// -// Verification of the "master" heap region sets / lists that are -// maintained by G1CollectedHeap is always done during a STW pause and -// by the VM thread at the start / end of the pause. The standard -// verification methods all assert check_mt_safety(). This is -// important as it ensures that verification is done without -// concurrent updates taking place at the same time. It follows, that, -// for the "master" heap region sets / lists, the check_mt_safety() -// method should include the VM thread / STW case. - -//////////////////// FreeRegionList //////////////////// - -const char* FreeRegionList::verify_region_extra(HeapRegion* hr) { - if (hr->is_young()) { - return "the region should not be young"; - } - // The superclass will check that the region is empty and - // not humongous. - return HeapRegionLinkedList::verify_region_extra(hr); -} - -//////////////////// MasterFreeRegionList //////////////////// - -const char* MasterFreeRegionList::verify_region_extra(HeapRegion* hr) { - // We should reset the RSet for parallel iteration before we add it - // to the master free list so that it is ready when the region is - // re-allocated. - if (!hr->rem_set()->verify_ready_for_par_iteration()) { - return "the region's RSet should be ready for parallel iteration"; - } - return FreeRegionList::verify_region_extra(hr); -} - -bool MasterFreeRegionList::check_mt_safety() { - // Master Free List MT safety protocol: - // (a) If we're at a safepoint, operations on the master free list - // should be invoked by either the VM thread (which will serialize - // them) or by the GC workers while holding the - // FreeList_lock. - // (b) If we're not at a safepoint, operations on the master free - // list should be invoked while holding the Heap_lock. - - if (SafepointSynchronize::is_at_safepoint()) { - guarantee(Thread::current()->is_VM_thread() || - FreeList_lock->owned_by_self(), - hrs_ext_msg(this, "master free list MT safety protocol " - "at a safepoint")); - } else { - guarantee(Heap_lock->owned_by_self(), - hrs_ext_msg(this, "master free list MT safety protocol " - "outside a safepoint")); - } - - return FreeRegionList::check_mt_safety(); -} - -//////////////////// SecondaryFreeRegionList //////////////////// - -bool SecondaryFreeRegionList::check_mt_safety() { - // Secondary Free List MT safety protocol: - // Operations on the secondary free list should always be invoked - // while holding the SecondaryFreeList_lock. - - guarantee(SecondaryFreeList_lock->owned_by_self(), - hrs_ext_msg(this, "secondary free list MT safety protocol")); - - return FreeRegionList::check_mt_safety(); -} - -//////////////////// OldRegionSet //////////////////// - -const char* OldRegionSet::verify_region_extra(HeapRegion* hr) { - if (hr->is_young()) { - return "the region should not be young"; - } - // The superclass will check that the region is not empty and not - // humongous. 
- return HeapRegionSet::verify_region_extra(hr); -} - -//////////////////// MasterOldRegionSet //////////////////// - -bool MasterOldRegionSet::check_mt_safety() { - // Master Old Set MT safety protocol: - // (a) If we're at a safepoint, operations on the master old set - // should be invoked: - // - by the VM thread (which will serialize them), or - // - by the GC workers while holding the FreeList_lock, if we're - // at a safepoint for an evacuation pause (this lock is taken - // anyway when an GC alloc region is retired so that a new one - // is allocated from the free list), or - // - by the GC workers while holding the OldSets_lock, if we're at a - // safepoint for a cleanup pause. - // (b) If we're not at a safepoint, operations on the master old set - // should be invoked while holding the Heap_lock. - - if (SafepointSynchronize::is_at_safepoint()) { - guarantee(Thread::current()->is_VM_thread() || - _phase == HRSPhaseEvacuation && FreeList_lock->owned_by_self() || - _phase == HRSPhaseCleanup && OldSets_lock->owned_by_self(), - hrs_ext_msg(this, "master old set MT safety protocol " - "at a safepoint")); - } else { - guarantee(Heap_lock->owned_by_self(), - hrs_ext_msg(this, "master old set MT safety protocol " - "outside a safepoint")); - } - - return OldRegionSet::check_mt_safety(); -} - -//////////////////// HumongousRegionSet //////////////////// - -const char* HumongousRegionSet::verify_region_extra(HeapRegion* hr) { - if (hr->is_young()) { - return "the region should not be young"; - } - // The superclass will check that the region is not empty and - // humongous. - return HeapRegionSet::verify_region_extra(hr); -} - -//////////////////// MasterHumongousRegionSet //////////////////// - -bool MasterHumongousRegionSet::check_mt_safety() { - // Master Humongous Set MT safety protocol: - // (a) If we're at a safepoint, operations on the master humongous - // set should be invoked by either the VM thread (which will - // serialize them) or by the GC workers while holding the - // OldSets_lock. - // (b) If we're not at a safepoint, operations on the master - // humongous set should be invoked while holding the Heap_lock. - - if (SafepointSynchronize::is_at_safepoint()) { - guarantee(Thread::current()->is_VM_thread() || - OldSets_lock->owned_by_self(), - hrs_ext_msg(this, "master humongous set MT safety protocol " - "at a safepoint")); - } else { - guarantee(Heap_lock->owned_by_self(), - hrs_ext_msg(this, "master humongous set MT safety protocol " - "outside a safepoint")); - } - - return HumongousRegionSet::check_mt_safety(); -} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/heapRegionSets.hpp --- a/src/share/vm/gc_implementation/g1/heapRegionSets.hpp Tue Mar 25 12:54:21 2014 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSETS_HPP -#define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSETS_HPP - -#include "gc_implementation/g1/heapRegionSet.inline.hpp" - -//////////////////// FreeRegionList //////////////////// - -class FreeRegionList : public HeapRegionLinkedList { -protected: - virtual const char* verify_region_extra(HeapRegion* hr); - - virtual bool regions_humongous() { return false; } - virtual bool regions_empty() { return true; } - -public: - FreeRegionList(const char* name) : HeapRegionLinkedList(name) { } -}; - -//////////////////// MasterFreeRegionList //////////////////// - -class MasterFreeRegionList : public FreeRegionList { -protected: - virtual const char* verify_region_extra(HeapRegion* hr); - virtual bool check_mt_safety(); - -public: - MasterFreeRegionList(const char* name) : FreeRegionList(name) { } -}; - -//////////////////// SecondaryFreeRegionList //////////////////// - -class SecondaryFreeRegionList : public FreeRegionList { -protected: - virtual bool check_mt_safety(); - -public: - SecondaryFreeRegionList(const char* name) : FreeRegionList(name) { } -}; - -//////////////////// OldRegionSet //////////////////// - -class OldRegionSet : public HeapRegionSet { -protected: - virtual const char* verify_region_extra(HeapRegion* hr); - - virtual bool regions_humongous() { return false; } - virtual bool regions_empty() { return false; } - -public: - OldRegionSet(const char* name) : HeapRegionSet(name) { } -}; - -//////////////////// MasterOldRegionSet //////////////////// - -class MasterOldRegionSet : public OldRegionSet { -private: -protected: - virtual bool check_mt_safety(); - -public: - MasterOldRegionSet(const char* name) : OldRegionSet(name) { } -}; - -//////////////////// HumongousRegionSet //////////////////// - -class HumongousRegionSet : public HeapRegionSet { -protected: - virtual const char* verify_region_extra(HeapRegion* hr); - - virtual bool regions_humongous() { return true; } - virtual bool regions_empty() { return false; } - -public: - HumongousRegionSet(const char* name) : HeapRegionSet(name) { } -}; - -//////////////////// MasterHumongousRegionSet //////////////////// - -class MasterHumongousRegionSet : public HumongousRegionSet { -protected: - virtual bool check_mt_safety(); - -public: - MasterHumongousRegionSet(const char* name) : HumongousRegionSet(name) { } -}; - -#endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSETS_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/satbQueue.cpp --- a/src/share/vm/gc_implementation/g1/satbQueue.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -219,58 +219,52 @@ } #ifdef ASSERT -void SATBMarkQueueSet::dump_active_values(JavaThread* first, - bool expected_active) { - gclog_or_tty->print_cr("SATB queue active values for Java Threads"); - gclog_or_tty->print_cr(" SATB queue set: active is %s", - (is_active()) ? "TRUE" : "FALSE"); - gclog_or_tty->print_cr(" expected_active is %s", - (expected_active) ? 
"TRUE" : "FALSE"); - for (JavaThread* t = first; t; t = t->next()) { - bool active = t->satb_mark_queue().is_active(); - gclog_or_tty->print_cr(" thread %s, active is %s", - t->name(), (active) ? "TRUE" : "FALSE"); +void SATBMarkQueueSet::dump_active_states(bool expected_active) { + gclog_or_tty->print_cr("Expected SATB active state: %s", + expected_active ? "ACTIVE" : "INACTIVE"); + gclog_or_tty->print_cr("Actual SATB active states:"); + gclog_or_tty->print_cr(" Queue set: %s", is_active() ? "ACTIVE" : "INACTIVE"); + for (JavaThread* t = Threads::first(); t; t = t->next()) { + gclog_or_tty->print_cr(" Thread \"%s\" queue: %s", t->name(), + t->satb_mark_queue().is_active() ? "ACTIVE" : "INACTIVE"); + } + gclog_or_tty->print_cr(" Shared queue: %s", + shared_satb_queue()->is_active() ? "ACTIVE" : "INACTIVE"); +} + +void SATBMarkQueueSet::verify_active_states(bool expected_active) { + // Verify queue set state + if (is_active() != expected_active) { + dump_active_states(expected_active); + guarantee(false, "SATB queue set has an unexpected active state"); + } + + // Verify thread queue states + for (JavaThread* t = Threads::first(); t; t = t->next()) { + if (t->satb_mark_queue().is_active() != expected_active) { + dump_active_states(expected_active); + guarantee(false, "Thread SATB queue has an unexpected active state"); + } + } + + // Verify shared queue state + if (shared_satb_queue()->is_active() != expected_active) { + dump_active_states(expected_active); + guarantee(false, "Shared SATB queue has an unexpected active state"); } } #endif // ASSERT -void SATBMarkQueueSet::set_active_all_threads(bool b, - bool expected_active) { +void SATBMarkQueueSet::set_active_all_threads(bool active, bool expected_active) { assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); - JavaThread* first = Threads::first(); - #ifdef ASSERT - if (_all_active != expected_active) { - dump_active_values(first, expected_active); - - // I leave this here as a guarantee, instead of an assert, so - // that it will still be compiled in if we choose to uncomment - // the #ifdef ASSERT in a product build. The whole block is - // within an #ifdef ASSERT so the guarantee will not be compiled - // in a product build anyway. - guarantee(false, - "SATB queue set has an unexpected active value"); - } + verify_active_states(expected_active); #endif // ASSERT - _all_active = b; - - for (JavaThread* t = first; t; t = t->next()) { -#ifdef ASSERT - bool active = t->satb_mark_queue().is_active(); - if (active != expected_active) { - dump_active_values(first, expected_active); - - // I leave this here as a guarantee, instead of an assert, so - // that it will still be compiled in if we choose to uncomment - // the #ifdef ASSERT in a product build. The whole block is - // within an #ifdef ASSERT so the guarantee will not be compiled - // in a product build anyway. 
- guarantee(false, - "thread has an unexpected active value in its SATB queue"); - } -#endif // ASSERT - t->satb_mark_queue().set_active(b); + _all_active = active; + for (JavaThread* t = Threads::first(); t; t = t->next()) { + t->satb_mark_queue().set_active(active); } + shared_satb_queue()->set_active(active); } void SATBMarkQueueSet::filter_thread_buffers() { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/satbQueue.hpp --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -87,7 +87,8 @@ bool apply_closure_to_completed_buffer_work(bool par, int worker); #ifdef ASSERT - void dump_active_values(JavaThread* first, bool expected_active); + void dump_active_states(bool expected_active); + void verify_active_states(bool expected_active); #endif // ASSERT public: @@ -99,11 +100,11 @@ static void handle_zero_index_for_thread(JavaThread* t); - // Apply "set_active(b)" to all Java threads' SATB queues. It should be + // Apply "set_active(active)" to all SATB queues in the set. It should be // called only with the world stopped. The method will assert that the // SATB queues of all threads it visits, as well as the SATB queue // set itself, has an active value same as expected_active. - void set_active_all_threads(bool b, bool expected_active); + void set_active_all_threads(bool active, bool expected_active); // Filter all the currently-active SATB buffers. void filter_thread_buffers(); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/g1/vmStructs_g1.hpp --- a/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -57,9 +57,10 @@ nonstatic_field(G1MonitoringSupport, _old_committed, size_t) \ nonstatic_field(G1MonitoringSupport, _old_used, size_t) \ \ - nonstatic_field(HeapRegionSetBase, _length, uint) \ - nonstatic_field(HeapRegionSetBase, _region_num, uint) \ - nonstatic_field(HeapRegionSetBase, _total_used_bytes, size_t) \ + nonstatic_field(HeapRegionSetBase, _count, HeapRegionSetCount) \ + \ + nonstatic_field(HeapRegionSetCount, _length, uint) \ + nonstatic_field(HeapRegionSetCount, _capacity, size_t) \ #define VM_TYPES_G1(declare_type, declare_toplevel_type) \ @@ -71,6 +72,7 @@ declare_type(HeapRegion, ContiguousSpace) \ declare_toplevel_type(HeapRegionSeq) \ declare_toplevel_type(HeapRegionSetBase) \ + declare_toplevel_type(HeapRegionSetCount) \ declare_toplevel_type(G1MonitoringSupport) \ \ declare_toplevel_type(G1CollectedHeap*) \ diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -139,11 +139,6 @@ return true; } } - // No object starts in this slice; verify this using - // more traditional methods: Note that no object can - // start before the start_addr. 
- assert(end_addr == start_addr || - object_start(end_addr - 1) <= start_addr, - "Oops an object does start in this slice?"); + return false; } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -492,6 +492,10 @@ return young_gen()->eden_space()->tlab_capacity(thr); } +size_t ParallelScavengeHeap::tlab_used(Thread* thr) const { + return young_gen()->eden_space()->tlab_used(thr); +} + size_t ParallelScavengeHeap::unsafe_max_tlab_alloc(Thread* thr) const { return young_gen()->eden_space()->unsafe_max_tlab_alloc(thr); } @@ -665,8 +669,10 @@ void ParallelScavengeHeap::trace_heap(GCWhen::Type when, GCTracer* gc_tracer) { const PSHeapSummary& heap_summary = create_ps_heap_summary(); + gc_tracer->report_gc_heap_summary(when, heap_summary); + const MetaspaceSummary& metaspace_summary = create_metaspace_summary(); - gc_tracer->report_gc_heap_summary(when, heap_summary, metaspace_summary); + gc_tracer->report_metaspace_summary(when, metaspace_summary); } ParallelScavengeHeap* ParallelScavengeHeap::heap() { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -189,6 +189,7 @@ bool supports_tlab_allocation() const { return true; } size_t tlab_capacity(Thread* thr) const; + size_t tlab_used(Thread* thr) const; size_t unsafe_max_tlab_alloc(Thread* thr) const; // Can a compiler initialize a new object without store barriers? 
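The hunks above introduce a per-thread tlab_used(Thread*) alongside the existing tlab_capacity(Thread*) on ParallelScavengeHeap. A minimal sketch of how the pair can be consumed together (Universe::heap(), tlab_capacity() and tlab_used() are the interfaces touched by this patch; the helper function itself is hypothetical and not part of the change):

    // Sketch only: fraction of the TLAB-eligible space already handed out as TLABs.
    static double tlab_eden_occupancy(Thread* thr) {
      CollectedHeap* heap = Universe::heap();
      size_t capacity = heap->tlab_capacity(thr);  // bytes usable for TLABs
      size_t used     = heap->tlab_used(thr);      // bytes already allocated in TLABs
      // Guard against a zero capacity so the division is always defined.
      return capacity == 0 ? 0.0 : (double) used / (double) capacity;
    }
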
diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/gcHeapSummary.hpp --- a/src/share/vm/gc_implementation/shared/gcHeapSummary.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/gcHeapSummary.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -26,6 +26,7 @@ #define SHARE_VM_GC_IMPLEMENTATION_SHARED_GCHEAPSUMMARY_HPP #include "memory/allocation.hpp" +#include "memory/metaspaceChunkFreeListSummary.hpp" class VirtualSpaceSummary : public StackObj { HeapWord* _start; @@ -125,18 +126,49 @@ }; class MetaspaceSummary : public StackObj { + size_t _capacity_until_GC; MetaspaceSizes _meta_space; MetaspaceSizes _data_space; MetaspaceSizes _class_space; + MetaspaceChunkFreeListSummary _metaspace_chunk_free_list_summary; + MetaspaceChunkFreeListSummary _class_chunk_free_list_summary; public: - MetaspaceSummary() : _meta_space(), _data_space(), _class_space() {} - MetaspaceSummary(const MetaspaceSizes& meta_space, const MetaspaceSizes& data_space, const MetaspaceSizes& class_space) : - _meta_space(meta_space), _data_space(data_space), _class_space(class_space) { } + MetaspaceSummary() : + _capacity_until_GC(0), + _meta_space(), + _data_space(), + _class_space(), + _metaspace_chunk_free_list_summary(), + _class_chunk_free_list_summary() + {} + MetaspaceSummary(size_t capacity_until_GC, + const MetaspaceSizes& meta_space, + const MetaspaceSizes& data_space, + const MetaspaceSizes& class_space, + const MetaspaceChunkFreeListSummary& metaspace_chunk_free_list_summary, + const MetaspaceChunkFreeListSummary& class_chunk_free_list_summary) : + _capacity_until_GC(capacity_until_GC), + _meta_space(meta_space), + _data_space(data_space), + _class_space(class_space), + _metaspace_chunk_free_list_summary(metaspace_chunk_free_list_summary), + _class_chunk_free_list_summary(class_chunk_free_list_summary) + {} + size_t capacity_until_GC() const { return _capacity_until_GC; } const MetaspaceSizes& meta_space() const { return _meta_space; } const MetaspaceSizes& data_space() const { return _data_space; } const MetaspaceSizes& class_space() const { return _class_space; } + + const MetaspaceChunkFreeListSummary& metaspace_chunk_free_list_summary() const { + return _metaspace_chunk_free_list_summary; + } + + const MetaspaceChunkFreeListSummary& class_chunk_free_list_summary() const { + return _class_chunk_free_list_summary; + } + }; #endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_GCHEAPSUMMARY_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/gcTrace.cpp --- a/src/share/vm/gc_implementation/shared/gcTrace.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/gcTrace.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -139,11 +139,21 @@ } #endif // INCLUDE_SERVICES -void GCTracer::report_gc_heap_summary(GCWhen::Type when, const GCHeapSummary& heap_summary, const MetaspaceSummary& meta_space_summary) const { +void GCTracer::report_gc_heap_summary(GCWhen::Type when, const GCHeapSummary& heap_summary) const { assert_set_gc_id(); send_gc_heap_summary_event(when, heap_summary); - send_meta_space_summary_event(when, meta_space_summary); +} + +void GCTracer::report_metaspace_summary(GCWhen::Type when, const MetaspaceSummary& summary) const { + assert_set_gc_id(); + + send_meta_space_summary_event(when, summary); + + send_metaspace_chunk_free_list_summary(when, Metaspace::NonClassType, summary.metaspace_chunk_free_list_summary()); + if (UseCompressedClassPointers) { + send_metaspace_chunk_free_list_summary(when, Metaspace::ClassType, 
summary.class_chunk_free_list_summary()); + } } void YoungGCTracer::report_gc_end_impl(const Ticks& timestamp, TimePartitions* time_partitions) { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/gcTrace.hpp --- a/src/share/vm/gc_implementation/shared/gcTrace.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/gcTrace.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -30,6 +30,7 @@ #include "gc_implementation/shared/gcWhen.hpp" #include "gc_implementation/shared/copyFailedInfo.hpp" #include "memory/allocation.hpp" +#include "memory/metaspace.hpp" #include "memory/referenceType.hpp" #if INCLUDE_ALL_GCS #include "gc_implementation/g1/g1YCTypes.hpp" @@ -41,6 +42,7 @@ class EvacuationInfo; class GCHeapSummary; +class MetaspaceChunkFreeListSummary; class MetaspaceSummary; class PSHeapSummary; class ReferenceProcessorStats; @@ -124,7 +126,8 @@ public: void report_gc_start(GCCause::Cause cause, const Ticks& timestamp); void report_gc_end(const Ticks& timestamp, TimePartitions* time_partitions); - void report_gc_heap_summary(GCWhen::Type when, const GCHeapSummary& heap_summary, const MetaspaceSummary& meta_space_summary) const; + void report_gc_heap_summary(GCWhen::Type when, const GCHeapSummary& heap_summary) const; + void report_metaspace_summary(GCWhen::Type when, const MetaspaceSummary& metaspace_summary) const; void report_gc_reference_stats(const ReferenceProcessorStats& rp) const; void report_object_count_after_gc(BoolObjectClosure* object_filter) NOT_SERVICES_RETURN; bool has_reported_gc_start() const; @@ -138,6 +141,7 @@ void send_garbage_collection_event() const; void send_gc_heap_summary_event(GCWhen::Type when, const GCHeapSummary& heap_summary) const; void send_meta_space_summary_event(GCWhen::Type when, const MetaspaceSummary& meta_space_summary) const; + void send_metaspace_chunk_free_list_summary(GCWhen::Type when, Metaspace::MetadataType mdtype, const MetaspaceChunkFreeListSummary& summary) const; void send_reference_stats_event(ReferenceType type, size_t count) const; void send_phase_events(TimePartitions* time_partitions) const; }; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/gcTraceSend.cpp --- a/src/share/vm/gc_implementation/shared/gcTraceSend.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/gcTraceSend.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -64,6 +64,30 @@ } } +void GCTracer::send_metaspace_chunk_free_list_summary(GCWhen::Type when, Metaspace::MetadataType mdtype, + const MetaspaceChunkFreeListSummary& summary) const { + EventMetaspaceChunkFreeListSummary e; + if (e.should_commit()) { + e.set_gcId(_shared_gc_info.id()); + e.set_when(when); + e.set_metadataType(mdtype); + + e.set_specializedChunks(summary.num_specialized_chunks()); + e.set_specializedChunksTotalSize(summary.specialized_chunks_size_in_bytes()); + + e.set_smallChunks(summary.num_small_chunks()); + e.set_smallChunksTotalSize(summary.small_chunks_size_in_bytes()); + + e.set_mediumChunks(summary.num_medium_chunks()); + e.set_mediumChunksTotalSize(summary.medium_chunks_size_in_bytes()); + + e.set_humongousChunks(summary.num_humongous_chunks()); + e.set_humongousChunksTotalSize(summary.humongous_chunks_size_in_bytes()); + + e.commit(); + } +} + void ParallelOldTracer::send_parallel_old_event() const { EventGCParallelOld e(UNTIMED); if (e.should_commit()) { @@ -246,6 +270,7 @@ if (e.should_commit()) { e.set_gcId(_shared_gc_info.id()); e.set_when((u1) when); + e.set_gcThreshold(meta_space_summary.capacity_until_GC()); 
e.set_metaspace(to_trace_struct(meta_space_summary.meta_space())); e.set_dataSpace(to_trace_struct(meta_space_summary.data_space())); e.set_classSpace(to_trace_struct(meta_space_summary.class_space())); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/markSweep.inline.hpp --- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,10 +30,18 @@ #include "utilities/stack.inline.hpp" #include "utilities/macros.hpp" #if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1StringDedup.hpp" #include "gc_implementation/parallelScavenge/psParallelCompact.hpp" #endif // INCLUDE_ALL_GCS inline void MarkSweep::mark_object(oop obj) { +#if INCLUDE_ALL_GCS + if (G1StringDedup::is_enabled()) { + // We must enqueue the object before it is marked + // as we otherwise can't read the object's age. + G1StringDedup::enqueue_from_mark(obj); + } +#endif // some marks may contain information we need to preserve so we store them away // and overwrite the mark. We'll restore it at the end of markSweep. markOop mark = obj->mark(); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -173,6 +173,26 @@ return lgrp_spaces()->at(i)->space()->capacity_in_bytes(); } +size_t MutableNUMASpace::tlab_used(Thread *thr) const { + // Please see the comments for tlab_capacity(). + guarantee(thr != NULL, "No thread"); + int lgrp_id = thr->lgrp_id(); + if (lgrp_id == -1) { + if (lgrp_spaces()->length() > 0) { + return (used_in_bytes()) / lgrp_spaces()->length(); + } else { + assert(false, "There should be at least one locality group"); + return 0; + } + } + int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals); + if (i == -1) { + return 0; + } + return lgrp_spaces()->at(i)->space()->used_in_bytes(); +} + + size_t MutableNUMASpace::unsafe_max_tlab_alloc(Thread *thr) const { // Please see the comments for tlab_capacity(). 
guarantee(thr != NULL, "No thread"); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -217,6 +217,7 @@ using MutableSpace::capacity_in_words; virtual size_t capacity_in_words(Thread* thr) const; virtual size_t tlab_capacity(Thread* thr) const; + virtual size_t tlab_used(Thread* thr) const; virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; // Allocation (return NULL if full) diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/mutableSpace.hpp --- a/src/share/vm/gc_implementation/shared/mutableSpace.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/mutableSpace.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -124,6 +124,7 @@ virtual size_t used_in_words() const { return pointer_delta(top(), bottom()); } virtual size_t free_in_words() const { return pointer_delta(end(), top()); } virtual size_t tlab_capacity(Thread* thr) const { return capacity_in_bytes(); } + virtual size_t tlab_used(Thread* thr) const { return used_in_bytes(); } virtual size_t unsafe_max_tlab_alloc(Thread* thr) const { return free_in_bytes(); } // Allocation (return NULL if full) diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp --- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -89,6 +89,10 @@ // scavenge; it clears the sensor accumulators. void PLABStats::adjust_desired_plab_sz(uint no_of_gc_workers) { assert(ResizePLAB, "Not set"); + + assert(is_object_aligned(max_size()) && min_size() <= max_size(), + "PLAB clipping computation may be incorrect"); + if (_allocated == 0) { assert(_unused == 0, err_msg("Inconsistency in PLAB stats: " diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp --- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -181,16 +181,7 @@ _used(0), _desired_plab_sz(desired_plab_sz_), _filter(wt) - { - size_t min_sz = min_size(); - size_t max_sz = max_size(); - size_t aligned_min_sz = align_object_size(min_sz); - size_t aligned_max_sz = align_object_size(max_sz); - assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz && - min_sz <= max_sz, - "PLAB clipping computation in adjust_desired_plab_sz()" - " may be incorrect"); - } + { } static const size_t min_size() { return ParGCAllocBuffer::min_size(); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -97,7 +97,13 @@ MetaspaceAux::allocated_used_bytes(Metaspace::ClassType), MetaspaceAux::reserved_bytes(Metaspace::ClassType)); - return MetaspaceSummary(meta_space, data_space, class_space); + const MetaspaceChunkFreeListSummary& ms_chunk_free_list_summary = + MetaspaceAux::chunk_free_list_summary(Metaspace::NonClassType); + const MetaspaceChunkFreeListSummary& class_chunk_free_list_summary = + MetaspaceAux::chunk_free_list_summary(Metaspace::ClassType); + + return 
MetaspaceSummary(MetaspaceGC::capacity_until_GC(), meta_space, data_space, class_space, + ms_chunk_free_list_summary, class_chunk_free_list_summary); } void CollectedHeap::print_heap_before_gc() { @@ -128,8 +134,10 @@ void CollectedHeap::trace_heap(GCWhen::Type when, GCTracer* gc_tracer) { const GCHeapSummary& heap_summary = create_heap_summary(); + gc_tracer->report_gc_heap_summary(when, heap_summary); + const MetaspaceSummary& metaspace_summary = create_metaspace_summary(); - gc_tracer->report_gc_heap_summary(when, heap_summary, metaspace_summary); + gc_tracer->report_metaspace_summary(when, metaspace_summary); } void CollectedHeap::trace_heap_before_gc(GCTracer* gc_tracer) { @@ -320,6 +328,21 @@ assert(thread->deferred_card_mark().is_empty(), "invariant"); } +size_t CollectedHeap::max_tlab_size() const { + // TLABs can't be bigger than we can fill with a int[Integer.MAX_VALUE]. + // This restriction could be removed by enabling filling with multiple arrays. + // If we compute that the reasonable way as + // header_size + ((sizeof(jint) * max_jint) / HeapWordSize) + // we'll overflow on the multiply, so we do the divide first. + // We actually lose a little by dividing first, + // but that just makes the TLAB somewhat smaller than the biggest array, + // which is fine, since we'll be able to fill that. + size_t max_int_size = typeArrayOopDesc::header_size(T_INT) + + sizeof(jint) * + ((juint) max_jint / (size_t) HeapWordSize); + return align_size_down(max_int_size, MinObjAlignment); +} + // Helper for ReduceInitialCardMarks. For performance, // compiled code may elide card-marks for initializing stores // to a newly allocated object along the fast-path. We diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -403,14 +403,16 @@ // the following methods: // Returns "true" iff the heap supports thread-local allocation buffers. // The default is "no". - virtual bool supports_tlab_allocation() const { - return false; - } + virtual bool supports_tlab_allocation() const = 0; + // The amount of space available for thread-local allocation buffers. - virtual size_t tlab_capacity(Thread *thr) const { - guarantee(false, "thread-local allocation buffers not supported"); - return 0; - } + virtual size_t tlab_capacity(Thread *thr) const = 0; + + // The amount of used space for thread-local allocation buffers for the given thread. + virtual size_t tlab_used(Thread *thr) const = 0; + + virtual size_t max_tlab_size() const; + // An estimate of the maximum allocation that could be performed // for thread-local allocation buffers without triggering any // collection or expansion activity. 
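The overflow note inside CollectedHeap::max_tlab_size() above can be made concrete with a small worked illustration (assuming HeapWordSize == 8 and a 32-bit size_t as the worst case; the numbers below are not part of the patch):

    //   multiply first: sizeof(jint) * max_jint = 4 * 2147483647 = 8589934588,
    //                   which no longer fits in 32 bits before the division happens;
    //   divide first:   max_jint / HeapWordSize = 268435455, then * sizeof(jint)
    //                   = 1073741820 words, versus 1073741823 words for the exact
    //                   value - a loss of only 3 words, which is why dividing first
    //                   is a safe and cheap way to avoid the overflow.
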
diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/interpreter/bytecodeTracer.cpp --- a/src/share/vm/interpreter/bytecodeTracer.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/interpreter/bytecodeTracer.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -596,7 +596,7 @@ if (data != NULL) { st->print(" %d", mdo->dp_to_di(data->dp())); st->fill_to(6); - data->print_data_on(st); + data->print_data_on(st, mdo); } } } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/defNewGeneration.cpp --- a/src/share/vm/memory/defNewGeneration.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/defNewGeneration.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1086,6 +1086,10 @@ return eden()->capacity(); } +size_t DefNewGeneration::tlab_used() const { + return eden()->used(); +} + size_t DefNewGeneration::unsafe_max_tlab_alloc() const { return unsafe_max_alloc_nogc(); } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/defNewGeneration.hpp --- a/src/share/vm/memory/defNewGeneration.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/defNewGeneration.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -239,6 +239,7 @@ // Thread-local allocation buffers bool supports_tlab_allocation() const { return true; } size_t tlab_capacity() const; + size_t tlab_used() const; size_t unsafe_max_tlab_alloc() const; // Grow the generation by the specified number of bytes. diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/freeList.cpp --- a/src/share/vm/memory/freeList.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/freeList.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,6 +34,7 @@ #if INCLUDE_ALL_GCS #include "gc_implementation/concurrentMarkSweep/freeChunk.hpp" +#include "gc_implementation/g1/g1CodeCacheRemSet.hpp" #endif // INCLUDE_ALL_GCS // Free list. A FreeList is used to access a linked list of chunks @@ -332,4 +333,5 @@ template class FreeList; #if INCLUDE_ALL_GCS template class FreeList; +template class FreeList; #endif // INCLUDE_ALL_GCS diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/genCollectedHeap.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -937,6 +937,16 @@ return result; } +size_t GenCollectedHeap::tlab_used(Thread* thr) const { + size_t result = 0; + for (int i = 0; i < _n_gens; i += 1) { + if (_gens[i]->supports_tlab_allocation()) { + result += _gens[i]->tlab_used(); + } + } + return result; +} + size_t GenCollectedHeap::unsafe_max_tlab_alloc(Thread* thr) const { size_t result = 0; for (int i = 0; i < _n_gens; i += 1) { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/genCollectedHeap.hpp --- a/src/share/vm/memory/genCollectedHeap.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/genCollectedHeap.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -256,6 +256,7 @@ // Section on TLAB's. 
virtual bool supports_tlab_allocation() const; virtual size_t tlab_capacity(Thread* thr) const; + virtual size_t tlab_used(Thread* thr) const; virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; virtual HeapWord* allocate_new_tlab(size_t size); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/generation.hpp --- a/src/share/vm/memory/generation.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/generation.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -299,6 +299,10 @@ guarantee(false, "Generation doesn't support thread local allocation buffers"); return 0; } + virtual size_t tlab_used() const { + guarantee(false, "Generation doesn't support thread local allocation buffers"); + return 0; + } virtual size_t unsafe_max_tlab_alloc() const { guarantee(false, "Generation doesn't support thread local allocation buffers"); return 0; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/metaspace.cpp --- a/src/share/vm/memory/metaspace.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/metaspace.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -32,7 +32,9 @@ #include "memory/gcLocker.hpp" #include "memory/metachunk.hpp" #include "memory/metaspace.hpp" +#include "memory/metaspaceGCThresholdUpdater.hpp" #include "memory/metaspaceShared.hpp" +#include "memory/metaspaceTracer.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.hpp" #include "runtime/atomic.inline.hpp" @@ -57,6 +59,7 @@ MetaWord* last_allocated = 0; size_t Metaspace::_compressed_class_space_size; +const MetaspaceTracer* Metaspace::_tracer = NULL; // Used in declarations in SpaceManager and ChunkManager enum ChunkIndex { @@ -182,6 +185,48 @@ // Remove from a list by size. Selects list based on size of chunk. Metachunk* free_chunks_get(size_t chunk_word_size); +#define index_bounds_check(index) \ + assert(index == SpecializedIndex || \ + index == SmallIndex || \ + index == MediumIndex || \ + index == HumongousIndex, err_msg("Bad index: %d", (int) index)) + + size_t num_free_chunks(ChunkIndex index) const { + index_bounds_check(index); + + if (index == HumongousIndex) { + return _humongous_dictionary.total_free_blocks(); + } + + ssize_t count = _free_chunks[index].count(); + return count == -1 ? 
0 : (size_t) count; + } + + size_t size_free_chunks_in_bytes(ChunkIndex index) const { + index_bounds_check(index); + + size_t word_size = 0; + if (index == HumongousIndex) { + word_size = _humongous_dictionary.total_size(); + } else { + const size_t size_per_chunk_in_words = _free_chunks[index].size(); + word_size = size_per_chunk_in_words * num_free_chunks(index); + } + + return word_size * BytesPerWord; + } + + MetaspaceChunkFreeListSummary chunk_free_list_summary() const { + return MetaspaceChunkFreeListSummary(num_free_chunks(SpecializedIndex), + num_free_chunks(SmallIndex), + num_free_chunks(MediumIndex), + num_free_chunks(HumongousIndex), + size_free_chunks_in_bytes(SpecializedIndex), + size_free_chunks_in_bytes(SmallIndex), + size_free_chunks_in_bytes(MediumIndex), + size_free_chunks_in_bytes(HumongousIndex)); + } + // Debug support void verify(); void slow_verify() { @@ -1436,19 +1481,21 @@ expand_bytes = align_size_up(expand_bytes, Metaspace::commit_alignment()); // Don't expand unless it's significant if (expand_bytes >= MinMetaspaceExpansion) { - MetaspaceGC::inc_capacity_until_GC(expand_bytes); - } - if (PrintGCDetails && Verbose) { - size_t new_capacity_until_GC = capacity_until_GC; - gclog_or_tty->print_cr(" expanding:" - " minimum_desired_capacity: %6.1fKB" - " expand_bytes: %6.1fKB" - " MinMetaspaceExpansion: %6.1fKB" - " new metaspace HWM: %6.1fKB", - minimum_desired_capacity / (double) K, - expand_bytes / (double) K, - MinMetaspaceExpansion / (double) K, - new_capacity_until_GC / (double) K); + size_t new_capacity_until_GC = MetaspaceGC::inc_capacity_until_GC(expand_bytes); + Metaspace::tracer()->report_gc_threshold(capacity_until_GC, + new_capacity_until_GC, + MetaspaceGCThresholdUpdater::ComputeNewSize); + if (PrintGCDetails && Verbose) { + gclog_or_tty->print_cr(" expanding:" + " minimum_desired_capacity: %6.1fKB" + " expand_bytes: %6.1fKB" + " MinMetaspaceExpansion: %6.1fKB" + " new metaspace HWM: %6.1fKB", + minimum_desired_capacity / (double) K, + expand_bytes / (double) K, + MinMetaspaceExpansion / (double) K, + new_capacity_until_GC / (double) K); + } } return; } @@ -1527,7 +1574,10 @@ // Don't shrink unless it's significant if (shrink_bytes >= MinMetaspaceExpansion && ((capacity_until_GC - shrink_bytes) >= MetaspaceSize)) { - MetaspaceGC::dec_capacity_until_GC(shrink_bytes); + size_t new_capacity_until_GC = MetaspaceGC::dec_capacity_until_GC(shrink_bytes); + Metaspace::tracer()->report_gc_threshold(capacity_until_GC, + new_capacity_until_GC, + MetaspaceGCThresholdUpdater::ComputeNewSize); } } @@ -2628,6 +2678,19 @@ return free_chunks_total_words() * BytesPerWord; } +bool MetaspaceAux::has_chunk_free_list(Metaspace::MetadataType mdtype) { + return Metaspace::get_chunk_manager(mdtype) != NULL; +} + +MetaspaceChunkFreeListSummary MetaspaceAux::chunk_free_list_summary(Metaspace::MetadataType mdtype) { + if (!has_chunk_free_list(mdtype)) { + return MetaspaceChunkFreeListSummary(); + } + + const ChunkManager* cm = Metaspace::get_chunk_manager(mdtype); + return cm->chunk_free_list_summary(); +} + void MetaspaceAux::print_metaspace_change(size_t prev_metadata_used) { gclog_or_tty->print(", [Metaspace:"); if (PrintGCDetails && Verbose) { @@ -3131,6 +3194,7 @@ } MetaspaceGC::initialize(); + _tracer = new MetaspaceTracer(); } Metachunk* Metaspace::get_initialization_chunk(MetadataType mdtype, @@ -3219,8 +3283,12 @@ assert(delta_bytes > 0, "Must be"); size_t after_inc = MetaspaceGC::inc_capacity_until_GC(delta_bytes); + + // capacity_until_GC might be updated concurrently, 
must calculate previous value. size_t before_inc = after_inc - delta_bytes; + tracer()->report_gc_threshold(before_inc, after_inc, + MetaspaceGCThresholdUpdater::ExpandAndAllocate); if (PrintGCDetails && Verbose) { gclog_or_tty->print_cr("Increase capacity to GC from " SIZE_FORMAT " to " SIZE_FORMAT, before_inc, after_inc); @@ -3344,6 +3412,8 @@ MetaWord* result = loader_data->metaspace_non_null()->allocate(word_size, mdtype); if (result == NULL) { + tracer()->report_metaspace_allocation_failure(loader_data, word_size, type, mdtype); + // Allocation failed. if (is_init_completed()) { // Only start a GC if the bootstrapping has completed. @@ -3355,7 +3425,7 @@ } if (result == NULL) { - report_metadata_oome(loader_data, word_size, mdtype, CHECK_NULL); + report_metadata_oome(loader_data, word_size, type, mdtype, CHECK_NULL); } // Zero initialize. @@ -3369,7 +3439,9 @@ return class_vsm()->calc_chunk_size(word_size); } -void Metaspace::report_metadata_oome(ClassLoaderData* loader_data, size_t word_size, MetadataType mdtype, TRAPS) { +void Metaspace::report_metadata_oome(ClassLoaderData* loader_data, size_t word_size, MetaspaceObj::Type type, MetadataType mdtype, TRAPS) { + tracer()->report_metadata_oom(loader_data, word_size, type, mdtype); + // If result is still null, we are out of memory. if (Verbose && TraceMetadataChunkAllocation) { gclog_or_tty->print_cr("Metaspace allocation failed for size " @@ -3412,6 +3484,16 @@ } } +const char* Metaspace::metadata_type_name(Metaspace::MetadataType mdtype) { + switch (mdtype) { + case Metaspace::ClassType: return "Class"; + case Metaspace::NonClassType: return "Metadata"; + default: + assert(false, err_msg("Got bad mdtype: %d", (int) mdtype)); + return NULL; + } +} + void Metaspace::record_allocation(void* ptr, MetaspaceObj::Type type, size_t word_size) { assert(DumpSharedSpaces, "sanity"); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/metaspace.hpp --- a/src/share/vm/memory/metaspace.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/metaspace.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -26,6 +26,7 @@ #include "memory/allocation.hpp" #include "memory/memRegion.hpp" +#include "memory/metaspaceChunkFreeListSummary.hpp" #include "runtime/virtualspace.hpp" #include "utilities/exceptions.hpp" @@ -60,6 +61,7 @@ class ClassLoaderData; class Metablock; class Metachunk; +class MetaspaceTracer; class MetaWord; class Mutex; class outputStream; @@ -149,6 +151,8 @@ static ChunkManager* _chunk_manager_metadata; static ChunkManager* _chunk_manager_class; + static const MetaspaceTracer* _tracer; + public: static VirtualSpaceList* space_list() { return _space_list; } static VirtualSpaceList* class_space_list() { return _class_space_list; } @@ -164,6 +168,8 @@ return mdtype == ClassType ? chunk_manager_class() : chunk_manager_metadata(); } + static const MetaspaceTracer* tracer() { return _tracer; } + private: // This is used by DumpSharedSpaces only, where only _vsm is used. So we will // maintain a single list for now. 
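The metaspace.cpp hunks above pair every change of capacity_until_GC with a report_gc_threshold() call on the new tracer. A compact sketch of the concurrency-aware variant used on the allocation path (the caller name expand_for_allocation is hypothetical; MetaspaceGC, Metaspace::tracer() and MetaspaceGCThresholdUpdater are the names introduced by this patch):

    void expand_for_allocation(size_t delta_bytes) {
      // inc_capacity_until_GC() returns the updated threshold; because other threads
      // may move the threshold concurrently, the previous value is derived from the
      // returned value rather than read in a separate step.
      size_t after  = MetaspaceGC::inc_capacity_until_GC(delta_bytes);
      size_t before = after - delta_bytes;
      Metaspace::tracer()->report_gc_threshold(before, after,
                                               MetaspaceGCThresholdUpdater::ExpandAndAllocate);
    }
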
@@ -234,7 +240,9 @@ static void purge(); static void report_metadata_oome(ClassLoaderData* loader_data, size_t word_size, - MetadataType mdtype, TRAPS); + MetaspaceObj::Type type, MetadataType mdtype, TRAPS); + + static const char* metadata_type_name(Metaspace::MetadataType mdtype); void print_on(outputStream* st) const; // Debugging support @@ -348,6 +356,9 @@ return min_chunk_size_words() * BytesPerWord; } + static bool has_chunk_free_list(Metaspace::MetadataType mdtype); + static MetaspaceChunkFreeListSummary chunk_free_list_summary(Metaspace::MetadataType mdtype); + // Print change in used metadata. static void print_metaspace_change(size_t prev_metadata_used); static void print_on(outputStream * out); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/metaspaceChunkFreeListSummary.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/metaspaceChunkFreeListSummary.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ +#ifndef SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP +#define SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP + +#include "memory/allocation.hpp" + +class MetaspaceChunkFreeListSummary VALUE_OBJ_CLASS_SPEC { + size_t _num_specialized_chunks; + size_t _num_small_chunks; + size_t _num_medium_chunks; + size_t _num_humongous_chunks; + + size_t _specialized_chunks_size_in_bytes; + size_t _small_chunks_size_in_bytes; + size_t _medium_chunks_size_in_bytes; + size_t _humongous_chunks_size_in_bytes; + + public: + MetaspaceChunkFreeListSummary() : + _num_specialized_chunks(0), + _num_small_chunks(0), + _num_medium_chunks(0), + _num_humongous_chunks(0), + _specialized_chunks_size_in_bytes(0), + _small_chunks_size_in_bytes(0), + _medium_chunks_size_in_bytes(0), + _humongous_chunks_size_in_bytes(0) + {} + + MetaspaceChunkFreeListSummary(size_t num_specialized_chunks, + size_t num_small_chunks, + size_t num_medium_chunks, + size_t num_humongous_chunks, + size_t specialized_chunks_size_in_bytes, + size_t small_chunks_size_in_bytes, + size_t medium_chunks_size_in_bytes, + size_t humongous_chunks_size_in_bytes) : + _num_specialized_chunks(num_specialized_chunks), + _num_small_chunks(num_small_chunks), + _num_medium_chunks(num_medium_chunks), + _num_humongous_chunks(num_humongous_chunks), + _specialized_chunks_size_in_bytes(specialized_chunks_size_in_bytes), + _small_chunks_size_in_bytes(small_chunks_size_in_bytes), + _medium_chunks_size_in_bytes(medium_chunks_size_in_bytes), + _humongous_chunks_size_in_bytes(humongous_chunks_size_in_bytes) + {} + + size_t num_specialized_chunks() const { + return _num_specialized_chunks; + } + + size_t num_small_chunks() const { + return _num_small_chunks; + } + + size_t num_medium_chunks() const { + return _num_medium_chunks; + } + + size_t num_humongous_chunks() const { + return _num_humongous_chunks; + } + + size_t specialized_chunks_size_in_bytes() const { + return _specialized_chunks_size_in_bytes; + } + + size_t small_chunks_size_in_bytes() const { + return _small_chunks_size_in_bytes; + } + + size_t medium_chunks_size_in_bytes() const { + return _medium_chunks_size_in_bytes; + } + + size_t humongous_chunks_size_in_bytes() const { + return _humongous_chunks_size_in_bytes; + } +}; + +#endif // SHARE_VM_MEMORY_METASPACE_CHUNK_FREE_LIST_SUMMARY_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/metaspaceGCThresholdUpdater.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/metaspaceGCThresholdUpdater.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP +#define SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP + +#include "memory/allocation.hpp" +#include "utilities/debug.hpp" + +class MetaspaceGCThresholdUpdater : public AllStatic { + public: + enum Type { + ComputeNewSize, + ExpandAndAllocate, + Last + }; + + static const char* to_string(MetaspaceGCThresholdUpdater::Type updater) { + switch (updater) { + case ComputeNewSize: + return "compute_new_size"; + case ExpandAndAllocate: + return "expand_and_allocate"; + default: + assert(false, err_msg("Got bad updater: %d", (int) updater)); + return NULL; + }; + } +}; + +#endif // SHARE_VM_MEMORY_METASPACE_GC_THRESHOLD_UPDATER_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/metaspaceTracer.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/metaspaceTracer.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "classfile/classLoaderData.hpp" +#include "memory/metaspaceTracer.hpp" +#include "oops/oop.inline.hpp" +#include "trace/tracing.hpp" +#include "trace/traceBackend.hpp" + +void MetaspaceTracer::report_gc_threshold(size_t old_val, + size_t new_val, + MetaspaceGCThresholdUpdater::Type updater) const { + EventMetaspaceGCThreshold event; + if (event.should_commit()) { + event.set_oldValue(old_val); + event.set_newValue(new_val); + event.set_updater((u1)updater); + event.commit(); + } +} + +void MetaspaceTracer::report_metaspace_allocation_failure(ClassLoaderData *cld, + size_t word_size, + MetaspaceObj::Type objtype, + Metaspace::MetadataType mdtype) const { + send_allocation_failure_event(cld, word_size, objtype, mdtype); +} + +void MetaspaceTracer::report_metadata_oom(ClassLoaderData *cld, + size_t word_size, + MetaspaceObj::Type objtype, + Metaspace::MetadataType mdtype) const { + send_allocation_failure_event(cld, word_size, objtype, mdtype); +} + +template +void MetaspaceTracer::send_allocation_failure_event(ClassLoaderData *cld, + size_t word_size, + MetaspaceObj::Type objtype, + Metaspace::MetadataType mdtype) const { + E event; + if (event.should_commit()) { + if (cld->is_anonymous()) { + event.set_classLoader(NULL); + event.set_anonymousClassLoader(true); + } else { + if (cld->is_the_null_class_loader_data()) { + event.set_classLoader((Klass*) NULL); + } else { + event.set_classLoader(cld->class_loader()->klass()); + } + event.set_anonymousClassLoader(false); + } + + event.set_size(word_size * BytesPerWord); + event.set_metadataType((u1) mdtype); + event.set_metaspaceObjectType((u1) objtype); + event.commit(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/metaspaceTracer.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/metaspaceTracer.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_VM_MEMORY_METASPACE_TRACER_HPP +#define SHARE_VM_MEMORY_METASPACE_TRACER_HPP + +#include "memory/allocation.hpp" +#include "memory/metaspace.hpp" +#include "memory/metaspaceGCThresholdUpdater.hpp" + +class ClassLoaderData; + +class MetaspaceTracer : public CHeapObj { + template + void send_allocation_failure_event(ClassLoaderData *cld, + size_t word_size, + MetaspaceObj::Type objtype, + Metaspace::MetadataType mdtype) const; + public: + void report_gc_threshold(size_t old_val, + size_t new_val, + MetaspaceGCThresholdUpdater::Type updater) const; + void report_metaspace_allocation_failure(ClassLoaderData *cld, + size_t word_size, + MetaspaceObj::Type objtype, + Metaspace::MetadataType mdtype) const; + void report_metadata_oom(ClassLoaderData *cld, + size_t word_size, + MetaspaceObj::Type objtype, + Metaspace::MetadataType mdtype) const; + +}; + +#endif // SHARE_VM_MEMORY_METASPACE_TRACER_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/padded.hpp --- a/src/share/vm/memory/padded.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/padded.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -90,4 +90,23 @@ static PaddedEnd* create_unfreeable(uint length); }; +// Helper class to create an array of references to arrays of primitive types +// Both the array of references and the data arrays are aligned to the given +// alignment. The allocated memory is zero-filled. +template +class Padded2DArray { + public: + // Creates an aligned padded 2D array. + // The memory cannot be deleted since the raw memory chunk is not returned. + static T** create_unfreeable(uint rows, uint columns, size_t* allocation_size = NULL); +}; + +// Helper class to create an array of T objects. The array as a whole will +// start at a multiple of alignment and its size will be aligned to alignment. +template +class PaddedPrimitiveArray { + public: + static T* create_unfreeable(size_t length); +}; + #endif // SHARE_VM_MEMORY_PADDED_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/padded.inline.hpp --- a/src/share/vm/memory/padded.inline.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/padded.inline.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,3 +47,42 @@ return aligned_padded_array; } + +template +T** Padded2DArray::create_unfreeable(uint rows, uint columns, size_t* allocation_size) { + // Calculate and align the size of the first dimension's table. + size_t table_size = align_size_up_(rows * sizeof(T*), alignment); + // The size of the separate rows. + size_t row_size = align_size_up_(columns * sizeof(T), alignment); + // Total size consists of the indirection table plus the rows. + size_t total_size = table_size + rows * row_size + alignment; + + // Allocate a chunk of memory large enough to allow alignment of the chunk. + void* chunk = AllocateHeap(total_size, flags); + // Clear the allocated memory. 
+ memset(chunk, 0, total_size); + // Align the chunk of memory. + T** result = (T**)align_pointer_up(chunk, alignment); + void* data_start = (void*)((uintptr_t)result + table_size); + + // Fill in the row table. + for (size_t i = 0; i < rows; i++) { + result[i] = (T*)((uintptr_t)data_start + i * row_size); + } + + if (allocation_size != NULL) { + *allocation_size = total_size; + } + + return result; +} + +template +T* PaddedPrimitiveArray::create_unfreeable(size_t length) { + // Allocate a chunk of memory large enough to allow for some alignment. + void* chunk = AllocateHeap(length * sizeof(T) + alignment, flags); + + memset(chunk, 0, length * sizeof(T) + alignment); + + return (T*)align_pointer_up(chunk, alignment); +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/referenceProcessor.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -95,12 +95,11 @@ uint mt_discovery_degree, bool atomic_discovery, BoolObjectClosure* is_alive_non_header, - bool discovered_list_needs_barrier) : + bool discovered_list_needs_post_barrier) : _discovering_refs(false), _enqueuing_is_done(false), _is_alive_non_header(is_alive_non_header), - _discovered_list_needs_barrier(discovered_list_needs_barrier), - _bs(NULL), + _discovered_list_needs_post_barrier(discovered_list_needs_post_barrier), _processing_is_mt(mt_processing), _next_id(0) { @@ -126,10 +125,6 @@ _discovered_refs[i].set_length(0); } - // If we do barriers, cache a copy of the barrier set. - if (discovered_list_needs_barrier) { - _bs = Universe::heap()->barrier_set(); - } setup_policy(false /* default soft ref policy */); } @@ -317,13 +312,9 @@ // Enqueue references that are not made active again, and // clear the decks for the next collection (cycle). ref->enqueue_discovered_reflists((HeapWord*)pending_list_addr, task_executor); - // Do the oop-check on pending_list_addr missed in - // enqueue_discovered_reflist. We should probably - // do a raw oop_check so that future such idempotent - // oop_stores relying on the oop-check side-effect - // may be elided automatically and safely without - // affecting correctness. - oop_store(pending_list_addr, oopDesc::load_decode_heap_oop(pending_list_addr)); + // Do the post-barrier on pending_list_addr missed in + // enqueue_discovered_reflist. + oopDesc::bs()->write_ref_field(pending_list_addr, oopDesc::load_decode_heap_oop(pending_list_addr)); // Stop treating discovered references specially. ref->disable_discovery(); @@ -372,15 +363,17 @@ assert(java_lang_ref_Reference::next(obj) == NULL, "Reference not active; should not be discovered"); // Self-loop next, so as to make Ref not active. - java_lang_ref_Reference::set_next(obj, obj); + // Post-barrier not needed when looping to self. + java_lang_ref_Reference::set_next_raw(obj, obj); if (next_d == obj) { // obj is last // Swap refs_list into pendling_list_addr and // set obj's discovered to what we read from pending_list_addr. oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr); - // Need oop_check on pending_list_addr above; - // see special oop-check code at the end of + // Need post-barrier on pending_list_addr above; + // see special post-barrier code at the end of // enqueue_discovered_reflists() further below. 
- java_lang_ref_Reference::set_discovered(obj, old); // old may be NULL + java_lang_ref_Reference::set_discovered_raw(obj, old); // old may be NULL + oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), old); } } } else { // Old behaviour @@ -497,13 +490,13 @@ } else { new_next = _next; } - - if (UseCompressedOops) { - // Remove Reference object from list. - oopDesc::encode_store_heap_oop((narrowOop*)_prev_next, new_next); - } else { - // Remove Reference object from list. - oopDesc::store_heap_oop((oop*)_prev_next, new_next); + // Remove Reference object from discovered list. Note that G1 does not need a + // pre-barrier here because we know the Reference has already been found/marked, + // that's how it ended up in the discovered list in the first place. + oop_store_raw(_prev_next, new_next); + if (_discovered_list_needs_post_barrier && _prev_next != _refs_list.adr_head()) { + // Needs post-barrier and this is not the list head (which is not on the heap) + oopDesc::bs()->write_ref_field(_prev_next, new_next); } NOT_PRODUCT(_removed++); _refs_list.dec_length(1); @@ -516,13 +509,11 @@ // the reference object and will fail // CT verification. if (UseG1GC) { - BarrierSet* bs = oopDesc::bs(); HeapWord* next_addr = java_lang_ref_Reference::next_addr(_ref); - if (UseCompressedOops) { - bs->write_ref_field_pre((narrowOop*)next_addr, NULL); + oopDesc::bs()->write_ref_field_pre((narrowOop*)next_addr, NULL); } else { - bs->write_ref_field_pre((oop*)next_addr, NULL); + oopDesc::bs()->write_ref_field_pre((oop*)next_addr, NULL); } java_lang_ref_Reference::set_next_raw(_ref, NULL); } else { @@ -553,7 +544,7 @@ OopClosure* keep_alive, VoidClosure* complete_gc) { assert(policy != NULL, "Must have a non-NULL policy"); - DiscoveredListIterator iter(refs_list, keep_alive, is_alive); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); // Decide which softly reachable refs should be kept alive. 
while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(!discovery_is_atomic() /* allow_null_referent */)); @@ -593,7 +584,7 @@ BoolObjectClosure* is_alive, OopClosure* keep_alive) { assert(discovery_is_atomic(), "Error"); - DiscoveredListIterator iter(refs_list, keep_alive, is_alive); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(false /* allow_null_referent */)); DEBUG_ONLY(oop next = java_lang_ref_Reference::next(iter.obj());) @@ -630,7 +621,7 @@ OopClosure* keep_alive, VoidClosure* complete_gc) { assert(!discovery_is_atomic(), "Error"); - DiscoveredListIterator iter(refs_list, keep_alive, is_alive); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */)); HeapWord* next_addr = java_lang_ref_Reference::next_addr(iter.obj()); @@ -673,7 +664,7 @@ OopClosure* keep_alive, VoidClosure* complete_gc) { ResourceMark rm; - DiscoveredListIterator iter(refs_list, keep_alive, is_alive); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); while (iter.has_next()) { iter.update_discovered(); iter.load_ptrs(DEBUG_ONLY(false /* allow_null_referent */)); @@ -790,10 +781,9 @@ }; void ReferenceProcessor::set_discovered(oop ref, oop value) { - if (_discovered_list_needs_barrier) { - java_lang_ref_Reference::set_discovered(ref, value); - } else { - java_lang_ref_Reference::set_discovered_raw(ref, value); + java_lang_ref_Reference::set_discovered_raw(ref, value); + if (_discovered_list_needs_post_barrier) { + oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(ref), value); } } @@ -990,7 +980,7 @@ void ReferenceProcessor::clean_up_discovered_reflist(DiscoveredList& refs_list) { assert(!discovery_is_atomic(), "Else why call this method?"); - DiscoveredListIterator iter(refs_list, NULL, NULL); + DiscoveredListIterator iter(refs_list, NULL, NULL, _discovered_list_needs_post_barrier); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */)); oop next = java_lang_ref_Reference::next(iter.obj()); @@ -1085,8 +1075,8 @@ // so this will expand to nothing. As a result, we have manually // elided this out for G1, but left in the test for some future // collector that might have need for a pre-barrier here, e.g.:- - // _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); - assert(!_discovered_list_needs_barrier || UseG1GC, + // oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); + assert(!_discovered_list_needs_post_barrier || UseG1GC, "Need to check non-G1 collector: " "may need a pre-write-barrier for CAS from NULL below"); oop retest = oopDesc::atomic_compare_exchange_oop(next_discovered, discovered_addr, @@ -1097,8 +1087,8 @@ // is necessary. refs_list.set_head(obj); refs_list.inc_length(1); - if (_discovered_list_needs_barrier) { - _bs->write_ref_field((void*)discovered_addr, next_discovered); + if (_discovered_list_needs_post_barrier) { + oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered); } if (TraceReferenceGC) { @@ -1250,7 +1240,7 @@ if (_discovery_is_mt) { add_to_discovered_list_mt(*list, obj, discovered_addr); } else { - // If "_discovered_list_needs_barrier", we do write barriers when + // If "_discovered_list_needs_post_barrier", we do write barriers when // updating the discovered reference list. 
Otherwise, we do a raw store // here: the field will be visited later when processing the discovered // references. @@ -1260,13 +1250,13 @@ // As in the case further above, since we are over-writing a NULL // pre-value, we can safely elide the pre-barrier here for the case of G1. - // e.g.:- _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); + // e.g.:- oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); assert(discovered == NULL, "control point invariant"); - assert(!_discovered_list_needs_barrier || UseG1GC, + assert(!_discovered_list_needs_post_barrier || UseG1GC, "For non-G1 collector, may need a pre-write-barrier for CAS from NULL below"); oop_store_raw(discovered_addr, next_discovered); - if (_discovered_list_needs_barrier) { - _bs->write_ref_field((void*)discovered_addr, next_discovered); + if (_discovered_list_needs_post_barrier) { + oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered); } list->set_head(obj); list->inc_length(1); @@ -1361,7 +1351,7 @@ OopClosure* keep_alive, VoidClosure* complete_gc, YieldClosure* yield) { - DiscoveredListIterator iter(refs_list, keep_alive, is_alive); + DiscoveredListIterator iter(refs_list, keep_alive, is_alive, _discovered_list_needs_post_barrier); while (iter.has_next()) { iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */)); oop obj = iter.obj(); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/referenceProcessor.hpp --- a/src/share/vm/memory/referenceProcessor.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/referenceProcessor.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -99,6 +99,7 @@ oop _referent; OopClosure* _keep_alive; BoolObjectClosure* _is_alive; + bool _discovered_list_needs_post_barrier; DEBUG_ONLY( oop _first_seen; // cyclic linked list check @@ -112,7 +113,8 @@ public: inline DiscoveredListIterator(DiscoveredList& refs_list, OopClosure* keep_alive, - BoolObjectClosure* is_alive): + BoolObjectClosure* is_alive, + bool discovered_list_needs_post_barrier = false): _refs_list(refs_list), _prev_next(refs_list.adr_head()), _prev(NULL), @@ -126,7 +128,8 @@ #endif _next(NULL), _keep_alive(keep_alive), - _is_alive(is_alive) + _is_alive(is_alive), + _discovered_list_needs_post_barrier(discovered_list_needs_post_barrier) { } // End Of List. @@ -228,14 +231,13 @@ bool _discovery_is_mt; // true if reference discovery is MT. // If true, setting "next" field of a discovered refs list requires - // write barrier(s). (Must be true if used in a collector in which + // write post barrier. (Must be true if used in a collector in which // elements of a discovered list may be moved during discovery: for // example, a collector like Garbage-First that moves objects during a // long-term concurrent marking phase that does weak reference // discovery.) - bool _discovered_list_needs_barrier; + bool _discovered_list_needs_post_barrier; - BarrierSet* _bs; // Cached copy of BarrierSet. bool _enqueuing_is_done; // true if all weak references enqueued bool _processing_is_mt; // true during phases when // reference processing is MT. @@ -381,8 +383,8 @@ protected: // Set the 'discovered' field of the given reference to - // the given value - emitting barriers depending upon - // the value of _discovered_list_needs_barrier. + // the given value - emitting post barriers depending upon + // the value of _discovered_list_needs_post_barrier. 
void set_discovered(oop ref, oop value); // "Preclean" the given discovered reference list @@ -420,32 +422,13 @@ void update_soft_ref_master_clock(); public: - // constructor - ReferenceProcessor(): - _span((HeapWord*)NULL, (HeapWord*)NULL), - _discovered_refs(NULL), - _discoveredSoftRefs(NULL), _discoveredWeakRefs(NULL), - _discoveredFinalRefs(NULL), _discoveredPhantomRefs(NULL), - _discovering_refs(false), - _discovery_is_atomic(true), - _enqueuing_is_done(false), - _discovery_is_mt(false), - _discovered_list_needs_barrier(false), - _bs(NULL), - _is_alive_non_header(NULL), - _num_q(0), - _max_num_q(0), - _processing_is_mt(false), - _next_id(0) - { } - // Default parameters give you a vanilla reference processor. ReferenceProcessor(MemRegion span, bool mt_processing = false, uint mt_processing_degree = 1, bool mt_discovery = false, uint mt_discovery_degree = 1, bool atomic_discovery = true, BoolObjectClosure* is_alive_non_header = NULL, - bool discovered_list_needs_barrier = false); + bool discovered_list_needs_post_barrier = false); // RefDiscoveryPolicy values enum DiscoveryPolicy { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/threadLocalAllocBuffer.cpp --- a/src/share/vm/memory/threadLocalAllocBuffer.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -34,6 +34,7 @@ // Thread-Local Edens support // static member initialization +size_t ThreadLocalAllocBuffer::_max_size = 0; unsigned ThreadLocalAllocBuffer::_target_refills = 0; GlobalTLABStats* ThreadLocalAllocBuffer::_global_stats = NULL; @@ -45,7 +46,7 @@ void ThreadLocalAllocBuffer::accumulate_statistics_before_gc() { global_stats()->initialize(); - for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) { + for (JavaThread *thread = Threads::first(); thread != NULL; thread = thread->next()) { thread->tlab().accumulate_statistics(); thread->tlab().initialize_statistics(); } @@ -60,28 +61,32 @@ } void ThreadLocalAllocBuffer::accumulate_statistics() { - size_t capacity = Universe::heap()->tlab_capacity(myThread()) / HeapWordSize; - size_t unused = Universe::heap()->unsafe_max_tlab_alloc(myThread()) / HeapWordSize; - size_t used = capacity - unused; - - // Update allocation history if a reasonable amount of eden was allocated. - bool update_allocation_history = used > 0.5 * capacity; + Thread* thread = myThread(); + size_t capacity = Universe::heap()->tlab_capacity(thread); + size_t used = Universe::heap()->tlab_used(thread); _gc_waste += (unsigned)remaining(); + size_t total_allocated = thread->allocated_bytes(); + size_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc; + _allocated_before_last_gc = total_allocated; if (PrintTLAB && (_number_of_refills > 0 || Verbose)) { print_stats("gc"); } if (_number_of_refills > 0) { + // Update allocation history if a reasonable amount of eden was allocated. + bool update_allocation_history = used > 0.5 * capacity; if (update_allocation_history) { // Average the fraction of eden allocated in a tlab by this // thread for use in the next resize operation. // _gc_waste is not subtracted because it's included in // "used". - size_t allocation = _number_of_refills * desired_size(); - double alloc_frac = allocation / (double) used; + // The result can be larger than 1.0 due to direct to old allocations. + // These allocations should ideally not be counted but since it is not possible + // to filter them out here we just cap the fraction to be at most 1.0. 
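(Editorial sketch, not part of this changeset.) Taken together with the MIN2(1.0, ...) line that follows and the reworked resize() further down, the per-thread bookkeeping here amounts to: compute the fraction of eden this thread allocated since the last GC, cap it at 1.0 as the comment above explains, feed it into a weighted average, and size the next TLAB as the average fraction times eden capacity divided by the target refill count. A hedged illustration with made-up numbers (the weight and size bounds are assumptions, not HotSpot defaults; heap-word conversion and alignment are omitted):

#include <algorithm>
#include <cstdio>

int main() {
  // Assumed example values, all in bytes.
  const double eden_capacity = 64.0 * 1024 * 1024; // tlab_capacity(thread)
  const double eden_used     = 20.0 * 1024 * 1024; // tlab_used(thread)
  const double thread_allocd = 22.0 * 1024 * 1024; // allocated since the last GC

  // Cap at 1.0: direct-to-old allocations can push the raw ratio past 1.
  const double alloc_frac = std::min(1.0, thread_allocd / eden_used);

  // One step of the exponentially weighted average (stand-in for
  // _allocation_fraction.sample(); the weight is illustrative).
  const double weight = 0.35;
  double avg_frac = 0.0;
  avg_frac = avg_frac * (1.0 - weight) + alloc_frac * weight;

  // resize(): expected allocation spread over the target refill count,
  // clamped to an assumed [min_size, max_size] range.
  const double target_refills = 50.0;
  const double min_size = 2.0 * 1024, max_size = 4.0 * 1024 * 1024;
  const double new_size = std::min(max_size,
                          std::max(min_size, avg_frac * eden_capacity / target_refills));

  std::printf("alloc_frac=%.2f avg=%.2f new TLAB size=%.0f bytes\n",
              alloc_frac, avg_frac, new_size);
  return 0;
}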
+ double alloc_frac = MIN2(1.0, (double) allocated_since_last_gc / used); _allocation_fraction.sample(alloc_frac); } global_stats()->update_allocating_threads(); @@ -126,33 +131,32 @@ } void ThreadLocalAllocBuffer::resize_all_tlabs() { - for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) { - thread->tlab().resize(); + if (ResizeTLAB) { + for (JavaThread *thread = Threads::first(); thread != NULL; thread = thread->next()) { + thread->tlab().resize(); + } } } void ThreadLocalAllocBuffer::resize() { + // Compute the next tlab size using expected allocation amount + assert(ResizeTLAB, "Should not call this otherwise"); + size_t alloc = (size_t)(_allocation_fraction.average() * + (Universe::heap()->tlab_capacity(myThread()) / HeapWordSize)); + size_t new_size = alloc / _target_refills; - if (ResizeTLAB) { - // Compute the next tlab size using expected allocation amount - size_t alloc = (size_t)(_allocation_fraction.average() * - (Universe::heap()->tlab_capacity(myThread()) / HeapWordSize)); - size_t new_size = alloc / _target_refills; - - new_size = MIN2(MAX2(new_size, min_size()), max_size()); - - size_t aligned_new_size = align_object_size(new_size); + new_size = MIN2(MAX2(new_size, min_size()), max_size()); - if (PrintTLAB && Verbose) { - gclog_or_tty->print("TLAB new size: thread: " INTPTR_FORMAT " [id: %2d]" - " refills %d alloc: %8.6f desired_size: " SIZE_FORMAT " -> " SIZE_FORMAT "\n", - myThread(), myThread()->osthread()->thread_id(), - _target_refills, _allocation_fraction.average(), desired_size(), aligned_new_size); - } - set_desired_size(aligned_new_size); + size_t aligned_new_size = align_object_size(new_size); - set_refill_waste_limit(initial_refill_waste_limit()); + if (PrintTLAB && Verbose) { + gclog_or_tty->print("TLAB new size: thread: " INTPTR_FORMAT " [id: %2d]" + " refills %d alloc: %8.6f desired_size: " SIZE_FORMAT " -> " SIZE_FORMAT "\n", + myThread(), myThread()->osthread()->thread_id(), + _target_refills, _allocation_fraction.average(), desired_size(), aligned_new_size); } + set_desired_size(aligned_new_size); + set_refill_waste_limit(initial_refill_waste_limit()); } void ThreadLocalAllocBuffer::initialize_statistics() { @@ -248,31 +252,13 @@ return init_sz; } -const size_t ThreadLocalAllocBuffer::max_size() { - - // TLABs can't be bigger than we can fill with a int[Integer.MAX_VALUE]. - // This restriction could be removed by enabling filling with multiple arrays. - // If we compute that the reasonable way as - // header_size + ((sizeof(jint) * max_jint) / HeapWordSize) - // we'll overflow on the multiply, so we do the divide first. - // We actually lose a little by dividing first, - // but that just makes the TLAB somewhat smaller than the biggest array, - // which is fine, since we'll be able to fill that. - - size_t unaligned_max_size = typeArrayOopDesc::header_size(T_INT) + - sizeof(jint) * - ((juint) max_jint / (size_t) HeapWordSize); - return align_size_down(unaligned_max_size, MinObjAlignment); -} - void ThreadLocalAllocBuffer::print_stats(const char* tag) { Thread* thrd = myThread(); size_t waste = _gc_waste + _slow_refill_waste + _fast_refill_waste; size_t alloc = _number_of_refills * _desired_size; double waste_percent = alloc == 0 ? 
0.0 : 100.0 * waste / alloc; - size_t tlab_used = Universe::heap()->tlab_capacity(thrd) - - Universe::heap()->unsafe_max_tlab_alloc(thrd); + size_t tlab_used = Universe::heap()->tlab_used(thrd); gclog_or_tty->print("TLAB: %s thread: " INTPTR_FORMAT " [id: %2d]" " desired_size: " SIZE_FORMAT "KB" " slow allocs: %d refill waste: " SIZE_FORMAT "B" diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/threadLocalAllocBuffer.hpp --- a/src/share/vm/memory/threadLocalAllocBuffer.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -45,7 +45,9 @@ HeapWord* _end; // allocation end (excluding alignment_reserve) size_t _desired_size; // desired size (including alignment_reserve) size_t _refill_waste_limit; // hold onto tlab if free() is larger than this + size_t _allocated_before_last_gc; // total bytes allocated up until the last gc + static size_t _max_size; // maximum size of any TLAB static unsigned _target_refills; // expected number of refills between GCs unsigned _number_of_refills; @@ -99,12 +101,13 @@ static GlobalTLABStats* global_stats() { return _global_stats; } public: - ThreadLocalAllocBuffer() : _allocation_fraction(TLABAllocationWeight) { + ThreadLocalAllocBuffer() : _allocation_fraction(TLABAllocationWeight), _allocated_before_last_gc(0) { // do nothing. tlabs must be inited by initialize() calls } static const size_t min_size() { return align_object_size(MinTLABSize / HeapWordSize); } - static const size_t max_size(); + static const size_t max_size() { assert(_max_size != 0, "max_size not set up"); return _max_size; } + static void set_max_size(size_t max_size) { _max_size = max_size; } HeapWord* start() const { return _start; } HeapWord* end() const { return _end; } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/memory/universe.cpp --- a/src/share/vm/memory/universe.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/memory/universe.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -816,6 +816,8 @@ Universe::_collectedHeap = new GenCollectedHeap(gc_policy); } + ThreadLocalAllocBuffer::set_max_size(Universe::heap()->max_tlab_size()); + jint status = Universe::heap()->initialize(); if (status != JNI_OK) { return status; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/instanceKlass.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -2238,15 +2238,7 @@ for (int m = 0; m < methods()->length(); m++) { MethodData* mdo = methods()->at(m)->method_data(); if (mdo != NULL) { - for (ProfileData* data = mdo->first_data(); - mdo->is_valid(data); - data = mdo->next_data(data)) { - data->clean_weak_klass_links(is_alive); - } - ParametersTypeData* parameters = mdo->parameters_type_data(); - if (parameters != NULL) { - parameters->clean_weak_klass_links(is_alive); - } + mdo->clean_method_data(is_alive); } } } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/instanceKlass.hpp --- a/src/share/vm/oops/instanceKlass.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/instanceKlass.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -306,7 +306,7 @@ // three cases: // NULL: no implementor. 
// A Klass* that's not itself: one implementor. - // Itsef: more than one implementors. + // Itself: more than one implementors. // embedded host klass follows here // The embedded host klass only exists in an anonymous class for // dynamic language support (JSR 292 enabled). The host class grants @@ -554,6 +554,7 @@ if (hk == NULL) { return NULL; } else { + assert(*hk != NULL, "host klass should always be set if the address is not null"); return *hk; } } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/metadata.hpp --- a/src/share/vm/oops/metadata.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/metadata.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,7 +40,7 @@ int identity_hash() { return (int)(uintptr_t)this; } // Rehashing support for tables containing pointers to this - unsigned int new_hash(jint seed) { ShouldNotReachHere(); return 0; } + unsigned int new_hash(juint seed) { ShouldNotReachHere(); return 0; } virtual bool is_klass() const volatile { return false; } virtual bool is_method() const volatile { return false; } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/method.cpp --- a/src/share/vm/oops/method.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/method.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -273,7 +273,7 @@ } address Method::bcp_from(int bci) const { - assert((is_native() && bci == 0) || (!is_native() && 0 <= bci && bci < code_size()), "illegal bci"); + assert((is_native() && bci == 0) || (!is_native() && 0 <= bci && bci < code_size()), err_msg("illegal bci: %d", bci)); address bcp = code_base() + bci; assert(is_native() && bcp == code_base() || contains(bcp), "bcp doesn't belong to this method"); return bcp; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/methodData.cpp --- a/src/share/vm/oops/methodData.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/methodData.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" +#include "compiler/compilerOracle.hpp" #include "interpreter/bytecode.hpp" #include "interpreter/bytecodeStream.hpp" #include "interpreter/linkResolver.hpp" @@ -80,8 +81,42 @@ _data = NULL; } +char* ProfileData::print_data_on_helper(const MethodData* md) const { + DataLayout* dp = md->extra_data_base(); + DataLayout* end = md->extra_data_limit(); + stringStream ss; + for (;; dp = MethodData::next_extra(dp)) { + assert(dp < end, "moved past end of extra data"); + switch(dp->tag()) { + case DataLayout::speculative_trap_data_tag: + if (dp->bci() == bci()) { + SpeculativeTrapData* data = new SpeculativeTrapData(dp); + int trap = data->trap_state(); + char buf[100]; + ss.print("trap/"); + data->method()->print_short_name(&ss); + ss.print("(%s) ", Deoptimization::format_trap_state(buf, sizeof(buf), trap)); + } + break; + case DataLayout::bit_data_tag: + break; + case DataLayout::no_tag: + case DataLayout::arg_info_data_tag: + return ss.as_string(); + break; + default: + fatal(err_msg("unexpected tag %d", dp->tag())); + } + } + return NULL; +} + +void ProfileData::print_data_on(outputStream* st, const MethodData* md) const { + print_data_on(st, print_data_on_helper(md)); +} + #ifndef PRODUCT -void ProfileData::print_shared(outputStream* st, 
const char* name) const { +void ProfileData::print_shared(outputStream* st, const char* name, const char* extra) const { st->print("bci: %d", bci()); st->fill_to(tab_width_one); st->print("%s", name); @@ -91,9 +126,13 @@ char buf[100]; st->print("trap(%s) ", Deoptimization::format_trap_state(buf, sizeof(buf), trap)); } + if (extra != NULL) { + st->print(extra); + } int flags = data()->flags(); - if (flags != 0) + if (flags != 0) { st->print("flags(%d) ", flags); + } } void ProfileData::tab(outputStream* st, bool first) const { @@ -109,8 +148,8 @@ #ifndef PRODUCT -void BitData::print_data_on(outputStream* st) const { - print_shared(st, "BitData"); +void BitData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "BitData", extra); } #endif // !PRODUCT @@ -120,8 +159,8 @@ // A CounterData corresponds to a simple counter. #ifndef PRODUCT -void CounterData::print_data_on(outputStream* st) const { - print_shared(st, "CounterData"); +void CounterData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "CounterData", extra); st->print_cr("count(%u)", count()); } #endif // !PRODUCT @@ -150,8 +189,8 @@ } #ifndef PRODUCT -void JumpData::print_data_on(outputStream* st) const { - print_shared(st, "JumpData"); +void JumpData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "JumpData", extra); st->print_cr("taken(%u) displacement(%d)", taken(), displacement()); } #endif // !PRODUCT @@ -332,8 +371,8 @@ st->cr(); } -void CallTypeData::print_data_on(outputStream* st) const { - CounterData::print_data_on(st); +void CallTypeData::print_data_on(outputStream* st, const char* extra) const { + CounterData::print_data_on(st, extra); if (has_arguments()) { tab(st, true); st->print("argument types"); @@ -346,8 +385,8 @@ } } -void VirtualCallTypeData::print_data_on(outputStream* st) const { - VirtualCallData::print_data_on(st); +void VirtualCallTypeData::print_data_on(outputStream* st, const char* extra) const { + VirtualCallData::print_data_on(st, extra); if (has_arguments()) { tab(st, true); st->print("argument types"); @@ -400,12 +439,12 @@ } } } -void ReceiverTypeData::print_data_on(outputStream* st) const { - print_shared(st, "ReceiverTypeData"); +void ReceiverTypeData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "ReceiverTypeData", extra); print_receiver_data_on(st); } -void VirtualCallData::print_data_on(outputStream* st) const { - print_shared(st, "VirtualCallData"); +void VirtualCallData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "VirtualCallData", extra); print_receiver_data_on(st); } #endif // !PRODUCT @@ -461,8 +500,8 @@ #endif // CC_INTERP #ifndef PRODUCT -void RetData::print_data_on(outputStream* st) const { - print_shared(st, "RetData"); +void RetData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "RetData", extra); uint row; int entries = 0; for (row = 0; row < row_limit(); row++) { @@ -496,8 +535,8 @@ } #ifndef PRODUCT -void BranchData::print_data_on(outputStream* st) const { - print_shared(st, "BranchData"); +void BranchData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "BranchData", extra); st->print_cr("taken(%u) displacement(%d)", taken(), displacement()); tab(st); @@ -570,8 +609,8 @@ } #ifndef PRODUCT -void MultiBranchData::print_data_on(outputStream* st) const { - print_shared(st, "MultiBranchData"); +void MultiBranchData::print_data_on(outputStream* st, const char* extra) const 
{ + print_shared(st, "MultiBranchData", extra); st->print_cr("default_count(%u) displacement(%d)", default_count(), default_displacement()); int cases = number_of_cases(); @@ -584,8 +623,8 @@ #endif #ifndef PRODUCT -void ArgInfoData::print_data_on(outputStream* st) const { - print_shared(st, "ArgInfoData"); +void ArgInfoData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "ArgInfoData", extra); int nargs = number_of_args(); for (int i = 0; i < nargs; i++) { st->print(" 0x%x", arg_modified(i)); @@ -616,10 +655,17 @@ } #ifndef PRODUCT -void ParametersTypeData::print_data_on(outputStream* st) const { - st->print("parameter types"); +void ParametersTypeData::print_data_on(outputStream* st, const char* extra) const { + st->print("parameter types", extra); _parameters.print_data_on(st); } + +void SpeculativeTrapData::print_data_on(outputStream* st, const char* extra) const { + print_shared(st, "SpeculativeTrapData", extra); + tab(st); + method()->print_short_name(st); + st->cr(); +} #endif // ================================================================== @@ -745,7 +791,27 @@ return DataLayout::compute_size_in_bytes(cell_count); } -int MethodData::compute_extra_data_count(int data_size, int empty_bc_count) { +bool MethodData::is_speculative_trap_bytecode(Bytecodes::Code code) { + // Bytecodes for which we may use speculation + switch (code) { + case Bytecodes::_checkcast: + case Bytecodes::_instanceof: + case Bytecodes::_aastore: + case Bytecodes::_invokevirtual: + case Bytecodes::_invokeinterface: + case Bytecodes::_if_acmpeq: + case Bytecodes::_if_acmpne: + case Bytecodes::_invokestatic: +#ifdef COMPILER2 + return UseTypeSpeculation; +#endif + default: + return false; + } + return false; +} + +int MethodData::compute_extra_data_count(int data_size, int empty_bc_count, bool needs_speculative_traps) { if (ProfileTraps) { // Assume that up to 3% of BCIs with no MDP will need to allocate one. int extra_data_count = (uint)(empty_bc_count * 3) / 128 + 1; @@ -756,7 +822,18 @@ extra_data_count = one_percent_of_data; if (extra_data_count > empty_bc_count) extra_data_count = empty_bc_count; // no need for more - return extra_data_count; + + // Make sure we have a minimum number of extra data slots to + // allocate SpeculativeTrapData entries. We would want to have one + // entry per compilation that inlines this method and for which + // some type speculation assumption fails. So the room we need for + // the SpeculativeTrapData entries doesn't directly depend on the + // size of the method. Because it's hard to estimate, we reserve + // space for an arbitrary number of entries. + int spec_data_count = (needs_speculative_traps ? SpecTrapLimitExtraEntries : 0) * + (SpeculativeTrapData::static_cell_count() + DataLayout::header_size_in_cells()); + + return MAX2(extra_data_count, spec_data_count); } else { return 0; } @@ -769,15 +846,17 @@ BytecodeStream stream(method); Bytecodes::Code c; int empty_bc_count = 0; // number of bytecodes lacking data + bool needs_speculative_traps = false; while ((c = stream.next()) >= 0) { int size_in_bytes = compute_data_size(&stream); data_size += size_in_bytes; if (size_in_bytes == 0) empty_bc_count += 1; + needs_speculative_traps = needs_speculative_traps || is_speculative_trap_bytecode(c); } int object_size = in_bytes(data_offset()) + data_size; // Add some extra DataLayout cells (at least one) to track stray traps. 
- int extra_data_count = compute_extra_data_count(data_size, empty_bc_count); + int extra_data_count = compute_extra_data_count(data_size, empty_bc_count, needs_speculative_traps); object_size += extra_data_count * DataLayout::compute_size_in_bytes(0); // Add a cell to record information about modified arguments. @@ -993,7 +1072,8 @@ } // Initialize the MethodData* corresponding to a given method. -MethodData::MethodData(methodHandle method, int size, TRAPS) { +MethodData::MethodData(methodHandle method, int size, TRAPS) + : _extra_data_lock(Monitor::leaf, "MDO extra data lock") { No_Safepoint_Verifier no_safepoint; // init function atomic wrt GC ResourceMark rm; // Set the method back-pointer. @@ -1009,18 +1089,23 @@ _data[0] = 0; // apparently not set below. BytecodeStream stream(method); Bytecodes::Code c; + bool needs_speculative_traps = false; while ((c = stream.next()) >= 0) { int size_in_bytes = initialize_data(&stream, data_size); data_size += size_in_bytes; if (size_in_bytes == 0) empty_bc_count += 1; + needs_speculative_traps = needs_speculative_traps || is_speculative_trap_bytecode(c); } _data_size = data_size; int object_size = in_bytes(data_offset()) + data_size; // Add some extra DataLayout cells (at least one) to track stray traps. - int extra_data_count = compute_extra_data_count(data_size, empty_bc_count); + int extra_data_count = compute_extra_data_count(data_size, empty_bc_count, needs_speculative_traps); int extra_size = extra_data_count * DataLayout::compute_size_in_bytes(0); + // Let's zero the space for the extra data + Copy::zero_to_bytes(((address)_data) + data_size, extra_size); + // Add a cell to record information about modified arguments. // Set up _args_modified array after traps cells so that // the code for traps cells works. @@ -1032,17 +1117,17 @@ int arg_data_size = DataLayout::compute_size_in_bytes(arg_size+1); object_size += extra_size + arg_data_size; - int args_cell = ParametersTypeData::compute_cell_count(method()); + int parms_cell = ParametersTypeData::compute_cell_count(method()); // If we are profiling parameters, we reserver an area near the end // of the MDO after the slots for bytecodes (because there's no bci // for method entry so they don't fit with the framework for the // profiling of bytecodes). We store the offset within the MDO of // this area (or -1 if no parameter is profiled) - if (args_cell > 0) { - object_size += DataLayout::compute_size_in_bytes(args_cell); + if (parms_cell > 0) { + object_size += DataLayout::compute_size_in_bytes(parms_cell); _parameters_type_data_di = data_size + extra_size + arg_data_size; DataLayout *dp = data_layout_at(data_size + extra_size + arg_data_size); - dp->initialize(DataLayout::parameters_type_data_tag, 0, args_cell); + dp->initialize(DataLayout::parameters_type_data_tag, 0, parms_cell); } else { _parameters_type_data_di = -1; } @@ -1069,6 +1154,21 @@ _highest_osr_comp_level = 0; _would_profile = true; +#if INCLUDE_RTM_OPT + _rtm_state = NoRTM; // No RTM lock eliding by default + if (UseRTMLocking && + !CompilerOracle::has_option_string(_method, "NoRTMLockEliding")) { + if (CompilerOracle::has_option_string(_method, "UseRTMLockEliding") || !UseRTMDeopt) { + // Generate RTM lock eliding code without abort ratio calculation code. + _rtm_state = UseRTM; + } else if (UseRTMDeopt) { + // Generate RTM lock eliding code and include abort ratio calculation + // code if UseRTMDeopt is on. + _rtm_state = ProfileRTM; + } + } +#endif + // Initialize flags and trap history. 
_nof_decompiles = 0; _nof_overflow_recompiles = 0; @@ -1133,39 +1233,114 @@ break; } } - return bci_to_extra_data(bci, false); + return bci_to_extra_data(bci, NULL, false); } -// Translate a bci to its corresponding extra data, or NULL. -ProfileData* MethodData::bci_to_extra_data(int bci, bool create_if_missing) { - DataLayout* dp = extra_data_base(); - DataLayout* end = extra_data_limit(); - DataLayout* avail = NULL; - for (; dp < end; dp = next_extra(dp)) { +DataLayout* MethodData::next_extra(DataLayout* dp) { + int nb_cells = 0; + switch(dp->tag()) { + case DataLayout::bit_data_tag: + case DataLayout::no_tag: + nb_cells = BitData::static_cell_count(); + break; + case DataLayout::speculative_trap_data_tag: + nb_cells = SpeculativeTrapData::static_cell_count(); + break; + default: + fatal(err_msg("unexpected tag %d", dp->tag())); + } + return (DataLayout*)((address)dp + DataLayout::compute_size_in_bytes(nb_cells)); +} + +ProfileData* MethodData::bci_to_extra_data_helper(int bci, Method* m, DataLayout*& dp, bool concurrent) { + DataLayout* end = extra_data_limit(); + + for (;; dp = next_extra(dp)) { + assert(dp < end, "moved past end of extra data"); // No need for "OrderAccess::load_acquire" ops, // since the data structure is monotonic. - if (dp->tag() == DataLayout::no_tag) break; - if (dp->tag() == DataLayout::arg_info_data_tag) { - dp = end; // ArgInfoData is at the end of extra data section. + switch(dp->tag()) { + case DataLayout::no_tag: + return NULL; + case DataLayout::arg_info_data_tag: + dp = end; + return NULL; // ArgInfoData is at the end of extra data section. + case DataLayout::bit_data_tag: + if (m == NULL && dp->bci() == bci) { + return new BitData(dp); + } break; - } - if (dp->bci() == bci) { - assert(dp->tag() == DataLayout::bit_data_tag, "sane"); - return new BitData(dp); + case DataLayout::speculative_trap_data_tag: + if (m != NULL) { + SpeculativeTrapData* data = new SpeculativeTrapData(dp); + // data->method() may be null in case of a concurrent + // allocation. Maybe it's for the same method. Try to use that + // entry in that case. + if (dp->bci() == bci) { + if (data->method() == NULL) { + assert(concurrent, "impossible because no concurrent allocation"); + return NULL; + } else if (data->method() == m) { + return data; + } + } + } + break; + default: + fatal(err_msg("unexpected tag %d", dp->tag())); } } + return NULL; +} + + +// Translate a bci to its corresponding extra data, or NULL. +ProfileData* MethodData::bci_to_extra_data(int bci, Method* m, bool create_if_missing) { + // This code assumes an entry for a SpeculativeTrapData is 2 cells + assert(2*DataLayout::compute_size_in_bytes(BitData::static_cell_count()) == + DataLayout::compute_size_in_bytes(SpeculativeTrapData::static_cell_count()), + "code needs to be adjusted"); + + DataLayout* dp = extra_data_base(); + DataLayout* end = extra_data_limit(); + + // Allocation in the extra data space has to be atomic because not + // all entries have the same size and non atomic concurrent + // allocation would result in a corrupted extra data space. + ProfileData* result = bci_to_extra_data_helper(bci, m, dp, true); + if (result != NULL) { + return result; + } + if (create_if_missing && dp < end) { - // Allocate this one. There is no mutual exclusion, - // so two threads could allocate different BCIs to the - // same data layout. This means these extra data - // records, like most other MDO contents, must not be - // trusted too much. + MutexLocker ml(&_extra_data_lock); + // Check again now that we have the lock. 
Another thread may + // have added extra data entries. + ProfileData* result = bci_to_extra_data_helper(bci, m, dp, false); + if (result != NULL || dp >= end) { + return result; + } + + assert(dp->tag() == DataLayout::no_tag || (dp->tag() == DataLayout::speculative_trap_data_tag && m != NULL), "should be free"); + assert(next_extra(dp)->tag() == DataLayout::no_tag || next_extra(dp)->tag() == DataLayout::arg_info_data_tag, "should be free or arg info"); + u1 tag = m == NULL ? DataLayout::bit_data_tag : DataLayout::speculative_trap_data_tag; + // SpeculativeTrapData is 2 slots. Make sure we have room. + if (m != NULL && next_extra(dp)->tag() != DataLayout::no_tag) { + return NULL; + } DataLayout temp; - temp.initialize(DataLayout::bit_data_tag, bci, 0); - dp->release_set_header(temp.header()); - assert(dp->tag() == DataLayout::bit_data_tag, "sane"); - //NO: assert(dp->bci() == bci, "no concurrent allocation"); - return new BitData(dp); + temp.initialize(tag, bci, 0); + + dp->set_header(temp.header()); + assert(dp->tag() == tag, "sane"); + assert(dp->bci() == bci, "no concurrent allocation"); + if (tag == DataLayout::bit_data_tag) { + return new BitData(dp); + } else { + SpeculativeTrapData* data = new SpeculativeTrapData(dp); + data->set_method(m); + return data; + } } return NULL; } @@ -1210,25 +1385,35 @@ for ( ; is_valid(data); data = next_data(data)) { st->print("%d", dp_to_di(data->dp())); st->fill_to(6); - data->print_data_on(st); + data->print_data_on(st, this); } st->print_cr("--- Extra data:"); DataLayout* dp = extra_data_base(); DataLayout* end = extra_data_limit(); - for (; dp < end; dp = next_extra(dp)) { + for (;; dp = next_extra(dp)) { + assert(dp < end, "moved past end of extra data"); // No need for "OrderAccess::load_acquire" ops, // since the data structure is monotonic. - if (dp->tag() == DataLayout::no_tag) continue; - if (dp->tag() == DataLayout::bit_data_tag) { + switch(dp->tag()) { + case DataLayout::no_tag: + continue; + case DataLayout::bit_data_tag: data = new BitData(dp); - } else { - assert(dp->tag() == DataLayout::arg_info_data_tag, "must be BitData or ArgInfo"); + break; + case DataLayout::speculative_trap_data_tag: + data = new SpeculativeTrapData(dp); + break; + case DataLayout::arg_info_data_tag: data = new ArgInfoData(dp); dp = end; // ArgInfoData is at the end of extra data section. 
+ break; + default: + fatal(err_msg("unexpected tag %d", dp->tag())); } st->print("%d", dp_to_di(data->dp())); st->fill_to(6); data->print_data_on(st); + if (dp >= end) return; } } #endif @@ -1351,3 +1536,110 @@ assert(profile_parameters_jsr292_only(), "inconsistent"); return m->is_compiled_lambda_form(); } + +void MethodData::clean_extra_data_helper(DataLayout* dp, int shift, bool reset) { + if (shift == 0) { + return; + } + if (!reset) { + // Move all cells of trap entry at dp left by "shift" cells + intptr_t* start = (intptr_t*)dp; + intptr_t* end = (intptr_t*)next_extra(dp); + for (intptr_t* ptr = start; ptr < end; ptr++) { + *(ptr-shift) = *ptr; + } + } else { + // Reset "shift" cells stopping at dp + intptr_t* start = ((intptr_t*)dp) - shift; + intptr_t* end = (intptr_t*)dp; + for (intptr_t* ptr = start; ptr < end; ptr++) { + *ptr = 0; + } + } +} + +// Remove SpeculativeTrapData entries that reference an unloaded +// method +void MethodData::clean_extra_data(BoolObjectClosure* is_alive) { + DataLayout* dp = extra_data_base(); + DataLayout* end = extra_data_limit(); + + int shift = 0; + for (; dp < end; dp = next_extra(dp)) { + switch(dp->tag()) { + case DataLayout::speculative_trap_data_tag: { + SpeculativeTrapData* data = new SpeculativeTrapData(dp); + Method* m = data->method(); + assert(m != NULL, "should have a method"); + if (!m->method_holder()->is_loader_alive(is_alive)) { + // "shift" accumulates the number of cells for dead + // SpeculativeTrapData entries that have been seen so + // far. Following entries must be shifted left by that many + // cells to remove the dead SpeculativeTrapData entries. + shift += (int)((intptr_t*)next_extra(dp) - (intptr_t*)dp); + } else { + // Shift this entry left if it follows dead + // SpeculativeTrapData entries + clean_extra_data_helper(dp, shift); + } + break; + } + case DataLayout::bit_data_tag: + // Shift this entry left if it follows dead SpeculativeTrapData + // entries + clean_extra_data_helper(dp, shift); + continue; + case DataLayout::no_tag: + case DataLayout::arg_info_data_tag: + // We are at end of the live trap entries. The previous "shift" + // cells contain entries that are either dead or were shifted + // left. 
They need to be reset to no_tag + clean_extra_data_helper(dp, shift, true); + return; + default: + fatal(err_msg("unexpected tag %d", dp->tag())); + } + } +} + +// Verify there's no unloaded method referenced by a +// SpeculativeTrapData entry +void MethodData::verify_extra_data_clean(BoolObjectClosure* is_alive) { +#ifdef ASSERT + DataLayout* dp = extra_data_base(); + DataLayout* end = extra_data_limit(); + + for (; dp < end; dp = next_extra(dp)) { + switch(dp->tag()) { + case DataLayout::speculative_trap_data_tag: { + SpeculativeTrapData* data = new SpeculativeTrapData(dp); + Method* m = data->method(); + assert(m != NULL && m->method_holder()->is_loader_alive(is_alive), "Method should exist"); + break; + } + case DataLayout::bit_data_tag: + continue; + case DataLayout::no_tag: + case DataLayout::arg_info_data_tag: + return; + default: + fatal(err_msg("unexpected tag %d", dp->tag())); + } + } +#endif +} + +void MethodData::clean_method_data(BoolObjectClosure* is_alive) { + for (ProfileData* data = first_data(); + is_valid(data); + data = next_data(data)) { + data->clean_weak_klass_links(is_alive); + } + ParametersTypeData* parameters = parameters_type_data(); + if (parameters != NULL) { + parameters->clean_weak_klass_links(is_alive); + } + + clean_extra_data(is_alive); + verify_extra_data_clean(is_alive); +} diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/methodData.hpp --- a/src/share/vm/oops/methodData.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/methodData.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -120,7 +120,8 @@ arg_info_data_tag, call_type_data_tag, virtual_call_type_data_tag, - parameters_type_data_tag + parameters_type_data_tag, + speculative_trap_data_tag }; enum { @@ -189,9 +190,6 @@ void set_header(intptr_t value) { _header._bits = value; } - void release_set_header(intptr_t value) { - OrderAccess::release_store_ptr(&_header._bits, value); - } intptr_t header() { return _header._bits; } @@ -271,6 +269,7 @@ class MultiBranchData; class ArgInfoData; class ParametersTypeData; +class SpeculativeTrapData; // ProfileData // @@ -291,6 +290,8 @@ // This is a pointer to a section of profiling data. DataLayout* _data; + char* print_data_on_helper(const MethodData* md) const; + protected: DataLayout* data() { return _data; } const DataLayout* data() const { return _data; } @@ -440,6 +441,7 @@ virtual bool is_CallTypeData() const { return false; } virtual bool is_VirtualCallTypeData()const { return false; } virtual bool is_ParametersTypeData() const { return false; } + virtual bool is_SpeculativeTrapData()const { return false; } BitData* as_BitData() const { @@ -494,6 +496,10 @@ assert(is_ParametersTypeData(), "wrong type"); return is_ParametersTypeData() ? (ParametersTypeData*)this : NULL; } + SpeculativeTrapData* as_SpeculativeTrapData() const { + assert(is_SpeculativeTrapData(), "wrong type"); + return is_SpeculativeTrapData() ? (SpeculativeTrapData*)this : NULL; + } // Subclass specific initialization @@ -509,12 +515,14 @@ // translation here, and the required translators are in the ci subclasses. 
virtual void translate_from(const ProfileData* data) {} - virtual void print_data_on(outputStream* st) const { + virtual void print_data_on(outputStream* st, const char* extra = NULL) const { ShouldNotReachHere(); } + void print_data_on(outputStream* st, const MethodData* md) const; + #ifndef PRODUCT - void print_shared(outputStream* st, const char* name) const; + void print_shared(outputStream* st, const char* name, const char* extra) const; void tab(outputStream* st, bool first = false) const; #endif }; @@ -576,7 +584,7 @@ #endif // CC_INTERP #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -639,7 +647,7 @@ #endif // CC_INTERP #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -726,7 +734,7 @@ void post_initialize(BytecodeStream* stream, MethodData* mdo); #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1137,7 +1145,7 @@ } #ifndef PRODUCT - virtual void print_data_on(outputStream* st) const; + virtual void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1282,7 +1290,7 @@ #ifndef PRODUCT void print_receiver_data_on(outputStream* st) const; - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1325,7 +1333,7 @@ #endif // CC_INTERP #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1451,7 +1459,7 @@ } #ifndef PRODUCT - virtual void print_data_on(outputStream* st) const; + virtual void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1554,7 +1562,7 @@ void post_initialize(BytecodeStream* stream, MethodData* mdo); #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1632,7 +1640,7 @@ void post_initialize(BytecodeStream* stream, MethodData* mdo); #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1825,7 +1833,7 @@ void post_initialize(BytecodeStream* stream, MethodData* mdo); #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1852,7 +1860,7 @@ } #ifndef PRODUCT - void print_data_on(outputStream* st) const; + void print_data_on(outputStream* st, const char* extra = NULL) const; #endif }; @@ -1913,7 +1921,7 @@ } #ifndef PRODUCT - virtual void print_data_on(outputStream* st) const; + virtual void print_data_on(outputStream* st, const char* extra = NULL) const; #endif static ByteSize stack_slot_offset(int i) { @@ -1925,6 +1933,54 @@ } }; +// SpeculativeTrapData +// +// A SpeculativeTrapData is used to record traps due to type +// speculation. It records the root of the compilation: that type +// speculation is wrong in the context of one compilation (for +// method1) doesn't mean it's wrong in the context of another one (for +// method2). Type speculation could have more/different data in the +// context of the compilation of method2 and it's worthwhile to try an +// optimization that failed for compilation of method1 in the context +// of compilation of method2. 
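(Editorial sketch, not part of this changeset.) Put differently, a speculative-trap record is keyed by the trap bci and by the root method of the compilation whose speculation failed, and a lookup has to match both; the comment resumes below with how space for these entries is carved out of the MDO's extra data area. A simplified stand-alone illustration, using plain structs instead of the DataLayout encoding:

#include <vector>

// Hypothetical, simplified stand-ins: a real entry lives in the MDO's extra
// data area as a DataLayout tagged speculative_trap_data_tag.
struct Method { int id; };

struct SpeculativeTrap {
  const Method* root;        // root method of the compilation that speculated
  int           bci;         // bytecode index of the trapping point
  int           trap_state;
};

// Both the bci and the compilation root must match, so a speculation that
// failed while compiling method1 does not stop the same optimization from
// being tried again when method2 is compiled.
static const SpeculativeTrap* find_trap(const std::vector<SpeculativeTrap>& traps,
                                        int bci, const Method* root) {
  for (const SpeculativeTrap& t : traps) {
    if (t.bci == bci && t.root == root) {
      return &t;
    }
  }
  return nullptr;
}

int main() {
  Method m1{1}, m2{2};
  std::vector<SpeculativeTrap> traps = { {&m1, 42, 1} };
  return (find_trap(traps, 42, &m1) != nullptr &&      // hit for the same root
          find_trap(traps, 42, &m2) == nullptr) ? 0 : 1; // miss for a different root
}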
+// Space for SpeculativeTrapData entries is allocated from the extra +// data space in the MDO. If we run out of space, the trap data for +// the ProfileData at that bci is updated. +class SpeculativeTrapData : public ProfileData { +protected: + enum { + method_offset, + speculative_trap_cell_count + }; +public: + SpeculativeTrapData(DataLayout* layout) : ProfileData(layout) { + assert(layout->tag() == DataLayout::speculative_trap_data_tag, "wrong type"); + } + + virtual bool is_SpeculativeTrapData() const { return true; } + + static int static_cell_count() { + return speculative_trap_cell_count; + } + + virtual int cell_count() const { + return static_cell_count(); + } + + // Direct accessor + Method* method() const { + return (Method*)intptr_at(method_offset); + } + + void set_method(Method* m) { + set_intptr_at(method_offset, (intptr_t)m); + } + +#ifndef PRODUCT + virtual void print_data_on(outputStream* st, const char* extra = NULL) const; +#endif +}; + // MethodData* // // A MethodData* holds information which has been collected about @@ -1985,16 +2041,18 @@ // Cached hint for bci_to_dp and bci_to_data int _hint_di; + Mutex _extra_data_lock; + MethodData(methodHandle method, int size, TRAPS); public: static MethodData* allocate(ClassLoaderData* loader_data, methodHandle method, TRAPS); - MethodData() {}; // For ciMethodData + MethodData() : _extra_data_lock(Monitor::leaf, "MDO extra data lock") {}; // For ciMethodData bool is_methodData() const volatile { return true; } // Whole-method sticky bits and flags enum { - _trap_hist_limit = 17, // decoupled from Deoptimization::Reason_LIMIT + _trap_hist_limit = 19, // decoupled from Deoptimization::Reason_LIMIT _trap_hist_mask = max_jubyte, _extra_data_count = 4 // extra DataLayout headers, for trap history }; // Public flag values @@ -2025,6 +2083,12 @@ // Counter values at the time profiling started. int _invocation_counter_start; int _backedge_counter_start; + +#if INCLUDE_RTM_OPT + // State of RTM code generation during compilation of the method + int _rtm_state; +#endif + // Number of loops and blocks is computed when compiling the first // time with C1. It is used to determine if method is trivial. short _num_loops; @@ -2049,6 +2113,7 @@ // Helper for size computation static int compute_data_size(BytecodeStream* stream); static int bytecode_cell_count(Bytecodes::Code code); + static bool is_speculative_trap_bytecode(Bytecodes::Code code); enum { no_profile_data = -1, variable_cell_count = -2 }; // Helper for initialization @@ -2092,8 +2157,9 @@ // What is the index of the first data entry? int first_di() const { return 0; } + ProfileData* bci_to_extra_data_helper(int bci, Method* m, DataLayout*& dp, bool concurrent); // Find or create an extra ProfileData: - ProfileData* bci_to_extra_data(int bci, bool create_if_missing); + ProfileData* bci_to_extra_data(int bci, Method* m, bool create_if_missing); // return the argument info cell ArgInfoData *arg_info(); @@ -2116,6 +2182,10 @@ static bool profile_parameters_jsr292_only(); static bool profile_all_parameters(); + void clean_extra_data(BoolObjectClosure* is_alive); + void clean_extra_data_helper(DataLayout* dp, int shift, bool reset = false); + void verify_extra_data_clean(BoolObjectClosure* is_alive); + public: static int header_size() { return sizeof(MethodData)/wordSize; @@ -2124,7 +2194,7 @@ // Compute the size of a MethodData* before it is created. 
static int compute_allocation_size_in_bytes(methodHandle method); static int compute_allocation_size_in_words(methodHandle method); - static int compute_extra_data_count(int data_size, int empty_bc_count); + static int compute_extra_data_count(int data_size, int empty_bc_count, bool needs_speculative_traps); // Determine if a given bytecode can have profile information. static bool bytecode_has_profile(Bytecodes::Code code) { @@ -2182,6 +2252,22 @@ InvocationCounter* invocation_counter() { return &_invocation_counter; } InvocationCounter* backedge_counter() { return &_backedge_counter; } +#if INCLUDE_RTM_OPT + int rtm_state() const { + return _rtm_state; + } + void set_rtm_state(RTMState rstate) { + _rtm_state = (int)rstate; + } + void atomic_set_rtm_state(RTMState rstate) { + Atomic::store((int)rstate, &_rtm_state); + } + + static int rtm_state_offset_in_bytes() { + return offset_of(MethodData, _rtm_state); + } +#endif + void set_would_profile(bool p) { _would_profile = p; } bool would_profile() const { return _would_profile; } @@ -2265,9 +2351,26 @@ ProfileData* bci_to_data(int bci); // Same, but try to create an extra_data record if one is needed: - ProfileData* allocate_bci_to_data(int bci) { - ProfileData* data = bci_to_data(bci); - return (data != NULL) ? data : bci_to_extra_data(bci, true); + ProfileData* allocate_bci_to_data(int bci, Method* m) { + ProfileData* data = NULL; + // If m not NULL, try to allocate a SpeculativeTrapData entry + if (m == NULL) { + data = bci_to_data(bci); + } + if (data != NULL) { + return data; + } + data = bci_to_extra_data(bci, m, true); + if (data != NULL) { + return data; + } + // If SpeculativeTrapData allocation fails try to allocate a + // regular entry + data = bci_to_data(bci); + if (data != NULL) { + return data; + } + return bci_to_extra_data(bci, NULL, true); } // Add a handful of extra data records, for trap tracking. @@ -2275,7 +2378,7 @@ DataLayout* extra_data_limit() const { return (DataLayout*)((address)this + size_in_bytes()); } int extra_data_size() const { return (address)extra_data_limit() - (address)extra_data_base(); } - static DataLayout* next_extra(DataLayout* dp) { return (DataLayout*)((address)dp + in_bytes(DataLayout::cell_offset(0))); } + static DataLayout* next_extra(DataLayout* dp); // Return (uint)-1 for overflow. uint trap_count(int reason) const { @@ -2375,6 +2478,8 @@ static bool profile_return(); static bool profile_parameters(); static bool profile_return_jsr292_only(); + + void clean_method_data(BoolObjectClosure* is_alive); }; #endif // SHARE_VM_OOPS_METHODDATAOOP_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/oop.cpp --- a/src/share/vm/oops/oop.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/oop.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -102,7 +102,7 @@ } // When String table needs to rehash -unsigned int oopDesc::new_hash(jint seed) { +unsigned int oopDesc::new_hash(juint seed) { EXCEPTION_MARK; ResourceMark rm; int length; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/oop.hpp --- a/src/share/vm/oops/oop.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/oop.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -362,7 +362,7 @@ intptr_t slow_identity_hash(); // Alternate hashing code if string table is rehashed - unsigned int new_hash(jint seed); + unsigned int new_hash(juint seed); // marks are forwarded to stack when object is locked bool has_displaced_mark() const; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/symbol.cpp --- a/src/share/vm/oops/symbol.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/symbol.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -207,7 +207,7 @@ } // Alternate hashing for unbalanced symbol tables. -unsigned int Symbol::new_hash(jint seed) { +unsigned int Symbol::new_hash(juint seed) { ResourceMark rm; // Use alternate hashing algorithm on this symbol. return AltHashing::murmur3_32(seed, (const jbyte*)as_C_string(), utf8_length()); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/oops/symbol.hpp --- a/src/share/vm/oops/symbol.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/oops/symbol.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -154,7 +154,7 @@ int identity_hash() { return _identity_hash; } // For symbol table alternate hashing - unsigned int new_hash(jint seed); + unsigned int new_hash(juint seed); // Reference counting. See comments above this class for when to use. 
int refcount() const { return _refcount; } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/c2_globals.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -449,6 +449,9 @@ diagnostic(bool, PrintPreciseBiasedLockingStatistics, false, \ "Print per-lock-site statistics of biased locking in JVM") \ \ + diagnostic(bool, PrintPreciseRTMLockingStatistics, false, \ + "Print per-lock-site statistics of rtm locking in JVM") \ + \ notproduct(bool, PrintEliminateLocks, false, \ "Print out when locks are eliminated") \ \ @@ -647,15 +650,19 @@ diagnostic(bool, OptimizeExpensiveOps, true, \ "Find best control for expensive operations") \ \ - experimental(bool, UseMathExactIntrinsics, false, \ + product(bool, UseMathExactIntrinsics, true, \ "Enables intrinsification of various java.lang.Math functions") \ \ experimental(bool, ReplaceInParentMaps, false, \ "Propagate type improvements in callers of inlinee if possible") \ \ - experimental(bool, UseTypeSpeculation, false, \ + product(bool, UseTypeSpeculation, true, \ "Speculatively propagate types from profiles") \ \ + diagnostic(bool, UseInlineDepthForSpeculativeTypes, true, \ + "Carry inline depth of profile point with speculative type " \ + "and give priority to profiling from lower inline depth") \ + \ product_pd(bool, TrapBasedRangeChecks, \ "Generate code for range checks that uses a cmp and trap " \ "instruction raising SIGTRAP. Used on PPC64.") \ diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/classes.hpp --- a/src/share/vm/opto/classes.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/classes.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -29,8 +29,6 @@ macro(AbsF) macro(AbsI) macro(AddD) -macro(AddExactI) -macro(AddExactL) macro(AddF) macro(AddI) macro(AddL) @@ -135,7 +133,6 @@ macro(ExpD) macro(FastLock) macro(FastUnlock) -macro(FlagsProj) macro(Goto) macro(Halt) macro(If) @@ -170,9 +167,6 @@ macro(LoopLimit) macro(Mach) macro(MachProj) -macro(MathExact) -macro(MathExactI) -macro(MathExactL) macro(MaxI) macro(MemBarAcquire) macro(LoadFence) @@ -194,22 +188,25 @@ macro(MoveL2D) macro(MoveD2L) macro(MulD) -macro(MulExactI) -macro(MulExactL) macro(MulF) macro(MulHiL) macro(MulI) macro(MulL) macro(Multi) macro(NegD) -macro(NegExactI) -macro(NegExactL) macro(NegF) macro(NeverBranch) macro(Opaque1) macro(Opaque2) +macro(Opaque3) macro(OrI) macro(OrL) +macro(OverflowAddI) +macro(OverflowSubI) +macro(OverflowMulI) +macro(OverflowAddL) +macro(OverflowSubL) +macro(OverflowMulL) macro(PCTable) macro(Parm) macro(PartialSubtypeCheck) @@ -253,8 +250,6 @@ macro(StrEquals) macro(StrIndexOf) macro(SubD) -macro(SubExactI) -macro(SubExactL) macro(SubF) macro(SubI) macro(SubL) diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/compile.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -694,9 +694,10 @@ set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining)); set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics")); - if (ProfileTraps) { + if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) { // Make sure the method being compiled gets its own MDO, // so we can at least track the decompile_count(). + // Need MDO to record RTM code generation state. 
method()->ensure_method_data(); } @@ -907,7 +908,8 @@ compiler, env()->comp_level(), has_unsafe_access(), - SharedRuntime::is_wide_vector(max_vector_size()) + SharedRuntime::is_wide_vector(max_vector_size()), + rtm_state() ); if (log() != NULL) // Print code cache state into compiler log @@ -1073,7 +1075,23 @@ set_do_scheduling(OptoScheduling); set_do_count_invocations(false); set_do_method_data_update(false); - + set_rtm_state(NoRTM); // No RTM lock eliding by default +#if INCLUDE_RTM_OPT + if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) { + int rtm_state = method()->method_data()->rtm_state(); + if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) { + // Don't generate RTM lock eliding code. + set_rtm_state(NoRTM); + } else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) { + // Generate RTM lock eliding code without abort ratio calculation code. + set_rtm_state(UseRTM); + } else if (UseRTMDeopt) { + // Generate RTM lock eliding code and include abort ratio calculation + // code if UseRTMDeopt is on. + set_rtm_state(ProfileRTM); + } + } +#endif if (debug_info()->recording_non_safepoints()) { set_node_note_array(new(comp_arena()) GrowableArray (comp_arena(), 8, 0, NULL)); @@ -2581,6 +2599,7 @@ break; case Op_Opaque1: // Remove Opaque Nodes before matching case Op_Opaque2: // Remove Opaque Nodes before matching + case Op_Opaque3: n->subsume_by(n->in(1), this); break; case Op_CallStaticJava: @@ -3028,42 +3047,6 @@ n->set_req(MemBarNode::Precedent, top()); } break; - // Must set a control edge on all nodes that produce a FlagsProj - // so they can't escape the block that consumes the flags. - // Must also set the non throwing branch as the control - // for all nodes that depends on the result. Unless the node - // already have a control that isn't the control of the - // flag producer - case Op_FlagsProj: - { - MathExactNode* math = (MathExactNode*) n->in(0); - Node* ctrl = math->control_node(); - Node* non_throwing = math->non_throwing_branch(); - math->set_req(0, ctrl); - - Node* result = math->result_node(); - if (result != NULL) { - for (DUIterator_Fast jmax, j = result->fast_outs(jmax); j < jmax; j++) { - Node* out = result->fast_out(j); - // Phi nodes shouldn't be moved. They would only match below if they - // had the same control as the MathExactNode. The only time that - // would happen is if the Phi is also an input to the MathExact - // - // Cmp nodes shouldn't have control set at all. - if (out->is_Phi() || - out->is_Cmp()) { - continue; - } - - if (out->in(0) == NULL) { - out->set_req(0, non_throwing); - } else if (out->in(0) == ctrl) { - out->set_req(0, non_throwing); - } - } - } - } - break; default: assert( !n->is_Call(), "" ); assert( !n->is_Mem(), "" ); @@ -3285,7 +3268,8 @@ // because of a transient condition during start-up in the interpreter. return false; } - if (md->has_trap_at(bci, reason) != 0) { + ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL; + if (md->has_trap_at(bci, m, reason) != 0) { // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic. // Also, if there are multiple reasons, or if there is no per-BCI record, // assume the worst. @@ -3303,7 +3287,7 @@ // Less-accurate variant which does not require a method and bci. 
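(Editorial sketch, not part of this changeset.) The Compile::Init hunk above, like the MethodData constructor earlier in this patch, picks one of three RTM code generation states per method. A hedged restatement of that three-way decision; has_option() and the enumerator values are stand-ins, while the flag and option names come from the hunks themselves:

#include <cstdio>
#include <cstring>

enum RTMState { NoRTM, UseRTM, ProfileRTM };   // illustrative values only

static bool UseRTMLocking = true;
static bool UseRTMDeopt   = true;

// Stand-in for CompilerOracle::has_option_string().
static bool has_option(const char* method_options, const char* opt) {
  return std::strstr(method_options, opt) != nullptr;
}

static RTMState pick_rtm_state(const char* method_options) {
  if (!UseRTMLocking || has_option(method_options, "NoRTMLockEliding")) {
    return NoRTM;        // no lock eliding for this method
  }
  if (has_option(method_options, "UseRTMLockEliding") || !UseRTMDeopt) {
    return UseRTM;       // elide locks, skip abort-ratio profiling
  }
  return ProfileRTM;     // elide locks and include abort-ratio profiling code
}

int main() {
  std::printf("state=%d\n", pick_rtm_state(""));
  return 0;
}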
bool Compile::too_many_traps(Deoptimization::DeoptReason reason, ciMethodData* logmd) { - if (trap_count(reason) >= (uint)PerMethodTrapLimit) { + if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) { // Too many traps globally. // Note that we use cumulative trap_count, not just md->trap_count. if (log()) { @@ -3338,10 +3322,11 @@ uint m_cutoff = (uint) PerMethodRecompilationCutoff / 2 + 1; // not zero Deoptimization::DeoptReason per_bc_reason = Deoptimization::reason_recorded_per_bytecode_if_any(reason); + ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL; if ((per_bc_reason == Deoptimization::Reason_none - || md->has_trap_at(bci, reason) != 0) + || md->has_trap_at(bci, m, reason) != 0) // The trap frequency measure we care about is the recompile count: - && md->trap_recompiled_at(bci) + && md->trap_recompiled_at(bci, m) && md->overflow_recompile_count() >= bc_cutoff) { // Do not emit a trap here if it has already caused recompilations. // Also, if there are multiple reasons, or if there is no per-BCI record, diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/compile.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -319,9 +319,9 @@ bool _trace_opto_output; bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing #endif - // JSR 292 bool _has_method_handle_invokes; // True if this method has MethodHandle invokes. + RTMState _rtm_state; // State of Restricted Transactional Memory usage // Compilation environment. Arena _comp_arena; // Arena with lifetime equivalent to Compile @@ -591,6 +591,10 @@ void set_print_inlining(bool z) { _print_inlining = z; } bool print_intrinsics() const { return _print_intrinsics; } void set_print_intrinsics(bool z) { _print_intrinsics = z; } + RTMState rtm_state() const { return _rtm_state; } + void set_rtm_state(RTMState s) { _rtm_state = s; } + bool use_rtm() const { return (_rtm_state & NoRTM) == 0; } + bool profile_rtm() const { return _rtm_state == ProfileRTM; } // check the CompilerOracle for special behaviours for this compile bool method_has_option(const char * option) { return method() != NULL && method()->has_option(option); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/connode.hpp --- a/src/share/vm/opto/connode.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/connode.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -642,6 +642,19 @@ virtual const Type *bottom_type() const { return TypeInt::INT; } }; +//------------------------------Opaque3Node------------------------------------ +// A node to prevent unwanted optimizations. Will be optimized only during +// macro nodes expansion. +class Opaque3Node : public Opaque2Node { + int _opt; // what optimization it was used for +public: + enum { RTM_OPT }; + Opaque3Node(Compile* C, Node *n, int opt) : Opaque2Node(C, n), _opt(opt) {} + virtual int Opcode() const; + bool rtm_opt() const { return (_opt == RTM_OPT); } +}; + + //----------------------PartialSubtypeCheckNode-------------------------------- // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass // array for an instance of the superklass. 
Set a hidden internal cache on a diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/doCall.cpp --- a/src/share/vm/opto/doCall.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/doCall.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -250,7 +250,7 @@ CallGenerator* miss_cg; Deoptimization::DeoptReason reason = morphism == 2 ? Deoptimization::Reason_bimorphic : - Deoptimization::Reason_class_check; + (speculative_receiver_type == NULL ? Deoptimization::Reason_class_check : Deoptimization::Reason_speculate_class_check); if ((morphism == 1 || (morphism == 2 && next_hit_cg != NULL)) && !too_many_traps(jvms->method(), jvms->bci(), reason) ) { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/graphKit.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -612,9 +612,10 @@ // Usual case: Bail to interpreter. // Reserve the right to recompile if we haven't seen anything yet. + assert(!Deoptimization::reason_is_speculate(reason), "unsupported"); Deoptimization::DeoptAction action = Deoptimization::Action_maybe_recompile; if (treat_throw_as_hot - && (method()->method_data()->trap_recompiled_at(bci()) + && (method()->method_data()->trap_recompiled_at(bci(), NULL) || C->too_many_traps(reason))) { // We cannot afford to take more traps here. Suffer in the interpreter. if (C->log() != NULL) @@ -1124,6 +1125,17 @@ } return _gvn.transform( new (C) ConvI2LNode(offset)); } + +Node* GraphKit::ConvI2UL(Node* offset) { + juint offset_con = (juint) find_int_con(offset, Type::OffsetBot); + if (offset_con != (juint) Type::OffsetBot) { + return longcon((julong) offset_con); + } + Node* conv = _gvn.transform( new (C) ConvI2LNode(offset)); + Node* mask = _gvn.transform( ConLNode::make(C, (julong) max_juint) ); + return _gvn.transform( new (C) AndLNode(conv, mask) ); +} + Node* GraphKit::ConvL2I(Node* offset) { // short-circuit a common case jlong offset_con = find_long_con(offset, (jlong)Type::OffsetBot); @@ -2112,30 +2124,33 @@ * @return node with improved type */ Node* GraphKit::record_profile_for_speculation(Node* n, ciKlass* exact_kls) { - const TypeOopPtr* current_type = _gvn.type(n)->isa_oopptr(); + const Type* current_type = _gvn.type(n); assert(UseTypeSpeculation, "type speculation must be on"); - if (exact_kls != NULL && - // nothing to improve if type is already exact - (current_type == NULL || - (!current_type->klass_is_exact() && - (current_type->speculative() == NULL || - !current_type->speculative()->klass_is_exact())))) { + + const TypeOopPtr* speculative = current_type->speculative(); + + if (current_type->would_improve_type(exact_kls, jvms()->depth())) { const TypeKlassPtr* tklass = TypeKlassPtr::make(exact_kls); const TypeOopPtr* xtype = tklass->as_instance_type(); assert(xtype->klass_is_exact(), "Should be exact"); - + // record the new speculative type's depth + speculative = xtype->with_inline_depth(jvms()->depth()); + } + + if (speculative != current_type->speculative()) { // Build a type with a speculative type (what we think we know // about the type but will need a guard when we use it) - const TypeOopPtr* spec_type = TypeOopPtr::make(TypePtr::BotPTR, Type::OffsetBot, TypeOopPtr::InstanceBot, xtype); - // We're changing the type, we need a new cast node to carry the - // new type. The new type depends on the control: what profiling - // tells us is only valid from here as far as we can tell. 
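A note on the GraphKit::ConvI2UL helper added in the graphKit.cpp hunk above: unlike ConvI2L, which sign-extends, it zero-extends the 32-bit offset by AND-ing the converted value with max_juint. A minimal standalone C++ sketch of the two conversions (helper names are ours, not HotSpot's):

#include <cassert>
#include <cstdint>

// Illustrative only: what ConvI2L (sign-extend) vs. ConvI2UL (zero-extend)
// compute for a 32-bit offset.
static int64_t sign_extend_i2l(int32_t offset)  { return (int64_t) offset; }
static int64_t zero_extend_i2ul(int32_t offset) { return (int64_t) offset & 0xFFFFFFFFLL; }

int main() {
  int32_t offset = -1;                                             // bit pattern 0xFFFFFFFF
  assert(sign_extend_i2l(offset)  == -1);                          // 0xFFFFFFFFFFFFFFFF
  assert(zero_extend_i2ul(offset) == 0xFFFFFFFFLL);                // 0x00000000FFFFFFFF
  assert(sign_extend_i2l(42) == 42 && zero_extend_i2ul(42) == 42); // non-negative: identical
  return 0;
}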
- Node* cast = new(C) CastPPNode(n, spec_type); - cast->init_req(0, control()); + const TypeOopPtr* spec_type = TypeOopPtr::make(TypePtr::BotPTR, Type::OffsetBot, TypeOopPtr::InstanceBot, speculative); + // We're changing the type, we need a new CheckCast node to carry + // the new type. The new type depends on the control: what + // profiling tells us is only valid from here as far as we can + // tell. + Node* cast = new(C) CheckCastPPNode(control(), n, current_type->remove_speculative()->join_speculative(spec_type)); cast = _gvn.transform(cast); replace_in_map(n, cast); n = cast; } + return n; } @@ -2145,7 +2160,7 @@ * * @param n receiver node * - * @return node with improved type + * @return node with improved type */ Node* GraphKit::record_profiled_receiver_for_speculation(Node* n) { if (!UseTypeSpeculation) { @@ -2739,12 +2754,14 @@ // Subsequent type checks will always fold up. Node* GraphKit::maybe_cast_profiled_receiver(Node* not_null_obj, ciKlass* require_klass, - ciKlass* spec_klass, + ciKlass* spec_klass, bool safe_for_replace) { if (!UseTypeProfile || !TypeProfileCasts) return NULL; + Deoptimization::DeoptReason reason = spec_klass == NULL ? Deoptimization::Reason_class_check : Deoptimization::Reason_speculate_class_check; + // Make sure we haven't already deoptimized from this tactic. - if (too_many_traps(Deoptimization::Reason_class_check)) + if (too_many_traps(reason)) return NULL; // (No, this isn't a call, but it's enough like a virtual call @@ -2766,7 +2783,7 @@ &exact_obj); { PreserveJVMState pjvms(this); set_control(slow_ctl); - uncommon_trap(Deoptimization::Reason_class_check, + uncommon_trap(reason, Deoptimization::Action_maybe_recompile); } if (safe_for_replace) { @@ -2793,8 +2810,10 @@ bool not_null) { // type == NULL if profiling tells us this object is always null if (type != NULL) { - if (!too_many_traps(Deoptimization::Reason_null_check) && - !too_many_traps(Deoptimization::Reason_class_check)) { + Deoptimization::DeoptReason class_reason = Deoptimization::Reason_speculate_class_check; + Deoptimization::DeoptReason null_reason = Deoptimization::Reason_null_check; + if (!too_many_traps(null_reason) && + !too_many_traps(class_reason)) { Node* not_null_obj = NULL; // not_null is true if we know the object is not null and // there's no need for a null check @@ -2813,7 +2832,7 @@ { PreserveJVMState pjvms(this); set_control(slow_ctl); - uncommon_trap(Deoptimization::Reason_class_check, + uncommon_trap(class_reason, Deoptimization::Action_maybe_recompile); } replace_in_map(not_null_obj, exact_obj); @@ -2882,7 +2901,7 @@ } if (known_statically && UseTypeSpeculation) { - // If we know the type check always succeed then we don't use the + // If we know the type check always succeeds then we don't use the // profiling data at this bytecode. Don't lose it, feed it to the // type system as a speculative type. not_null_obj = record_profiled_receiver_for_speculation(not_null_obj); @@ -2999,22 +3018,28 @@ } Node* cast_obj = NULL; - const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr(); - // We may not have profiling here or it may not help us. If we have - // a speculative type use it to perform an exact cast. - ciKlass* spec_obj_type = obj_type->speculative_type(); - if (spec_obj_type != NULL || - (data != NULL && - // Counter has never been decremented (due to cast failure). - // ...This is a reasonable thing to expect. It is true of - // all casts inserted by javac to implement generic types. 
- data->as_CounterData()->count() >= 0)) { - cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace); - if (cast_obj != NULL) { - if (failure_control != NULL) // failure is now impossible - (*failure_control) = top(); - // adjust the type of the phi to the exact klass: - phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR)); + if (tk->klass_is_exact()) { + // The following optimization tries to statically cast the speculative type of the object + // (for example obtained during profiling) to the type of the superklass and then do a + // dynamic check that the type of the object is what we expect. To work correctly + // for checkcast and aastore the type of superklass should be exact. + const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr(); + // We may not have profiling here or it may not help us. If we have + // a speculative type use it to perform an exact cast. + ciKlass* spec_obj_type = obj_type->speculative_type(); + if (spec_obj_type != NULL || + (data != NULL && + // Counter has never been decremented (due to cast failure). + // ...This is a reasonable thing to expect. It is true of + // all casts inserted by javac to implement generic types. + data->as_CounterData()->count() >= 0)) { + cast_obj = maybe_cast_profiled_receiver(not_null_obj, tk->klass(), spec_obj_type, safe_for_replace); + if (cast_obj != NULL) { + if (failure_control != NULL) // failure is now impossible + (*failure_control) = top(); + // adjust the type of the phi to the exact klass: + phi->raise_bottom_type(_gvn.type(cast_obj)->meet_speculative(TypePtr::NULL_PTR)); + } } } @@ -3137,10 +3162,14 @@ Node* mem = reset_memory(); FastLockNode * flock = _gvn.transform(new (C) FastLockNode(0, obj, box) )->as_FastLock(); - if (PrintPreciseBiasedLockingStatistics) { + if (UseBiasedLocking && PrintPreciseBiasedLockingStatistics) { // Create the counters for this fast lock. flock->create_lock_counter(sync_jvms()); // sync_jvms used to get current bci } + + // Create the rtm counters for this fast lock if needed. + flock->create_rtm_lock_counter(sync_jvms()); // sync_jvms used to get current bci + // Add monitor to debug info for the slow path. If we block inside the // slow path and de-opt, we need the monitor hanging around map()->push_monitor( flock ); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/graphKit.hpp --- a/src/share/vm/opto/graphKit.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/graphKit.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -338,6 +338,7 @@ // Convert between int and long, and size_t. // (See macros ConvI2X, etc., in type.hpp for ConvI2X, etc.) Node* ConvI2L(Node* offset); + Node* ConvI2UL(Node* offset); Node* ConvL2I(Node* offset); // Find out the klass of an object. Node* load_object_klass(Node* object); @@ -406,7 +407,7 @@ // Use the type profile to narrow an object type. 
Node* maybe_cast_profiled_receiver(Node* not_null_obj, ciKlass* require_klass, - ciKlass* spec, + ciKlass* spec, bool safe_for_replace); // Cast obj to type and emit guard unless we had too many traps here already diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/ifnode.cpp --- a/src/share/vm/opto/ifnode.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/ifnode.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -76,7 +76,6 @@ if( !i1->is_Bool() ) return NULL; BoolNode *b = i1->as_Bool(); Node *cmp = b->in(1); - if( cmp->is_FlagsProj() ) return NULL; if( !cmp->is_Cmp() ) return NULL; i1 = cmp->in(1); if( i1 == NULL || !i1->is_Phi() ) return NULL; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/lcm.cpp --- a/src/share/vm/opto/lcm.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/lcm.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -520,13 +520,6 @@ break; } - // For nodes that produce a FlagsProj, make the node adjacent to the - // use of the FlagsProj - if (use->is_FlagsProj() && get_block_for_node(use) == block) { - found_machif = true; - break; - } - // More than this instruction pending for successor to be ready, // don't choose this if other opportunities are ready if (ready_cnt.at(use->_idx) > 1) diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/library_call.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -203,7 +203,9 @@ bool inline_math_native(vmIntrinsics::ID id); bool inline_trig(vmIntrinsics::ID id); bool inline_math(vmIntrinsics::ID id); - void inline_math_mathExact(Node* math); + template <typename OverflowOp> + bool inline_math_overflow(Node* arg1, Node* arg2); + void inline_math_mathExact(Node* math, Node* test); bool inline_math_addExactI(bool is_increment); bool inline_math_addExactL(bool is_increment); bool inline_math_multiplyExactI(); @@ -517,31 +519,31 @@ case vmIntrinsics::_incrementExactI: case vmIntrinsics::_addExactI: - if (!Matcher::match_rule_supported(Op_AddExactI) || !UseMathExactIntrinsics) return NULL; + if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL; break; case vmIntrinsics::_incrementExactL: case vmIntrinsics::_addExactL: - if (!Matcher::match_rule_supported(Op_AddExactL) || !UseMathExactIntrinsics) return NULL; + if (!Matcher::match_rule_supported(Op_OverflowAddL) || !UseMathExactIntrinsics) return NULL; break; case vmIntrinsics::_decrementExactI: case vmIntrinsics::_subtractExactI: - if (!Matcher::match_rule_supported(Op_SubExactI) || !UseMathExactIntrinsics) return NULL; + if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL; break; case vmIntrinsics::_decrementExactL: case vmIntrinsics::_subtractExactL: - if (!Matcher::match_rule_supported(Op_SubExactL) || !UseMathExactIntrinsics) return NULL; + if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL; break; case vmIntrinsics::_negateExactI: - if (!Matcher::match_rule_supported(Op_NegExactI) || !UseMathExactIntrinsics) return NULL; + if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL; break; case vmIntrinsics::_negateExactL: - if (!Matcher::match_rule_supported(Op_NegExactL) || !UseMathExactIntrinsics) return NULL; + if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL; break; case vmIntrinsics::_multiplyExactI: - if (!Matcher::match_rule_supported(Op_MulExactI) || !UseMathExactIntrinsics) return NULL; + if 
(!Matcher::match_rule_supported(Op_OverflowMulI) || !UseMathExactIntrinsics) return NULL; break; case vmIntrinsics::_multiplyExactL: - if (!Matcher::match_rule_supported(Op_MulExactL) || !UseMathExactIntrinsics) return NULL; + if (!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL; break; default: @@ -1970,18 +1972,8 @@ return true; } -void LibraryCallKit::inline_math_mathExact(Node* math) { - // If we didn't get the expected opcode it means we have optimized - // the node to something else and don't need the exception edge. - if (!math->is_MathExact()) { - set_result(math); - return; - } - - Node* result = _gvn.transform( new(C) ProjNode(math, MathExactNode::result_proj_node)); - Node* flags = _gvn.transform( new(C) FlagsProjNode(math, MathExactNode::flags_proj_node)); - - Node* bol = _gvn.transform( new (C) BoolNode(flags, BoolTest::overflow) ); +void LibraryCallKit::inline_math_mathExact(Node* math, Node *test) { + Node* bol = _gvn.transform( new (C) BoolNode(test, BoolTest::overflow) ); IfNode* check = create_and_map_if(control(), bol, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN); Node* fast_path = _gvn.transform( new (C) IfFalseNode(check)); Node* slow_path = _gvn.transform( new (C) IfTrueNode(check) ); @@ -1999,108 +1991,50 @@ } set_control(fast_path); - set_result(result); + set_result(math); +} + +template <typename OverflowOp> +bool LibraryCallKit::inline_math_overflow(Node* arg1, Node* arg2) { + typedef typename OverflowOp::MathOp MathOp; + + MathOp* mathOp = new(C) MathOp(arg1, arg2); + Node* operation = _gvn.transform( mathOp ); + Node* ofcheck = _gvn.transform( new(C) OverflowOp(arg1, arg2) ); + inline_math_mathExact(operation, ofcheck); + return true; } bool LibraryCallKit::inline_math_addExactI(bool is_increment) { - Node* arg1 = argument(0); - Node* arg2 = NULL; - - if (is_increment) { - arg2 = intcon(1); - } else { - arg2 = argument(1); - } - - Node* add = _gvn.transform( new(C) AddExactINode(NULL, arg1, arg2) ); - inline_math_mathExact(add); - return true; + return inline_math_overflow<OverflowAddINode>(argument(0), is_increment ? intcon(1) : argument(1)); } bool LibraryCallKit::inline_math_addExactL(bool is_increment) { - Node* arg1 = argument(0); // type long - // argument(1) == TOP - Node* arg2 = NULL; - - if (is_increment) { - arg2 = longcon(1); - } else { - arg2 = argument(2); // type long - // argument(3) == TOP - } - - Node* add = _gvn.transform(new(C) AddExactLNode(NULL, arg1, arg2)); - inline_math_mathExact(add); - return true; + return inline_math_overflow<OverflowAddLNode>(argument(0), is_increment ? longcon(1) : argument(2)); } bool LibraryCallKit::inline_math_subtractExactI(bool is_decrement) { - Node* arg1 = argument(0); - Node* arg2 = NULL; - - if (is_decrement) { - arg2 = intcon(1); - } else { - arg2 = argument(1); - } - - Node* sub = _gvn.transform(new(C) SubExactINode(NULL, arg1, arg2)); - inline_math_mathExact(sub); - return true; + return inline_math_overflow<OverflowSubINode>(argument(0), is_decrement ? intcon(1) : argument(1)); } bool LibraryCallKit::inline_math_subtractExactL(bool is_decrement) { - Node* arg1 = argument(0); // type long - // argument(1) == TOP - Node* arg2 = NULL; - - if (is_decrement) { - arg2 = longcon(1); - } else { - arg2 = argument(2); // type long - // argument(3) == TOP - } - - Node* sub = _gvn.transform(new(C) SubExactLNode(NULL, arg1, arg2)); - inline_math_mathExact(sub); - return true; + return inline_math_overflow<OverflowSubLNode>(argument(0), is_decrement ? 
longcon(1) : argument(2)); } bool LibraryCallKit::inline_math_negateExactI() { - Node* arg1 = argument(0); - - Node* neg = _gvn.transform(new(C) NegExactINode(NULL, arg1)); - inline_math_mathExact(neg); - return true; + return inline_math_overflow<OverflowSubINode>(intcon(0), argument(0)); } bool LibraryCallKit::inline_math_negateExactL() { - Node* arg1 = argument(0); - // argument(1) == TOP - - Node* neg = _gvn.transform(new(C) NegExactLNode(NULL, arg1)); - inline_math_mathExact(neg); - return true; + return inline_math_overflow<OverflowSubLNode>(longcon(0), argument(0)); } bool LibraryCallKit::inline_math_multiplyExactI() { - Node* arg1 = argument(0); - Node* arg2 = argument(1); - - Node* mul = _gvn.transform(new(C) MulExactINode(NULL, arg1, arg2)); - inline_math_mathExact(mul); - return true; + return inline_math_overflow<OverflowMulINode>(argument(0), argument(1)); } bool LibraryCallKit::inline_math_multiplyExactL() { - Node* arg1 = argument(0); - // argument(1) == TOP - Node* arg2 = argument(2); - // argument(3) == TOP - - Node* mul = _gvn.transform(new(C) MulExactLNode(NULL, arg1, arg2)); - inline_math_mathExact(mul); - return true; + return inline_math_overflow<OverflowMulLNode>(argument(0), argument(2)); } Node* @@ -2666,7 +2600,7 @@ case T_ADDRESS: // Cast to an int type. p = _gvn.transform(new (C) CastP2XNode(NULL, p)); - p = ConvX2L(p); + p = ConvX2UL(p); break; default: fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); @@ -3246,7 +3180,8 @@ // private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted); bool LibraryCallKit::inline_native_isInterrupted() { // Add a fast path to t.isInterrupted(clear_int): - // (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int)) + // (t == Thread.current() && + // (!TLS._osthread._interrupted || WINDOWS_ONLY(false) NOT_WINDOWS(!clear_int))) // ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int) // So, in the common case that the interrupt bit is false, // we avoid making a call into the VM. Even if the interrupt bit @@ -3303,6 +3238,7 @@ // drop through to next case set_control( _gvn.transform(new (C) IfTrueNode(iff_bit))); +#ifndef TARGET_OS_FAMILY_windows // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path. Node* clr_arg = argument(1); Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0))); @@ -3316,6 +3252,10 @@ // drop through to next case set_control( _gvn.transform(new (C) IfTrueNode(iff_arg))); +#else + // To return true on Windows you must read the _interrupted field + // and check the event state i.e. take the slow path. +#endif // TARGET_OS_FAMILY_windows // (d) Otherwise, go to the slow path. 
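For reference on the Overflow* rewrite of the addExact/subtractExact/negateExact/multiplyExact intrinsics above: the actual overflow tests live in the mathexactnode.cpp hunk further below and use the Hacker's Delight 2-12 identity. A minimal standalone C++ sketch of the signed-add case (our own helper, not HotSpot code; the unsigned add avoids relying on signed-overflow behaviour):

#include <cassert>
#include <cstdint>
#include <limits>

// A signed add overflowed iff both operands have the opposite sign of the
// (wrapped) result, i.e. ((v1 ^ r) & (v2 ^ r)) is negative.
static bool add_will_overflow(int32_t v1, int32_t v2) {
  int32_t result = (int32_t)((uint32_t) v1 + (uint32_t) v2);  // wraparound add
  return ((v1 ^ result) & (v2 ^ result)) < 0;
}

int main() {
  int32_t max = std::numeric_limits<int32_t>::max();
  int32_t min = std::numeric_limits<int32_t>::min();
  assert(add_will_overflow(max, 1));     // wraps to INT32_MIN
  assert(add_will_overflow(min, -1));    // wraps to INT32_MAX
  assert(!add_will_overflow(max, 0));
  assert(!add_will_overflow(-1, 1));     // operands of opposite sign never overflow
  return 0;
}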
slow_region->add_req(control()); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/locknode.cpp --- a/src/share/vm/opto/locknode.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/locknode.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -136,6 +136,8 @@ //-----------------------------hash-------------------------------------------- uint FastLockNode::hash() const { return NO_HASH; } +uint FastLockNode::size_of() const { return sizeof(*this); } + //------------------------------cmp-------------------------------------------- uint FastLockNode::cmp( const Node &n ) const { return (&n == this); // Always fail except on self @@ -159,6 +161,22 @@ _counters = blnc->counters(); } +void FastLockNode::create_rtm_lock_counter(JVMState* state) { +#if INCLUDE_RTM_OPT + Compile* C = Compile::current(); + if (C->profile_rtm() || (PrintPreciseRTMLockingStatistics && C->use_rtm())) { + RTMLockingNamedCounter* rlnc = (RTMLockingNamedCounter*) + OptoRuntime::new_named_counter(state, NamedCounter::RTMLockingCounter); + _rtm_counters = rlnc->counters(); + if (UseRTMForStackLocks) { + rlnc = (RTMLockingNamedCounter*) + OptoRuntime::new_named_counter(state, NamedCounter::RTMLockingCounter); + _stack_rtm_counters = rlnc->counters(); + } + } +#endif +} + //============================================================================= //------------------------------do_monitor_enter------------------------------- void Parse::do_monitor_enter() { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/locknode.hpp --- a/src/share/vm/opto/locknode.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/locknode.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -92,13 +92,17 @@ //------------------------------FastLockNode----------------------------------- class FastLockNode: public CmpNode { private: - BiasedLockingCounters* _counters; + BiasedLockingCounters* _counters; + RTMLockingCounters* _rtm_counters; // RTM lock counters for inflated locks + RTMLockingCounters* _stack_rtm_counters; // RTM lock counters for stack locks public: FastLockNode(Node *ctrl, Node *oop, Node *box) : CmpNode(oop,box) { init_req(0,ctrl); init_class_id(Class_FastLock); _counters = NULL; + _rtm_counters = NULL; + _stack_rtm_counters = NULL; } Node* obj_node() const { return in(1); } Node* box_node() const { return in(2); } @@ -107,13 +111,17 @@ // FastLock and FastUnlockNode do not hash, we need one for each correspoding // LockNode/UnLockNode to avoid creating Phi's. 
virtual uint hash() const ; // { return NO_HASH; } + virtual uint size_of() const; virtual uint cmp( const Node &n ) const ; // Always fail, except on self virtual int Opcode() const; virtual const Type *Value( PhaseTransform *phase ) const { return TypeInt::CC; } const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;} void create_lock_counter(JVMState* s); - BiasedLockingCounters* counters() const { return _counters; } + void create_rtm_lock_counter(JVMState* state); + BiasedLockingCounters* counters() const { return _counters; } + RTMLockingCounters* rtm_counters() const { return _rtm_counters; } + RTMLockingCounters* stack_rtm_counters() const { return _stack_rtm_counters; } }; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/loopTransform.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -617,6 +617,15 @@ case Op_AryEq: { return false; } +#if INCLUDE_RTM_OPT + case Op_FastLock: + case Op_FastUnlock: { + // Don't unroll RTM locking code because it is large. + if (UseRTMLocking) { + return false; + } + } +#endif } // switch } @@ -713,10 +722,6 @@ case Op_ModL: body_size += 30; break; case Op_DivL: body_size += 30; break; case Op_MulL: body_size += 10; break; - case Op_FlagsProj: - // Can't handle unrolling of loops containing - // nodes that generate a FlagsProj at the moment - return false; case Op_StrComp: case Op_StrEquals: case Op_StrIndexOf: @@ -726,6 +731,15 @@ // String intrinsics are large and have loops. return false; } +#if INCLUDE_RTM_OPT + case Op_FastLock: + case Op_FastUnlock: { + // Don't unroll RTM locking code because it is large. + if (UseRTMLocking) { + return false; + } + } +#endif } // switch } @@ -780,10 +794,6 @@ continue; // not RC Node *cmp = bol->in(1); - if (cmp->is_FlagsProj()) { - continue; - } - Node *rc_exp = cmp->in(1); Node *limit = cmp->in(2); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/loopopts.cpp --- a/src/share/vm/opto/loopopts.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/loopopts.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -43,12 +43,6 @@ return NULL; } - if (n->is_MathExact()) { - // MathExact has projections that are not correctly handled in the code - // below. 
- return NULL; - } - int wins = 0; assert(!n->is_CFG(), ""); assert(region->is_Region(), ""); @@ -2362,8 +2356,7 @@ opc == Op_Catch || opc == Op_CatchProj || opc == Op_Jump || - opc == Op_JumpProj || - opc == Op_FlagsProj) { + opc == Op_JumpProj) { #if !defined(PRODUCT) if (TracePartialPeeling) { tty->print_cr("\nExit control too complex: lp: %d", head->_idx); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/machnode.hpp --- a/src/share/vm/opto/machnode.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/machnode.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -53,6 +53,7 @@ class Matcher; class PhaseRegAlloc; class RegMask; +class RTMLockingCounters; class State; //---------------------------MachOper------------------------------------------ @@ -659,8 +660,9 @@ class MachFastLockNode : public MachNode { virtual uint size_of() const { return sizeof(*this); } // Size is bigger public: - BiasedLockingCounters* _counters; - + BiasedLockingCounters* _counters; + RTMLockingCounters* _rtm_counters; // RTM lock counters for inflated locks + RTMLockingCounters* _stack_rtm_counters; // RTM lock counters for stack locks MachFastLockNode() : MachNode() {} }; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/macro.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -2439,6 +2439,7 @@ } } // Next, attempt to eliminate allocations + _has_locks = false; progress = true; while (progress) { progress = false; @@ -2457,11 +2458,13 @@ case Node::Class_Lock: case Node::Class_Unlock: assert(!n->as_AbstractLock()->is_eliminated(), "sanity"); + _has_locks = true; break; default: assert(n->Opcode() == Op_LoopLimit || n->Opcode() == Op_Opaque1 || - n->Opcode() == Op_Opaque2, "unknown node type in macro list"); + n->Opcode() == Op_Opaque2 || + n->Opcode() == Op_Opaque3, "unknown node type in macro list"); } assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count"); progress = progress || success; @@ -2502,6 +2505,30 @@ } else if (n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2) { _igvn.replace_node(n, n->in(1)); success = true; +#if INCLUDE_RTM_OPT + } else if ((n->Opcode() == Op_Opaque3) && ((Opaque3Node*)n)->rtm_opt()) { + assert(C->profile_rtm(), "should be used only in rtm deoptimization code"); + assert((n->outcnt() == 1) && n->unique_out()->is_Cmp(), ""); + Node* cmp = n->unique_out(); +#ifdef ASSERT + // Validate graph. + assert((cmp->outcnt() == 1) && cmp->unique_out()->is_Bool(), ""); + BoolNode* bol = cmp->unique_out()->as_Bool(); + assert((bol->outcnt() == 1) && bol->unique_out()->is_If() && + (bol->_test._test == BoolTest::ne), ""); + IfNode* ifn = bol->unique_out()->as_If(); + assert((ifn->outcnt() == 2) && + ifn->proj_out(1)->is_uncommon_trap_proj(Deoptimization::Reason_rtm_state_change), ""); +#endif + Node* repl = n->in(1); + if (!_has_locks) { + // Remove RTM state check if there are no locks in the code. + // Replace input to compare the same value. + repl = (cmp->in(1) == n) ? 
cmp->in(2) : cmp->in(1); + } + _igvn.replace_node(n, repl); + success = true; +#endif } assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count"); progress = progress || success; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/macro.hpp --- a/src/share/vm/opto/macro.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/macro.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -76,6 +76,8 @@ ProjNode *_memproj_catchall; ProjNode *_resproj; + // Additional data collected during macro expansion + bool _has_locks; void expand_allocate(AllocateNode *alloc); void expand_allocate_array(AllocateArrayNode *alloc); @@ -118,7 +120,7 @@ Node* length); public: - PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn) { + PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn), _has_locks(false) { _igvn.set_delay_transform(true); } void eliminate_macro_nodes(); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/matcher.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1922,6 +1922,105 @@ return OptoReg::as_OptoReg(regs.first()); } +// This function identifies sub-graphs in which a 'load' node is +// input to two different nodes, and such that it can be matched +// with BMI instructions like blsi, blsr, etc. +// Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. +// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* +// refers to the same node. +#ifdef X86 +// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) +// This is a temporary solution until we make DAGs expressible in ADL. +template +class FusedPatternMatcher { + Node* _op1_node; + Node* _mop_node; + int _con_op; + + static int match_next(Node* n, int next_op, int next_op_idx) { + if (n->in(1) == NULL || n->in(2) == NULL) { + return -1; + } + + if (next_op_idx == -1) { // n is commutative, try rotations + if (n->in(1)->Opcode() == next_op) { + return 1; + } else if (n->in(2)->Opcode() == next_op) { + return 2; + } + } else { + assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); + if (n->in(next_op_idx)->Opcode() == next_op) { + return next_op_idx; + } + } + return -1; + } +public: + FusedPatternMatcher(Node* op1_node, Node *mop_node, int con_op) : + _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } + + bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative + int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative + typename ConType::NativeType con_value) { + if (_op1_node->Opcode() != op1) { + return false; + } + if (_mop_node->outcnt() > 2) { + return false; + } + op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); + if (op1_op2_idx == -1) { + return false; + } + // Memory operation must be the other edge + int op1_mop_idx = (op1_op2_idx & 1) + 1; + + // Check that the mop node is really what we want + if (_op1_node->in(op1_mop_idx) == _mop_node) { + Node *op2_node = _op1_node->in(op1_op2_idx); + if (op2_node->outcnt() > 1) { + return false; + } + assert(op2_node->Opcode() == op2, "Should be"); + op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); + if (op2_con_idx == -1) { + return false; + } + // Memory operation must be the other edge + int op2_mop_idx = (op2_con_idx & 1) + 1; + // Check that the memory operation is the same node + if (op2_node->in(op2_mop_idx) == _mop_node) { + // Now check the constant + const Type* 
con_type = op2_node->in(op2_con_idx)->bottom_type(); + if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { + return true; + } + } + } + return false; + } +}; + + +bool Matcher::is_bmi_pattern(Node *n, Node *m) { + if (n != NULL && m != NULL) { + if (m->Opcode() == Op_LoadI) { + FusedPatternMatcher bmii(n, m, Op_ConI); + return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || + bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || + bmii.match(Op_XorI, -1, Op_AddI, -1, -1); + } else if (m->Opcode() == Op_LoadL) { + FusedPatternMatcher bmil(n, m, Op_ConL); + return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || + bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || + bmil.match(Op_XorL, -1, Op_AddL, -1, -1); + } + } + return false; +} +#endif // X86 + // A method-klass-holder may be passed in the inline_cache_reg // and then expanded into the inline_cache_reg and a method_oop register // defined in ad_.cpp @@ -1998,7 +2097,6 @@ case Op_Catch: case Op_CatchProj: case Op_CProj: - case Op_FlagsProj: case Op_JumpProj: case Op_JProj: case Op_NeverBranch: @@ -2078,6 +2176,14 @@ set_shared(m->in(AddPNode::Base)->in(1)); } + // if 'n' and 'm' are part of a graph for BMI instruction, clone this node. +#ifdef X86 + if (UseBMI1Instructions && is_bmi_pattern(n, m)) { + mstack.push(m, Visit); + continue; + } +#endif + // Clone addressing expressions as they are "free" in memory access instructions if( mem_op && i == MemNode::Address && mop == Op_AddP ) { // Some inputs for address expression are not put on stack diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/matcher.hpp --- a/src/share/vm/opto/matcher.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/matcher.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -79,6 +79,9 @@ // Find shared Nodes, or Nodes that otherwise are Matcher roots void find_shared( Node *n ); +#ifdef X86 + bool is_bmi_pattern(Node *n, Node *m); +#endif // Debug and profile information for nodes in old space: GrowableArray* _old_node_note_array; @@ -340,10 +343,6 @@ // Register for MODL projection of divmodL static RegMask modL_proj_mask(); - static const RegMask mathExactI_result_proj_mask(); - static const RegMask mathExactL_result_proj_mask(); - static const RegMask mathExactI_flags_proj_mask(); - // Use hardware DIV instruction when it is faster than // a code which use multiply for division by constant. 
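A note on the BMI pattern matching added to matcher.cpp above: the three shapes it recognizes, (And (Sub 0 x) x), (And (Add x -1) x) and (Xor (Add x -1) x), are the usual lowest-set-bit identities behind blsi and blsr (and, presumably, blsmsk for the Xor form). A standalone C++ illustration with our own helper names:

#include <cassert>
#include <cstdint>

static uint32_t lowest_set_bit(uint32_t x)     { return (0 - x) & x; }  // -x & x        (blsi)
static uint32_t clear_lowest_bit(uint32_t x)   { return (x - 1) & x; }  // (x - 1) & x   (blsr)
static uint32_t mask_to_lowest_bit(uint32_t x) { return (x - 1) ^ x; }  // (x - 1) ^ x   (blsmsk)

int main() {
  uint32_t v = 0x2Cu;                        // 0b101100
  assert(lowest_set_bit(v)     == 0x04u);    // 0b000100
  assert(clear_lowest_bit(v)   == 0x28u);    // 0b101000
  assert(mask_to_lowest_bit(v) == 0x07u);    // 0b000111
  return 0;
}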
static bool use_asm_for_ldiv_by_con( jlong divisor ); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/mathexactnode.cpp --- a/src/share/vm/opto/mathexactnode.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/mathexactnode.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -31,358 +31,93 @@ #include "opto/mathexactnode.hpp" #include "opto/subnode.hpp" -MathExactNode::MathExactNode(Node* ctrl, Node* in1) : MultiNode(2) { - init_class_id(Class_MathExact); - init_req(0, ctrl); - init_req(1, in1); -} - -MathExactNode::MathExactNode(Node* ctrl, Node* in1, Node* in2) : MultiNode(3) { - init_class_id(Class_MathExact); - init_req(0, ctrl); - init_req(1, in1); - init_req(2, in2); -} - -BoolNode* MathExactNode::bool_node() const { - Node* flags = flags_node(); - BoolNode* boolnode = flags->unique_out()->as_Bool(); - assert(boolnode != NULL, "must have BoolNode"); - return boolnode; -} - -IfNode* MathExactNode::if_node() const { - BoolNode* boolnode = bool_node(); - IfNode* ifnode = boolnode->unique_out()->as_If(); - assert(ifnode != NULL, "must have IfNode"); - return ifnode; -} - -Node* MathExactNode::control_node() const { - IfNode* ifnode = if_node(); - return ifnode->in(0); -} - -Node* MathExactNode::non_throwing_branch() const { - IfNode* ifnode = if_node(); - if (bool_node()->_test._test == BoolTest::overflow) { - return ifnode->proj_out(0); - } - return ifnode->proj_out(1); -} - -// If the MathExactNode won't overflow we have to replace the -// FlagsProjNode and ProjNode that is generated by the MathExactNode -Node* MathExactNode::no_overflow(PhaseGVN* phase, Node* new_result) { - PhaseIterGVN* igvn = phase->is_IterGVN(); - if (igvn) { - ProjNode* result = result_node(); - ProjNode* flags = flags_node(); - - if (result != NULL) { - igvn->replace_node(result, new_result); - } +template +class AddHelper { +public: + typedef typename OverflowOp::TypeClass TypeClass; + typedef typename TypeClass::NativeType NativeType; - if (flags != NULL) { - BoolNode* boolnode = bool_node(); - switch (boolnode->_test._test) { - case BoolTest::overflow: - // if the check is for overflow - never taken - igvn->replace_node(boolnode, phase->intcon(0)); - break; - case BoolTest::no_overflow: - // if the check is for no overflow - always taken - igvn->replace_node(boolnode, phase->intcon(1)); - break; - default: - fatal("Unexpected value of BoolTest"); - break; - } - flags->del_req(0); + static bool will_overflow(NativeType value1, NativeType value2) { + NativeType result = value1 + value2; + // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result + if (((value1 ^ result) & (value2 ^ result)) >= 0) { + return false; } - } - return new_result; -} - -Node* MathExactINode::match(const ProjNode* proj, const Matcher* m) { - uint ideal_reg = proj->ideal_reg(); - RegMask rm; - if (proj->_con == result_proj_node) { - rm = m->mathExactI_result_proj_mask(); - } else { - assert(proj->_con == flags_proj_node, "must be result or flags"); - assert(ideal_reg == Op_RegFlags, "sanity"); - rm = m->mathExactI_flags_proj_mask(); - } - return new (m->C) MachProjNode(this, proj->_con, rm, ideal_reg); -} - -Node* MathExactLNode::match(const ProjNode* proj, const Matcher* m) { - uint ideal_reg = proj->ideal_reg(); - RegMask rm; - if (proj->_con == result_proj_node) { - rm = m->mathExactL_result_proj_mask(); - } else { - assert(proj->_con == flags_proj_node, "must be result or flags"); - assert(ideal_reg == Op_RegFlags, "sanity"); - rm = m->mathExactI_flags_proj_mask(); - } - return new (m->C) 
MachProjNode(this, proj->_con, rm, ideal_reg); -} - -Node* AddExactINode::Ideal(PhaseGVN* phase, bool can_reshape) { - Node* arg1 = in(1); - Node* arg2 = in(2); - - const Type* type1 = phase->type(arg1); - const Type* type2 = phase->type(arg2); - - if (type1 != Type::TOP && type1->singleton() && - type2 != Type::TOP && type2->singleton()) { - jint val1 = arg1->get_int(); - jint val2 = arg2->get_int(); - jint result = val1 + val2; - // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result - if ( (((val1 ^ result) & (val2 ^ result)) >= 0)) { - Node* con_result = ConINode::make(phase->C, result); - return no_overflow(phase, con_result); - } - return NULL; + return true; } - if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) { // (Add 0 x) == x - Node* add_result = new (phase->C) AddINode(arg1, arg2); - return no_overflow(phase, add_result); - } - - if (type2->singleton()) { - return NULL; // no change - keep constant on the right + static bool can_overflow(const Type* type1, const Type* type2) { + if (type1 == TypeClass::ZERO || type2 == TypeClass::ZERO) { + return false; + } + return true; } - - if (type1->singleton()) { - // Make it x + Constant - move constant to the right - swap_edges(1, 2); - return this; - } - - if (arg2->is_Load()) { - return NULL; // no change - keep load on the right - } - - if (arg1->is_Load()) { - // Make it x + Load - move load to the right - swap_edges(1, 2); - return this; - } +}; - if (arg1->_idx > arg2->_idx) { - // Sort the edges - swap_edges(1, 2); - return this; - } - - return NULL; -} - -Node* AddExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) { - Node* arg1 = in(1); - Node* arg2 = in(2); +template +class SubHelper { +public: + typedef typename OverflowOp::TypeClass TypeClass; + typedef typename TypeClass::NativeType NativeType; - const Type* type1 = phase->type(arg1); - const Type* type2 = phase->type(arg2); - - if (type1 != Type::TOP && type1->singleton() && - type2 != Type::TOP && type2->singleton()) { - jlong val1 = arg1->get_long(); - jlong val2 = arg2->get_long(); - jlong result = val1 + val2; - // Hacker's Delight 2-12 Overflow if both arguments have the opposite sign of the result - if ( (((val1 ^ result) & (val2 ^ result)) >= 0)) { - Node* con_result = ConLNode::make(phase->C, result); - return no_overflow(phase, con_result); + static bool will_overflow(NativeType value1, NativeType value2) { + NativeType result = value1 - value2; + // hacker's delight 2-12 overflow iff the arguments have different signs and + // the sign of the result is different than the sign of arg1 + if (((value1 ^ value2) & (value1 ^ result)) >= 0) { + return false; } - return NULL; + return true; } - if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) { // (Add 0 x) == x - Node* add_result = new (phase->C) AddLNode(arg1, arg2); - return no_overflow(phase, add_result); + static bool can_overflow(const Type* type1, const Type* type2) { + if (type2 == TypeClass::ZERO) { + return false; + } + return true; } - - if (type2->singleton()) { - return NULL; // no change - keep constant on the right - } - - if (type1->singleton()) { - // Make it x + Constant - move constant to the right - swap_edges(1, 2); - return this; - } +}; - if (arg2->is_Load()) { - return NULL; // no change - keep load on the right - } - - if (arg1->is_Load()) { - // Make it x + Load - move load to the right - swap_edges(1, 2); - return this; - } - - if (arg1->_idx > arg2->_idx) { - // Sort the edges - swap_edges(1, 2); - return this; - } +template +class MulHelper { 
+public: + typedef typename OverflowOp::TypeClass TypeClass; - return NULL; -} - -Node* SubExactINode::Ideal(PhaseGVN* phase, bool can_reshape) { - Node* arg1 = in(1); - Node* arg2 = in(2); - - const Type* type1 = phase->type(arg1); - const Type* type2 = phase->type(arg2); - - if (type1 != Type::TOP && type1->singleton() && - type2 != Type::TOP && type2->singleton()) { - jint val1 = arg1->get_int(); - jint val2 = arg2->get_int(); - jint result = val1 - val2; + static bool can_overflow(const Type* type1, const Type* type2) { + if (type1 == TypeClass::ZERO || type2 == TypeClass::ZERO) { + return false; + } else if (type1 == TypeClass::ONE || type2 == TypeClass::ONE) { + return false; + } + return true; + } +}; - // Hacker's Delight 2-12 Overflow iff the arguments have different signs and - // the sign of the result is different than the sign of arg1 - if (((val1 ^ val2) & (val1 ^ result)) >= 0) { - Node* con_result = ConINode::make(phase->C, result); - return no_overflow(phase, con_result); - } - return NULL; - } - - if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) { - // Sub with zero is the same as add with zero - Node* add_result = new (phase->C) AddINode(arg1, arg2); - return no_overflow(phase, add_result); - } - - return NULL; +bool OverflowAddINode::will_overflow(jint v1, jint v2) const { + return AddHelper::will_overflow(v1, v2); } -Node* SubExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) { - Node* arg1 = in(1); - Node* arg2 = in(2); - - const Type* type1 = phase->type(arg1); - const Type* type2 = phase->type(arg2); - - if (type1 != Type::TOP && type1->singleton() && - type2 != Type::TOP && type2->singleton()) { - jlong val1 = arg1->get_long(); - jlong val2 = arg2->get_long(); - jlong result = val1 - val2; - - // Hacker's Delight 2-12 Overflow iff the arguments have different signs and - // the sign of the result is different than the sign of arg1 - if (((val1 ^ val2) & (val1 ^ result)) >= 0) { - Node* con_result = ConLNode::make(phase->C, result); - return no_overflow(phase, con_result); - } - return NULL; - } - - if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) { - // Sub with zero is the same as add with zero - Node* add_result = new (phase->C) AddLNode(arg1, arg2); - return no_overflow(phase, add_result); - } - - return NULL; -} - -Node* NegExactINode::Ideal(PhaseGVN* phase, bool can_reshape) { - Node *arg = in(1); - - const Type* type = phase->type(arg); - if (type != Type::TOP && type->singleton()) { - jint value = arg->get_int(); - if (value != min_jint) { - Node* neg_result = ConINode::make(phase->C, -value); - return no_overflow(phase, neg_result); - } - } - return NULL; +bool OverflowSubINode::will_overflow(jint v1, jint v2) const { + return SubHelper::will_overflow(v1, v2); } -Node* NegExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) { - Node *arg = in(1); - - const Type* type = phase->type(arg); - if (type != Type::TOP && type->singleton()) { - jlong value = arg->get_long(); - if (value != min_jlong) { - Node* neg_result = ConLNode::make(phase->C, -value); - return no_overflow(phase, neg_result); +bool OverflowMulINode::will_overflow(jint v1, jint v2) const { + jlong result = (jlong) v1 * (jlong) v2; + if ((jint) result == result) { + return false; } - } - return NULL; + return true; } -Node* MulExactINode::Ideal(PhaseGVN* phase, bool can_reshape) { - Node* arg1 = in(1); - Node* arg2 = in(2); - - const Type* type1 = phase->type(arg1); - const Type* type2 = phase->type(arg2); - - if (type1 != Type::TOP && type1->singleton() && - type2 != Type::TOP && 
type2->singleton()) { - jint val1 = arg1->get_int(); - jint val2 = arg2->get_int(); - jlong result = (jlong) val1 * (jlong) val2; - if ((jint) result == result) { - // no overflow - Node* mul_result = ConINode::make(phase->C, result); - return no_overflow(phase, mul_result); - } - } - - if (type1 == TypeInt::ZERO || type2 == TypeInt::ZERO) { - return no_overflow(phase, ConINode::make(phase->C, 0)); - } - - if (type1 == TypeInt::ONE) { - Node* mul_result = new (phase->C) AddINode(arg2, phase->intcon(0)); - return no_overflow(phase, mul_result); - } - if (type2 == TypeInt::ONE) { - Node* mul_result = new (phase->C) AddINode(arg1, phase->intcon(0)); - return no_overflow(phase, mul_result); - } - - if (type1 == TypeInt::MINUS_1) { - return new (phase->C) NegExactINode(NULL, arg2); - } - - if (type2 == TypeInt::MINUS_1) { - return new (phase->C) NegExactINode(NULL, arg1); - } - - return NULL; +bool OverflowAddLNode::will_overflow(jlong v1, jlong v2) const { + return AddHelper::will_overflow(v1, v2); } -Node* MulExactLNode::Ideal(PhaseGVN* phase, bool can_reshape) { - Node* arg1 = in(1); - Node* arg2 = in(2); +bool OverflowSubLNode::will_overflow(jlong v1, jlong v2) const { + return SubHelper::will_overflow(v1, v2); +} - const Type* type1 = phase->type(arg1); - const Type* type2 = phase->type(arg2); - - if (type1 != Type::TOP && type1->singleton() && - type2 != Type::TOP && type2->singleton()) { - jlong val1 = arg1->get_long(); - jlong val2 = arg2->get_long(); - +bool OverflowMulLNode::will_overflow(jlong val1, jlong val2) const { jlong result = val1 * val2; jlong ax = (val1 < 0 ? -val1 : val1); jlong ay = (val2 < 0 ? -val2 : val2); @@ -398,33 +133,125 @@ } } - if (!overflow) { - Node* mul_result = ConLNode::make(phase->C, result); - return no_overflow(phase, mul_result); + return overflow; +} + +bool OverflowAddINode::can_overflow(const Type* t1, const Type* t2) const { + return AddHelper::can_overflow(t1, t2); +} + +bool OverflowSubINode::can_overflow(const Type* t1, const Type* t2) const { + if (in(1) == in(2)) { + return false; + } + return SubHelper::can_overflow(t1, t2); +} + +bool OverflowMulINode::can_overflow(const Type* t1, const Type* t2) const { + return MulHelper::can_overflow(t1, t2); +} + +bool OverflowAddLNode::can_overflow(const Type* t1, const Type* t2) const { + return AddHelper::can_overflow(t1, t2); +} + +bool OverflowSubLNode::can_overflow(const Type* t1, const Type* t2) const { + if (in(1) == in(2)) { + return false; + } + return SubHelper::can_overflow(t1, t2); +} + +bool OverflowMulLNode::can_overflow(const Type* t1, const Type* t2) const { + return MulHelper::can_overflow(t1, t2); +} + +const Type* OverflowNode::sub(const Type* t1, const Type* t2) const { + fatal(err_msg_res("sub() should not be called for '%s'", NodeClassNames[this->Opcode()])); + return TypeInt::CC; +} + +template +struct IdealHelper { + typedef typename OverflowOp::TypeClass TypeClass; // TypeInt, TypeLong + typedef typename TypeClass::NativeType NativeType; + + static Node* Ideal(const OverflowOp* node, PhaseGVN* phase, bool can_reshape) { + Node* arg1 = node->in(1); + Node* arg2 = node->in(2); + const Type* type1 = phase->type(arg1); + const Type* type2 = phase->type(arg2); + + if (type1 == NULL || type2 == NULL) { + return NULL; } - } - if (type1 == TypeLong::ZERO || type2 == TypeLong::ZERO) { - return no_overflow(phase, ConLNode::make(phase->C, 0)); + if (type1 != Type::TOP && type1->singleton() && + type2 != Type::TOP && type2->singleton()) { + NativeType val1 = 
TypeClass::as_self(type1)->get_con(); + NativeType val2 = TypeClass::as_self(type2)->get_con(); + if (node->will_overflow(val1, val2) == false) { + Node* con_result = ConINode::make(phase->C, 0); + return con_result; + } + return NULL; + } + return NULL; } - if (type1 == TypeLong::ONE) { - Node* mul_result = new (phase->C) AddLNode(arg2, phase->longcon(0)); - return no_overflow(phase, mul_result); - } - if (type2 == TypeLong::ONE) { - Node* mul_result = new (phase->C) AddLNode(arg1, phase->longcon(0)); - return no_overflow(phase, mul_result); - } + static const Type* Value(const OverflowOp* node, PhaseTransform* phase) { + const Type *t1 = phase->type( node->in(1) ); + const Type *t2 = phase->type( node->in(2) ); + if( t1 == Type::TOP ) return Type::TOP; + if( t2 == Type::TOP ) return Type::TOP; + + const TypeClass* i1 = TypeClass::as_self(t1); + const TypeClass* i2 = TypeClass::as_self(t2); + + if (i1 == NULL || i2 == NULL) { + return TypeInt::CC; + } - if (type1 == TypeLong::MINUS_1) { - return new (phase->C) NegExactLNode(NULL, arg2); - } + if (t1->singleton() && t2->singleton()) { + NativeType val1 = i1->get_con(); + NativeType val2 = i2->get_con(); + if (node->will_overflow(val1, val2)) { + return TypeInt::CC; + } + return TypeInt::ZERO; + } else if (i1 != TypeClass::TYPE_DOMAIN && i2 != TypeClass::TYPE_DOMAIN) { + if (node->will_overflow(i1->_lo, i2->_lo)) { + return TypeInt::CC; + } else if (node->will_overflow(i1->_lo, i2->_hi)) { + return TypeInt::CC; + } else if (node->will_overflow(i1->_hi, i2->_lo)) { + return TypeInt::CC; + } else if (node->will_overflow(i1->_hi, i2->_hi)) { + return TypeInt::CC; + } + return TypeInt::ZERO; + } - if (type2 == TypeLong::MINUS_1) { - return new (phase->C) NegExactLNode(NULL, arg1); + if (!node->can_overflow(t1, t2)) { + return TypeInt::ZERO; + } + return TypeInt::CC; } +}; - return NULL; +Node* OverflowINode::Ideal(PhaseGVN* phase, bool can_reshape) { + return IdealHelper::Ideal(this, phase, can_reshape); } +Node* OverflowLNode::Ideal(PhaseGVN* phase, bool can_reshape) { + return IdealHelper::Ideal(this, phase, can_reshape); +} + +const Type* OverflowINode::Value(PhaseTransform* phase) const { + return IdealHelper::Value(this, phase); +} + +const Type* OverflowLNode::Value(PhaseTransform* phase) const { + return IdealHelper::Value(this, phase); +} + diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/mathexactnode.hpp --- a/src/share/vm/opto/mathexactnode.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/mathexactnode.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -27,128 +27,111 @@ #include "opto/multnode.hpp" #include "opto/node.hpp" +#include "opto/addnode.hpp" #include "opto/subnode.hpp" #include "opto/type.hpp" -class BoolNode; -class IfNode; -class Node; - class PhaseGVN; class PhaseTransform; -class MathExactNode : public MultiNode { +class OverflowNode : public CmpNode { public: - MathExactNode(Node* ctrl, Node* in1); - MathExactNode(Node* ctrl, Node* in1, Node* in2); - enum { - result_proj_node = 0, - flags_proj_node = 1 - }; - virtual int Opcode() const; - virtual Node* Identity(PhaseTransform* phase) { return this; } - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape) { return NULL; } - virtual const Type* Value(PhaseTransform* phase) const { return bottom_type(); } - virtual uint hash() const { return NO_HASH; } - virtual bool is_CFG() const { return false; } - virtual uint ideal_reg() const { return NotAMachineReg; } + OverflowNode(Node* in1, Node* in2) : CmpNode(in1, in2) {} - ProjNode* result_node() const { return 
proj_out(result_proj_node); } - ProjNode* flags_node() const { return proj_out(flags_proj_node); } - Node* control_node() const; - Node* non_throwing_branch() const; -protected: - IfNode* if_node() const; - BoolNode* bool_node() const; - Node* no_overflow(PhaseGVN *phase, Node* new_result); -}; - -class MathExactINode : public MathExactNode { - public: - MathExactINode(Node* ctrl, Node* in1) : MathExactNode(ctrl, in1) {} - MathExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactNode(ctrl, in1, in2) {} - virtual int Opcode() const; - virtual Node* match(const ProjNode* proj, const Matcher* m); - virtual const Type* bottom_type() const { return TypeTuple::INT_CC_PAIR; } -}; - -class MathExactLNode : public MathExactNode { -public: - MathExactLNode(Node* ctrl, Node* in1) : MathExactNode(ctrl, in1) {} - MathExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactNode(ctrl, in1, in2) {} - virtual int Opcode() const; - virtual Node* match(const ProjNode* proj, const Matcher* m); - virtual const Type* bottom_type() const { return TypeTuple::LONG_CC_PAIR; } -}; - -class AddExactINode : public MathExactINode { -public: - AddExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {} - virtual int Opcode() const; - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); + virtual uint ideal_reg() const { return Op_RegFlags; } + virtual const Type* sub(const Type* t1, const Type* t2) const; }; -class AddExactLNode : public MathExactLNode { -public: - AddExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {} - virtual int Opcode() const; - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); -}; - -class SubExactINode : public MathExactINode { -public: - SubExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {} - virtual int Opcode() const; - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); -}; - -class SubExactLNode : public MathExactLNode { -public: - SubExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {} - virtual int Opcode() const; - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); -}; - -class NegExactINode : public MathExactINode { -public: - NegExactINode(Node* ctrl, Node* in1) : MathExactINode(ctrl, in1) {} - virtual int Opcode() const; - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); -}; - -class NegExactLNode : public MathExactLNode { +class OverflowINode : public OverflowNode { public: - NegExactLNode(Node* ctrl, Node* in1) : MathExactLNode(ctrl, in1) {} - virtual int Opcode() const; - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); -}; - -class MulExactINode : public MathExactINode { -public: - MulExactINode(Node* ctrl, Node* in1, Node* in2) : MathExactINode(ctrl, in1, in2) {} - virtual int Opcode() const; - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); -}; + typedef TypeInt TypeClass; -class MulExactLNode : public MathExactLNode { -public: - MulExactLNode(Node* ctrl, Node* in1, Node* in2) : MathExactLNode(ctrl, in1, in2) {} - virtual int Opcode() const; + OverflowINode(Node* in1, Node* in2) : OverflowNode(in1, in2) {} virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); -}; + virtual const Type* Value(PhaseTransform* phase) const; -class FlagsProjNode : public ProjNode { -public: - FlagsProjNode(Node* src, uint con) : ProjNode(src, con) { - init_class_id(Class_FlagsProj); - } - - virtual int Opcode() const; - virtual bool is_CFG() const { return false; } - virtual const Type* bottom_type() const { return TypeInt::CC; } - virtual uint 
ideal_reg() const { return Op_RegFlags; } + virtual bool will_overflow(jint v1, jint v2) const = 0; + virtual bool can_overflow(const Type* t1, const Type* t2) const = 0; }; +class OverflowLNode : public OverflowNode { +public: + typedef TypeLong TypeClass; + + OverflowLNode(Node* in1, Node* in2) : OverflowNode(in1, in2) {} + virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); + virtual const Type* Value(PhaseTransform* phase) const; + + virtual bool will_overflow(jlong v1, jlong v2) const = 0; + virtual bool can_overflow(const Type* t1, const Type* t2) const = 0; +}; + +class OverflowAddINode : public OverflowINode { +public: + typedef AddINode MathOp; + + OverflowAddINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {} + virtual int Opcode() const; + + virtual bool will_overflow(jint v1, jint v2) const; + virtual bool can_overflow(const Type* t1, const Type* t2) const; +}; + +class OverflowSubINode : public OverflowINode { +public: + typedef SubINode MathOp; + + OverflowSubINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {} + virtual int Opcode() const; + + virtual bool will_overflow(jint v1, jint v2) const; + virtual bool can_overflow(const Type* t1, const Type* t2) const; +}; + +class OverflowMulINode : public OverflowINode { +public: + typedef MulINode MathOp; + + OverflowMulINode(Node* in1, Node* in2) : OverflowINode(in1, in2) {} + virtual int Opcode() const; + + virtual bool will_overflow(jint v1, jint v2) const; + virtual bool can_overflow(const Type* t1, const Type* t2) const; +}; + +class OverflowAddLNode : public OverflowLNode { +public: + typedef AddLNode MathOp; + + OverflowAddLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {} + virtual int Opcode() const; + + virtual bool will_overflow(jlong v1, jlong v2) const; + virtual bool can_overflow(const Type* t1, const Type* t2) const; +}; + +class OverflowSubLNode : public OverflowLNode { +public: + typedef SubLNode MathOp; + + OverflowSubLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {} + virtual int Opcode() const; + + virtual bool will_overflow(jlong v1, jlong v2) const; + virtual bool can_overflow(const Type* t1, const Type* t2) const; +}; + +class OverflowMulLNode : public OverflowLNode { +public: + typedef MulLNode MathOp; + + OverflowMulLNode(Node* in1, Node* in2) : OverflowLNode(in1, in2) {} + virtual int Opcode() const; + + virtual bool will_overflow(jlong v1, jlong v2) const; + virtual bool can_overflow(const Type* t1, const Type* t2) const; +}; + #endif diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/multnode.cpp --- a/src/share/vm/opto/multnode.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/multnode.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -54,11 +54,6 @@ assert(Opcode() != Op_If || proj->Opcode() == (which_proj?Op_IfTrue:Op_IfFalse), "bad if #2"); return proj; } - } else if (p->is_FlagsProj()) { - FlagsProjNode *proj = p->as_FlagsProj(); - if (proj->_con == which_proj) { - return proj; - } } else { assert(p == this && this->is_Start(), "else must be proj"); continue; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/node.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -69,7 +69,6 @@ class EncodePKlassNode; class FastLockNode; class FastUnlockNode; -class FlagsProjNode; class IfNode; class IfFalseNode; class IfTrueNode; @@ -100,7 +99,6 @@ class MachSpillCopyNode; class MachTempNode; class Matcher; -class MathExactNode; class MemBarNode; class MemBarStoreStoreNode; class MemNode; @@ -575,7 
+573,6 @@ DEFINE_CLASS_ID(MemBar, Multi, 3) DEFINE_CLASS_ID(Initialize, MemBar, 0) DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1) - DEFINE_CLASS_ID(MathExact, Multi, 4) DEFINE_CLASS_ID(Mach, Node, 1) DEFINE_CLASS_ID(MachReturn, Mach, 0) @@ -632,7 +629,6 @@ DEFINE_CLASS_ID(Cmp, Sub, 0) DEFINE_CLASS_ID(FastLock, Cmp, 0) DEFINE_CLASS_ID(FastUnlock, Cmp, 1) - DEFINE_CLASS_ID(FlagsProj, Cmp, 2) DEFINE_CLASS_ID(MergeMem, Node, 7) DEFINE_CLASS_ID(Bool, Node, 8) @@ -736,7 +732,6 @@ DEFINE_CLASS_QUERY(EncodePKlass) DEFINE_CLASS_QUERY(FastLock) DEFINE_CLASS_QUERY(FastUnlock) - DEFINE_CLASS_QUERY(FlagsProj) DEFINE_CLASS_QUERY(If) DEFINE_CLASS_QUERY(IfFalse) DEFINE_CLASS_QUERY(IfTrue) @@ -765,7 +760,6 @@ DEFINE_CLASS_QUERY(MachSafePoint) DEFINE_CLASS_QUERY(MachSpillCopy) DEFINE_CLASS_QUERY(MachTemp) - DEFINE_CLASS_QUERY(MathExact) DEFINE_CLASS_QUERY(Mem) DEFINE_CLASS_QUERY(MemBar) DEFINE_CLASS_QUERY(MemBarStoreStore) diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/parse.hpp --- a/src/share/vm/opto/parse.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/parse.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -480,6 +480,8 @@ // Helper function to compute array addressing Node* array_addressing(BasicType type, int vals, const Type* *result2=NULL); + void rtm_deopt(); + // Pass current map to exits void return_current(Node* value); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/parse1.cpp --- a/src/share/vm/opto/parse1.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/parse1.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -565,6 +565,10 @@ set_map(entry_map); do_method_entry(); } + if (depth() == 1) { + // Add check to deoptimize the nmethod if RTM state was changed + rtm_deopt(); + } // Check for bailouts during method entry. if (failing()) { @@ -1982,6 +1986,42 @@ set_control( _gvn.transform(result_rgn) ); } +// Add check to deoptimize if RTM state is not ProfileRTM +void Parse::rtm_deopt() { +#if INCLUDE_RTM_OPT + if (C->profile_rtm()) { + assert(C->method() != NULL, "only for normal compilations"); + assert(!C->method()->method_data()->is_empty(), "MDO is needed to record RTM state"); + assert(depth() == 1, "generate check only for main compiled method"); + + // Set starting bci for uncommon trap. + set_parse_bci(is_osr_parse() ? osr_bci() : 0); + + // Load the rtm_state from the MethodData. + const TypePtr* adr_type = TypeMetadataPtr::make(C->method()->method_data()); + Node* mdo = makecon(adr_type); + int offset = MethodData::rtm_state_offset_in_bytes(); + Node* adr_node = basic_plus_adr(mdo, mdo, offset); + Node* rtm_state = make_load(control(), adr_node, TypeInt::INT, T_INT, adr_type, MemNode::unordered); + + // Separate Load from Cmp by Opaque. + // In expand_macro_nodes() it will be replaced either + // with this load when there are locks in the code + // or with ProfileRTM (cmp->in(2)) otherwise so that + // the check will fold. 
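The Overflow*Node classes above expose two predicates: will_overflow() asks whether one concrete pair of operands wraps, while can_overflow() asks whether any pair of values drawn from the operands' type ranges could wrap. A self-contained sketch of what the integer-add flavour has to compute (illustrative only, with made-up names, not the HotSpot sources):

    #include <cassert>

    typedef int       jint;
    typedef long long jlong;

    // Concrete operands: does v1 + v2 wrap around 32 bits?
    static bool add_will_overflow(jint v1, jint v2) {
      jlong wide = (jlong) v1 + (jlong) v2;   // exact result in 64 bits
      return wide != (jlong)(jint) wide;      // true if it does not fit back into 32 bits
    }

    // Type ranges [lo1,hi1] and [lo2,hi2]: could any pair of values wrap?
    // Addition is monotonic, so checking the two extreme corners suffices.
    static bool add_can_overflow(jint lo1, jint hi1, jint lo2, jint hi2) {
      return add_will_overflow(lo1, lo2) || add_will_overflow(hi1, hi2);
    }

    int main() {
      assert( add_will_overflow(0x7fffffff, 1));
      assert(!add_will_overflow(0x7ffffffe, 1));
      assert(!add_can_overflow(0, 100, 0, 100));  // small ranges can never wrap
      return 0;
    }

When can_overflow() is provably false for the input types, the overflow branch can be folded away and the guarded arithmetic reduced to the plain node named by the MathOp typedef, which is what lets the old MathExact*/FlagsProj machinery be removed.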
+ Node* profile_state = makecon(TypeInt::make(ProfileRTM)); + Node* opq = _gvn.transform( new (C) Opaque3Node(C, rtm_state, Opaque3Node::RTM_OPT) ); + Node* chk = _gvn.transform( new (C) CmpINode(opq, profile_state) ); + Node* tst = _gvn.transform( new (C) BoolNode(chk, BoolTest::eq) ); + // Branch to failure if state was changed + { BuildCutout unless(this, tst, PROB_ALWAYS); + uncommon_trap(Deoptimization::Reason_rtm_state_change, + Deoptimization::Action_make_not_entrant); + } + } +#endif +} + //------------------------------return_current--------------------------------- // Append current _map to _exit_return void Parse::return_current(Node* value) { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/runtime.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1311,6 +1311,14 @@ tty->print_cr("%s", c->name()); blc->print_on(tty); } +#if INCLUDE_RTM_OPT + } else if (c->tag() == NamedCounter::RTMLockingCounter) { + RTMLockingCounters* rlc = ((RTMLockingNamedCounter*)c)->counters(); + if (rlc->nonzero()) { + tty->print_cr("%s", c->name()); + rlc->print_on(tty); + } +#endif } c = c->next(); } @@ -1350,6 +1358,8 @@ NamedCounter* c; if (tag == NamedCounter::BiasedLockingCounter) { c = new BiasedLockingNamedCounter(strdup(st.as_string())); + } else if (tag == NamedCounter::RTMLockingCounter) { + c = new RTMLockingNamedCounter(strdup(st.as_string())); } else { c = new NamedCounter(strdup(st.as_string()), tag); } @@ -1358,6 +1368,7 @@ // add counters so this is safe. NamedCounter* head; do { + c->set_next(NULL); head = _named_counters; c->set_next(head); } while (Atomic::cmpxchg_ptr(c, &_named_counters, head) != head); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/runtime.hpp --- a/src/share/vm/opto/runtime.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/runtime.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -29,6 +29,7 @@ #include "opto/machnode.hpp" #include "opto/type.hpp" #include "runtime/biasedLocking.hpp" +#include "runtime/rtmLocking.hpp" #include "runtime/deoptimization.hpp" #include "runtime/vframe.hpp" @@ -61,7 +62,8 @@ NoTag, LockCounter, EliminatedLockCounter, - BiasedLockingCounter + BiasedLockingCounter, + RTMLockingCounter }; private: @@ -85,7 +87,7 @@ NamedCounter* next() const { return _next; } void set_next(NamedCounter* next) { - assert(_next == NULL, "already set"); + assert(_next == NULL || next == NULL, "already set"); _next = next; } @@ -102,6 +104,18 @@ BiasedLockingCounters* counters() { return &_counters; } }; + +class RTMLockingNamedCounter : public NamedCounter { + private: + RTMLockingCounters _counters; + + public: + RTMLockingNamedCounter(const char *n) : + NamedCounter(n, RTMLockingCounter), _counters() {} + + RTMLockingCounters* counters() { return &_counters; } +}; + typedef const TypeFunc*(*TypeFunc_generator)(); class OptoRuntime : public AllStatic { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/subnode.cpp --- a/src/share/vm/opto/subnode.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/subnode.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1126,11 +1126,15 @@ Node *cmp = in(1); if( !cmp->is_Sub() ) return NULL; int cop = cmp->Opcode(); - if( cop == Op_FastLock || cop == Op_FastUnlock || cop == Op_FlagsProj) return NULL; + if( cop == Op_FastLock || cop == Op_FastUnlock) return NULL; Node *cmp1 = cmp->in(1); Node *cmp2 = cmp->in(2); if( !cmp1 ) return NULL; + if (_test._test == BoolTest::overflow || _test._test == BoolTest::no_overflow) { + 
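Two easy-to-miss fixes sit in the opto/runtime hunks above: the cmpxchg loop that prepends a freshly created NamedCounter now clears the counter's next pointer at the top of each iteration, and the set_next() assert is relaxed so that clearing is legal. Without this, a failed CAS followed by a retry would call set_next() on a node whose _next was already set and trip the old assert(_next == NULL). A stripped-down sketch of the pattern, using std::atomic as a stand-in for the HotSpot atomics:

    #include <atomic>

    struct Counter {
      Counter* _next = nullptr;
      void set_next(Counter* n) { _next = n; }   // relaxed: may be reset to null
    };

    static std::atomic<Counter*> g_head{nullptr};

    // Lock-free prepend: the CAS may fail and the loop retry, so the node's
    // next pointer has to be reset each time around.
    static void prepend(Counter* c) {
      Counter* head;
      do {
        c->set_next(nullptr);        // the newly added reset
        head = g_head.load();
        c->set_next(head);
      } while (!g_head.compare_exchange_weak(head, c));
    }

    int main() {
      Counter a, b;
      prepend(&a);
      prepend(&b);
      return g_head.load() == &b ? 0 : 1;
    }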
return NULL; + } + // Constant on left? Node *con = cmp1; uint op2 = cmp2->Opcode(); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/type.cpp --- a/src/share/vm/opto/type.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/type.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -306,6 +306,7 @@ TypeInt::POS1 = TypeInt::make(1,max_jint, WidenMin); // Positive values TypeInt::INT = TypeInt::make(min_jint,max_jint, WidenMax); // 32-bit integers TypeInt::SYMINT = TypeInt::make(-max_jint,max_jint,WidenMin); // symmetric range + TypeInt::TYPE_DOMAIN = TypeInt::INT; // CmpL is overloaded both as the bytecode computation returning // a trinary (-1,0,+1) integer result AND as an efficient long // compare returning optimizer ideal-type flags. @@ -322,6 +323,7 @@ TypeLong::LONG = TypeLong::make(min_jlong,max_jlong,WidenMax); // 64-bit integers TypeLong::INT = TypeLong::make((jlong)min_jint,(jlong)max_jint,WidenMin); TypeLong::UINT = TypeLong::make(0,(jlong)max_juint,WidenMin); + TypeLong::TYPE_DOMAIN = TypeLong::LONG; const Type **fboth =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*)); fboth[0] = Type::CONTROL; @@ -1161,6 +1163,7 @@ const TypeInt *TypeInt::POS1; // Positive 32-bit integers const TypeInt *TypeInt::INT; // 32-bit integers const TypeInt *TypeInt::SYMINT; // symmetric range [-max_jint..max_jint] +const TypeInt *TypeInt::TYPE_DOMAIN; // alias for TypeInt::INT //------------------------------TypeInt---------------------------------------- TypeInt::TypeInt( jint lo, jint hi, int w ) : Type(Int), _lo(lo), _hi(hi), _widen(w) { @@ -1418,6 +1421,7 @@ const TypeLong *TypeLong::LONG; // 64-bit integers const TypeLong *TypeLong::INT; // 32-bit subrange const TypeLong *TypeLong::UINT; // 32-bit unsigned subrange +const TypeLong *TypeLong::TYPE_DOMAIN; // alias for TypeLong::LONG //------------------------------TypeLong--------------------------------------- TypeLong::TypeLong( jlong lo, jlong hi, int w ) : Type(Long), _lo(lo), _hi(hi), _widen(w) { @@ -2459,7 +2463,7 @@ const TypeOopPtr *TypeOopPtr::BOTTOM; //------------------------------TypeOopPtr------------------------------------- -TypeOopPtr::TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative) +TypeOopPtr::TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth) : TypePtr(t, ptr, offset), _const_oop(o), _klass(k), _klass_is_exact(xk), @@ -2467,7 +2471,8 @@ _is_ptr_to_narrowklass(false), _is_ptr_to_boxed_value(false), _instance_id(instance_id), - _speculative(speculative) { + _speculative(speculative), + _inline_depth(inline_depth){ if (Compile::current()->eliminate_boxing() && (t == InstPtr) && (offset > 0) && xk && (k != 0) && k->is_instance_klass()) { _is_ptr_to_boxed_value = k->as_instance_klass()->is_boxed_value_offset(offset); @@ -2534,12 +2539,12 @@ //------------------------------make------------------------------------------- const TypeOopPtr *TypeOopPtr::make(PTR ptr, - int offset, int instance_id, const TypeOopPtr* speculative) { + int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth) { assert(ptr != Constant, "no constant generic pointers"); ciKlass* k = Compile::current()->env()->Object_klass(); bool xk = false; ciObject* o = NULL; - return (TypeOopPtr*)(new TypeOopPtr(OopPtr, ptr, k, xk, o, offset, instance_id, speculative))->hashcons(); + return (TypeOopPtr*)(new TypeOopPtr(OopPtr, ptr, k, xk, o, offset, instance_id, speculative, 
inline_depth))->hashcons(); } @@ -2547,7 +2552,7 @@ const Type *TypeOopPtr::cast_to_ptr_type(PTR ptr) const { assert(_base == OopPtr, "subclass must override cast_to_ptr_type"); if( ptr == _ptr ) return this; - return make(ptr, _offset, _instance_id, _speculative); + return make(ptr, _offset, _instance_id, _speculative, _inline_depth); } //-----------------------------cast_to_instance_id---------------------------- @@ -2644,7 +2649,7 @@ case AnyNull: { int instance_id = meet_instance_id(InstanceTop); const TypeOopPtr* speculative = _speculative; - return make(ptr, offset, instance_id, speculative); + return make(ptr, offset, instance_id, speculative, _inline_depth); } case BotPTR: case NotNull: @@ -2657,7 +2662,8 @@ const TypeOopPtr *tp = t->is_oopptr(); int instance_id = meet_instance_id(tp->instance_id()); const TypeOopPtr* speculative = xmeet_speculative(tp); - return make(meet_ptr(tp->ptr()), meet_offset(tp->offset()), instance_id, speculative); + int depth = meet_inline_depth(tp->inline_depth()); + return make(meet_ptr(tp->ptr()), meet_offset(tp->offset()), instance_id, speculative, depth); } case InstPtr: // For these, flip the call around to cut down @@ -2674,7 +2680,7 @@ const Type *TypeOopPtr::xdual() const { assert(klass() == Compile::current()->env()->Object_klass(), "no klasses here"); assert(const_oop() == NULL, "no constants here"); - return new TypeOopPtr(_base, dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative()); + return new TypeOopPtr(_base, dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative(), dual_inline_depth()); } //--------------------------make_from_klass_common----------------------------- @@ -2765,7 +2771,7 @@ } else if (!o->should_be_constant()) { return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0); } - const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, NULL, is_autobox_cache); + const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0, InstanceBot, NULL, InlineDepthBottom, is_autobox_cache); return arr; } else if (klass->is_type_array_klass()) { // Element is an typeArray @@ -2854,7 +2860,8 @@ const TypeOopPtr *a = (const TypeOopPtr*)t; if (_klass_is_exact != a->_klass_is_exact || _instance_id != a->_instance_id || - !eq_speculative(a)) return false; + !eq_speculative(a) || + _inline_depth != a->_inline_depth) return false; ciObject* one = const_oop(); ciObject* two = a->const_oop(); if (one == NULL || two == NULL) { @@ -2872,6 +2879,7 @@ _klass_is_exact + _instance_id + hash_speculative() + + _inline_depth + TypePtr::hash(); } @@ -2892,6 +2900,7 @@ else if (_instance_id != InstanceBot) st->print(",iid=%d",_instance_id); + dump_inline_depth(st); dump_speculative(st); } @@ -2905,6 +2914,16 @@ st->print(")"); } } + +void TypeOopPtr::dump_inline_depth(outputStream *st) const { + if (_inline_depth != InlineDepthBottom) { + if (_inline_depth == InlineDepthTop) { + st->print(" (inline_depth=InlineDepthTop)"); + } else { + st->print(" (inline_depth=%d)", _inline_depth); + } + } +} #endif //------------------------------singleton-------------------------------------- @@ -2918,7 +2937,7 @@ //------------------------------add_offset------------------------------------- const TypePtr *TypeOopPtr::add_offset(intptr_t offset) const { - return make(_ptr, xadd_offset(offset), _instance_id, add_offset_speculative(offset)); + return make(_ptr, xadd_offset(offset), _instance_id, 
add_offset_speculative(offset), _inline_depth); } /** @@ -2928,7 +2947,52 @@ if (_speculative == NULL) { return this; } - return make(_ptr, _offset, _instance_id, NULL); + assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth"); + return make(_ptr, _offset, _instance_id, NULL, _inline_depth); +} + +/** + * Return same type but with a different inline depth (used for speculation) + * + * @param depth depth to meet with + */ +const TypeOopPtr* TypeOopPtr::with_inline_depth(int depth) const { + if (!UseInlineDepthForSpeculativeTypes) { + return this; + } + return make(_ptr, _offset, _instance_id, _speculative, depth); +} + +/** + * Check whether new profiling would improve speculative type + * + * @param exact_kls class from profiling + * @param inline_depth inlining depth of profile point + * + * @return true if type profile is valuable + */ +bool TypeOopPtr::would_improve_type(ciKlass* exact_kls, int inline_depth) const { + // no way to improve an already exact type + if (klass_is_exact()) { + return false; + } + // no profiling? + if (exact_kls == NULL) { + return false; + } + // no speculative type or non exact speculative type? + if (speculative_type() == NULL) { + return true; + } + // If the node already has an exact speculative type keep it, + // unless it was provided by profiling that is at a deeper + // inlining level. Profiling at a higher inlining depth is + // expected to be less accurate. + if (_speculative->inline_depth() == InlineDepthBottom) { + return false; + } + assert(_speculative->inline_depth() != InlineDepthTop, "can't do the comparison"); + return inline_depth < _speculative->inline_depth(); } //------------------------------meet_instance_id-------------------------------- @@ -3031,6 +3095,21 @@ return _speculative->hash(); } +/** + * dual of the inline depth for this type (used for speculation) + */ +int TypeOopPtr::dual_inline_depth() const { + return -inline_depth(); +} + +/** + * meet of 2 inline depth (used for speculation) + * + * @param depth depth to meet with + */ +int TypeOopPtr::meet_inline_depth(int depth) const { + return MAX2(inline_depth(), depth); +} //============================================================================= // Convenience common pre-built types. 
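dual_inline_depth() and meet_inline_depth() above give the inline depth the usual lattice treatment: the dual is the negated depth and the meet keeps the larger (deeper, hence less trustworthy) value, so the corresponding join keeps the shallower profile point. A small self-contained toy of the intended ordering (illustrative, not HotSpot source):

    #include <algorithm>
    #include <cassert>

    static int dual_depth(int d)        { return -d; }
    static int meet_depth(int a, int b) { return std::max(a, b); }
    static int join_depth(int a, int b) { return dual_depth(meet_depth(dual_depth(a), dual_depth(b))); }

    int main() {
      // Profile data recorded at inline depth 1 is preferred over depth 3:
      assert(meet_depth(1, 3) == 3);   // meet falls to the weaker (deeper) value
      assert(join_depth(1, 3) == 1);   // join rises to the stronger (shallower) one
      return 0;
    }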
@@ -3041,8 +3120,8 @@ const TypeInstPtr *TypeInstPtr::KLASS; //------------------------------TypeInstPtr------------------------------------- -TypeInstPtr::TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int off, int instance_id, const TypeOopPtr* speculative) - : TypeOopPtr(InstPtr, ptr, k, xk, o, off, instance_id, speculative), _name(k->name()) { +TypeInstPtr::TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int off, int instance_id, const TypeOopPtr* speculative, int inline_depth) + : TypeOopPtr(InstPtr, ptr, k, xk, o, off, instance_id, speculative, inline_depth), _name(k->name()) { assert(k != NULL && (k->is_loaded() || o == NULL), "cannot have constants with non-loaded klass"); @@ -3055,7 +3134,8 @@ ciObject* o, int offset, int instance_id, - const TypeOopPtr* speculative) { + const TypeOopPtr* speculative, + int inline_depth) { assert( !k->is_loaded() || k->is_instance_klass(), "Must be for instance"); // Either const_oop() is NULL or else ptr is Constant assert( (!o && ptr != Constant) || (o && ptr == Constant), @@ -3076,7 +3156,7 @@ // Now hash this baby TypeInstPtr *result = - (TypeInstPtr*)(new TypeInstPtr(ptr, k, xk, o ,offset, instance_id, speculative))->hashcons(); + (TypeInstPtr*)(new TypeInstPtr(ptr, k, xk, o ,offset, instance_id, speculative, inline_depth))->hashcons(); return result; } @@ -3109,7 +3189,7 @@ if( ptr == _ptr ) return this; // Reconstruct _sig info here since not a problem with later lazy // construction, _sig will show up on demand. - return make(ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative); + return make(ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative, _inline_depth); } @@ -3121,13 +3201,13 @@ ciInstanceKlass* ik = _klass->as_instance_klass(); if( (ik->is_final() || _const_oop) ) return this; // cannot clear xk if( ik->is_interface() ) return this; // cannot set xk - return make(ptr(), klass(), klass_is_exact, const_oop(), _offset, _instance_id, _speculative); + return make(ptr(), klass(), klass_is_exact, const_oop(), _offset, _instance_id, _speculative, _inline_depth); } //-----------------------------cast_to_instance_id---------------------------- const TypeOopPtr *TypeInstPtr::cast_to_instance_id(int instance_id) const { if( instance_id == _instance_id ) return this; - return make(_ptr, klass(), _klass_is_exact, const_oop(), _offset, instance_id, _speculative); + return make(_ptr, klass(), _klass_is_exact, const_oop(), _offset, instance_id, _speculative, _inline_depth); } //------------------------------xmeet_unloaded--------------------------------- @@ -3138,6 +3218,7 @@ PTR ptr = meet_ptr(tinst->ptr()); int instance_id = meet_instance_id(tinst->instance_id()); const TypeOopPtr* speculative = xmeet_speculative(tinst); + int depth = meet_inline_depth(tinst->inline_depth()); const TypeInstPtr *loaded = is_loaded() ? this : tinst; const TypeInstPtr *unloaded = is_loaded() ? 
tinst : this; @@ -3158,7 +3239,7 @@ assert(loaded->ptr() != TypePtr::Null, "insanity check"); // if( loaded->ptr() == TypePtr::TopPTR ) { return unloaded; } - else if (loaded->ptr() == TypePtr::AnyNull) { return TypeInstPtr::make(ptr, unloaded->klass(), false, NULL, off, instance_id, speculative); } + else if (loaded->ptr() == TypePtr::AnyNull) { return TypeInstPtr::make(ptr, unloaded->klass(), false, NULL, off, instance_id, speculative, depth); } else if (loaded->ptr() == TypePtr::BotPTR ) { return TypeInstPtr::BOTTOM; } else if (loaded->ptr() == TypePtr::Constant || loaded->ptr() == TypePtr::NotNull) { if (unloaded->ptr() == TypePtr::BotPTR ) { return TypeInstPtr::BOTTOM; } @@ -3215,6 +3296,7 @@ PTR ptr = meet_ptr(tp->ptr()); int instance_id = meet_instance_id(tp->instance_id()); const TypeOopPtr* speculative = xmeet_speculative(tp); + int depth = meet_inline_depth(tp->inline_depth()); switch (ptr) { case TopPTR: case AnyNull: // Fall 'down' to dual of object klass @@ -3222,12 +3304,12 @@ // below the centerline when the superclass is exact. We need to // do the same here. if (klass()->equals(ciEnv::current()->Object_klass()) && !klass_is_exact()) { - return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative); + return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative, depth); } else { // cannot subclass, so the meet has to fall badly below the centerline ptr = NotNull; instance_id = InstanceBot; - return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative); + return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative, depth); } case Constant: case NotNull: @@ -3242,7 +3324,7 @@ if (klass()->equals(ciEnv::current()->Object_klass()) && !klass_is_exact()) { // that is, tp's array type is a subtype of my klass return TypeAryPtr::make(ptr, (ptr == Constant ? tp->const_oop() : NULL), - tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative); + tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id, speculative, depth); } } // The other case cannot happen, since I cannot be a subtype of an array. @@ -3250,7 +3332,7 @@ if( ptr == Constant ) ptr = NotNull; instance_id = InstanceBot; - return make(ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative); + return make(ptr, ciEnv::current()->Object_klass(), false, NULL, offset, instance_id, speculative, depth); default: typerr(t); } } @@ -3265,14 +3347,16 @@ case AnyNull: { int instance_id = meet_instance_id(InstanceTop); const TypeOopPtr* speculative = xmeet_speculative(tp); + int depth = meet_inline_depth(tp->inline_depth()); return make(ptr, klass(), klass_is_exact(), - (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative); + (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative, depth); } case NotNull: case BotPTR: { int instance_id = meet_instance_id(tp->instance_id()); const TypeOopPtr* speculative = xmeet_speculative(tp); - return TypeOopPtr::make(ptr, offset, instance_id, speculative); + int depth = meet_inline_depth(tp->inline_depth()); + return TypeOopPtr::make(ptr, offset, instance_id, speculative, depth); } default: typerr(t); } @@ -3292,7 +3376,7 @@ int instance_id = meet_instance_id(InstanceTop); const TypeOopPtr* speculative = _speculative; return make(ptr, klass(), klass_is_exact(), - (ptr == Constant ? 
const_oop() : NULL), offset, instance_id, speculative); + (ptr == Constant ? const_oop() : NULL), offset, instance_id, speculative, _inline_depth); } case NotNull: case BotPTR: @@ -3324,13 +3408,14 @@ PTR ptr = meet_ptr( tinst->ptr() ); int instance_id = meet_instance_id(tinst->instance_id()); const TypeOopPtr* speculative = xmeet_speculative(tinst); + int depth = meet_inline_depth(tinst->inline_depth()); // Check for easy case; klasses are equal (and perhaps not loaded!) // If we have constants, then we created oops so classes are loaded // and we can handle the constants further down. This case handles // both-not-loaded or both-loaded classes if (ptr != Constant && klass()->equals(tinst->klass()) && klass_is_exact() == tinst->klass_is_exact()) { - return make(ptr, klass(), klass_is_exact(), NULL, off, instance_id, speculative); + return make(ptr, klass(), klass_is_exact(), NULL, off, instance_id, speculative, depth); } // Classes require inspection in the Java klass hierarchy. Must be loaded. @@ -3394,7 +3479,7 @@ // Find out which constant. o = (this_klass == klass()) ? const_oop() : tinst->const_oop(); } - return make(ptr, k, xk, o, off, instance_id, speculative); + return make(ptr, k, xk, o, off, instance_id, speculative, depth); } // Either oop vs oop or interface vs interface or interface vs Object @@ -3471,7 +3556,7 @@ else ptr = NotNull; } - return make(ptr, this_klass, this_xk, o, off, instance_id, speculative); + return make(ptr, this_klass, this_xk, o, off, instance_id, speculative, depth); } // Else classes are not equal // Since klasses are different, we require a LCA in the Java @@ -3482,7 +3567,7 @@ // Now we find the LCA of Java classes ciKlass* k = this_klass->least_common_ancestor(tinst_klass); - return make(ptr, k, false, NULL, off, instance_id, speculative); + return make(ptr, k, false, NULL, off, instance_id, speculative, depth); } // End of case InstPtr } // End of switch @@ -3506,7 +3591,7 @@ // Dual: do NOT dual on klasses. This means I do NOT understand the Java // inheritance mechanism. 
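The would_improve_type() logic introduced in the TypeOopPtr hunk earlier boils down to a short decision table: an already exact klass cannot be improved, profiling that saw nothing is useless, a first exact speculation is always worth installing, and an existing speculation is replaced only when the new profile point sits at a shallower inline depth. A toy restatement under those assumptions (not the HotSpot sources):

    #include <climits>
    #include <cassert>

    static const int kInlineDepthBottom = INT_MAX;   // sentinel, mirrors InlineDepthBottom above

    static bool would_improve(bool klass_already_exact,
                              bool profiling_saw_exact_klass,
                              bool have_exact_speculation,
                              int  recorded_depth,    // depth of the existing speculation
                              int  new_depth) {       // depth of the new profile point
      if (klass_already_exact)          return false; // nothing left to improve
      if (!profiling_saw_exact_klass)   return false; // no usable profile data
      if (!have_exact_speculation)      return true;  // first speculation always helps
      if (recorded_depth == kInlineDepthBottom) return false; // origin unknown: keep it
      return new_depth < recorded_depth;              // shallower profile points win
    }

    int main() {
      assert( would_improve(false, true, false, 0, 2));
      assert( would_improve(false, true, true,  3, 1));
      assert(!would_improve(false, true, true,  1, 3));
      return 0;
    }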
const Type *TypeInstPtr::xdual() const { - return new TypeInstPtr(dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative()); + return new TypeInstPtr(dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id(), dual_speculative(), dual_inline_depth()); } //------------------------------eq--------------------------------------------- @@ -3563,6 +3648,7 @@ else if (_instance_id != InstanceBot) st->print(",iid=%d",_instance_id); + dump_inline_depth(st); dump_speculative(st); } #endif @@ -3576,7 +3662,15 @@ if (_speculative == NULL) { return this; } - return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, NULL); + assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth"); + return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, NULL, _inline_depth); +} + +const TypeOopPtr *TypeInstPtr::with_inline_depth(int depth) const { + if (!UseInlineDepthForSpeculativeTypes) { + return this; + } + return make(_ptr, klass(), klass_is_exact(), const_oop(), _offset, _instance_id, _speculative, depth); } //============================================================================= @@ -3593,30 +3687,30 @@ const TypeAryPtr *TypeAryPtr::DOUBLES; //------------------------------make------------------------------------------- -const TypeAryPtr *TypeAryPtr::make(PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative) { +const TypeAryPtr *TypeAryPtr::make(PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth) { assert(!(k == NULL && ary->_elem->isa_int()), "integral arrays must be pre-equipped with a class"); if (!xk) xk = ary->ary_must_be_exact(); assert(instance_id <= 0 || xk || !UseExactTypes, "instances are always exactly typed"); if (!UseExactTypes) xk = (ptr == Constant); - return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id, false, speculative))->hashcons(); + return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id, false, speculative, inline_depth))->hashcons(); } //------------------------------make------------------------------------------- -const TypeAryPtr *TypeAryPtr::make(PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, bool is_autobox_cache) { +const TypeAryPtr *TypeAryPtr::make(PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth, bool is_autobox_cache) { assert(!(k == NULL && ary->_elem->isa_int()), "integral arrays must be pre-equipped with a class"); assert( (ptr==Constant && o) || (ptr!=Constant && !o), "" ); if (!xk) xk = (o != NULL) || ary->ary_must_be_exact(); assert(instance_id <= 0 || xk || !UseExactTypes, "instances are always exactly typed"); if (!UseExactTypes) xk = (ptr == Constant); - return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id, is_autobox_cache, speculative))->hashcons(); + return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id, is_autobox_cache, speculative, inline_depth))->hashcons(); } //------------------------------cast_to_ptr_type------------------------------- const Type *TypeAryPtr::cast_to_ptr_type(PTR ptr) const { if( ptr == _ptr ) return this; - return make(ptr, const_oop(), _ary, klass(), 
klass_is_exact(), _offset, _instance_id, _speculative); + return make(ptr, const_oop(), _ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth); } @@ -3625,13 +3719,13 @@ if( klass_is_exact == _klass_is_exact ) return this; if (!UseExactTypes) return this; if (_ary->ary_must_be_exact()) return this; // cannot clear xk - return make(ptr(), const_oop(), _ary, klass(), klass_is_exact, _offset, _instance_id, _speculative); + return make(ptr(), const_oop(), _ary, klass(), klass_is_exact, _offset, _instance_id, _speculative, _inline_depth); } //-----------------------------cast_to_instance_id---------------------------- const TypeOopPtr *TypeAryPtr::cast_to_instance_id(int instance_id) const { if( instance_id == _instance_id ) return this; - return make(_ptr, const_oop(), _ary, klass(), _klass_is_exact, _offset, instance_id, _speculative); + return make(_ptr, const_oop(), _ary, klass(), _klass_is_exact, _offset, instance_id, _speculative, _inline_depth); } //-----------------------------narrow_size_type------------------------------- @@ -3694,7 +3788,7 @@ new_size = narrow_size_type(new_size); if (new_size == size()) return this; const TypeAry* new_ary = TypeAry::make(elem(), new_size, is_stable()); - return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative); + return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id, _speculative, _inline_depth); } @@ -3773,19 +3867,20 @@ const TypeOopPtr *tp = t->is_oopptr(); int offset = meet_offset(tp->offset()); PTR ptr = meet_ptr(tp->ptr()); + int depth = meet_inline_depth(tp->inline_depth()); switch (tp->ptr()) { case TopPTR: case AnyNull: { int instance_id = meet_instance_id(InstanceTop); const TypeOopPtr* speculative = xmeet_speculative(tp); return make(ptr, (ptr == Constant ? const_oop() : NULL), - _ary, _klass, _klass_is_exact, offset, instance_id, speculative); + _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth); } case BotPTR: case NotNull: { int instance_id = meet_instance_id(tp->instance_id()); const TypeOopPtr* speculative = xmeet_speculative(tp); - return TypeOopPtr::make(ptr, offset, instance_id, speculative); + return TypeOopPtr::make(ptr, offset, instance_id, speculative, depth); } default: ShouldNotReachHere(); } @@ -3809,7 +3904,7 @@ int instance_id = meet_instance_id(InstanceTop); const TypeOopPtr* speculative = _speculative; return make(ptr, (ptr == Constant ? const_oop() : NULL), - _ary, _klass, _klass_is_exact, offset, instance_id, speculative); + _ary, _klass, _klass_is_exact, offset, instance_id, speculative, _inline_depth); } default: ShouldNotReachHere(); } @@ -3826,6 +3921,7 @@ PTR ptr = meet_ptr(tap->ptr()); int instance_id = meet_instance_id(tap->instance_id()); const TypeOopPtr* speculative = xmeet_speculative(tap); + int depth = meet_inline_depth(tap->inline_depth()); ciKlass* lazy_klass = NULL; if (tary->_elem->isa_int()) { // Integral array element types have irrelevant lattice relations. 
@@ -3866,7 +3962,7 @@ } else { xk = (tap->_klass_is_exact | this->_klass_is_exact); } - return make(ptr, const_oop(), tary, lazy_klass, xk, off, instance_id, speculative); + return make(ptr, const_oop(), tary, lazy_klass, xk, off, instance_id, speculative, depth); case Constant: { ciObject* o = const_oop(); if( _ptr == Constant ) { @@ -3885,7 +3981,7 @@ // Only precise for identical arrays xk = this->_klass_is_exact && (klass() == tap->klass()); } - return TypeAryPtr::make(ptr, o, tary, lazy_klass, xk, off, instance_id, speculative); + return TypeAryPtr::make(ptr, o, tary, lazy_klass, xk, off, instance_id, speculative, depth); } case NotNull: case BotPTR: @@ -3894,7 +3990,7 @@ xk = tap->_klass_is_exact; else xk = (tap->_klass_is_exact & this->_klass_is_exact) && (klass() == tap->klass()); // Only precise for identical arrays - return TypeAryPtr::make(ptr, NULL, tary, lazy_klass, xk, off, instance_id, speculative); + return TypeAryPtr::make(ptr, NULL, tary, lazy_klass, xk, off, instance_id, speculative, depth); default: ShouldNotReachHere(); } } @@ -3906,6 +4002,7 @@ PTR ptr = meet_ptr(tp->ptr()); int instance_id = meet_instance_id(tp->instance_id()); const TypeOopPtr* speculative = xmeet_speculative(tp); + int depth = meet_inline_depth(tp->inline_depth()); switch (ptr) { case TopPTR: case AnyNull: // Fall 'down' to dual of object klass @@ -3913,12 +4010,12 @@ // below the centerline when the superclass is exact. We need to // do the same here. if (tp->klass()->equals(ciEnv::current()->Object_klass()) && !tp->klass_is_exact()) { - return TypeAryPtr::make(ptr, _ary, _klass, _klass_is_exact, offset, instance_id, speculative); + return TypeAryPtr::make(ptr, _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth); } else { // cannot subclass, so the meet has to fall badly below the centerline ptr = NotNull; instance_id = InstanceBot; - return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative); + return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative, depth); } case Constant: case NotNull: @@ -3933,7 +4030,7 @@ if (tp->klass()->equals(ciEnv::current()->Object_klass()) && !tp->klass_is_exact()) { // that is, my array type is a subtype of 'tp' klass return make(ptr, (ptr == Constant ? const_oop() : NULL), - _ary, _klass, _klass_is_exact, offset, instance_id, speculative); + _ary, _klass, _klass_is_exact, offset, instance_id, speculative, depth); } } // The other case cannot happen, since t cannot be a subtype of an array. 
@@ -3941,7 +4038,7 @@ if( ptr == Constant ) ptr = NotNull; instance_id = InstanceBot; - return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative); + return TypeInstPtr::make(ptr, ciEnv::current()->Object_klass(), false, NULL,offset, instance_id, speculative, depth); default: typerr(t); } } @@ -3952,7 +4049,7 @@ //------------------------------xdual------------------------------------------ // Dual: compute field-by-field dual const Type *TypeAryPtr::xdual() const { - return new TypeAryPtr(dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id(), is_autobox_cache(), dual_speculative()); + return new TypeAryPtr(dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance_id(), is_autobox_cache(), dual_speculative(), dual_inline_depth()); } //----------------------interface_vs_oop--------------------------------------- @@ -4005,6 +4102,7 @@ else if (_instance_id != InstanceBot) st->print(",iid=%d",_instance_id); + dump_inline_depth(st); dump_speculative(st); } #endif @@ -4016,11 +4114,22 @@ //------------------------------add_offset------------------------------------- const TypePtr *TypeAryPtr::add_offset(intptr_t offset) const { - return make(_ptr, _const_oop, _ary, _klass, _klass_is_exact, xadd_offset(offset), _instance_id, add_offset_speculative(offset)); + return make(_ptr, _const_oop, _ary, _klass, _klass_is_exact, xadd_offset(offset), _instance_id, add_offset_speculative(offset), _inline_depth); } const Type *TypeAryPtr::remove_speculative() const { - return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, NULL); + if (_speculative == NULL) { + return this; + } + assert(_inline_depth == InlineDepthTop || _inline_depth == InlineDepthBottom, "non speculative type shouldn't have inline depth"); + return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, NULL, _inline_depth); +} + +const TypeOopPtr *TypeAryPtr::with_inline_depth(int depth) const { + if (!UseInlineDepthForSpeculativeTypes) { + return this; + } + return make(_ptr, _const_oop, _ary->remove_speculative()->is_ary(), _klass, _klass_is_exact, _offset, _instance_id, _speculative, depth); } //============================================================================= @@ -4271,7 +4380,7 @@ // else fall through: case TopPTR: case AnyNull: { - return make(ptr, NULL, offset); + return make(ptr, _metadata, offset); } case BotPTR: case NotNull: diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/opto/type.hpp --- a/src/share/vm/opto/type.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/opto/type.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -415,10 +415,15 @@ bool is_autobox_cache = false); // Speculative type. 
See TypeInstPtr + virtual const TypeOopPtr* speculative() const { return NULL; } virtual ciKlass* speculative_type() const { return NULL; } const Type* maybe_remove_speculative(bool include_speculative) const; virtual const Type* remove_speculative() const { return this; } + virtual bool would_improve_type(ciKlass* exact_kls, int inline_depth) const { + return exact_kls != NULL; + } + private: // support arrays static const BasicType _basic_type[]; @@ -489,6 +494,7 @@ virtual const Type *filter_helper(const Type *kills, bool include_speculative) const; public: + typedef jint NativeType; virtual bool eq( const Type *t ) const; virtual int hash() const; // Type specific hashing virtual bool singleton(void) const; // TRUE if type is a singleton @@ -531,6 +537,9 @@ static const TypeInt *POS1; static const TypeInt *INT; static const TypeInt *SYMINT; // symmetric range [-max_jint..max_jint] + static const TypeInt *TYPE_DOMAIN; // alias for TypeInt::INT + + static const TypeInt *as_self(const Type *t) { return t->is_int(); } #ifndef PRODUCT virtual void dump2( Dict &d, uint depth, outputStream *st ) const; #endif @@ -546,6 +555,7 @@ // Do not kill _widen bits. virtual const Type *filter_helper(const Type *kills, bool include_speculative) const; public: + typedef jlong NativeType; virtual bool eq( const Type *t ) const; virtual int hash() const; // Type specific hashing virtual bool singleton(void) const; // TRUE if type is a singleton @@ -568,6 +578,7 @@ virtual bool is_finite() const; // Has a finite value + virtual const Type *xmeet( const Type *t ) const; virtual const Type *xdual() const; // Compute dual right now. virtual const Type *widen( const Type *t, const Type* limit_type ) const; @@ -580,6 +591,11 @@ static const TypeLong *LONG; static const TypeLong *INT; // 32-bit subrange [min_jint..max_jint] static const TypeLong *UINT; // 32-bit unsigned [0..max_juint] + static const TypeLong *TYPE_DOMAIN; // alias for TypeLong::LONG + + // static convenience methods. + static const TypeLong *as_self(const Type *t) { return t->is_long(); } + #ifndef PRODUCT virtual void dump2( Dict &d, uint, outputStream *st ) const;// Specialized per-Type dumping #endif @@ -834,7 +850,7 @@ // Some kind of oop (Java pointer), either klass or instance or array. class TypeOopPtr : public TypePtr { protected: - TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative); + TypeOopPtr(TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth); public: virtual bool eq( const Type *t ) const; virtual int hash() const; // Type specific hashing @@ -845,6 +861,10 @@ }; protected: + enum { + InlineDepthBottom = INT_MAX, + InlineDepthTop = -InlineDepthBottom + }; // Oop is NULL, unless this is a constant oop. ciObject* _const_oop; // Constant oop // If _klass is NULL, then so is _sig. This is an unloaded klass. @@ -865,6 +885,11 @@ // use it, then we have to emit a guard: this part of the type is // not something we know but something we speculate about the type. const TypeOopPtr* _speculative; + // For speculative types, we record at what inlining depth the + // profiling point that provided the data is. We want to favor + // profile data coming from outer scopes which are likely better for + // the current compilation. 
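The new hooks in type.hpp — NativeType, TYPE_DOMAIN and as_self() on TypeInt/TypeLong, together with the TypeClass and MathOp typedefs on the overflow nodes — make it possible for one templated implementation to serve both the 32-bit and 64-bit node flavours instead of two hand-written copies. A compilable toy showing the shape of that pattern (the type names below are stand-ins, not the real classes):

    #include <climits>
    #include <iostream>

    // Stand-ins for TypeInt / TypeLong exposing the same kind of hooks.
    struct IntDomain  { typedef int       NativeType; };
    struct LongDomain { typedef long long NativeType; };

    // One template covers both flavours once NativeType is available as a typedef.
    template <class TypeClass>
    static bool add_will_overflow(typename TypeClass::NativeType v1,
                                  typename TypeClass::NativeType v2) {
      typedef typename TypeClass::NativeType T;
      T r = (T)((unsigned long long) v1 + (unsigned long long) v2);  // wrapped result
      return ((v1 ^ r) & (v2 ^ r)) < 0;   // sign trick: overflow flips the result sign
    }

    int main() {
      std::cout << add_will_overflow<IntDomain>(INT_MAX, 1)    // 1: overflows a 32-bit int
                << add_will_overflow<LongDomain>(INT_MAX, 1)   // 0: fits comfortably in 64 bits
                << "\n";
      return 0;
    }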
+ int _inline_depth; static const TypeOopPtr* make_from_klass_common(ciKlass* klass, bool klass_change, bool try_for_exact); @@ -880,6 +905,12 @@ #ifndef PRODUCT void dump_speculative(outputStream *st) const; #endif + // utility methods to work on the inline depth of the type + int dual_inline_depth() const; + int meet_inline_depth(int depth) const; +#ifndef PRODUCT + void dump_inline_depth(outputStream *st) const; +#endif // Do not allow interface-vs.-noninterface joins to collapse to top. virtual const Type *filter_helper(const Type *kills, bool include_speculative) const; @@ -910,7 +941,7 @@ bool not_null_elements = false); // Make a generic (unclassed) pointer to an oop. - static const TypeOopPtr* make(PTR ptr, int offset, int instance_id, const TypeOopPtr* speculative); + static const TypeOopPtr* make(PTR ptr, int offset, int instance_id, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom); ciObject* const_oop() const { return _const_oop; } virtual ciKlass* klass() const { return _klass; } @@ -924,7 +955,7 @@ bool is_known_instance() const { return _instance_id > 0; } int instance_id() const { return _instance_id; } bool is_known_instance_field() const { return is_known_instance() && _offset >= 0; } - const TypeOopPtr* speculative() const { return _speculative; } + virtual const TypeOopPtr* speculative() const { return _speculative; } virtual intptr_t get_con() const; @@ -957,18 +988,23 @@ if (_speculative != NULL) { const TypeOopPtr* speculative = _speculative->join(this)->is_oopptr(); if (speculative->klass_is_exact()) { - return speculative->klass(); + return speculative->klass(); } } return NULL; } + int inline_depth() const { + return _inline_depth; + } + virtual const TypeOopPtr* with_inline_depth(int depth) const; + virtual bool would_improve_type(ciKlass* exact_kls, int inline_depth) const; }; //------------------------------TypeInstPtr------------------------------------ // Class of Java object pointers, pointing either to non-array Java instances // or to a Klass* (including array klasses). class TypeInstPtr : public TypeOopPtr { - TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative); + TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id, const TypeOopPtr* speculative, int inline_depth); virtual bool eq( const Type *t ) const; virtual int hash() const; // Type specific hashing @@ -1004,7 +1040,7 @@ } // Make a pointer to an oop. 
- static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL); + static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom); /** Create constant type for a constant boxed value */ const Type* get_const_boxed_value() const; @@ -1023,6 +1059,7 @@ virtual const TypePtr *add_offset( intptr_t offset ) const; // Return same type without a speculative part virtual const Type* remove_speculative() const; + virtual const TypeOopPtr* with_inline_depth(int depth) const; // the core of the computation of the meet of 2 types virtual const Type *xmeet_helper(const Type *t) const; @@ -1044,8 +1081,8 @@ // Class of Java array pointers class TypeAryPtr : public TypeOopPtr { TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, - int offset, int instance_id, bool is_autobox_cache, const TypeOopPtr* speculative) - : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id, speculative), + int offset, int instance_id, bool is_autobox_cache, const TypeOopPtr* speculative, int inline_depth) + : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id, speculative, inline_depth), _ary(ary), _is_autobox_cache(is_autobox_cache) { @@ -1083,9 +1120,9 @@ bool is_autobox_cache() const { return _is_autobox_cache; } - static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL); + static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom); // Constant pointer to array - static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, bool is_autobox_cache = false); + static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot, const TypeOopPtr* speculative = NULL, int inline_depth = InlineDepthBottom, bool is_autobox_cache= false); // Return a 'ptr' version of this type virtual const Type *cast_to_ptr_type(PTR ptr) const; @@ -1101,6 +1138,7 @@ virtual const TypePtr *add_offset( intptr_t offset ) const; // Return same type without a speculative part virtual const Type* remove_speculative() const; + virtual const TypeOopPtr* with_inline_depth(int depth) const; // the core of the computation of the meet of 2 types virtual const Type *xmeet_helper(const Type *t) const; @@ -1678,6 +1716,7 @@ #define ConvL2X(x) (x) #define ConvX2I(x) ConvL2I(x) #define ConvX2L(x) (x) +#define ConvX2UL(x) (x) #else @@ -1722,6 +1761,7 @@ #define ConvL2X(x) ConvL2I(x) #define ConvX2I(x) (x) #define ConvX2L(x) ConvI2L(x) +#define ConvX2UL(x) ConvI2UL(x) #endif diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/prims/jni.cpp --- a/src/share/vm/prims/jni.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/prims/jni.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -4450,8 +4450,23 @@ // Get needed field and method IDs directByteBufferConstructor = env->GetMethodID(directByteBufferClass, "", "(JI)V"); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + directBufferSupportInitializeFailed = 1; + return false; + } directBufferAddressField = env->GetFieldID(bufferClass, "address", "J"); + if 
(env->ExceptionCheck()) { + env->ExceptionClear(); + directBufferSupportInitializeFailed = 1; + return false; + } bufferCapacityField = env->GetFieldID(bufferClass, "capacity", "I"); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + directBufferSupportInitializeFailed = 1; + return false; + } if ((directByteBufferConstructor == NULL) || (directBufferAddressField == NULL) || @@ -5068,6 +5083,7 @@ #if INCLUDE_ALL_GCS void TestOldFreeSpaceCalculation_test(); void TestG1BiasedArray_test(); +void TestCodeCacheRemSet_test(); #endif void execute_internal_vm_tests() { @@ -5093,6 +5109,7 @@ run_unit_test(TestOldFreeSpaceCalculation_test()); run_unit_test(TestG1BiasedArray_test()); run_unit_test(HeapRegionRemSet::test_prt()); + run_unit_test(TestCodeCacheRemSet_test()); #endif tty->print_cr("All internal VM tests passed"); } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/prims/unsafe.cpp --- a/src/share/vm/prims/unsafe.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/prims/unsafe.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -861,6 +861,11 @@ strcpy(buf, "java/lang/"); strcat(buf, ename); jclass cls = env->FindClass(buf); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + tty->print_cr("Unsafe: cannot throw %s because FindClass has failed", buf); + return; + } char* msg = NULL; env->ThrowNew(cls, msg); } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/prims/whitebox.cpp --- a/src/share/vm/prims/whitebox.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/prims/whitebox.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -316,9 +316,10 @@ WB_ENTRY(jint, WB_DeoptimizeMethod(JNIEnv* env, jobject o, jobject method, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + int result = 0; + CHECK_JNI_EXCEPTION_(env, result); MutexLockerEx mu(Compile_lock); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); - int result = 0; nmethod* code; if (is_osr) { int bci = InvocationEntryBci; @@ -344,6 +345,7 @@ WB_ENTRY(jboolean, WB_IsMethodCompiled(JNIEnv* env, jobject o, jobject method, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); MutexLockerEx mu(Compile_lock); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code(); @@ -355,6 +357,7 @@ WB_ENTRY(jboolean, WB_IsMethodCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); MutexLockerEx mu(Compile_lock); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); if (is_osr) { @@ -366,6 +369,7 @@ WB_ENTRY(jboolean, WB_IsMethodQueuedForCompilation(JNIEnv* env, jobject o, jobject method)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); MutexLockerEx mu(Compile_lock); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); return mh->queued_for_compilation(); @@ -373,6 +377,7 @@ WB_ENTRY(jint, WB_GetMethodCompilationLevel(JNIEnv* env, jobject o, jobject method, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, CompLevel_none); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* code = is_osr ? mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false) : mh->code(); return (code != NULL ? 
code->comp_level() : CompLevel_none); @@ -380,6 +385,7 @@ WB_ENTRY(void, WB_MakeMethodNotCompilable(JNIEnv* env, jobject o, jobject method, jint comp_level, jboolean is_osr)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION(env); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); if (is_osr) { mh->set_not_osr_compilable(comp_level, true /* report */, "WhiteBox"); @@ -390,6 +396,7 @@ WB_ENTRY(jint, WB_GetMethodEntryBci(JNIEnv* env, jobject o, jobject method)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, InvocationEntryBci); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* code = mh->lookup_osr_nmethod_for(InvocationEntryBci, CompLevel_none, false); return (code != NULL && code->is_osr_method() ? code->osr_entry_bci() : InvocationEntryBci); @@ -397,6 +404,7 @@ WB_ENTRY(jboolean, WB_TestSetDontInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); bool result = mh->dont_inline(); mh->set_dont_inline(value == JNI_TRUE); @@ -414,6 +422,7 @@ WB_ENTRY(jboolean, WB_TestSetForceInlineMethod(JNIEnv* env, jobject o, jobject method, jboolean value)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); bool result = mh->force_inline(); mh->set_force_inline(value == JNI_TRUE); @@ -422,6 +431,7 @@ WB_ENTRY(jboolean, WB_EnqueueMethodForCompilation(JNIEnv* env, jobject o, jobject method, jint comp_level, jint bci)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION_(env, JNI_FALSE); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); nmethod* nm = CompileBroker::compile_method(mh, bci, comp_level, mh, mh->invocation_count(), "WhiteBox", THREAD); MutexLockerEx mu(Compile_lock); @@ -430,6 +440,7 @@ WB_ENTRY(void, WB_ClearMethodState(JNIEnv* env, jobject o, jobject method)) jmethodID jmid = reflected_method_to_jmid(thread, env, method); + CHECK_JNI_EXCEPTION(env); methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid)); MutexLockerEx mu(Compile_lock); MethodData* mdo = mh->method_data(); @@ -489,6 +500,16 @@ c = *p; WB_END +WB_ENTRY(jstring, WB_GetCPUFeatures(JNIEnv* env, jobject o)) + const char* cpu_features = VM_Version::cpu_features(); + ThreadToNativeFromVM ttn(thread); + jstring features_string = env->NewStringUTF(cpu_features); + + CHECK_JNI_EXCEPTION_(env, NULL); + + return features_string; +WB_END + //Some convenience methods to deal with objects from java int WhiteBox::offset_for_field(const char* field_name, oop object, Symbol* signature_symbol) { @@ -600,6 +621,7 @@ {CC"isInStringTable", CC"(Ljava/lang/String;)Z", (void*)&WB_IsInStringTable }, {CC"fullGC", CC"()V", (void*)&WB_FullGC }, {CC"readReservedMemory", CC"()V", (void*)&WB_ReadReservedMemory }, + {CC"getCPUFeatures", CC"()Ljava/lang/String;", (void*)&WB_GetCPUFeatures }, }; #undef CC @@ -616,14 +638,18 @@ bool result = true; // one by one registration natives for exception catching jclass exceptionKlass = env->FindClass(vmSymbols::java_lang_NoSuchMethodError()->as_C_string()); + CHECK_JNI_EXCEPTION(env); for (int i = 0, n = sizeof(methods) / sizeof(methods[0]); i < n; ++i) { if (env->RegisterNatives(wbclass, methods + i, 1) != 0) { result = false; - if 
(env->ExceptionCheck() && env->IsInstanceOf(env->ExceptionOccurred(), exceptionKlass)) { - // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native - // ignoring the exception - tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature); + jthrowable throwable_obj = env->ExceptionOccurred(); + if (throwable_obj != NULL) { env->ExceptionClear(); + if (env->IsInstanceOf(throwable_obj, exceptionKlass)) { + // j.l.NoSuchMethodError is thrown when a method can't be found or a method is not native + // ignoring the exception + tty->print_cr("Warning: 'NoSuchMethodError' on register of sun.hotspot.WhiteBox::%s%s", methods[i].name, methods[i].signature); + } } else { // register is failed w/o exception or w/ unexpected exception tty->print_cr("Warning: unexpected error on register of sun.hotspot.WhiteBox::%s%s. All methods will be unregistered", methods[i].name, methods[i].signature); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/prims/whitebox.hpp --- a/src/share/vm/prims/whitebox.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/prims/whitebox.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -36,6 +36,24 @@ #define WB_END JNI_END #define WB_METHOD_DECLARE(result_type) extern "C" result_type JNICALL +#define CHECK_JNI_EXCEPTION_(env, value) \ + do { \ + JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \ + if (HAS_PENDING_EXCEPTION) { \ + CLEAR_PENDING_EXCEPTION; \ + return(value); \ + } \ + } while (0) + +#define CHECK_JNI_EXCEPTION(env) \ + do { \ + JavaThread* THREAD = JavaThread::thread_from_jni_environment(env); \ + if (HAS_PENDING_EXCEPTION) { \ + CLEAR_PENDING_EXCEPTION; \ + return; \ + } \ + } while (0) + class WhiteBox : public AllStatic { private: static bool _used; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/arguments.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -2220,6 +2220,8 @@ "G1ConcRSHotCardLimit"); status = status && verify_interval(G1ConcRSLogCacheSize, 0, 31, "G1ConcRSLogCacheSize"); + status = status && verify_interval(StringDeduplicationAgeThreshold, 1, markOopDesc::max_age, + "StringDeduplicationAgeThreshold"); } if (UseConcMarkSweepGC) { status = status && verify_min_value(CMSOldPLABNumRefills, 1, "CMSOldPLABNumRefills"); @@ -3749,9 +3751,6 @@ #endif // CC_INTERP #ifdef COMPILER2 - if (!UseBiasedLocking || EmitSync != 0) { - UseOptoBiasInlining = false; - } if (!EliminateLocks) { EliminateNestedLocks = false; } @@ -3812,6 +3811,11 @@ UseBiasedLocking = false; } } +#ifdef COMPILER2 + if (!UseBiasedLocking || EmitSync != 0) { + UseOptoBiasInlining = false; + } +#endif // set PauseAtExit if the gamma launcher was used and a debugger is attached // but only if not already set on the commandline diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/deoptimization.cpp --- a/src/share/vm/runtime/deoptimization.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/deoptimization.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1288,7 +1288,8 @@ gather_statistics(reason, action, trap_bc); // Ensure that we can record deopt. history: - bool create_if_missing = ProfileTraps; + // Need MDO to record RTM code generation state. 
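The CHECK_JNI_EXCEPTION / CHECK_JNI_EXCEPTION_ macros added to whitebox.hpp, and the explicit ExceptionCheck()/ExceptionClear() calls added in jni.cpp and unsafe.cpp above, all enforce the same JNI rule: once a call may have raised a Java exception, the pending exception must be handled (or cleared) before issuing further JNI calls. A minimal sketch of that pattern with a hypothetical helper (not code from the patch):

    #include <jni.h>

    // Look up java.nio.Buffer.address, bailing out cleanly if the lookup left
    // an exception pending instead of blindly making more JNI calls.
    static bool lookup_address_field(JNIEnv* env, jclass buffer_class, jfieldID* out) {
      jfieldID fid = env->GetFieldID(buffer_class, "address", "J");
      if (env->ExceptionCheck()) {   // e.g. a NoSuchFieldError may be pending
        env->ExceptionClear();       // deal with it here rather than propagating
        return false;
      }
      *out = fid;
      return fid != NULL;
    }

The WhiteBox entry points get the same treatment through the macros: when reflected_method_to_jmid() leaves an exception pending, they now return a benign default (JNI_FALSE, CompLevel_none, InvocationEntryBci, and so on) instead of continuing with an invalid jmethodID.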
+ bool create_if_missing = ProfileTraps RTM_OPT_ONLY( || UseRTMLocking ); MethodData* trap_mdo = get_method_data(thread, trap_method, create_if_missing); @@ -1489,6 +1490,7 @@ bool maybe_prior_trap = false; bool maybe_prior_recompile = false; pdata = query_update_method_data(trap_mdo, trap_bci, reason, + nm->method(), //outputs: this_trap_count, maybe_prior_trap, @@ -1534,7 +1536,7 @@ } // Go back to the compiler if there are too many traps in this method. - if (this_trap_count >= (uint)PerMethodTrapLimit) { + if (this_trap_count >= per_method_trap_limit(reason)) { // If there are too many traps in this method, force a recompile. // This will allow the compiler to see the limit overflow, and // take corrective action, if possible. @@ -1568,6 +1570,17 @@ if (tstate1 != tstate0) pdata->set_trap_state(tstate1); } + +#if INCLUDE_RTM_OPT + // Restart collecting RTM locking abort statistic if the method + // is recompiled for a reason other than RTM state change. + // Assume that in new recompiled code the statistic could be different, + // for example, due to different inlining. + if ((reason != Reason_rtm_state_change) && (trap_mdo != NULL) && + UseRTMDeopt && (nm->rtm_state() != ProfileRTM)) { + trap_mdo->atomic_set_rtm_state(ProfileRTM); + } +#endif } if (inc_recompile_count) { @@ -1622,6 +1635,7 @@ Deoptimization::query_update_method_data(MethodData* trap_mdo, int trap_bci, Deoptimization::DeoptReason reason, + Method* compiled_method, //outputs: uint& ret_this_trap_count, bool& ret_maybe_prior_trap, @@ -1645,9 +1659,16 @@ // Find the profile data for this BCI. If there isn't one, // try to allocate one from the MDO's set of spares. // This will let us detect a repeated trap at this point. - pdata = trap_mdo->allocate_bci_to_data(trap_bci); + pdata = trap_mdo->allocate_bci_to_data(trap_bci, reason_is_speculate(reason) ? compiled_method : NULL); if (pdata != NULL) { + if (reason_is_speculate(reason) && !pdata->is_SpeculativeTrapData()) { + if (LogCompilation && xtty != NULL) { + ttyLocker ttyl; + // no more room for speculative traps in this MDO + xtty->elem("speculative_traps_oom"); + } + } // Query the trap state of this profile datum. int tstate0 = pdata->trap_state(); if (!trap_state_has_reason(tstate0, per_bc_reason)) @@ -1685,8 +1706,10 @@ uint ignore_this_trap_count; bool ignore_maybe_prior_trap; bool ignore_maybe_prior_recompile; + assert(!reason_is_speculate(reason), "reason speculate only used by compiler"); query_update_method_data(trap_mdo, trap_bci, (DeoptReason)reason, + NULL, ignore_this_trap_count, ignore_maybe_prior_trap, ignore_maybe_prior_recompile); @@ -1814,7 +1837,9 @@ "div0_check", "age", "predicate", - "loop_limit_check" + "loop_limit_check", + "speculate_class_check", + "rtm_state_change" }; const char* Deoptimization::_trap_action_name[Action_LIMIT] = { // Note: Keep this in sync. with enum DeoptAction. diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/deoptimization.hpp --- a/src/share/vm/runtime/deoptimization.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/deoptimization.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -59,6 +59,8 @@ Reason_age, // nmethod too old; tier threshold reached Reason_predicate, // compiler generated predicate failed Reason_loop_limit_check, // compiler generated loop limits check failed + Reason_speculate_class_check, // saw unexpected object class from type speculation + Reason_rtm_state_change, // rtm state change detected Reason_LIMIT, // Note: Keep this enum in sync. with _trap_reason_name. 
Reason_RECORDED_LIMIT = Reason_bimorphic // some are not recorded per bc @@ -311,10 +313,23 @@ return reason; else if (reason == Reason_div0_check) // null check due to divide-by-zero? return Reason_null_check; // recorded per BCI as a null check + else if (reason == Reason_speculate_class_check) + return Reason_class_check; else return Reason_none; } + static bool reason_is_speculate(int reason) { + if (reason == Reason_speculate_class_check) { + return true; + } + return false; + } + + static uint per_method_trap_limit(int reason) { + return reason_is_speculate(reason) ? (uint)PerMethodSpecTrapLimit : (uint)PerMethodTrapLimit; + } + static const char* trap_reason_name(int reason); static const char* trap_action_name(int action); // Format like reason='foo' action='bar' index='123'. @@ -337,6 +352,7 @@ static ProfileData* query_update_method_data(MethodData* trap_mdo, int trap_bci, DeoptReason reason, + Method* compiled_method, //outputs: uint& ret_this_trap_count, bool& ret_maybe_prior_trap, diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/globals.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -3080,9 +3080,15 @@ product(intx, PerMethodTrapLimit, 100, \ "Limit on traps (of one kind) in a method (includes inlines)") \ \ + experimental(intx, PerMethodSpecTrapLimit, 5000, \ + "Limit on speculative traps (of one kind) in a method (includes inlines)") \ + \ product(intx, PerBytecodeTrapLimit, 4, \ "Limit on traps (of one kind) at a particular BCI") \ \ + experimental(intx, SpecTrapLimitExtraEntries, 3, \ + "Extra method data trap entries for speculation") \ + \ develop(intx, InlineFrequencyRatio, 20, \ "Ratio of call site execution to caller method invocation") \ \ @@ -3832,6 +3838,22 @@ experimental(uintx, SymbolTableSize, defaultSymbolTableSize, \ "Number of buckets in the JVM internal Symbol table") \ \ + product(bool, UseStringDeduplication, false, \ + "Use string deduplication") \ + \ + product(bool, PrintStringDeduplicationStatistics, false, \ + "Print string deduplication statistics") \ + \ + product(uintx, StringDeduplicationAgeThreshold, 3, \ + "A string must reach this age (or be promoted to an old region) " \ + "to be considered for deduplication") \ + \ + diagnostic(bool, StringDeduplicationResizeALot, false, \ + "Force table resize every time the table is scanned") \ + \ + diagnostic(bool, StringDeduplicationRehashALot, false, \ + "Force table rehash every time the table is scanned") \ + \ develop(bool, TraceDefaultMethods, false, \ "Trace the default method processing steps") \ \ diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/java.cpp --- a/src/share/vm/runtime/java.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/java.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -268,7 +268,7 @@ os::print_statistics(); } - if (PrintLockStatistics || PrintPreciseBiasedLockingStatistics) { + if (PrintLockStatistics || PrintPreciseBiasedLockingStatistics || PrintPreciseRTMLockingStatistics) { OptoRuntime::print_named_counters(); } @@ -390,7 +390,7 @@ } #ifdef COMPILER2 - if (PrintPreciseBiasedLockingStatistics) { + if (PrintPreciseBiasedLockingStatistics || PrintPreciseRTMLockingStatistics) { OptoRuntime::print_named_counters(); } #endif diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/mutexLocker.cpp --- a/src/share/vm/runtime/mutexLocker.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/mutexLocker.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* 
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,6 +58,8 @@ Mutex* VtableStubs_lock = NULL; Mutex* SymbolTable_lock = NULL; Mutex* StringTable_lock = NULL; +Monitor* StringDedupQueue_lock = NULL; +Mutex* StringDedupTable_lock = NULL; Mutex* CodeCache_lock = NULL; Mutex* MethodData_lock = NULL; Mutex* RetData_lock = NULL; @@ -196,6 +198,9 @@ def(MMUTracker_lock , Mutex , leaf , true ); def(HotCardCache_lock , Mutex , special , true ); def(EvacFailureStack_lock , Mutex , nonleaf , true ); + + def(StringDedupQueue_lock , Monitor, leaf, true ); + def(StringDedupTable_lock , Mutex , leaf, true ); } def(ParGCRareEvent_lock , Mutex , leaf , true ); def(DerivedPointerTableGC_lock , Mutex, leaf, true ); diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/mutexLocker.hpp --- a/src/share/vm/runtime/mutexLocker.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/mutexLocker.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -66,6 +66,8 @@ extern Mutex* VtableStubs_lock; // a lock on the VtableStubs extern Mutex* SymbolTable_lock; // a lock on the symbol table extern Mutex* StringTable_lock; // a lock on the interned string table +extern Monitor* StringDedupQueue_lock; // a lock on the string deduplication queue +extern Mutex* StringDedupTable_lock; // a lock on the string deduplication table extern Mutex* CodeCache_lock; // a lock on the CodeCache, rank is special, use MutexLockerEx extern Mutex* MethodData_lock; // a lock on installation of method data extern Mutex* RetData_lock; // a lock on installation of RetData inside method data diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/os.hpp --- a/src/share/vm/runtime/os.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/os.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -434,7 +434,10 @@ static intx current_thread_id(); static int current_process_id(); static int sleep(Thread* thread, jlong ms, bool interruptable); - static int naked_sleep(); + // Short standalone OS sleep suitable for slow path spin loop. + // Ignores Thread.interrupt() (so keep it short). + // ms = 0, will sleep for the least amount of time allowed by the OS. + static void naked_short_sleep(jlong ms); static void infinite_sleep(); // never returns, use with CAUTION static void yield(); // Yields to all threads with same priority enum YieldResult { diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/park.cpp --- a/src/share/vm/runtime/park.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/park.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -59,58 +59,22 @@ // Start by trying to recycle an existing but unassociated // ParkEvent from the global free list. - for (;;) { - ev = FreeList ; - if (ev == NULL) break ; - // 1: Detach - sequester or privatize the list - // Tantamount to ev = Swap (&FreeList, NULL) - if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) { - continue ; + // Using a spin lock since we are part of the mutex impl. 
+ // 8028280: using concurrent free list without memory management can leak + // pretty badly it turns out. + Thread::SpinAcquire(&ListLock, "ParkEventFreeListAllocate"); + { + ev = FreeList; + if (ev != NULL) { + FreeList = ev->FreeNext; } - - // We've detached the list. The list in-hand is now - // local to this thread. This thread can operate on the - // list without risk of interference from other threads. - // 2: Extract -- pop the 1st element from the list. - ParkEvent * List = ev->FreeNext ; - if (List == NULL) break ; - for (;;) { - // 3: Try to reattach the residual list - guarantee (List != NULL, "invariant") ; - ParkEvent * Arv = (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ; - if (Arv == NULL) break ; - - // New nodes arrived. Try to detach the recent arrivals. - if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) { - continue ; - } - guarantee (Arv != NULL, "invariant") ; - // 4: Merge Arv into List - ParkEvent * Tail = List ; - while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ; - Tail->FreeNext = Arv ; - } - break ; } + Thread::SpinRelease(&ListLock); if (ev != NULL) { guarantee (ev->AssociatedWith == NULL, "invariant") ; } else { // Do this the hard way -- materialize a new ParkEvent. - // In rare cases an allocating thread might detach a long list -- - // installing null into FreeList -- and then stall or be obstructed. - // A 2nd thread calling Allocate() would see FreeList == null. - // The list held privately by the 1st thread is unavailable to the 2nd thread. - // In that case the 2nd thread would have to materialize a new ParkEvent, - // even though free ParkEvents existed in the system. In this case we end up - // with more ParkEvents in circulation than we need, but the race is - // rare and the outcome is benign. Ideally, the # of extant ParkEvents - // is equal to the maximum # of threads that existed at any one time. - // Because of the race mentioned above, segments of the freelist - // can be transiently inaccessible. At worst we may end up with the - // # of ParkEvents in circulation slightly above the ideal. - // Note that if we didn't have the TSM/immortal constraint, then - // when reattaching, above, we could trim the list. ev = new ParkEvent () ; guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ; } @@ -124,13 +88,14 @@ if (ev == NULL) return ; guarantee (ev->FreeNext == NULL , "invariant") ; ev->AssociatedWith = NULL ; - for (;;) { - // Push ev onto FreeList - // The mechanism is "half" lock-free. - ParkEvent * List = FreeList ; - ev->FreeNext = List ; - if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ; + // Note that if we didn't have the TSM/immortal constraint, then + // when reattaching we could trim the list. + Thread::SpinAcquire(&ListLock, "ParkEventFreeListRelease"); + { + ev->FreeNext = FreeList; + FreeList = ev; } + Thread::SpinRelease(&ListLock); } // Override operator new and delete so we can ensure that the @@ -164,56 +129,21 @@ // Start by trying to recycle an existing but unassociated // Parker from the global free list. - for (;;) { - p = FreeList ; - if (p == NULL) break ; - // 1: Detach - // Tantamount to p = Swap (&FreeList, NULL) - if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) { - continue ; + // 8028280: using concurrent free list without memory management can leak + // pretty badly it turns out. + Thread::SpinAcquire(&ListLock, "ParkerFreeListAllocate"); + { + p = FreeList; + if (p != NULL) { + FreeList = p->FreeNext; } - - // We've detached the list. 
The list in-hand is now - // local to this thread. This thread can operate on the - // list without risk of interference from other threads. - // 2: Extract -- pop the 1st element from the list. - Parker * List = p->FreeNext ; - if (List == NULL) break ; - for (;;) { - // 3: Try to reattach the residual list - guarantee (List != NULL, "invariant") ; - Parker * Arv = (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ; - if (Arv == NULL) break ; - - // New nodes arrived. Try to detach the recent arrivals. - if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) { - continue ; - } - guarantee (Arv != NULL, "invariant") ; - // 4: Merge Arv into List - Parker * Tail = List ; - while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ; - Tail->FreeNext = Arv ; - } - break ; } + Thread::SpinRelease(&ListLock); if (p != NULL) { guarantee (p->AssociatedWith == NULL, "invariant") ; } else { // Do this the hard way -- materialize a new Parker.. - // In rare cases an allocating thread might detach - // a long list -- installing null into FreeList --and - // then stall. Another thread calling Allocate() would see - // FreeList == null and then invoke the ctor. In this case we - // end up with more Parkers in circulation than we need, but - // the race is rare and the outcome is benign. - // Ideally, the # of extant Parkers is equal to the - // maximum # of threads that existed at any one time. - // Because of the race mentioned above, segments of the - // freelist can be transiently inaccessible. At worst - // we may end up with the # of Parkers in circulation - // slightly above the ideal. p = new Parker() ; } p->AssociatedWith = t ; // Associate p with t @@ -227,11 +157,12 @@ guarantee (p->AssociatedWith != NULL, "invariant") ; guarantee (p->FreeNext == NULL , "invariant") ; p->AssociatedWith = NULL ; - for (;;) { - // Push p onto FreeList - Parker * List = FreeList ; - p->FreeNext = List ; - if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ; + + Thread::SpinAcquire(&ListLock, "ParkerFreeListRelease"); + { + p->FreeNext = FreeList; + FreeList = p; } + Thread::SpinRelease(&ListLock); } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/rtmLocking.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/runtime/rtmLocking.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
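Both the ParkEvent and Parker allocators above now pop from and push to the global free list under Thread::SpinAcquire/SpinRelease instead of the old lock-free CAS loops (the 8028280 leak noted in the comments). A rough standalone sketch of that shape, with std::atomic_flag standing in for the spin lock; Node, allocate and release are illustrative names, not HotSpot code:

    // Sketch, not HotSpot code: a singly linked free list protected by a spin
    // lock, mirroring the simplified Allocate/Release structure above.
    #include <atomic>

    struct Node { Node* free_next = nullptr; };

    static Node*            free_list = nullptr;
    static std::atomic_flag list_lock = ATOMIC_FLAG_INIT;

    static Node* allocate() {
      Node* n = nullptr;
      while (list_lock.test_and_set(std::memory_order_acquire)) { /* spin */ }
      n = free_list;
      if (n != nullptr) free_list = n->free_next;   // pop under the lock
      list_lock.clear(std::memory_order_release);
      if (n == nullptr) n = new Node();             // materialize a new one
      return n;
    }

    static void release(Node* n) {
      while (list_lock.test_and_set(std::memory_order_acquire)) { /* spin */ }
      n->free_next = free_list;                     // push under the lock
      free_list = n;
      list_lock.clear(std::memory_order_release);
    }

Keeping the critical section to a pointer swap is what makes a plain spin lock acceptable here; the expensive part, constructing a new node, still happens outside the lock.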
+ * + */ + +#ifndef SHARE_VM_RUNTIME_RTMLOCKING_HPP +#define SHARE_VM_RUNTIME_RTMLOCKING_HPP + +// Generate RTM (Restricted Transactional Memory) locking code for all inflated +// locks when "UseRTMLocking" option is on with normal locking mechanism as fall back +// handler. +// +// On abort/lock busy the lock will be retried a fixed number of times under RTM +// as specified by "RTMRetryCount" option. The locks which abort too often +// can be auto tuned or manually tuned. +// +// Auto-tuning can be done on an option like UseRTMDeopt and it will need abort +// ratio calculation for each lock. The abort ratio will be calculated after +// "RTMAbortThreshold" number of aborts is reached. The formulas are: +// +// Aborted transactions = abort_count * 100 +// All transactions = total_count * RTMTotalCountIncrRate +// +// Aborted transactions >= All transactions * RTMAbortRatio +// +// If "UseRTMDeopt" is on and the aborts ratio reaches "RTMAbortRatio" +// the method containing the lock will be deoptimized and recompiled with +// all locks as normal locks. If the abort ratio continues to remain low after +// "RTMLockingThreshold" locks are attempted, then the method will be deoptimized +// and recompiled with all locks as RTM locks without abort ratio calculation code. +// The abort ratio calculation can be delayed by specifying flag +// -XX:RTMLockingCalculationDelay in millisecond. +// +// For manual tuning the abort statistics for each lock needs to be provided +// to the user on some JVM option like "PrintPreciseRTMLockingStatistics". +// Based on the abort statistics users can create a .hotspot_compiler file +// or use -XX:CompileCommand=option,class::method,NoRTMLockEliding +// to specify for which methods to disable RTM locking. +// +// When UseRTMForStackLocks option is enabled along with UseRTMLocking option, +// the RTM locking code is generated for stack locks too. +// The retries, auto-tuning support and rtm locking statistics are all +// supported for stack locks just like inflated locks. + +// RTM locking counters +class RTMLockingCounters VALUE_OBJ_CLASS_SPEC { + private: + uintx _total_count; // Total RTM locks count + uintx _abort_count; // Total aborts count + + public: + enum { ABORT_STATUS_LIMIT = 6 }; + // Counters per RTM Abort Status. Incremented with +PrintPreciseRTMLockingStatistics + // RTM uses the EAX register to communicate abort status to software. + // Following an RTM abort the EAX register has the following definition. + // + // EAX register bit position Meaning + // 0 Set if abort caused by XABORT instruction. + // 1 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set. + // 2 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted. + // 3 Set if an internal buffer overflowed. + // 4 Set if a debug breakpoint was hit. + // 5 Set if an abort occurred during execution of a nested transaction. 
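The auto-tuning decision described in the comment above reduces to integer arithmetic over the two counters kept in this class. A self-contained sketch of that check using exactly the formulas quoted above; the values chosen for RTMTotalCountIncrRate and RTMAbortRatio are assumed illustrative defaults, not taken from this patch:

    // Standalone sketch of the abort-ratio test described in rtmLocking.hpp.
    #include <cstdio>

    int main() {
      const unsigned long RTMTotalCountIncrRate = 64;   // assumed sampling rate
      const unsigned long RTMAbortRatio         = 50;   // assumed threshold, in percent

      unsigned long total_count = 1000;   // sampled lock attempts (_total_count)
      unsigned long abort_count = 400;    // aborted attempts      (_abort_count)

      unsigned long aborted = abort_count * 100;                    // Aborted transactions
      unsigned long all     = total_count * RTMTotalCountIncrRate;  // All transactions

      if (aborted >= all * RTMAbortRatio) {
        std::printf("abort ratio reached: deoptimize and recompile without RTM\n");
      } else {
        std::printf("abort ratio acceptable: keep RTM locking\n");
      }
      return 0;
    }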
+ private: + uintx _abortX_count[ABORT_STATUS_LIMIT]; + + public: + static uintx _calculation_flag; + static uintx* rtm_calculation_flag_addr() { return &_calculation_flag; } + + static void init(); + + RTMLockingCounters() : _total_count(0), _abort_count(0) { + for (int i = 0; i < ABORT_STATUS_LIMIT; i++) { + _abortX_count[i] = 0; + } + } + + uintx* total_count_addr() { return &_total_count; } + uintx* abort_count_addr() { return &_abort_count; } + uintx* abortX_count_addr() { return &_abortX_count[0]; } + + static int total_count_offset() { return (int)offset_of(RTMLockingCounters, _total_count); } + static int abort_count_offset() { return (int)offset_of(RTMLockingCounters, _abort_count); } + static int abortX_count_offset() { return (int)offset_of(RTMLockingCounters, _abortX_count[0]); } + + + bool nonzero() { return (_abort_count + _total_count) > 0; } + + void print_on(outputStream* st); + void print() { print_on(tty); } +}; + +#endif // SHARE_VM_RUNTIME_RTMLOCKING_HPP diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/task.cpp --- a/src/share/vm/runtime/task.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/task.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -105,7 +105,6 @@ _counter(0), _interval((int) interval_time) { // Sanity check the interval time assert(_interval >= PeriodicTask::min_interval && - _interval <= PeriodicTask::max_interval && _interval % PeriodicTask::interval_gran == 0, "improper PeriodicTask interval time"); } diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/thread.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -107,6 +107,9 @@ #include "opto/c2compiler.hpp" #include "opto/idealGraphPrinter.hpp" #endif +#if INCLUDE_RTM_OPT +#include "runtime/rtmLocking.hpp" +#endif #ifdef DTRACE_ENABLED @@ -3673,6 +3676,10 @@ BiasedLocking::init(); +#if INCLUDE_RTM_OPT + RTMLockingCounters::init(); +#endif + if (JDK_Version::current().post_vm_init_hook_enabled()) { call_postVMInitHook(THREAD); // The Java side of PostVMInitHook.run must deal with all @@ -4449,9 +4456,7 @@ ++ctr ; if ((ctr & 0xFFF) == 0 || !os::is_MP()) { if (Yields > 5) { - // Consider using a simple NakedSleep() instead. 
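In the surrounding thread.cpp hunk the Thread::SpinAcquire slow path switches from parking on the thread's ParkEvent to os::naked_short_sleep(1), matching the os.hpp change above, so the path can also be entered by non-JVM threads. A portable sketch of that yield-then-sleep backoff, with std::this_thread standing in for os::NakedYield and os::naked_short_sleep; the 0xFFF mask and the yield cap of 5 follow the code, everything else is illustrative:

    // Sketch of the spin/yield/sleep backoff, not HotSpot code: atomic_flag
    // stands in for the contended lock word.
    #include <atomic>
    #include <chrono>
    #include <thread>

    static void spin_acquire_slow(std::atomic_flag& lock) {
      int ctr = 0, yields = 0;
      while (lock.test_and_set(std::memory_order_acquire)) {
        if ((++ctr & 0xFFF) == 0) {     // back off only occasionally
          if (yields > 5) {
            // roughly os::naked_short_sleep(1): a short OS sleep, no thread state change
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
          } else {
            std::this_thread::yield();  // roughly os::NakedYield()
            ++yields;
          }
        }
      }
    }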
- // Then SpinAcquire could be called by non-JVM threads - Thread::current()->_ParkEvent->park(1) ; + os::naked_short_sleep(1); } else { os::NakedYield() ; ++Yields ; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/runtime/vmStructs.cpp --- a/src/share/vm/runtime/vmStructs.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/runtime/vmStructs.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1946,15 +1946,6 @@ declare_c2_type(CmpF3Node, CmpFNode) \ declare_c2_type(CmpDNode, CmpNode) \ declare_c2_type(CmpD3Node, CmpDNode) \ - declare_c2_type(MathExactNode, MultiNode) \ - declare_c2_type(MathExactINode, MathExactNode) \ - declare_c2_type(AddExactINode, MathExactINode) \ - declare_c2_type(AddExactLNode, MathExactLNode) \ - declare_c2_type(SubExactINode, MathExactINode) \ - declare_c2_type(SubExactLNode, MathExactLNode) \ - declare_c2_type(NegExactINode, MathExactINode) \ - declare_c2_type(MulExactINode, MathExactINode) \ - declare_c2_type(FlagsProjNode, ProjNode) \ declare_c2_type(BoolNode, Node) \ declare_c2_type(AbsNode, Node) \ declare_c2_type(AbsINode, AbsNode) \ @@ -2035,6 +2026,15 @@ declare_c2_type(ExtractLNode, ExtractNode) \ declare_c2_type(ExtractFNode, ExtractNode) \ declare_c2_type(ExtractDNode, ExtractNode) \ + declare_c2_type(OverflowNode, CmpNode) \ + declare_c2_type(OverflowINode, OverflowNode) \ + declare_c2_type(OverflowAddINode, OverflowINode) \ + declare_c2_type(OverflowSubINode, OverflowINode) \ + declare_c2_type(OverflowMulINode, OverflowINode) \ + declare_c2_type(OverflowLNode, OverflowNode) \ + declare_c2_type(OverflowAddLNode, OverflowLNode) \ + declare_c2_type(OverflowSubLNode, OverflowLNode) \ + declare_c2_type(OverflowMulLNode, OverflowLNode) \ \ /*********************/ \ /* Adapter Blob Entries */ \ diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/trace/trace.xml --- a/src/share/vm/trace/trace.xml Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/trace/trace.xml Tue Mar 25 17:07:36 2014 -0700 @@ -193,11 +193,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/trace/tracetypes.xml --- a/src/share/vm/trace/tracetypes.xml Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/trace/tracetypes.xml Tue Mar 25 17:07:36 2014 -0700 @@ -130,11 +130,26 @@ + + + + + + + + + + + + @@ -324,10 +339,22 @@ + + + + + + + + + diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/utilities/globalDefinitions.hpp --- a/src/share/vm/utilities/globalDefinitions.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/utilities/globalDefinitions.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -373,6 +373,21 @@ // Machine dependent stuff +#if defined(X86) && defined(COMPILER2) && !defined(JAVASE_EMBEDDED) +// Include Restricted Transactional Memory lock eliding optimization +#define INCLUDE_RTM_OPT 1 +#define RTM_OPT_ONLY(code) code +#else +#define INCLUDE_RTM_OPT 0 +#define RTM_OPT_ONLY(code) +#endif +// States of Restricted Transactional Memory usage. +enum RTMState { + NoRTM = 0x2, // Don't use RTM + UseRTM = 0x1, // Use RTM + ProfileRTM = 0x0 // Use RTM with abort ratio calculation +}; + #ifdef TARGET_ARCH_x86 # include "globalDefinitions_x86.hpp" #endif diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/utilities/hashtable.cpp --- a/src/share/vm/utilities/hashtable.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/utilities/hashtable.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -93,7 +93,7 @@ return false; } -template jint Hashtable::_seed = 0; +template juint Hashtable::_seed = 0; // Create a new table and using alternate hash code, populate the new table // with the existing elements. This can be used to change the hash code diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/utilities/hashtable.hpp --- a/src/share/vm/utilities/hashtable.hpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/utilities/hashtable.hpp Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -280,7 +280,7 @@ // Function to move these elements into the new table. void move_to(Hashtable* new_table); static bool use_alternate_hashcode() { return _seed != 0; } - static jint seed() { return _seed; } + static juint seed() { return _seed; } static int literal_size(Symbol *symbol); static int literal_size(oop oop); @@ -296,7 +296,7 @@ void dump_table(outputStream* st, const char *table_name); private: - static jint _seed; + static juint _seed; }; diff -r a433eb716ce1 -r 62c54fcc0a35 src/share/vm/utilities/vmError.cpp --- a/src/share/vm/utilities/vmError.cpp Tue Mar 25 12:54:21 2014 -0700 +++ b/src/share/vm/utilities/vmError.cpp Tue Mar 25 17:07:36 2014 -0700 @@ -592,13 +592,24 @@ st->cr(); // Compiled code may use EBP register on x86 so it looks like // non-walkable C frame. Use frame.sender() for java frames. - if (_thread && _thread->is_Java_thread() && fr.is_java_frame()) { - RegisterMap map((JavaThread*)_thread, false); // No update - fr = fr.sender(&map); - continue; + if (_thread && _thread->is_Java_thread()) { + // Catch very first native frame by using stack address. + // For JavaThread stack_base and stack_size should be set. + if (!_thread->on_local_stack((address)(fr.sender_sp() + 1))) { + break; + } + if (fr.is_java_frame()) { + RegisterMap map((JavaThread*)_thread, false); // No update + fr = fr.sender(&map); + } else { + fr = os::get_sender_for_C_frame(&fr); + } + } else { + // is_first_C_frame() does only simple checks for frame pointer, + // it will pass if java compiled code has a pointer in EBP. 
+ if (os::is_first_C_frame(&fr)) break; + fr = os::get_sender_for_C_frame(&fr); } - if (os::is_first_C_frame(&fr)) break; - fr = os::get_sender_for_C_frame(&fr); } if (count > StackPrintLimit) { diff -r a433eb716ce1 -r 62c54fcc0a35 test/TEST.groups --- a/test/TEST.groups Tue Mar 25 12:54:21 2014 -0700 +++ b/test/TEST.groups Tue Mar 25 17:07:36 2014 -0700 @@ -130,8 +130,12 @@ gc/g1/TestHumongousAllocInitialMark.java \ gc/arguments/TestG1HeapRegionSize.java \ gc/metaspace/TestMetaspaceMemoryPool.java \ + gc/arguments/TestDynMinHeapFreeRatio.java \ + gc/arguments/TestDynMaxHeapFreeRatio.java \ runtime/InternalApi/ThreadCpuTimesDeadlock.java \ - serviceability/threads/TestFalseDeadLock.java + serviceability/threads/TestFalseDeadLock.java \ + compiler/tiered/NonTieredLevelsTest.java \ + compiler/tiered/TieredLevelsTest.java # Compact 2 adds full VM tests compact2 = \ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/ciReplay/TestVM.sh --- a/test/compiler/ciReplay/TestVM.sh Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/ciReplay/TestVM.sh Tue Mar 25 17:07:36 2014 -0700 @@ -78,8 +78,8 @@ positive_test `expr $stop_level + 50` "TIERED LEVEL $stop_level :: REPLAY" \ "-XX:TieredStopAtLevel=$stop_level" stop_level=`expr $stop_level + 1` + cleanup done - cleanup fi echo TEST PASSED diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/ciReplay/common.sh --- a/test/compiler/ciReplay/common.sh Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/ciReplay/common.sh Tue Mar 25 17:07:36 2014 -0700 @@ -99,14 +99,13 @@ # $2 - non-tiered comp_level nontiered_tests() { level=`grep "^compile " $replay_data | awk '{print $6}'` - # is level available in non-tiere + # is level available in non-tiered if [ "$level" -eq $2 ] then positive_test $1 "NON-TIERED :: AVAILABLE COMP_LEVEL" \ -XX:-TieredCompilation else negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \ - negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \ -XX:-TieredCompilation fi } diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/codegen/BMI1.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/codegen/BMI1.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test + * @bug 8031321 + * @summary Support BMI1 instructions on x86/x64 + * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileCommand=compileonly,BMITests.* BMI1 + * + */ + +class MemI { + public int x; + public MemI(int x) { this.x = x; } +} + +class MemL { + public long x; + public MemL(long x) { this.x = x; } +} + +class BMITests { + static int andnl(int src1, int src2) { + return ~src1 & src2; + } + static long andnq(long src1, long src2) { + return ~src1 & src2; + } + static int andnl(int src1, MemI src2) { + return ~src1 & src2.x; + } + static long andnq(long src1, MemL src2) { + return ~src1 & src2.x; + } + static int blsil(int src1) { + return src1 & -src1; + } + static long blsiq(long src1) { + return src1 & -src1; + } + static int blsil(MemI src1) { + return src1.x & -src1.x; + } + static long blsiq(MemL src1) { + return src1.x & -src1.x; + } + static int blsmskl(int src1) { + return (src1 - 1) ^ src1; + } + static long blsmskq(long src1) { + return (src1 - 1) ^ src1; + } + static int blsmskl(MemI src1) { + return (src1.x - 1) ^ src1.x; + } + static long blsmskq(MemL src1) { + return (src1.x - 1) ^ src1.x; + } + static int blsrl(int src1) { + return (src1 - 1) & src1; + } + static long blsrq(long src1) { + return (src1 - 1) & src1; + } + static int blsrl(MemI src1) { + return (src1.x - 1) & src1.x; + } + static long blsrq(MemL src1) { + return (src1.x - 1) & src1.x; + } + static int lzcntl(int src1) { + return Integer.numberOfLeadingZeros(src1); + } + static int lzcntq(long src1) { + return Long.numberOfLeadingZeros(src1); + } + static int tzcntl(int src1) { + return Integer.numberOfTrailingZeros(src1); + } + static int tzcntq(long src1) { + return Long.numberOfTrailingZeros(src1); + } +} + +public class BMI1 { + private final static int ITERATIONS = 1000000; + + public static void main(String[] args) { + int ix = 0x01234567; + int iy = 0x89abcdef; + MemI imy = new MemI(iy); + long lx = 0x0123456701234567L; + long ly = 0x89abcdef89abcdefL; + MemL lmy = new MemL(ly); + + { // match(Set dst (AndI (XorI src1 minus_1) src2)) + int z = BMITests.andnl(ix, iy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.andnl(ix, iy); + if (ii != z) { + throw new Error("andnl with register failed"); + } + } + } + { // match(Set dst (AndL (XorL src1 minus_1) src2)) + long z = BMITests.andnq(lx, ly); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.andnq(lx, ly); + if (ll != z) { + throw new Error("andnq with register failed"); + } + } + } + { // match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))) + int z = BMITests.andnl(ix, imy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.andnl(ix, imy); + if (ii != z) { + throw new Error("andnl with memory failed"); + } + } + } + { // match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))) + long z = BMITests.andnq(lx, lmy); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.andnq(lx, lmy); + if (ll != z) { + throw new Error("andnq with memory failed"); + } + } + } + { // match(Set dst (AndI (SubI imm_zero src) src)) + int z = BMITests.blsil(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsil(ix); + if (ii != z) { + throw new Error("blsil with register failed"); + } + } + } + { // match(Set dst (AndL (SubL imm_zero src) src)) + long z = BMITests.blsiq(lx); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsiq(lx); + if (ll != z) { + throw new Error("blsiq with register failed"); + } + } + } + { // match(Set dst (AndI (SubI imm_zero (LoadI src) ) 
(LoadI src) )) + int z = BMITests.blsil(imy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsil(imy); + if (ii != z) { + throw new Error("blsil with memory failed"); + } + } + } + { // match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )) + long z = BMITests.blsiq(lmy); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsiq(lmy); + if (ll != z) { + throw new Error("blsiq with memory failed"); + } + } + } + + { // match(Set dst (XorI (AddI src minus_1) src)) + int z = BMITests.blsmskl(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsmskl(ix); + if (ii != z) { + throw new Error("blsmskl with register failed"); + } + } + } + { // match(Set dst (XorL (AddL src minus_1) src)) + long z = BMITests.blsmskq(lx); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsmskq(lx); + if (ll != z) { + throw new Error("blsmskq with register failed"); + } + } + } + { // match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ) + int z = BMITests.blsmskl(imy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsmskl(imy); + if (ii != z) { + throw new Error("blsmskl with memory failed"); + } + } + } + { // match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ) + long z = BMITests.blsmskq(lmy); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsmskq(lmy); + if (ll != z) { + throw new Error("blsmskq with memory failed"); + } + } + } + + { // match(Set dst (AndI (AddI src minus_1) src) ) + int z = BMITests.blsrl(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsrl(ix); + if (ii != z) { + throw new Error("blsrl with register failed"); + } + } + } + { // match(Set dst (AndL (AddL src minus_1) src) ) + long z = BMITests.blsrq(lx); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsrq(lx); + if (ll != z) { + throw new Error("blsrq with register failed"); + } + } + } + { // match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ) + int z = BMITests.blsrl(imy); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.blsrl(imy); + if (ii != z) { + throw new Error("blsrl with memory failed"); + } + } + } + { // match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ) + long z = BMITests.blsrq(lmy); + for (int i = 0; i < ITERATIONS; i++) { + long ll = BMITests.blsrq(lmy); + if (ll != z) { + throw new Error("blsrq with memory failed"); + } + } + } + + { + int z = BMITests.lzcntl(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.lzcntl(ix); + if (ii != z) { + throw new Error("lzcntl failed"); + } + } + } + { + int z = BMITests.lzcntq(lx); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.lzcntq(lx); + if (ii != z) { + throw new Error("lzcntq failed"); + } + } + } + + { + int z = BMITests.tzcntl(ix); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.tzcntl(ix); + if (ii != z) { + throw new Error("tzcntl failed"); + } + } + } + { + int z = BMITests.tzcntq(lx); + for (int i = 0; i < ITERATIONS; i++) { + int ii = BMITests.tzcntq(lx); + if (ii != z) { + throw new Error("tzcntq failed"); + } + } + } + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/BMITestRunner.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/BMITestRunner.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
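BMI1.java above verifies each instruction pattern (ANDN, BLSI, BLSMSK, BLSR, LZCNT, TZCNT) by recomputing it many times and comparing against a first result. The bit identities behind the first four are small enough to show standalone; this sketch is plain C++ and runs on any CPU, and whether a compiler actually emits the BMI1 instructions for these patterns depends on the target; the identities themselves always hold:

    // Plain C++ sketch of the bit identities exercised by BMI1.java.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t x = 0x01234567u, y = 0x89abcdefu;

      uint32_t andn   = ~x & y;         // ANDN  : and-not
      uint32_t blsi   = x & (0u - x);   // BLSI  : isolate lowest set bit
      uint32_t blsmsk = (x - 1) ^ x;    // BLSMSK: mask up to lowest set bit
      uint32_t blsr   = (x - 1) & x;    // BLSR  : clear lowest set bit

      assert((andn & x) == 0 && (andn | (x & y)) == y);
      assert((blsr | blsi) == x);               // clearing then restoring the bit
      assert(blsmsk == (blsi | (blsi - 1)));    // mask covers the isolated bit and below
      return 0;
    }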
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +import java.util.*; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.charset.StandardCharsets; + +import com.oracle.java.testlibrary.*; + +/** + * Test runner that invokes all methods implemented by particular Expr + * with random arguments in two different JVM processes and compares output. + * JVMs being started in different modes - one in int and other in comp + * with C2 and disabled tiered compilation. + */ +public class BMITestRunner { + + enum VMMode { + COMP, INT; + }; + + public static int DEFAULT_ITERATIONS_COUNT = 4000; + + /** + * Execute all methods implemented by expr in int and comp modes + * and compare output. + * Test pass only of output obtained with different VM modes is equal. + * To control behaviour of test following options could be passed: + *
+ * <ul>
+ *   <li>-iterations=&lt;N&gt; each operation implemented by
+ *       expr will be executed N times. Default value is 4000.</li>
+ *   <li>-seed=&lt;SEED&gt; arguments for expr's methods obtained
+ *       via RNG initiated with seed SEED. By default some random
+ *       seed will be used.</li>
+ * </ul>
+ * + * @param expr operation that should be tested + * @param testOpts options to control test behaviour + * @param additionalVMOpts additional options for VM + * + * @throws Throwable if test failed. + */ + public static void runTests(Class expr, + String testOpts[], + String... additionalVMOpts) + throws Throwable { + + int seed = new Random().nextInt(); + int iterations = DEFAULT_ITERATIONS_COUNT; + + for (String testOption : testOpts) { + if (testOption.startsWith("-iterations=")) { + iterations = Integer.valueOf(testOption. + replace("-iterations=", "")); + } else if (testOption.startsWith("-seed=")) { + seed = Integer.valueOf(testOption.replace("-seed=", "")); + } + } + + System.out.println("Running test with seed: " + seed); + + OutputAnalyzer intOutput = runTest(expr, VMMode.INT, + additionalVMOpts, + seed, iterations); + OutputAnalyzer compOutput = runTest(expr, VMMode.COMP, + additionalVMOpts, + seed, iterations); + + dumpOutput(intOutput, "int"); + dumpOutput(compOutput, "comp"); + + Asserts.assertStringsEqual(intOutput.getStdout(), + compOutput.getStdout(), + "Results obtained in -Xint and " + + "-Xcomp should be the same."); + } + + /** + * Execute tests on methods implemented by expr in new VM + * started in testVMMode mode. + * + * @param expr operation that should be tested + * @param testVMMode VM mode for test + * @param additionalVMOpts additional options for VM + * @param seed for RNG used it tests + * @param iterations that will be used to invoke expr's methods. + * + * @return OutputAnalyzer for executed test. + * @throws Throwable when something goes wrong. + */ + public static OutputAnalyzer runTest(Class expr, + VMMode testVMMode, + String additionalVMOpts[], + int seed, int iterations) + throws Throwable { + + List vmOpts = new LinkedList(); + + Collections.addAll(vmOpts, additionalVMOpts); + + //setup mode-specific options + switch (testVMMode) { + case INT: + Collections.addAll(vmOpts, new String[] { "-Xint" }); + break; + case COMP: + Collections.addAll(vmOpts, new String[] { + "-Xcomp", + "-XX:-TieredCompilation", + String.format("-XX:CompileCommand=compileonly,%s::*", + expr.getName()) + }); + break; + } + + Collections.addAll(vmOpts, new String[] { + "-XX:+DisplayVMOutputToStderr", + Executor.class.getName(), + expr.getName(), + new Integer(seed).toString(), + new Integer(iterations).toString() + }); + + OutputAnalyzer outputAnalyzer = ProcessTools. + executeTestJvm(vmOpts.toArray(new String[vmOpts.size()])); + + outputAnalyzer.shouldHaveExitValue(0); + + return outputAnalyzer; + } + + /** + * Dump stdout and stderr of test process to prefix.test.out + * and prefix.test.err respectively. + * + * @param outputAnalyzer OutputAnalyzer whom output should be dumped + * @param prefix Prefix that will be used in file names. + * @throws IOException if unable to dump output to file. + */ + protected static void dumpOutput(OutputAnalyzer outputAnalyzer, + String prefix) + throws IOException { + Files.write(Paths.get(prefix + ".test.out"), + outputAnalyzer.getStdout().getBytes()); + + Files.write(Paths.get(prefix + ".test.err"), + outputAnalyzer.getStderr().getBytes()); + } + + + /** + * Executor that invoke all methods implemented by particular + * Expr instance. 
+ */ + public static class Executor { + + /** + * Usage: BMITestRunner$Executor + */ + public static void main(String args[]) throws Exception { + @SuppressWarnings("unchecked") + Class exprClass = + (Class)Class.forName(args[0]); + Expr expr = exprClass.getConstructor().newInstance(); + Random rng = new Random(Integer.valueOf(args[1])); + int iterations = Integer.valueOf(args[2]); + runTests(expr, iterations, rng); + } + + + public static int[] getIntBitShifts() { + //SIZE+1 shift is for zero. + int data[] = new int[Integer.SIZE+1]; + for (int s = 0; s < data.length; s++) { + data[s] = 1< 0X%x", + value, expr.intExpr(value)); + } + + for (int i = 0; i < iterations; i++) { + int value = rng.nextInt(); + log("UnaryIntReg(0X%x) -> 0X%x", + value, expr.intExpr(value)); + } + } + + public static void runUnaryIntMemTest(Expr expr, int iterations, + Random rng) { + if (!(expr.isUnaryArgumentSupported() + && expr.isIntExprSupported() + && expr.isMemExprSupported())) { + return; + } + + for (int value : getIntBitShifts()) { + log("UnaryIntMem(0X%x) -> 0X%x", + value, expr.intExpr(new Expr.MemI(value))); + } + + for (int i = 0; i < iterations; i++) { + int value = rng.nextInt(); + log("UnaryIntMem(0X%x) -> 0X%x", + value, expr.intExpr(new Expr.MemI(value))); + } + } + + public static void runUnaryLongRegTest(Expr expr, int iterations, + Random rng) { + if (!(expr.isUnaryArgumentSupported() + && expr.isLongExprSupported())) { + return; + } + + for (long value : getLongBitShifts()) { + log("UnaryLongReg(0X%x) -> 0X%x", + value, expr.longExpr(value)); + } + + for (int i = 0; i < iterations; i++) { + long value = rng.nextLong(); + log("UnaryLongReg(0X%x) -> 0X%x", + value, expr.longExpr(value)); + } + } + + public static void runUnaryLongMemTest(Expr expr, int iterations, + Random rng) { + if (!(expr.isUnaryArgumentSupported() + && expr.isLongExprSupported() + && expr.isMemExprSupported())) { + return; + } + + for (long value : getLongBitShifts()) { + log("UnaryLongMem(0X%x) -> 0X%x", + value, expr.longExpr(new Expr.MemL(value))); + } + + for (int i = 0; i < iterations; i++) { + long value = rng.nextLong(); + log("UnaryLongMem(0X%x) -> 0X%x", + value, expr.longExpr(new Expr.MemL(value))); + } + } + + public static void runBinaryRegRegIntTest(Expr expr, int iterations, + Random rng) { + if (!(expr.isIntExprSupported() + && expr.isBinaryArgumentSupported())) { + return; + } + + for (int i = 0; i < iterations; i++) { + int aValue = rng.nextInt(); + int bValue = rng.nextInt(); + log("BinaryIntRegReg(0X%x, 0X%x) -> 0X%x", + aValue, bValue, expr.intExpr(aValue, bValue)); + } + } + + public static void runBinaryRegMemIntTest(Expr expr, int iterations, + Random rng) { + if (!(expr.isIntExprSupported() + && expr.isBinaryArgumentSupported() + && expr.isMemExprSupported())) { + return; + } + + for (int i = 0; i < iterations; i++) { + int aValue = rng.nextInt(); + int bValue = rng.nextInt(); + log("BinaryIntRegMem(0X%x, 0X%x) -> 0X%x", aValue, bValue, + expr.intExpr(aValue, new Expr.MemI(bValue))); + } + } + + public static void runBinaryMemRegIntTest(Expr expr, int iterations, + Random rng) { + if (!(expr.isIntExprSupported() + && expr.isBinaryArgumentSupported() + && expr.isMemExprSupported())) { + return; + } + + for (int i = 0; i < iterations; i++) { + int aValue = rng.nextInt(); + int bValue = rng.nextInt(); + log("BinaryIntMemReg(0X%x, 0X%x) -> 0X%x", aValue, bValue, + expr.intExpr(new Expr.MemI(aValue), bValue)); + } + } + + public static void runBinaryMemMemIntTest(Expr expr, int iterations, + Random rng) 
{ + if (!(expr.isIntExprSupported() + && expr.isBinaryArgumentSupported() + && expr.isMemExprSupported())) { + return; + } + + for (int i = 0; i < iterations; i++) { + int aValue = rng.nextInt(); + int bValue = rng.nextInt(); + log("BinaryIntMemMem(0X%x, 0X%x) -> 0X%x", aValue, bValue, + expr.intExpr(new Expr.MemI(aValue), + new Expr.MemI(bValue))); + } + } + + public static void runBinaryRegRegLongTest(Expr expr, + int iterations, + Random rng) { + if (!(expr.isLongExprSupported() + && expr.isBinaryArgumentSupported())) { + return; + } + + for (int i = 0; i < iterations; i++) { + long aValue = rng.nextLong(); + long bValue = rng.nextLong(); + log("BinaryLongRegReg(0X%x, 0X%x) -> 0X%x", aValue, bValue, + expr.longExpr(aValue, bValue)); + } + } + + public static void runBinaryRegMemLongTest(Expr expr, + int iterations, + Random rng) { + if (!(expr.isLongExprSupported() + && expr.isBinaryArgumentSupported() + && expr.isMemExprSupported())) { + return; + } + + for (int i = 0; i < iterations; i++) { + long aValue = rng.nextLong(); + long bValue = rng.nextLong(); + log("BinaryLongRegMem(0X%x, 0X%x) -> 0X%x", aValue, bValue, + expr.longExpr(aValue, new Expr.MemL(bValue))); + } + } + + public static void runBinaryMemRegLongTest(Expr expr, + int iterations, + Random rng) { + if (!(expr.isLongExprSupported() + && expr.isBinaryArgumentSupported() + && expr.isMemExprSupported())) { + return; + } + + for (int i = 0; i < iterations; i++) { + long aValue = rng.nextLong(); + long bValue = rng.nextLong(); + log("BinaryLongMemReg(0X%x, 0X%x) -> 0X%x", aValue, bValue, + expr.longExpr(new Expr.MemL(aValue), bValue)); + } + } + + public static void runBinaryMemMemLongTest(Expr expr, + int iterations, + Random rng) { + if (!(expr.isLongExprSupported() + && expr.isBinaryArgumentSupported() + && expr.isMemExprSupported())) { + return; + } + + for (int i = 0; i < iterations; i++) { + long aValue = rng.nextLong(); + long bValue = rng.nextLong(); + log("BinaryLongMemMem(0X%x, 0X%x) -> 0X%x", aValue, bValue, + expr.longExpr(new Expr.MemL(aValue), + new Expr.MemL(bValue))); + } + } + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/Expr.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/Expr.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * Expression that should be replaced by particular instrinsic + * or intruction during compilation. 
+ */ + +public abstract class Expr { + + public static class MemI { + public MemI(int i) { + this.value = i; + } + + public int value; + } + + public static class MemL { + public MemL(long l) { + this.value = l; + } + + public long value; + } + + public boolean isUnaryArgumentSupported() { + return false; + } + + public boolean isIntExprSupported() { + return false; + } + + public boolean isBinaryArgumentSupported() { + return false; + } + + public boolean isLongExprSupported() { + return false; + } + + public boolean isMemExprSupported() { + return false; + } + + public int intExpr(int reg) { + throw new UnsupportedOperationException(); + } + + public int intExpr(MemI mem) { + throw new UnsupportedOperationException(); + } + + public int intExpr(int a, int b) { + throw new UnsupportedOperationException(); + } + + public int intExpr(int a, MemI b) { + throw new UnsupportedOperationException(); + } + + public int intExpr(MemI a, int b) { + throw new UnsupportedOperationException(); + } + + public int intExpr(MemI a, MemI b) { + throw new UnsupportedOperationException(); + } + + public long longExpr(long reg) { + throw new UnsupportedOperationException(); + } + + public long longExpr(MemL mem) { + throw new UnsupportedOperationException(); + } + + public long longExpr(long a, long b) { + throw new UnsupportedOperationException(); + } + + public long longExpr(long a, MemL b) { + throw new UnsupportedOperationException(); + } + + public long longExpr(MemL a, long b) { + throw new UnsupportedOperationException(); + } + + public long longExpr(MemL a, MemL b) { + throw new UnsupportedOperationException(); + } + + public static class BMIExpr extends Expr { + + public boolean isMemExprSupported() { + return true; + } + } + + public static class BMIBinaryExpr extends BMIExpr { + + public boolean isBinaryArgumentSupported() { + return true; + } + + } + + public static class BMIUnaryExpr extends BMIExpr { + public boolean isUnaryArgumentSupported() { + return true; + } + } + + public static class BMIBinaryIntExpr extends BMIBinaryExpr { + public boolean isIntExprSupported() { + return true; + } + } + + public static class BMIBinaryLongExpr extends BMIBinaryExpr { + public boolean isLongExprSupported() { + return true; + } + } + + public static class BMIUnaryIntExpr extends BMIUnaryExpr { + public boolean isIntExprSupported() { + return true; + } + } + + public static class BMIUnaryLongExpr extends BMIUnaryExpr { + public boolean isLongExprSupported() { + return true; + } + } + + public static class BitCountingExpr extends Expr { + public boolean isUnaryArgumentSupported() { + return true; + } + } + + public static class BitCountingIntExpr extends BitCountingExpr { + public boolean isIntExprSupported() { + return true; + } + } + + public static class BitCountingLongExpr extends BitCountingExpr { + public boolean isLongExprSupported() { + return true; + } + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestAndnI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestAndnI.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of ANDN instruction + * @library /testlibrary /testlibrary/whitebox + * @build TestAndnI BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestAndnI + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestAndnI { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. "+ + "Test skipped."); + return; + } + + BMITestRunner.runTests(AndnIExpr.class, args, + "-XX:+UseBMI1Instructions"); + BMITestRunner.runTests(AndnICommutativeExpr.class, args, + "-XX:+UseBMI1Instructions"); + } + + public static class AndnIExpr extends Expr.BMIBinaryIntExpr { + + public int intExpr(int src1, int src2) { + return ~src1 & src2; + } + + public int intExpr(int src1, Expr.MemI src2) { + return ~src1 & src2.value; + } + + public int intExpr(Expr.MemI src1, int src2) { + return ~src1.value & src2; + } + + public int intExpr(Expr.MemI src1, Expr.MemI src2) { + return ~src1.value & src2.value; + } + } + + public static class AndnICommutativeExpr extends Expr.BMIBinaryIntExpr { + + public int intExpr(int src1, int src2) { + return src1 & ~src2; + } + + public int intExpr(int src1, Expr.MemI src2) { + return src1 & ~src2.value; + } + + public int intExpr(Expr.MemI src1, int src2) { + return src1.value & ~src2; + } + + public int intExpr(Expr.MemI src1, Expr.MemI src2) { + return src1.value & ~src2.value; + } + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestAndnL.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestAndnL.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of ANDN instruction + * @library /testlibrary /testlibrary/whitebox + * @build TestAndnL BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestAndnL + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestAndnL { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. " + + "Test skipped."); + return; + } + + BMITestRunner.runTests(AndnLExpr.class, args, + "-XX:+UseBMI1Instructions"); + BMITestRunner.runTests(AndnLCommutativeExpr.class, args, + "-XX:+UseBMI1Instructions"); + } + + public static class AndnLExpr extends Expr.BMIBinaryLongExpr { + + public long longExpr(long src1, long src2) { + return ~src1 & src2; + } + + public long longExpr(long src1, Expr.MemL src2) { + return ~src1 & src2.value; + } + + public long longExpr(Expr.MemL src1, long src2) { + return ~src1.value & src2; + } + + public long longExpr(Expr.MemL src1, Expr.MemL src2) { + return ~src1.value & src2.value; + } + + + } + + public static class AndnLCommutativeExpr extends Expr.BMIBinaryLongExpr { + + public long longExpr(long src1, long src2) { + return src1 & ~src2; + } + + public long longExpr(long src1, Expr.MemL src2) { + return src1 & ~src2.value; + } + + public long longExpr(Expr.MemL src1, long src2) { + return src1.value & ~src2; + } + + public long longExpr(Expr.MemL src1, Expr.MemL src2) { + return src1.value & ~src2.value; + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestBlsiI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestBlsiI.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of BLSI instruction + * @library /testlibrary /testlibrary/whitebox + * @build TestBlsiI BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestBlsiI + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestBlsiI { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. " + + "Test skipped."); + return; + } + + BMITestRunner.runTests(BlsiIExpr.class, args, + "-XX:+UseBMI1Instructions"); + BMITestRunner.runTests(BlsiICommutativeExpr.class, args, + "-XX:+UseBMI1Instructions"); + } + + public static class BlsiIExpr extends Expr.BMIUnaryIntExpr { + + public int intExpr(int src) { + return -src & src; + } + + public int intExpr(Expr.MemI src) { + return -src.value & src.value; + } + + } + + public static class BlsiICommutativeExpr extends Expr.BMIUnaryIntExpr { + + public int intExpr(int src) { + return src & -src; + } + + public int intExpr(Expr.MemI src) { + return src.value & -src.value; + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestBlsiL.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestBlsiL.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of BLSI instruction + * @library /testlibrary /testlibrary/whitebox + * @build TestBlsiL BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestBlsiL + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestBlsiL { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. 
" + + "Test skipped."); + return; + } + + BMITestRunner.runTests(BlsiLExpr.class, args, + "-XX:+UseBMI1Instructions"); + BMITestRunner.runTests(BlsiLCommutativeExpr.class, args, + "-XX:+UseBMI1Instructions"); + } + + public static class BlsiLExpr extends Expr.BMIUnaryLongExpr { + + public long longExpr(long src) { + return -src & src; + } + + public long longExpr(Expr.MemL src) { + return -src.value & src.value; + } + + } + + public static class BlsiLCommutativeExpr extends Expr.BMIUnaryLongExpr { + + public long longExpr(long src) { + return src & -src; + } + + public long longExpr(Expr.MemL src) { + return src.value & -src.value; + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestBlsmskI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestBlsmskI.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of BLSMSK instruction + * @library /testlibrary /testlibrary/whitebox + * @build TestBlsmskI BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestBlsmskI + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestBlsmskI { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. " + + "Test skipped."); + return; + } + + BMITestRunner.runTests(BlsmskIExpr.class, args, + "-XX:+UseBMI1Instructions"); + BMITestRunner.runTests(BlsmskICommutativeExpr.class, args, + "-XX:+UseBMI1Instructions"); + } + + public static class BlsmskIExpr extends Expr.BMIUnaryIntExpr { + + public int intExpr(int src) { + return (src - 1) ^ src; + } + + public int intExpr(Expr.MemI src) { + return (src.value - 1) ^ src.value; + } + + } + + public static class BlsmskICommutativeExpr extends Expr.BMIUnaryIntExpr { + + public int intExpr(int src) { + return src ^ (src - 1); + } + + public int intExpr(Expr.MemI src) { + return src.value ^ (src.value - 1); + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestBlsmskL.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestBlsmskL.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of BLSMSK instruction + * @library /testlibrary /testlibrary/whitebox + * @build TestBlsmskL BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestBlsmskL + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestBlsmskL { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. " + + "Test skipped."); + return; + } + + BMITestRunner.runTests(BlsmskLExpr.class, args, + "-XX:+UseBMI1Instructions"); + BMITestRunner.runTests(BlsmskLCommutativeExpr.class, args, + "-XX:+UseBMI1Instructions"); + } + + public static class BlsmskLExpr + extends Expr.BMIUnaryLongExpr { + + public long longExpr(long src) { + return (src - 1) ^ src; + } + + public long longExpr(Expr.MemL src) { + return (src.value - 1) ^ src.value; + } + + } + + public static class BlsmskLCommutativeExpr + extends Expr.BMIUnaryLongExpr { + + public long longExpr(long src) { + return src ^ (src - 1); + } + + public long longExpr(Expr.MemL src) { + return src.value ^ (src.value - 1); + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestBlsrI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestBlsrI.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of BLSR instruction + * @library /testlibrary /testlibrary/whitebox + * @build TestBlsrI BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestBlsrI + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestBlsrI { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. " + + "Test skipped."); + return; + } + + BMITestRunner.runTests(BlsrIExpr.class, args, + "-XX:+UseBMI1Instructions"); + BMITestRunner.runTests(BlsrICommutativeExpr.class, args, + "-XX:+UseBMI1Instructions"); + } + + public static class BlsrIExpr extends Expr.BMIUnaryIntExpr { + + public int intExpr(int src) { + return (src - 1) & src; + } + + public int intExpr(Expr.MemI src) { + return (src.value - 1) & src.value; + } + + } + + public static class BlsrICommutativeExpr extends Expr.BMIUnaryIntExpr { + + public int intExpr(int src) { + return src & (src - 1); + } + + public int intExpr(Expr.MemI src) { + return src.value & (src.value - 1); + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestBlsrL.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestBlsrL.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of BLSR instruction + * @library /testlibrary /testlibrary/whitebox + * @build TestBlsrL BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestBlsrL + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestBlsrL { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. 
" + + "Test skipped."); + return; + } + + BMITestRunner.runTests(BlsrLExpr.class, args, + "-XX:+UseBMI1Instructions"); + BMITestRunner.runTests(BlsrLCommutativeExpr.class, args, + "-XX:+UseBMI1Instructions"); + } + + public static class BlsrLExpr extends Expr.BMIUnaryLongExpr { + + public long longExpr(long src) { + return (src - 1) & src; + } + + public long longExpr(Expr.MemL src) { + return (src.value - 1) & src.value; + } + + } + + public static class BlsrLCommutativeExpr extends Expr.BMIUnaryLongExpr { + + public long longExpr(long src) { + return src & (src - 1); + } + + public long longExpr(Expr.MemL src) { + return src.value & (src.value - 1); + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestLzcntI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestLzcntI.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of intrinsic + * @library /testlibrary /testlibrary/whitebox + * @build TestLzcntI BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestLzcntI + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestLzcntI { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("lzcnt")) { + System.out.println("CPU does not support lzcnt feature. " + + "Test skipped."); + return; + } + + BMITestRunner.runTests(LzcntIExpr.class, args, + "-XX:+UseCountLeadingZerosInstruction"); + } + + public static class LzcntIExpr extends Expr.BitCountingIntExpr { + + public int intExpr(int src) { + return Integer.numberOfLeadingZeros(src); + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestLzcntL.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestLzcntL.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of intrinsic + * @library /testlibrary /testlibrary/whitebox + * @build TestLzcntL BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestLzcntL + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestLzcntL { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("lzcnt")) { + System.out.println("CPU does not support lzcnt feature. " + + "Test skipped."); + return; + } + + BMITestRunner.runTests(LzcntLExpr.class, args, + "-XX:+UseCountLeadingZerosInstruction"); + } + + public static class LzcntLExpr extends Expr.BitCountingLongExpr { + + public long longExpr(long src) { + return Long.numberOfLeadingZeros(src); + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestTzcntI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestTzcntI.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of intrinsic + * @library /testlibrary /testlibrary/whitebox + * @build TestTzcntI BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestTzcntI + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestTzcntI { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. 
" + + "Test skipped."); + return; + } + + BMITestRunner.runTests(TzcntIExpr.class, args, + "-XX:+UseCountTrailingZerosInstruction"); + } + + public static class TzcntIExpr extends Expr.BitCountingIntExpr { + + public int intExpr(int src) { + return Integer.numberOfTrailingZeros(src); + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/bmi/TestTzcntL.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/bmi/TestTzcntL.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8031321 + * @summary Verify that results of computations are the same w/ + * and w/o usage of intrinsic + * @library /testlibrary /testlibrary/whitebox + * @build TestTzcntL BMITestRunner Expr + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI TestTzcntL + */ + +import sun.hotspot.cpuinfo.CPUInfo; + +public class TestTzcntL { + + public static void main(String args[]) throws Throwable { + if (!CPUInfo.hasFeature("bmi1")) { + System.out.println("CPU does not support bmi1 feature. 
" + + "Test skipped."); + return; + } + + BMITestRunner.runTests(TzcntLExpr.class, args, + "-XX:+UseCountTrailingZerosInstruction"); + } + + public static class TzcntLExpr extends Expr.BitCountingLongExpr { + + public long longExpr(long src) { + return Long.numberOfTrailingZeros(src); + } + + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/AddExactICondTest.java --- a/test/compiler/intrinsics/mathexact/AddExactICondTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/AddExactICondTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8024924 * @summary Test non constant addExact * @compile AddExactICondTest.java - * @run main AddExactICondTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main AddExactICondTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/AddExactIConstantTest.java --- a/test/compiler/intrinsics/mathexact/AddExactIConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/AddExactIConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8024924 * @summary Test constant addExact * @compile AddExactIConstantTest.java Verify.java - * @run main AddExactIConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main AddExactIConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/AddExactILoadTest.java --- a/test/compiler/intrinsics/mathexact/AddExactILoadTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/AddExactILoadTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8024924 * @summary Test non constant addExact * @compile AddExactILoadTest.java Verify.java - * @run main AddExactILoadTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main AddExactILoadTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/AddExactILoopDependentTest.java --- a/test/compiler/intrinsics/mathexact/AddExactILoopDependentTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/AddExactILoopDependentTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8024924 * @summary Test non constant addExact * @compile AddExactILoopDependentTest.java Verify.java - * @run main AddExactILoopDependentTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main AddExactILoopDependentTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/AddExactINonConstantTest.java --- a/test/compiler/intrinsics/mathexact/AddExactINonConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/AddExactINonConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8024924 * @summary Test non constant addExact * @compile AddExactINonConstantTest.java Verify.java - * @run main AddExactINonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main AddExactINonConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/AddExactIRepeatTest.java --- a/test/compiler/intrinsics/mathexact/AddExactIRepeatTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/AddExactIRepeatTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8025657 
* @summary Test repeating addExact * @compile AddExactIRepeatTest.java Verify.java - * @run main AddExactIRepeatTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main AddExactIRepeatTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/AddExactLConstantTest.java --- a/test/compiler/intrinsics/mathexact/AddExactLConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/AddExactLConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test constant addExact * @compile AddExactLConstantTest.java Verify.java - * @run main AddExactLConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main AddExactLConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/AddExactLNonConstantTest.java --- a/test/compiler/intrinsics/mathexact/AddExactLNonConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/AddExactLNonConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test non constant addExact * @compile AddExactLNonConstantTest.java Verify.java - * @run main AddExactLNonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main AddExactLNonConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/CompareTest.java --- a/test/compiler/intrinsics/mathexact/CompareTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/CompareTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026722 * @summary Verify that the compare after addExact is a signed compare * @compile CompareTest.java - * @run main CompareTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main CompareTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/DecExactITest.java --- a/test/compiler/intrinsics/mathexact/DecExactITest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/DecExactITest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test decrementExact * @compile DecExactITest.java Verify.java - * @run main DecExactITest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main DecExactITest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/DecExactLTest.java --- a/test/compiler/intrinsics/mathexact/DecExactLTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/DecExactLTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test decrementExact * @compile DecExactLTest.java Verify.java - * @run main DecExactLTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main DecExactLTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/GVNTest.java --- a/test/compiler/intrinsics/mathexact/GVNTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/GVNTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8028207 * @summary Verify that GVN doesn't mess up the two addExacts * @compile GVNTest.java - * @run main GVNTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * 
@run main GVNTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/IncExactITest.java --- a/test/compiler/intrinsics/mathexact/IncExactITest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/IncExactITest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test incrementExact * @compile IncExactITest.java Verify.java - * @run main IncExactITest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main IncExactITest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/IncExactLTest.java --- a/test/compiler/intrinsics/mathexact/IncExactLTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/IncExactLTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test incrementExact * @compile IncExactLTest.java Verify.java - * @run main IncExactLTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main IncExactLTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/MulExactICondTest.java --- a/test/compiler/intrinsics/mathexact/MulExactICondTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/MulExactICondTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test multiplyExact as condition * @compile MulExactICondTest.java - * @run main MulExactICondTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main MulExactICondTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/MulExactIConstantTest.java --- a/test/compiler/intrinsics/mathexact/MulExactIConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/MulExactIConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test constant multiplyExact * @compile MulExactIConstantTest.java Verify.java - * @run main MulExactIConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main MulExactIConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/MulExactILoadTest.java --- a/test/compiler/intrinsics/mathexact/MulExactILoadTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/MulExactILoadTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test multiplyExact * @compile MulExactILoadTest.java Verify.java - * @run main MulExactILoadTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main MulExactILoadTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/MulExactILoopDependentTest.java --- a/test/compiler/intrinsics/mathexact/MulExactILoopDependentTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/MulExactILoopDependentTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test loop dependent multiplyExact * @compile MulExactILoopDependentTest.java Verify.java - * @run main MulExactILoopDependentTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main MulExactILoopDependentTest * */ public class MulExactILoopDependentTest { diff -r a433eb716ce1 -r 62c54fcc0a35 
test/compiler/intrinsics/mathexact/MulExactINonConstantTest.java --- a/test/compiler/intrinsics/mathexact/MulExactINonConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/MulExactINonConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test non constant multiplyExact * @compile MulExactINonConstantTest.java Verify.java - * @run main MulExactINonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main MulExactINonConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/MulExactIRepeatTest.java --- a/test/compiler/intrinsics/mathexact/MulExactIRepeatTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/MulExactIRepeatTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test repeating multiplyExact * @compile MulExactIRepeatTest.java Verify.java - * @run main MulExactIRepeatTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main MulExactIRepeatTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/MulExactLConstantTest.java --- a/test/compiler/intrinsics/mathexact/MulExactLConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/MulExactLConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test constant mulExact * @compile MulExactLConstantTest.java Verify.java - * @run main MulExactLConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main MulExactLConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/MulExactLNonConstantTest.java --- a/test/compiler/intrinsics/mathexact/MulExactLNonConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/MulExactLNonConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test non constant mulExact * @compile MulExactLNonConstantTest.java Verify.java - * @run main MulExactLNonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main MulExactLNonConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/NegExactIConstantTest.java --- a/test/compiler/intrinsics/mathexact/NegExactIConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/NegExactIConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test constant negExact * @compile NegExactIConstantTest.java Verify.java - * @run main NegExactIConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main NegExactIConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/NegExactILoadTest.java --- a/test/compiler/intrinsics/mathexact/NegExactILoadTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/NegExactILoadTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,14 +26,14 @@ * @bug 8026844 * @summary Test negExact * @compile NegExactILoadTest.java Verify.java - * @run main NegExactILoadTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main NegExactILoadTest * */ public class NegExactILoadTest { public static void main(String[] 
args) { - Verify.LoadTest.init(); - Verify.LoadTest.verify(new Verify.UnaryToBinary(new Verify.NegExactI())); + Verify.LoadTest.init(); + Verify.LoadTest.verify(new Verify.UnaryToBinary(new Verify.NegExactI())); } } diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/NegExactILoopDependentTest.java --- a/test/compiler/intrinsics/mathexact/NegExactILoopDependentTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/NegExactILoopDependentTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test negExact loop dependent * @compile NegExactILoopDependentTest.java Verify.java - * @run main NegExactILoopDependentTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main NegExactILoopDependentTest * */ public class NegExactILoopDependentTest { diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/NegExactINonConstantTest.java --- a/test/compiler/intrinsics/mathexact/NegExactINonConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/NegExactINonConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test non constant negExact * @compile NegExactINonConstantTest.java Verify.java - * @run main NegExactINonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main NegExactINonConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/NegExactLConstantTest.java --- a/test/compiler/intrinsics/mathexact/NegExactLConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/NegExactLConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test constant negExact * @compile NegExactLConstantTest.java Verify.java - * @run main NegExactLConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main NegExactLConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/NegExactLNonConstantTest.java --- a/test/compiler/intrinsics/mathexact/NegExactLNonConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/NegExactLNonConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test constant negExact * @compile NegExactLNonConstantTest.java Verify.java - * @run main NegExactLNonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main NegExactLNonConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/NestedMathExactTest.java --- a/test/compiler/intrinsics/mathexact/NestedMathExactTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/NestedMathExactTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8027444 * @summary Test nested loops * @compile NestedMathExactTest.java - * @run main NestedMathExactTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main NestedMathExactTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SplitThruPhiTest.java --- a/test/compiler/intrinsics/mathexact/SplitThruPhiTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SplitThruPhiTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8028198 * 
@summary Verify that split through phi does the right thing * @compile SplitThruPhiTest.java - * @run main SplitThruPhiTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SplitThruPhiTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SubExactICondTest.java --- a/test/compiler/intrinsics/mathexact/SubExactICondTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SubExactICondTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test subtractExact as condition * @compile SubExactICondTest.java Verify.java - * @run main SubExactICondTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SubExactICondTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SubExactIConstantTest.java --- a/test/compiler/intrinsics/mathexact/SubExactIConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SubExactIConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test constant subtractExact * @compile SubExactIConstantTest.java Verify.java - * @run main SubExactIConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SubExactIConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SubExactILoadTest.java --- a/test/compiler/intrinsics/mathexact/SubExactILoadTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SubExactILoadTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test non constant subtractExact * @compile SubExactILoadTest.java Verify.java - * @run main SubExactILoadTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SubExactILoadTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SubExactILoopDependentTest.java --- a/test/compiler/intrinsics/mathexact/SubExactILoopDependentTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SubExactILoopDependentTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test non constant subtractExact * @compile SubExactILoopDependentTest.java Verify.java - * @run main SubExactILoopDependentTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SubExactILoopDependentTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SubExactINonConstantTest.java --- a/test/compiler/intrinsics/mathexact/SubExactINonConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SubExactINonConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary Test non constant subtractExact * @compile SubExactINonConstantTest.java Verify.java - * @run main SubExactINonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SubExactINonConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SubExactIRepeatTest.java --- a/test/compiler/intrinsics/mathexact/SubExactIRepeatTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SubExactIRepeatTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,7 +26,7 @@ * @bug 8026844 * @summary 
Test repeating subtractExact * @compile SubExactIRepeatTest.java Verify.java - * @run main SubExactIRepeatTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SubExactIRepeatTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SubExactLConstantTest.java --- a/test/compiler/intrinsics/mathexact/SubExactLConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SubExactLConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -27,7 +27,7 @@ * @bug 8027353 * @summary Test constant subtractExact * @compile SubExactLConstantTest.java Verify.java - * @run main SubExactLConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SubExactLConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/SubExactLNonConstantTest.java --- a/test/compiler/intrinsics/mathexact/SubExactLNonConstantTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/SubExactLNonConstantTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -27,7 +27,7 @@ * @bug 8027353 * @summary Test non constant subtractExact * @compile SubExactLNonConstantTest.java Verify.java - * @run main SubExactLNonConstantTest -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions -XX:+UseMathExactIntrinsics + * @run main SubExactLNonConstantTest * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/Verify.java --- a/test/compiler/intrinsics/mathexact/Verify.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/Verify.java Tue Mar 25 17:07:36 2014 -0700 @@ -160,6 +160,7 @@ public static class NonConstantTest { public static java.util.Random rnd = new java.util.Random(); + public static int[] values = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE }; public static void verify(BinaryMethod method) { for (int i = 0; i < 50000; ++i) { @@ -169,6 +170,10 @@ Verify.verifyBinary(rnd1 + 1, rnd2, method); Verify.verifyBinary(rnd1 - 1, rnd2, method); Verify.verifyBinary(rnd1, rnd2 - 1, method); + Verify.verifyBinary(0, values[0], method); + Verify.verifyBinary(values[0], 0, method); + Verify.verifyBinary(0, values[1], method); + Verify.verifyBinary(values[1], 0, method); } } } diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java --- a/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/AddExactIntTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build AddExactIntTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics AddExactIntTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics AddExactIntTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java --- a/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/AddExactLongTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build AddExactLongTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics AddExactLongTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics AddExactLongTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java --- a/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/DecrementExactIntTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build DecrementExactIntTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics DecrementExactIntTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics DecrementExactIntTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java --- a/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/DecrementExactLongTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build DecrementExactLongTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics DecrementExactLongTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics DecrementExactLongTest @@ -42,4 +42,4 @@ public static void main(String[] args) throws Exception { new IntrinsicBase.LongTest(MathIntrinsic.LongIntrinsic.Decrement).test(); } -} \ No newline at end of file +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java --- a/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/IncrementExactIntTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build IncrementExactIntTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics IncrementExactIntTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics IncrementExactIntTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java --- a/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/IncrementExactLongTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build IncrementExactLongTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics IncrementExactLongTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics IncrementExactLongTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java --- a/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/MultiplyExactIntTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build MultiplyExactIntTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics MultiplyExactIntTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics MultiplyExactIntTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java --- a/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/MultiplyExactLongTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build MultiplyExactLongTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics MultiplyExactLongTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics MultiplyExactLongTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java --- a/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/NegateExactIntTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build NegateExactIntTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics NegateExactIntTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics NegateExactIntTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java --- a/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/NegateExactLongTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build NegateExactLongTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics NegateExactLongTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics NegateExactLongTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java --- a/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/SubtractExactIntTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build SubtractExactIntTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics SubtractExactIntTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics SubtractExactIntTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java --- a/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/intrinsics/mathexact/sanity/SubtractExactLongTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -26,11 +26,11 @@ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox * @build SubtractExactLongTest * @run main ClassFileInstaller sun.hotspot.WhiteBox - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs_neg.log -XX:-UseMathExactIntrinsics SubtractExactLongTest - * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions * -XX:+IgnoreUnrecognizedVMOptions -XX:+WhiteBoxAPI -XX:+LogCompilation * -XX:CompileCommand=compileonly,MathIntrinsic*::execMathMethod * -XX:LogFile=hs.log -XX:+UseMathExactIntrinsics SubtractExactLongTest diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/intrinsics/unsafe/UnsafeGetAddressTest.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/unsafe/UnsafeGetAddressTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test + * @bug 6653795 + * @summary C2 intrinsic for Unsafe.getAddress performs pointer sign extension on 32-bit systems + * @run main UnsafeGetAddressTest + * + */ + +import sun.misc.Unsafe; +import java.lang.reflect.*; + +public class UnsafeGetAddressTest { + private static Unsafe unsafe; + + public static void main(String[] args) throws Exception { + Class c = UnsafeGetAddressTest.class.getClassLoader().loadClass("sun.misc.Unsafe"); + Field f = c.getDeclaredField("theUnsafe"); + f.setAccessible(true); + unsafe = (Unsafe)f.get(c); + + long address = unsafe.allocateMemory(unsafe.addressSize()); + unsafe.putAddress(address, 0x0000000080000000L); + // from sun.misc.Unsafe.getAddress' documentation: + // "If the native pointer is less than 64 bits wide, it is + // extended as an unsigned number to a Java long." + result = unsafe.getAddress(address); + System.out.printf("1: was 0x%x, expected 0x%x\n", result, + 0x0000000080000000L); + for (int i = 0; i < 1000000; i++) { + result = unsafe.getAddress(address); + } + + // The code has been compiled; check the result now + System.out.printf("2: was 0x%x, expected 0x%x\n", result, + 0x0000000080000000L); + if (result != 0x0000000080000000L) { + System.out.println("Test Failed"); + System.exit(97); + } else { + System.out.println("Test Passed"); + } + } + static volatile long result; +} + diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/tiered/NonTieredLevelsTest.java --- a/test/compiler/tiered/NonTieredLevelsTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/tiered/NonTieredLevelsTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -70,6 +70,9 @@ @Override protected void test() throws Exception { + if (skipXcompOSR()) { + return; + } checkNotCompiled(); compile(); checkCompiled(); diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/tiered/TieredLevelsTest.java --- a/test/compiler/tiered/TieredLevelsTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/tiered/TieredLevelsTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -51,6 +51,9 @@ @Override protected void test() throws Exception { + if (skipXcompOSR()) { + return; + } checkNotCompiled(); compile(); checkCompiled(); diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/types/TestMeetTopArrayExactConstantArray.java --- a/test/compiler/types/TestMeetTopArrayExactConstantArray.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/types/TestMeetTopArrayExactConstantArray.java Tue Mar 25 17:07:36 2014 -0700 @@ -25,7 +25,7 @@ * @test * @bug 8027571 * @summary meet of TopPTR exact array with constant array is not symmetric - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:TypeProfileLevel=222 -XX:+UnlockExperimentalVMOptions -XX:+UseTypeSpeculation -XX:-BackgroundCompilation TestMeetTopArrayExactConstantArray + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:TypeProfileLevel=222 -XX:+UseTypeSpeculation -XX:-BackgroundCompilation TestMeetTopArrayExactConstantArray * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/types/TestSpeculationFailedHigherEqual.java --- a/test/compiler/types/TestSpeculationFailedHigherEqual.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/types/TestSpeculationFailedHigherEqual.java Tue Mar 25 17:07:36 2014 -0700 @@ -25,7 +25,7 @@ * @test * @bug 8027422 * @summary type methods shouldn't always operate on speculative part - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:TypeProfileLevel=222 -XX:+UnlockExperimentalVMOptions -XX:+UseTypeSpeculation -XX:-BackgroundCompilation TestSpeculationFailedHigherEqual + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:TypeProfileLevel=222 -XX:+UseTypeSpeculation -XX:-BackgroundCompilation TestSpeculationFailedHigherEqual * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/types/TypeSpeculation.java --- a/test/compiler/types/TypeSpeculation.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/types/TypeSpeculation.java Tue Mar 25 17:07:36 2014 -0700 @@ -25,7 +25,7 @@ * @test * @bug 8024070 * @summary Test that type speculation doesn't cause incorrect execution - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:TypeProfileLevel=222 -XX:+UnlockExperimentalVMOptions -XX:+UseTypeSpeculation TypeSpeculation + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:TypeProfileLevel=222 -XX:+UseTypeSpeculation TypeSpeculation * */ diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/uncommontrap/TestSpecTrapClassUnloading.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/uncommontrap/TestSpecTrapClassUnloading.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8031752 + * @summary speculative traps need to be cleaned up at GC + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:+UseTypeSpeculation -XX:TypeProfileLevel=222 -XX:CompileCommand=exclude,java.lang.reflect.Method::invoke -XX:CompileCommand=exclude,sun.reflect.DelegatingMethodAccessorImpl::invoke -Xmx1M TestSpecTrapClassUnloading + * + */ + +import java.lang.reflect.Method; + +public class TestSpecTrapClassUnloading { + static class B { + final public boolean m(Object o) { + if (o.getClass() == B.class) { + return true; + } + return false; + } + } + + static class MemoryChunk { + MemoryChunk other; + long[] array; + MemoryChunk(MemoryChunk other) { + this.other = other; + array = new long[1024 * 1024 * 1024]; + } + } + + static void m1(B b, Object o) { + b.m(o); + } + + static void m2(B b, Object o) { + b.m(o); + } + + public static void main(String[] args) throws Exception { + Method m = B.class.getMethod("m", Object.class); + Object o = new Object(); + B b = new B(); + + // add speculative trap in B.m() for m1 + for (int i = 0; i < 20000; i++) { + m1(b, b); + } + m1(b, o); + + // add speculative trap in B.m() for code generated by reflection + for (int i = 0; i < 20000; i++) { + m.invoke(b, b); + } + m.invoke(b, o); + + m = null; + + // add speculative trap in B.m() for m2 + for (int i = 0; i < 20000; i++) { + m2(b, b); + } + m2(b, o); + + // Exhaust memory which causes the code generated by + // reflection to be unloaded but B.m() is not. + MemoryChunk root = null; + try { + while (true) { + root = new MemoryChunk(root); + } + } catch(OutOfMemoryError e) { + root = null; + } + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/whitebox/CompilerWhiteBoxTest.java --- a/test/compiler/whitebox/CompilerWhiteBoxTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/whitebox/CompilerWhiteBoxTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -380,6 +380,20 @@ /** flag for OSR test case */ boolean isOsr(); } + + /** + * @return {@code true} if the current test case is OSR and the mode is + * Xcomp, otherwise {@code false} + */ + protected boolean skipXcompOSR() { + boolean result = testCase.isOsr() + && CompilerWhiteBoxTest.MODE.startsWith("compiled "); + if (result && IS_VERBOSE) { + System.err.printf("Warning: %s is not applicable in %s%n", + testCase.name(), CompilerWhiteBoxTest.MODE); + } + return result; + } } enum SimpleTestCase implements CompilerWhiteBoxTest.TestCase { diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/whitebox/DeoptimizeAllTest.java --- a/test/compiler/whitebox/DeoptimizeAllTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/whitebox/DeoptimizeAllTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -51,11 +51,8 @@ */ @Override protected void test() throws Exception { - if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith( - "compiled ")) { - System.err.printf("Warning: %s is not applicable in %s%n", - testCase.name(), CompilerWhiteBoxTest.MODE); - return; + if (skipXcompOSR()) { + return; } compile(); checkCompiled(); diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/whitebox/DeoptimizeMethodTest.java --- a/test/compiler/whitebox/DeoptimizeMethodTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/whitebox/DeoptimizeMethodTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -51,11 +51,8 @@ */ @Override protected void test() throws Exception { - if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith( - "compiled ")) { - System.err.printf("Warning: %s is not applicable in %s%n", - testCase.name(), CompilerWhiteBoxTest.MODE); - return; + if (skipXcompOSR()) { + return; } compile(); checkCompiled(); diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/whitebox/IsMethodCompilableTest.java --- a/test/compiler/whitebox/IsMethodCompilableTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/whitebox/IsMethodCompilableTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -66,10 +66,7 @@ */ @Override protected void test() throws Exception { - if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith( - "compiled ")) { - System.err.printf("Warning: %s is not applicable in %s%n", - testCase.name(), CompilerWhiteBoxTest.MODE); + if (skipXcompOSR()) { return; } if (!isCompilable()) { diff -r a433eb716ce1 -r 62c54fcc0a35 test/compiler/whitebox/MakeMethodNotCompilableTest.java --- a/test/compiler/whitebox/MakeMethodNotCompilableTest.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/compiler/whitebox/MakeMethodNotCompilableTest.java Tue Mar 25 17:07:36 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -53,11 +53,8 @@ */ @Override protected void test() throws Exception { - if (testCase.isOsr() && CompilerWhiteBoxTest.MODE.startsWith( - "compiled ")) { - System.err.printf("Warning: %s is not applicable in %s%n", - testCase.name(), CompilerWhiteBoxTest.MODE); - return; + if (skipXcompOSR()) { + return; } checkNotCompiled(); if (!isCompilable()) { diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/arguments/TestDynMaxHeapFreeRatio.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/arguments/TestDynMaxHeapFreeRatio.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test TestDynMaxHeapFreeRatio + * @bug 8028391 + * @summary Verify that MaxHeapFreeRatio flag is manageable + * @library /testlibrary + * @run main TestDynMaxHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=0 -XX:MaxHeapFreeRatio=100 TestDynMaxHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=10 -XX:MaxHeapFreeRatio=50 -XX:-UseAdaptiveSizePolicy TestDynMaxHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=10 -XX:MaxHeapFreeRatio=50 TestDynMaxHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=51 -XX:MaxHeapFreeRatio=52 TestDynMaxHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=75 -XX:MaxHeapFreeRatio=100 TestDynMaxHeapFreeRatio + */ +import com.oracle.java.testlibrary.TestDynamicVMOption; +import com.oracle.java.testlibrary.DynamicVMOptionChecker; + +public class TestDynMaxHeapFreeRatio extends TestDynamicVMOption { + + public static final String MinFreeRatioFlagName = "MinHeapFreeRatio"; + public static final String MaxFreeRatioFlagName = "MaxHeapFreeRatio"; + + public TestDynMaxHeapFreeRatio() { + super(MaxFreeRatioFlagName); + } + + public void test() { + + int minHeapFreeValue = DynamicVMOptionChecker.getIntValue(MinFreeRatioFlagName); + System.out.println(MinFreeRatioFlagName + " = " + minHeapFreeValue); + + testPercentageValues(); + + checkInvalidValue(Integer.toString(minHeapFreeValue - 1)); + checkValidValue(Integer.toString(minHeapFreeValue)); + checkValidValue("100"); + } + + public static void main(String args[]) throws Exception { + new TestDynMaxHeapFreeRatio().test(); + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/arguments/TestDynMinHeapFreeRatio.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/arguments/TestDynMinHeapFreeRatio.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test TestDynMinHeapFreeRatio + * @bug 8028391 + * @summary Verify that MinHeapFreeRatio flag is manageable + * @library /testlibrary + * @run main TestDynMinHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=0 -XX:MaxHeapFreeRatio=100 TestDynMinHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=10 -XX:MaxHeapFreeRatio=50 -XX:-UseAdaptiveSizePolicy TestDynMinHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=10 -XX:MaxHeapFreeRatio=50 TestDynMinHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=51 -XX:MaxHeapFreeRatio=52 TestDynMinHeapFreeRatio + * @run main/othervm -XX:MinHeapFreeRatio=75 -XX:MaxHeapFreeRatio=100 TestDynMinHeapFreeRatio + */ +import com.oracle.java.testlibrary.TestDynamicVMOption; +import com.oracle.java.testlibrary.DynamicVMOptionChecker; + +public class TestDynMinHeapFreeRatio extends TestDynamicVMOption { + + public static final String MinFreeRatioFlagName = "MinHeapFreeRatio"; + public static final String MaxFreeRatioFlagName = "MaxHeapFreeRatio"; + + public TestDynMinHeapFreeRatio() { + super(MinFreeRatioFlagName); + } + + public void test() { + int maxHeapFreeValue = DynamicVMOptionChecker.getIntValue(MaxFreeRatioFlagName); + System.out.println(MaxFreeRatioFlagName + " = " + maxHeapFreeValue); + + testPercentageValues(); + + checkInvalidValue(Integer.toString(maxHeapFreeValue + 1)); + checkValidValue(Integer.toString(maxHeapFreeValue)); + checkValidValue("0"); + } + + public static void main(String args[]) throws Exception { + new TestDynMinHeapFreeRatio().test(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestGCLogMessages.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestGCLogMessages.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestPrintGCDetails + * @bug 8035406 8027295 8035398 + * @summary Ensure that the PrintGCDetails output for a minor GC with G1 + * includes the expected necessary messages. 
+ * @key gc + * @library /testlibrary + */ + +import com.oracle.java.testlibrary.ProcessTools; +import com.oracle.java.testlibrary.OutputAnalyzer; + +public class TestGCLogMessages { + public static void main(String[] args) throws Exception { + testNormalLogs(); + testWithToSpaceExhaustionLogs(); + } + + private static void testNormalLogs() throws Exception { + + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC", + "-Xmx10M", + GCTest.class.getName()); + + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + + output.shouldNotContain("[Redirty Cards"); + output.shouldNotContain("[Code Root Purge"); + output.shouldNotContain("[String Dedup Fixup"); + output.shouldNotContain("[Young Free CSet"); + output.shouldNotContain("[Non-Young Free CSet"); + output.shouldHaveExitValue(0); + + pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC", + "-XX:+UseStringDeduplication", + "-Xmx10M", + "-XX:+PrintGCDetails", + GCTest.class.getName()); + + output = new OutputAnalyzer(pb.start()); + + output.shouldContain("[Redirty Cards"); + output.shouldContain("[Code Root Purge"); + output.shouldContain("[String Dedup Fixup"); + output.shouldNotContain("[Young Free CSet"); + output.shouldNotContain("[Non-Young Free CSet"); + output.shouldHaveExitValue(0); + + pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC", + "-XX:+UseStringDeduplication", + "-Xmx10M", + "-XX:+PrintGCDetails", + "-XX:+UnlockExperimentalVMOptions", + "-XX:G1LogLevel=finest", + GCTest.class.getName()); + + output = new OutputAnalyzer(pb.start()); + + output.shouldContain("[Redirty Cards"); + output.shouldContain("[Code Root Purge"); + output.shouldContain("[String Dedup Fixup"); + output.shouldContain("[Young Free CSet"); + output.shouldContain("[Non-Young Free CSet"); + + // also check evacuation failure messages once + output.shouldNotContain("[Evacuation Failure"); + output.shouldNotContain("[Recalculate Used"); + output.shouldNotContain("[Remove Self Forwards"); + output.shouldNotContain("[Restore RemSet"); + output.shouldHaveExitValue(0); + } + + private static void testWithToSpaceExhaustionLogs() throws Exception { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC", + "-Xmx10M", + "-Xmn5M", + "-XX:+PrintGCDetails", + GCTestWithToSpaceExhaustion.class.getName()); + + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldContain("[Evacuation Failure"); + output.shouldNotContain("[Recalculate Used"); + output.shouldNotContain("[Remove Self Forwards"); + output.shouldNotContain("[Restore RemSet"); + output.shouldHaveExitValue(0); + + pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC", + "-Xmx10M", + "-Xmn5M", + "-XX:+PrintGCDetails", + "-XX:+UnlockExperimentalVMOptions", + "-XX:G1LogLevel=finest", + GCTestWithToSpaceExhaustion.class.getName()); + + output = new OutputAnalyzer(pb.start()); + output.shouldContain("[Evacuation Failure"); + output.shouldContain("[Recalculate Used"); + output.shouldContain("[Remove Self Forwards"); + output.shouldContain("[Restore RemSet"); + output.shouldHaveExitValue(0); + } + + static class GCTest { + private static byte[] garbage; + public static void main(String [] args) { + System.out.println("Creating garbage"); + // create 128MB of garbage. 
This should result in at least one GC + for (int i = 0; i < 1024; i++) { + garbage = new byte[128 * 1024]; + } + System.out.println("Done"); + } + } + + static class GCTestWithToSpaceExhaustion { + private static byte[] garbage; + private static byte[] largeObject; + public static void main(String [] args) { + largeObject = new byte[5*1024*1024]; + System.out.println("Creating garbage"); + // create 128MB of garbage. This should result in at least one GC, + // some of them with to-space exhaustion. + for (int i = 0; i < 1024; i++) { + garbage = new byte[128 * 1024]; + } + System.out.println("Done"); + } + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationAgeThreshold.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationAgeThreshold.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestStringDeduplicationAgeThreshold + * @summary Test string deduplication age threshold + * @bug 8029075 + * @key gc + * @library /testlibrary + */ + +public class TestStringDeduplicationAgeThreshold { + public static void main(String[] args) throws Exception { + TestStringDeduplicationTools.testAgeThreshold(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationFullGC.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationFullGC.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestStringDeduplicationFullGC + * @summary Test string deduplication during full GC + * @bug 8029075 + * @key gc + * @library /testlibrary + */ + +public class TestStringDeduplicationFullGC { + public static void main(String[] args) throws Exception { + TestStringDeduplicationTools.testFullGC(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationInterned.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationInterned.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestStringDeduplicationInterned + * @summary Test string deduplication of interned strings + * @bug 8029075 + * @key gc + * @library /testlibrary + */ + +public class TestStringDeduplicationInterned { + public static void main(String[] args) throws Exception { + TestStringDeduplicationTools.testInterned(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationMemoryUsage.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationMemoryUsage.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test TestStringDeduplicationMemoryUsage + * @summary Test string deduplication memory usage + * @bug 8029075 + * @key gc + * @library /testlibrary + */ + +public class TestStringDeduplicationMemoryUsage { + public static void main(String[] args) throws Exception { + TestStringDeduplicationTools.testMemoryUsage(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationPrintOptions.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationPrintOptions.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestStringDeduplicationPrintOptions + * @summary Test string deduplication print options + * @bug 8029075 + * @key gc + * @library /testlibrary + */ + +public class TestStringDeduplicationPrintOptions { + public static void main(String[] args) throws Exception { + TestStringDeduplicationTools.testPrintOptions(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationTableRehash.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationTableRehash.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test TestStringDeduplicationTableRehash + * @summary Test string deduplication table rehash + * @bug 8029075 + * @key gc + * @library /testlibrary + */ + +public class TestStringDeduplicationTableRehash { + public static void main(String[] args) throws Exception { + TestStringDeduplicationTools.testTableRehash(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationTableResize.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationTableResize.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestStringDeduplicationTableResize + * @summary Test string deduplication table resize + * @bug 8029075 + * @key gc + * @library /testlibrary + */ + +public class TestStringDeduplicationTableResize { + public static void main(String[] args) throws Exception { + TestStringDeduplicationTools.testTableResize(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationTools.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationTools.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * Common code for string deduplication tests + */ + +import java.lang.management.*; +import java.lang.reflect.*; +import java.security.*; +import java.util.*; +import com.oracle.java.testlibrary.*; +import sun.misc.*; + +class TestStringDeduplicationTools { + private static final String YoungGC = "YoungGC"; + private static final String FullGC = "FullGC"; + + private static final int Xmn = 50; // MB + private static final int Xms = 100; // MB + private static final int Xmx = 100; // MB + private static final int MB = 1024 * 1024; + private static final int StringLength = 50; + + private static Field valueField; + private static Unsafe unsafe; + private static byte[] dummy; + + static { + try { + Field field = Unsafe.class.getDeclaredField("theUnsafe"); + field.setAccessible(true); + unsafe = (Unsafe)field.get(null); + + valueField = String.class.getDeclaredField("value"); + valueField.setAccessible(true); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static Object getValue(String string) { + try { + return valueField.get(string); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static void doFullGc(int numberOfTimes) { + for (int i = 0; i < numberOfTimes; i++) { + System.out.println("Begin: Full GC " + (i + 1) + "/" + numberOfTimes); + System.gc(); + System.out.println("End: Full GC " + (i + 1) + "/" + numberOfTimes); + } + } + + private static void doYoungGc(int numberOfTimes) { + // Provoke at least numberOfTimes young GCs + final int objectSize = 128; + final int maxObjectInYoung = (Xmn * MB) / objectSize; + for (int i = 0; i < numberOfTimes; i++) { + System.out.println("Begin: Young GC " + (i + 1) + "/" + numberOfTimes); + for (int j = 0; j < maxObjectInYoung + 1; j++) { + dummy = new byte[objectSize]; + } + System.out.println("End: Young GC " + (i + 1) + "/" + numberOfTimes); + } + } + + private static void forceDeduplication(int ageThreshold, String gcType) { + // Force deduplication to happen by either causing a FullGC or a YoungGC. + // We do several collections to also provoke a situation where the + // deduplication thread needs to yield while processing the queue. This + // also tests that the references in the deduplication queue are adjusted + // accordingly. 
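+ // With the constants above, each doYoungGc() pass allocates maxObjectInYoung + 1 objects of objectSize bytes (roughly Xmn megabytes in total), which should be enough to force at least one young collection per pass; running ageThreshold + 3 passes is therefore expected to age the test strings past the deduplication threshold. 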
+ if (gcType.equals(FullGC)) { + doFullGc(3); + } else { + doYoungGc(ageThreshold + 3); + } + } + + private static String generateString(int id) { + StringBuilder builder = new StringBuilder(StringLength); + + builder.append("DeduplicationTestString:" + id + ":"); + + while (builder.length() < StringLength) { + builder.append('X'); + } + + return builder.toString(); + } + + private static ArrayList<String> createStrings(int total, int unique) { + System.out.println("Creating strings: total=" + total + ", unique=" + unique); + if (total % unique != 0) { + throw new RuntimeException("Total must be divisible by unique"); + } + + ArrayList<String> list = new ArrayList<String>(total); + for (int j = 0; j < total / unique; j++) { + for (int i = 0; i < unique; i++) { + list.add(generateString(i)); + } + } + + return list; + } + + private static void verifyStrings(ArrayList<String> list, int uniqueExpected) { + for (;;) { + // Check number of deduplicated strings + ArrayList<Object> unique = new ArrayList<Object>(uniqueExpected); + for (String string: list) { + Object value = getValue(string); + boolean uniqueValue = true; + for (Object obj: unique) { + if (obj == value) { + uniqueValue = false; + break; + } + } + + if (uniqueValue) { + unique.add(value); + } + } + + System.out.println("Verifying strings: total=" + list.size() + + ", uniqueFound=" + unique.size() + + ", uniqueExpected=" + uniqueExpected); + + if (unique.size() == uniqueExpected) { + System.out.println("Deduplication completed"); + break; + } else { + System.out.println("Deduplication not completed, waiting..."); + + // Give the deduplication thread time to complete + try { + Thread.sleep(1000); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + } + + private static OutputAnalyzer runTest(String... extraArgs) throws Exception { + String[] defaultArgs = new String[] { + "-Xmn" + Xmn + "m", + "-Xms" + Xms + "m", + "-Xmx" + Xmx + "m", + "-XX:+UseG1GC", + "-XX:+UnlockDiagnosticVMOptions", + "-XX:+VerifyAfterGC" // Always verify after GC + }; + + ArrayList<String> args = new ArrayList<String>(); + args.addAll(Arrays.asList(defaultArgs)); + args.addAll(Arrays.asList(extraArgs)); + + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(args.toArray(new String[args.size()])); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + System.err.println(output.getStderr()); + System.out.println(output.getStdout()); + return output; + } + + private static class DeduplicationTest { + public static void main(String[] args) { + System.out.println("Begin: DeduplicationTest"); + + final int numberOfStrings = Integer.parseUnsignedInt(args[0]); + final int numberOfUniqueStrings = Integer.parseUnsignedInt(args[1]); + final int ageThreshold = Integer.parseUnsignedInt(args[2]); + final String gcType = args[3]; + + ArrayList<String> list = createStrings(numberOfStrings, numberOfUniqueStrings); + forceDeduplication(ageThreshold, gcType); + verifyStrings(list, numberOfUniqueStrings); + + System.out.println("End: DeduplicationTest"); + } + + public static OutputAnalyzer run(int numberOfStrings, int ageThreshold, String gcType, String... 
 extraArgs) throws Exception { + String[] defaultArgs = new String[] { + "-XX:+UseStringDeduplication", + "-XX:StringDeduplicationAgeThreshold=" + ageThreshold, + DeduplicationTest.class.getName(), + "" + numberOfStrings, + "" + numberOfStrings / 2, + "" + ageThreshold, + gcType + }; + + ArrayList<String> args = new ArrayList<String>(); + args.addAll(Arrays.asList(extraArgs)); + args.addAll(Arrays.asList(defaultArgs)); + + return runTest(args.toArray(new String[args.size()])); + } + } + + private static class InternedTest { + public static void main(String[] args) { + // This test verifies that interned strings are always + // deduplicated when being interned, and never after + // being interned. + + System.out.println("Begin: InternedTest"); + + final int ageThreshold = Integer.parseUnsignedInt(args[0]); + final String baseString = "DeduplicationTestString:" + InternedTest.class.getName(); + + // Create duplicate of baseString + StringBuilder sb1 = new StringBuilder(baseString); + String dupString1 = sb1.toString(); + if (getValue(dupString1) == getValue(baseString)) { + throw new RuntimeException("Values should not match"); + } + + // Force baseString to be inspected for deduplication + // and be inserted into the deduplication hashtable. + forceDeduplication(ageThreshold, FullGC); + + // Wait for deduplication to occur + while (getValue(dupString1) != getValue(baseString)) { + System.out.println("Waiting..."); + try { + Thread.sleep(100); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + // Create a new duplicate of baseString + StringBuilder sb2 = new StringBuilder(baseString); + String dupString2 = sb2.toString(); + if (getValue(dupString2) == getValue(baseString)) { + throw new RuntimeException("Values should not match"); + } + + // Intern the new duplicate + Object beforeInternedValue = getValue(dupString2); + String internedString = dupString2.intern(); + if (internedString != dupString2) { + throw new RuntimeException("String should match"); + } + if (getValue(internedString) != getValue(baseString)) { + throw new RuntimeException("Values should match"); + } + + // Check original value of interned string, to make sure + // deduplication happened on the interned string and not + // on the base string + if (beforeInternedValue == getValue(baseString)) { + throw new RuntimeException("Values should not match"); + } + + System.out.println("End: InternedTest"); + } + + public static OutputAnalyzer run() throws Exception { + return runTest("-XX:+PrintGC", + "-XX:+PrintGCDetails", + "-XX:+UseStringDeduplication", + "-XX:+PrintStringDeduplicationStatistics", + "-XX:StringDeduplicationAgeThreshold=" + DefaultAgeThreshold, + InternedTest.class.getName(), + "" + DefaultAgeThreshold); + } + } + + private static class MemoryUsageTest { + public static void main(String[] args) { + System.out.println("Begin: MemoryUsageTest"); + + final boolean useStringDeduplication = Boolean.parseBoolean(args[0]); + final int numberOfStrings = LargeNumberOfStrings; + final int numberOfUniqueStrings = 1; + + ArrayList<String> list = createStrings(numberOfStrings, numberOfUniqueStrings); + forceDeduplication(DefaultAgeThreshold, FullGC); + + if (useStringDeduplication) { + verifyStrings(list, numberOfUniqueStrings); + } + + System.gc(); + System.out.println("Heap Memory Usage: " + ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed()); + + System.out.println("End: MemoryUsageTest"); + } + + public static OutputAnalyzer run(boolean useStringDeduplication) throws Exception { + String[] extraArgs = new 
String[0]; + + if (useStringDeduplication) { + extraArgs = new String[] { + "-XX:+UseStringDeduplication", + "-XX:+PrintStringDeduplicationStatistics", + "-XX:StringDeduplicationAgeThreshold=" + DefaultAgeThreshold + }; + } + + String[] defaultArgs = new String[] { + "-XX:+PrintGC", + "-XX:+PrintGCDetails", + MemoryUsageTest.class.getName(), + "" + useStringDeduplication + }; + + ArrayList<String> args = new ArrayList<String>(); + args.addAll(Arrays.asList(extraArgs)); + args.addAll(Arrays.asList(defaultArgs)); + + return runTest(args.toArray(new String[args.size()])); + } + } + + /* + * Tests + */ + + private static final int LargeNumberOfStrings = 10000; + private static final int SmallNumberOfStrings = 10; + + private static final int MaxAgeThreshold = 15; + private static final int DefaultAgeThreshold = 3; + private static final int MinAgeThreshold = 1; + + private static final int TooLowAgeThreshold = MinAgeThreshold - 1; + private static final int TooHighAgeThreshold = MaxAgeThreshold + 1; + + public static void testYoungGC() throws Exception { + // Do young GC to age strings to provoke deduplication + OutputAnalyzer output = DeduplicationTest.run(LargeNumberOfStrings, + DefaultAgeThreshold, + YoungGC, + "-XX:+PrintGC", + "-XX:+PrintStringDeduplicationStatistics"); + output.shouldNotContain("Full GC"); + output.shouldContain("GC pause (G1 Evacuation Pause) (young)"); + output.shouldContain("GC concurrent-string-deduplication"); + output.shouldContain("Deduplicated:"); + output.shouldHaveExitValue(0); + } + + public static void testFullGC() throws Exception { + // Do full GC to age strings to provoke deduplication + OutputAnalyzer output = DeduplicationTest.run(LargeNumberOfStrings, + DefaultAgeThreshold, + FullGC, + "-XX:+PrintGC", + "-XX:+PrintStringDeduplicationStatistics"); + output.shouldNotContain("GC pause (G1 Evacuation Pause) (young)"); + output.shouldContain("Full GC"); + output.shouldContain("GC concurrent-string-deduplication"); + output.shouldContain("Deduplicated:"); + output.shouldHaveExitValue(0); + } + + public static void testTableResize() throws Exception { + // Test with StringDeduplicationResizeALot + OutputAnalyzer output = DeduplicationTest.run(LargeNumberOfStrings, + DefaultAgeThreshold, + YoungGC, + "-XX:+PrintGC", + "-XX:+PrintStringDeduplicationStatistics", + "-XX:+StringDeduplicationResizeALot"); + output.shouldContain("GC concurrent-string-deduplication"); + output.shouldContain("Deduplicated:"); + output.shouldNotContain("Resize Count: 0"); + output.shouldHaveExitValue(0); + } + + public static void testTableRehash() throws Exception { + // Test with StringDeduplicationRehashALot + OutputAnalyzer output = DeduplicationTest.run(LargeNumberOfStrings, + DefaultAgeThreshold, + YoungGC, + "-XX:+PrintGC", + "-XX:+PrintStringDeduplicationStatistics", + "-XX:+StringDeduplicationRehashALot"); + output.shouldContain("GC concurrent-string-deduplication"); + output.shouldContain("Deduplicated:"); + output.shouldNotContain("Rehash Count: 0"); + output.shouldNotContain("Hash Seed: 0x0"); + output.shouldHaveExitValue(0); + } + + public static void testAgeThreshold() throws Exception { + OutputAnalyzer output; + + // Test with max age threshold + output = DeduplicationTest.run(SmallNumberOfStrings, + MaxAgeThreshold, + YoungGC, + "-XX:+PrintGC", + "-XX:+PrintStringDeduplicationStatistics"); + output.shouldContain("GC concurrent-string-deduplication"); + output.shouldContain("Deduplicated:"); + output.shouldHaveExitValue(0); + + // Test with min age threshold + output = 
DeduplicationTest.run(SmallNumberOfStrings, + MinAgeThreshold, + YoungGC, + "-XX:+PrintGC", + "-XX:+PrintStringDeduplicationStatistics"); + output.shouldContain("GC concurrent-string-deduplication"); + output.shouldContain("Deduplicated:"); + output.shouldHaveExitValue(0); + + // Test with too low age threshold + output = DeduplicationTest.run(SmallNumberOfStrings, + TooLowAgeThreshold, + YoungGC); + output.shouldContain("StringDeduplicationAgeThreshold of " + TooLowAgeThreshold + + " is invalid; must be between " + MinAgeThreshold + " and " + MaxAgeThreshold); + output.shouldHaveExitValue(1); + + // Test with too high age threshold + output = DeduplicationTest.run(SmallNumberOfStrings, + TooHighAgeThreshold, + YoungGC); + output.shouldContain("StringDeduplicationAgeThreshold of " + TooHighAgeThreshold + + " is invalid; must be between " + MinAgeThreshold + " and " + MaxAgeThreshold); + output.shouldHaveExitValue(1); + } + + public static void testPrintOptions() throws Exception { + OutputAnalyzer output; + + // Test without PrintGC and without PrintStringDeduplicationStatistics + output = DeduplicationTest.run(SmallNumberOfStrings, + DefaultAgeThreshold, + YoungGC); + output.shouldNotContain("GC concurrent-string-deduplication"); + output.shouldNotContain("Deduplicated:"); + output.shouldHaveExitValue(0); + + // Test with PrintGC but without PrintStringDeduplicationStatistics + output = DeduplicationTest.run(SmallNumberOfStrings, + DefaultAgeThreshold, + YoungGC, + "-XX:+PrintGC"); + output.shouldContain("GC concurrent-string-deduplication"); + output.shouldNotContain("Deduplicated:"); + output.shouldHaveExitValue(0); + } + + public static void testInterned() throws Exception { + // Test that interned strings are deduplicated before being interned + OutputAnalyzer output = InternedTest.run(); + output.shouldHaveExitValue(0); + } + + public static void testMemoryUsage() throws Exception { + // Test that memory usage is reduced after deduplication + OutputAnalyzer output; + final String usagePattern = "Heap Memory Usage: (\\d+)"; + + // Run without deduplication + output = MemoryUsageTest.run(false); + output.shouldHaveExitValue(0); + final long memoryUsageWithoutDedup = Long.parseLong(output.firstMatch(usagePattern, 1)); + + // Run with deduplication + output = MemoryUsageTest.run(true); + output.shouldHaveExitValue(0); + final long memoryUsageWithDedup = Long.parseLong(output.firstMatch(usagePattern, 1)); + + // Calculate expected memory usage with deduplication enabled. This calculation does + // not take alignment and padding into account, so it's a conservative estimate. 
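+ // Illustrative arithmetic (assuming a typical 16-byte char[] header; the actual value is taken from Unsafe.ARRAY_CHAR_BASE_OFFSET below): with LargeNumberOfStrings = 10000 and StringLength = 50, deduplication can drop 9999 duplicate char arrays of about 50 * 2 + 16 = 116 bytes each, roughly 1.1 MB in total. 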
+ final long sizeOfChar = 2; // bytes + final long bytesSaved = (LargeNumberOfStrings - 1) * (StringLength * sizeOfChar + unsafe.ARRAY_CHAR_BASE_OFFSET); + final long memoryUsageWithDedupExpected = memoryUsageWithoutDedup - bytesSaved; + + System.out.println("Memory usage summary:"); + System.out.println(" memoryUsageWithoutDedup: " + memoryUsageWithoutDedup); + System.out.println(" memoryUsageWithDedup: " + memoryUsageWithDedup); + System.out.println(" memoryUsageWithDedupExpected: " + memoryUsageWithDedupExpected); + + if (memoryUsageWithDedup > memoryUsageWithDedupExpected) { + throw new Exception("Unexpected memory usage, memoryUsageWithDedup should be less than or equal to memoryUsageWithDedupExpected"); + } + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/gc/g1/TestStringDeduplicationYoungGC.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringDeduplicationYoungGC.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestStringDeduplicationYoungGC + * @summary Test string deduplication during young GC + * @bug 8029075 + * @key gc + * @library /testlibrary + */ + +public class TestStringDeduplicationYoungGC { + public static void main(String[] args) throws Exception { + TestStringDeduplicationTools.testYoungGC(); + } +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/testlibrary/com/oracle/java/testlibrary/Asserts.java --- a/test/testlibrary/com/oracle/java/testlibrary/Asserts.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/testlibrary/com/oracle/java/testlibrary/Asserts.java Tue Mar 25 17:07:36 2014 -0700 @@ -378,6 +378,64 @@ } } + /** + * Asserts that two strings are equal. + * + * If the strings are not equal, then the exception message + * will contain {@code msg} followed by a list of mismatched lines. + * + * @param str1 First string to compare. + * @param str2 Second string to compare. + * @param msg A description of the assumption. + * @throws RuntimeException if strings are not equal. + */ + public static void assertStringsEqual(String str1, String str2, + String msg) { + String lineSeparator = System.getProperty("line.separator"); + String str1Lines[] = str1.split(lineSeparator); + String str2Lines[] = str2.split(lineSeparator); + + int minLength = Math.min(str1Lines.length, str2Lines.length); + String longestStringLines[] = ((str1Lines.length == minLength) ? 
+ str2Lines : str1Lines); + + boolean stringsAreDifferent = false; + + StringBuilder messageBuilder = new StringBuilder(msg); + + messageBuilder.append("\n"); + + for (int line = 0; line < minLength; line++) { + if (!str1Lines[line].equals(str2Lines[line])) { + messageBuilder.append(String. + format("[line %d] '%s' differs " + + "from '%s'\n", + line, + str1Lines[line], + str2Lines[line])); + stringsAreDifferent = true; + } + } + + if (minLength < longestStringLines.length) { + String stringName = ((longestStringLines == str1Lines) ? + "first" : "second"); + messageBuilder.append(String.format("Only %s string contains " + + "following lines:\n", + stringName)); + stringsAreDifferent = true; + for(int line = minLength; line < longestStringLines.length; line++) { + messageBuilder.append(String. + format("[line %d] '%s'", line, + longestStringLines[line])); + } + } + + if (stringsAreDifferent) { + error(messageBuilder.toString()); + } + } + private static <T extends Comparable<T>> int compare(T lhs, T rhs, String msg) { assertNotNull(lhs, msg); assertNotNull(rhs, msg); diff -r a433eb716ce1 -r 62c54fcc0a35 test/testlibrary/com/oracle/java/testlibrary/DynamicVMOptionChecker.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/testlibrary/com/oracle/java/testlibrary/DynamicVMOptionChecker.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package com.oracle.java.testlibrary; + +import com.sun.management.HotSpotDiagnosticMXBean; +import com.sun.management.VMOption; +import java.lang.management.ManagementFactory; + +/** + * Simple class to check writeability, invalid and valid values for a VMOption + */ +public class DynamicVMOptionChecker { + + /** + * Reads a VM option from the PlatformMXBean and parses it to an integer value + * + * @param name of option + * @return parsed value + */ + public static int getIntValue(String name) { + + VMOption option = ManagementFactory. + getPlatformMXBean(HotSpotDiagnosticMXBean.class).
+ getVMOption(name); + + return Integer.parseInt(option.getValue()); + } + + /** + * Sets VM option value + * + * @param name of option + * @param value to set + */ + public static void setIntValue(String name, int value) { + ManagementFactory.getPlatformMXBean(HotSpotDiagnosticMXBean.class).setVMOption(name, Integer.toString(value)); + } + + /** + * Checks that VM option is dynamically writable + * + * @param name of option + * @throws RuntimeException if the option is not writable + * @return always true + */ + public static boolean checkIsWritable(String name) { + VMOption option = ManagementFactory. + getPlatformMXBean(HotSpotDiagnosticMXBean.class). + getVMOption(name); + + if (!option.isWriteable()) { + throw new RuntimeException(name + " is not writable"); + } + + return true; + } + + /** + * Checks that value cannot be set + * + * @param name of flag + * @param value string representation of value to set + * @throws RuntimeException on error - when expected exception hasn't been thrown + */ + public static void checkInvalidValue(String name, String value) { + // should throw + try { + ManagementFactory. + getPlatformMXBean(HotSpotDiagnosticMXBean.class). + setVMOption(name, value); + + } catch (IllegalArgumentException e) { + return; + } + + throw new RuntimeException("Expected IllegalArgumentException was not thrown, " + name + "= " + value); + } + + /** + * Checks that value can be set + * + * @param name of flag to set + * @param value string representation of value to set + * @throws RuntimeException on error - when the value in the VM is not equal to the value that was set + */ + public static void checkValidValue(String name, String value) { + ManagementFactory. + getPlatformMXBean(HotSpotDiagnosticMXBean.class). + setVMOption(name, value); + + VMOption option = ManagementFactory. + getPlatformMXBean(HotSpotDiagnosticMXBean.class). + getVMOption(name); + + if (!option.getValue().equals(value)) { + throw new RuntimeException("Actual value of " + name + " \"" + option.getValue() + + "\" is not equal to origin \"" + value + "\""); + } + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/testlibrary/com/oracle/java/testlibrary/TestDynamicVMOption.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/testlibrary/com/oracle/java/testlibrary/TestDynamicVMOption.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ */ +package com.oracle.java.testlibrary; + +/** + * Simple class to check writeability, invalid and valid values for a concrete VMOption + */ +public class TestDynamicVMOption { + + private final String name; + private final int value; + + /** + * Constructor + * + * @param name of VM option to test + */ + public TestDynamicVMOption(String name) { + this.name = name; + this.value = DynamicVMOptionChecker.getIntValue(name); + System.out.println(this.name + " = " + this.value); + } + + /** + * Checks that this option cannot accept invalid percentage values + * + * @throws RuntimeException if an invalid value is accepted + */ + public void testPercentageValues() { + checkInvalidValue(Integer.toString(Integer.MIN_VALUE)); + checkInvalidValue(Integer.toString(Integer.MAX_VALUE)); + checkInvalidValue("-10"); + checkInvalidValue("190"); + } + + /** + * Reads the VM option from the PlatformMXBean and parses it to an integer value + * + * @return value + */ + public int getIntValue() { + return DynamicVMOptionChecker.getIntValue(this.name); + } + + /** + * Sets VM option value + * + * @param value to set + */ + public void setIntValue(int value) { + DynamicVMOptionChecker.setIntValue(this.name, value); + } + + /** + * Checks that this VM option is dynamically writable + * + * @throws RuntimeException if the option is not writable + * @return true + */ + public boolean checkIsWritable() throws RuntimeException { + return DynamicVMOptionChecker.checkIsWritable(this.name); + } + + /** + * Checks that value for this VM option cannot be set + * + * @param value to check + * @throws RuntimeException on error - when expected exception hasn't been thrown + */ + public void checkInvalidValue(String value) { + DynamicVMOptionChecker.checkInvalidValue(this.name, value); + } + + /** + * Checks that value for this VM option can be set + * + * @param value to check + * @throws RuntimeException on error - when the value in the VM is not equal to the value that was set + */ + public void checkValidValue(String value) { + DynamicVMOptionChecker.checkValidValue(this.name, value); + } + +} diff -r a433eb716ce1 -r 62c54fcc0a35 test/testlibrary/whitebox/sun/hotspot/WhiteBox.java --- a/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java Tue Mar 25 12:54:21 2014 -0700 +++ b/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java Tue Mar 25 17:07:36 2014 -0700 @@ -150,4 +150,7 @@ public native void runMemoryUnitTests(); public native void readFromNoaccessArea(); + // CPU features + public native String getCPUFeatures(); + } diff -r a433eb716ce1 -r 62c54fcc0a35 test/testlibrary/whitebox/sun/hotspot/cpuinfo/CPUInfo.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/testlibrary/whitebox/sun/hotspot/cpuinfo/CPUInfo.java Tue Mar 25 17:07:36 2014 -0700 @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.hotspot.cpuinfo; + +import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import sun.hotspot.WhiteBox; + +/** + * Information about the CPU on the test box. + * + * CPUInfo uses WhiteBox to gather information, + * so the WhiteBox class should be added to the bootclasspath + * and the option -XX:+WhiteBoxAPI should be explicitly + * specified on the command line. + */ +public class CPUInfo { + + private static final List<String> features; + private static final String additionalCPUInfo; + + static { + WhiteBox wb = WhiteBox.getWhiteBox(); + + Pattern additionalCPUInfoRE = + Pattern.compile("([^(]*\\([^)]*\\)[^,]*),\\s*"); + + String cpuFeaturesString = wb.getCPUFeatures(); + Matcher matcher = additionalCPUInfoRE.matcher(cpuFeaturesString); + if (matcher.find()) { + additionalCPUInfo = matcher.group(1); + } else { + additionalCPUInfo = ""; + } + String splittedFeatures[] = matcher.replaceAll("").split("(, )| "); + + features = Collections.unmodifiableList(Arrays. + asList(splittedFeatures)); + } + + /** + * Get additional information about the CPU. + * For example, on X86 it will be family/model/stepping + * and the number of cores. + * + * @return additional CPU info + */ + public static String getAdditionalCPUInfo() { + return additionalCPUInfo; + } + + /** + * Get all known features supported by the CPU. + * + * @return unmodifiable list with the names of all known features + * supported by the CPU. + */ + public static List<String> getFeatures() { + return features; + } + + /** + * Check if a feature is supported by the CPU. + * + * @param feature Name of feature to be tested. + * @return true if the tested feature is supported by the CPU. + */ + public static boolean hasFeature(String feature) { + return features.contains(feature.toLowerCase()); + } +}
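
For context on how the new test-library pieces fit together, the sketch below (not part of the changeset above) shows one way a jtreg test could combine CPUInfo with TestDynamicVMOption. The class name UsageSketch, the flag MinHeapFreeRatio, and the feature string "sse4.2" are illustrative assumptions only; as the CPUInfo Javadoc notes, the test would need the WhiteBox classes on the bootclasspath and would have to run with -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI.

import java.util.List;

import com.oracle.java.testlibrary.TestDynamicVMOption;
import sun.hotspot.cpuinfo.CPUInfo;

public class UsageSketch {
    public static void main(String[] args) throws Exception {
        // Query CPU information gathered through WhiteBox.getCPUFeatures().
        System.out.println("CPU: " + CPUInfo.getAdditionalCPUInfo());
        List<String> features = CPUInfo.getFeatures();
        System.out.println("Features: " + features);
        if (CPUInfo.hasFeature("sse4.2")) { // feature name is an illustrative assumption
            System.out.println("sse4.2 is supported");
        }

        // Exercise a percentage flag through the new option helpers.
        // MinHeapFreeRatio is assumed here to be dynamically writable.
        TestDynamicVMOption option = new TestDynamicVMOption("MinHeapFreeRatio");
        option.checkIsWritable();      // throws if the flag is not writable
        option.testPercentageValues(); // out-of-range values must be rejected
        option.checkValidValue("25");  // sets the flag and verifies the new value
    }
}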