# HG changeset patch # User amurillo # Date 1351285792 25200 # Node ID dc16fe422c535ecd4e9f80fb814a1bb9704da6f5 # Parent 556dd9e475c679fe34ee96c771a4a4572ff97c8f# Parent 588f08ed16cf8229f599219c8d3d9276e2dd8ed5 Merge diff -r 556dd9e475c6 -r dc16fe422c53 agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java --- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java Thu Oct 25 09:53:16 2012 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java Fri Oct 26 14:09:52 2012 -0700 @@ -272,9 +272,10 @@ public static final int _fast_aldc = 229; public static final int _fast_aldc_w = 230; public static final int _return_register_finalizer = 231; - public static final int _shouldnotreachhere = 232; // For debugging + public static final int _invokehandle = 232; + public static final int _shouldnotreachhere = 233; // For debugging - public static final int number_of_codes = 233; + public static final int number_of_codes = 234; // Flag bits derived from format strings, can_trap, can_rewrite, etc.: // semantic flags: @@ -787,20 +788,22 @@ def(_fast_aaccess_0 , "fast_aaccess_0" , "b_JJ" , null , BasicType.getTObject() , 1, true , _aload_0 ); def(_fast_faccess_0 , "fast_faccess_0" , "b_JJ" , null , BasicType.getTObject() , 1, true , _aload_0 ); - def(_fast_iload , "fast_iload" , "bi" , null , BasicType.getTInt() , 1, false, _iload); - def(_fast_iload2 , "fast_iload2" , "bi_i" , null , BasicType.getTInt() , 2, false, _iload); - def(_fast_icaload , "fast_icaload" , "bi_" , null , BasicType.getTInt() , 0, false, _iload); + def(_fast_iload , "fast_iload" , "bi" , null , BasicType.getTInt() , 1, false, _iload ); + def(_fast_iload2 , "fast_iload2" , "bi_i" , null , BasicType.getTInt() , 2, false, _iload ); + def(_fast_icaload , "fast_icaload" , "bi_" , null , BasicType.getTInt() , 0, false, _iload ); // Faster method invocation. - def(_fast_invokevfinal , "fast_invokevfinal" , "bJJ" , null , BasicType.getTIllegal(), -1, true, _invokevirtual); + def(_fast_invokevfinal , "fast_invokevfinal" , "bJJ" , null , BasicType.getTIllegal(), -1, true, _invokevirtual ); def(_fast_linearswitch , "fast_linearswitch" , "" , null , BasicType.getTVoid() , -1, false, _lookupswitch ); def(_fast_binaryswitch , "fast_binaryswitch" , "" , null , BasicType.getTVoid() , -1, false, _lookupswitch ); + def(_fast_aldc , "fast_aldc" , "bj" , null , BasicType.getTObject(), 1, true, _ldc ); + def(_fast_aldc_w , "fast_aldc_w" , "bJJ" , null , BasicType.getTObject(), 1, true, _ldc_w ); def(_return_register_finalizer, "return_register_finalizer", "b" , null , BasicType.getTVoid() , 0, true, _return ); - def(_fast_aldc , "fast_aldc" , "bj" , null , BasicType.getTObject(), 1, true, _ldc ); - def(_fast_aldc_w , "fast_aldc_w" , "bJJ" , null , BasicType.getTObject(), 1, true, _ldc_w ); + // special handling of signature-polymorphic methods + def(_invokehandle , "invokehandle" , "bJJ" , null , BasicType.getTIllegal(), -1, true, _invokevirtual ); def(_shouldnotreachhere , "_shouldnotreachhere" , "b" , null , BasicType.getTVoid() , 0, false); diff -r 556dd9e475c6 -r dc16fe422c53 agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java --- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java Thu Oct 25 09:53:16 2012 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Bytes.java Fri Oct 26 14:09:52 2012 -0700 @@ -30,24 +30,10 @@ /** Encapsulates some byte-swapping operations defined in the VM */ public class Bytes { - // swap if client platform is different from server's. 
private boolean swap; public Bytes(MachineDescription machDesc) { - String cpu = PlatformInfo.getCPU(); - if (cpu.equals("sparc")) { - if (machDesc.isBigEndian()) { - swap = false; - } else { - swap = true; - } - } else { // intel - if (machDesc.isBigEndian()) { - swap = true; - } else { - swap = false; - } - } + swap = !machDesc.isBigEndian(); } /** Should only swap if the hardware's underlying byte order is diff -r 556dd9e475c6 -r dc16fe422c53 agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java --- a/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java Thu Oct 25 09:53:16 2012 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ByteCodeRewriter.java Fri Oct 26 14:09:52 2012 -0700 @@ -29,6 +29,11 @@ import sun.jvm.hotspot.utilities.*; import sun.jvm.hotspot.debugger.*; import sun.jvm.hotspot.runtime.*; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.security.AccessControlContext; +import java.security.PrivilegedExceptionAction; +import java.security.PrivilegedActionException; public class ByteCodeRewriter { @@ -38,8 +43,20 @@ private byte[] code; private Bytes bytes; - public static final boolean DEBUG = false; private static final int jintSize = 4; + public static final boolean DEBUG; + + static { + String debug = (String) AccessController.doPrivileged( + new PrivilegedAction() { + public Object run() { + return System.getProperty("sun.jvm.hotspot.tools.jcore.ByteCodeRewriter.DEBUG"); + } + } + ); + DEBUG = (debug != null ? debug.equalsIgnoreCase("true") : false); + } + protected void debugMessage(String message) { System.out.println(message); @@ -54,6 +71,18 @@ } + protected short getConstantPoolIndexFromRefMap(int rawcode, int bci) { + int refIndex; + String fmt = Bytecodes.format(rawcode); + switch (fmt.length()) { + case 2: refIndex = 0xFF & method.getBytecodeByteArg(bci); break; + case 3: refIndex = 0xFFFF & bytes.swapShort(method.getBytecodeShortArg(bci)); break; + default: throw new IllegalArgumentException(); + } + + return (short)cpool.objectToCPIndex(refIndex); + } + protected short getConstantPoolIndex(int rawcode, int bci) { // get ConstantPool index from ConstantPoolCacheIndex at given bci String fmt = Bytecodes.format(rawcode); @@ -95,6 +124,12 @@ int hotspotcode = Bytecodes._illegal; int len = 0; + if (DEBUG) { + String msg = method.getMethodHolder().getName().asString() + "." 
+ + method.getName().asString() + + method.getSignature().asString(); + debugMessage(msg); + } for (int bci = 0; bci < code.length;) { hotspotcode = Bytecodes.codeAt(method, bci); bytecode = Bytecodes.javaCode(hotspotcode); @@ -133,15 +168,15 @@ case Bytecodes._ldc_w: if (hotspotcode != bytecode) { - // fast_aldc_w puts constant in CP cache - cpoolIndex = getConstantPoolIndex(hotspotcode, bci + 1); + // fast_aldc_w puts constant in reference map + cpoolIndex = getConstantPoolIndexFromRefMap(hotspotcode, bci + 1); writeShort(code, bci + 1, cpoolIndex); } break; case Bytecodes._ldc: if (hotspotcode != bytecode) { - // fast_aldc puts constant in CP cache - cpoolIndex = getConstantPoolIndex(hotspotcode, bci + 1); + // fast_aldc puts constant in reference map + cpoolIndex = getConstantPoolIndexFromRefMap(hotspotcode, bci + 1); code[bci + 1] = (byte)(cpoolIndex); } break; diff -r 556dd9e475c6 -r dc16fe422c53 make/excludeSrc.make --- a/make/excludeSrc.make Thu Oct 25 09:53:16 2012 -0700 +++ b/make/excludeSrc.make Fri Oct 26 14:09:52 2012 -0700 @@ -79,10 +79,10 @@ CXXFLAGS += -DSERIALGC CFLAGS += -DSERIALGC Src_Files_EXCLUDE += \ - binaryTreeDictionary.cpp cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \ + cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \ cmsGCAdaptivePolicyCounters.cpp cmsLockVerifier.cpp cmsPermGen.cpp compactibleFreeListSpace.cpp \ - concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp freeBlockDictionary.cpp \ - freeChunk.cpp freeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \ + concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp \ + freeChunk.cpp adaptiveFreeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \ concurrentG1Refine.cpp concurrentG1RefineThread.cpp concurrentMark.cpp concurrentMarkThread.cpp \ dirtyCardQueue.cpp g1AllocRegion.cpp g1BlockOffsetTable.cpp g1CollectedHeap.cpp g1GCPhaseTimes.cpp \ g1CollectorPolicy.cpp g1ErgoVerbose.cpp g1_globals.cpp g1HRPrinter.cpp g1MarkSweep.cpp \ diff -r 556dd9e475c6 -r dc16fe422c53 make/hotspot_version --- a/make/hotspot_version Thu Oct 25 09:53:16 2012 -0700 +++ b/make/hotspot_version Fri Oct 26 14:09:52 2012 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=25 HS_MINOR_VER=0 -HS_BUILD_NUMBER=06 +HS_BUILD_NUMBER=07 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -1007,6 +1007,67 @@ emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); } +void Assembler::aesdec(XMMRegister dst, Address src) { + assert(VM_Version::supports_aes(), ""); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0xde); + emit_operand(dst, src); +} + +void Assembler::aesdec(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_aes(), ""); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0xde); + emit_byte(0xC0 | encode); +} + +void Assembler::aesdeclast(XMMRegister dst, Address src) { + assert(VM_Version::supports_aes(), ""); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0xdf); + emit_operand(dst, src); +} + +void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_aes(), ""); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0xdf); + emit_byte(0xC0 | encode); +} + +void 
Assembler::aesenc(XMMRegister dst, Address src) { + assert(VM_Version::supports_aes(), ""); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0xdc); + emit_operand(dst, src); +} + +void Assembler::aesenc(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_aes(), ""); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0xdc); + emit_byte(0xC0 | encode); +} + +void Assembler::aesenclast(XMMRegister dst, Address src) { + assert(VM_Version::supports_aes(), ""); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0xdd); + emit_operand(dst, src); +} + +void Assembler::aesenclast(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_aes(), ""); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0xdd); + emit_byte(0xC0 | encode); +} + + void Assembler::andl(Address dst, int32_t imm32) { InstructionMark im(this); prefix(dst); @@ -2307,6 +2368,22 @@ a_byte(p); } +void Assembler::pshufb(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_ssse3(), ""); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x00); + emit_byte(0xC0 | encode); +} + +void Assembler::pshufb(XMMRegister dst, Address src) { + assert(VM_Version::supports_ssse3(), ""); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x00); + emit_operand(dst, src); +} + void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); @@ -8067,6 +8144,15 @@ LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); } +void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::movdqu(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::movdqu(dst, Address(rscratch1, 0)); + } +} + void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { Assembler::movsd(dst, as_Address(src)); @@ -8357,6 +8443,17 @@ } } +void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. 
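
[Editor's note, illustrative only] The four AES encodings added above (aesenc = 66 0F 38 DC, aesenclast = 66 0F 38 DD, aesdec = 66 0F 38 DE, aesdeclast = 66 0F 38 DF) compute the same rounds as the compiler intrinsics in <wmmintrin.h>. A minimal user-level sketch of one 128-bit block encryption, assuming an already-expanded AES-128 key in rk[0..10] (the helper name and signature are hypothetical, not part of this change; compile with -maes):

    #include <wmmintrin.h>
    #include <cstdint>

    // Encrypt one 16-byte block with an expanded AES-128 key (11 round keys).
    void aes128_encrypt_block(const uint8_t in[16], uint8_t out[16], const __m128i rk[11]) {
      __m128i state = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in));
      state = _mm_xor_si128(state, rk[0]);          // initial AddRoundKey (the pxor above)
      for (int r = 1; r < 10; r++) {
        state = _mm_aesenc_si128(state, rk[r]);     // one full AES round per aesenc
      }
      state = _mm_aesenclast_si128(state, rk[10]);  // final round, no MixColumns
      _mm_storeu_si128(reinterpret_cast<__m128i*>(out), state);
    }
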
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::pshufb(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::pshufb(dst, Address(rscratch1, 0)); + } +} + // AVX 3-operands instructions void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/assembler_x86.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -875,6 +875,17 @@ void addss(XMMRegister dst, Address src); void addss(XMMRegister dst, XMMRegister src); + // AES instructions + void aesdec(XMMRegister dst, Address src); + void aesdec(XMMRegister dst, XMMRegister src); + void aesdeclast(XMMRegister dst, Address src); + void aesdeclast(XMMRegister dst, XMMRegister src); + void aesenc(XMMRegister dst, Address src); + void aesenc(XMMRegister dst, XMMRegister src); + void aesenclast(XMMRegister dst, Address src); + void aesenclast(XMMRegister dst, XMMRegister src); + + void andl(Address dst, int32_t imm32); void andl(Register dst, int32_t imm32); void andl(Register dst, Address src); @@ -1424,6 +1435,10 @@ void prefetcht2(Address src); void prefetchw(Address src); + // Shuffle Bytes + void pshufb(XMMRegister dst, XMMRegister src); + void pshufb(XMMRegister dst, Address src); + // Shuffle Packed Doublewords void pshufd(XMMRegister dst, XMMRegister src, int mode); void pshufd(XMMRegister dst, Address src, int mode); @@ -2611,6 +2626,12 @@ void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } void divss(XMMRegister dst, AddressLiteral src); + // Move Unaligned Double Quadword + void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); } + void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); } + void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); } + void movdqu(XMMRegister dst, AddressLiteral src); + void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } @@ -2658,6 +2679,10 @@ void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } void xorps(XMMRegister dst, AddressLiteral src); + // Shuffle Bytes + void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } + void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } + void pshufb(XMMRegister dst, AddressLiteral src); // AVX 3-operands instructions void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -2137,6 +2137,529 @@ } } + // AES intrinsic stubs + enum {AESBlockSize = 16}; + + address generate_key_shuffle_mask() { + __ align(16); + StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); + address start = __ pc(); + __ emit_data(0x00010203, relocInfo::none, 0 ); + __ emit_data(0x04050607, relocInfo::none, 0 ); + __ emit_data(0x08090a0b, relocInfo::none, 0 ); + __ emit_data(0x0c0d0e0f, relocInfo::none, 0 ); + return start; + } + + // Utility routine for loading a 128-bit key word in little endian format + // can optionally 
specify that the shuffle mask is already in an xmmregister + void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + __ movdqu(xmmdst, Address(key, offset)); + if (xmm_shuf_mask != NULL) { + __ pshufb(xmmdst, xmm_shuf_mask); + } else { + __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + } + } + + // aesenc using specified key+offset + // can optionally specify that the shuffle mask is already in an xmmregister + void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + load_key(xmmtmp, key, offset, xmm_shuf_mask); + __ aesenc(xmmdst, xmmtmp); + } + + // aesdec using specified key+offset + // can optionally specify that the shuffle mask is already in an xmmregister + void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + load_key(xmmtmp, key, offset, xmm_shuf_mask); + __ aesdec(xmmdst, xmmtmp); + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_encryptBlock() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + Label L_doLast; + address start = __ pc(); + + const Register from = rsi; // source array address + const Register to = rdx; // destination array address + const Register key = rcx; // key array address + const Register keylen = rax; + const Address from_param(rbp, 8+0); + const Address to_param (rbp, 8+4); + const Address key_param (rbp, 8+8); + + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + const XMMRegister xmm_key_shuf_mask = xmm2; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ push(rsi); + __ movptr(from , from_param); + __ movptr(to , to_param); + __ movptr(key , key_param); + + __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + // keylen = # of 32-bit words, convert to 128-bit words + __ shrl(keylen, 2); + __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more + + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input + + // For encryption, the java expanded key ordering is just what we need + + load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); + __ pxor(xmm_result, xmm_temp); + for (int offset = 0x10; offset <= 0x90; offset += 0x10) { + aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); + } + load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); + __ cmpl(keylen, 0); + __ jcc(Assembler::equal, L_doLast); + __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys + aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); + __ subl(keylen, 2); + __ jcc(Assembler::equal, L_doLast); + __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys + aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); + + __ BIND(L_doLast); + __ aesenclast(xmm_result, xmm_temp); + __ movdqu(Address(to, 0), xmm_result); // store the result + __ xorptr(rax, rax); // return 0 + __ pop(rsi); + __ leave(); // required for proper 
stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_decryptBlock() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + Label L_doLast; + address start = __ pc(); + + const Register from = rsi; // source array address + const Register to = rdx; // destination array address + const Register key = rcx; // key array address + const Register keylen = rax; + const Address from_param(rbp, 8+0); + const Address to_param (rbp, 8+4); + const Address key_param (rbp, 8+8); + + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + const XMMRegister xmm_key_shuf_mask = xmm2; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ push(rsi); + __ movptr(from , from_param); + __ movptr(to , to_param); + __ movptr(key , key_param); + + __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + // keylen = # of 32-bit words, convert to 128-bit words + __ shrl(keylen, 2); + __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more + + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_result, Address(from, 0)); + + // for decryption java expanded key ordering is rotated one position from what we want + // so we start from 0x10 here and hit 0x00 last + // we don't know if the key is aligned, hence not using load-execute form + load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); + __ pxor (xmm_result, xmm_temp); + for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { + aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); + } + __ cmpl(keylen, 0); + __ jcc(Assembler::equal, L_doLast); + // only in 192 and 256 bit keys + aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); + aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); + __ subl(keylen, 2); + __ jcc(Assembler::equal, L_doLast); + // only in 256 bit keys + aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); + aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); + + __ BIND(L_doLast); + // for decryption the aesdeclast operation is always on key+0x00 + load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); + __ aesdeclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, 0), xmm_result); // store the result + + __ xorptr(rax, rax); // return 0 + __ pop(rsi); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + + void handleSOERegisters(bool saving) { + const int saveFrameSizeInBytes = 4 * wordSize; + const Address saved_rbx (rbp, -3 * wordSize); + const Address saved_rsi (rbp, -2 * wordSize); + const Address saved_rdi (rbp, -1 * wordSize); + + if (saving) { + __ subptr(rsp, saveFrameSizeInBytes); + __ movptr(saved_rsi, rsi); + __ movptr(saved_rdi, rdi); + __ movptr(saved_rbx, rbx); + } else { + // restoring + __ movptr(rsi, saved_rsi); + __ movptr(rdi, saved_rdi); + __ movptr(rbx, saved_rbx); + } + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address 
+ // c_rarg4 - input length + // + address generate_cipherBlockChaining_encryptAESCrypt() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); + address start = __ pc(); + + Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; + const Register from = rsi; // source array address + const Register to = rdx; // destination array address + const Register key = rcx; // key array address + const Register rvec = rdi; // r byte array initialized from initvector array address + // and left with the results of the last encryption block + const Register len_reg = rbx; // src len (must be multiple of blocksize 16) + const Register pos = rax; + + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + // first 6 keys preloaded into xmm2-xmm7 + const int XMM_REG_NUM_KEY_FIRST = 2; + const int XMM_REG_NUM_KEY_LAST = 7; + const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + handleSOERegisters(true /*saving*/); + + // load registers from incoming parameters + const Address from_param(rbp, 8+0); + const Address to_param (rbp, 8+4); + const Address key_param (rbp, 8+8); + const Address rvec_param (rbp, 8+12); + const Address len_param (rbp, 8+16); + __ movptr(from , from_param); + __ movptr(to , to_param); + __ movptr(key , key_param); + __ movptr(rvec , rvec_param); + __ movptr(len_reg , len_param); + + const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + // load up xmm regs 2 thru 7 with keys 0-5 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); + offset += 0x10; + } + + __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec + + // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) + __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rax, 44); + __ jcc(Assembler::notEqual, L_key_192_256); + + // 128 bit code follows here + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_128); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { + aes_enc_key(xmm_result, xmm_temp, key, key_offset); + } + load_key(xmm_temp, key, 0xa0); + __ aesenclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_128); + + __ BIND(L_exit); + __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object + + handleSOERegisters(false 
/*restoring*/); + __ movl(rax, 0); // return 0 (why?) + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + __ cmpl(rax, 52); + __ jcc(Assembler::notEqual, L_key_256); + + // 192-bit code follows here (could be changed to use more xmm registers) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_192); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) { + aes_enc_key(xmm_result, xmm_temp, key, key_offset); + } + load_key(xmm_temp, key, 0xc0); + __ aesenclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_192); + __ jmp(L_exit); + + __ BIND(L_key_256); + // 256-bit code follows here (could be changed to use more xmm registers) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_256); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) { + aes_enc_key(xmm_result, xmm_temp, key, key_offset); + } + load_key(xmm_temp, key, 0xe0); + __ aesenclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_256); + __ jmp(L_exit); + + return start; + } + + + // CBC AES Decryption. + // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time. 
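
[Editor's note, illustrative only] Parallelizing decryption is possible at all (and the 64-bit stub below exploits it four blocks at a time) because in CBC each plaintext block depends only on ciphertext that is already available: p[i] = D(c[i]) xor c[i-1], whereas encryption is inherently serial since c[i] feeds the next block. A small sketch of that dataflow, with a generic decrypt_block callback and Block type that are hypothetical stand-ins for the aesdec/aesdeclast sequence, not part of the stub:

    #include <cstdint>
    #include <vector>

    struct Block { uint8_t b[16]; };

    static Block operator^(const Block& x, const Block& y) {
      Block r;
      for (int i = 0; i < 16; i++) r.b[i] = x.b[i] ^ y.b[i];
      return r;
    }

    // CBC decrypt: every decrypt_block(c[i]) call is independent of the others,
    // so several of them can be issued back to back to hide AES latency.
    template <typename F>
    void cbc_decrypt(F decrypt_block, const std::vector<Block>& c, const Block& iv,
                     std::vector<Block>& p) {
      p.resize(c.size());
      for (size_t i = 0; i < c.size(); i++) {
        p[i] = decrypt_block(c[i]) ^ (i == 0 ? iv : c[i - 1]);
      }
    }
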
+ // + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + + address generate_cipherBlockChaining_decryptAESCrypt() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); + address start = __ pc(); + + Label L_exit, L_key_192_256, L_key_256; + Label L_singleBlock_loopTop_128; + Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256; + const Register from = rsi; // source array address + const Register to = rdx; // destination array address + const Register key = rcx; // key array address + const Register rvec = rdi; // r byte array initialized from initvector array address + // and left with the results of the last encryption block + const Register len_reg = rbx; // src len (must be multiple of blocksize 16) + const Register pos = rax; + + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + // first 6 keys preloaded into xmm2-xmm7 + const int XMM_REG_NUM_KEY_FIRST = 2; + const int XMM_REG_NUM_KEY_LAST = 7; + const int FIRST_NON_REG_KEY_offset = 0x70; + const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + handleSOERegisters(true /*saving*/); + + // load registers from incoming parameters + const Address from_param(rbp, 8+0); + const Address to_param (rbp, 8+4); + const Address key_param (rbp, 8+8); + const Address rvec_param (rbp, 8+12); + const Address len_param (rbp, 8+16); + __ movptr(from , from_param); + __ movptr(to , to_param); + __ movptr(key , key_param); + __ movptr(rvec , rvec_param); + __ movptr(len_reg , len_param); + + // the java expanded key ordering is rotated one position from what we want + // so we start from 0x10 here and hit 0x00 last + const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + // load up xmm regs 2 thru 6 with first 5 keys + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); + offset += 0x10; + } + + // inside here, use the rvec register to point to previous block cipher + // with which we xor at the end of each newly decrypted block + const Register prev_block_cipher_ptr = rvec; + + // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) + __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rax, 44); + __ jcc(Assembler::notEqual, L_key_192_256); + + + // 128-bit code follows here, parallelized + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_128); + __ cmpptr(len_reg, 0); // any blocks left?? 
+ __ jcc(Assembler::equal, L_exit); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0 + aes_dec_key(xmm_result, xmm_temp, key, key_offset); + } + load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 + __ aesdeclast(xmm_result, xmm_temp); + __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jmp(L_singleBlock_loopTop_128); + + + __ BIND(L_exit); + __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); + __ movptr(rvec , rvec_param); // restore this since used in loop + __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object + handleSOERegisters(false /*restoring*/); + __ movl(rax, 0); // return 0 (why?) + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + __ cmpl(rax, 52); + __ jcc(Assembler::notEqual, L_key_256); + + // 192-bit code follows here (could be optimized to use parallelism) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_192); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0 + aes_dec_key(xmm_result, xmm_temp, key, key_offset); + } + load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 + __ aesdeclast(xmm_result, xmm_temp); + __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); + __ jmp(L_exit); + + __ BIND(L_key_256); + // 256-bit code follows here (could be optimized to use parallelism) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_256); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + for (int key_offset = FIRST_NON_REG_KEY_offset; 
key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0 + aes_dec_key(xmm_result, xmm_temp, key, key_offset); + } + load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 + __ aesdeclast(xmm_result, xmm_temp); + __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); + __ jmp(L_exit); + + return start; + } + + public: // Information about frame layout at time of blocking runtime call. // Note that we only have to preserve callee-saved registers since @@ -2332,6 +2855,16 @@ generate_arraycopy_stubs(); generate_math_stubs(); + + // don't bother generating these AES intrinsic stubs unless global flag is set + if (UseAESIntrinsics) { + StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others + + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); + } } diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -2941,6 +2941,548 @@ } } + // AES intrinsic stubs + enum {AESBlockSize = 16}; + + address generate_key_shuffle_mask() { + __ align(16); + StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); + address start = __ pc(); + __ emit_data64( 0x0405060700010203, relocInfo::none ); + __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none ); + return start; + } + + // Utility routine for loading a 128-bit key word in little endian format + // can optionally specify that the shuffle mask is already in an xmmregister + void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + __ movdqu(xmmdst, Address(key, offset)); + if (xmm_shuf_mask != NULL) { + __ pshufb(xmmdst, xmm_shuf_mask); + } else { + __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + } + } + + // aesenc using specified key+offset + // can optionally specify that the shuffle mask is already in an xmmregister + void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + load_key(xmmtmp, key, offset, xmm_shuf_mask); + __ aesenc(xmmdst, xmmtmp); + } + + // aesdec using specified key+offset + // can optionally specify that the shuffle mask is already in an xmmregister + void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { + load_key(xmmtmp, key, offset, xmm_shuf_mask); + __ aesdec(xmmdst, xmmtmp); + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_encryptBlock() { + assert(UseAES && 
(UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + Label L_doLast; + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rax; + + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + const XMMRegister xmm_key_shuf_mask = xmm2; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + // keylen = # of 32-bit words, convert to 128-bit words + __ shrl(keylen, 2); + __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more + + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input + + // For encryption, the java expanded key ordering is just what we need + // we don't know if the key is aligned, hence not using load-execute form + + load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); + __ pxor(xmm_result, xmm_temp); + for (int offset = 0x10; offset <= 0x90; offset += 0x10) { + aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); + } + load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); + __ cmpl(keylen, 0); + __ jcc(Assembler::equal, L_doLast); + __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys + aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); + load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); + __ subl(keylen, 2); + __ jcc(Assembler::equal, L_doLast); + __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys + aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); + load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); + + __ BIND(L_doLast); + __ aesenclast(xmm_result, xmm_temp); + __ movdqu(Address(to, 0), xmm_result); // store the result + __ xorptr(rax, rax); // return 0 + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + address generate_aescrypt_decryptBlock() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + Label L_doLast; + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = rax; + + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + const XMMRegister xmm_key_shuf_mask = xmm2; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + // keylen = # of 32-bit words, convert to 128-bit words + __ shrl(keylen, 2); + __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more + + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + __ movdqu(xmm_result, Address(from, 0)); + + // for decryption java expanded key ordering is 
rotated one position from what we want + // so we start from 0x10 here and hit 0x00 last + // we don't know if the key is aligned, hence not using load-execute form + load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); + __ pxor (xmm_result, xmm_temp); + for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { + aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); + } + __ cmpl(keylen, 0); + __ jcc(Assembler::equal, L_doLast); + // only in 192 and 256 bit keys + aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); + aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); + __ subl(keylen, 2); + __ jcc(Assembler::equal, L_doLast); + // only in 256 bit keys + aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); + aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); + + __ BIND(L_doLast); + // for decryption the aesdeclast operation is always on key+0x00 + load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); + __ aesdeclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, 0), xmm_result); // store the result + + __ xorptr(rax, rax); // return 0 + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + address generate_cipherBlockChaining_encryptAESCrypt() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); + address start = __ pc(); + + Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block +#ifndef _WIN64 + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) +#else + const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64 + const Register len_reg = r10; // pick the first volatile windows register +#endif + const Register pos = rax; + + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; + const XMMRegister xmm_temp = xmm1; + // keys 0-10 preloaded into xmm2-xmm12 + const int XMM_REG_NUM_KEY_FIRST = 2; + const int XMM_REG_NUM_KEY_LAST = 12; + const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_LAST); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // on win64, fill len_reg from stack position + __ movl(len_reg, len_mem); + // save the xmm registers which must be preserved 6-12 + __ subptr(rsp, -rsp_after_call_off * wordSize); + for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { + __ movdqu(xmm_save(i), as_XMMRegister(i)); + } +#endif + + const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + // load up xmm regs 2 thru 12 with key 0x00 - 0xa0 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= 
XMM_REG_NUM_KEY_LAST; rnum++) { + load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); + offset += 0x10; + } + + __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec + + // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) + __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rax, 44); + __ jcc(Assembler::notEqual, L_key_192_256); + + // 128 bit code follows here + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_128); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + __ aesenclast(xmm_result, xmm_key10); + + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_128); + + __ BIND(L_exit); + __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object + +#ifdef _WIN64 + // restore xmm regs belonging to calling function + for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { + __ movdqu(as_XMMRegister(i), xmm_save(i)); + } +#endif + __ movl(rax, 0); // return 0 (why?) + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + __ cmpl(rax, 52); + __ jcc(Assembler::notEqual, L_key_256); + + // 192-bit code follows here (could be changed to use more xmm registers) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_192); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + aes_enc_key(xmm_result, xmm_temp, key, 0xb0); + load_key(xmm_temp, key, 0xc0); + __ aesenclast(xmm_result, xmm_temp); + + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_192); + __ jmp(L_exit); + + __ BIND(L_key_256); + // 256-bit code follows here (could be changed to use more xmm registers) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_loopTop_256); + __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input + __ pxor (xmm_result, xmm_temp); // xor with the current r vector + + __ pxor (xmm_result, xmm_key0); // do the aes rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + __ aesenc(xmm_result, as_XMMRegister(rnum)); + } + aes_enc_key(xmm_result, xmm_temp, key, 0xb0); + aes_enc_key(xmm_result, xmm_temp, key, 0xc0); + aes_enc_key(xmm_result, xmm_temp, key, 0xd0); + load_key(xmm_temp, key, 0xe0); + __ aesenclast(xmm_result, xmm_temp); + + __ 
movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual, L_loopTop_256); + __ jmp(L_exit); + + return start; + } + + + + // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time + // to hide instruction latency + // + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // c_rarg3 - r vector byte array address + // c_rarg4 - input length + // + + address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { + assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); + address start = __ pc(); + + Label L_exit, L_key_192_256, L_key_256; + Label L_singleBlock_loopTop_128, L_multiBlock_loopTop_128; + Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256; + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register rvec = c_rarg3; // r byte array initialized from initvector array address + // and left with the results of the last encryption block +#ifndef _WIN64 + const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) +#else + const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64 + const Register len_reg = r10; // pick the first volatile windows register +#endif + const Register pos = rax; + + // xmm register assignments for the loops below + const XMMRegister xmm_result = xmm0; + // keys 0-10 preloaded into xmm2-xmm12 + const int XMM_REG_NUM_KEY_FIRST = 5; + const int XMM_REG_NUM_KEY_LAST = 15; + const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); + const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST); + + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifdef _WIN64 + // on win64, fill len_reg from stack position + __ movl(len_reg, len_mem); + // save the xmm registers which must be preserved 6-15 + __ subptr(rsp, -rsp_after_call_off * wordSize); + for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { + __ movdqu(xmm_save(i), as_XMMRegister(i)); + } +#endif + // the java expanded key ordering is rotated one position from what we want + // so we start from 0x10 here and hit 0x00 last + const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front + __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); + // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00 + for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { + if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00; + load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); + offset += 0x10; + } + + const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block + // registers holding the four results in the parallelized loop + const XMMRegister xmm_result0 = xmm0; + const XMMRegister xmm_result1 = xmm2; + const XMMRegister xmm_result2 = xmm3; + const XMMRegister xmm_result3 = xmm4; + + __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec + + // now split to different paths depending on 
the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) + __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); + __ cmpl(rax, 44); + __ jcc(Assembler::notEqual, L_key_192_256); + + + // 128-bit code follows here, parallelized + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_multiBlock_loopTop_128); + __ cmpptr(len_reg, 4*AESBlockSize); // see if at least 4 blocks left + __ jcc(Assembler::less, L_singleBlock_loopTop_128); + + __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0*AESBlockSize)); // get next 4 blocks into xmmresult registers + __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1*AESBlockSize)); + __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2*AESBlockSize)); + __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3*AESBlockSize)); + +#define DoFour(opc, src_reg) \ + __ opc(xmm_result0, src_reg); \ + __ opc(xmm_result1, src_reg); \ + __ opc(xmm_result2, src_reg); \ + __ opc(xmm_result3, src_reg); + + DoFour(pxor, xmm_key_first); + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + DoFour(aesdec, as_XMMRegister(rnum)); + } + DoFour(aesdeclast, xmm_key_last); + // for each result, xor with the r vector of previous cipher block + __ pxor(xmm_result0, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0*AESBlockSize)); + __ pxor(xmm_result1, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1*AESBlockSize)); + __ pxor(xmm_result2, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2*AESBlockSize)); + __ pxor(xmm_result3, xmm_prev_block_cipher); + __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3*AESBlockSize)); // this will carry over to next set of blocks + + __ movdqu(Address(to, pos, Address::times_1, 0*AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output + __ movdqu(Address(to, pos, Address::times_1, 1*AESBlockSize), xmm_result1); + __ movdqu(Address(to, pos, Address::times_1, 2*AESBlockSize), xmm_result2); + __ movdqu(Address(to, pos, Address::times_1, 3*AESBlockSize), xmm_result3); + + __ addptr(pos, 4*AESBlockSize); + __ subptr(len_reg, 4*AESBlockSize); + __ jmp(L_multiBlock_loopTop_128); + + // registers used in the non-parallelized loops + const XMMRegister xmm_prev_block_cipher_save = xmm2; + const XMMRegister xmm_temp = xmm3; + + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_128); + __ cmpptr(len_reg, 0); // any blocks left?? 
+ __ jcc(Assembler::equal, L_exit); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + __ aesdeclast(xmm_result, xmm_key_last); + __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block + + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jmp(L_singleBlock_loopTop_128); + + + __ BIND(L_exit); + __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object +#ifdef _WIN64 + // restore regs belonging to calling function + for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { + __ movdqu(as_XMMRegister(i), xmm_save(i)); + } +#endif + __ movl(rax, 0); // return 0 (why?) + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + + __ BIND(L_key_192_256); + // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) + __ cmpl(rax, 52); + __ jcc(Assembler::notEqual, L_key_256); + + // 192-bit code follows here (could be optimized to use parallelism) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_192); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 192-bit key goes up to c0 + aes_dec_key(xmm_result, xmm_temp, key, 0xc0); + __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0 + __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block + + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); + __ jmp(L_exit); + + __ BIND(L_key_256); + // 256-bit code follows here (could be optimized to use parallelism) + __ movptr(pos, 0); + __ align(OptoLoopAlignment); + __ BIND(L_singleBlock_loopTop_256); + __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input + __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector + __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds + for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) { + __ aesdec(xmm_result, as_XMMRegister(rnum)); + } + aes_dec_key(xmm_result, xmm_temp, key, 0xb0); // 256-bit key goes up to e0 + aes_dec_key(xmm_result, xmm_temp, key, 0xc0); + aes_dec_key(xmm_result, xmm_temp, key, 0xd0); + aes_dec_key(xmm_result, 
xmm_temp, key, 0xe0); + __ aesdeclast(xmm_result, xmm_key_last); // xmm15 came from key+0 + __ pxor (xmm_result, xmm_prev_block_cipher); // xor with the current r vector + __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output + // no need to store r to memory until we exit + __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block + + __ addptr(pos, AESBlockSize); + __ subptr(len_reg, AESBlockSize); + __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); + __ jmp(L_exit); + + return start; + } + + + #undef __ #define __ masm-> @@ -3135,6 +3677,16 @@ generate_arraycopy_stubs(); generate_math_stubs(); + + // don't bother generating these AES intrinsic stubs unless global flag is set + if (UseAESIntrinsics) { + StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others + + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); + } } public: diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/stubRoutines_x86_32.cpp --- a/src/cpu/x86/vm/stubRoutines_x86_32.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/stubRoutines_x86_32.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -44,3 +44,4 @@ address StubRoutines::x86::_verify_mxcsr_entry = NULL; address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL; +address StubRoutines::x86::_key_shuffle_mask_addr = NULL; diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/stubRoutines_x86_32.hpp --- a/src/cpu/x86/vm/stubRoutines_x86_32.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/stubRoutines_x86_32.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -41,10 +41,14 @@ private: static address _verify_mxcsr_entry; static address _verify_fpu_cntrl_wrd_entry; + // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers + static address _key_shuffle_mask_addr; public: static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } static address verify_fpu_cntrl_wrd_entry() { return _verify_fpu_cntrl_wrd_entry; } + static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; } + }; static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/stubRoutines_x86_64.cpp --- a/src/cpu/x86/vm/stubRoutines_x86_64.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/stubRoutines_x86_64.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -56,3 +56,4 @@ address StubRoutines::x86::_double_sign_mask = NULL; address StubRoutines::x86::_double_sign_flip = NULL; address StubRoutines::x86::_mxcsr_std = NULL; +address StubRoutines::x86::_key_shuffle_mask_addr = NULL; diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/stubRoutines_x86_64.hpp --- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -54,6 +54,8 @@ static address _double_sign_mask; static address _double_sign_flip; static address _mxcsr_std; + // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers + static address _key_shuffle_mask_addr; public: @@ -116,6 +118,9 @@ { return _mxcsr_std; } + + static address 
key_shuffle_mask_addr() { return _key_shuffle_mask_addr; } + }; #endif // CPU_X86_VM_STUBROUTINES_X86_64_HPP diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/vm_version_x86.cpp --- a/src/cpu/x86/vm/vm_version_x86.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -419,13 +419,16 @@ if (UseAVX < 1) _cpuFeatures &= ~CPU_AVX; + if (!UseAES && !FLAG_IS_DEFAULT(UseAES)) + _cpuFeatures &= ~CPU_AES; + if (logical_processors_per_package() == 1) { // HT processor could be installed on a system which doesn't support HT. _cpuFeatures &= ~CPU_HT; } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -441,6 +444,7 @@ (supports_popcnt() ? ", popcnt" : ""), (supports_avx() ? ", avx" : ""), (supports_avx2() ? ", avx2" : ""), + (supports_aes() ? ", aes" : ""), (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_lzcnt() ? ", lzcnt": ""), @@ -472,6 +476,29 @@ if (!supports_avx ()) // Drop to 0 if no AVX support UseAVX = 0; + // Use AES instructions if available. + if (supports_aes()) { + if (FLAG_IS_DEFAULT(UseAES)) { + UseAES = true; + } + } else if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) + warning("AES instructions not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + + // The AES intrinsic stubs require AES instruction support (of course) + // but also require AVX mode for misaligned SSE access + if (UseAES && (UseAVX > 0)) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + UseAESIntrinsics = true; + } + } else if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) + warning("AES intrinsics not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + #ifdef COMPILER2 if (UseFPUForSpilling) { if (UseSSE < 2) { @@ -714,6 +741,9 @@ if (UseAVX > 0) { tty->print(" UseAVX=%d",UseAVX); } + if (UseAES) { + tty->print(" UseAES=1"); + } tty->cr(); tty->print("Allocation"); if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/vm_version_x86.hpp --- a/src/cpu/x86/vm/vm_version_x86.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -78,7 +78,9 @@ sse4_2 : 1, : 2, popcnt : 1, - : 3, + : 1, + aes : 1, + : 1, osxsave : 1, avx : 1, : 3; @@ -244,7 +246,8 @@ CPU_TSC = (1 << 15), CPU_TSCINV = (1 << 16), CPU_AVX = (1 << 17), - CPU_AVX2 = (1 << 18) + CPU_AVX2 = (1 << 18), + CPU_AES = (1 << 19) } cpuFeatureFlags; enum { @@ -420,6 +423,8 @@ result |= CPU_TSC; if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) result |= CPU_TSCINV; + if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) + result |= CPU_AES; // AMD features. 
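
[Editor's note] The supports_aes() predicate added above reduces to testing bit 25 of the ECX output of CPUID leaf 1, which is the architecturally documented AESNI feature flag; the UseAES / UseAESIntrinsics code then layers VM flag policy on top of that. A minimal stand-alone check, assuming GCC or Clang on x86 (the function name cpu_supports_aesni is illustrative):

#include <cpuid.h>    // __get_cpuid, GCC/Clang only

// True when the CPU reports AES-NI support: CPUID.01H:ECX.AESNI[bit 25],
// the same bit the new std_cpuid1_ecx.bits.aes field decodes.
static bool cpu_supports_aesni() {
  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    return false;   // CPUID leaf 1 not available
  }
  return (ecx & (1u << 25)) != 0;
}

Note that hardware support alone is not enough for the intrinsics: per the comment in the hunk above, UseAESIntrinsics is only defaulted on when both UseAES and UseAVX > 0 hold, because the stubs rely on AVX mode for misaligned SSE access.
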
if (is_amd()) { @@ -544,6 +549,7 @@ static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; } + static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } // Intel features static bool is_intel_family_core() { return is_intel() && diff -r 556dd9e475c6 -r dc16fe422c53 src/cpu/x86/vm/x86.ad --- a/src/cpu/x86/vm/x86.ad Thu Oct 25 09:53:16 2012 -0700 +++ b/src/cpu/x86/vm/x86.ad Fri Oct 26 14:09:52 2012 -0700 @@ -4102,9 +4102,158 @@ // ----------------------- LogicalRightShift ----------------------------------- -// Shorts/Chars vector logical right shift produces incorrect Java result +// Shorts vector logical right shift produces incorrect Java result // for negative data because java code convert short value into int with -// sign extension before a shift. +// sign extension before a shift. But char vectors are fine since chars are +// unsigned values. + +instruct vsrl2S(vecS dst, vecS shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_imm(vecS dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S(vecD dst, vecS shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_imm(vecD dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8S(vecX dst, vecS shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8S_imm(vecX dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS dst shift)); + format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} + ins_encode %{ + __ psrlw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} // Integers vector logical right shift instruct vsrl2I(vecD dst, vecS shift) %{ diff -r 556dd9e475c6 -r dc16fe422c53 src/os/bsd/vm/perfMemory_bsd.cpp --- a/src/os/bsd/vm/perfMemory_bsd.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/os/bsd/vm/perfMemory_bsd.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -30,6 +30,7 @@ #include "os_bsd.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/perfMemory.hpp" +#include "services/memTracker.hpp" #include "utilities/exceptions.hpp" // put OS-includes here @@ -753,6 +754,10 @@ // clear the shared memory region (void)::memset((void*) mapAddress, 0, size); + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + return mapAddress; } @@ -912,6 +917,10 @@ "Could not map PerfMemory"); } + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + *addr = mapAddress; *sizep = size; diff -r 556dd9e475c6 -r dc16fe422c53 src/os/linux/vm/perfMemory_linux.cpp --- a/src/os/linux/vm/perfMemory_linux.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/os/linux/vm/perfMemory_linux.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -30,6 +30,7 @@ #include "os_linux.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/perfMemory.hpp" +#include "services/memTracker.hpp" #include "utilities/exceptions.hpp" // put OS-includes here @@ -753,6 +754,10 @@ // clear the shared memory region (void)::memset((void*) mapAddress, 0, size); + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + return mapAddress; } @@ -912,6 +917,10 @@ "Could not map PerfMemory"); } + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + *addr = mapAddress; *sizep = size; diff -r 556dd9e475c6 -r dc16fe422c53 src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/os/solaris/vm/os_solaris.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -55,6 +55,7 @@ #include "runtime/threadCritical.hpp" #include "runtime/timer.hpp" #include "services/attachListener.hpp" +#include "services/memTracker.hpp" #include "services/runtimeService.hpp" #include "thread_solaris.inline.hpp" #include "utilities/decoder.hpp" @@ -3072,11 +3073,12 @@ // Since snv_84, Solaris attempts to honor the address hint - see 5003415. // Give it a try, if the kernel honors the hint we can return immediately. 
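
[Editor's note] The PerfMemory hunks above all apply the same pattern: the shared-memory backing store is mapped directly with mmap (or MapViewOfFile on Windows) rather than through the os:: layer, so Native Memory Tracking must be told about the reservation explicitly. A condensed sketch of that pattern, with error handling and the attach path omitted and mmap_perf_memory as an illustrative name; the two MemTracker calls are exactly the ones the patch adds:

#include <sys/mman.h>
#include "services/memTracker.hpp"   // added by this patch in each perfMemory_*.cpp

// Map the performance data backing store and register the region with NMT.
// Without the explicit record_* calls the mapping would be invisible to
// native memory tracking, since it bypasses the os api.
static char* mmap_perf_memory(int fd, size_t size) {
  char* mapAddress = (char*) ::mmap(NULL, size, PROT_READ | PROT_WRITE,
                                    MAP_SHARED, fd, 0);
  if (mapAddress == MAP_FAILED) {
    return NULL;
  }
  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC);
  MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal);
  return mapAddress;
}

The Windows unmap path in the same patch is the mirror image: after remove_file_mapping it calls MemTracker::record_virtual_memory_release so the reservation is retired from the NMT books as well.
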
char* addr = Solaris::anon_mmap(requested_addr, bytes, 0, false); + volatile int err = errno; if (addr == requested_addr) { return addr; } else if (addr != NULL) { - unmap_memory(addr, bytes); + pd_unmap_memory(addr, bytes); } if (PrintMiscellaneous && Verbose) { diff -r 556dd9e475c6 -r dc16fe422c53 src/os/solaris/vm/perfMemory_solaris.cpp --- a/src/os/solaris/vm/perfMemory_solaris.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/os/solaris/vm/perfMemory_solaris.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -30,6 +30,7 @@ #include "os_solaris.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/perfMemory.hpp" +#include "services/memTracker.hpp" #include "utilities/exceptions.hpp" // put OS-includes here @@ -768,6 +769,10 @@ // clear the shared memory region (void)::memset((void*) mapAddress, 0, size); + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + return mapAddress; } @@ -927,6 +932,10 @@ "Could not map PerfMemory"); } + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + *addr = mapAddress; *sizep = size; diff -r 556dd9e475c6 -r dc16fe422c53 src/os/windows/vm/perfMemory_windows.cpp --- a/src/os/windows/vm/perfMemory_windows.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/os/windows/vm/perfMemory_windows.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -30,6 +30,7 @@ #include "os_windows.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/perfMemory.hpp" +#include "services/memTracker.hpp" #include "utilities/exceptions.hpp" #include @@ -1496,6 +1497,10 @@ // clear the shared memory region (void)memset(mapAddress, '\0', size); + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + return (char*) mapAddress; } @@ -1672,6 +1677,11 @@ "Could not map PerfMemory"); } + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC); + MemTracker::record_virtual_memory_type((address)mapAddress, mtInternal); + + *addrp = (char*)mapAddress; *sizep = size; @@ -1824,6 +1834,8 @@ } remove_file_mapping(addr); + // it does not go through os api, the operation has to record from here + MemTracker::record_virtual_memory_release((address)addr, bytes); } char* PerfMemory::backing_store_filename() { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/c1/c1_GraphBuilder.cpp --- a/src/share/vm/c1/c1_GraphBuilder.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -1844,17 +1844,12 @@ code == Bytecodes::_invokevirtual && target->is_final_method() || code == Bytecodes::_invokedynamic) { ciMethod* inline_target = (cha_monomorphic_target != NULL) ? 
cha_monomorphic_target : target; - bool success = false; - if (target->is_method_handle_intrinsic()) { - // method handle invokes - success = try_method_handle_inline(target); - } else { - // static binding => check if callee is ok - success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver); - } + // static binding => check if callee is ok + bool success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver); + CHECK_BAILOUT(); - clear_inline_bailout(); + if (success) { // Register dependence if JVMTI has either breakpoint // setting or hotswapping of methods capabilities since they may @@ -3201,6 +3196,11 @@ return false; } + // method handle invokes + if (callee->is_method_handle_intrinsic()) { + return try_method_handle_inline(callee); + } + // handle intrinsics if (callee->intrinsic_id() != vmIntrinsics::_none) { if (try_inline_intrinsics(callee)) { @@ -3885,10 +3885,14 @@ ValueType* type = state()->stack_at(args_base)->type(); if (type->is_constant()) { ciMethod* target = type->as_ObjectType()->constant_value()->as_method_handle()->get_vmtarget(); - guarantee(!target->is_method_handle_intrinsic(), "should not happen"); // XXX remove - Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual; - if (try_inline(target, /*holder_known*/ true, bc)) { - return true; + // We don't do CHA here so only inline static and statically bindable methods. + if (target->is_static() || target->can_be_statically_bound()) { + Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual; + if (try_inline(target, /*holder_known*/ true, bc)) { + return true; + } + } else { + print_inlining(target, "not static or statically bindable", /*success*/ false); } } else { print_inlining(callee, "receiver not constant", /*success*/ false); @@ -3941,9 +3945,14 @@ } j += t->size(); // long and double take two slots } - Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual; - if (try_inline(target, /*holder_known*/ true, bc)) { - return true; + // We don't do CHA here so only inline static and statically bindable methods. + if (target->is_static() || target->can_be_statically_bound()) { + Bytecodes::Code bc = target->is_static() ? 
Bytecodes::_invokestatic : Bytecodes::_invokevirtual; + if (try_inline(target, /*holder_known*/ true, bc)) { + return true; + } + } else { + print_inlining(target, "not static or statically bindable", /*success*/ false); } } } else { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/classfile/vmSymbols.hpp --- a/src/share/vm/classfile/vmSymbols.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/classfile/vmSymbols.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -110,6 +110,7 @@ template(sun_jkernel_DownloadManager, "sun/jkernel/DownloadManager") \ template(getBootClassPathEntryForClass_name, "getBootClassPathEntryForClass") \ template(sun_misc_PostVMInitHook, "sun/misc/PostVMInitHook") \ + template(sun_misc_Launcher_ExtClassLoader, "sun/misc/Launcher$ExtClassLoader") \ \ /* Java runtime version access */ \ template(sun_misc_Version, "sun/misc/Version") \ @@ -723,6 +724,21 @@ /* java/lang/ref/Reference */ \ do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ \ + /* support for com.sum.crypto.provider.AESCrypt and some of its callers */ \ + do_class(com_sun_crypto_provider_aescrypt, "com/sun/crypto/provider/AESCrypt") \ + do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \ + do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R) \ + do_name( encryptBlock_name, "encryptBlock") \ + do_name( decryptBlock_name, "decryptBlock") \ + do_signature(byteArray_int_byteArray_int_signature, "([BI[BI)V") \ + \ + do_class(com_sun_crypto_provider_cipherBlockChaining, "com/sun/crypto/provider/CipherBlockChaining") \ + do_intrinsic(_cipherBlockChaining_encryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, encrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \ + do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \ + do_name( encrypt_name, "encrypt") \ + do_name( decrypt_name, "decrypt") \ + do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \ + \ /* support for sun.misc.Unsafe */ \ do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \ \ diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp" +#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp" +#include "memory/freeBlockDictionary.hpp" +#include "memory/sharedHeap.hpp" +#include "runtime/globals.hpp" +#include "runtime/mutex.hpp" +#include "runtime/vmThread.hpp" + +template <> +void AdaptiveFreeList::print_on(outputStream* st, const char* c) const { + if (c != NULL) { + st->print("%16s", c); + } else { + st->print(SIZE_FORMAT_W(16), size()); + } + st->print("\t" + SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" + SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n", + bfr_surp(), surplus(), desired(), prev_sweep(), before_sweep(), + count(), coal_births(), coal_deaths(), split_births(), split_deaths()); +} + +template +AdaptiveFreeList::AdaptiveFreeList() : FreeList(), _hint(0) { + init_statistics(); +} + +template +AdaptiveFreeList::AdaptiveFreeList(Chunk* fc) : FreeList(fc), _hint(0) { + init_statistics(); +#ifndef PRODUCT + _allocation_stats.set_returned_bytes(size() * HeapWordSize); +#endif +} + +template +void AdaptiveFreeList::initialize() { + FreeList::initialize(); + set_hint(0); + init_statistics(true /* split_birth */); +} + +template +void AdaptiveFreeList::reset(size_t hint) { + FreeList::reset(); + set_hint(hint); +} + +#ifndef PRODUCT +template +void AdaptiveFreeList::assert_proper_lock_protection_work() const { + assert(protecting_lock() != NULL, "Don't call this directly"); + assert(ParallelGCThreads > 0, "Don't call this directly"); + Thread* thr = Thread::current(); + if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) { + // assert that we are holding the freelist lock + } else if (thr->is_GC_task_thread()) { + assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED"); + } else if (thr->is_Java_thread()) { + assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing"); + } else { + ShouldNotReachHere(); // unaccounted thread type? + } +} +#endif +template +void AdaptiveFreeList::init_statistics(bool split_birth) { + _allocation_stats.initialize(split_birth); +} + +template +size_t AdaptiveFreeList::get_better_size() { + + // A candidate chunk has been found. If it is already under + // populated and there is a hinT, REturn the hint(). Else + // return the size of this chunk. + if (surplus() <= 0) { + if (hint() != 0) { + return hint(); + } else { + return size(); + } + } else { + // This list has a surplus so use it. 
+ return size(); + } +} + + +template +void AdaptiveFreeList::return_chunk_at_head(Chunk* chunk) { + assert_proper_lock_protection(); + return_chunk_at_head(chunk, true); +} + +template +void AdaptiveFreeList::return_chunk_at_head(Chunk* chunk, bool record_return) { + FreeList::return_chunk_at_head(chunk, record_return); +#ifdef ASSERT + if (record_return) { + increment_returned_bytes_by(size()*HeapWordSize); + } +#endif +} + +template +void AdaptiveFreeList::return_chunk_at_tail(Chunk* chunk) { + return_chunk_at_tail(chunk, true); +} + +template +void AdaptiveFreeList::return_chunk_at_tail(Chunk* chunk, bool record_return) { + FreeList::return_chunk_at_tail(chunk, record_return); +#ifdef ASSERT + if (record_return) { + increment_returned_bytes_by(size()*HeapWordSize); + } +#endif +} + +#ifndef PRODUCT +template +void AdaptiveFreeList::verify_stats() const { + // The +1 of the LH comparand is to allow some "looseness" in + // checking: we usually call this interface when adding a block + // and we'll subsequently update the stats; we cannot update the + // stats beforehand because in the case of the large-block BT + // dictionary for example, this might be the first block and + // in that case there would be no place that we could record + // the stats (which are kept in the block itself). + assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births() + + _allocation_stats.coal_births() + 1) // Total Production Stock + 1 + >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths() + + (ssize_t)count()), // Total Current Stock + depletion + err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT + " violates Conservation Principle: " + "prev_sweep(" SIZE_FORMAT ")" + " + split_births(" SIZE_FORMAT ")" + " + coal_births(" SIZE_FORMAT ") + 1 >= " + " split_deaths(" SIZE_FORMAT ")" + " coal_deaths(" SIZE_FORMAT ")" + " + count(" SSIZE_FORMAT ")", + this, size(), _allocation_stats.prev_sweep(), _allocation_stats.split_births(), + _allocation_stats.split_births(), _allocation_stats.split_deaths(), + _allocation_stats.coal_deaths(), count())); +} +#endif + +// Needs to be after the definitions have been seen. +template class AdaptiveFreeList; diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP +#define SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP + +#include "memory/freeList.hpp" +#include "gc_implementation/shared/allocationStats.hpp" + +class CompactibleFreeListSpace; + +// A class for maintaining a free list of Chunk's. The FreeList +// maintains a the structure of the list (head, tail, etc.) plus +// statistics for allocations from the list. The links between items +// are not part of FreeList. The statistics are +// used to make decisions about coalescing Chunk's when they +// are swept during collection. +// +// See the corresponding .cpp file for a description of the specifics +// for that implementation. + +class Mutex; + +template +class AdaptiveFreeList : public FreeList { + friend class CompactibleFreeListSpace; + friend class VMStructs; + // friend class PrintTreeCensusClosure; + + size_t _hint; // next larger size list with a positive surplus + + AllocationStats _allocation_stats; // allocation-related statistics + + public: + + AdaptiveFreeList(); + AdaptiveFreeList(Chunk* fc); + + using FreeList::assert_proper_lock_protection; +#ifdef ASSERT + using FreeList::protecting_lock; +#endif + using FreeList::count; + using FreeList::size; + using FreeList::verify_chunk_in_free_list; + using FreeList::getFirstNChunksFromList; + using FreeList::print_on; + void return_chunk_at_head(Chunk* fc, bool record_return); + void return_chunk_at_head(Chunk* fc); + void return_chunk_at_tail(Chunk* fc, bool record_return); + void return_chunk_at_tail(Chunk* fc); + using FreeList::return_chunk_at_tail; + using FreeList::remove_chunk; + using FreeList::prepend; + using FreeList::print_labels_on; + using FreeList::get_chunk_at_head; + + // Initialize. + void initialize(); + + // Reset the head, tail, hint, and count of a free list. 
+ void reset(size_t hint); + + void assert_proper_lock_protection_work() const PRODUCT_RETURN; + + void print_on(outputStream* st, const char* c = NULL) const; + + size_t hint() const { + return _hint; + } + void set_hint(size_t v) { + assert_proper_lock_protection(); + assert(v == 0 || size() < v, "Bad hint"); + _hint = v; + } + + size_t get_better_size(); + + // Accessors for statistics + void init_statistics(bool split_birth = false); + + AllocationStats* allocation_stats() { + assert_proper_lock_protection(); + return &_allocation_stats; + } + + ssize_t desired() const { + return _allocation_stats.desired(); + } + void set_desired(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_desired(v); + } + void compute_desired(float inter_sweep_current, + float inter_sweep_estimate, + float intra_sweep_estimate) { + assert_proper_lock_protection(); + _allocation_stats.compute_desired(count(), + inter_sweep_current, + inter_sweep_estimate, + intra_sweep_estimate); + } + ssize_t coal_desired() const { + return _allocation_stats.coal_desired(); + } + void set_coal_desired(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_coal_desired(v); + } + + ssize_t surplus() const { + return _allocation_stats.surplus(); + } + void set_surplus(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_surplus(v); + } + void increment_surplus() { + assert_proper_lock_protection(); + _allocation_stats.increment_surplus(); + } + void decrement_surplus() { + assert_proper_lock_protection(); + _allocation_stats.decrement_surplus(); + } + + ssize_t bfr_surp() const { + return _allocation_stats.bfr_surp(); + } + void set_bfr_surp(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_bfr_surp(v); + } + ssize_t prev_sweep() const { + return _allocation_stats.prev_sweep(); + } + void set_prev_sweep(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_prev_sweep(v); + } + ssize_t before_sweep() const { + return _allocation_stats.before_sweep(); + } + void set_before_sweep(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_before_sweep(v); + } + + ssize_t coal_births() const { + return _allocation_stats.coal_births(); + } + void set_coal_births(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_coal_births(v); + } + void increment_coal_births() { + assert_proper_lock_protection(); + _allocation_stats.increment_coal_births(); + } + + ssize_t coal_deaths() const { + return _allocation_stats.coal_deaths(); + } + void set_coal_deaths(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_coal_deaths(v); + } + void increment_coal_deaths() { + assert_proper_lock_protection(); + _allocation_stats.increment_coal_deaths(); + } + + ssize_t split_births() const { + return _allocation_stats.split_births(); + } + void set_split_births(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_split_births(v); + } + void increment_split_births() { + assert_proper_lock_protection(); + _allocation_stats.increment_split_births(); + } + + ssize_t split_deaths() const { + return _allocation_stats.split_deaths(); + } + void set_split_deaths(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_split_deaths(v); + } + void increment_split_deaths() { + assert_proper_lock_protection(); + _allocation_stats.increment_split_deaths(); + } + +#ifndef PRODUCT + // For debugging. 
The "_returned_bytes" in all the lists are summed + // and compared with the total number of bytes swept during a + // collection. + size_t returned_bytes() const { return _allocation_stats.returned_bytes(); } + void set_returned_bytes(size_t v) { _allocation_stats.set_returned_bytes(v); } + void increment_returned_bytes_by(size_t v) { + _allocation_stats.set_returned_bytes(_allocation_stats.returned_bytes() + v); + } + // Stats verification + void verify_stats() const; +#endif // NOT PRODUCT +}; + +#endif // SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -91,7 +91,7 @@ _collector(NULL) { assert(sizeof(FreeChunk) / BytesPerWord <= MinChunkSize, - "FreeChunk is larger than expected"); + "FreeChunk is larger than expected"); _bt.set_space(this); initialize(mr, SpaceDecorator::Clear, SpaceDecorator::Mangle); // We have all of "mr", all of which we place in the dictionary @@ -101,14 +101,14 @@ // implementation, namely, the simple binary tree (splaying // temporarily disabled). switch (dictionaryChoice) { + case FreeBlockDictionary::dictionaryBinaryTree: + _dictionary = new BinaryTreeDictionary(mr); + break; case FreeBlockDictionary::dictionarySplayTree: case FreeBlockDictionary::dictionarySkipList: default: warning("dictionaryChoice: selected option not understood; using" " default BinaryTreeDictionary implementation instead."); - case FreeBlockDictionary::dictionaryBinaryTree: - _dictionary = new BinaryTreeDictionary(mr, use_adaptive_freelists); - break; } assert(_dictionary != NULL, "CMS dictionary initialization"); // The indexed free lists are initially all empty and are lazily @@ -453,7 +453,7 @@ reportIndexedFreeListStatistics(); gclog_or_tty->print_cr("Layout of Indexed Freelists"); gclog_or_tty->print_cr("---------------------------"); - FreeList::print_labels_on(st, "size"); + AdaptiveFreeList::print_labels_on(st, "size"); for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { _indexedFreeList[i].print_on(gclog_or_tty); for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL; @@ -1319,7 +1319,7 @@ size_t currSize = numWords + MinChunkSize; assert(currSize % MinObjAlignment == 0, "currSize should be aligned"); for (i = currSize; i < IndexSetSize; i += IndexSetStride) { - FreeList* fl = &_indexedFreeList[i]; + AdaptiveFreeList* fl = &_indexedFreeList[i]; if (fl->head()) { ret = getFromListGreater(fl, numWords); assert(ret == NULL || ret->is_free(), "Should be returning a free chunk"); @@ -1702,7 +1702,9 @@ _dictionary->return_chunk(chunk); #ifndef PRODUCT if (CMSCollector::abstract_state() != CMSCollector::Sweeping) { - TreeChunk::as_TreeChunk(chunk)->list()->verify_stats(); + TreeChunk* tc = TreeChunk::as_TreeChunk(chunk); + TreeList* tl = tc->list(); + tl->verify_stats(); } #endif // PRODUCT } @@ -1745,7 +1747,7 @@ { MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag); ec = dictionary()->find_largest_dict(); // get largest block - if (ec != NULL && ec->end() == chunk) { + if (ec != NULL && ec->end() == (uintptr_t*) chunk) { // It's a coterminal block - we can coalesce. size_t old_size = ec->size(); coalDeath(old_size); @@ -1850,11 +1852,11 @@ the excess is >= MIN_CHUNK. 
*/ size_t start = align_object_size(numWords + MinChunkSize); if (start < IndexSetSize) { - FreeList* it = _indexedFreeList; + AdaptiveFreeList* it = _indexedFreeList; size_t hint = _indexedFreeList[start].hint(); while (hint < IndexSetSize) { assert(hint % MinObjAlignment == 0, "hint should be aligned"); - FreeList *fl = &_indexedFreeList[hint]; + AdaptiveFreeList *fl = &_indexedFreeList[hint]; if (fl->surplus() > 0 && fl->head() != NULL) { // Found a list with surplus, reset original hint // and split out a free chunk which is returned. @@ -1873,7 +1875,7 @@ } /* Requires fl->size >= numWords + MinChunkSize */ -FreeChunk* CompactibleFreeListSpace::getFromListGreater(FreeList* fl, +FreeChunk* CompactibleFreeListSpace::getFromListGreater(AdaptiveFreeList* fl, size_t numWords) { FreeChunk *curr = fl->head(); size_t oldNumWords = curr->size(); @@ -2155,7 +2157,7 @@ assert_locked(); size_t i; for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { - FreeList* fl = &_indexedFreeList[i]; + AdaptiveFreeList* fl = &_indexedFreeList[i]; if (PrintFLSStatistics > 1) { gclog_or_tty->print("size[%d] : ", i); } @@ -2174,7 +2176,7 @@ assert_locked(); size_t i; for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { - FreeList *fl = &_indexedFreeList[i]; + AdaptiveFreeList *fl = &_indexedFreeList[i]; fl->set_surplus(fl->count() - (ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent)); } @@ -2185,7 +2187,7 @@ size_t i; size_t h = IndexSetSize; for (i = IndexSetSize - 1; i != 0; i -= IndexSetStride) { - FreeList *fl = &_indexedFreeList[i]; + AdaptiveFreeList *fl = &_indexedFreeList[i]; fl->set_hint(h); if (fl->surplus() > 0) { h = i; @@ -2197,7 +2199,7 @@ assert_locked(); size_t i; for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { - FreeList *fl = &_indexedFreeList[i]; + AdaptiveFreeList *fl = &_indexedFreeList[i]; fl->set_prev_sweep(fl->count()); fl->set_coal_births(0); fl->set_coal_deaths(0); @@ -2224,7 +2226,7 @@ bool CompactibleFreeListSpace::coalOverPopulated(size_t size) { if (size < SmallForDictionary) { - FreeList *fl = &_indexedFreeList[size]; + AdaptiveFreeList *fl = &_indexedFreeList[size]; return (fl->coal_desired() < 0) || ((int)fl->count() > fl->coal_desired()); } else { @@ -2234,14 +2236,14 @@ void CompactibleFreeListSpace::smallCoalBirth(size_t size) { assert(size < SmallForDictionary, "Size too large for indexed list"); - FreeList *fl = &_indexedFreeList[size]; + AdaptiveFreeList *fl = &_indexedFreeList[size]; fl->increment_coal_births(); fl->increment_surplus(); } void CompactibleFreeListSpace::smallCoalDeath(size_t size) { assert(size < SmallForDictionary, "Size too large for indexed list"); - FreeList *fl = &_indexedFreeList[size]; + AdaptiveFreeList *fl = &_indexedFreeList[size]; fl->increment_coal_deaths(); fl->decrement_surplus(); } @@ -2250,7 +2252,7 @@ if (size < SmallForDictionary) { smallCoalBirth(size); } else { - dictionary()->dict_census_udpate(size, + dictionary()->dict_census_update(size, false /* split */, true /* birth */); } @@ -2260,7 +2262,7 @@ if(size < SmallForDictionary) { smallCoalDeath(size); } else { - dictionary()->dict_census_udpate(size, + dictionary()->dict_census_update(size, false /* split */, false /* birth */); } @@ -2268,14 +2270,14 @@ void CompactibleFreeListSpace::smallSplitBirth(size_t size) { assert(size < SmallForDictionary, "Size too large for indexed list"); - FreeList *fl = &_indexedFreeList[size]; + AdaptiveFreeList *fl = &_indexedFreeList[size]; fl->increment_split_births(); fl->increment_surplus(); } 
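
[Editor's note] The coal/split birth and death updates in the functions above are the census that AdaptiveFreeList::verify_stats() (earlier in this patch) checks against its "Conservation Principle": everything produced for a size class (chunks surviving the previous sweep plus split and coalescing births, with one unit of slack because the check runs before the stats for the block being added are updated) must cover everything consumed (split and coalescing deaths) plus the chunks still on the list. A toy model of that invariant, with illustrative type and field names:

#include <cassert>

// Minimal stand-in for the per-size-class AllocationStats; only the counters
// used by the conservation check appear. The real fields are ssize_t.
struct ToyFreeListStats {
  long prev_sweep   = 0;   // chunks carried over from the previous sweep
  long split_births = 0;   // chunks created by splitting larger chunks
  long split_deaths = 0;   // chunks consumed by being split
  long coal_births  = 0;   // chunks created by coalescing neighbours
  long coal_deaths  = 0;   // chunks absorbed into a coalesced neighbour
  long count        = 0;   // chunks currently on the list

  // Mirrors the verify_stats() assert: total production (+1 slack) must be
  // at least total consumption plus current stock.
  void verify() const {
    assert(prev_sweep + split_births + coal_births + 1 >=
           split_deaths + coal_deaths + count);
  }
};
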
void CompactibleFreeListSpace::smallSplitDeath(size_t size) { assert(size < SmallForDictionary, "Size too large for indexed list"); - FreeList *fl = &_indexedFreeList[size]; + AdaptiveFreeList *fl = &_indexedFreeList[size]; fl->increment_split_deaths(); fl->decrement_surplus(); } @@ -2284,7 +2286,7 @@ if (size < SmallForDictionary) { smallSplitBirth(size); } else { - dictionary()->dict_census_udpate(size, + dictionary()->dict_census_update(size, true /* split */, true /* birth */); } @@ -2294,7 +2296,7 @@ if (size < SmallForDictionary) { smallSplitDeath(size); } else { - dictionary()->dict_census_udpate(size, + dictionary()->dict_census_update(size, true /* split */, false /* birth */); } @@ -2517,10 +2519,10 @@ #ifndef PRODUCT void CompactibleFreeListSpace::check_free_list_consistency() const { - assert(_dictionary->min_size() <= IndexSetSize, + assert((TreeChunk::min_size() <= IndexSetSize), "Some sizes can't be allocated without recourse to" " linear allocation buffers"); - assert(BinaryTreeDictionary::min_tree_chunk_size*HeapWordSize == sizeof(TreeChunk), + assert((TreeChunk::min_size()*HeapWordSize == sizeof(TreeChunk)), "else MIN_TREE_CHUNK_SIZE is wrong"); assert(IndexSetStart != 0, "IndexSetStart not initialized"); assert(IndexSetStride != 0, "IndexSetStride not initialized"); @@ -2529,15 +2531,15 @@ void CompactibleFreeListSpace::printFLCensus(size_t sweep_count) const { assert_lock_strong(&_freelistLock); - FreeList total; + AdaptiveFreeList total; gclog_or_tty->print("end sweep# " SIZE_FORMAT "\n", sweep_count); - FreeList::print_labels_on(gclog_or_tty, "size"); + AdaptiveFreeList::print_labels_on(gclog_or_tty, "size"); size_t total_free = 0; for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { - const FreeList *fl = &_indexedFreeList[i]; + const AdaptiveFreeList *fl = &_indexedFreeList[i]; total_free += fl->count() * fl->size(); if (i % (40*IndexSetStride) == 0) { - FreeList::print_labels_on(gclog_or_tty, "size"); + AdaptiveFreeList::print_labels_on(gclog_or_tty, "size"); } fl->print_on(gclog_or_tty); total.set_bfr_surp( total.bfr_surp() + fl->bfr_surp() ); @@ -2620,7 +2622,7 @@ res = _cfls->getChunkFromDictionaryExact(word_sz); if (res == NULL) return NULL; } else { - FreeList* fl = &_indexedFreeList[word_sz]; + AdaptiveFreeList* fl = &_indexedFreeList[word_sz]; if (fl->count() == 0) { // Attempt to refill this local free list. get_from_global_pool(word_sz, fl); @@ -2640,7 +2642,7 @@ // Get a chunk of blocks of the right size and update related // book-keeping stats -void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList* fl) { +void CFLS_LAB::get_from_global_pool(size_t word_sz, AdaptiveFreeList* fl) { // Get the #blocks we want to claim size_t n_blks = (size_t)_blocks_to_claim[word_sz].average(); assert(n_blks > 0, "Error"); @@ -2722,7 +2724,7 @@ if (num_retire > 0) { _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]); // Reset this list. 
- _indexedFreeList[i] = FreeList(); + _indexedFreeList[i] = AdaptiveFreeList(); _indexedFreeList[i].set_size(i); } } @@ -2736,7 +2738,7 @@ } } -void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) { +void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, AdaptiveFreeList* fl) { assert(fl->count() == 0, "Precondition."); assert(word_sz < CompactibleFreeListSpace::IndexSetSize, "Precondition"); @@ -2752,12 +2754,12 @@ (cur_sz < CompactibleFreeListSpace::IndexSetSize) && (CMSSplitIndexedFreeListBlocks || k <= 1); k++, cur_sz = k * word_sz) { - FreeList fl_for_cur_sz; // Empty. + AdaptiveFreeList fl_for_cur_sz; // Empty. fl_for_cur_sz.set_size(cur_sz); { MutexLockerEx x(_indexedFreeListParLocks[cur_sz], Mutex::_no_safepoint_check_flag); - FreeList* gfl = &_indexedFreeList[cur_sz]; + AdaptiveFreeList* gfl = &_indexedFreeList[cur_sz]; if (gfl->count() != 0) { // nn is the number of chunks of size cur_sz that // we'd need to split k-ways each, in order to create @@ -2832,12 +2834,11 @@ MutexLockerEx x(parDictionaryAllocLock(), Mutex::_no_safepoint_check_flag); while (n > 0) { - fc = dictionary()->get_chunk(MAX2(n * word_sz, - _dictionary->min_size()), + fc = dictionary()->get_chunk(MAX2(n * word_sz, _dictionary->min_size()), FreeBlockDictionary::atLeast); if (fc != NULL) { _bt.allocated((HeapWord*)fc, fc->size(), true /* reducing */); // update _unallocated_blk - dictionary()->dict_census_udpate(fc->size(), + dictionary()->dict_census_update(fc->size(), true /*split*/, false /*birth*/); break; @@ -2890,7 +2891,7 @@ fc->set_size(prefix_size); if (rem >= IndexSetSize) { returnChunkToDictionary(rem_fc); - dictionary()->dict_census_udpate(rem, true /*split*/, true /*birth*/); + dictionary()->dict_census_update(rem, true /*split*/, true /*birth*/); rem_fc = NULL; } // Otherwise, return it to the small list below. diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -25,6 +25,7 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP +#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp" #include "gc_implementation/concurrentMarkSweep/promotionInfo.hpp" #include "memory/binaryTreeDictionary.hpp" #include "memory/blockOffsetTable.inline.hpp" @@ -38,6 +39,7 @@ class CompactibleFreeListSpace; class BlkClosure; class BlkClosureCareful; +class FreeChunk; class UpwardsObjectClosure; class ObjectClosureCareful; class Klass; @@ -131,7 +133,7 @@ FreeBlockDictionary::DictionaryChoice _dictionaryChoice; FreeBlockDictionary* _dictionary; // ptr to dictionary for large size blocks - FreeList _indexedFreeList[IndexSetSize]; + AdaptiveFreeList _indexedFreeList[IndexSetSize]; // indexed array for small size blocks // allocation stategy bool _fitStrategy; // Use best fit strategy. @@ -168,7 +170,7 @@ // If the count of "fl" is negative, it's absolute value indicates a // number of free chunks that had been previously "borrowed" from global // list of size "word_sz", and must now be decremented. 
- void par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl); + void par_get_chunk_of_blocks(size_t word_sz, size_t n, AdaptiveFreeList* fl); // Allocation helper functions // Allocate using a strategy that takes from the indexed free lists @@ -214,7 +216,7 @@ // and return it. The split off remainder is returned to // the free lists. The old name for getFromListGreater // was lookInListGreater. - FreeChunk* getFromListGreater(FreeList* fl, size_t numWords); + FreeChunk* getFromListGreater(AdaptiveFreeList* fl, size_t numWords); // Get a chunk in the indexed free list or dictionary, // by considering a larger chunk and splitting it. FreeChunk* getChunkFromGreater(size_t numWords); @@ -621,7 +623,7 @@ CompactibleFreeListSpace* _cfls; // Our local free lists. - FreeList _indexedFreeList[CompactibleFreeListSpace::IndexSetSize]; + AdaptiveFreeList _indexedFreeList[CompactibleFreeListSpace::IndexSetSize]; // Initialized from a command-line arg. @@ -634,7 +636,7 @@ size_t _num_blocks [CompactibleFreeListSpace::IndexSetSize]; // Internal work method - void get_from_global_pool(size_t word_sz, FreeList* fl); + void get_from_global_pool(size_t word_sz, AdaptiveFreeList* fl); public: CFLS_LAB(CompactibleFreeListSpace* cfls); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -9143,7 +9143,7 @@ size_t shrinkable_size_in_bytes = chunk_at_end->size(); size_t aligned_shrinkable_size_in_bytes = align_size_down(shrinkable_size_in_bytes, os::vm_page_size()); - assert(unallocated_start <= chunk_at_end->end(), + assert(unallocated_start <= (HeapWord*) chunk_at_end->end(), "Inconsistent chunk at end of space"); size_t bytes = MIN2(desired_bytes, aligned_shrinkable_size_in_bytes); size_t word_size_before = heap_word_size(_virtual_space.committed_size()); @@ -9210,7 +9210,7 @@ assert(_cmsSpace->unallocated_block() <= _cmsSpace->end(), "Inconsistency at end of space"); - assert(chunk_at_end->end() == _cmsSpace->end(), + assert(chunk_at_end->end() == (uintptr_t*) _cmsSpace->end(), "Shrinking is inconsistent"); return; } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -133,7 +133,7 @@ } // Return the address past the end of this chunk - HeapWord* end() const { return ((HeapWord*) this) + size(); } + uintptr_t* end() const { return ((uintptr_t*) this) + size(); } // debugging void verify() const PRODUCT_RETURN; diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -25,6 +25,8 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_VMSTRUCTS_CMS_HPP #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_VMSTRUCTS_CMS_HPP +typedef BinaryTreeDictionary AFLBinaryTreeDictionary; + #define VM_STRUCTS_CMS(nonstatic_field, \ volatile_nonstatic_field, \ static_field) \ @@ -38,14 
+40,8 @@ nonstatic_field(CMSCollector, _markBitMap, CMSBitMap) \ nonstatic_field(ConcurrentMarkSweepGeneration, _cmsSpace, CompactibleFreeListSpace*) \ static_field(ConcurrentMarkSweepThread, _collector, CMSCollector*) \ - volatile_nonstatic_field(FreeChunk, _size, size_t) \ - nonstatic_field(FreeChunk, _next, FreeChunk*) \ - nonstatic_field(FreeChunk, _prev, FreeChunk*) \ nonstatic_field(LinearAllocBlock, _word_size, size_t) \ - nonstatic_field(FreeList, _size, size_t) \ - nonstatic_field(FreeList, _count, ssize_t) \ - nonstatic_field(BinaryTreeDictionary,_total_size, size_t) \ - nonstatic_field(CompactibleFreeListSpace, _dictionary, FreeBlockDictionary*) \ + nonstatic_field(AFLBinaryTreeDictionary, _total_size, size_t) \ nonstatic_field(CompactibleFreeListSpace, _indexedFreeList[0], FreeList) \ nonstatic_field(CompactibleFreeListSpace, _smallLinearAllocBlock, LinearAllocBlock) @@ -60,19 +56,17 @@ declare_toplevel_type(CMSCollector) \ declare_toplevel_type(CMSBitMap) \ declare_toplevel_type(FreeChunk) \ + declare_toplevel_type(Metablock) \ declare_toplevel_type(ConcurrentMarkSweepThread*) \ declare_toplevel_type(ConcurrentMarkSweepGeneration*) \ declare_toplevel_type(SurrogateLockerThread*) \ declare_toplevel_type(CompactibleFreeListSpace*) \ declare_toplevel_type(CMSCollector*) \ - declare_toplevel_type(FreeChunk*) \ - declare_toplevel_type(BinaryTreeDictionary*) \ - declare_toplevel_type(FreeBlockDictionary*) \ - declare_toplevel_type(FreeList*) \ - declare_toplevel_type(FreeList) \ + declare_toplevel_type(AFLBinaryTreeDictionary*) \ declare_toplevel_type(LinearAllocBlock) \ declare_toplevel_type(FreeBlockDictionary) \ - declare_type(BinaryTreeDictionary, FreeBlockDictionary) + declare_type(AFLBinaryTreeDictionary, FreeBlockDictionary) \ + declare_type(AFLBinaryTreeDictionary, FreeBlockDictionary) \ #define VM_INT_CONSTANTS_CMS(declare_constant) \ declare_constant(Generation::ConcurrentMarkSweep) \ diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/gc_implementation/shared/vmGCOperations.hpp --- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -191,7 +191,7 @@ class VM_CollectForMetadataAllocation: public VM_GC_Operation { private: MetaWord* _result; - size_t _size; // size of object to be allocated + size_t _size; // size of object to be allocated Metaspace::MetadataType _mdtype; ClassLoaderData* _loader_data; public: diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/allocation.cpp --- a/src/share/vm/memory/allocation.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/allocation.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -433,19 +433,18 @@ NOT_PRODUCT(Atomic::inc(&_instance_count);) } -Arena::Arena(Arena *a) : _chunk(a->_chunk), _hwm(a->_hwm), _max(a->_max), _first(a->_first) { - set_size_in_bytes(a->size_in_bytes()); - NOT_PRODUCT(Atomic::inc(&_instance_count);) -} - - Arena *Arena::move_contents(Arena *copy) { copy->destruct_contents(); copy->_chunk = _chunk; copy->_hwm = _hwm; copy->_max = _max; copy->_first = _first; - copy->set_size_in_bytes(size_in_bytes()); + + // workaround rare racing condition, which could double count + // the arena size by native memory tracking + size_t size = size_in_bytes(); + set_size_in_bytes(0); + copy->set_size_in_bytes(size); // Destroy original arena reset(); return copy; // Return Arena with contents @@ -497,6 +496,9 @@ char* end = _first->next() ? 
_first->top() : _hwm; free_malloced_objects(_first, _first->bottom(), end, _hwm); } + // reset size before chop to avoid a rare racing condition + // that can have total arena memory exceed total chunk memory + set_size_in_bytes(0); _first->chop(); reset(); } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/allocation.hpp --- a/src/share/vm/memory/allocation.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/allocation.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -144,8 +144,10 @@ mtNMT = 0x0A00, // memory used by native memory tracking mtChunk = 0x0B00, // chunk that holds content of arenas mtJavaHeap = 0x0C00, // Java heap - mtDontTrack = 0x0D00, // memory we donot or cannot track - mt_number_of_types = 0x000C, // number of memory types + mtClassShared = 0x0D00, // class data sharing + mt_number_of_types = 0x000D, // number of memory types (mtDontTrack + // is not included as validate type) + mtDontTrack = 0x0E00, // memory we do not or cannot track mt_masks = 0x7F00, // object type mask @@ -342,7 +344,6 @@ public: Arena(); Arena(size_t init_size); - Arena(Arena *old); ~Arena(); void destruct_contents(); char* hwm() const { return _hwm; } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/binaryTreeDictionary.cpp --- a/src/share/vm/memory/binaryTreeDictionary.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/binaryTreeDictionary.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -25,9 +25,15 @@ #include "precompiled.hpp" #include "gc_implementation/shared/allocationStats.hpp" #include "memory/binaryTreeDictionary.hpp" +#include "memory/freeList.hpp" +#include "memory/freeBlockDictionary.hpp" +#include "memory/metablock.hpp" +#include "memory/metachunk.hpp" #include "runtime/globals.hpp" #include "utilities/ostream.hpp" #ifndef SERIALGC +#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp" +#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp" #include "gc_implementation/shared/spaceDecorator.hpp" #include "gc_implementation/concurrentMarkSweep/freeChunk.hpp" #endif // SERIALGC @@ -37,15 +43,18 @@ // This is currently used in the Concurrent Mark&Sweep implementation. //////////////////////////////////////////////////////////////////////////////// -template -TreeChunk* TreeChunk::as_TreeChunk(Chunk* fc) { +template class FreeList_t> +size_t TreeChunk::_min_tree_chunk_size = sizeof(TreeChunk)/HeapWordSize; + +template class FreeList_t> +TreeChunk* TreeChunk::as_TreeChunk(Chunk_t* fc) { // Do some assertion checking here. - return (TreeChunk*) fc; + return (TreeChunk*) fc; } -template -void TreeChunk::verify_tree_chunk_list() const { - TreeChunk* nextTC = (TreeChunk*)next(); +template class FreeList_t> +void TreeChunk::verify_tree_chunk_list() const { + TreeChunk* nextTC = (TreeChunk*)next(); if (prev() != NULL) { // interior list node shouldn'r have tree fields guarantee(embedded_list()->parent() == NULL && embedded_list()->left() == NULL && embedded_list()->right() == NULL, "should be clear"); @@ -57,53 +66,113 @@ } } +template class FreeList_t> +TreeList::TreeList() {} -template -TreeList* TreeList::as_TreeList(TreeChunk* tc) { +template class FreeList_t> +TreeList* +TreeList::as_TreeList(TreeChunk* tc) { // This first free chunk in the list will be the tree list. 
- assert(tc->size() >= BinaryTreeDictionary::min_tree_chunk_size, "Chunk is too small for a TreeChunk"); - TreeList* tl = tc->embedded_list(); + assert((tc->size() >= (TreeChunk::min_size())), + "Chunk is too small for a TreeChunk"); + TreeList* tl = tc->embedded_list(); + tl->initialize(); tc->set_list(tl); -#ifdef ASSERT - tl->set_protecting_lock(NULL); -#endif - tl->set_hint(0); tl->set_size(tc->size()); tl->link_head(tc); tl->link_tail(tc); tl->set_count(1); - tl->init_statistics(true /* split_birth */); - tl->set_parent(NULL); - tl->set_left(NULL); - tl->set_right(NULL); + + return tl; +} + + +template class FreeList_t> +TreeList* +get_chunk(size_t size, enum FreeBlockDictionary::Dither dither) { + FreeBlockDictionary::verify_par_locked(); + Chunk_t* res = get_chunk_from_tree(size, dither); + assert(res == NULL || res->is_free(), + "Should be returning a free chunk"); + assert(dither != FreeBlockDictionary::exactly || + res->size() == size, "Not correct size"); + return res; +} + +template class FreeList_t> +TreeList* +TreeList::as_TreeList(HeapWord* addr, size_t size) { + TreeChunk* tc = (TreeChunk*) addr; + assert((size >= TreeChunk::min_size()), + "Chunk is too small for a TreeChunk"); + // The space will have been mangled initially but + // is not remangled when a Chunk_t is returned to the free list + // (since it is used to maintain the chunk on the free list). + tc->assert_is_mangled(); + tc->set_size(size); + tc->link_prev(NULL); + tc->link_next(NULL); + TreeList* tl = TreeList::as_TreeList(tc); return tl; } -template -TreeList* TreeList::as_TreeList(HeapWord* addr, size_t size) { - TreeChunk* tc = (TreeChunk*) addr; - assert(size >= BinaryTreeDictionary::min_tree_chunk_size, "Chunk is too small for a TreeChunk"); - // The space in the heap will have been mangled initially but - // is not remangled when a free chunk is returned to the free list - // (since it is used to maintain the chunk on the free list). - assert((ZapUnusedHeapArea && - SpaceMangler::is_mangled((HeapWord*) tc->size_addr()) && - SpaceMangler::is_mangled((HeapWord*) tc->prev_addr()) && - SpaceMangler::is_mangled((HeapWord*) tc->next_addr())) || - (tc->size() == 0 && tc->prev() == NULL && tc->next() == NULL), - "Space should be clear or mangled"); - tc->set_size(size); - tc->link_prev(NULL); - tc->link_next(NULL); - TreeList* tl = TreeList::as_TreeList(tc); - return tl; + +#ifndef SERIALGC +// Specialize for AdaptiveFreeList which tries to avoid +// splitting a chunk of a size that is under populated in favor of +// an over populated size. The general get_better_list() just returns +// the current list. +template <> +TreeList* +TreeList::get_better_list( + BinaryTreeDictionary* dictionary) { + // A candidate chunk has been found. If it is already under + // populated, get a chunk associated with the hint for this + // chunk. + + TreeList* curTL = this; + if (surplus() <= 0) { + /* Use the hint to find a size with a surplus, and reset the hint. */ + TreeList* hintTL = this; + while (hintTL->hint() != 0) { + assert(hintTL->hint() > hintTL->size(), + "hint points in the wrong direction"); + hintTL = dictionary->find_list(hintTL->hint()); + assert(curTL != hintTL, "Infinite loop"); + if (hintTL == NULL || + hintTL == curTL /* Should not happen but protect against it */ ) { + // No useful hint. Set the hint to NULL and go on. + curTL->set_hint(0); + break; + } + assert(hintTL->size() > curTL->size(), "hint is inconsistent"); + if (hintTL->surplus() > 0) { + // The hint led to a list that has a surplus. Use it. 
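A simplified sketch, with hypothetical stand-in structs rather than the real TreeChunk/TreeList templates, of why the as_TreeList() paths above insist on size >= min_size(): the tree node is embedded inside the first free chunk of a size class, so only a chunk of at least sizeof(TreeChunk)/HeapWordSize words can carry it. HeapWordSize is assumed here to equal sizeof(void*).

#include <cassert>
#include <cstddef>
#include <new>

static const size_t HeapWordSize = sizeof(void*);  // assumption for this sketch

struct FakeChunk    { FakeChunk* next; FakeChunk* prev; size_t word_size; };
struct FakeFreeList { FakeChunk* head; FakeChunk* tail; size_t count; };
struct FakeTreeNode : FakeChunk {                  // the tree node lives inside the chunk
  FakeFreeList  embedded_list;
  FakeTreeNode* left;
  FakeTreeNode* right;
  FakeTreeNode* parent;
};

// Minimum chunk size (in heap words) able to host the embedded tree node.
static size_t min_tree_chunk_size() { return sizeof(FakeTreeNode) / HeapWordSize; }

// Only chunks at least that large may be reinterpreted as tree nodes.
static FakeTreeNode* as_tree_node(FakeChunk* fc) {
  assert(fc->word_size >= min_tree_chunk_size() && "Chunk is too small for a tree node");
  return static_cast<FakeTreeNode*>(fc);
}

int main() {
  alignas(FakeTreeNode) unsigned char space[sizeof(FakeTreeNode)];
  FakeChunk* fc = new (space) FakeTreeNode();      // free space doubling as a node
  fc->word_size = min_tree_chunk_size();
  assert(as_tree_node(fc) != nullptr);
  return 0;
}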
+ // Set the hint for the candidate to an overpopulated + // size. + curTL->set_hint(hintTL->size()); + // Change the candidate. + curTL = hintTL; + break; + } + } + } + return curTL; +} +#endif // SERIALGC + +template class FreeList_t> +TreeList* +TreeList::get_better_list( + BinaryTreeDictionary* dictionary) { + return this; } -template -TreeList* TreeList::remove_chunk_replace_if_needed(TreeChunk* tc) { +template class FreeList_t> +TreeList* TreeList::remove_chunk_replace_if_needed(TreeChunk* tc) { - TreeList* retTL = this; - Chunk* list = head(); + TreeList* retTL = this; + Chunk_t* list = head(); assert(!list || list != list->next(), "Chunk on list twice"); assert(tc != NULL, "Chunk being removed is NULL"); assert(parent() == NULL || this == parent()->left() || @@ -112,13 +181,13 @@ assert(head() == NULL || head()->prev() == NULL, "list invariant"); assert(tail() == NULL || tail()->next() == NULL, "list invariant"); - Chunk* prevFC = tc->prev(); - TreeChunk* nextTC = TreeChunk::as_TreeChunk(tc->next()); + Chunk_t* prevFC = tc->prev(); + TreeChunk* nextTC = TreeChunk::as_TreeChunk(tc->next()); assert(list != NULL, "should have at least the target chunk"); // Is this the first item on the list? if (tc == list) { - // The "getChunk..." functions for a TreeList will not return the + // The "getChunk..." functions for a TreeList will not return the // first chunk in the list unless it is the last chunk in the list // because the first chunk is also acting as the tree node. // When coalescing happens, however, the first chunk in the a tree @@ -127,8 +196,8 @@ // allocated when the sweeper yields (giving up the free list lock) // to allow mutator activity. If this chunk is the first in the // list and is not the last in the list, do the work to copy the - // TreeList from the first chunk to the next chunk and update all - // the TreeList pointers in the chunks in the list. + // TreeList from the first chunk to the next chunk and update all + // the TreeList pointers in the chunks in the list. if (nextTC == NULL) { assert(prevFC == NULL, "Not last chunk in the list"); set_tail(NULL); @@ -141,11 +210,11 @@ // This can be slow for a long list. Consider having // an option that does not allow the first chunk on the // list to be coalesced. - for (TreeChunk* curTC = nextTC; curTC != NULL; - curTC = TreeChunk::as_TreeChunk(curTC->next())) { + for (TreeChunk* curTC = nextTC; curTC != NULL; + curTC = TreeChunk::as_TreeChunk(curTC->next())) { curTC->set_list(retTL); } - // Fix the parent to point to the new TreeList. + // Fix the parent to point to the new TreeList. if (retTL->parent() != NULL) { if (this == retTL->parent()->left()) { retTL->parent()->set_left(retTL); @@ -176,9 +245,9 @@ prevFC->link_after(nextTC); } - // Below this point the embeded TreeList being used for the + // Below this point the embeded TreeList being used for the // tree node may have changed. Don't use "this" - // TreeList*. + // TreeList*. 
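The hint walk above is the heart of the new AdaptiveFreeList specialization of get_better_list(): when the candidate size class is under-populated, follow its hint chain until a size with a surplus turns up. A compilable sketch with illustrative stand-in types (SizeList and the lookup callback are not HotSpot names):

#include <cassert>
#include <cstddef>

struct SizeList {
  size_t size;     // chunk size this list holds
  long   surplus;  // count - desired; <= 0 means under-populated
  size_t hint;     // a larger size believed to have surplus (0 = none)
};

typedef SizeList* (*FindListFn)(size_t);

// Follow the hint chain only when the candidate is under-populated.
SizeList* get_better_list(SizeList* cand, FindListFn find_list) {
  SizeList* cur = cand;
  if (cur->surplus <= 0) {
    SizeList* hinted = cur;
    while (hinted->hint != 0) {
      assert(hinted->hint > hinted->size && "hint points in the wrong direction");
      hinted = find_list(hinted->hint);
      if (hinted == nullptr || hinted == cur) {
        cur->hint = 0;            // no useful hint: clear it, keep the candidate
        break;
      }
      if (hinted->surplus > 0) {
        cur->hint = hinted->size; // remember the over-populated size
        cur = hinted;             // and take the chunk from there instead
        break;
      }
    }
  }
  return cur;   // the general (non-adaptive) version simply returns `this`
}

// Tiny illustration: the 64-word list is under-populated, the 256-word list is not.
static SizeList lists[] = { {64, 0, 256}, {256, 3, 0} };
static SizeList* lookup(size_t sz) {
  for (SizeList& l : lists) if (l.size == sz) return &l;
  return nullptr;
}

int main() {
  SizeList* better = get_better_list(&lists[0], lookup);
  assert(better == &lists[1]);    // the hint led to the list with a surplus
  return 0;
}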
// chunk should still be a free chunk (bit set in _prev) assert(!retTL->head() || retTL->size() == retTL->head()->size(), "Wrong sized chunk in list"); @@ -188,7 +257,7 @@ tc->set_list(NULL); bool prev_found = false; bool next_found = false; - for (Chunk* curFC = retTL->head(); + for (Chunk_t* curFC = retTL->head(); curFC != NULL; curFC = curFC->next()) { assert(curFC != tc, "Chunk is still in list"); if (curFC == prevFC) { @@ -215,8 +284,8 @@ return retTL; } -template -void TreeList::return_chunk_at_tail(TreeChunk* chunk) { +template class FreeList_t> +void TreeList::return_chunk_at_tail(TreeChunk* chunk) { assert(chunk != NULL, "returning NULL chunk"); assert(chunk->list() == this, "list should be set for chunk"); assert(tail() != NULL, "The tree list is embedded in the first chunk"); @@ -225,12 +294,12 @@ assert(head() == NULL || head()->prev() == NULL, "list invariant"); assert(tail() == NULL || tail()->next() == NULL, "list invariant"); - Chunk* fc = tail(); + Chunk_t* fc = tail(); fc->link_after(chunk); link_tail(chunk); assert(!tail() || size() == tail()->size(), "Wrong sized chunk in list"); - increment_count(); + FreeList_t::increment_count(); debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));) assert(head() == NULL || head()->prev() == NULL, "list invariant"); assert(tail() == NULL || tail()->next() == NULL, "list invariant"); @@ -238,10 +307,10 @@ // Add this chunk at the head of the list. "At the head of the list" // is defined to be after the chunk pointer to by head(). This is -// because the TreeList is embedded in the first TreeChunk in the -// list. See the definition of TreeChunk. -template -void TreeList::return_chunk_at_head(TreeChunk* chunk) { +// because the TreeList is embedded in the first TreeChunk in the +// list. See the definition of TreeChunk. 
+template class FreeList_t> +void TreeList::return_chunk_at_head(TreeChunk* chunk) { assert(chunk->list() == this, "list should be set for chunk"); assert(head() != NULL, "The tree list is embedded in the first chunk"); assert(chunk != NULL, "returning NULL chunk"); @@ -249,7 +318,7 @@ assert(head() == NULL || head()->prev() == NULL, "list invariant"); assert(tail() == NULL || tail()->next() == NULL, "list invariant"); - Chunk* fc = head()->next(); + Chunk_t* fc = head()->next(); if (fc != NULL) { chunk->link_after(fc); } else { @@ -258,28 +327,38 @@ } head()->link_after(chunk); assert(!head() || size() == head()->size(), "Wrong sized chunk in list"); - increment_count(); + FreeList_t::increment_count(); debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));) assert(head() == NULL || head()->prev() == NULL, "list invariant"); assert(tail() == NULL || tail()->next() == NULL, "list invariant"); } -template -TreeChunk* TreeList::head_as_TreeChunk() { - assert(head() == NULL || TreeChunk::as_TreeChunk(head())->list() == this, - "Wrong type of chunk?"); - return TreeChunk::as_TreeChunk(head()); +template class FreeList_t> +void TreeChunk::assert_is_mangled() const { + assert((ZapUnusedHeapArea && + SpaceMangler::is_mangled((HeapWord*) Chunk_t::size_addr()) && + SpaceMangler::is_mangled((HeapWord*) Chunk_t::prev_addr()) && + SpaceMangler::is_mangled((HeapWord*) Chunk_t::next_addr())) || + (size() == 0 && prev() == NULL && next() == NULL), + "Space should be clear or mangled"); } -template -TreeChunk* TreeList::first_available() { +template class FreeList_t> +TreeChunk* TreeList::head_as_TreeChunk() { + assert(head() == NULL || (TreeChunk::as_TreeChunk(head())->list() == this), + "Wrong type of chunk?"); + return TreeChunk::as_TreeChunk(head()); +} + +template class FreeList_t> +TreeChunk* TreeList::first_available() { assert(head() != NULL, "The head of the list cannot be NULL"); - Chunk* fc = head()->next(); - TreeChunk* retTC; + Chunk_t* fc = head()->next(); + TreeChunk* retTC; if (fc == NULL) { retTC = head_as_TreeChunk(); } else { - retTC = TreeChunk::as_TreeChunk(fc); + retTC = TreeChunk::as_TreeChunk(fc); } assert(retTC->list() == this, "Wrong type of chunk."); return retTC; @@ -288,41 +367,32 @@ // Returns the block with the largest heap address amongst // those in the list for this size; potentially slow and expensive, // use with caution! -template -TreeChunk* TreeList::largest_address() { +template class FreeList_t> +TreeChunk* TreeList::largest_address() { assert(head() != NULL, "The head of the list cannot be NULL"); - Chunk* fc = head()->next(); - TreeChunk* retTC; + Chunk_t* fc = head()->next(); + TreeChunk* retTC; if (fc == NULL) { retTC = head_as_TreeChunk(); } else { // walk down the list and return the one with the highest // heap address among chunks of this size. 
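As the comment above spells out, "at the head" really means "immediately after head()", because the first chunk carries the embedded TreeList and must stay put. A small standalone sketch of that pinned-head insertion (types are illustrative, not the HotSpot classes):

#include <cassert>
#include <cstddef>

struct Chunk {
  Chunk* prev = nullptr;
  Chunk* next = nullptr;
};

// The first chunk doubles as the tree node, so it must stay first: adding
// "at the head" links the new chunk immediately after head().
struct PinnedHeadList {
  Chunk* head = nullptr;
  Chunk* tail = nullptr;
  size_t count = 0;

  void return_chunk_at_head(Chunk* c) {
    assert(head != nullptr && "the tree node lives in the first chunk");
    Chunk* after = head->next;
    c->prev = head;
    c->next = after;
    head->next = c;
    if (after != nullptr) after->prev = c; else tail = c;
    ++count;
  }
};

int main() {
  Chunk node, a, b;
  PinnedHeadList l;
  l.head = l.tail = &node; l.count = 1;   // the chunk holding the embedded tree node
  l.return_chunk_at_head(&a);
  l.return_chunk_at_head(&b);
  assert(l.head == &node);                // head never changes
  assert(l.head->next == &b && b.next == &a);
  return 0;
}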
- Chunk* last = fc; + Chunk_t* last = fc; while (fc->next() != NULL) { if ((HeapWord*)last < (HeapWord*)fc) { last = fc; } fc = fc->next(); } - retTC = TreeChunk::as_TreeChunk(last); + retTC = TreeChunk::as_TreeChunk(last); } assert(retTC->list() == this, "Wrong type of chunk."); return retTC; } -template -BinaryTreeDictionary::BinaryTreeDictionary(bool adaptive_freelists, bool splay) : - _splay(splay), _adaptive_freelists(adaptive_freelists), - _total_size(0), _total_free_blocks(0), _root(0) {} - -template -BinaryTreeDictionary::BinaryTreeDictionary(MemRegion mr, - bool adaptive_freelists, - bool splay): - _adaptive_freelists(adaptive_freelists), _splay(splay) -{ - assert(mr.word_size() >= BinaryTreeDictionary::min_tree_chunk_size, "minimum chunk size"); +template class FreeList_t> +BinaryTreeDictionary::BinaryTreeDictionary(MemRegion mr) { + assert((mr.byte_size() > min_size()), "minimum chunk size"); reset(mr); assert(root()->left() == NULL, "reset check failed"); @@ -333,52 +403,48 @@ assert(total_free_blocks() == 1, "reset check failed"); } -template -void BinaryTreeDictionary::inc_total_size(size_t inc) { +template class FreeList_t> +void BinaryTreeDictionary::inc_total_size(size_t inc) { _total_size = _total_size + inc; } -template -void BinaryTreeDictionary::dec_total_size(size_t dec) { +template class FreeList_t> +void BinaryTreeDictionary::dec_total_size(size_t dec) { _total_size = _total_size - dec; } -template -void BinaryTreeDictionary::reset(MemRegion mr) { - assert(mr.word_size() >= BinaryTreeDictionary::min_tree_chunk_size, "minimum chunk size"); - set_root(TreeList::as_TreeList(mr.start(), mr.word_size())); +template class FreeList_t> +void BinaryTreeDictionary::reset(MemRegion mr) { + assert((mr.byte_size() > min_size()), "minimum chunk size"); + set_root(TreeList::as_TreeList(mr.start(), mr.word_size())); set_total_size(mr.word_size()); set_total_free_blocks(1); } -template -void BinaryTreeDictionary::reset(HeapWord* addr, size_t byte_size) { +template class FreeList_t> +void BinaryTreeDictionary::reset(HeapWord* addr, size_t byte_size) { MemRegion mr(addr, heap_word_size(byte_size)); reset(mr); } -template -void BinaryTreeDictionary::reset() { +template class FreeList_t> +void BinaryTreeDictionary::reset() { set_root(NULL); set_total_size(0); set_total_free_blocks(0); } // Get a free block of size at least size from tree, or NULL. -// If a splay step is requested, the removal algorithm (only) incorporates -// a splay step as follows: -// . the search proceeds down the tree looking for a possible -// match. At the (closest) matching location, an appropriate splay step is applied -// (zig, zig-zig or zig-zag). A chunk of the appropriate size is then returned -// if available, and if it's the last chunk, the node is deleted. A deteleted -// node is replaced in place by its tree successor. 
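A straightforward version of the walk the largest_address() comment describes: scan the chunks of one size class and keep the one with the highest address. The real code operates on TreeChunk lists and casts to HeapWord*; this sketch just compares the chunk pointers of a toy list.

#include <cassert>

struct Chunk { Chunk* next; };

// Return the chunk with the highest address among those on the list;
// potentially O(n), so use sparingly.
Chunk* largest_address(Chunk* head) {
  Chunk* best = head;
  for (Chunk* c = head; c != nullptr; c = c->next) {
    if (c > best) best = c;       // addresses within one contiguous space
  }
  return best;
}

int main() {
  Chunk chunks[3];
  chunks[0].next = &chunks[2];
  chunks[2].next = &chunks[1];
  chunks[1].next = nullptr;
  assert(largest_address(&chunks[0]) == &chunks[2]);
  return 0;
}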
-template -TreeChunk* -BinaryTreeDictionary::get_chunk_from_tree(size_t size, enum FreeBlockDictionary::Dither dither, bool splay) +template class FreeList_t> +TreeChunk* +BinaryTreeDictionary::get_chunk_from_tree( + size_t size, + enum FreeBlockDictionary::Dither dither) { - TreeList *curTL, *prevTL; - TreeChunk* retTC = NULL; - assert(size >= BinaryTreeDictionary::min_tree_chunk_size, "minimum chunk size"); + TreeList *curTL, *prevTL; + TreeChunk* retTC = NULL; + + assert((size >= min_size()), "minimum chunk size"); if (FLSVerifyDictionary) { verify_tree(); } @@ -398,7 +464,7 @@ } if (curTL == NULL) { // couldn't find exact match - if (dither == FreeBlockDictionary::exactly) return NULL; + if (dither == FreeBlockDictionary::exactly) return NULL; // try and find the next larger size by walking back up the search path for (curTL = prevTL; curTL != NULL;) { @@ -410,46 +476,9 @@ } if (curTL != NULL) { assert(curTL->size() >= size, "size inconsistency"); - if (adaptive_freelists()) { - // A candidate chunk has been found. If it is already under - // populated, get a chunk associated with the hint for this - // chunk. - if (curTL->surplus() <= 0) { - /* Use the hint to find a size with a surplus, and reset the hint. */ - TreeList* hintTL = curTL; - while (hintTL->hint() != 0) { - assert(hintTL->hint() == 0 || hintTL->hint() > hintTL->size(), - "hint points in the wrong direction"); - hintTL = find_list(hintTL->hint()); - assert(curTL != hintTL, "Infinite loop"); - if (hintTL == NULL || - hintTL == curTL /* Should not happen but protect against it */ ) { - // No useful hint. Set the hint to NULL and go on. - curTL->set_hint(0); - break; - } - assert(hintTL->size() > size, "hint is inconsistent"); - if (hintTL->surplus() > 0) { - // The hint led to a list that has a surplus. Use it. - // Set the hint for the candidate to an overpopulated - // size. - curTL->set_hint(hintTL->size()); - // Change the candidate. - curTL = hintTL; - break; - } - // The evm code reset the hint of the candidate as - // at an interim point. Why? Seems like this leaves - // the hint pointing to a list that didn't work. 
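A sketch of the best-fit lookup that get_chunk_from_tree() performs on the size-keyed tree: find the smallest list whose size is at least the request, or fail when the caller asked for an exact size. The real code descends and then walks back up the search path; remembering the best candidate on the way down, as below, yields the same result. Node and find_fit are illustrative names.

#include <cassert>
#include <cstddef>

struct Node {
  size_t size;
  Node*  left;
  Node*  right;
};

// Find the list whose size is the smallest value >= request ("best fit"),
// or nullptr if only an exact match is acceptable and none exists.
Node* find_fit(Node* root, size_t request, bool exactly) {
  Node* best = nullptr;
  for (Node* cur = root; cur != nullptr; ) {
    if (cur->size == request) return cur;           // exact match
    if (cur->size < request) {
      cur = cur->right;                             // need something bigger
    } else {
      best = cur;                                   // candidate; look for a tighter fit
      cur = cur->left;
    }
  }
  return exactly ? nullptr : best;
}

int main() {
  Node n32  = {32, nullptr, nullptr};
  Node n128 = {128, nullptr, nullptr};
  Node n64  = {64, &n32, &n128};                    // root
  assert(find_fit(&n64, 64, true)  == &n64);        // exact
  assert(find_fit(&n64, 96, false) == &n128);       // next larger size
  assert(find_fit(&n64, 96, true)  == nullptr);     // "exactly" dither fails
  return 0;
}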
- // curTL->set_hint(hintTL->size()); - } - } - } - // don't waste time splaying if chunk's singleton - if (splay && curTL->head()->next() != NULL) { - semi_splay_step(curTL); - } + curTL = curTL->get_better_list(this); + retTC = curTL->first_available(); assert((retTC != NULL) && (curTL->count() > 0), "A list in the binary tree should not be NULL"); @@ -465,9 +494,9 @@ return retTC; } -template -TreeList* BinaryTreeDictionary::find_list(size_t size) const { - TreeList* curTL; +template class FreeList_t> +TreeList* BinaryTreeDictionary::find_list(size_t size) const { + TreeList* curTL; for (curTL = root(); curTL != NULL;) { if (curTL->size() == size) { // exact match break; @@ -484,10 +513,10 @@ } -template -bool BinaryTreeDictionary::verify_chunk_in_free_list(Chunk* tc) const { +template class FreeList_t> +bool BinaryTreeDictionary::verify_chunk_in_free_list(Chunk_t* tc) const { size_t size = tc->size(); - TreeList* tl = find_list(size); + TreeList* tl = find_list(size); if (tl == NULL) { return false; } else { @@ -495,9 +524,9 @@ } } -template -Chunk* BinaryTreeDictionary::find_largest_dict() const { - TreeList *curTL = root(); +template class FreeList_t> +Chunk_t* BinaryTreeDictionary::find_largest_dict() const { + TreeList *curTL = root(); if (curTL != NULL) { while(curTL->right() != NULL) curTL = curTL->right(); return curTL->largest_address(); @@ -510,15 +539,15 @@ // chunk in a list on a tree node, just unlink it. // If it is the last chunk in the list (the next link is NULL), // remove the node and repair the tree. -template -TreeChunk* -BinaryTreeDictionary::remove_chunk_from_tree(TreeChunk* tc) { +template class FreeList_t> +TreeChunk* +BinaryTreeDictionary::remove_chunk_from_tree(TreeChunk* tc) { assert(tc != NULL, "Should not call with a NULL chunk"); assert(tc->is_free(), "Header is not marked correctly"); - TreeList *newTL, *parentTL; - TreeChunk* retTC; - TreeList* tl = tc->list(); + TreeList *newTL, *parentTL; + TreeChunk* retTC; + TreeList* tl = tc->list(); debug_only( bool removing_only_chunk = false; if (tl == _root) { @@ -538,8 +567,8 @@ retTC = tc; // Removing this chunk can have the side effect of changing the node - // (TreeList*) in the tree. If the node is the root, update it. - TreeList* replacementTL = tl->remove_chunk_replace_if_needed(tc); + // (TreeList*) in the tree. If the node is the root, update it. + TreeList* replacementTL = tl->remove_chunk_replace_if_needed(tc); assert(tc->is_free(), "Chunk should still be free"); assert(replacementTL->parent() == NULL || replacementTL == replacementTL->parent()->left() || @@ -549,17 +578,18 @@ assert(replacementTL->parent() == NULL, "Incorrectly replacing root"); set_root(replacementTL); } - debug_only( +#ifdef ASSERT if (tl != replacementTL) { assert(replacementTL->head() != NULL, "If the tree list was replaced, it should not be a NULL list"); - TreeList* rhl = replacementTL->head_as_TreeChunk()->list(); - TreeList* rtl = TreeChunk::as_TreeChunk(replacementTL->tail())->list(); + TreeList* rhl = replacementTL->head_as_TreeChunk()->list(); + TreeList* rtl = + TreeChunk::as_TreeChunk(replacementTL->tail())->list(); assert(rhl == replacementTL, "Broken head"); assert(rtl == replacementTL, "Broken tail"); assert(replacementTL->size() == tc->size(), "Broken size"); } - ) +#endif // Does the tree need to be repaired? 
if (replacementTL->count() == 0) { @@ -574,7 +604,7 @@ } else if (replacementTL->right() == NULL) { // right is NULL newTL = replacementTL->left(); - debug_only(replacementTL->clearLeft();) + debug_only(replacementTL->clear_left();) } else { // we have both children, so, by patriarchal convention, // my replacement is least node in right sub-tree complicated_splice = true; @@ -623,7 +653,7 @@ newTL->set_right(replacementTL->right()); debug_only( replacementTL->clear_right(); - replacementTL->clearLeft(); + replacementTL->clear_left(); ) } assert(replacementTL->right() == NULL && @@ -644,21 +674,21 @@ verify_tree(); } assert(!removing_only_chunk || _root == NULL, "root should be NULL"); - return TreeChunk::as_TreeChunk(retTC); + return TreeChunk::as_TreeChunk(retTC); } // Remove the leftmost node (lm) in the tree and return it. // If lm has a right child, link it to the left node of // the parent of lm. -template -TreeList* BinaryTreeDictionary::remove_tree_minimum(TreeList* tl) { +template class FreeList_t> +TreeList* BinaryTreeDictionary::remove_tree_minimum(TreeList* tl) { assert(tl != NULL && tl->parent() != NULL, "really need a proper sub-tree"); // locate the subtree minimum by walking down left branches - TreeList* curTL = tl; + TreeList* curTL = tl; for (; curTL->left() != NULL; curTL = curTL->left()); // obviously curTL now has at most one child, a right child if (curTL != root()) { // Should this test just be removed? - TreeList* parentTL = curTL->parent(); + TreeList* parentTL = curTL->parent(); if (parentTL->left() == curTL) { // curTL is a left child parentTL->set_left(curTL->right()); } else { @@ -685,31 +715,14 @@ return curTL; } -// Based on a simplification of the algorithm by Sleator and Tarjan (JACM 1985). -// The simplifications are the following: -// . we splay only when we delete (not when we insert) -// . we apply a single spay step per deletion/access -// By doing such partial splaying, we reduce the amount of restructuring, -// while getting a reasonably efficient search tree (we think). -// [Measurements will be needed to (in)validate this expectation.] - -template -void BinaryTreeDictionary::semi_splay_step(TreeList* tc) { - // apply a semi-splay step at the given node: - // . if root, norting needs to be done - // . if child of root, splay once - // . else zig-zig or sig-zag depending on path from grandparent - if (root() == tc) return; - warning("*** Splaying not yet implemented; " - "tree operations may be inefficient ***"); -} - -template -void BinaryTreeDictionary::insert_chunk_in_tree(Chunk* fc) { - TreeList *curTL, *prevTL; +template class FreeList_t> +void BinaryTreeDictionary::insert_chunk_in_tree(Chunk_t* fc) { + TreeList *curTL, *prevTL; size_t size = fc->size(); - assert(size >= BinaryTreeDictionary::min_tree_chunk_size, "too small to be a TreeList"); + assert((size >= min_size()), + err_msg(SIZE_FORMAT " is too small to be a TreeChunk " SIZE_FORMAT, + size, min_size())); if (FLSVerifyDictionary) { verify_tree(); } @@ -729,9 +742,9 @@ curTL = curTL->right(); } } - TreeChunk* tc = TreeChunk::as_TreeChunk(fc); + TreeChunk* tc = TreeChunk::as_TreeChunk(fc); // This chunk is being returned to the binary tree. Its embedded - // TreeList should be unused at this point. + // TreeList should be unused at this point. 
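The subtree-minimum removal used when repairing the tree ("my replacement is least node in right sub-tree") reduces to the classic BST splice sketched below, minus the root handling and census bookkeeping of the real remove_tree_minimum(). Types and names are stand-ins.

#include <cassert>
#include <cstddef>

struct Node {
  size_t size;
  Node* parent;
  Node* left;
  Node* right;
};

// Remove and return the minimum node of the subtree rooted at tl,
// re-linking its right child (if any) in its place.
Node* remove_tree_minimum(Node* tl) {
  Node* cur = tl;
  while (cur->left != nullptr) cur = cur->left;     // leftmost == smallest size
  Node* p = cur->parent;
  if (p != nullptr) {
    if (p->left == cur) p->left = cur->right;       // splice the right child in
    else                p->right = cur->right;
    if (cur->right != nullptr) cur->right->parent = p;
  }
  cur->parent = cur->left = cur->right = nullptr;   // detach the removed node
  return cur;
}

int main() {
  Node a    = {16, nullptr, nullptr, nullptr};
  Node b    = {32, nullptr, &a, nullptr};
  Node root = {64, nullptr, &b, nullptr};
  a.parent = &b; b.parent = &root;
  Node* mn = remove_tree_minimum(&root);
  assert(mn == &a && b.left == nullptr);
  return 0;
}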
tc->initialize(); if (curTL != NULL) { // exact match tc->set_list(curTL); @@ -739,8 +752,8 @@ } else { // need a new node in tree tc->clear_next(); tc->link_prev(NULL); - TreeList* newTL = TreeList::as_TreeList(tc); - assert(((TreeChunk*)tc)->list() == newTL, + TreeList* newTL = TreeList::as_TreeList(tc); + assert(((TreeChunk*)tc)->list() == newTL, "List was not initialized correctly"); if (prevTL == NULL) { // we are the only tree node assert(root() == NULL, "control point invariant"); @@ -768,30 +781,30 @@ } } -template -size_t BinaryTreeDictionary::max_chunk_size() const { - FreeBlockDictionary::verify_par_locked(); - TreeList* tc = root(); +template class FreeList_t> +size_t BinaryTreeDictionary::max_chunk_size() const { + FreeBlockDictionary::verify_par_locked(); + TreeList* tc = root(); if (tc == NULL) return 0; for (; tc->right() != NULL; tc = tc->right()); return tc->size(); } -template -size_t BinaryTreeDictionary::total_list_length(TreeList* tl) const { +template class FreeList_t> +size_t BinaryTreeDictionary::total_list_length(TreeList* tl) const { size_t res; res = tl->count(); #ifdef ASSERT size_t cnt; - Chunk* tc = tl->head(); + Chunk_t* tc = tl->head(); for (cnt = 0; tc != NULL; tc = tc->next(), cnt++); assert(res == cnt, "The count is not being maintained correctly"); #endif return res; } -template -size_t BinaryTreeDictionary::total_size_in_tree(TreeList* tl) const { +template class FreeList_t> +size_t BinaryTreeDictionary::total_size_in_tree(TreeList* tl) const { if (tl == NULL) return 0; return (tl->size() * total_list_length(tl)) + @@ -799,8 +812,8 @@ total_size_in_tree(tl->right()); } -template -double BinaryTreeDictionary::sum_of_squared_block_sizes(TreeList* const tl) const { +template class FreeList_t> +double BinaryTreeDictionary::sum_of_squared_block_sizes(TreeList* const tl) const { if (tl == NULL) { return 0.0; } @@ -811,8 +824,8 @@ return curr; } -template -size_t BinaryTreeDictionary::total_free_blocks_in_tree(TreeList* tl) const { +template class FreeList_t> +size_t BinaryTreeDictionary::total_free_blocks_in_tree(TreeList* tl) const { if (tl == NULL) return 0; return total_list_length(tl) + @@ -820,28 +833,28 @@ total_free_blocks_in_tree(tl->right()); } -template -size_t BinaryTreeDictionary::num_free_blocks() const { +template class FreeList_t> +size_t BinaryTreeDictionary::num_free_blocks() const { assert(total_free_blocks_in_tree(root()) == total_free_blocks(), "_total_free_blocks inconsistency"); return total_free_blocks(); } -template -size_t BinaryTreeDictionary::tree_height_helper(TreeList* tl) const { +template class FreeList_t> +size_t BinaryTreeDictionary::tree_height_helper(TreeList* tl) const { if (tl == NULL) return 0; return 1 + MAX2(tree_height_helper(tl->left()), tree_height_helper(tl->right())); } -template -size_t BinaryTreeDictionary::treeHeight() const { +template class FreeList_t> +size_t BinaryTreeDictionary::tree_height() const { return tree_height_helper(root()); } -template -size_t BinaryTreeDictionary::total_nodes_helper(TreeList* tl) const { +template class FreeList_t> +size_t BinaryTreeDictionary::total_nodes_helper(TreeList* tl) const { if (tl == NULL) { return 0; } @@ -849,14 +862,18 @@ total_nodes_helper(tl->right()); } -template -size_t BinaryTreeDictionary::total_nodes_in_tree(TreeList* tl) const { +template class FreeList_t> +size_t BinaryTreeDictionary::total_nodes_in_tree(TreeList* tl) const { return total_nodes_helper(root()); } -template -void BinaryTreeDictionary::dict_census_udpate(size_t size, bool split, bool birth){ 
- TreeList* nd = find_list(size); +template class FreeList_t> +void BinaryTreeDictionary::dict_census_update(size_t size, bool split, bool birth){} + +#ifndef SERIALGC +template <> +void BinaryTreeDictionary::dict_census_update(size_t size, bool split, bool birth){ + TreeList* nd = find_list(size); if (nd) { if (split) { if (birth) { @@ -882,16 +899,26 @@ // This is a birth associated with a LinAB. The chunk // for the LinAB is not in the dictionary. } +#endif // SERIALGC -template -bool BinaryTreeDictionary::coal_dict_over_populated(size_t size) { +template class FreeList_t> +bool BinaryTreeDictionary::coal_dict_over_populated(size_t size) { + // For the general type of freelists, encourage coalescing by + // returning true. + return true; +} + +#ifndef SERIALGC +template <> +bool BinaryTreeDictionary::coal_dict_over_populated(size_t size) { if (FLSAlwaysCoalesceLarge) return true; - TreeList* list_of_size = find_list(size); + TreeList* list_of_size = find_list(size); // None of requested size implies overpopulated. return list_of_size == NULL || list_of_size->coal_desired() <= 0 || list_of_size->count() > list_of_size->coal_desired(); } +#endif // SERIALGC // Closures for walking the binary tree. // do_list() walks the free list in a node applying the closure @@ -899,19 +926,18 @@ // do_tree() walks the nodes in the binary tree applying do_list() // to each list at each node. -template +template class FreeList_t> class TreeCensusClosure : public StackObj { protected: - virtual void do_list(FreeList* fl) = 0; + virtual void do_list(FreeList_t* fl) = 0; public: - virtual void do_tree(TreeList* tl) = 0; + virtual void do_tree(TreeList* tl) = 0; }; -template -class AscendTreeCensusClosure : public TreeCensusClosure { - using TreeCensusClosure::do_list; +template class FreeList_t> +class AscendTreeCensusClosure : public TreeCensusClosure { public: - void do_tree(TreeList* tl) { + void do_tree(TreeList* tl) { if (tl != NULL) { do_tree(tl->left()); do_list(tl); @@ -920,11 +946,10 @@ } }; -template -class DescendTreeCensusClosure : public TreeCensusClosure { - using TreeCensusClosure::do_list; +template class FreeList_t> +class DescendTreeCensusClosure : public TreeCensusClosure { public: - void do_tree(TreeList* tl) { + void do_tree(TreeList* tl) { if (tl != NULL) { do_tree(tl->right()); do_list(tl); @@ -935,8 +960,8 @@ // For each list in the tree, calculate the desired, desired // coalesce, count before sweep, and surplus before sweep. -template -class BeginSweepClosure : public AscendTreeCensusClosure { +template class FreeList_t> +class BeginSweepClosure : public AscendTreeCensusClosure { double _percentage; float _inter_sweep_current; float _inter_sweep_estimate; @@ -951,32 +976,36 @@ _inter_sweep_estimate(inter_sweep_estimate), _intra_sweep_estimate(intra_sweep_estimate) { } - void do_list(FreeList* fl) { + void do_list(FreeList* fl) {} + +#ifndef SERIALGC + void do_list(AdaptiveFreeList* fl) { double coalSurplusPercent = _percentage; fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate); fl->set_coal_desired((ssize_t)((double)fl->desired() * coalSurplusPercent)); fl->set_before_sweep(fl->count()); fl->set_bfr_surp(fl->surplus()); } +#endif // SERIALGC }; // Used to search the tree until a condition is met. // Similar to TreeCensusClosure but searches the // tree and returns promptly when found. 
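The census closures above are plain in-order traversals: AscendTreeCensusClosure visits left subtree, node, right subtree, so do_list() sees the size classes in ascending order, and the Descend variant simply reverses the recursion. A compact sketch with a stand-in node type:

#include <cassert>
#include <cstddef>
#include <vector>

struct Node { size_t size; Node* left; Node* right; };

struct AscendClosure {
  std::vector<size_t> visited;
  void do_list(Node* n) { visited.push_back(n->size); }   // per-list work goes here
  void do_tree(Node* n) {
    if (n == nullptr) return;
    do_tree(n->left);
    do_list(n);
    do_tree(n->right);
  }
};

int main() {
  Node n32  = {32, nullptr, nullptr};
  Node n128 = {128, nullptr, nullptr};
  Node n64  = {64, &n32, &n128};
  AscendClosure c;
  c.do_tree(&n64);
  assert((c.visited == std::vector<size_t>{32, 64, 128}));  // sizes in ascending order
  return 0;
}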
-template +template class FreeList_t> class TreeSearchClosure : public StackObj { protected: - virtual bool do_list(FreeList* fl) = 0; + virtual bool do_list(FreeList_t* fl) = 0; public: - virtual bool do_tree(TreeList* tl) = 0; + virtual bool do_tree(TreeList* tl) = 0; }; #if 0 // Don't need this yet but here for symmetry. -template -class AscendTreeSearchClosure : public TreeSearchClosure { +template class FreeList_t> +class AscendTreeSearchClosure : public TreeSearchClosure { public: - bool do_tree(TreeList* tl) { + bool do_tree(TreeList* tl) { if (tl != NULL) { if (do_tree(tl->left())) return true; if (do_list(tl)) return true; @@ -987,11 +1016,10 @@ }; #endif -template -class DescendTreeSearchClosure : public TreeSearchClosure { - using TreeSearchClosure::do_list; +template class FreeList_t> +class DescendTreeSearchClosure : public TreeSearchClosure { public: - bool do_tree(TreeList* tl) { + bool do_tree(TreeList* tl) { if (tl != NULL) { if (do_tree(tl->right())) return true; if (do_list(tl)) return true; @@ -1003,17 +1031,17 @@ // Searches the tree for a chunk that ends at the // specified address. -template -class EndTreeSearchClosure : public DescendTreeSearchClosure { +template class FreeList_t> +class EndTreeSearchClosure : public DescendTreeSearchClosure { HeapWord* _target; - Chunk* _found; + Chunk_t* _found; public: EndTreeSearchClosure(HeapWord* target) : _target(target), _found(NULL) {} - bool do_list(FreeList* fl) { - Chunk* item = fl->head(); + bool do_list(FreeList_t* fl) { + Chunk_t* item = fl->head(); while (item != NULL) { - if (item->end() == _target) { + if (item->end() == (uintptr_t*) _target) { _found = item; return true; } @@ -1021,22 +1049,22 @@ } return false; } - Chunk* found() { return _found; } + Chunk_t* found() { return _found; } }; -template -Chunk* BinaryTreeDictionary::find_chunk_ends_at(HeapWord* target) const { - EndTreeSearchClosure etsc(target); +template class FreeList_t> +Chunk_t* BinaryTreeDictionary::find_chunk_ends_at(HeapWord* target) const { + EndTreeSearchClosure etsc(target); bool found_target = etsc.do_tree(root()); assert(found_target || etsc.found() == NULL, "Consistency check"); assert(!found_target || etsc.found() != NULL, "Consistency check"); return etsc.found(); } -template -void BinaryTreeDictionary::begin_sweep_dict_census(double coalSurplusPercent, +template class FreeList_t> +void BinaryTreeDictionary::begin_sweep_dict_census(double coalSurplusPercent, float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) { - BeginSweepClosure bsc(coalSurplusPercent, inter_sweep_current, + BeginSweepClosure bsc(coalSurplusPercent, inter_sweep_current, inter_sweep_estimate, intra_sweep_estimate); bsc.do_tree(root()); @@ -1045,84 +1073,91 @@ // Closures and methods for calculating total bytes returned to the // free lists in the tree. 
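EndTreeSearchClosure::do_list() above walks one size class looking for the chunk whose end() equals the target address. A standalone sketch of that predicate, assuming for simplicity that a heap word is sizeof(void*) bytes; Chunk here is an illustrative stand-in, not the HotSpot type.

#include <cassert>
#include <cstddef>
#include <new>

struct Chunk {
  Chunk* next;
  size_t word_size;
  // One-past-the-end address of this chunk (word size assumed = sizeof(void*)).
  void* end() { return reinterpret_cast<char*>(this) + word_size * sizeof(void*); }
};

// Scan one free list for the chunk that ends exactly at the target address.
Chunk* find_chunk_ends_at(Chunk* head, void* target) {
  for (Chunk* c = head; c != nullptr; c = c->next) {
    if (c->end() == target) return c;
  }
  return nullptr;
}

int main() {
  alignas(Chunk) char space[64 * sizeof(void*)];   // pretend this is a 64-word free chunk
  Chunk* c = new (space) Chunk();
  c->word_size = 64;
  assert(find_chunk_ends_at(c, space + sizeof(space)) == c);
  return 0;
}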
#ifndef PRODUCT -template -class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure { +template class FreeList_t> +class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure { public: - void do_list(FreeList* fl) { + void do_list(FreeList_t* fl) { fl->set_returned_bytes(0); } }; -template -void BinaryTreeDictionary::initialize_dict_returned_bytes() { - InitializeDictReturnedBytesClosure idrb; +template class FreeList_t> +void BinaryTreeDictionary::initialize_dict_returned_bytes() { + InitializeDictReturnedBytesClosure idrb; idrb.do_tree(root()); } -template -class ReturnedBytesClosure : public AscendTreeCensusClosure { +template class FreeList_t> +class ReturnedBytesClosure : public AscendTreeCensusClosure { size_t _dict_returned_bytes; public: ReturnedBytesClosure() { _dict_returned_bytes = 0; } - void do_list(FreeList* fl) { + void do_list(FreeList_t* fl) { _dict_returned_bytes += fl->returned_bytes(); } size_t dict_returned_bytes() { return _dict_returned_bytes; } }; -template -size_t BinaryTreeDictionary::sum_dict_returned_bytes() { - ReturnedBytesClosure rbc; +template class FreeList_t> +size_t BinaryTreeDictionary::sum_dict_returned_bytes() { + ReturnedBytesClosure rbc; rbc.do_tree(root()); return rbc.dict_returned_bytes(); } // Count the number of entries in the tree. -template -class treeCountClosure : public DescendTreeCensusClosure { +template class FreeList_t> +class treeCountClosure : public DescendTreeCensusClosure { public: uint count; treeCountClosure(uint c) { count = c; } - void do_list(FreeList* fl) { + void do_list(FreeList_t* fl) { count++; } }; -template -size_t BinaryTreeDictionary::total_count() { - treeCountClosure ctc(0); +template class FreeList_t> +size_t BinaryTreeDictionary::total_count() { + treeCountClosure ctc(0); ctc.do_tree(root()); return ctc.count; } #endif // PRODUCT // Calculate surpluses for the lists in the tree. -template -class setTreeSurplusClosure : public AscendTreeCensusClosure { +template class FreeList_t> +class setTreeSurplusClosure : public AscendTreeCensusClosure { double percentage; public: setTreeSurplusClosure(double v) { percentage = v; } - void do_list(FreeList* fl) { + void do_list(FreeList* fl) {} + +#ifndef SERIALGC + void do_list(AdaptiveFreeList* fl) { double splitSurplusPercent = percentage; fl->set_surplus(fl->count() - (ssize_t)((double)fl->desired() * splitSurplusPercent)); } +#endif // SERIALGC }; -template -void BinaryTreeDictionary::set_tree_surplus(double splitSurplusPercent) { - setTreeSurplusClosure sts(splitSurplusPercent); +template class FreeList_t> +void BinaryTreeDictionary::set_tree_surplus(double splitSurplusPercent) { + setTreeSurplusClosure sts(splitSurplusPercent); sts.do_tree(root()); } // Set hints for the lists in the tree. 
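setTreeSurplusClosure above reduces to one line of arithmetic per list: surplus = count - desired * splitSurplusPercent. Pulled out as a plain function for clarity (names are illustrative):

#include <cassert>

inline long split_surplus(long count, long desired, double split_surplus_percent) {
  return count - static_cast<long>(static_cast<double>(desired) * split_surplus_percent);
}

int main() {
  // With 10 chunks on a list whose desired level is 8 and a 50% threshold,
  // the list records a surplus of 6.
  assert(split_surplus(10, 8, 0.5) == 6);
  return 0;
}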
-template -class setTreeHintsClosure : public DescendTreeCensusClosure { +template class FreeList_t> +class setTreeHintsClosure : public DescendTreeCensusClosure { size_t hint; public: setTreeHintsClosure(size_t v) { hint = v; } - void do_list(FreeList* fl) { + void do_list(FreeList* fl) {} + +#ifndef SERIALGC + void do_list(AdaptiveFreeList* fl) { fl->set_hint(hint); assert(fl->hint() == 0 || fl->hint() > fl->size(), "Current hint is inconsistent"); @@ -1130,35 +1165,40 @@ hint = fl->size(); } } +#endif // SERIALGC }; -template -void BinaryTreeDictionary::set_tree_hints(void) { - setTreeHintsClosure sth(0); +template class FreeList_t> +void BinaryTreeDictionary::set_tree_hints(void) { + setTreeHintsClosure sth(0); sth.do_tree(root()); } // Save count before previous sweep and splits and coalesces. -template -class clearTreeCensusClosure : public AscendTreeCensusClosure { - void do_list(FreeList* fl) { +template class FreeList_t> +class clearTreeCensusClosure : public AscendTreeCensusClosure { + void do_list(FreeList* fl) {} + +#ifndef SERIALGC + void do_list(AdaptiveFreeList* fl) { fl->set_prev_sweep(fl->count()); fl->set_coal_births(0); fl->set_coal_deaths(0); fl->set_split_births(0); fl->set_split_deaths(0); } +#endif // SERIALGC }; -template -void BinaryTreeDictionary::clear_tree_census(void) { - clearTreeCensusClosure ctc; +template class FreeList_t> +void BinaryTreeDictionary::clear_tree_census(void) { + clearTreeCensusClosure ctc; ctc.do_tree(root()); } // Do reporting and post sweep clean up. -template -void BinaryTreeDictionary::end_sweep_dict_census(double splitSurplusPercent) { +template class FreeList_t> +void BinaryTreeDictionary::end_sweep_dict_census(double splitSurplusPercent) { // Does walking the tree 3 times hurt? set_tree_surplus(splitSurplusPercent); set_tree_hints(); @@ -1169,9 +1209,9 @@ } // Print summary statistics -template -void BinaryTreeDictionary::report_statistics() const { - FreeBlockDictionary::verify_par_locked(); +template class FreeList_t> +void BinaryTreeDictionary::report_statistics() const { + FreeBlockDictionary::verify_par_locked(); gclog_or_tty->print("Statistics for BinaryTreeDictionary:\n" "------------------------------------\n"); size_t total_size = total_chunk_size(debug_only(NULL)); @@ -1182,36 +1222,47 @@ if (free_blocks > 0) { gclog_or_tty->print("Av. Block Size: %d\n", total_size/free_blocks); } - gclog_or_tty->print("Tree Height: %d\n", treeHeight()); + gclog_or_tty->print("Tree Height: %d\n", tree_height()); } // Print census information - counts, births, deaths, etc. // for each list in the tree. Also print some summary // information. 
-template -class PrintTreeCensusClosure : public AscendTreeCensusClosure { +template class FreeList_t> +class PrintTreeCensusClosure : public AscendTreeCensusClosure { int _print_line; size_t _total_free; - FreeList _total; + FreeList_t _total; public: PrintTreeCensusClosure() { _print_line = 0; _total_free = 0; } - FreeList* total() { return &_total; } + FreeList_t* total() { return &_total; } size_t total_free() { return _total_free; } - void do_list(FreeList* fl) { + void do_list(FreeList* fl) { if (++_print_line >= 40) { - FreeList::print_labels_on(gclog_or_tty, "size"); + FreeList_t::print_labels_on(gclog_or_tty, "size"); _print_line = 0; } fl->print_on(gclog_or_tty); _total_free += fl->count() * fl->size() ; total()->set_count( total()->count() + fl->count() ); - total()->set_bfr_surp( total()->bfr_surp() + fl->bfr_surp() ); + } + +#ifndef SERIALGC + void do_list(AdaptiveFreeList* fl) { + if (++_print_line >= 40) { + FreeList_t::print_labels_on(gclog_or_tty, "size"); + _print_line = 0; + } + fl->print_on(gclog_or_tty); + _total_free += fl->count() * fl->size() ; + total()->set_count( total()->count() + fl->count() ); + total()->set_bfr_surp( total()->bfr_surp() + fl->bfr_surp() ); total()->set_surplus( total()->split_deaths() + fl->surplus() ); - total()->set_desired( total()->desired() + fl->desired() ); + total()->set_desired( total()->desired() + fl->desired() ); total()->set_prev_sweep( total()->prev_sweep() + fl->prev_sweep() ); total()->set_before_sweep(total()->before_sweep() + fl->before_sweep()); total()->set_coal_births( total()->coal_births() + fl->coal_births() ); @@ -1219,18 +1270,32 @@ total()->set_split_births(total()->split_births() + fl->split_births()); total()->set_split_deaths(total()->split_deaths() + fl->split_deaths()); } +#endif // SERIALGC }; -template -void BinaryTreeDictionary::print_dict_census(void) const { +template class FreeList_t> +void BinaryTreeDictionary::print_dict_census(void) const { gclog_or_tty->print("\nBinaryTree\n"); - FreeList::print_labels_on(gclog_or_tty, "size"); - PrintTreeCensusClosure ptc; + FreeList_t::print_labels_on(gclog_or_tty, "size"); + PrintTreeCensusClosure ptc; ptc.do_tree(root()); - FreeList* total = ptc.total(); - FreeList::print_labels_on(gclog_or_tty, " "); + FreeList_t* total = ptc.total(); + FreeList_t::print_labels_on(gclog_or_tty, " "); +} + +#ifndef SERIALGC +template <> +void BinaryTreeDictionary::print_dict_census(void) const { + + gclog_or_tty->print("\nBinaryTree\n"); + AdaptiveFreeList::print_labels_on(gclog_or_tty, "size"); + PrintTreeCensusClosure ptc; + ptc.do_tree(root()); + + AdaptiveFreeList* total = ptc.total(); + AdaptiveFreeList::print_labels_on(gclog_or_tty, " "); total->print_on(gclog_or_tty, "TOTAL\t"); gclog_or_tty->print( "total_free(words): " SIZE_FORMAT_W(16) @@ -1242,9 +1307,10 @@ (double)(total->desired() - total->count()) /(total->desired() != 0 ? 
(double)total->desired() : 1.0)); } +#endif // SERIALGC -template -class PrintFreeListsClosure : public AscendTreeCensusClosure { +template class FreeList_t> +class PrintFreeListsClosure : public AscendTreeCensusClosure { outputStream* _st; int _print_line; @@ -1253,14 +1319,14 @@ _st = st; _print_line = 0; } - void do_list(FreeList* fl) { + void do_list(FreeList_t* fl) { if (++_print_line >= 40) { - FreeList::print_labels_on(_st, "size"); + FreeList_t::print_labels_on(_st, "size"); _print_line = 0; } fl->print_on(gclog_or_tty); size_t sz = fl->size(); - for (Chunk* fc = fl->head(); fc != NULL; + for (Chunk_t* fc = fl->head(); fc != NULL; fc = fc->next()) { _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ") %s", fc, (HeapWord*)fc + sz, @@ -1269,11 +1335,11 @@ } }; -template -void BinaryTreeDictionary::print_free_lists(outputStream* st) const { +template class FreeList_t> +void BinaryTreeDictionary::print_free_lists(outputStream* st) const { - FreeList::print_labels_on(st, "size"); - PrintFreeListsClosure pflc(st); + FreeList_t::print_labels_on(st, "size"); + PrintFreeListsClosure pflc(st); pflc.do_tree(root()); } @@ -1281,18 +1347,18 @@ // . _root has no parent // . parent and child point to each other // . each node's key correctly related to that of its child(ren) -template -void BinaryTreeDictionary::verify_tree() const { +template class FreeList_t> +void BinaryTreeDictionary::verify_tree() const { guarantee(root() == NULL || total_free_blocks() == 0 || total_size() != 0, "_total_size should't be 0?"); guarantee(root() == NULL || root()->parent() == NULL, "_root shouldn't have parent"); verify_tree_helper(root()); } -template -size_t BinaryTreeDictionary::verify_prev_free_ptrs(TreeList* tl) { +template class FreeList_t> +size_t BinaryTreeDictionary::verify_prev_free_ptrs(TreeList* tl) { size_t ct = 0; - for (Chunk* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) { + for (Chunk_t* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) { ct++; assert(curFC->prev() == NULL || curFC->prev()->is_free(), "Chunk should be free"); @@ -1303,8 +1369,8 @@ // Note: this helper is recursive rather than iterative, so use with // caution on very deep trees; and watch out for stack overflow errors; // In general, to be used only for debugging. -template -void BinaryTreeDictionary::verify_tree_helper(TreeList* tl) const { +template class FreeList_t> +void BinaryTreeDictionary::verify_tree_helper(TreeList* tl) const { if (tl == NULL) return; guarantee(tl->size() != 0, "A list must has a size"); @@ -1332,15 +1398,25 @@ verify_tree_helper(tl->right()); } -template -void BinaryTreeDictionary::verify() const { +template class FreeList_t> +void BinaryTreeDictionary::verify() const { verify_tree(); guarantee(total_size() == total_size_in_tree(root()), "Total Size inconsistency"); } +template class TreeList; +template class BinaryTreeDictionary; +template class TreeChunk; + +template class TreeList; +template class BinaryTreeDictionary; +template class TreeChunk; + + #ifndef SERIALGC // Explicitly instantiate these types for FreeChunk. 
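The verify_tree()/verify_tree_helper() pair above checks three invariants: the root has no parent, parent and child point at each other, and sizes are ordered left < node < right. Restated over a stand-in node type (recursive, so, as the comment warns, use with caution on very deep trees):

#include <cstddef>

struct Node { size_t size; Node* parent; Node* left; Node* right; };

bool verify_tree_helper(const Node* n) {
  if (n == nullptr) return true;
  if (n->size == 0) return false;                            // a list must have a size
  if (n->left  != nullptr &&
      (n->left->parent  != n || n->left->size  >= n->size))  return false;
  if (n->right != nullptr &&
      (n->right->parent != n || n->right->size <= n->size))  return false;
  return verify_tree_helper(n->left) && verify_tree_helper(n->right);
}

bool verify_tree(const Node* root) {
  return root == nullptr ||
         (root->parent == nullptr && verify_tree_helper(root));
}

int main() {
  Node leaf = {32, nullptr, nullptr, nullptr};
  Node root = {64, nullptr, &leaf, nullptr};
  leaf.parent = &root;
  return verify_tree(&root) ? 0 : 1;
}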
-template class BinaryTreeDictionary; -template class TreeChunk; -template class TreeList; +template class TreeList; +template class BinaryTreeDictionary; +template class TreeChunk; + #endif // SERIALGC diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/binaryTreeDictionary.hpp --- a/src/share/vm/memory/binaryTreeDictionary.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/binaryTreeDictionary.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -37,77 +37,78 @@ // A TreeList is a FreeList which can be used to maintain a // binary tree of free lists. -template class TreeChunk; -template class BinaryTreeDictionary; -template class AscendTreeCensusClosure; -template class DescendTreeCensusClosure; -template class DescendTreeSearchClosure; +template class FreeList_t> class TreeChunk; +template class FreeList_t> class BinaryTreeDictionary; +template class FreeList_t> class AscendTreeCensusClosure; +template class FreeList_t> class DescendTreeCensusClosure; +template class FreeList_t> class DescendTreeSearchClosure; -template -class TreeList: public FreeList { - friend class TreeChunk; - friend class BinaryTreeDictionary; - friend class AscendTreeCensusClosure; - friend class DescendTreeCensusClosure; - friend class DescendTreeSearchClosure; +template class FreeList_t> +class TreeList : public FreeList_t { + friend class TreeChunk; + friend class BinaryTreeDictionary; + friend class AscendTreeCensusClosure; + friend class DescendTreeCensusClosure; + friend class DescendTreeSearchClosure; - TreeList* _parent; - TreeList* _left; - TreeList* _right; + TreeList* _parent; + TreeList* _left; + TreeList* _right; protected: - TreeList* parent() const { return _parent; } - TreeList* left() const { return _left; } - TreeList* right() const { return _right; } - // Explicitly import these names into our namespace to fix name lookup with templates - using FreeList::head; - using FreeList::set_head; + TreeList* parent() const { return _parent; } + TreeList* left() const { return _left; } + TreeList* right() const { return _right; } - using FreeList::tail; - using FreeList::set_tail; - using FreeList::link_tail; + // Wrapper on call to base class, to get the template to compile. + Chunk_t* head() const { return FreeList_t::head(); } + Chunk_t* tail() const { return FreeList_t::tail(); } + void set_head(Chunk_t* head) { FreeList_t::set_head(head); } + void set_tail(Chunk_t* tail) { FreeList_t::set_tail(tail); } - using FreeList::increment_count; - NOT_PRODUCT(using FreeList::increment_returned_bytes_by;) - using FreeList::verify_chunk_in_free_list; - using FreeList::size; + size_t size() const { return FreeList_t::size(); } // Accessors for links in tree. - void set_left(TreeList* tl) { + void set_left(TreeList* tl) { _left = tl; if (tl != NULL) tl->set_parent(this); } - void set_right(TreeList* tl) { + void set_right(TreeList* tl) { _right = tl; if (tl != NULL) tl->set_parent(this); } - void set_parent(TreeList* tl) { _parent = tl; } + void set_parent(TreeList* tl) { _parent = tl; } - void clearLeft() { _left = NULL; } + void clear_left() { _left = NULL; } void clear_right() { _right = NULL; } void clear_parent() { _parent = NULL; } - void initialize() { clearLeft(); clear_right(), clear_parent(); } + void initialize() { clear_left(); clear_right(), clear_parent(); FreeList_t::initialize(); } // For constructing a TreeList from a Tree chunk or // address and size. 
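The class declarations above have lost their angle brackets to text extraction, which makes the new parameterization hard to read; the shape appears to be a template-template parameter, roughly as sketched below with illustrative names. Presumably the shipped instantiations pair FreeChunk with AdaptiveFreeList and the new Metablock/Metachunk types with the plain FreeList, which is what lets one dictionary implementation serve both CMS and metadata, but treat the exact parameter lists here as a reconstruction, not a quote.

#include <cstddef>

template <class Chunk_t> class PlainFreeList { /* head, tail, count ... */ };

// One type parameter for the chunk, plus a template parameter for the
// free-list class that is itself parameterized by that chunk type.
template <class Chunk_t, template <class> class FreeList_t>
class TreeListSketch : public FreeList_t<Chunk_t> {
  TreeListSketch* _parent;
  TreeListSketch* _left;
  TreeListSketch* _right;
};

struct SomeChunk { SomeChunk* next; SomeChunk* prev; size_t word_size; };

int main() {
  TreeListSketch<SomeChunk, PlainFreeList> node;   // one concrete instantiation
  (void)node;
  return 0;
}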
- static TreeList* as_TreeList(TreeChunk* tc); - static TreeList* as_TreeList(HeapWord* addr, size_t size); + TreeList(); + static TreeList* + as_TreeList(TreeChunk* tc); + static TreeList* as_TreeList(HeapWord* addr, size_t size); // Returns the head of the free list as a pointer to a TreeChunk. - TreeChunk* head_as_TreeChunk(); + TreeChunk* head_as_TreeChunk(); // Returns the first available chunk in the free list as a pointer // to a TreeChunk. - TreeChunk* first_available(); + TreeChunk* first_available(); // Returns the block with the largest heap address amongst // those in the list for this size; potentially slow and expensive, // use with caution! - TreeChunk* largest_address(); + TreeChunk* largest_address(); + + TreeList* get_better_list( + BinaryTreeDictionary* dictionary); // remove_chunk_replace_if_needed() removes the given "tc" from the TreeList. // If "tc" is the first chunk in the list, it is also the @@ -115,10 +116,10 @@ // returns the possibly replaced TreeList* for the node in // the tree. It also updates the parent of the original // node to point to the new node. - TreeList* remove_chunk_replace_if_needed(TreeChunk* tc); + TreeList* remove_chunk_replace_if_needed(TreeChunk* tc); // See FreeList. - void return_chunk_at_head(TreeChunk* tc); - void return_chunk_at_tail(TreeChunk* tc); + void return_chunk_at_head(TreeChunk* tc); + void return_chunk_at_tail(TreeChunk* tc); }; // A TreeChunk is a subclass of a Chunk that additionally @@ -134,52 +135,54 @@ // on the free list for a node in the tree and is only removed if // it is the last chunk on the free list. -template -class TreeChunk : public Chunk { - friend class TreeList; - TreeList* _list; - TreeList _embedded_list; // if non-null, this chunk is on _list +template class FreeList_t> +class TreeChunk : public Chunk_t { + friend class TreeList; + TreeList* _list; + TreeList _embedded_list; // if non-null, this chunk is on _list + + static size_t _min_tree_chunk_size; + protected: - TreeList* embedded_list() const { return (TreeList*) &_embedded_list; } - void set_embedded_list(TreeList* v) { _embedded_list = *v; } + TreeList* embedded_list() const { return (TreeList*) &_embedded_list; } + void set_embedded_list(TreeList* v) { _embedded_list = *v; } public: - TreeList* list() { return _list; } - void set_list(TreeList* v) { _list = v; } - static TreeChunk* as_TreeChunk(Chunk* fc); + TreeList* list() { return _list; } + void set_list(TreeList* v) { _list = v; } + static TreeChunk* as_TreeChunk(Chunk_t* fc); // Initialize fields in a TreeChunk that should be // initialized when the TreeChunk is being added to // a free list in the tree. 
void initialize() { embedded_list()->initialize(); } - Chunk* next() const { return Chunk::next(); } - Chunk* prev() const { return Chunk::prev(); } - size_t size() const volatile { return Chunk::size(); } + Chunk_t* next() const { return Chunk_t::next(); } + Chunk_t* prev() const { return Chunk_t::prev(); } + size_t size() const volatile { return Chunk_t::size(); } + + static size_t min_size() { + return _min_tree_chunk_size; + } // debugging void verify_tree_chunk_list() const; + void assert_is_mangled() const; }; -template -class BinaryTreeDictionary: public FreeBlockDictionary { +template class FreeList_t> +class BinaryTreeDictionary: public FreeBlockDictionary { friend class VMStructs; - bool _splay; - bool _adaptive_freelists; size_t _total_size; size_t _total_free_blocks; - TreeList* _root; + TreeList* _root; // private accessors - bool splay() const { return _splay; } - void set_splay(bool v) { _splay = v; } void set_total_size(size_t v) { _total_size = v; } virtual void inc_total_size(size_t v); virtual void dec_total_size(size_t v); - size_t total_free_blocks() const { return _total_free_blocks; } void set_total_free_blocks(size_t v) { _total_free_blocks = v; } - TreeList* root() const { return _root; } - void set_root(TreeList* v) { _root = v; } - bool adaptive_freelists() { return _adaptive_freelists; } + TreeList* root() const { return _root; } + void set_root(TreeList* v) { _root = v; } // This field is added and can be set to point to the // the Mutex used to synchronize access to the @@ -191,54 +194,55 @@ // return it. If the chunk // is the last chunk of that size, remove the node for that size // from the tree. - TreeChunk* get_chunk_from_tree(size_t size, enum FreeBlockDictionary::Dither dither, bool splay); - // Return a list of the specified size or NULL from the tree. - // The list is not removed from the tree. - TreeList* find_list (size_t size) const; + TreeChunk* get_chunk_from_tree(size_t size, enum FreeBlockDictionary::Dither dither); // Remove this chunk from the tree. If the removal results // in an empty list in the tree, remove the empty list. - TreeChunk* remove_chunk_from_tree(TreeChunk* tc); + TreeChunk* remove_chunk_from_tree(TreeChunk* tc); // Remove the node in the trees starting at tl that has the // minimum value and return it. Repair the tree as needed. - TreeList* remove_tree_minimum(TreeList* tl); - void semi_splay_step(TreeList* tl); + TreeList* remove_tree_minimum(TreeList* tl); // Add this free chunk to the tree. - void insert_chunk_in_tree(Chunk* freeChunk); + void insert_chunk_in_tree(Chunk_t* freeChunk); public: - static const size_t min_tree_chunk_size = sizeof(TreeChunk)/HeapWordSize; + // Return a list of the specified size or NULL from the tree. + // The list is not removed from the tree. + TreeList* find_list (size_t size) const; void verify_tree() const; // verify that the given chunk is in the tree. - bool verify_chunk_in_free_list(Chunk* tc) const; + bool verify_chunk_in_free_list(Chunk_t* tc) const; private: - void verify_tree_helper(TreeList* tl) const; - static size_t verify_prev_free_ptrs(TreeList* tl); + void verify_tree_helper(TreeList* tl) const; + static size_t verify_prev_free_ptrs(TreeList* tl); // Returns the total number of chunks in the list. - size_t total_list_length(TreeList* tl) const; + size_t total_list_length(TreeList* tl) const; // Returns the total number of words in the chunks in the tree // starting at "tl". 
- size_t total_size_in_tree(TreeList* tl) const; + size_t total_size_in_tree(TreeList* tl) const; // Returns the sum of the square of the size of each block // in the tree starting at "tl". - double sum_of_squared_block_sizes(TreeList* const tl) const; + double sum_of_squared_block_sizes(TreeList* const tl) const; // Returns the total number of free blocks in the tree starting // at "tl". - size_t total_free_blocks_in_tree(TreeList* tl) const; - size_t num_free_blocks() const; - size_t treeHeight() const; - size_t tree_height_helper(TreeList* tl) const; - size_t total_nodes_in_tree(TreeList* tl) const; - size_t total_nodes_helper(TreeList* tl) const; + size_t total_free_blocks_in_tree(TreeList* tl) const; + size_t num_free_blocks() const; + size_t tree_height() const; + size_t tree_height_helper(TreeList* tl) const; + size_t total_nodes_in_tree(TreeList* tl) const; + size_t total_nodes_helper(TreeList* tl) const; public: // Constructor - BinaryTreeDictionary(bool adaptive_freelists, bool splay = false); - BinaryTreeDictionary(MemRegion mr, bool adaptive_freelists, bool splay = false); + BinaryTreeDictionary() : + _total_size(0), _total_free_blocks(0), _root(0) {} + + BinaryTreeDictionary(MemRegion mr); // Public accessors size_t total_size() const { return _total_size; } + size_t total_free_blocks() const { return _total_free_blocks; } // Reset the dictionary to the initial conditions with // a single free chunk. @@ -249,23 +253,24 @@ // Return a chunk of size "size" or greater from // the tree. - // want a better dynamic splay strategy for the future. - Chunk* get_chunk(size_t size, enum FreeBlockDictionary::Dither dither) { - FreeBlockDictionary::verify_par_locked(); - Chunk* res = get_chunk_from_tree(size, dither, splay()); + Chunk_t* get_chunk(size_t size, enum FreeBlockDictionary::Dither dither) { + FreeBlockDictionary::verify_par_locked(); + Chunk_t* res = get_chunk_from_tree(size, dither); assert(res == NULL || res->is_free(), "Should be returning a free chunk"); + assert(dither != FreeBlockDictionary::exactly || + res == NULL || res->size() == size, "Not correct size"); return res; } - void return_chunk(Chunk* chunk) { - FreeBlockDictionary::verify_par_locked(); + void return_chunk(Chunk_t* chunk) { + FreeBlockDictionary::verify_par_locked(); insert_chunk_in_tree(chunk); } - void remove_chunk(Chunk* chunk) { - FreeBlockDictionary::verify_par_locked(); - remove_chunk_from_tree((TreeChunk*)chunk); + void remove_chunk(Chunk_t* chunk) { + FreeBlockDictionary::verify_par_locked(); + remove_chunk_from_tree((TreeChunk*)chunk); assert(chunk->is_free(), "Should still be a free chunk"); } @@ -281,19 +286,19 @@ } size_t min_size() const { - return min_tree_chunk_size; + return TreeChunk::min_size(); } double sum_of_squared_block_sizes() const { return sum_of_squared_block_sizes(root()); } - Chunk* find_chunk_ends_at(HeapWord* target) const; + Chunk_t* find_chunk_ends_at(HeapWord* target) const; // Find the list with size "size" in the binary tree and update // the statistics in the list according to "split" (chunk was // split or coalesce) and "birth" (chunk was added or removed). - void dict_census_udpate(size_t size, bool split, bool birth); + void dict_census_update(size_t size, bool split, bool birth); // Return true if the dictionary is overpopulated (more chunks of // this size than desired) for size "size". bool coal_dict_over_populated(size_t size); @@ -307,7 +312,7 @@ // statistics for the sweep. 
void end_sweep_dict_census(double splitSurplusPercent); // Return the largest free chunk in the tree. - Chunk* find_largest_dict() const; + Chunk_t* find_largest_dict() const; // Accessors for statistics void set_tree_surplus(double splitSurplusPercent); void set_tree_hints(void); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/filemap.cpp --- a/src/share/vm/memory/filemap.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/filemap.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -29,6 +29,7 @@ #include "runtime/arguments.hpp" #include "runtime/java.hpp" #include "runtime/os.hpp" +#include "services/memTracker.hpp" #include "utilities/defaultStream.hpp" # include @@ -344,25 +345,14 @@ fail_continue(err_msg("Unable to reserved shared space at required address " INTPTR_FORMAT, requested_addr)); return rs; } + // the reserved virtual memory is for mapping class data sharing archive + if (MemTracker::is_on()) { + MemTracker::record_virtual_memory_type((address)rs.base(), mtClassShared); + } return rs; } // Memory map a region in the address space. - -char* FileMapInfo::map_region(int i, ReservedSpace rs) { - struct FileMapInfo::FileMapHeader::space_info* si = &_header._space[i]; - size_t used = si->_used; - size_t size = align_size_up(used, os::vm_allocation_granularity()); - - ReservedSpace mapped_rs = rs.first_part(size, true, true); - ReservedSpace unmapped_rs = rs.last_part(size); - mapped_rs.release(); - - return map_region(i); -} - - -// Memory map a region in the address space. static const char* shared_region_name[] = { "ReadOnly", "ReadWrite", "MiscData", "MiscCode"}; char* FileMapInfo::map_region(int i) { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/filemap.hpp --- a/src/share/vm/memory/filemap.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/filemap.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -125,7 +125,6 @@ size_t capacity, bool read_only, bool allow_exec); void write_bytes(const void* buffer, int count); void write_bytes_aligned(const void* buffer, int count); - char* map_region(int i, ReservedSpace rs); char* map_region(int i); void unmap_region(int i); void close(); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/freeBlockDictionary.cpp --- a/src/share/vm/memory/freeBlockDictionary.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/freeBlockDictionary.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -27,6 +27,8 @@ #include "gc_implementation/concurrentMarkSweep/freeChunk.hpp" #endif // SERIALGC #include "memory/freeBlockDictionary.hpp" +#include "memory/metablock.hpp" +#include "memory/metachunk.hpp" #ifdef TARGET_OS_FAMILY_linux # include "thread_linux.inline.hpp" #endif @@ -62,6 +64,9 @@ } #endif +template class FreeBlockDictionary; +template class FreeBlockDictionary; + #ifndef SERIALGC // Explicitly instantiate for FreeChunk template class FreeBlockDictionary; diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/freeBlockDictionary.hpp --- a/src/share/vm/memory/freeBlockDictionary.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/freeBlockDictionary.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -66,7 +66,7 @@ virtual void reset(HeapWord* addr, size_t size) = 0; virtual void reset() = 0; - virtual void dict_census_udpate(size_t size, bool split, bool birth) = 0; + virtual void dict_census_update(size_t size, bool split, bool birth) = 0; virtual bool coal_dict_over_populated(size_t size) = 0; virtual void begin_sweep_dict_census(double coalSurplusPercent, float inter_sweep_current, float inter_sweep_estimate, diff -r 556dd9e475c6 -r 
dc16fe422c53 src/share/vm/memory/freeList.cpp --- a/src/share/vm/memory/freeList.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/freeList.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -25,6 +25,8 @@ #include "precompiled.hpp" #include "memory/freeBlockDictionary.hpp" #include "memory/freeList.hpp" +#include "memory/metablock.hpp" +#include "memory/metachunk.hpp" #include "memory/sharedHeap.hpp" #include "runtime/globals.hpp" #include "runtime/mutex.hpp" @@ -49,8 +51,6 @@ { _size = 0; _count = 0; - _hint = 0; - init_statistics(); } template @@ -62,34 +62,50 @@ { _size = fc->size(); _count = 1; - _hint = 0; - init_statistics(); -#ifndef PRODUCT - _allocation_stats.set_returned_bytes(size() * HeapWordSize); -#endif } template -void FreeList::reset(size_t hint) { +void FreeList::link_head(Chunk* v) { + assert_proper_lock_protection(); + set_head(v); + // If this method is not used (just set the head instead), + // this check can be avoided. + if (v != NULL) { + v->link_prev(NULL); + } +} + + + +template +void FreeList::reset() { + // Don't set the _size to 0 because this method is + // used with a existing list that has a size but which has + // been emptied. + // Don't clear the _protecting_lock of an existing list. set_count(0); set_head(NULL); set_tail(NULL); - set_hint(hint); } template -void FreeList::init_statistics(bool split_birth) { - _allocation_stats.initialize(split_birth); +void FreeList::initialize() { +#ifdef ASSERT + // Needed early because it might be checked in other initializing code. + set_protecting_lock(NULL); +#endif + reset(); + set_size(0); } -template -Chunk* FreeList::get_chunk_at_head() { +template +Chunk_t* FreeList::get_chunk_at_head() { assert_proper_lock_protection(); assert(head() == NULL || head()->prev() == NULL, "list invariant"); assert(tail() == NULL || tail()->next() == NULL, "list invariant"); - Chunk* fc = head(); + Chunk_t* fc = head(); if (fc != NULL) { - Chunk* nextFC = fc->next(); + Chunk_t* nextFC = fc->next(); if (nextFC != NULL) { // The chunk fc being removed has a "next". Set the "next" to the // "prev" of fc. @@ -197,11 +213,6 @@ link_tail(chunk); } increment_count(); // of # of chunks in list - DEBUG_ONLY( - if (record_return) { - increment_returned_bytes_by(size()*HeapWordSize); - } - ) assert(head() == NULL || head()->prev() == NULL, "list invariant"); assert(tail() == NULL || tail()->next() == NULL, "list invariant"); assert(head() == NULL || head()->size() == size(), "wrong item on list"); @@ -233,11 +244,6 @@ } link_tail(chunk); increment_count(); // of # of chunks in list - DEBUG_ONLY( - if (record_return) { - increment_returned_bytes_by(size()*HeapWordSize); - } - ) assert(head() == NULL || head()->prev() == NULL, "list invariant"); assert(tail() == NULL || tail()->next() == NULL, "list invariant"); assert(head() == NULL || head()->size() == size(), "wrong item on list"); @@ -273,7 +279,7 @@ } } -// verify_chunk_in_free_list() is used to verify that an item is in this free list. +// verify_chunk_in_free_lists() is used to verify that an item is in this free list. // It is used as a debugging aid. 
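The freeList.cpp changes above keep the core doubly-linked list operations (get_chunk_at_head, return_chunk_at_*) while stripping the statistics bookkeeping. The self-contained model below shows those two operations with the same head/tail/count invariants the HotSpot asserts check; it is a sketch of the idea, not the FreeList class itself.

#include <cassert>
#include <cstddef>

struct Chunk { Chunk* next; Chunk* prev; size_t words; };

class MiniFreeList {
  Chunk* _head = nullptr;
  Chunk* _tail = nullptr;
  size_t _size;            // every chunk on one list has this size
  long   _count = 0;

  void check_invariants() const {
    assert(_head == nullptr || _head->prev == nullptr);   // "list invariant"
    assert(_tail == nullptr || _tail->next == nullptr);
    assert(_head == nullptr || _head->words == _size);    // "wrong item on list"
  }

 public:
  explicit MiniFreeList(size_t size) : _size(size) {}

  void return_chunk_at_head(Chunk* fc) {
    assert(fc != nullptr && fc->words == _size);
    fc->prev = nullptr;
    fc->next = _head;
    if (_head != nullptr) _head->prev = fc; else _tail = fc;
    _head = fc;
    _count++;
    check_invariants();
  }

  Chunk* get_chunk_at_head() {
    Chunk* fc = _head;
    if (fc != nullptr) {
      _head = fc->next;
      if (_head != nullptr) _head->prev = nullptr; else _tail = nullptr;
      fc->next = fc->prev = nullptr;
      _count--;
    }
    check_invariants();
    return fc;
  }

  long count() const { return _count; }
};

int main() {
  Chunk a{nullptr, nullptr, 16}, b{nullptr, nullptr, 16};
  MiniFreeList fl(16);
  fl.return_chunk_at_head(&a);
  fl.return_chunk_at_head(&b);
  assert(fl.get_chunk_at_head() == &b);    // LIFO at the head
  assert(fl.get_chunk_at_head() == &a);
  return fl.count() == 0 ? 0 : 1;
}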
template bool FreeList::verify_chunk_in_free_list(Chunk* fc) const { @@ -294,40 +300,14 @@ #ifndef PRODUCT template -void FreeList::verify_stats() const { - // The +1 of the LH comparand is to allow some "looseness" in - // checking: we usually call this interface when adding a block - // and we'll subsequently update the stats; we cannot update the - // stats beforehand because in the case of the large-block BT - // dictionary for example, this might be the first block and - // in that case there would be no place that we could record - // the stats (which are kept in the block itself). - assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births() - + _allocation_stats.coal_births() + 1) // Total Production Stock + 1 - >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths() - + (ssize_t)count()), // Total Current Stock + depletion - err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT - " violates Conservation Principle: " - "prev_sweep(" SIZE_FORMAT ")" - " + split_births(" SIZE_FORMAT ")" - " + coal_births(" SIZE_FORMAT ") + 1 >= " - " split_deaths(" SIZE_FORMAT ")" - " coal_deaths(" SIZE_FORMAT ")" - " + count(" SSIZE_FORMAT ")", - this, _size, _allocation_stats.prev_sweep(), _allocation_stats.split_births(), - _allocation_stats.split_births(), _allocation_stats.split_deaths(), - _allocation_stats.coal_deaths(), count())); -} - -template void FreeList::assert_proper_lock_protection_work() const { - assert(_protecting_lock != NULL, "Don't call this directly"); + assert(protecting_lock() != NULL, "Don't call this directly"); assert(ParallelGCThreads > 0, "Don't call this directly"); Thread* thr = Thread::current(); if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) { // assert that we are holding the freelist lock } else if (thr->is_GC_task_thread()) { - assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED"); + assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED"); } else if (thr->is_Java_thread()) { assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing"); } else { @@ -350,21 +330,17 @@ // to the call is a non-null string, it is printed in the first column; // otherwise, if the argument is null (the default), then the size of the // (free list) block is printed in the first column. -template -void FreeList::print_on(outputStream* st, const char* c) const { +template +void FreeList::print_on(outputStream* st, const char* c) const { if (c != NULL) { st->print("%16s", c); } else { st->print(SIZE_FORMAT_W(16), size()); } - st->print("\t" - SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" - SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n", - bfr_surp(), surplus(), desired(), prev_sweep(), before_sweep(), - count(), coal_births(), coal_deaths(), split_births(), split_deaths()); } +template class FreeList; +template class FreeList; #ifndef SERIALGC -// Needs to be after the definitions have been seen. template class FreeList; #endif // SERIALGC diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/freeList.hpp --- a/src/share/vm/memory/freeList.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/freeList.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -40,23 +40,19 @@ // for that implementation. 
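The "template class FreeList..." lines added above are explicit template instantiations: the member definitions stay in the .cpp file, and the file ends by naming exactly the specializations (for the new metablock/metachunk types) that must be emitted. The sketch below illustrates that general C++ pattern only; the type names are made up, and everything is collapsed into one translation unit so it compiles as-is.

#include <cstddef>

template <class Chunk_t>
class TinyDictionary {
  size_t _total;
 public:
  TinyDictionary();
  void add(size_t words);
  size_t total() const;
};

// Out-of-line member definitions (in HotSpot these live only in the .cpp file).
template <class Chunk_t> TinyDictionary<Chunk_t>::TinyDictionary() : _total(0) {}
template <class Chunk_t> void TinyDictionary<Chunk_t>::add(size_t words) { _total += words; }
template <class Chunk_t> size_t TinyDictionary<Chunk_t>::total() const { return _total; }

struct Metablock_demo {};   // illustrative stand-ins only
struct Metachunk_demo {};

// Explicit instantiations: the analogue of the lines added to freeList.cpp and
// freeBlockDictionary.cpp so other translation units can link against them.
template class TinyDictionary<Metablock_demo>;
template class TinyDictionary<Metachunk_demo>;

int main() {
  TinyDictionary<Metablock_demo> d;
  d.add(8);
  return d.total() == 8 ? 0 : 1;
}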
class Mutex; -template class TreeList; -template class PrintTreeCensusClosure; -template +template class FreeList VALUE_OBJ_CLASS_SPEC { friend class CompactibleFreeListSpace; friend class VMStructs; - friend class PrintTreeCensusClosure; private: - Chunk* _head; // Head of list of free chunks - Chunk* _tail; // Tail of list of free chunks + Chunk_t* _head; // Head of list of free chunks + Chunk_t* _tail; // Tail of list of free chunks size_t _size; // Size in Heap words of each chunk ssize_t _count; // Number of entries in list - size_t _hint; // next larger size list with a positive surplus - AllocationStats _allocation_stats; // allocation-related statistics + protected: #ifdef ASSERT Mutex* _protecting_lock; @@ -71,10 +67,6 @@ #endif } - // Initialize the allocation statistics. - protected: - void init_statistics(bool split_birth = false); - void set_count(ssize_t v) { _count = v;} void increment_count() { _count++; } @@ -89,52 +81,48 @@ // Construct a list without any entries. FreeList(); // Construct a list with "fc" as the first (and lone) entry in the list. - FreeList(Chunk* fc); + FreeList(Chunk_t* fc); - // Reset the head, tail, hint, and count of a free list. - void reset(size_t hint); + // Do initialization + void initialize(); + + // Reset the head, tail, and count of a free list. + void reset(); // Declare the current free list to be protected by the given lock. #ifdef ASSERT - void set_protecting_lock(Mutex* protecting_lock) { - _protecting_lock = protecting_lock; + Mutex* protecting_lock() const { return _protecting_lock; } + void set_protecting_lock(Mutex* v) { + _protecting_lock = v; } #endif // Accessors. - Chunk* head() const { + Chunk_t* head() const { assert_proper_lock_protection(); return _head; } - void set_head(Chunk* v) { + void set_head(Chunk_t* v) { assert_proper_lock_protection(); _head = v; assert(!_head || _head->size() == _size, "bad chunk size"); } // Set the head of the list and set the prev field of non-null // values to NULL. - void link_head(Chunk* v) { - assert_proper_lock_protection(); - set_head(v); - // If this method is not used (just set the head instead), - // this check can be avoided. - if (v != NULL) { - v->link_prev(NULL); - } - } + void link_head(Chunk_t* v); - Chunk* tail() const { + Chunk_t* tail() const { assert_proper_lock_protection(); return _tail; } - void set_tail(Chunk* v) { + void set_tail(Chunk_t* v) { assert_proper_lock_protection(); _tail = v; assert(!_tail || _tail->size() == _size, "bad chunk size"); } // Set the tail of the list and set the next field of non-null // values to NULL. 
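The protecting-lock accessors above support a debug-only discipline: the list records which mutex guards it, and assert_proper_lock_protection() checks that the mutating thread owns that mutex. Below is a small standalone model of the idea, not the HotSpot code; it uses a hand-rolled lock that tracks its owner because std::mutex has no ownership query, and the ASSERT guard is only an imitation of the debug-build convention.

#ifndef NDEBUG
#define ASSERT 1
#endif

#include <cassert>
#include <thread>

// Tiny lock that remembers its owning thread.
class OwnedMutex {
  std::thread::id _owner;
 public:
  void lock()   { _owner = std::this_thread::get_id(); }
  void unlock() { _owner = std::thread::id(); }
  bool owned_by_self() const { return _owner == std::this_thread::get_id(); }
};

class GuardedFreeList {
  int _count = 0;
#ifdef ASSERT
  OwnedMutex* _protecting_lock = nullptr;
#endif
 public:
#ifdef ASSERT
  OwnedMutex* protecting_lock() const     { return _protecting_lock; }
  void set_protecting_lock(OwnedMutex* v) { _protecting_lock = v; }
#endif
  void assert_proper_lock_protection() const {
#ifdef ASSERT
    if (protecting_lock() != nullptr) {
      assert(protecting_lock()->owned_by_self() && "FreeList RACE DETECTED");
    }
#endif
  }
  void increment_count() { assert_proper_lock_protection(); _count++; }
  int count() const { return _count; }
};

int main() {
  OwnedMutex m;
  GuardedFreeList fl;
#ifdef ASSERT
  fl.set_protecting_lock(&m);
#endif
  m.lock();                 // hold the protecting lock while mutating the list
  fl.increment_count();
  m.unlock();
  return fl.count() == 1 ? 0 : 1;
}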
- void link_tail(Chunk* v) { + void link_tail(Chunk_t* v) { assert_proper_lock_protection(); set_tail(v); if (v != NULL) { @@ -152,174 +140,45 @@ assert_proper_lock_protection(); _size = v; } - ssize_t count() const { - return _count; - } - size_t hint() const { - return _hint; - } - void set_hint(size_t v) { - assert_proper_lock_protection(); - assert(v == 0 || _size < v, "Bad hint"); _hint = v; - } - - // Accessors for statistics - AllocationStats* allocation_stats() { - assert_proper_lock_protection(); - return &_allocation_stats; - } - - ssize_t desired() const { - return _allocation_stats.desired(); - } - void set_desired(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_desired(v); - } - void compute_desired(float inter_sweep_current, - float inter_sweep_estimate, - float intra_sweep_estimate) { - assert_proper_lock_protection(); - _allocation_stats.compute_desired(_count, - inter_sweep_current, - inter_sweep_estimate, - intra_sweep_estimate); - } - ssize_t coal_desired() const { - return _allocation_stats.coal_desired(); - } - void set_coal_desired(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_coal_desired(v); - } - - ssize_t surplus() const { - return _allocation_stats.surplus(); - } - void set_surplus(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_surplus(v); - } - void increment_surplus() { - assert_proper_lock_protection(); - _allocation_stats.increment_surplus(); - } - void decrement_surplus() { - assert_proper_lock_protection(); - _allocation_stats.decrement_surplus(); - } + ssize_t count() const { return _count; } + void set_count(ssize_t v) { _count = v;} - ssize_t bfr_surp() const { - return _allocation_stats.bfr_surp(); - } - void set_bfr_surp(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_bfr_surp(v); - } - ssize_t prev_sweep() const { - return _allocation_stats.prev_sweep(); - } - void set_prev_sweep(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_prev_sweep(v); - } - ssize_t before_sweep() const { - return _allocation_stats.before_sweep(); - } - void set_before_sweep(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_before_sweep(v); - } - - ssize_t coal_births() const { - return _allocation_stats.coal_births(); - } - void set_coal_births(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_coal_births(v); - } - void increment_coal_births() { - assert_proper_lock_protection(); - _allocation_stats.increment_coal_births(); - } + size_t get_better_size() { return size(); } - ssize_t coal_deaths() const { - return _allocation_stats.coal_deaths(); - } - void set_coal_deaths(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_coal_deaths(v); - } - void increment_coal_deaths() { - assert_proper_lock_protection(); - _allocation_stats.increment_coal_deaths(); - } - - ssize_t split_births() const { - return _allocation_stats.split_births(); - } - void set_split_births(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_split_births(v); - } - void increment_split_births() { - assert_proper_lock_protection(); - _allocation_stats.increment_split_births(); - } - - ssize_t split_deaths() const { - return _allocation_stats.split_deaths(); - } - void set_split_deaths(ssize_t v) { - assert_proper_lock_protection(); - _allocation_stats.set_split_deaths(v); - } - void increment_split_deaths() { - assert_proper_lock_protection(); - _allocation_stats.increment_split_deaths(); - } - - 
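The block of accessors deleted above wrapped per-size-class allocation statistics (surplus, coalesce/split births and deaths), which this patch moves out of the generic FreeList. The verify_stats() removed earlier in this file checked a simple conservation rule over those counters; the sketch below restates that rule with a plain struct whose field names are simplified stand-ins.

#include <cassert>

// Simplified stand-in for the removed AllocationStats bookkeeping.
struct MiniAllocationStats {
  long prev_sweep   = 0;  // blocks of this size present at the last sweep
  long split_births = 0;  // blocks created by splitting a larger block
  long split_deaths = 0;  // blocks consumed to build a larger block
  long coal_births  = 0;  // blocks created by coalescing smaller blocks
  long coal_deaths  = 0;  // blocks destroyed by coalescing into a neighbor
};

// Conservation rule from the deleted verify_stats(): everything ever produced,
// plus one block of slack for in-flight updates, must cover everything that has
// died plus what currently sits on the list.
static bool stats_conserved(const MiniAllocationStats& s, long count_on_list) {
  return s.prev_sweep + s.split_births + s.coal_births + 1 >=
         s.split_deaths + s.coal_deaths + count_on_list;
}

int main() {
  MiniAllocationStats s;
  s.prev_sweep   = 4;
  s.split_births = 2;   // 6 produced in total
  s.coal_deaths  = 3;   // 3 died
  assert(stats_conserved(s, 3));    // 4 + 2 + 0 + 1 >= 0 + 3 + 3
  assert(!stats_conserved(s, 5));   // the list cannot hold more than was produced
  return 0;
}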
NOT_PRODUCT( - // For debugging. The "_returned_bytes" in all the lists are summed - // and compared with the total number of bytes swept during a - // collection. - size_t returned_bytes() const { return _allocation_stats.returned_bytes(); } - void set_returned_bytes(size_t v) { _allocation_stats.set_returned_bytes(v); } - void increment_returned_bytes_by(size_t v) { - _allocation_stats.set_returned_bytes(_allocation_stats.returned_bytes() + v); - } - ) + size_t returned_bytes() const { ShouldNotReachHere(); return 0; } + void set_returned_bytes(size_t v) {} + void increment_returned_bytes_by(size_t v) {} // Unlink head of list and return it. Returns NULL if // the list is empty. - Chunk* get_chunk_at_head(); + Chunk_t* get_chunk_at_head(); // Remove the first "n" or "count", whichever is smaller, chunks from the // list, setting "fl", which is required to be empty, to point to them. - void getFirstNChunksFromList(size_t n, FreeList* fl); + void getFirstNChunksFromList(size_t n, FreeList* fl); // Unlink this chunk from it's free list - void remove_chunk(Chunk* fc); + void remove_chunk(Chunk_t* fc); // Add this chunk to this free list. - void return_chunk_at_head(Chunk* fc); - void return_chunk_at_tail(Chunk* fc); + void return_chunk_at_head(Chunk_t* fc); + void return_chunk_at_tail(Chunk_t* fc); // Similar to returnChunk* but also records some diagnostic // information. - void return_chunk_at_head(Chunk* fc, bool record_return); - void return_chunk_at_tail(Chunk* fc, bool record_return); + void return_chunk_at_head(Chunk_t* fc, bool record_return); + void return_chunk_at_tail(Chunk_t* fc, bool record_return); // Prepend "fl" (whose size is required to be the same as that of "this") // to the front of "this" list. - void prepend(FreeList* fl); + void prepend(FreeList* fl); // Verify that the chunk is in the list. // found. Return NULL if "fc" is not found. - bool verify_chunk_in_free_list(Chunk* fc) const; + bool verify_chunk_in_free_list(Chunk_t* fc) const; // Stats verification - void verify_stats() const PRODUCT_RETURN; +// void verify_stats() const { ShouldNotReachHere(); }; // Printing support static void print_labels_on(outputStream* st, const char* c); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/metablock.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/metablock.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ +#ifndef SHARE_VM_MEMORY_METABLOCK_HPP +#define SHARE_VM_MEMORY_METABLOCK_HPP + +// Metablock are the unit of allocation from a Chunk. It is initialized +// with the size of the requested allocation. That size is overwritten +// once the allocation returns. +// +// A Metablock may be reused by its SpaceManager but are never moved between +// SpaceManagers. There is no explicit link to the Metachunk +// from which it was allocated. Metablock may be deallocated and +// put on a freelist but the space is never freed, rather +// the Metachunk it is a part of will be deallocated when it's +// associated class loader is collected. + +class Metablock VALUE_OBJ_CLASS_SPEC { + friend class VMStructs; + private: + // Used to align the allocation (see below). + union block_t { + void* _data[3]; + struct header_t { + size_t _word_size; + Metablock* _next; + Metablock* _prev; + } _header; + } _block; + static size_t _min_block_byte_size; + static size_t _overhead; + + typedef union block_t Block; + typedef struct header_t Header; + const Block* block() const { return &_block; } + const Block::header_t* header() const { return &(block()->_header); } + public: + + static Metablock* initialize(MetaWord* p, size_t word_size); + + // This places the body of the block at a 2 word boundary + // because every block starts on a 2 word boundary. Work out + // how to make the body on a 2 word boundary if the block + // starts on a arbitrary boundary. JJJ + + size_t word_size() const { return header()->_word_size; } + void set_word_size(size_t v) { _block._header._word_size = v; } + size_t size() const volatile { return _block._header._word_size; } + void set_size(size_t v) { _block._header._word_size = v; } + Metablock* next() const { return header()->_next; } + void set_next(Metablock* v) { _block._header._next = v; } + Metablock* prev() const { return header()->_prev; } + void set_prev(Metablock* v) { _block._header._prev = v; } + + static size_t min_block_byte_size() { return _min_block_byte_size; } + static size_t overhead() { return _overhead; } + + bool is_free() { return header()->_word_size != 0; } + void clear_next() { set_next(NULL); } + void link_prev(Metablock* ptr) { set_prev(ptr); } + uintptr_t* end() { return ((uintptr_t*) this) + size(); } + bool cantCoalesce() const { return false; } + void link_next(Metablock* ptr) { set_next(ptr); } + void link_after(Metablock* ptr){ + link_next(ptr); + if (ptr != NULL) ptr->link_prev(this); + } + + // Should not be needed in a free list of Metablocks + void markNotFree() { ShouldNotReachHere(); } + + // Debug support +#ifdef ASSERT + void* prev_addr() const { return (void*)&_block._header._prev; } + void* next_addr() const { return (void*)&_block._header._next; } + void* size_addr() const { return (void*)&_block._header._word_size; } +#endif + bool verify_chunk_in_free_list(Metablock* tc) const { return true; } + bool verify_par_locked() { return true; } + + void assert_is_mangled() const {/* Don't check "\*/} +}; +#endif // SHARE_VM_MEMORY_METABLOCK_HPP diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/metachunk.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/metachunk.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
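The new Metablock above overlays a small header (word size plus prev/next links) on the first words of the block itself through a union, so a deallocated block can sit on a free list or tree without any side allocation. The following is a self-contained model of that overlay only; the Word typedef, sizes, and class name are assumptions, and the alignment and product/debug details of the real class are omitted.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

typedef uintptr_t Word;   // assumption: one "MetaWord" == one machine word

// A freed block reuses its own payload to store the free-list header.
class MiniBlock {
  union {
    Word _data[3];                       // keeps every block at least 3 words big
    struct {
      size_t     _word_size;
      MiniBlock* _next;
      MiniBlock* _prev;
    } _header;
  } _block;
 public:
  // Place a header over a run of 'word_size' words starting at p (zeroed first,
  // mirroring the "new blocks are zero initialized" note later in this patch).
  static MiniBlock* initialize(Word* p, size_t word_size) {
    if (p == nullptr) return nullptr;
    std::memset(p, 0, word_size * sizeof(Word));
    MiniBlock* b = reinterpret_cast<MiniBlock*>(p);
    b->_block._header._word_size = word_size;
    return b;
  }
  size_t word_size() const    { return _block._header._word_size; }
  MiniBlock* next() const     { return _block._header._next; }
  void set_next(MiniBlock* v) { _block._header._next = v; }
  MiniBlock* prev() const     { return _block._header._prev; }
  void set_prev(MiniBlock* v) { _block._header._prev = v; }
};

int main() {
  Word storage[16];
  MiniBlock* b = MiniBlock::initialize(storage, 16);
  assert(b->word_size() == 16 && b->next() == nullptr);
  // The header lives inside the block's own storage; no extra allocation.
  assert(reinterpret_cast<void*>(b) == reinterpret_cast<void*>(storage));
  return 0;
}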
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +#ifndef SHARE_VM_MEMORY_METACHUNK_HPP +#define SHARE_VM_MEMORY_METACHUNK_HPP + +// Metachunk - Quantum of allocation from a Virtualspace +// Metachunks are reused (when freed are put on a global freelist) and +// have no permanent association to a SpaceManager. + +// +--------------+ <- end +// | | --+ ---+ +// | | | free | +// | | | | +// | | | | capacity +// | | | | +// | | <- top --+ | +// | | ---+ | +// | | | used | +// | | | | +// | | | | +// +--------------+ <- bottom ---+ ---+ + +class Metachunk VALUE_OBJ_CLASS_SPEC { + // link to support lists of chunks + Metachunk* _next; + Metachunk* _prev; + + MetaWord* _bottom; + MetaWord* _end; + MetaWord* _top; + size_t _word_size; + // Used in a guarantee() so included in the Product builds + // even through it is only for debugging. + bool _is_free; + + // Metachunks are allocated out of a MetadataVirtualSpace and + // and use some of its space to describe itself (plus alignment + // considerations). Metadata is allocated in the rest of the chunk. + // This size is the overhead of maintaining the Metachunk within + // the space. 
+ static size_t _overhead; + + void set_bottom(MetaWord* v) { _bottom = v; } + void set_end(MetaWord* v) { _end = v; } + void set_top(MetaWord* v) { _top = v; } + void set_word_size(size_t v) { _word_size = v; } + public: +#ifdef ASSERT + Metachunk() : _bottom(NULL), _end(NULL), _top(NULL), _is_free(false) {} +#else + Metachunk() : _bottom(NULL), _end(NULL), _top(NULL) {} +#endif + + // Used to add a Metachunk to a list of Metachunks + void set_next(Metachunk* v) { _next = v; assert(v != this, "Boom");} + void set_prev(Metachunk* v) { _prev = v; assert(v != this, "Boom");} + + MetaWord* allocate(size_t word_size); + static Metachunk* initialize(MetaWord* ptr, size_t word_size); + + // Accessors + Metachunk* next() const { return _next; } + Metachunk* prev() const { return _prev; } + MetaWord* bottom() const { return _bottom; } + MetaWord* end() const { return _end; } + MetaWord* top() const { return _top; } + size_t word_size() const { return _word_size; } + size_t size() const volatile { return _word_size; } + void set_size(size_t v) { _word_size = v; } + bool is_free() { return _is_free; } + void set_is_free(bool v) { _is_free = v; } + static size_t overhead() { return _overhead; } + void clear_next() { set_next(NULL); } + void link_prev(Metachunk* ptr) { set_prev(ptr); } + uintptr_t* end() { return ((uintptr_t*) this) + size(); } + bool cantCoalesce() const { return false; } + void link_next(Metachunk* ptr) { set_next(ptr); } + void link_after(Metachunk* ptr){ + link_next(ptr); + if (ptr != NULL) ptr->link_prev(this); + } + + // Reset top to bottom so chunk can be reused. + void reset_empty() { _top = (_bottom + _overhead); } + bool is_empty() { return _top == (_bottom + _overhead); } + + // used (has been allocated) + // free (available for future allocations) + // capacity (total size of chunk) + size_t used_word_size(); + size_t free_word_size(); + size_t capacity_word_size(); + + // Debug support +#ifdef ASSERT + void* prev_addr() const { return (void*)&_prev; } + void* next_addr() const { return (void*)&_next; } + void* size_addr() const { return (void*)&_word_size; } +#endif + bool verify_chunk_in_free_list(Metachunk* tc) const { return true; } + bool verify_par_locked() { return true; } + + void assert_is_mangled() const {/* Don't check "\*/} + +#ifdef ASSERT + void mangle(); +#endif // ASSERT + + void print_on(outputStream* st) const; + void verify(); +}; +#endif // SHARE_VM_MEMORY_METACHUNK_HPP diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/metaspace.cpp --- a/src/share/vm/memory/metaspace.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/metaspace.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -24,9 +24,12 @@ #include "precompiled.hpp" #include "gc_interface/collectedHeap.hpp" #include "memory/binaryTreeDictionary.hpp" +#include "memory/freeList.hpp" #include "memory/collectorPolicy.hpp" #include "memory/filemap.hpp" #include "memory/freeList.hpp" +#include "memory/metablock.hpp" +#include "memory/metachunk.hpp" #include "memory/metaspace.hpp" #include "memory/metaspaceShared.hpp" #include "memory/resourceArea.hpp" @@ -37,15 +40,8 @@ #include "utilities/copy.hpp" #include "utilities/debug.hpp" -// Define this macro to deallocate Metablock. If not defined, -// blocks are not yet deallocated and are only mangled. -#undef DEALLOCATE_BLOCKS - -// Easily recognizable patterns -// These patterns can be the same in 32bit or 64bit since -// they only have to be easily recognizable. 
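The Metachunk declared above carves allocations out of one contiguous region with a bump pointer: bottom..top is used, top..end is free, and reset_empty() rewinds top just past the chunk's own header so the chunk can be reused. Here is a compact standalone model of that accounting; the one-word overhead and the names are purely illustrative.

#include <cassert>
#include <cstddef>
#include <cstdint>

typedef uintptr_t Word;

class MiniChunk {
  Word* _bottom;
  Word* _end;
  Word* _top;
  static const size_t _overhead = 1;   // assumed: words the chunk uses to describe itself
 public:
  MiniChunk(Word* bottom, size_t word_size)
    : _bottom(bottom), _end(bottom + word_size), _top(bottom + _overhead) {}

  // Bump-pointer allocation: return NULL when the request does not fit.
  Word* allocate(size_t word_size) {
    if (free_word_size() < word_size) return nullptr;
    Word* result = _top;
    _top += word_size;
    return result;
  }

  size_t capacity_word_size() const { return (size_t)(_end - _bottom); }
  size_t used_word_size()     const { return (size_t)(_top - _bottom); }
  size_t free_word_size()     const { return (size_t)(_end - _top); }

  void reset_empty()   { _top = _bottom + _overhead; }   // make the chunk reusable
  bool is_empty() const { return _top == _bottom + _overhead; }
};

int main() {
  Word backing[32];
  MiniChunk c(backing, 32);
  assert(c.capacity_word_size() == 32 && c.free_word_size() == 31);
  Word* p = c.allocate(10);
  assert(p != nullptr && c.used_word_size() == 11);
  assert(c.allocate(100) == nullptr);   // does not fit in the remaining space
  c.reset_empty();
  assert(c.is_empty());
  return 0;
}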
-const void* metaspace_allocation_leader = (void*) 0X11111111; -const void* metaspace_allocation_trailer = (void*) 0X77777777; +typedef BinaryTreeDictionary BlockTreeDictionary; +typedef BinaryTreeDictionary ChunkTreeDictionary; // Parameters for stress mode testing const uint metadata_deallocate_a_lot_block = 10; @@ -53,7 +49,6 @@ size_t const allocation_from_dictionary_limit = 64 * K; const size_t metadata_chunk_initialize = 0xf7f7f7f7; const size_t metadata_deallocate = 0xf5f5f5f5; -const size_t metadata_space_manager_allocate = 0xf3f3f3f3; MetaWord* last_allocated = 0; @@ -62,11 +57,12 @@ SmallIndex = 0, MediumIndex = 1, HumongousIndex = 2, - NumberOfFreeLists = 3 + NumberOfFreeLists = 2, + NumberOfInUseLists = 3 }; static ChunkIndex next_chunk_index(ChunkIndex i) { - assert(i < NumberOfFreeLists, "Out of bound"); + assert(i < NumberOfInUseLists, "Out of bound"); return (ChunkIndex) (i+1); } @@ -100,164 +96,13 @@ // the Chunk after the header for the Chunk) where as Metachunks // point to space in a VirtualSpace. To replace Metachunks with // Chunks, change Chunks so that they can be allocated out of a VirtualSpace. -// - -// Metablock are the unit of allocation from a Chunk. It contains -// the size of the requested allocation in a debug build. -// Also in a debug build it has a marker before and after the -// body of the block. The address of the body is the address returned -// by the allocation. -// -// Layout in a debug build. In a product build only the body is present. -// -// +-----------+-----------+------------+ +-----------+ -// | word size | leader | body | ... | trailer | -// +-----------+-----------+------------+ +-----------+ -// -// A Metablock may be reused by its SpaceManager but are never moved between -// SpaceManagers. There is no explicit link to the Metachunk -// from which it was allocated. Metablock are not deallocated, rather -// the Metachunk it is a part of will be deallocated when it's -// associated class loader is collected. -// -// When the word size of a block is passed in to the deallocation -// call the word size no longer needs to be part of a Metablock. - -class Metablock { - friend class VMStructs; - private: - // Used to align the allocation (see below) and for debugging. -#ifdef ASSERT - struct { - size_t _word_size; - void* _leader; - } _header; - void* _data[1]; -#endif - static size_t _overhead; - +size_t Metablock::_min_block_byte_size = sizeof(Metablock); #ifdef ASSERT - void set_word_size(size_t v) { _header._word_size = v; } - void* leader() { return _header._leader; } - void* trailer() { - jlong index = (jlong) _header._word_size - sizeof(_header)/BytesPerWord - 1; - assert(index > 0, err_msg("Bad indexling of trailer %d", index)); - void** ptr = &_data[index]; - return *ptr; - } - void set_leader(void* v) { _header._leader = v; } - void set_trailer(void* v) { - void** ptr = &_data[_header._word_size - sizeof(_header)/BytesPerWord - 1]; - *ptr = v; - } - public: - size_t word_size() { return _header._word_size; } -#endif - public: - - static Metablock* initialize(MetaWord* p, size_t word_size); - - // This places the body of the block at a 2 word boundary - // because every block starts on a 2 word boundary. Work out - // how to make the body on a 2 word boundary if the block - // starts on a arbitrary boundary. 
JJJ - -#ifdef ASSERT - MetaWord* data() { return (MetaWord*) &_data[0]; } + size_t Metablock::_overhead = + Chunk::aligned_overhead_size(sizeof(Metablock)) / BytesPerWord; #else - MetaWord* data() { return (MetaWord*) this; } -#endif - static Metablock* metablock_from_data(MetaWord* p) { -#ifdef ASSERT - size_t word_offset = offset_of(Metablock, _data)/BytesPerWord; - Metablock* result = (Metablock*) (p - word_offset); - return result; -#else - return (Metablock*) p; + size_t Metablock::_overhead = 0; #endif - } - - static size_t overhead() { return _overhead; } - void verify(); -}; - -// Metachunk - Quantum of allocation from a Virtualspace -// Metachunks are reused (when freed are put on a global freelist) and -// have no permanent association to a SpaceManager. - -// +--------------+ <- end -// | | --+ ---+ -// | | | free | -// | | | | -// | | | | capacity -// | | | | -// | | <- top --+ | -// | | ---+ | -// | | | used | -// | | | | -// | | | | -// +--------------+ <- bottom ---+ ---+ - -class Metachunk VALUE_OBJ_CLASS_SPEC { - // link to support lists of chunks - Metachunk* _next; - - MetaWord* _bottom; - MetaWord* _end; - MetaWord* _top; - size_t _word_size; - - // Metachunks are allocated out of a MetadataVirtualSpace and - // and use some of its space to describe itself (plus alignment - // considerations). Metadata is allocated in the rest of the chunk. - // This size is the overhead of maintaining the Metachunk within - // the space. - static size_t _overhead; - - void set_bottom(MetaWord* v) { _bottom = v; } - void set_end(MetaWord* v) { _end = v; } - void set_top(MetaWord* v) { _top = v; } - void set_word_size(size_t v) { _word_size = v; } - public: - - // Used to add a Metachunk to a list of Metachunks - void set_next(Metachunk* v) { _next = v; assert(v != this, "Boom");} - - Metablock* allocate(size_t word_size); - static Metachunk* initialize(MetaWord* ptr, size_t word_size); - - // Accessors - Metachunk* next() const { return _next; } - MetaWord* bottom() const { return _bottom; } - MetaWord* end() const { return _end; } - MetaWord* top() const { return _top; } - size_t word_size() const { return _word_size; } - static size_t overhead() { return _overhead; } - - // Reset top to bottom so chunk can be reused. - void reset_empty() { _top = (_bottom + _overhead); } - bool is_empty() { return _top == (_bottom + _overhead); } - - // used (has been allocated) - // free (available for future allocations) - // capacity (total size of chunk) - size_t used_word_size(); - size_t free_word_size(); - size_t capacity_word_size(); - -#ifdef ASSERT - void mangle() { - // Mangle the payload of the chunk and not the links that - // maintain list of chunks. - HeapWord* start = (HeapWord*)(bottom() + overhead()); - size_t word_size = capacity_word_size() - overhead(); - Copy::fill_to_words(start, word_size, metadata_chunk_initialize); - } -#endif // ASSERT - - void print_on(outputStream* st) const; - void verify(); -}; // Pointer to list of Metachunks. 
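The debug-only Metablock methods removed above implemented a guard-word scheme: each allocation carried its word size plus recognizable leader (0x11111111) and trailer (0x77777777) patterns around the body, so corruption could be detected in verify(). The sketch below only illustrates that general guard-word technique in miniature; it is not the deleted HotSpot code and its layout is simplified.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Recognizable patterns, in the spirit of the removed leader/trailer constants.
static const uintptr_t kLeader  = 0x11111111;
static const uintptr_t kTrailer = 0x77777777;

// Debug-style layout over a raw word array: [word_size][leader][body...][trailer].
// 'words' counts the whole block including this overhead; the caller gets the body.
static uintptr_t* init_guarded_block(uintptr_t* block, size_t words) {
  block[0] = words;
  block[1] = kLeader;
  block[words - 1] = kTrailer;
  return &block[2];
}

static bool verify_guarded_block(const uintptr_t* body) {
  const uintptr_t* block = body - 2;
  size_t words = (size_t)block[0];
  return block[1] == kLeader && block[words - 1] == kTrailer;
}

int main() {
  uintptr_t raw[8];
  uintptr_t* body = init_guarded_block(raw, 8);
  body[0] = 42;                          // user data stays between the guards
  assert(verify_guarded_block(body));
  body[4] = 0;                           // last usable body word (raw[6]); raw[7] is the trailer
  assert(verify_guarded_block(body));
  return 0;
}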
@@ -292,7 +137,10 @@ // SmallChunk // MediumChunk // HumongousChunk - ChunkList _free_chunks[3]; + ChunkList _free_chunks[NumberOfFreeLists]; + + // HumongousChunk + ChunkTreeDictionary _humongous_dictionary; // ChunkManager in all lists of this type size_t _free_chunks_total; @@ -337,7 +185,9 @@ } ChunkList* free_medium_chunks() { return &_free_chunks[1]; } ChunkList* free_small_chunks() { return &_free_chunks[0]; } - ChunkList* free_humongous_chunks() { return &_free_chunks[2]; } + ChunkTreeDictionary* humongous_dictionary() { + return &_humongous_dictionary; + } ChunkList* free_chunks(ChunkIndex index); @@ -356,41 +206,35 @@ void locked_print_free_chunks(outputStream* st); void locked_print_sum_free_chunks(outputStream* st); + + void print_on(outputStream* st); }; // Used to manage the free list of Metablocks (a block corresponds // to the allocation of a quantum of metadata). class BlockFreelist VALUE_OBJ_CLASS_SPEC { -#ifdef DEALLOCATE_BLOCKS - BinaryTreeDictionary* _dictionary; -#endif - static Metablock* initialize_free_chunk(Metablock* block, size_t word_size); - -#ifdef DEALLOCATE_BLOCKS + BlockTreeDictionary* _dictionary; + static Metablock* initialize_free_chunk(MetaWord* p, size_t word_size); + // Accessors - BinaryTreeDictionary* dictionary() const { return _dictionary; } -#endif + BlockTreeDictionary* dictionary() const { return _dictionary; } public: BlockFreelist(); ~BlockFreelist(); // Get and return a block to the free list - Metablock* get_block(size_t word_size); - void return_block(Metablock* block, size_t word_size); - - size_t totalSize() { -#ifdef DEALLOCATE_BLOCKS - if (dictionary() == NULL) { - return 0; - } else { - return dictionary()->totalSize(); - } -#else + MetaWord* get_block(size_t word_size); + void return_block(MetaWord* p, size_t word_size); + + size_t total_size() { + if (dictionary() == NULL) { return 0; -#endif + } else { + return dictionary()->total_size(); } +} void print_on(outputStream* st) const; }; @@ -600,7 +444,6 @@ }; }; - class Metadebug : AllStatic { // Debugging support for Metaspaces static int _deallocate_block_a_lot_count; @@ -655,7 +498,7 @@ // List of chunks in use by this SpaceManager. Allocations // are done from the current chunk. The list is used for deallocating // chunks when the SpaceManager is freed. - Metachunk* _chunks_in_use[NumberOfFreeLists]; + Metachunk* _chunks_in_use[NumberOfInUseLists]; Metachunk* _current_chunk; // Virtual space where allocation comes from. @@ -700,24 +543,6 @@ // Add chunk to the list of chunks in use void add_chunk(Metachunk* v, bool make_current); - // Debugging support - void verify_chunks_in_use_index(ChunkIndex index, Metachunk* v) { - switch (index) { - case 0: - assert(v->word_size() == SmallChunk, "Not a SmallChunk"); - break; - case 1: - assert(v->word_size() == MediumChunk, "Not a MediumChunk"); - break; - case 2: - assert(v->word_size() > MediumChunk, "Not a HumongousChunk"); - break; - default: - assert(false, "Wrong list."); - } - } - - protected: Mutex* lock() const { return _lock; } public: @@ -751,10 +576,10 @@ MetaWord* allocate(size_t word_size); // Helper for allocations - Metablock* allocate_work(size_t word_size); + MetaWord* allocate_work(size_t word_size); // Returns a block to the per manager freelist - void deallocate(MetaWord* p); + void deallocate(MetaWord* p, size_t word_size); // Based on the allocation size and a minimum chunk size, // returned chunk size (for expanding space for chunk allocation). 
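The reworked BlockFreelist above creates its dictionary lazily: total_size() reports 0 until the first block is returned, and return_block() allocates the dictionary on demand, which the NULL checks in the hunk reflect. A trimmed model of that pattern follows; a std::multimap stands in for the tree dictionary and all names are assumptions.

#include <cassert>
#include <cstddef>
#include <map>

class MiniBlockFreelist {
  // Size -> address of a freed block; created only when first needed.
  std::multimap<size_t, void*>* _dictionary = nullptr;
 public:
  ~MiniBlockFreelist() { delete _dictionary; }

  void return_block(void* p, size_t word_size) {
    if (_dictionary == nullptr) {
      _dictionary = new std::multimap<size_t, void*>();   // lazy creation
    }
    _dictionary->insert({word_size, p});
  }

  void* get_block(size_t word_size) {
    if (_dictionary == nullptr) return nullptr;           // nothing ever freed
    auto it = _dictionary->find(word_size);               // exact-size lookup
    if (it == _dictionary->end()) return nullptr;
    void* p = it->second;
    _dictionary->erase(it);
    return p;
  }

  size_t total_size() const {
    if (_dictionary == nullptr) return 0;                 // mirrors the NULL check above
    size_t sum = 0;
    for (const auto& e : *_dictionary) sum += e.first;
    return sum;
  }
};

int main() {
  MiniBlockFreelist fl;
  assert(fl.total_size() == 0 && fl.get_block(8) == nullptr);
  long backing[8];
  fl.return_block(backing, 8);
  assert(fl.total_size() == 8);
  assert(fl.get_block(8) == static_cast<void*>(backing) && fl.total_size() == 0);
  return 0;
}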
@@ -763,7 +588,7 @@ // Called when an allocation from the current chunk fails. // Gets a new chunk (may require getting a new virtual space), // and allocates from that chunk. - Metablock* grow_and_allocate(size_t word_size); + MetaWord* grow_and_allocate(size_t word_size); // debugging support. @@ -780,6 +605,8 @@ uint const SpaceManager::_small_chunk_limit = 4; + + const char* SpaceManager::_expand_lock_name = "SpaceManager chunk allocation lock"; const int SpaceManager::_expand_lock_rank = Monitor::leaf - 1; @@ -788,39 +615,26 @@ SpaceManager::_expand_lock_name, Mutex::_allow_vm_block_flag); -#ifdef ASSERT -size_t Metablock::_overhead = - Chunk::aligned_overhead_size(sizeof(Metablock)) / BytesPerWord; -#else -size_t Metablock::_overhead = 0; -#endif size_t Metachunk::_overhead = Chunk::aligned_overhead_size(sizeof(Metachunk)) / BytesPerWord; // New blocks returned by the Metaspace are zero initialized. // We should fix the constructors to not assume this instead. Metablock* Metablock::initialize(MetaWord* p, size_t word_size) { + if (p == NULL) { + return NULL; + } + Metablock* result = (Metablock*) p; // Clear the memory Copy::fill_to_aligned_words((HeapWord*)result, word_size); #ifdef ASSERT result->set_word_size(word_size); - // Check after work size is set. - result->set_leader((void*) metaspace_allocation_leader); - result->set_trailer((void*) metaspace_allocation_trailer); #endif return result; } -void Metablock::verify() { -#ifdef ASSERT - assert(leader() == metaspace_allocation_leader && - trailer() == metaspace_allocation_trailer, - "block has been corrupted"); -#endif -} - // Metachunk methods Metachunk* Metachunk::initialize(MetaWord* ptr, size_t word_size) { @@ -843,18 +657,13 @@ } -Metablock* Metachunk::allocate(size_t word_size) { - Metablock* result = NULL; +MetaWord* Metachunk::allocate(size_t word_size) { + MetaWord* result = NULL; // If available, bump the pointer to allocate. if (free_word_size() >= word_size) { - result = Metablock::initialize(_top, word_size); + result = _top; _top = _top + word_size; } -#ifdef ASSERT - assert(result == NULL || - result->word_size() == word_size, - "Block size is not set correctly"); -#endif return result; } @@ -878,103 +687,85 @@ bottom(), top(), end(), word_size()); } +#ifdef ASSERT +void Metachunk::mangle() { + // Mangle the payload of the chunk and not the links that + // maintain list of chunks. + HeapWord* start = (HeapWord*)(bottom() + overhead()); + size_t word_size = capacity_word_size() - overhead(); + Copy::fill_to_words(start, word_size, metadata_chunk_initialize); +} +#endif // ASSERT void Metachunk::verify() { #ifdef ASSERT // Cannot walk through the blocks unless the blocks have // headers with sizes. 
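allocate_work() above tries a bump-pointer allocation from the current chunk and only falls back to grow_and_allocate(), which obtains a fresh chunk and retries. The skeleton below shows that two-step flow under heavy simplification: chunk sizing, locking, and the virtual-space layer are replaced by a fixed two-chunk arena, and every name is illustrative.

#include <cassert>
#include <cstddef>
#include <cstdint>

typedef uintptr_t Word;

// Minimal bump-pointer chunk.
struct ToyChunk {
  Word* top;
  Word* end;
  Word* allocate(size_t words) {
    if ((size_t)(end - top) < words) return nullptr;
    Word* r = top; top += words; return r;
  }
};

class ToySpaceManager {
  ToyChunk* _current;
  ToyChunk  _chunks[2];
  Word      _backing[2][16];
  int       _next_chunk = 0;

  // Stand-in for getting a new chunk from the chunk-manager/virtual-space layer.
  ToyChunk* get_new_chunk() {
    if (_next_chunk >= 2) return nullptr;                  // out of space
    ToyChunk* c = &_chunks[_next_chunk];
    c->top = _backing[_next_chunk];
    c->end = _backing[_next_chunk] + 16;
    _next_chunk++;
    return c;
  }

  Word* grow_and_allocate(size_t words) {
    ToyChunk* c = get_new_chunk();
    if (c == nullptr) return nullptr;
    _current = c;
    return _current->allocate(words);                      // retry from the fresh chunk
  }

 public:
  ToySpaceManager() : _current(nullptr) {}

  Word* allocate_work(size_t words) {
    Word* result = nullptr;
    if (_current != nullptr) {
      result = _current->allocate(words);                  // fast path: current chunk
    }
    if (result == nullptr) {
      result = grow_and_allocate(words);                   // slow path: new chunk, retry
    }
    return result;
  }
};

int main() {
  ToySpaceManager sm;
  assert(sm.allocate_work(10) != nullptr);   // fills most of chunk 0
  assert(sm.allocate_work(10) != nullptr);   // does not fit there; comes from chunk 1
  assert(sm.allocate_work(10) == nullptr);   // both toy chunks exhausted
  return 0;
}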
- MetaWord* curr = bottom() + overhead(); - while (curr < top()) { - Metablock* block = (Metablock*) curr; - size_t word_size = block->word_size(); - block->verify(); - curr = curr + word_size; - } + assert(_bottom <= _top && + _top <= _end, + "Chunk has been smashed"); + assert(SpaceManager::is_humongous(_word_size) || + _word_size == SpaceManager::MediumChunk || + _word_size == SpaceManager::SmallChunk, + "Chunk size is wrong"); #endif return; } // BlockFreelist methods -#ifdef DEALLOCATE_BLOCKS BlockFreelist::BlockFreelist() : _dictionary(NULL) {} -#else -BlockFreelist::BlockFreelist() {} -#endif BlockFreelist::~BlockFreelist() { -#ifdef DEALLOCATE_BLOCKS if (_dictionary != NULL) { if (Verbose && TraceMetadataChunkAllocation) { _dictionary->print_free_lists(gclog_or_tty); } delete _dictionary; } -#endif } -Metablock* BlockFreelist::initialize_free_chunk(Metablock* block, size_t word_size) { -#ifdef DEALLOCATE_BLOCKS -#ifdef ASSERT - assert(word_size = block->word_size(), "Wrong chunk size"); -#endif - Metablock* result = block; - result->setSize(word_size); - result->linkPrev(NULL); - result->linkNext(NULL); - - return result; -#else - ShouldNotReachHere(); +Metablock* BlockFreelist::initialize_free_chunk(MetaWord* p, size_t word_size) { + Metablock* block = (Metablock*) p; + block->set_word_size(word_size); + block->set_prev(NULL); + block->set_next(NULL); + return block; -#endif } -void BlockFreelist::return_block(Metablock* block, size_t word_size) { -#ifdef ASSERT - assert(word_size = block->word_size(), "Block size is wrong");; -#endif - Metablock* free_chunk = initialize_free_chunk(block, word_size); -#ifdef DEALLOCATE_BLOCKS +void BlockFreelist::return_block(MetaWord* p, size_t word_size) { + Metablock* free_chunk = initialize_free_chunk(p, word_size); if (dictionary() == NULL) { - _dictionary = new BinaryTreeDictionary(false /* adaptive_freelists */); + _dictionary = new BlockTreeDictionary(); } - dictionary()->returnChunk(free_chunk); -#endif + dictionary()->return_chunk(free_chunk); } -Metablock* BlockFreelist::get_block(size_t word_size) { -#ifdef DEALLOCATE_BLOCKS +MetaWord* BlockFreelist::get_block(size_t word_size) { if (dictionary() == NULL) { return NULL; } - Metablock* free_chunk = - dictionary()->getChunk(word_size, FreeBlockDictionary::exactly); -#else - Metablock* free_chunk = NULL; -#endif - if (free_chunk == NULL) { + if (word_size < TreeChunk::min_size()) { + // Dark matter. Too small for dictionary. 
return NULL; } - assert(free_chunk->word_size() == word_size, "Size of chunk is incorrect"); - Metablock* block = Metablock::initialize((MetaWord*) free_chunk, word_size); -#ifdef ASSERT - assert(block->word_size() == word_size, "Block size is not set correctly"); -#endif - - return block; + + Metablock* free_block = + dictionary()->get_chunk(word_size, FreeBlockDictionary::exactly); + if (free_block == NULL) { + return NULL; + } + + return (MetaWord*) free_block; } void BlockFreelist::print_on(outputStream* st) const { -#ifdef DEALLOCATE_BLOCKS if (dictionary() == NULL) { return; } dictionary()->print_free_lists(st); -#else - return; -#endif } // VirtualSpaceNode methods @@ -1597,14 +1388,11 @@ Metadebug::deallocate_block_a_lot_count() % MetaDataDeallocateALotInterval == 0 ) { Metadebug::set_deallocate_block_a_lot_count(0); for (uint i = 0; i < metadata_deallocate_a_lot_block; i++) { - Metablock* dummy_block = sm->allocate_work(raw_word_size); + MetaWord* dummy_block = sm->allocate_work(raw_word_size); if (dummy_block == 0) { break; } -#ifdef ASSERT - assert(dummy_block->word_size() == raw_word_size, "Block size is not set correctly"); -#endif - sm->deallocate(dummy_block->data()); + sm->deallocate(dummy_block, raw_word_size); } } else { Metadebug::inc_deallocate_block_a_lot_count(); @@ -1784,8 +1572,8 @@ } void ChunkManager::locked_verify() { + locked_verify_free_chunks_count(); locked_verify_free_chunks_total(); - locked_verify_free_chunks_count(); } void ChunkManager::locked_print_free_chunks(outputStream* st) { @@ -1803,7 +1591,6 @@ return &_free_chunks[index]; } - // These methods that sum the free chunk lists are used in printing // methods that are used in product builds. size_t ChunkManager::sum_free_chunks() { @@ -1818,6 +1605,7 @@ result = result + list->sum_list_capacity(); } + result = result + humongous_dictionary()->total_size(); return result; } @@ -1831,6 +1619,7 @@ } count = count + list->sum_list_count(); } + count = count + humongous_dictionary()->total_free_blocks(); return count; } @@ -1875,23 +1664,24 @@ assert_lock_strong(SpaceManager::expand_lock()); locked_verify(); - ChunkList* free_list = find_free_chunks_list(word_size); - assert(free_list != NULL, "Sanity check"); - - Metachunk* chunk = free_list->head(); - debug_only(Metachunk* debug_head = chunk;) - - if (chunk == NULL) { - return NULL; - } - - Metachunk* prev_chunk = chunk; - if (chunk->word_size() == word_size) { - // Chunk is being removed from the chunks free list. - dec_free_chunks_total(chunk->capacity_word_size()); + + Metachunk* chunk = NULL; + if (!SpaceManager::is_humongous(word_size)) { + ChunkList* free_list = find_free_chunks_list(word_size); + assert(free_list != NULL, "Sanity check"); + + chunk = free_list->head(); + debug_only(Metachunk* debug_head = chunk;) + + if (chunk == NULL) { + return NULL; + } + // Remove the chunk as the head of the list. free_list->set_head(chunk->next()); chunk->set_next(NULL); + // Chunk has been removed from the chunks free list. + dec_free_chunks_total(chunk->capacity_word_size()); if (TraceMetadataChunkAllocation && Verbose) { tty->print_cr("ChunkManager::free_chunks_get: free_list " @@ -1899,79 +1689,24 @@ free_list, chunk, chunk->word_size()); } } else { - assert(SpaceManager::is_humongous(word_size), - "Should only need to check humongous"); - // This code to find the best fit is just for purposes of - // investigating the loss due to fragmentation on a humongous - // chunk. It will be replace by a binaryTreeDictionary for - // the humongous chunks. 
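After this change, free_chunks_get() serves the fixed chunk sizes from per-size lists and sends humongous requests to a dictionary with an "at least" lookup, reporting the internal-fragmentation waste instead of scanning a list for a best fit. The condensed model below mirrors that split; the chunk sizes, names, and the multimap dictionary are assumptions for the sketch.

#include <cassert>
#include <cstddef>
#include <map>
#include <vector>

// Assumed sizes for the sketch only.
static const size_t SmallChunkWords  = 64;
static const size_t MediumChunkWords = 512;

struct ToyChunkManager {
  std::vector<size_t> small_list;                 // per-size free lists
  std::vector<size_t> medium_list;
  std::multimap<size_t, size_t> humongous_dict;   // size-keyed, supports >= lookups

  static bool is_humongous(size_t words) { return words > MediumChunkWords; }

  // Returns the size of the chunk handed out, or 0 if none is available.
  // Requests are assumed to be one of the two fixed sizes, or humongous.
  size_t free_chunks_get(size_t words, size_t* waste) {
    *waste = 0;
    if (!is_humongous(words)) {
      std::vector<size_t>& list = (words == SmallChunkWords) ? small_list : medium_list;
      if (list.empty()) return 0;
      size_t chunk = list.back();
      list.pop_back();                            // head-of-list removal
      return chunk;
    }
    auto it = humongous_dict.lower_bound(words);  // "atLeast" lookup
    if (it == humongous_dict.end()) return 0;
    size_t chunk = it->first;
    humongous_dict.erase(it);
    *waste = chunk - words;                       // the waste the trace output reports
    return chunk;
  }
};

int main() {
  ToyChunkManager cm;
  cm.small_list.push_back(SmallChunkWords);
  cm.humongous_dict.insert({2048, 2048});
  size_t waste = 0;
  assert(cm.free_chunks_get(SmallChunkWords, &waste) == SmallChunkWords && waste == 0);
  assert(cm.free_chunks_get(1500, &waste) == 2048 && waste == 548);   // at-least match
  assert(cm.free_chunks_get(4096, &waste) == 0);                      // nothing big enough
  return 0;
}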
- uint count = 0; - Metachunk* best_fit = NULL; - Metachunk* best_fit_prev = NULL; - while (chunk != NULL) { - count++; - if (chunk->word_size() < word_size) { - prev_chunk = chunk; - chunk = chunk->next(); - } else if (chunk->word_size() == word_size) { - break; - } else { - if (best_fit == NULL || - best_fit->word_size() > chunk->word_size()) { - best_fit_prev = prev_chunk; - best_fit = chunk; - } - prev_chunk = chunk; - chunk = chunk->next(); - } - } - if (chunk == NULL) { - prev_chunk = best_fit_prev; - chunk = best_fit; + chunk = humongous_dictionary()->get_chunk( + word_size, + FreeBlockDictionary::atLeast); + + if (chunk != NULL) { + if (TraceMetadataHumongousAllocation) { + size_t waste = chunk->word_size() - word_size; + tty->print_cr("Free list allocate humongous chunk size " SIZE_FORMAT + " for requested size " SIZE_FORMAT + " waste " SIZE_FORMAT, + chunk->word_size(), word_size, waste); } - if (chunk != NULL) { - if (TraceMetadataHumongousAllocation) { - size_t waste = chunk->word_size() - word_size; - tty->print_cr("Free list allocate humongous chunk size " SIZE_FORMAT - " for requested size " SIZE_FORMAT - " waste " SIZE_FORMAT - " found at " SIZE_FORMAT " of " SIZE_FORMAT, - chunk->word_size(), word_size, waste, - count, free_list->sum_list_count()); - } - // Chunk is being removed from the chunks free list. - dec_free_chunks_total(chunk->capacity_word_size()); - // Remove the chunk if it is at the head of the list. - if (chunk == free_list->head()) { - free_list->set_head(chunk->next()); - - if (TraceMetadataHumongousAllocation) { - tty->print_cr("ChunkManager::free_chunks_get: humongous free_list " - PTR_FORMAT " chunk " PTR_FORMAT " size " SIZE_FORMAT - " new head " PTR_FORMAT, - free_list, chunk, chunk->word_size(), - free_list->head()); - } - } else { - // Remove a chunk in the interior of the list - prev_chunk->set_next(chunk->next()); - - if (TraceMetadataHumongousAllocation) { - tty->print_cr("ChunkManager::free_chunks_get: humongous free_list " - PTR_FORMAT " chunk " PTR_FORMAT " size " SIZE_FORMAT - PTR_FORMAT " prev " PTR_FORMAT " next " PTR_FORMAT, - free_list, chunk, chunk->word_size(), - prev_chunk, chunk->next()); - } - } - chunk->set_next(NULL); - } else { - if (TraceMetadataHumongousAllocation) { - tty->print_cr("ChunkManager::free_chunks_get: New humongous chunk of size " - SIZE_FORMAT, - word_size); - } - } + // Chunk is being removed from the chunks free list. + dec_free_chunks_total(chunk->capacity_word_size()); +#ifdef ASSERT + chunk->set_is_free(false); +#endif + } } locked_verify(); return chunk; @@ -2000,12 +1735,18 @@ return chunk; } +void ChunkManager::print_on(outputStream* out) { + if (PrintFLSStatistics != 0) { + humongous_dictionary()->report_statistics(); + } +} + // SpaceManager methods size_t SpaceManager::sum_free_in_chunks_in_use() const { MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag); size_t free = 0; - for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) { + for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) { Metachunk* chunk = chunks_in_use(i); while (chunk != NULL) { free += chunk->free_word_size(); @@ -2018,11 +1759,12 @@ size_t SpaceManager::sum_waste_in_chunks_in_use() const { MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag); size_t result = 0; - for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) { - // Count the free space in all the chunk but not the - // current chunk from which allocations are still being done. 
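sum_waste_in_chunks_in_use() above walks the in-use chunks and counts the free words in every chunk except the current allocation chunk, whose remaining space will still be handed out. In miniature, with a plain vector standing in for the linked chunk list and all names assumed:

#include <cassert>
#include <cstddef>
#include <vector>

struct UseChunk { size_t free_words; };

// Free space in chunks we can no longer allocate from counts as waste; the
// current chunk is skipped because its remaining space is still usable.
static size_t sum_waste(const std::vector<UseChunk>& in_use, const UseChunk* current) {
  size_t result = 0;
  for (const UseChunk& c : in_use) {
    if (&c != current) result += c.free_words;
  }
  return result;
}

int main() {
  std::vector<UseChunk> chunks = {{5}, {0}, {12}};
  const UseChunk* current = &chunks[2];          // still being allocated from
  assert(sum_waste(chunks, current) == 5);       // 5 + 0; the 12 free words are not waste
  return 0;
}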
+ for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) { + + result += sum_waste_in_chunks_in_use(i); } + return result; } @@ -2033,10 +1775,10 @@ // Count the free space in all the chunk but not the // current chunk from which allocations are still being done. if (chunk != NULL) { - while (chunk != NULL) { - if (chunk != current_chunk()) { - result += chunk->free_word_size(); - } + Metachunk* prev = chunk; + while (chunk != NULL && chunk != current_chunk()) { + result += chunk->free_word_size(); + prev = chunk; chunk = chunk->next(); count++; } @@ -2047,7 +1789,7 @@ size_t SpaceManager::sum_capacity_in_chunks_in_use() const { MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag); size_t sum = 0; - for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) { + for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) { Metachunk* chunk = chunks_in_use(i); while (chunk != NULL) { // Just changed this sum += chunk->capacity_word_size(); @@ -2061,9 +1803,10 @@ size_t SpaceManager::sum_count_in_chunks_in_use() { size_t count = 0; - for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) { + for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) { count = count + sum_count_in_chunks_in_use(i); } + return count; } @@ -2081,7 +1824,7 @@ size_t SpaceManager::sum_used_in_chunks_in_use() const { MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag); size_t used = 0; - for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) { + for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) { Metachunk* chunk = chunks_in_use(i); while (chunk != NULL) { used += chunk->used_word_size(); @@ -2139,15 +1882,13 @@ gclog_or_tty->print_cr(" word_size " PTR_FORMAT, word_size); gclog_or_tty->print_cr(" chunk_word_size " PTR_FORMAT, chunk_word_size); - gclog_or_tty->print_cr(" block overhead " PTR_FORMAT - " chunk overhead " PTR_FORMAT, - Metablock::overhead(), + gclog_or_tty->print_cr(" chunk overhead " PTR_FORMAT, Metachunk::overhead()); } return chunk_word_size; } -Metablock* SpaceManager::grow_and_allocate(size_t word_size) { +MetaWord* SpaceManager::grow_and_allocate(size_t word_size) { assert(vs_list()->current_virtual_space() != NULL, "Should have been set"); assert(current_chunk() == NULL || @@ -2180,7 +1921,7 @@ void SpaceManager::print_on(outputStream* st) const { for (ChunkIndex i = SmallIndex; - i < NumberOfFreeLists ; + i < NumberOfInUseLists ; i = next_chunk_index(i) ) { st->print_cr(" chunks_in_use " PTR_FORMAT " chunk size " PTR_FORMAT, chunks_in_use(i), @@ -2191,8 +1932,11 @@ sum_waste_in_chunks_in_use(SmallIndex), sum_waste_in_chunks_in_use(MediumIndex), sum_waste_in_chunks_in_use(HumongousIndex)); - // Nothing in them yet - // block_freelists()->print_on(st); + // block free lists + if (block_freelists() != NULL) { + st->print_cr("total in block free lists " SIZE_FORMAT, + block_freelists()->total_size()); + } } SpaceManager::SpaceManager(Mutex* lock, VirtualSpaceList* vs_list) : @@ -2200,7 +1944,7 @@ _allocation_total(0), _lock(lock) { Metadebug::init_allocation_fail_alot_count(); - for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) { + for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) { _chunks_in_use[i] = NULL; } _current_chunk = NULL; @@ -2262,22 +2006,24 @@ // Humongous chunks are never the current chunk. 
Metachunk* humongous_chunks = chunks_in_use(HumongousIndex); - if (humongous_chunks != NULL) { - chunk_manager->free_humongous_chunks()->add_at_head(humongous_chunks); - set_chunks_in_use(HumongousIndex, NULL); + while (humongous_chunks != NULL) { +#ifdef ASSERT + humongous_chunks->set_is_free(true); +#endif + Metachunk* next_humongous_chunks = humongous_chunks->next(); + chunk_manager->humongous_dictionary()->return_chunk(humongous_chunks); + humongous_chunks = next_humongous_chunks; } + set_chunks_in_use(HumongousIndex, NULL); chunk_manager->locked_verify(); } -void SpaceManager::deallocate(MetaWord* p) { +void SpaceManager::deallocate(MetaWord* p, size_t word_size) { assert_lock_strong(_lock); - ShouldNotReachHere(); // Where is this needed. -#ifdef DEALLOCATE_BLOCKS - Metablock* block = Metablock::metablock_from_data(p); - // This is expense but kept it until integration JJJ - assert(contains((address)block), "Block does not belong to this metaspace"); - block_freelists()->return_block(block, word_size); -#endif + size_t min_size = TreeChunk::min_size(); + assert(word_size >= min_size, + err_msg("Should not deallocate dark matter " SIZE_FORMAT, word_size)); + block_freelists()->return_block(p, word_size); } // Adds a chunk to the list of chunks in use. @@ -2366,50 +2112,40 @@ MetaWord* SpaceManager::allocate(size_t word_size) { MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag); - size_t block_overhead = Metablock::overhead(); // If only the dictionary is going to be used (i.e., no // indexed free list), then there is a minimum size requirement. // MinChunkSize is a placeholder for the real minimum size JJJ - size_t byte_size_with_overhead = (word_size + block_overhead) * BytesPerWord; -#ifdef DEALLOCATE_BLOCKS - size_t raw_bytes_size = MAX2(ARENA_ALIGN(byte_size_with_overhead), - MinChunkSize * BytesPerWord); -#else - size_t raw_bytes_size = ARENA_ALIGN(byte_size_with_overhead); -#endif + size_t byte_size = word_size * BytesPerWord; + + size_t byte_size_with_overhead = byte_size + Metablock::overhead(); + + size_t raw_bytes_size = MAX2(byte_size_with_overhead, + Metablock::min_block_byte_size()); + raw_bytes_size = ARENA_ALIGN(raw_bytes_size); size_t raw_word_size = raw_bytes_size / BytesPerWord; assert(raw_word_size * BytesPerWord == raw_bytes_size, "Size problem"); BlockFreelist* fl = block_freelists(); - Metablock* block = NULL; + MetaWord* p = NULL; // Allocation from the dictionary is expensive in the sense that // the dictionary has to be searched for a size. Don't allocate // from the dictionary until it starts to get fat. Is this // a reasonable policy? Maybe an skinny dictionary is fast enough // for allocations. Do some profiling. JJJ - if (fl->totalSize() > allocation_from_dictionary_limit) { - block = fl->get_block(raw_word_size); + if (fl->total_size() > allocation_from_dictionary_limit) { + p = fl->get_block(raw_word_size); } - if (block == NULL) { - block = allocate_work(raw_word_size); - if (block == NULL) { - return NULL; - } + if (p == NULL) { + p = allocate_work(raw_word_size); } Metadebug::deallocate_block_a_lot(this, raw_word_size); - // Push the allocation past the word containing the size and leader. -#ifdef ASSERT - MetaWord* result = block->data(); - return result; -#else - return (MetaWord*) block; -#endif + return p; } // Returns the address of spaced allocated for "word_size". 
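The reworked deallocate() above takes the word size and refuses anything smaller than the tree chunk's minimum size: such "dark matter" cannot hold a free-list node, so it is never returned to the block dictionary and is only mangled in debug builds. A sketch of that policy follows; the threshold constant, the multimap free list, and the names are assumptions, with only the 0xf5 mangle pattern taken from the hunk.

#include <cassert>
#include <cstddef>
#include <cstring>
#include <map>

// Assumed stand-in for the minimum size a block needs to carry the tree/list header.
static const size_t kMinDictionaryWords = 4;

static std::multimap<size_t, void*> g_block_freelist;   // toy block dictionary

static void deallocate(void* p, size_t word_size) {
  if (word_size < kMinDictionaryWords) {
    // Dark matter: too small to track. Debug builds just mangle the words.
#ifndef NDEBUG
    std::memset(p, 0xf5, word_size * sizeof(void*));
#endif
    return;
  }
  g_block_freelist.insert({word_size, p});               // large enough to reuse
}

int main() {
  void* small_block[2];
  void* big_block[8];
  deallocate(small_block, 2);                            // dropped (dark matter)
  deallocate(big_block, 8);                              // kept for reuse
  assert(g_block_freelist.size() == 1);
  assert(g_block_freelist.begin()->second == static_cast<void*>(big_block));
  return 0;
}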
// This methods does not know about blocks (Metablocks) -Metablock* SpaceManager::allocate_work(size_t word_size) { +MetaWord* SpaceManager::allocate_work(size_t word_size) { assert_lock_strong(_lock); #ifdef ASSERT if (Metadebug::test_metadata_failure()) { @@ -2417,7 +2153,7 @@ } #endif // Is there space in the current chunk? - Metablock* result = NULL; + MetaWord* result = NULL; // For DumpSharedSpaces, only allocate out of the current chunk which is // never null because we gave it the size we wanted. Caller reports out @@ -2436,8 +2172,8 @@ } if (result > 0) { inc_allocation_total(word_size); - assert(result != (Metablock*) chunks_in_use(MediumIndex), "Head of the list is being allocated"); - assert(result->word_size() == word_size, "Size not set correctly"); + assert(result != (MetaWord*) chunks_in_use(MediumIndex), + "Head of the list is being allocated"); } return result; @@ -2447,13 +2183,13 @@ // If there are blocks in the dictionary, then // verfication of chunks does not work since // being in the dictionary alters a chunk. - if (block_freelists()->totalSize() == 0) { + if (block_freelists()->total_size() == 0) { // Skip the small chunks because their next link points to // medium chunks. This is because the small chunk is the // current chunk (for allocations) until it is full and the // the addition of the next chunk does not NULL the next // like of the small chunk. - for (ChunkIndex i = MediumIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) { + for (ChunkIndex i = MediumIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) { Metachunk* curr = chunks_in_use(i); while (curr != NULL) { curr->verify(); @@ -2492,7 +2228,7 @@ // Add up statistics for all chunks in this SpaceManager. for (ChunkIndex index = SmallIndex; - index < NumberOfFreeLists; + index < NumberOfInUseLists; index = next_chunk_index(index)) { for (Metachunk* curr = chunks_in_use(index); curr != NULL; @@ -2521,7 +2257,7 @@ #ifdef ASSERT void SpaceManager::mangle_freed_chunks() { for (ChunkIndex index = SmallIndex; - index < NumberOfFreeLists; + index < NumberOfInUseLists; index = next_chunk_index(index)) { for (Metachunk* curr = chunks_in_use(index); curr != NULL; @@ -2833,13 +2569,12 @@ } } - MetaWord* Metaspace::allocate(size_t word_size, MetadataType mdtype) { // DumpSharedSpaces doesn't use class metadata area (yet) if (mdtype == ClassType && !DumpSharedSpaces) { - return class_vsm()->allocate(word_size); + return class_vsm()->allocate(word_size); } else { - return vsm()->allocate(word_size); + return vsm()->allocate(word_size); } } @@ -2853,6 +2588,7 @@ gclog_or_tty->print_cr("Increase capacity to GC from " SIZE_FORMAT " to " SIZE_FORMAT, before_inc, MetaspaceGC::capacity_until_GC()); } + result = allocate(word_size, mdtype); return result; @@ -2889,37 +2625,39 @@ void Metaspace::deallocate(MetaWord* ptr, size_t word_size, bool is_class) { if (SafepointSynchronize::is_at_safepoint()) { assert(Thread::current()->is_VM_thread(), "should be the VM thread"); - // Don't take lock -#ifdef DEALLOCATE_BLOCKS - if (is_class) { - class_vsm()->deallocate(ptr); - } else { - vsm()->deallocate(ptr); + // Don't take Heap_lock + MutexLocker ml(vsm()->lock()); + if (word_size < TreeChunk::min_size()) { + // Dark matter. Too small for dictionary. 
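// (A hedged note on "dark matter": blocks below TreeChunk::min_size() are too small to
// carry the free-dictionary metadata, so they cannot be returned anywhere and are simply
// abandoned until the owning metaspace dies; the ASSERT-only fill below just mangles the
// words with 0xf5f5f5f5 so stale uses are easier to spot in debug builds.)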
+#ifdef ASSERT + Copy::fill_to_words((HeapWord*)ptr, word_size, 0xf5f5f5f5); +#endif + return; } -#else -#ifdef ASSERT - Copy::fill_to_words((HeapWord*)ptr, word_size, metadata_deallocate); -#endif -#endif - + if (is_class) { + class_vsm()->deallocate(ptr, word_size); + } else { + vsm()->deallocate(ptr, word_size); + } } else { MutexLocker ml(vsm()->lock()); -#ifdef DEALLOCATE_BLOCKS - if (is_class) { - class_vsm()->deallocate(ptr); - } else { - vsm()->deallocate(ptr); + if (word_size < TreeChunk::min_size()) { + // Dark matter. Too small for dictionary. +#ifdef ASSERT + Copy::fill_to_words((HeapWord*)ptr, word_size, 0xf5f5f5f5); +#endif + return; } -#else -#ifdef ASSERT - Copy::fill_to_words((HeapWord*)ptr, word_size, metadata_deallocate); -#endif -#endif + if (is_class) { + class_vsm()->deallocate(ptr, word_size); + } else { + vsm()->deallocate(ptr, word_size); + } } } -MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size, +Metablock* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size, bool read_only, MetadataType mdtype, TRAPS) { if (HAS_PENDING_EXCEPTION) { assert(false, "Should not allocate with exception pending"); @@ -2943,7 +2681,7 @@ if (result == NULL) { report_out_of_shared_space(read_only ? SharedReadOnly : SharedReadWrite); } - return result; + return Metablock::initialize(result, word_size); } result = loader_data->metaspace_non_null()->allocate(word_size, mdtype); @@ -2951,7 +2689,7 @@ if (result == NULL) { // Try to clean out some memory and retry. result = - Universe::heap()->collector_policy()->satisfy_failed_metadata_allocation( + Universe::heap()->collector_policy()->satisfy_failed_metadata_allocation( loader_data, word_size, mdtype); // If result is still null, we are out of memory. @@ -2967,7 +2705,7 @@ THROW_OOP_0(Universe::out_of_memory_error_perm_gen()); } } - return result; + return Metablock::initialize(result, word_size); } void Metaspace::print_on(outputStream* out) const { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/metaspace.hpp --- a/src/share/vm/memory/metaspace.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/metaspace.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -57,12 +57,10 @@ // class ClassLoaderData; +class Metablock; class MetaWord; class Mutex; class outputStream; -class FreeChunk; -template class FreeList; -template class BinaryTreeDictionary; class SpaceManager; // Metaspaces each have a SpaceManager and allocations @@ -128,7 +126,7 @@ size_t capacity_words(MetadataType mdtype) const; size_t waste_words(MetadataType mdtype) const; - static MetaWord* allocate(ClassLoaderData* loader_data, size_t size, + static Metablock* allocate(ClassLoaderData* loader_data, size_t size, bool read_only, MetadataType mdtype, TRAPS); void deallocate(MetaWord* ptr, size_t byte_size, bool is_class); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/metaspaceShared.cpp --- a/src/share/vm/memory/metaspaceShared.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/metaspaceShared.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -663,8 +663,8 @@ if (_ro_base == NULL || _rw_base == NULL) { return false; } else { - return ((p > _ro_base && p < (_ro_base + SharedReadOnlySize)) || - (p > _rw_base && p < (_rw_base + SharedReadWriteSize))); + return ((p >= _ro_base && p < (_ro_base + SharedReadOnlySize)) || + (p >= _rw_base && p < (_rw_base + SharedReadWriteSize))); } } @@ -693,14 +693,6 @@ ReservedSpace shared_rs = mapinfo->reserve_shared_memory(); if (!shared_rs.is_reserved()) return false; - // Split reserved 
memory into pieces (windows needs this) - ReservedSpace ro_rs = shared_rs.first_part(SharedReadOnlySize); - ReservedSpace tmp_rs1 = shared_rs.last_part(SharedReadOnlySize); - ReservedSpace rw_rs = tmp_rs1.first_part(SharedReadWriteSize); - ReservedSpace tmp_rs2 = tmp_rs1.last_part(SharedReadWriteSize); - ReservedSpace md_rs = tmp_rs2.first_part(SharedMiscDataSize); - ReservedSpace mc_rs = tmp_rs2.last_part(SharedMiscDataSize); - // Map each shared region if ((_ro_base = mapinfo->map_region(ro)) != NULL && (_rw_base = mapinfo->map_region(rw)) != NULL && diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/memory/resourceArea.hpp --- a/src/share/vm/memory/resourceArea.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/memory/resourceArea.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -127,15 +127,21 @@ void reset_to_mark() { if (UseMallocOnly) free_malloced_objects(); - if( _chunk->next() ) // Delete later chunks + if( _chunk->next() ) { // Delete later chunks + // reset arena size before delete chunks. Otherwise, the total + // arena size could exceed total chunk size + assert(_area->size_in_bytes() > size_in_bytes(), "Sanity check"); + _area->set_size_in_bytes(size_in_bytes()); _chunk->next_chop(); + } else { + assert(_area->size_in_bytes() == size_in_bytes(), "Sanity check"); + } _area->_chunk = _chunk; // Roll back arena to saved chunk _area->_hwm = _hwm; _area->_max = _max; // clear out this chunk (to detect allocation bugs) if (ZapResourceArea) memset(_hwm, badResourceValue, _max - _hwm); - _area->set_size_in_bytes(size_in_bytes()); } ~ResourceMark() { @@ -219,15 +225,21 @@ void reset_to_mark() { if (UseMallocOnly) free_malloced_objects(); - if( _chunk->next() ) // Delete later chunks + if( _chunk->next() ) { // Delete later chunks + // reset arena size before delete chunks. 
Otherwise, the total + // arena size could exceed total chunk size + assert(_area->size_in_bytes() > size_in_bytes(), "Sanity check"); + _area->set_size_in_bytes(size_in_bytes()); _chunk->next_chop(); + } else { + assert(_area->size_in_bytes() == size_in_bytes(), "Sanity check"); + } _area->_chunk = _chunk; // Roll back arena to saved chunk _area->_hwm = _hwm; _area->_max = _max; // clear out this chunk (to detect allocation bugs) if (ZapResourceArea) memset(_hwm, badResourceValue, _max - _hwm); - _area->set_size_in_bytes(size_in_bytes()); } ~DeoptResourceMark() { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/oops/method.cpp --- a/src/share/vm/oops/method.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/oops/method.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -1155,8 +1155,12 @@ vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) { // if loader is not the default loader (i.e., != NULL), we can't know the intrinsics // because we are not loading from core libraries - if (InstanceKlass::cast(holder)->class_loader() != NULL) + // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar + // which does not use the class default class loader so we check for its loader here + if ((InstanceKlass::cast(holder)->class_loader() != NULL) && + InstanceKlass::cast(holder)->class_loader()->klass()->name() != vmSymbols::sun_misc_Launcher_ExtClassLoader()) { return vmSymbols::NO_SID; // regardless of name, no intrinsics here + } // see if the klass name is well-known: Symbol* klass_name = InstanceKlass::cast(holder)->name(); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/c2_globals.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -439,6 +439,9 @@ product(bool, DoEscapeAnalysis, true, \ "Perform escape analysis") \ \ + develop(bool, ExitEscapeAnalysisOnTimeout, true, \ + "Exit or throw assert in EA when it reaches time limit") \ + \ notproduct(bool, PrintEscapeAnalysis, false, \ "Print the results of escape analysis") \ \ diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/callGenerator.cpp --- a/src/share/vm/opto/callGenerator.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/callGenerator.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -670,6 +670,129 @@ } +//------------------------PredictedIntrinsicGenerator------------------------------ +// Internal class which handles all predicted Intrinsic calls. 
+class PredictedIntrinsicGenerator : public CallGenerator { + CallGenerator* _intrinsic; + CallGenerator* _cg; + +public: + PredictedIntrinsicGenerator(CallGenerator* intrinsic, + CallGenerator* cg) + : CallGenerator(cg->method()) + { + _intrinsic = intrinsic; + _cg = cg; + } + + virtual bool is_virtual() const { return true; } + virtual bool is_inlined() const { return true; } + virtual bool is_intrinsic() const { return true; } + + virtual JVMState* generate(JVMState* jvms); +}; + + +CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic, + CallGenerator* cg) { + return new PredictedIntrinsicGenerator(intrinsic, cg); +} + + +JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms) { + GraphKit kit(jvms); + PhaseGVN& gvn = kit.gvn(); + + CompileLog* log = kit.C->log(); + if (log != NULL) { + log->elem("predicted_intrinsic bci='%d' method='%d'", + jvms->bci(), log->identify(method())); + } + + Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms()); + if (kit.failing()) + return NULL; // might happen because of NodeCountInliningCutoff + + SafePointNode* slow_map = NULL; + JVMState* slow_jvms; + if (slow_ctl != NULL) { + PreserveJVMState pjvms(&kit); + kit.set_control(slow_ctl); + if (!kit.stopped()) { + slow_jvms = _cg->generate(kit.sync_jvms()); + if (kit.failing()) + return NULL; // might happen because of NodeCountInliningCutoff + assert(slow_jvms != NULL, "must be"); + kit.add_exception_states_from(slow_jvms); + kit.set_map(slow_jvms->map()); + if (!kit.stopped()) + slow_map = kit.stop(); + } + } + + if (kit.stopped()) { + // Predicate is always false. + kit.set_jvms(slow_jvms); + return kit.transfer_exceptions_into_jvms(); + } + + // Generate intrinsic code: + JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms()); + if (new_jvms == NULL) { + // Intrinsic failed, so use slow code or make a direct call. + if (slow_map == NULL) { + CallGenerator* cg = CallGenerator::for_direct_call(method()); + new_jvms = cg->generate(kit.sync_jvms()); + } else { + kit.set_jvms(slow_jvms); + return kit.transfer_exceptions_into_jvms(); + } + } + kit.add_exception_states_from(new_jvms); + kit.set_jvms(new_jvms); + + // Need to merge slow and fast? + if (slow_map == NULL) { + // The fast path is the only path remaining. + return kit.transfer_exceptions_into_jvms(); + } + + if (kit.stopped()) { + // Intrinsic method threw an exception, so it's just the slow path after all. + kit.set_jvms(slow_jvms); + return kit.transfer_exceptions_into_jvms(); + } + + // Finish the diamond. 
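// (A hedged reading of the merge code that follows: control from the fast, intrinsic
// path becomes region input 1 and control from the slow, regular-call path input 2;
// a Phi is built for i/o, the two memory states are merged, and an extra Phi is created
// for every live stack slot whose value differs between the two paths.  Conceptually:
//   result = predicate_holds ? intrinsic(args)            // input 1
//                            : java_implementation(args); // input 2
// with exceptions from both paths collected via transfer_exceptions_into_jvms().)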
+ kit.C->set_has_split_ifs(true); // Has chance for split-if optimization + RegionNode* region = new (kit.C) RegionNode(3); + region->init_req(1, kit.control()); + region->init_req(2, slow_map->control()); + kit.set_control(gvn.transform(region)); + Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO); + iophi->set_req(2, slow_map->i_o()); + kit.set_i_o(gvn.transform(iophi)); + kit.merge_memory(slow_map->merged_memory(), region, 2); + uint tos = kit.jvms()->stkoff() + kit.sp(); + uint limit = slow_map->req(); + for (uint i = TypeFunc::Parms; i < limit; i++) { + // Skip unused stack slots; fast forward to monoff(); + if (i == tos) { + i = kit.jvms()->monoff(); + if( i >= limit ) break; + } + Node* m = kit.map()->in(i); + Node* n = slow_map->in(i); + if (m != n) { + const Type* t = gvn.type(m)->meet(gvn.type(n)); + Node* phi = PhiNode::make(region, m, t); + phi->set_req(2, n); + kit.map()->set_req(i, gvn.transform(phi)); + } + } + return kit.transfer_exceptions_into_jvms(); +} + //-------------------------UncommonTrapCallGenerator----------------------------- // Internal class which handles all out-of-line calls checking receiver type. class UncommonTrapCallGenerator : public CallGenerator { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/callGenerator.hpp --- a/src/share/vm/opto/callGenerator.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/callGenerator.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -143,6 +143,9 @@ // Registry for intrinsics: static CallGenerator* for_intrinsic(ciMethod* m); static void register_intrinsic(ciMethod* m, CallGenerator* cg); + static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic, + CallGenerator* cg); + virtual Node* generate_predicate(JVMState* jvms) { return NULL; }; static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) { if (PrintInlining) diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/compile.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -3047,9 +3047,9 @@ case T_LONG: case T_DOUBLE: return (_v._value.j == other._v._value.j); case T_OBJECT: - case T_METADATA: return (_v._metadata == other._v._metadata); case T_ADDRESS: return (_v._value.l == other._v._value.l); case T_VOID: return (_v._value.l == other._v._value.l); // jump-table entries + case T_METADATA: return (_v._metadata == other._v._metadata); default: ShouldNotReachHere(); } return false; diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/compile.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -149,7 +149,7 @@ private: BasicType _type; union { - jvalue _value; + jvalue _value; Metadata* _metadata; } _v; int _offset; // offset of this constant (in bytes) relative to the constant table base. diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/doCall.cpp --- a/src/share/vm/opto/doCall.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/doCall.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -107,7 +107,17 @@ // intrinsics handle strict f.p. correctly. if (allow_inline && allow_intrinsics) { CallGenerator* cg = find_intrinsic(callee, call_is_virtual); - if (cg != NULL) return cg; + if (cg != NULL) { + if (cg->is_predicted()) { + // Code without intrinsic but, hopefully, inlined. 
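// (A hedged summary of the lines that follow: for an intrinsic whose generator reports
// is_predicted(), an ordinary call generator for the same callee is built first
// (apparently with intrinsics disabled via the trailing 'false', so the lookup cannot
// recurse), and the two are then wrapped into a PredictedIntrinsicGenerator; the
// intrinsic then runs only on the runtime-checked fast path while the plain call stays
// available as the slow path.)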
+ CallGenerator* inline_cg = this->call_generator(callee, + vtable_index, call_is_virtual, jvms, allow_inline, prof_factor, false); + if (inline_cg != NULL) { + cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg); + } + } + return cg; + } } // Do method handle calls. diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/escape.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -893,12 +893,16 @@ arg_has_oops && (i > TypeFunc::Parms); #ifdef ASSERT if (!(is_arraycopy || - call->as_CallLeaf()->_name != NULL && - (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || - strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 )) - ) { + (call->as_CallLeaf()->_name != NULL && + (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || + strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 || + strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 || + strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 || + strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 || + strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0) + ))) { call->dump(); - assert(false, "EA: unexpected CallLeaf"); + fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name)); } #endif // Always process arraycopy's destination object since @@ -1080,7 +1084,7 @@ C->log()->text("%s", (iterations >= CG_BUILD_ITER_LIMIT) ? "iterations" : "time"); C->log()->end_elem(" limit'"); } - assert(false, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d", + assert(ExitEscapeAnalysisOnTimeout, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d", time.seconds(), iterations, nodes_size(), ptnodes_worklist.length())); // Possible infinite build_connection_graph loop, // bailout (no changes to ideal graph were made). 
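The escape.cpp hunk above also softens the connection-graph timeout: the unconditional assert(false, ...) becomes assert(ExitEscapeAnalysisOnTimeout, ...), with the new develop flag defined in c2_globals.hpp earlier in this patch. A small sketch of the effect; in product builds develop flags are constant and the assert is compiled out, so nothing changes there:

#include <cassert>

static bool ExitEscapeAnalysisOnTimeout = true;   // develop flag, default true

void on_connection_graph_timeout() {
  assert(ExitEscapeAnalysisOnTimeout && "infinite EA connection graph build");
  // With the default setting the assert passes and escape analysis simply bails out,
  // leaving the ideal graph untouched; running a debug VM with
  // -XX:-ExitEscapeAnalysisOnTimeout turns the timeout back into a hard stop
  // so the runaway build can be diagnosed.
}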
diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/library_call.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -44,18 +44,22 @@ public: private: bool _is_virtual; + bool _is_predicted; vmIntrinsics::ID _intrinsic_id; public: - LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id) + LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, vmIntrinsics::ID id) : InlineCallGenerator(m), _is_virtual(is_virtual), + _is_predicted(is_predicted), _intrinsic_id(id) { } virtual bool is_intrinsic() const { return true; } virtual bool is_virtual() const { return _is_virtual; } + virtual bool is_predicted() const { return _is_predicted; } virtual JVMState* generate(JVMState* jvms); + virtual Node* generate_predicate(JVMState* jvms); vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; } }; @@ -83,6 +87,7 @@ int arg_size() const { return callee()->arg_size(); } bool try_to_inline(); + Node* try_to_predicate(); // Helper functions to inline natives void push_result(RegionNode* region, PhiNode* value); @@ -148,6 +153,7 @@ CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) { return generate_method_call(method_id, true, false); } + Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static); Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2); Node* make_string_method_node(int opcode, Node* str1, Node* str2); @@ -253,6 +259,10 @@ bool inline_reverseBytes(vmIntrinsics::ID id); bool inline_reference_get(); + bool inline_aescrypt_Block(vmIntrinsics::ID id); + bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id); + Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting); + Node* get_key_start_from_aescrypt_object(Node* aescrypt_object); }; @@ -306,6 +316,8 @@ } } + bool is_predicted = false; + switch (id) { case vmIntrinsics::_compareTo: if (!SpecialStringCompareTo) return NULL; @@ -413,6 +425,18 @@ break; #endif + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + if (!UseAESIntrinsics) return NULL; + break; + + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + if (!UseAESIntrinsics) return NULL; + // these two require the predicated logic + is_predicted = true; + break; + default: assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility"); assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?"); @@ -444,7 +468,7 @@ if (!InlineUnsafeOps) return NULL; } - return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id); + return new LibraryIntrinsic(m, is_virtual, is_predicted, (vmIntrinsics::ID) id); } //----------------------register_library_intrinsics----------------------- @@ -496,6 +520,47 @@ return NULL; } +Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) { + LibraryCallKit kit(jvms, this); + Compile* C = kit.C; + int nodes = C->unique(); +#ifndef PRODUCT + assert(is_predicted(), "sanity"); + if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) { + char buf[1000]; + const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf)); + tty->print_cr("Predicate for intrinsic %s", str); + } +#endif + + Node* slow_ctl = kit.try_to_predicate(); + if (!kit.failing()) { + if (C->log()) { + 
C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'", + vmIntrinsics::name_at(intrinsic_id()), + (is_virtual() ? " virtual='1'" : ""), + C->unique() - nodes); + } + return slow_ctl; // Could be NULL if the check folds. + } + + // The intrinsic bailed out + if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) { + if (jvms->has_method()) { + // Not a root compile. + const char* msg = "failed to generate predicate for intrinsic"; + CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg); + } else { + // Root compile + tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in", + vmIntrinsics::name_at(intrinsic_id()), + (is_virtual() ? " (virtual)" : ""), kit.bci()); + } + } + C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed); + return NULL; +} + bool LibraryCallKit::try_to_inline() { // Handle symbolic names for otherwise undistinguished boolean switches: const bool is_store = true; @@ -767,6 +832,14 @@ case vmIntrinsics::_Reference_get: return inline_reference_get(); + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + return inline_aescrypt_Block(intrinsic_id()); + + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + return inline_cipherBlockChaining_AESCrypt(intrinsic_id()); + default: // If you get here, it may be that someone has added a new intrinsic // to the list in vmSymbols.hpp without implementing it here. @@ -780,6 +853,36 @@ } } +Node* LibraryCallKit::try_to_predicate() { + if (!jvms()->has_method()) { + // Root JVMState has a null method. + assert(map()->memory()->Opcode() == Op_Parm, ""); + // Insert the memory aliasing node + set_all_memory(reset_memory()); + } + assert(merged_memory(), ""); + + switch (intrinsic_id()) { + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + return inline_cipherBlockChaining_AESCrypt_predicate(false); + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + return inline_cipherBlockChaining_AESCrypt_predicate(true); + + default: + // If you get here, it may be that someone has added a new intrinsic + // to the list in vmSymbols.hpp without implementing it here. +#ifndef PRODUCT + if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) { + tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)", + vmIntrinsics::name_at(intrinsic_id()), intrinsic_id()); + } +#endif + Node* slow_ctl = control(); + set_control(top()); // No fast path instrinsic + return slow_ctl; + } +} + //------------------------------push_result------------------------------ // Helper function for finishing intrinsics. void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) { @@ -3830,7 +3933,7 @@ vtable_index*vtableEntry::size()) * wordSize + vtableEntry::method_offset_in_bytes(); Node* entry_addr = basic_plus_adr(obj_klass, entry_offset); - Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT); + Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS); // Compare the target method with the expected method (e.g., Object.hashCode). 
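// (A hedged note on the make_load change just above: the vtable slot being read holds a
// Method*, which is native metadata rather than a Java object, so the load is now typed
// as a raw address (TypePtr::NOTNULL, T_ADDRESS) instead of an oop (T_OBJECT); the
// comparison against the expected method below is unaffected.)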
const TypePtr* native_call_addr = TypeMetadataPtr::make(method); @@ -5613,3 +5716,265 @@ push(result); return true; } + + +Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, + bool is_exact=true, bool is_static=false) { + + const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr(); + assert(tinst != NULL, "obj is null"); + assert(tinst->klass()->is_loaded(), "obj is not loaded"); + assert(!is_exact || tinst->klass_is_exact(), "klass not exact"); + + ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName), + ciSymbol::make(fieldTypeString), + is_static); + if (field == NULL) return (Node *) NULL; + assert (field != NULL, "undefined field"); + + // Next code copied from Parse::do_get_xxx(): + + // Compute address and memory type. + int offset = field->offset_in_bytes(); + bool is_vol = field->is_volatile(); + ciType* field_klass = field->type(); + assert(field_klass->is_loaded(), "should be loaded"); + const TypePtr* adr_type = C->alias_type(field)->adr_type(); + Node *adr = basic_plus_adr(fromObj, fromObj, offset); + BasicType bt = field->layout_type(); + + // Build the resultant type of the load + const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass()); + + // Build the load. + Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol); + return loadedField; +} + + +//------------------------------inline_aescrypt_Block----------------------- +bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) { + address stubAddr; + const char *stubName; + assert(UseAES, "need AES instruction support"); + + switch(id) { + case vmIntrinsics::_aescrypt_encryptBlock: + stubAddr = StubRoutines::aescrypt_encryptBlock(); + stubName = "aescrypt_encryptBlock"; + break; + case vmIntrinsics::_aescrypt_decryptBlock: + stubAddr = StubRoutines::aescrypt_decryptBlock(); + stubName = "aescrypt_decryptBlock"; + break; + } + if (stubAddr == NULL) return false; + + // Restore the stack and pop off the arguments. + int nargs = 5; // this + 2 oop/offset combos + assert(callee()->signature()->size() == nargs-1, "encryptBlock has 4 arguments"); + + Node *aescrypt_object = argument(0); + Node *src = argument(1); + Node *src_offset = argument(2); + Node *dest = argument(3); + Node *dest_offset = argument(4); + + // (1) src and dest are arrays. + const Type* src_type = src->Value(&_gvn); + const Type* dest_type = dest->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + const TypeAryPtr* top_dest = dest_type->isa_aryptr(); + assert (top_src != NULL && top_src->klass() != NULL && top_dest != NULL && top_dest->klass() != NULL, "args are strange"); + + // for the quick and dirty code we will skip all the checks. + // we are just trying to get the call to be generated. + Node* src_start = src; + Node* dest_start = dest; + if (src_offset != NULL || dest_offset != NULL) { + assert(src_offset != NULL && dest_offset != NULL, ""); + src_start = array_element_address(src, src_offset, T_BYTE); + dest_start = array_element_address(dest, dest_offset, T_BYTE); + } + + // now need to get the start of its expanded key array + // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java + Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object); + if (k_start == NULL) return false; + + // Call the stub. 
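// (A hedged note on the call emitted below: it passes three raw addresses
// (src_start, dest_start, k_start) and produces no result, matching the
// aescrypt_block_Type() signature added to runtime.cpp later in this patch
// (three NOTNULL pointers, void return).  In C terms the stub behaves roughly like
//   void aescrypt_block(const jbyte* src, jbyte* dst, const jint* expanded_key);
// with the length implicit, since a single 16-byte AES block is processed.)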
+ make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, dest_start, k_start); + + return true; +} + +//------------------------------inline_cipherBlockChaining_AESCrypt----------------------- +bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) { + address stubAddr; + const char *stubName; + + assert(UseAES, "need AES instruction support"); + + switch(id) { + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + stubAddr = StubRoutines::cipherBlockChaining_encryptAESCrypt(); + stubName = "cipherBlockChaining_encryptAESCrypt"; + break; + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + stubAddr = StubRoutines::cipherBlockChaining_decryptAESCrypt(); + stubName = "cipherBlockChaining_decryptAESCrypt"; + break; + } + if (stubAddr == NULL) return false; + + + // Restore the stack and pop off the arguments. + int nargs = 6; // this + oop/offset + len + oop/offset + assert(callee()->signature()->size() == nargs-1, "wrong number of arguments"); + Node *cipherBlockChaining_object = argument(0); + Node *src = argument(1); + Node *src_offset = argument(2); + Node *len = argument(3); + Node *dest = argument(4); + Node *dest_offset = argument(5); + + // (1) src and dest are arrays. + const Type* src_type = src->Value(&_gvn); + const Type* dest_type = dest->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + const TypeAryPtr* top_dest = dest_type->isa_aryptr(); + assert (top_src != NULL && top_src->klass() != NULL + && top_dest != NULL && top_dest->klass() != NULL, "args are strange"); + + // checks are the responsibility of the caller + Node* src_start = src; + Node* dest_start = dest; + if (src_offset != NULL || dest_offset != NULL) { + assert(src_offset != NULL && dest_offset != NULL, ""); + src_start = array_element_address(src, src_offset, T_BYTE); + dest_start = array_element_address(dest, dest_offset, T_BYTE); + } + + // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object + // (because of the predicated logic executed earlier). + // so we cast it here safely. 
+ // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java + + Node* embeddedCipherObj = load_field_from_object(cipherBlockChaining_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false); + if (embeddedCipherObj == NULL) return false; + + // cast it to what we know it will be at runtime + const TypeInstPtr* tinst = _gvn.type(cipherBlockChaining_object)->isa_instptr(); + assert(tinst != NULL, "CBC obj is null"); + assert(tinst->klass()->is_loaded(), "CBC obj is not loaded"); + ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt")); + if (!klass_AESCrypt->is_loaded()) return false; + + ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass(); + const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt); + const TypeOopPtr* xtype = aklass->as_instance_type(); + Node* aescrypt_object = new(C) CheckCastPPNode(control(), embeddedCipherObj, xtype); + aescrypt_object = _gvn.transform(aescrypt_object); + + // we need to get the start of the aescrypt_object's expanded key array + Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object); + if (k_start == NULL) return false; + + // similarly, get the start address of the r vector + Node* objRvec = load_field_from_object(cipherBlockChaining_object, "r", "[B", /*is_exact*/ false); + if (objRvec == NULL) return false; + Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE); + + // Call the stub, passing src_start, dest_start, k_start, r_start and src_len + make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::cipherBlockChaining_aescrypt_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, dest_start, k_start, r_start, len); + + // return is void so no result needs to be pushed + + return true; +} + +//------------------------------get_key_start_from_aescrypt_object----------------------- +Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) { + Node* objAESCryptKey = load_field_from_object(aescrypt_object, "K", "[I", /*is_exact*/ false); + assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt"); + if (objAESCryptKey == NULL) return (Node *) NULL; + + // now have the array, need to get the start address of the K array + Node* k_start = array_element_address(objAESCryptKey, intcon(0), T_INT); + return k_start; +} + +//----------------------------inline_cipherBlockChaining_AESCrypt_predicate---------------------------- +// Return node representing slow path of predicate check. +// the pseudo code we want to emulate with this predicate is: +// for encryption: +// if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath +// for decryption: +// if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath +// note cipher==plain is more conservative than the original java code but that's OK +// +Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) { + // First, check receiver for NULL since it is virtual method. + int nargs = arg_size(); + Node* objCBC = argument(0); + _sp += nargs; + objCBC = do_null_check(objCBC, T_OBJECT); + _sp -= nargs; + + if (stopped()) return NULL; // Always NULL + + // Load embeddedCipher field of CipherBlockChaining object. 
+ Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false); + + // get AESCrypt klass for instanceOf check + // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point + // will have same classloader as CipherBlockChaining object + const TypeInstPtr* tinst = _gvn.type(objCBC)->isa_instptr(); + assert(tinst != NULL, "CBCobj is null"); + assert(tinst->klass()->is_loaded(), "CBCobj is not loaded"); + + // we want to do an instanceof comparison against the AESCrypt class + ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt")); + if (!klass_AESCrypt->is_loaded()) { + // if AESCrypt is not even loaded, we never take the intrinsic fast path + Node* ctrl = control(); + set_control(top()); // no regular fast path + return ctrl; + } + ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass(); + + _sp += nargs; // gen_instanceof might do an uncommon trap + Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt))); + _sp -= nargs; + Node* cmp_instof = _gvn.transform(new (C) CmpINode(instof, intcon(1))); + Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne)); + + Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN); + + // for encryption, we are done + if (!decrypting) + return instof_false; // even if it is NULL + + // for decryption, we need to add a further check to avoid + // taking the intrinsic path when cipher and plain are the same + // see the original java code for why. + RegionNode* region = new(C) RegionNode(3); + region->init_req(1, instof_false); + Node* src = argument(1); + Node *dest = argument(4); + Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest)); + Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq)); + Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN); + region->init_req(2, src_dest_conjoint); + + record_for_igvn(region); + return _gvn.transform(region); + +} + + diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/mulnode.cpp --- a/src/share/vm/opto/mulnode.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/mulnode.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -479,24 +479,27 @@ return new (phase->C) AndINode(load,phase->intcon(mask&0xFFFF)); // Masking bits off of a Short? Loading a Character does some masking - if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) { - Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control), - load->in(MemNode::Memory), - load->in(MemNode::Address), - load->adr_type()); - ldus = phase->transform(ldus); - return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF)); - } + if (can_reshape && + load->outcnt() == 1 && load->unique_out() == this) { + if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) { + Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control), + load->in(MemNode::Memory), + load->in(MemNode::Address), + load->adr_type()); + ldus = phase->transform(ldus); + return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF)); + } - // Masking sign bits off of a Byte? Do an unsigned byte load plus - // an and. 
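// (A hedged note on the new guard wrapped around both narrowing rewrites in this hunk:
// the signed-load-to-unsigned-load replacement now runs only during IGVN (can_reshape)
// and only when this AndI is the load's sole user, so the original LoadS/LoadB goes dead
// instead of leaving both a signed and an unsigned load of the same location in the
// graph; keeping a single load with a well-defined signedness matters for the SuperWord
// type analysis changed elsewhere in this patch.)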
- if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) { - Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control), - load->in(MemNode::Memory), - load->in(MemNode::Address), - load->adr_type()); - ldub = phase->transform(ldub); - return new (phase->C) AndINode(ldub, phase->intcon(mask)); + // Masking sign bits off of a Byte? Do an unsigned byte load plus + // an and. + if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) { + Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control), + load->in(MemNode::Memory), + load->in(MemNode::Address), + load->adr_type()); + ldub = phase->transform(ldub); + return new (phase->C) AndINode(ldub, phase->intcon(mask)); + } } // Masking off sign bits? Dont make them! @@ -923,7 +926,9 @@ set_req(2, phase->intcon(0)); return this; } - else if( ld->Opcode() == Op_LoadUS ) + else if( can_reshape && + ld->Opcode() == Op_LoadUS && + ld->outcnt() == 1 && ld->unique_out() == shl) // Replace zero-extension-load with sign-extension-load return new (phase->C) LoadSNode( ld->in(MemNode::Control), ld->in(MemNode::Memory), diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/runtime.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -811,6 +811,48 @@ return TypeFunc::make(domain, range); } +// for aescrypt encrypt/decrypt operations, just three pointers returning void (length is constant) +const TypeFunc* OptoRuntime::aescrypt_block_Type() { + // create input type (domain) + int num_args = 3; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // src + fields[argp++] = TypePtr::NOTNULL; // dest + fields[argp++] = TypePtr::NOTNULL; // k array + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // no result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = NULL; // void + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + +// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning void +const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { + // create input type (domain) + int num_args = 5; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // src + fields[argp++] = TypePtr::NOTNULL; // dest + fields[argp++] = TypePtr::NOTNULL; // k array + fields[argp++] = TypePtr::NOTNULL; // r array + fields[argp++] = TypeInt::INT; // src len + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // no result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = NULL; // void + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + //------------- Interpreter state access for on stack replacement const TypeFunc* OptoRuntime::osr_end_Type() { // create input type (domain) diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/runtime.hpp --- a/src/share/vm/opto/runtime.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/runtime.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -280,6 +280,9 @@ static const TypeFunc* array_fill_Type(); + static const TypeFunc* aescrypt_block_Type(); + static const TypeFunc* 
cipherBlockChaining_aescrypt_Type(); + // leaf on stack replacement interpreter accessor types static const TypeFunc* osr_end_Type(); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/superword.cpp --- a/src/share/vm/opto/superword.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/superword.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -1776,16 +1776,15 @@ set_velt_type(n, container_type(n)); } - // Propagate narrowed type backwards through operations + // Propagate integer narrowed type backwards through operations // that don't depend on higher order bits for (int i = _block.length() - 1; i >= 0; i--) { Node* n = _block.at(i); // Only integer types need be examined - const Type* vt = velt_type(n); - if (vt->basic_type() == T_INT) { + const Type* vtn = velt_type(n); + if (vtn->basic_type() == T_INT) { uint start, end; VectorNode::vector_operands(n, &start, &end); - const Type* vt = velt_type(n); for (uint j = start; j < end; j++) { Node* in = n->in(j); @@ -1801,6 +1800,24 @@ } } if (same_type) { + // For right shifts of small integer types (bool, byte, char, short) + // we need precise information about sign-ness. Only Load nodes have + // this information because Store nodes are the same for signed and + // unsigned values. And any arithmetic operation after a load may + // expand a value to signed Int so such right shifts can't be used + // because vector elements do not have upper bits of Int. + const Type* vt = vtn; + if (VectorNode::is_shift(in)) { + Node* load = in->in(1); + if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) { + vt = velt_type(load); + } else if (in->Opcode() != Op_LShiftI) { + // Widen type to Int to avoid creation of right shift vector + // (align + data_size(s1) check in stmts_can_pack() will fail). + // Note, left shifts work regardless type. + vt = TypeInt::INT; + } + } set_velt_type(in, vt); } } @@ -1841,7 +1858,20 @@ // Smallest type containing range of values const Type* SuperWord::container_type(Node* n) { if (n->is_Mem()) { - return Type::get_const_basic_type(n->as_Mem()->memory_type()); + BasicType bt = n->as_Mem()->memory_type(); + if (n->is_Store() && (bt == T_CHAR)) { + // Use T_SHORT type instead of T_CHAR for stored values because any + // preceding arithmetic operation extends values to signed Int. + bt = T_SHORT; + } + if (n->Opcode() == Op_LoadUB) { + // Adjust type for unsigned byte loads, it is important for right shifts. + // T_BOOLEAN is used because there is no basic type representing type + // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only + // size (one byte) and sign is important. 
+ bt = T_BOOLEAN; + } + return Type::get_const_basic_type(bt); } const Type* t = _igvn.type(n); if (t->basic_type() == T_INT) { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/type.cpp --- a/src/share/vm/opto/type.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/type.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -61,7 +61,7 @@ { Bad, T_ILLEGAL, "tuple:", false, Node::NotAMachineReg, relocInfo::none }, // Tuple { Bad, T_ARRAY, "array:", false, Node::NotAMachineReg, relocInfo::none }, // Array -#if defined(IA32) || defined(AMD64) +#ifndef SPARC { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/opto/vectornode.cpp --- a/src/share/vm/opto/vectornode.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/opto/vectornode.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -29,8 +29,7 @@ //------------------------------VectorNode-------------------------------------- // Return the vector operator for the specified scalar operation -// and vector length. Also used to check if the code generator -// supports the vector operation. +// and vector length. int VectorNode::opcode(int sopc, BasicType bt) { switch (sopc) { case Op_AddI: @@ -75,7 +74,7 @@ case T_BYTE: return 0; // Unimplemented case T_CHAR: case T_SHORT: return Op_MulVS; - case T_INT: return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1 + case T_INT: return Op_MulVI; } ShouldNotReachHere(); case Op_MulF: @@ -104,9 +103,9 @@ return Op_LShiftVL; case Op_RShiftI: switch (bt) { - case T_BOOLEAN: + case T_BOOLEAN:return Op_URShiftVB; // boolean is unsigned value + case T_CHAR: return Op_URShiftVS; // char is unsigned value case T_BYTE: return Op_RShiftVB; - case T_CHAR: case T_SHORT: return Op_RShiftVS; case T_INT: return Op_RShiftVI; } @@ -116,10 +115,14 @@ return Op_RShiftVL; case Op_URShiftI: switch (bt) { - case T_BOOLEAN: - case T_BYTE: return Op_URShiftVB; - case T_CHAR: - case T_SHORT: return Op_URShiftVS; + case T_BOOLEAN:return Op_URShiftVB; + case T_CHAR: return Op_URShiftVS; + case T_BYTE: + case T_SHORT: return 0; // Vector logical right shift for signed short + // values produces incorrect Java result for + // negative data because java code should convert + // a short value into int value with sign + // extension before a shift. case T_INT: return Op_URShiftVI; } ShouldNotReachHere(); @@ -157,12 +160,14 @@ return 0; // Unimplemented } +// Also used to check if the code generator +// supports the vector operation. 
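The RShiftI/URShiftI mapping above hinges on Java's promotion rules: boolean and char are unsigned, so a per-lane logical shift is fine for them, but a byte or short operand of '>>>' is first sign-extended to int, and a narrow vector lane cannot reproduce that. A small standalone illustration, with int8_t standing in for a Java byte:

#include <cassert>
#include <cstdint>

int main() {
  int8_t b = -1;                                  // Java byte 0xFF
  // Java's 'b >>> 1' promotes to int with sign extension first:
  int promoted  = static_cast<int>(b);            // 0xFFFFFFFF
  int java_ushr = static_cast<int>(static_cast<uint32_t>(promoted) >> 1);  // 0x7FFFFFFF
  // An 8-bit vector lane doing a logical shift sees only the low byte:
  int lane_ushr = static_cast<uint8_t>(b) >> 1;   // 0x7F
  assert(java_ushr != lane_ushr);                 // so Op_URShiftVB can't stand in for byte '>>>'
  return (java_ushr == lane_ushr) ? 1 : 0;        // returns 0: the two results disagree
}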
bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { if (is_java_primitive(bt) && (vlen > 1) && is_power_of_2(vlen) && Matcher::vector_size_supported(bt, vlen)) { int vopc = VectorNode::opcode(opc, bt); - return vopc > 0 && Matcher::has_match_rule(vopc); + return vopc > 0 && Matcher::match_rule_supported(vopc); } return false; } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/prims/unsafe.cpp --- a/src/share/vm/prims/unsafe.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/prims/unsafe.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -124,6 +124,8 @@ assert((void*)p->obj_field_addr((jint)byte_offset) == ptr_plus_disp, "raw [ptr+disp] must be consistent with oop::field_base"); } + jlong p_size = HeapWordSize * (jlong)(p->size()); + assert(byte_offset < p_size, err_msg("Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, byte_offset, p_size)); } #endif if (sizeof(char*) == sizeof(jint)) // (this constant folds!) diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/arguments.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -791,6 +791,10 @@ st->print("jvm_args: "); print_jvm_args_on(st); } st->print_cr("java_command: %s", java_command() ? java_command() : ""); + if (_java_class_path != NULL) { + char* path = _java_class_path->value(); + st->print_cr("java_class_path (initial): %s", strlen(path) == 0 ? "" : path ); + } st->print_cr("Launcher Type: %s", _sun_java_launcher); } @@ -2771,6 +2775,11 @@ return JNI_EINVAL; } FLAG_SET_CMDLINE(uintx, MaxDirectMemorySize, max_direct_memory_size); + } else if (match_option(option, "-XX:+UseVMInterruptibleIO", &tail)) { + // NOTE! In JDK 9, the UseVMInterruptibleIO flag will completely go + // away and will cause VM initialization failures! + warning("-XX:+UseVMInterruptibleIO is obsolete and will be removed in a future release."); + FLAG_SET_CMDLINE(bool, UseVMInterruptibleIO, true); } else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx // Skip -XX:Flags= since that case has already been handled if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) { @@ -2786,10 +2795,6 @@ // Change the default value for flags which have different default values // when working with older JDKs. 
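// (A hedged note on the UseVMInterruptibleIO handling above and the removal just below:
// interruptible I/O is no longer switched on automatically when the VM runs with a
// JDK 6 or older version setting; the flag now takes effect only when requested
// explicitly on the command line, and doing so prints an obsolescence warning since
// the option is slated for removal.)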
- if (JDK_Version::current().compare_major(6) <= 0 && - FLAG_IS_DEFAULT(UseVMInterruptibleIO)) { - FLAG_SET_DEFAULT(UseVMInterruptibleIO, true); - } #ifdef LINUX if (JDK_Version::current().compare_major(6) <= 0 && FLAG_IS_DEFAULT(UseLinuxPosixThreadCPUClocks)) { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/globals.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -533,6 +533,9 @@ product(intx, UseSSE, 99, \ "Highest supported SSE instructions set on x86/x64") \ \ + product(bool, UseAES, false, \ + "Control whether AES instructions can be used on x86/x64") \ + \ product(uintx, LargePageSizeInBytes, 0, \ "Large page size (0 to let VM choose the page size") \ \ @@ -635,6 +638,9 @@ product(bool, UseSSE42Intrinsics, false, \ "SSE4.2 versions of intrinsics") \ \ + product(bool, UseAESIntrinsics, false, \ + "use intrinsics for AES versions of crypto") \ + \ develop(bool, TraceCallFixup, false, \ "traces all call fixups") \ \ diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/handles.cpp --- a/src/share/vm/runtime/handles.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/handles.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -158,13 +158,18 @@ // Delete later chunks if( _chunk->next() ) { + // reset arena size before delete chunks. Otherwise, the total + // arena size could exceed total chunk size + assert(area->size_in_bytes() > size_in_bytes(), "Sanity check"); + area->set_size_in_bytes(size_in_bytes()); _chunk->next_chop(); + } else { + assert(area->size_in_bytes() == size_in_bytes(), "Sanity check"); } // Roll back arena to saved top markers area->_chunk = _chunk; area->_hwm = _hwm; area->_max = _max; - area->set_size_in_bytes(_size_in_bytes); #ifdef ASSERT // clear out first chunk (to detect allocation bugs) if (ZapVMHandleArea) { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/handles.hpp --- a/src/share/vm/runtime/handles.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/handles.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -297,6 +297,7 @@ void set_previous_handle_mark(HandleMark* mark) { _previous_handle_mark = mark; } HandleMark* previous_handle_mark() const { return _previous_handle_mark; } + size_t size_in_bytes() const { return _size_in_bytes; } public: HandleMark(); // see handles_inline.hpp HandleMark(Thread* thread) { initialize(thread); } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/handles.inline.hpp --- a/src/share/vm/runtime/handles.inline.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/handles.inline.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -136,13 +136,18 @@ HandleArea* area = _area; // help compilers with poor alias analysis // Delete later chunks if( _chunk->next() ) { + // reset arena size before delete chunks. 
Otherwise, the total + // arena size could exceed total chunk size + assert(area->size_in_bytes() > size_in_bytes(), "Sanity check"); + area->set_size_in_bytes(size_in_bytes()); _chunk->next_chop(); + } else { + assert(area->size_in_bytes() == size_in_bytes(), "Sanity check"); } // Roll back arena to saved top markers area->_chunk = _chunk; area->_hwm = _hwm; area->_max = _max; - area->set_size_in_bytes(_size_in_bytes); debug_only(area->_handle_mark_nesting--); } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/os.cpp --- a/src/share/vm/runtime/os.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/os.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -600,9 +600,7 @@ if (PrintMalloc && tty != NULL) tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock); // we do not track MallocCushion memory - if (MemTracker::is_on()) { MemTracker::record_malloc((address)memblock, size, memflags, caller == 0 ? CALLER_PC : caller); - } return memblock; } @@ -613,7 +611,7 @@ NOT_PRODUCT(inc_stat_counter(&num_mallocs, 1)); NOT_PRODUCT(inc_stat_counter(&alloc_bytes, size)); void* ptr = ::realloc(memblock, size); - if (ptr != NULL && MemTracker::is_on()) { + if (ptr != NULL) { MemTracker::record_realloc((address)memblock, (address)ptr, size, memflags, caller == 0 ? CALLER_PC : caller); } @@ -1401,7 +1399,7 @@ char* os::reserve_memory(size_t bytes, char* addr, size_t alignment_hint) { char* result = pd_reserve_memory(bytes, addr, alignment_hint); - if (result != NULL && MemTracker::is_on()) { + if (result != NULL) { MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); } @@ -1409,7 +1407,7 @@ } char* os::attempt_reserve_memory_at(size_t bytes, char* addr) { char* result = pd_attempt_reserve_memory_at(bytes, addr); - if (result != NULL && MemTracker::is_on()) { + if (result != NULL) { MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); } return result; @@ -1422,7 +1420,7 @@ bool os::commit_memory(char* addr, size_t bytes, bool executable) { bool res = pd_commit_memory(addr, bytes, executable); - if (res && MemTracker::is_on()) { + if (res) { MemTracker::record_virtual_memory_commit((address)addr, bytes, CALLER_PC); } return res; @@ -1431,7 +1429,7 @@ bool os::commit_memory(char* addr, size_t size, size_t alignment_hint, bool executable) { bool res = os::pd_commit_memory(addr, size, alignment_hint, executable); - if (res && MemTracker::is_on()) { + if (res) { MemTracker::record_virtual_memory_commit((address)addr, size, CALLER_PC); } return res; @@ -1458,8 +1456,9 @@ char *addr, size_t bytes, bool read_only, bool allow_exec) { char* result = pd_map_memory(fd, file_name, file_offset, addr, bytes, read_only, allow_exec); - if (result != NULL && MemTracker::is_on()) { + if (result != NULL) { MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); + MemTracker::record_virtual_memory_commit((address)result, bytes, CALLER_PC); } return result; } @@ -1474,6 +1473,7 @@ bool os::unmap_memory(char *addr, size_t bytes) { bool result = pd_unmap_memory(addr, bytes); if (result) { + MemTracker::record_virtual_memory_uncommit((address)addr, bytes); MemTracker::record_virtual_memory_release((address)addr, bytes); } return result; diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/stubRoutines.cpp --- a/src/share/vm/runtime/stubRoutines.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/stubRoutines.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -120,6 +120,10 @@ address StubRoutines::_arrayof_jshort_fill; address 
StubRoutines::_arrayof_jint_fill; +address StubRoutines::_aescrypt_encryptBlock = NULL; +address StubRoutines::_aescrypt_decryptBlock = NULL; +address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL; +address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL; double (* StubRoutines::_intrinsic_log )(double) = NULL; double (* StubRoutines::_intrinsic_log10 )(double) = NULL; diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/stubRoutines.hpp --- a/src/share/vm/runtime/stubRoutines.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/stubRoutines.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -199,6 +199,11 @@ // zero heap space aligned to jlong (8 bytes) static address _zero_aligned_words; + static address _aescrypt_encryptBlock; + static address _aescrypt_decryptBlock; + static address _cipherBlockChaining_encryptAESCrypt; + static address _cipherBlockChaining_decryptAESCrypt; + // These are versions of the java.lang.Math methods which perform // the same operations as the intrinsic version. They are used for // constant folding in the compiler to ensure equivalence. If the @@ -330,6 +335,11 @@ static address arrayof_jshort_fill() { return _arrayof_jshort_fill; } static address arrayof_jint_fill() { return _arrayof_jint_fill; } + static address aescrypt_encryptBlock() { return _aescrypt_encryptBlock; } + static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; } + static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; } + static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; } + static address select_fill_function(BasicType t, bool aligned, const char* &name); static address zero_aligned_words() { return _zero_aligned_words; } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/thread.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -323,12 +323,10 @@ os::initialize_thread(this); #if INCLUDE_NMT - // record thread's native stack, stack grows downward - if (MemTracker::is_on()) { - address stack_low_addr = stack_base() - stack_size(); - MemTracker::record_thread_stack(stack_low_addr, stack_size(), this, + // record thread's native stack, stack grows downward + address stack_low_addr = stack_base() - stack_size(); + MemTracker::record_thread_stack(stack_low_addr, stack_size(), this, CURRENT_PC); - } #endif // INCLUDE_NMT } @@ -345,6 +343,9 @@ if (_stack_base != NULL) { address low_stack_addr = stack_base() - stack_size(); MemTracker::release_thread_stack(low_stack_addr, stack_size(), this); +#ifdef ASSERT + set_stack_base(NULL); +#endif } #endif // INCLUDE_NMT @@ -1521,10 +1522,12 @@ tty->print_cr("terminate thread %p", this); } - // Info NMT that this JavaThread is exiting, its memory - // recorder should be collected + // By now, this thread should already be invisible to safepoint, + // and its per-thread recorder also collected. 
assert(!is_safepoint_visible(), "wrong state"); - MemTracker::thread_exiting(this); +#if INCLUDE_NMT + assert(get_recorder() == NULL, "Already collected"); +#endif // INCLUDE_NMT // JSR166 -- return the parker to the free list Parker::Release(_parker); @@ -2425,6 +2428,7 @@ } void JavaThread::remove_stack_guard_pages() { + assert(Thread::current() == this, "from different thread"); if (_stack_guard_state == stack_guard_unused) return; address low_addr = stack_base() - stack_size(); size_t len = (StackYellowPages + StackRedPages) * os::vm_page_size(); @@ -4093,7 +4097,10 @@ // Now, this thread is not visible to safepoint p->set_safepoint_visible(false); - + // once the thread becomes safepoint invisible, we can not use its per-thread + // recorder. And Threads::do_threads() no longer walks this thread, so we have + // to release its per-thread recorder here. + MemTracker::thread_exiting(p); } // unlock Threads_lock // Since Events::log uses a lock, we grab it outside the Threads_lock diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/runtime/vmStructs.cpp --- a/src/share/vm/runtime/vmStructs.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/runtime/vmStructs.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -59,6 +59,7 @@ #include "memory/generation.hpp" #include "memory/generationSpec.hpp" #include "memory/heap.hpp" +#include "memory/metablock.hpp" #include "memory/space.hpp" #include "memory/tenuredGeneration.hpp" #include "memory/universe.hpp" @@ -249,6 +250,7 @@ typedef Hashtable KlassHashtable; typedef HashtableEntry KlassHashtableEntry; typedef TwoOopHashtable SymbolTwoOopHashtable; +typedef BinaryTreeDictionary MetablockTreeDictionary; //-------------------------------------------------------------------------------- // VM_STRUCTS @@ -1237,7 +1239,15 @@ nonstatic_field(AccessFlags, _flags, jint) \ nonstatic_field(elapsedTimer, _counter, jlong) \ nonstatic_field(elapsedTimer, _active, bool) \ - nonstatic_field(InvocationCounter, _counter, unsigned int) + nonstatic_field(InvocationCounter, _counter, unsigned int) \ + volatile_nonstatic_field(FreeChunk, _size, size_t) \ + nonstatic_field(FreeChunk, _next, FreeChunk*) \ + nonstatic_field(FreeChunk, _prev, FreeChunk*) \ + nonstatic_field(FreeList, _size, size_t) \ + nonstatic_field(FreeList, _size, size_t) \ + nonstatic_field(FreeList, _count, ssize_t) \ + nonstatic_field(FreeList, _count, ssize_t) \ + nonstatic_field(MetablockTreeDictionary, _total_size, size_t) /* NOTE that we do not use the last_entry() macro here; it is used */ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ @@ -2080,7 +2090,24 @@ declare_toplevel_type(Universe) \ declare_toplevel_type(vframeArray) \ declare_toplevel_type(vframeArrayElement) \ - declare_toplevel_type(Annotations*) + declare_toplevel_type(Annotations*) \ + \ + /***************/ \ + /* Miscellaneous types */ \ + /***************/ \ + \ + /* freelist */ \ + declare_toplevel_type(FreeChunk*) \ + declare_toplevel_type(Metablock*) \ + declare_toplevel_type(FreeBlockDictionary*) \ + declare_toplevel_type(FreeList*) \ + declare_toplevel_type(FreeList) \ + declare_toplevel_type(FreeBlockDictionary*) \ + declare_toplevel_type(FreeList*) \ + declare_toplevel_type(FreeList) \ + declare_toplevel_type(MetablockTreeDictionary*) \ + declare_type(MetablockTreeDictionary, FreeBlockDictionary) \ + declare_type(MetablockTreeDictionary, FreeBlockDictionary) /* NOTE that we do not use the last_entry() macro here; it is used */ diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/attachListener.cpp --- 
a/src/share/vm/services/attachListener.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/attachListener.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -404,6 +404,8 @@ static void attach_listener_thread_entry(JavaThread* thread, TRAPS) { os::set_priority(thread, NearMaxPriority); + thread->record_stack_base_and_size(); + if (AttachListener::pd_init() != 0) { return; } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memBaseline.cpp --- a/src/share/vm/services/memBaseline.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memBaseline.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -40,6 +40,7 @@ {mtSymbol, "Symbol"}, {mtNMT, "Memory Tracking"}, {mtChunk, "Pooled Free Chunks"}, + {mtClassShared,"Shared spaces for classes"}, {mtNone, "Unknown"} // It can happen when type tagging records are lagging // behind }; @@ -55,6 +56,7 @@ _malloc_cs = NULL; _vm_cs = NULL; + _vm_map = NULL; _number_of_classes = 0; _number_of_threads = 0; @@ -72,6 +74,11 @@ _vm_cs = NULL; } + if (_vm_map != NULL) { + delete _vm_map; + _vm_map = NULL; + } + reset(); } @@ -85,6 +92,7 @@ if (_malloc_cs != NULL) _malloc_cs->clear(); if (_vm_cs != NULL) _vm_cs->clear(); + if (_vm_map != NULL) _vm_map->clear(); for (int index = 0; index < NUMBER_OF_MEMORY_TYPE; index ++) { _malloc_data[index].clear(); @@ -94,39 +102,33 @@ } MemBaseline::~MemBaseline() { - if (_malloc_cs != NULL) { - delete _malloc_cs; - } - - if (_vm_cs != NULL) { - delete _vm_cs; - } + clear(); } // baseline malloc'd memory records, generate overall summary and summaries by // memory types bool MemBaseline::baseline_malloc_summary(const MemPointerArray* malloc_records) { - MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records); - MemPointerRecord* mptr = (MemPointerRecord*)mItr.current(); + MemPointerArrayIteratorImpl malloc_itr((MemPointerArray*)malloc_records); + MemPointerRecord* malloc_ptr = (MemPointerRecord*)malloc_itr.current(); size_t used_arena_size = 0; int index; - while (mptr != NULL) { - index = flag2index(FLAGS_TO_MEMORY_TYPE(mptr->flags())); - size_t size = mptr->size(); + while (malloc_ptr != NULL) { + index = flag2index(FLAGS_TO_MEMORY_TYPE(malloc_ptr->flags())); + size_t size = malloc_ptr->size(); _total_malloced += size; _malloc_data[index].inc(size); - if (MemPointerRecord::is_arena_record(mptr->flags())) { + if (MemPointerRecord::is_arena_record(malloc_ptr->flags())) { // see if arena size record present - MemPointerRecord* next_p = (MemPointerRecordEx*)mItr.peek_next(); - if (MemPointerRecord::is_arena_size_record(next_p->flags())) { - assert(next_p->is_size_record_of_arena(mptr), "arena records do not match"); - size = next_p->size(); + MemPointerRecord* next_malloc_ptr = (MemPointerRecordEx*)malloc_itr.peek_next(); + if (MemPointerRecord::is_arena_size_record(next_malloc_ptr->flags())) { + assert(next_malloc_ptr->is_size_record_of_arena(malloc_ptr), "arena records do not match"); + size = next_malloc_ptr->size(); _arena_data[index].inc(size); used_arena_size += size; - mItr.next(); + malloc_itr.next(); } } - mptr = (MemPointerRecordEx*)mItr.next(); + malloc_ptr = (MemPointerRecordEx*)malloc_itr.next(); } // substract used arena size to get size of arena chunk in free list @@ -142,20 +144,23 @@ // baseline mmap'd memory records, generate overall summary and summaries by // memory types bool MemBaseline::baseline_vm_summary(const MemPointerArray* vm_records) { - MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records); - VMMemRegion* vptr = (VMMemRegion*)vItr.current(); + MemPointerArrayIteratorImpl 
vm_itr((MemPointerArray*)vm_records); + VMMemRegion* vm_ptr = (VMMemRegion*)vm_itr.current(); int index; - while (vptr != NULL) { - index = flag2index(FLAGS_TO_MEMORY_TYPE(vptr->flags())); - + while (vm_ptr != NULL) { + if (vm_ptr->is_reserved_region()) { + index = flag2index(FLAGS_TO_MEMORY_TYPE(vm_ptr->flags())); // we use the number of thread stack to count threads - if (IS_MEMORY_TYPE(vptr->flags(), mtThreadStack)) { + if (IS_MEMORY_TYPE(vm_ptr->flags(), mtThreadStack)) { _number_of_threads ++; } - _total_vm_reserved += vptr->reserved_size(); - _total_vm_committed += vptr->committed_size(); - _vm_data[index].inc(vptr->reserved_size(), vptr->committed_size()); - vptr = (VMMemRegion*)vItr.next(); + _total_vm_reserved += vm_ptr->size(); + _vm_data[index].inc(vm_ptr->size(), 0); + } else { + _total_vm_committed += vm_ptr->size(); + _vm_data[index].inc(0, vm_ptr->size()); + } + vm_ptr = (VMMemRegion*)vm_itr.next(); } return true; } @@ -165,41 +170,57 @@ bool MemBaseline::baseline_malloc_details(const MemPointerArray* malloc_records) { assert(MemTracker::track_callsite(), "detail tracking is off"); - MemPointerArrayIteratorImpl mItr((MemPointerArray*)malloc_records); - MemPointerRecordEx* mptr = (MemPointerRecordEx*)mItr.current(); - MallocCallsitePointer mp; + MemPointerArrayIteratorImpl malloc_itr(const_cast(malloc_records)); + MemPointerRecordEx* malloc_ptr = (MemPointerRecordEx*)malloc_itr.current(); + MallocCallsitePointer malloc_callsite; + // initailize malloc callsite array if (_malloc_cs == NULL) { _malloc_cs = new (std::nothrow) MemPointerArrayImpl(64); // out of native memory - if (_malloc_cs == NULL) { + if (_malloc_cs == NULL || _malloc_cs->out_of_memory()) { return false; } } else { _malloc_cs->clear(); } + MemPointerArray* malloc_data = const_cast(malloc_records); + + // sort into callsite pc order. Details are aggregated by callsites + malloc_data->sort((FN_SORT)malloc_sort_by_pc); + bool ret = true; + // baseline memory that is totaled over 1 KB - while (mptr != NULL) { - if (!MemPointerRecord::is_arena_size_record(mptr->flags())) { + while (malloc_ptr != NULL) { + if (!MemPointerRecord::is_arena_size_record(malloc_ptr->flags())) { // skip thread stacks - if (!IS_MEMORY_TYPE(mptr->flags(), mtThreadStack)) { - if (mp.addr() != mptr->pc()) { - if ((mp.amount()/K) > 0) { - if (!_malloc_cs->append(&mp)) { - return false; + if (!IS_MEMORY_TYPE(malloc_ptr->flags(), mtThreadStack)) { + if (malloc_callsite.addr() != malloc_ptr->pc()) { + if ((malloc_callsite.amount()/K) > 0) { + if (!_malloc_cs->append(&malloc_callsite)) { + ret = false; + break; } } - mp = MallocCallsitePointer(mptr->pc()); + malloc_callsite = MallocCallsitePointer(malloc_ptr->pc()); } - mp.inc(mptr->size()); + malloc_callsite.inc(malloc_ptr->size()); } } - mptr = (MemPointerRecordEx*)mItr.next(); + malloc_ptr = (MemPointerRecordEx*)malloc_itr.next(); } - if (mp.addr() != 0 && (mp.amount()/K) > 0) { - if (!_malloc_cs->append(&mp)) { + // restore to address order. Snapshot malloc data is maintained in memory + // address order. 
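Note: the reworked baseline_malloc_details() above aggregates malloc records by callsite: sort the records by pc, sum adjacent records from the same pc, then restore address order because the snapshot keeps its data sorted by address. A small self-contained C++ sketch of that flow follows; the record and total types are illustrative, and the 1 KB reporting threshold used in the real code is omitted for brevity.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative record types; field names only loosely mirror the baseline code.
struct MallocRecord  { uintptr_t addr; uintptr_t pc; size_t size; };
struct CallsiteTotal { uintptr_t pc;   size_t amount; size_t count; };

// Aggregate malloc records by callsite pc, then put the input back into
// address order, as baseline_malloc_details() does above.
std::vector<CallsiteTotal> baseline_by_callsite(std::vector<MallocRecord>& recs) {
  std::sort(recs.begin(), recs.end(),
            [](const MallocRecord& a, const MallocRecord& b) { return a.pc < b.pc; });

  std::vector<CallsiteTotal> out;
  for (const MallocRecord& r : recs) {
    if (out.empty() || out.back().pc != r.pc) {
      out.push_back({r.pc, 0, 0});
    }
    out.back().amount += r.size;
    out.back().count  += 1;
  }

  // restore address order for the caller, like the sort just below
  std::sort(recs.begin(), recs.end(),
            [](const MallocRecord& a, const MallocRecord& b) { return a.addr < b.addr; });
  return out;
}

int main() {
  std::vector<MallocRecord> recs = {
    {0x1000, 0xAA, 64}, {0x2000, 0xBB, 32}, {0x3000, 0xAA, 128}};
  for (const CallsiteTotal& c : baseline_by_callsite(recs))
    std::printf("pc=%#zx amount=%zu count=%zu\n", (size_t)c.pc, c.amount, c.count);
  return 0;
}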
+ malloc_data->sort((FN_SORT)malloc_sort_by_addr); + + if (!ret) { + return false; + } + // deal with last record + if (malloc_callsite.addr() != 0 && (malloc_callsite.amount()/K) > 0) { + if (!_malloc_cs->append(&malloc_callsite)) { return false; } } @@ -210,34 +231,106 @@ bool MemBaseline::baseline_vm_details(const MemPointerArray* vm_records) { assert(MemTracker::track_callsite(), "detail tracking is off"); - VMCallsitePointer vp; - MemPointerArrayIteratorImpl vItr((MemPointerArray*)vm_records); - VMMemRegionEx* vptr = (VMMemRegionEx*)vItr.current(); + VMCallsitePointer vm_callsite; + VMCallsitePointer* cur_callsite = NULL; + MemPointerArrayIteratorImpl vm_itr((MemPointerArray*)vm_records); + VMMemRegionEx* vm_ptr = (VMMemRegionEx*)vm_itr.current(); + // initialize virtual memory map array + if (_vm_map == NULL) { + _vm_map = new (std::nothrow) MemPointerArrayImpl(vm_records->length()); + if (_vm_map == NULL || _vm_map->out_of_memory()) { + return false; + } + } else { + _vm_map->clear(); + } + + // initialize virtual memory callsite array if (_vm_cs == NULL) { _vm_cs = new (std::nothrow) MemPointerArrayImpl(64); - if (_vm_cs == NULL) { + if (_vm_cs == NULL || _vm_cs->out_of_memory()) { return false; } } else { _vm_cs->clear(); } - while (vptr != NULL) { - if (vp.addr() != vptr->pc()) { - if (!_vm_cs->append(&vp)) { + // consolidate virtual memory data + VMMemRegionEx* reserved_rec = NULL; + VMMemRegionEx* committed_rec = NULL; + + // vm_ptr is coming in increasing base address order + while (vm_ptr != NULL) { + if (vm_ptr->is_reserved_region()) { + // consolidate reserved memory regions for virtual memory map. + // The criteria for consolidation is: + // 1. two adjacent reserved memory regions + // 2. belong to the same memory type + // 3. reserved from the same callsite + if (reserved_rec == NULL || + reserved_rec->base() + reserved_rec->size() != vm_ptr->addr() || + FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) != FLAGS_TO_MEMORY_TYPE(vm_ptr->flags()) || + reserved_rec->pc() != vm_ptr->pc()) { + if (!_vm_map->append(vm_ptr)) { return false; } - vp = VMCallsitePointer(vptr->pc()); + // inserted reserved region, we need the pointer to the element in virtual + // memory map array. + reserved_rec = (VMMemRegionEx*)_vm_map->at(_vm_map->length() - 1); + } else { + reserved_rec->expand_region(vm_ptr->addr(), vm_ptr->size()); } - vp.inc(vptr->size(), vptr->committed_size()); - vptr = (VMMemRegionEx*)vItr.next(); - } - if (vp.addr() != 0) { - if (!_vm_cs->append(&vp)) { + + if (cur_callsite != NULL && !_vm_cs->append(cur_callsite)) { return false; } + vm_callsite = VMCallsitePointer(vm_ptr->pc()); + cur_callsite = &vm_callsite; + vm_callsite.inc(vm_ptr->size(), 0); + } else { + // consolidate committed memory regions for virtual memory map + // The criterial is: + // 1. two adjacent committed memory regions + // 2. committed from the same callsite + if (committed_rec == NULL || + committed_rec->base() + committed_rec->size() != vm_ptr->addr() || + committed_rec->pc() != vm_ptr->pc()) { + if (!_vm_map->append(vm_ptr)) { + return false; } + committed_rec = (VMMemRegionEx*)_vm_map->at(_vm_map->length() - 1); + } else { + committed_rec->expand_region(vm_ptr->addr(), vm_ptr->size()); + } + vm_callsite.inc(0, vm_ptr->size()); + } + vm_ptr = (VMMemRegionEx*)vm_itr.next(); + } + // deal with last record + if (cur_callsite != NULL && !_vm_cs->append(cur_callsite)) { + return false; + } + + // sort it into callsite pc order. 
Details are aggregated by callsites + _vm_cs->sort((FN_SORT)bl_vm_sort_by_pc); + + // walk the array to consolidate record by pc + MemPointerArrayIteratorImpl itr(_vm_cs); + VMCallsitePointer* callsite_rec = (VMCallsitePointer*)itr.current(); + VMCallsitePointer* next_rec = (VMCallsitePointer*)itr.next(); + while (next_rec != NULL) { + assert(callsite_rec != NULL, "Sanity check"); + if (next_rec->addr() == callsite_rec->addr()) { + callsite_rec->inc(next_rec->reserved_amount(), next_rec->committed_amount()); + itr.remove(); + next_rec = (VMCallsitePointer*)itr.current(); + } else { + callsite_rec = next_rec; + next_rec = (VMCallsitePointer*)itr.next(); + } + } + return true; } @@ -251,12 +344,8 @@ _number_of_classes = SystemDictionary::number_of_classes(); if (!summary_only && MemTracker::track_callsite() && _baselined) { - ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_pc); - ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_pc); _baselined = baseline_malloc_details(snapshot._alloc_ptrs) && baseline_vm_details(snapshot._vm_ptrs); - ((MemPointerArray*)snapshot._alloc_ptrs)->sort((FN_SORT)malloc_sort_by_addr); - ((MemPointerArray*)snapshot._vm_ptrs)->sort((FN_SORT)vm_sort_by_addr); } return _baselined; } @@ -278,7 +367,7 @@ return MemType2NameMap[index]._name; } } - assert(false, "no type"); + assert(false, err_msg("bad type %x", type)); return NULL; } @@ -341,13 +430,6 @@ return UNSIGNED_COMPARE(mp1->addr(), mp2->addr()); } -// sort snapshot mmap'd records in callsite pc order -int MemBaseline::vm_sort_by_pc(const void* p1, const void* p2) { - assert(MemTracker::track_callsite(),"Just check"); - const VMMemRegionEx* mp1 = (const VMMemRegionEx*)p1; - const VMMemRegionEx* mp2 = (const VMMemRegionEx*)p2; - return UNSIGNED_COMPARE(mp1->pc(), mp2->pc()); -} // sort baselined mmap'd records in size (reserved size) order int MemBaseline::bl_vm_sort_by_size(const void* p1, const void* p2) { @@ -376,12 +458,3 @@ return delta; } -// sort snapshot mmap'd records in memory block address order -int MemBaseline::vm_sort_by_addr(const void* p1, const void* p2) { - assert(MemTracker::is_on(), "Just check"); - const VMMemRegion* mp1 = (const VMMemRegion*)p1; - const VMMemRegion* mp2 = (const VMMemRegion*)p2; - int delta = UNSIGNED_COMPARE(mp1->addr(), mp2->addr()); - assert(delta != 0, "dup pointer"); - return delta; -} diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memBaseline.hpp --- a/src/share/vm/services/memBaseline.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memBaseline.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -320,6 +320,8 @@ // only available when detail tracking is on. 
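Note: baseline_vm_details() above folds the raw region records into a compact virtual memory map using the criteria spelled out in its comments: merge a reserved region into its predecessor only when the two are adjacent, carry the same memory type, and come from the same callsite. The sketch below models just that merge step with illustrative types; it is not the patch's VMMemRegionEx machinery.

#include <cstdint>
#include <vector>

// Illustrative region record for the consolidation rule used above.
struct Region {
  uintptr_t base;
  size_t    size;
  int       type;   // memory type tag
  uintptr_t pc;     // reserving callsite
  uintptr_t end() const { return base + size; }
};

// Regions arrive in increasing base-address order; merge with the previous
// entry only when adjacency, type and callsite all match.
void add_reserved(std::vector<Region>& map, const Region& r) {
  if (!map.empty()) {
    Region& prev = map.back();
    if (prev.end() == r.base && prev.type == r.type && prev.pc == r.pc) {
      prev.size += r.size;   // consolidate instead of appending
      return;
    }
  }
  map.push_back(r);
}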
MemPointerArray* _malloc_cs; MemPointerArray* _vm_cs; + // virtual memory map + MemPointerArray* _vm_map; private: static MemType2Name MemType2NameMap[NUMBER_OF_MEMORY_TYPE]; @@ -432,9 +434,6 @@ static int malloc_sort_by_pc(const void* p1, const void* p2); static int malloc_sort_by_addr(const void* p1, const void* p2); - static int vm_sort_by_pc(const void* p1, const void* p2); - static int vm_sort_by_addr(const void* p1, const void* p2); - private: // sorting functions for baselined records static int bl_malloc_sort_by_size(const void* p1, const void* p2); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memPtr.cpp --- a/src/share/vm/services/memPtr.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memPtr.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -40,35 +40,3 @@ return seq; } - - -bool VMMemRegion::contains(const VMMemRegion* mr) const { - assert(base() != 0, "Sanity check"); - assert(size() != 0 || committed_size() != 0, - "Sanity check"); - address base_addr = base(); - address end_addr = base_addr + - (is_reserve_record()? reserved_size(): committed_size()); - if (mr->is_reserve_record()) { - if (mr->base() == base_addr && mr->size() == size()) { - // the same range - return true; - } - return false; - } else if (mr->is_commit_record() || mr->is_uncommit_record()) { - assert(mr->base() != 0 && mr->committed_size() > 0, - "bad record"); - return (mr->base() >= base_addr && - (mr->base() + mr->committed_size()) <= end_addr); - } else if (mr->is_type_tagging_record()) { - assert(mr->base() != NULL, "Sanity check"); - return (mr->base() >= base_addr && mr->base() < end_addr); - } else if (mr->is_release_record()) { - assert(mr->base() != 0 && mr->size() > 0, - "bad record"); - return (mr->base() == base_addr && mr->size() == size()); - } else { - ShouldNotReachHere(); - return false; - } -} diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memPtr.hpp --- a/src/share/vm/services/memPtr.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memPtr.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -291,6 +291,26 @@ inline bool is_type_tagging_record() const { return is_virtual_memory_type_record(_flags); } + + // if the two memory pointer records actually represent the same + // memory block + inline bool is_same_region(const MemPointerRecord* other) const { + return (addr() == other->addr() && size() == other->size()); + } + + // if this memory region fully contains another one + inline bool contains_region(const MemPointerRecord* other) const { + return contains_region(other->addr(), other->size()); + } + + // if this memory region fully contains specified memory range + inline bool contains_region(address add, size_t sz) const { + return (addr() <= add && addr() + size() >= add + sz); + } + + inline bool contains_address(address add) const { + return (addr() <= add && addr() + size() > add); + } }; // MemPointerRecordEx also records callsite pc, from where @@ -321,66 +341,32 @@ } }; -// a virtual memory region +// a virtual memory region. 
The region can represent a reserved +// virtual memory region or a committed memory region class VMMemRegion : public MemPointerRecord { - private: - // committed size - size_t _committed_size; - public: - VMMemRegion(): _committed_size(0) { } + VMMemRegion() { } void init(const MemPointerRecord* mp) { - assert(mp->is_vm_pointer(), "not virtual memory pointer"); + assert(mp->is_vm_pointer(), "Sanity check"); _addr = mp->addr(); - if (mp->is_commit_record() || mp->is_uncommit_record()) { - _committed_size = mp->size(); - set_size(_committed_size); - } else { set_size(mp->size()); - _committed_size = 0; - } set_flags(mp->flags()); } VMMemRegion& operator=(const VMMemRegion& other) { MemPointerRecord::operator=(other); - _committed_size = other.committed_size(); return *this; } - inline bool is_reserve_record() const { - return is_virtual_memory_reserve_record(flags()); - } - - inline bool is_release_record() const { - return is_virtual_memory_release_record(flags()); - } - - // resize reserved VM range - inline void set_reserved_size(size_t new_size) { - assert(new_size >= committed_size(), "resize"); - set_size(new_size); + inline bool is_reserved_region() const { + return is_allocation_record(); } - inline void commit(size_t size) { - _committed_size += size; + inline bool is_committed_region() const { + return is_commit_record(); } - inline void uncommit(size_t size) { - if (_committed_size >= size) { - _committed_size -= size; - } else { - _committed_size = 0; - } - } - - /* - * if this virtual memory range covers whole range of - * the other VMMemRegion - */ - bool contains(const VMMemRegion* mr) const; - /* base address of this virtual memory range */ inline address base() const { return addr(); @@ -391,13 +377,28 @@ set_flags(flags() | (f & mt_masks)); } - // release part of memory range - inline void partial_release(address add, size_t sz) { - assert(add >= addr() && add < addr() + size(), "not valid address"); - // for now, it can partially release from the both ends, - // but not in the middle + // expand this region to also cover specified range. + // The range has to be on either end of the memory region. + void expand_region(address addr, size_t sz) { + if (addr < base()) { + assert(addr + sz == base(), "Sanity check"); + _addr = addr; + set_size(size() + sz); + } else { + assert(base() + size() == addr, "Sanity check"); + set_size(size() + sz); + } + } + + // exclude the specified address range from this region. + // The excluded memory range has to be on either end of this memory + // region. 
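Note: the new MemPointerRecord predicates above (is_same_region, contains_region, contains_address) treat a record as the half-open range [addr, addr+size): a sub-range may end exactly at the region's end, but the end address itself is not "contained". A tiny self-contained C++ sketch with the same predicates, using an illustrative RangeRecord type:

#include <cassert>
#include <cstdint>

struct RangeRecord {
  uintptr_t addr;
  size_t    size;

  bool is_same_region(const RangeRecord& other) const {
    return addr == other.addr && size == other.size;
  }
  // the whole [add, add+sz) range must fall inside [addr, addr+size)
  bool contains_region(uintptr_t add, size_t sz) const {
    return addr <= add && addr + size >= add + sz;
  }
  bool contains_address(uintptr_t add) const {
    return addr <= add && addr + size > add;
  }
};

int main() {
  RangeRecord r{0x1000, 0x100};
  assert(r.contains_address(0x10FF));
  assert(!r.contains_address(0x1100));       // end address is exclusive
  assert(r.contains_region(0x1080, 0x80));   // a suffix of the region
  return 0;
}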
+ inline void exclude_region(address add, size_t sz) { + assert(is_reserved_region() || is_committed_region(), "Sanity check"); + assert(addr() != NULL && size() != 0, "Sanity check"); + assert(add >= addr() && add < addr() + size(), "Sanity check"); assert(add == addr() || (add + sz) == (addr() + size()), - "release in the middle"); + "exclude in the middle"); if (add == addr()) { set_addr(add + sz); set_size(size() - sz); @@ -405,16 +406,6 @@ set_size(size() - sz); } } - - // the committed size of the virtual memory block - inline size_t committed_size() const { - return _committed_size; - } - - // the reserved size of the virtual memory block - inline size_t reserved_size() const { - return size(); - } }; class VMMemRegionEx : public VMMemRegion { diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memRecorder.cpp --- a/src/share/vm/services/memRecorder.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memRecorder.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -31,14 +31,19 @@ #include "services/memTracker.hpp" MemPointer* SequencedRecordIterator::next_record() { - MemPointer* itr_cur = _itr.current(); - if (itr_cur == NULL) return NULL; - MemPointer* itr_next = _itr.next(); + MemPointerRecord* itr_cur = (MemPointerRecord*)_itr.current(); + if (itr_cur == NULL) { + return itr_cur; + } + + MemPointerRecord* itr_next = (MemPointerRecord*)_itr.next(); - while (itr_next != NULL && - same_kind((MemPointerRecord*)itr_cur, (MemPointerRecord*)itr_next)) { + // don't collapse virtual memory records + while (itr_next != NULL && !itr_cur->is_vm_pointer() && + !itr_next->is_vm_pointer() && + same_kind(itr_cur, itr_next)) { itr_cur = itr_next; - itr_next = _itr.next(); + itr_next = (MemPointerRecord*)_itr.next(); } return itr_cur; diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memRecorder.hpp --- a/src/share/vm/services/memRecorder.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memRecorder.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -188,6 +188,7 @@ // Test if the two records are the same kind: the same memory block and allocation // type. 
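Note: expand_region() and exclude_region() above only ever grow or shrink a region at one of its two ends; carving a hole out of the middle is handled elsewhere by splitting. A minimal sketch of those two operations, with plain asserts standing in for the patch's Sanity checks (MemRegion is an illustrative type, not VMMemRegion):

#include <cassert>
#include <cstdint>

struct MemRegion {
  uintptr_t base;
  size_t    size;

  // grow: the added range must touch either the low or the high end
  void expand_region(uintptr_t addr, size_t sz) {
    if (addr < base) {
      assert(addr + sz == base && "must touch the low end");
      base = addr;
    } else {
      assert(base + size == addr && "must touch the high end");
    }
    size += sz;
  }

  // shrink: the excluded range must start at the low end or finish at the high end
  void exclude_region(uintptr_t addr, size_t sz) {
    assert(addr >= base && addr + sz <= base + size && "must be inside");
    assert((addr == base || addr + sz == base + size) && "no holes in the middle");
    if (addr == base) base += sz;   // trim from the low end
    size -= sz;                     // either way the region shrinks by sz
  }
};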
inline bool same_kind(const MemPointerRecord* p1, const MemPointerRecord* p2) const { + assert(!p1->is_vm_pointer() && !p2->is_vm_pointer(), "malloc pointer only"); return (p1->addr() == p2->addr() && (p1->flags() &MemPointerRecord::tag_masks) == (p2->flags() & MemPointerRecord::tag_masks)); diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memReporter.cpp --- a/src/share/vm/services/memReporter.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memReporter.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -51,6 +51,7 @@ report_summaries(baseline); if (!summary_only && MemTracker::track_callsite()) { + report_virtual_memory_map(baseline); report_callsites(baseline); } _outputer.done(); @@ -74,6 +75,25 @@ _outputer.done_category_summary(); } +void BaselineReporter::report_virtual_memory_map(const MemBaseline& baseline) { + _outputer.start_virtual_memory_map(); + MemBaseline* pBL = const_cast(&baseline); + MemPointerArrayIteratorImpl itr = MemPointerArrayIteratorImpl(pBL->_vm_map); + VMMemRegionEx* rgn = (VMMemRegionEx*)itr.current(); + while (rgn != NULL) { + if (rgn->is_reserved_region()) { + _outputer.reserved_memory_region(FLAGS_TO_MEMORY_TYPE(rgn->flags()), + rgn->base(), rgn->base() + rgn->size(), amount_in_current_scale(rgn->size()), rgn->pc()); + } else { + _outputer.committed_memory_region(rgn->base(), rgn->base() + rgn->size(), + amount_in_current_scale(rgn->size()), rgn->pc()); + } + rgn = (VMMemRegionEx*)itr.next(); + } + + _outputer.done_virtual_memory_map(); +} + void BaselineReporter::report_callsites(const MemBaseline& baseline) { _outputer.start_callsite(); MemBaseline* pBL = const_cast(&baseline); @@ -324,6 +344,40 @@ _output->print_cr(" "); } + +void BaselineTTYOutputer::start_virtual_memory_map() { + _output->print_cr("Virtual memory map:"); +} + +void BaselineTTYOutputer::reserved_memory_region(MEMFLAGS type, address base, address end, + size_t size, address pc) { + const char* unit = memory_unit(_scale); + char buf[128]; + int offset; + _output->print_cr(" "); + _output->print_cr("[" PTR_FORMAT " - " PTR_FORMAT "] reserved %d%s for %s", base, end, size, unit, + MemBaseline::type2name(type)); + if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) { + _output->print_cr("\t\tfrom [%s+0x%x]", buf, offset); + } +} + +void BaselineTTYOutputer::committed_memory_region(address base, address end, size_t size, address pc) { + const char* unit = memory_unit(_scale); + char buf[128]; + int offset; + _output->print("\t[" PTR_FORMAT " - " PTR_FORMAT "] committed %d%s", base, end, size, unit); + if (os::dll_address_to_function_name(pc, buf, sizeof(buf), &offset)) { + _output->print_cr(" from [%s+0x%x]", buf, offset); + } +} + +void BaselineTTYOutputer::done_virtual_memory_map() { + _output->print_cr(" "); +} + + + void BaselineTTYOutputer::start_callsite() { _output->print_cr("Details:"); _output->print_cr(" "); @@ -337,7 +391,7 @@ size_t malloc_count) { if (malloc_amt > 0) { const char* unit = memory_unit(_scale); - char buf[64]; + char buf[128]; int offset; if (pc == 0) { _output->print("[BOOTSTRAP]%18s", " "); @@ -357,7 +411,7 @@ size_t committed_amt) { if (reserved_amt > 0) { const char* unit = memory_unit(_scale); - char buf[64]; + char buf[128]; int offset; if (pc == 0) { _output->print("[BOOTSTRAP]%18s", " "); @@ -502,7 +556,7 @@ int malloc_diff, int malloc_count_diff) { if (malloc_diff != 0) { const char* unit = memory_unit(_scale); - char buf[64]; + char buf[128]; int offset; if (pc == 0) { _output->print_cr("[BOOTSTRAP]%18s", " "); diff -r 
556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memReporter.hpp --- a/src/share/vm/services/memReporter.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memReporter.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -93,6 +93,11 @@ virtual void done_category_summary() = 0; + virtual void start_virtual_memory_map() = 0; + virtual void reserved_memory_region(MEMFLAGS type, address base, address end, size_t size, address pc) = 0; + virtual void committed_memory_region(address base, address end, size_t size, address pc) = 0; + virtual void done_virtual_memory_map() = 0; + /* * Report callsite information */ @@ -136,6 +141,7 @@ private: void report_summaries(const MemBaseline& baseline); + void report_virtual_memory_map(const MemBaseline& baseline); void report_callsites(const MemBaseline& baseline); void diff_summaries(const MemBaseline& cur, const MemBaseline& prev); @@ -251,6 +257,13 @@ void done_category_summary(); + // virtual memory map + void start_virtual_memory_map(); + void reserved_memory_region(MEMFLAGS type, address base, address end, size_t size, address pc); + void committed_memory_region(address base, address end, size_t size, address pc); + void done_virtual_memory_map(); + + /* * Report callsite information */ diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memSnapshot.cpp --- a/src/share/vm/services/memSnapshot.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memSnapshot.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -31,6 +31,220 @@ #include "services/memSnapshot.hpp" #include "services/memTracker.hpp" + +bool VMMemPointerIterator::insert_record(MemPointerRecord* rec) { + VMMemRegionEx new_rec; + assert(rec->is_allocation_record() || rec->is_commit_record(), + "Sanity check"); + if (MemTracker::track_callsite()) { + new_rec.init((MemPointerRecordEx*)rec); + } else { + new_rec.init(rec); + } + return insert(&new_rec); +} + +bool VMMemPointerIterator::insert_record_after(MemPointerRecord* rec) { + VMMemRegionEx new_rec; + assert(rec->is_allocation_record() || rec->is_commit_record(), + "Sanity check"); + if (MemTracker::track_callsite()) { + new_rec.init((MemPointerRecordEx*)rec); + } else { + new_rec.init(rec); + } + return insert_after(&new_rec); +} + +// we don't consolidate reserved regions, since they may be categorized +// in different types. 
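Note: the new report_virtual_memory_map() path above walks the baselined _vm_map and prints reserved regions flush left with their type and callsite, and committed sub-regions indented beneath them. A rough, self-contained sketch of that layout; MapEntry is an illustrative type, and the callsite string is assumed to be pre-symbolized rather than resolved through os::dll_address_to_function_name() as the patch does.

#include <cstdint>
#include <cstdio>
#include <vector>

struct MapEntry {
  uintptr_t   base;
  size_t      size;
  bool        reserved;   // false => committed sub-region
  const char* type;       // memory type name, meaningful only for reserved entries
  const char* callsite;   // pre-symbolized callsite for this sketch
};

// Reserved regions flush left, committed sub-regions indented, matching the
// layout produced by the BaselineTTYOutputer methods above.
void print_vm_map(const std::vector<MapEntry>& map) {
  std::printf("Virtual memory map:\n");
  for (const MapEntry& e : map) {
    if (e.reserved) {
      std::printf("[%#zx - %#zx] reserved %zuKB for %s\n",
                  (size_t)e.base, (size_t)(e.base + e.size), e.size / 1024, e.type);
      std::printf("\t\tfrom [%s]\n", e.callsite);
    } else {
      std::printf("\t[%#zx - %#zx] committed %zuKB from [%s]\n",
                  (size_t)e.base, (size_t)(e.base + e.size), e.size / 1024, e.callsite);
    }
  }
}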
+bool VMMemPointerIterator::add_reserved_region(MemPointerRecord* rec) { + assert(rec->is_allocation_record(), "Sanity check"); + VMMemRegion* cur = (VMMemRegion*)current(); + + // we don't have anything yet + if (cur == NULL) { + return insert_record(rec); + } + + assert(cur->is_reserved_region(), "Sanity check"); + // duplicated records + if (cur->is_same_region(rec)) { + return true; + } + assert(cur->base() > rec->addr(), "Just check: locate()"); + assert(rec->addr() + rec->size() <= cur->base(), "Can not overlap"); + return insert_record(rec); +} + +// we do consolidate committed regions +bool VMMemPointerIterator::add_committed_region(MemPointerRecord* rec) { + assert(rec->is_commit_record(), "Sanity check"); + VMMemRegion* cur; + cur = (VMMemRegion*)current(); + assert(cur->is_reserved_region() && cur->contains_region(rec), + "Sanity check"); + + // thread's native stack is always marked as "committed", ignore + // the "commit" operation for creating stack guard pages + if (FLAGS_TO_MEMORY_TYPE(cur->flags()) == mtThreadStack && + FLAGS_TO_MEMORY_TYPE(rec->flags()) != mtThreadStack) { + return true; + } + + cur = (VMMemRegion*)next(); + while (cur != NULL && cur->is_committed_region()) { + // duplicated commit records + if(cur->contains_region(rec)) { + return true; + } + if (cur->base() > rec->addr()) { + // committed regions can not overlap + assert(rec->addr() + rec->size() <= cur->base(), "Can not overlap"); + if (rec->addr() + rec->size() == cur->base()) { + cur->expand_region(rec->addr(), rec->size()); + return true; + } else { + return insert_record(rec); + } + } else if (cur->base() + cur->size() == rec->addr()) { + cur->expand_region(rec->addr(), rec->size()); + VMMemRegion* next_reg = (VMMemRegion*)next(); + // see if we can consolidate next committed region + if (next_reg != NULL && next_reg->is_committed_region() && + next_reg->base() == cur->base() + cur->size()) { + cur->expand_region(next_reg->base(), next_reg->size()); + remove(); + } + return true; + } + cur = (VMMemRegion*)next(); + } + return insert_record(rec); +} + +bool VMMemPointerIterator::remove_uncommitted_region(MemPointerRecord* rec) { + assert(rec->is_uncommit_record(), "sanity check"); + VMMemRegion* cur; + cur = (VMMemRegion*)current(); + assert(cur->is_reserved_region() && cur->contains_region(rec), + "Sanity check"); + // thread's native stack is always marked as "committed", ignore + // the "commit" operation for creating stack guard pages + if (FLAGS_TO_MEMORY_TYPE(cur->flags()) == mtThreadStack && + FLAGS_TO_MEMORY_TYPE(rec->flags()) != mtThreadStack) { + return true; + } + + cur = (VMMemRegion*)next(); + while (cur != NULL && cur->is_committed_region()) { + // region already uncommitted, must be due to duplicated record + if (cur->addr() >= rec->addr() + rec->size()) { + break; + } else if (cur->contains_region(rec)) { + // uncommit whole region + if (cur->is_same_region(rec)) { + remove(); + break; + } else if (rec->addr() == cur->addr() || + rec->addr() + rec->size() == cur->addr() + cur->size()) { + // uncommitted from either end of current memory region. 
+ cur->exclude_region(rec->addr(), rec->size()); + break; + } else { // split the committed region and release the middle + address high_addr = cur->addr() + cur->size(); + size_t sz = high_addr - rec->addr(); + cur->exclude_region(rec->addr(), sz); + sz = high_addr - (rec->addr() + rec->size()); + if (MemTracker::track_callsite()) { + MemPointerRecordEx tmp(rec->addr() + rec->size(), cur->flags(), sz, + ((VMMemRegionEx*)cur)->pc()); + return insert_record_after(&tmp); + } else { + MemPointerRecord tmp(rec->addr() + rec->size(), cur->flags(), sz); + return insert_record_after(&tmp); + } + } + } + cur = (VMMemRegion*)next(); + } + + // we may not find committed record due to duplicated records + return true; +} + +bool VMMemPointerIterator::remove_released_region(MemPointerRecord* rec) { + assert(rec->is_deallocation_record(), "Sanity check"); + VMMemRegion* cur = (VMMemRegion*)current(); + assert(cur->is_reserved_region() && cur->contains_region(rec), + "Sanity check"); +#ifdef ASSERT + VMMemRegion* next_reg = (VMMemRegion*)peek_next(); + // should not have any committed memory in this reserved region + assert(next_reg == NULL || !next_reg->is_committed_region(), "Sanity check"); +#endif + if (rec->is_same_region(cur)) { + remove(); + } else if (rec->addr() == cur->addr() || + rec->addr() + rec->size() == cur->addr() + cur->size()) { + // released region is at either end of this region + cur->exclude_region(rec->addr(), rec->size()); + } else { // split the reserved region and release the middle + address high_addr = cur->addr() + cur->size(); + size_t sz = high_addr - rec->addr(); + cur->exclude_region(rec->addr(), sz); + sz = high_addr - rec->addr() - rec->size(); + if (MemTracker::track_callsite()) { + MemPointerRecordEx tmp(rec->addr() + rec->size(), cur->flags(), sz, + ((VMMemRegionEx*)cur)->pc()); + return insert_reserved_region(&tmp); + } else { + MemPointerRecord tmp(rec->addr() + rec->size(), cur->flags(), sz); + return insert_reserved_region(&tmp); + } + } + return true; +} + +bool VMMemPointerIterator::insert_reserved_region(MemPointerRecord* rec) { + // skip all 'commit' records associated with previous reserved region + VMMemRegion* p = (VMMemRegion*)next(); + while (p != NULL && p->is_committed_region() && + p->base() + p->size() < rec->addr()) { + p = (VMMemRegion*)next(); + } + return insert_record(rec); +} + +bool VMMemPointerIterator::split_reserved_region(VMMemRegion* rgn, address new_rgn_addr, size_t new_rgn_size) { + assert(rgn->contains_region(new_rgn_addr, new_rgn_size), "Not fully contained"); + address pc = (MemTracker::track_callsite() ? 
((VMMemRegionEx*)rgn)->pc() : NULL); + if (rgn->base() == new_rgn_addr) { // new region is at the beginning of the region + size_t sz = rgn->size() - new_rgn_size; + // the original region becomes 'new' region + rgn->exclude_region(new_rgn_addr + new_rgn_size, sz); + // remaining becomes next region + MemPointerRecordEx next_rgn(new_rgn_addr + new_rgn_size, rgn->flags(), sz, pc); + return insert_reserved_region(&next_rgn); + } else if (rgn->base() + rgn->size() == new_rgn_addr + new_rgn_size) { + rgn->exclude_region(new_rgn_addr, new_rgn_size); + MemPointerRecordEx next_rgn(new_rgn_addr, rgn->flags(), new_rgn_size, pc); + return insert_reserved_region(&next_rgn); + } else { + // the orginal region will be split into three + address rgn_high_addr = rgn->base() + rgn->size(); + // first region + rgn->exclude_region(new_rgn_addr, (rgn_high_addr - new_rgn_addr)); + // the second region is the new region + MemPointerRecordEx new_rgn(new_rgn_addr, rgn->flags(), new_rgn_size, pc); + if (!insert_reserved_region(&new_rgn)) return false; + // the remaining region + MemPointerRecordEx rem_rgn(new_rgn_addr + new_rgn_size, rgn->flags(), + rgn_high_addr - (new_rgn_addr + new_rgn_size), pc); + return insert_reserved_region(&rem_rgn); + } +} + static int sort_in_seq_order(const void* p1, const void* p2) { assert(p1 != NULL && p2 != NULL, "Sanity check"); const MemPointerRecord* mp1 = (MemPointerRecord*)p1; @@ -61,11 +275,11 @@ } -MemPointerArrayIteratorImpl StagingArea::virtual_memory_record_walker() { +VMRecordIterator StagingArea::virtual_memory_record_walker() { MemPointerArray* arr = vm_data(); // sort into seq number order arr->sort((FN_SORT)sort_in_seq_order); - return MemPointerArrayIteratorImpl(arr); + return VMRecordIterator(arr); } @@ -135,6 +349,8 @@ return false; } } else { + // locate matched record and/or also position the iterator to proper + // location for this incoming record. 
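Note: split_reserved_region() above covers three cases: the new sub-range sits at the start of the reserved region, at its end, or in the middle, in which case the region becomes three. The standalone sketch below reproduces just that case analysis; RRegion and split_reserved are illustrative names, and the result is returned as a vector rather than spliced into an iterator.

#include <cassert>
#include <cstdint>
#include <vector>

struct RRegion { uintptr_t base; size_t size; };

std::vector<RRegion> split_reserved(const RRegion& rgn, uintptr_t sub_base, size_t sub_size) {
  assert(sub_base >= rgn.base && sub_base + sub_size <= rgn.base + rgn.size);
  std::vector<RRegion> out;
  if (sub_base > rgn.base)                      // leading remainder, if any
    out.push_back({rgn.base, (size_t)(sub_base - rgn.base)});
  out.push_back({sub_base, sub_size});          // the newly carved-out region
  uintptr_t sub_end = sub_base + sub_size;
  if (sub_end < rgn.base + rgn.size)            // trailing remainder, if any
    out.push_back({sub_end, (size_t)(rgn.base + rgn.size - sub_end)});
  return out;
}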
p2 = (MemPointerRecord*)malloc_staging_itr.locate(p1->addr()); // we have not seen this memory block, so just add to staging area if (p2 == NULL) { @@ -199,7 +415,7 @@ MallocRecordIterator malloc_itr = _staging_area.malloc_record_walker(); bool promoted = false; if (promote_malloc_records(&malloc_itr)) { - MemPointerArrayIteratorImpl vm_itr = _staging_area.virtual_memory_record_walker(); + VMRecordIterator vm_itr = _staging_area.virtual_memory_record_walker(); if (promote_virtual_memory_records(&vm_itr)) { promoted = true; } @@ -218,7 +434,7 @@ matched_rec = (MemPointerRecord*)malloc_snapshot_itr.locate(new_rec->addr()); // found matched memory block if (matched_rec != NULL && new_rec->addr() == matched_rec->addr()) { - // snapshot already contains 'lived' records + // snapshot already contains 'live' records assert(matched_rec->is_allocation_record() || matched_rec->is_arena_size_record(), "Sanity check"); // update block states @@ -277,87 +493,60 @@ bool MemSnapshot::promote_virtual_memory_records(MemPointerArrayIterator* itr) { VMMemPointerIterator vm_snapshot_itr(_vm_ptrs); MemPointerRecord* new_rec = (MemPointerRecord*)itr->current(); - VMMemRegionEx new_vm_rec; - VMMemRegion* matched_rec; + VMMemRegion* reserved_rec; while (new_rec != NULL) { assert(new_rec->is_vm_pointer(), "Sanity check"); - if (MemTracker::track_callsite()) { - new_vm_rec.init((MemPointerRecordEx*)new_rec); - } else { - new_vm_rec.init(new_rec); - } - matched_rec = (VMMemRegion*)vm_snapshot_itr.locate(new_rec->addr()); - if (matched_rec != NULL && - (matched_rec->contains(&new_vm_rec) || matched_rec->base() == new_vm_rec.base())) { + + // locate a reserved region that contains the specified address, or + // the nearest reserved region has base address just above the specified + // address + reserved_rec = (VMMemRegion*)vm_snapshot_itr.locate(new_rec->addr()); + if (reserved_rec != NULL && reserved_rec->contains_region(new_rec)) { // snapshot can only have 'live' records - assert(matched_rec->is_reserve_record(), "Sanity check"); - if (new_vm_rec.is_reserve_record() && matched_rec->base() == new_vm_rec.base()) { - // resize reserved virtual memory range - // resize has to cover committed area - assert(new_vm_rec.size() >= matched_rec->committed_size(), "Sanity check"); - matched_rec->set_reserved_size(new_vm_rec.size()); - } else if (new_vm_rec.is_commit_record()) { - // commit memory inside reserved memory range - assert(new_vm_rec.committed_size() <= matched_rec->reserved_size(), "Sanity check"); - // thread stacks are marked committed, so we ignore 'commit' record for creating - // stack guard pages - if (FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) != mtThreadStack) { - matched_rec->commit(new_vm_rec.committed_size()); - } - } else if (new_vm_rec.is_uncommit_record()) { - if (FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == mtThreadStack) { - // ignore 'uncommit' record from removing stack guard pages, uncommit - // thread stack as whole - if (matched_rec->committed_size() == new_vm_rec.committed_size()) { - matched_rec->uncommit(new_vm_rec.committed_size()); - } - } else { - // uncommit memory inside reserved memory range - assert(new_vm_rec.committed_size() <= matched_rec->committed_size(), - "Sanity check"); - matched_rec->uncommit(new_vm_rec.committed_size()); - } - } else if (new_vm_rec.is_type_tagging_record()) { - // tag this virtual memory range to a memory type - // can not re-tag a memory range to different type - assert(FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == mtNone || - 
FLAGS_TO_MEMORY_TYPE(matched_rec->flags()) == FLAGS_TO_MEMORY_TYPE(new_vm_rec.flags()), - "Sanity check"); - matched_rec->tag(new_vm_rec.flags()); - } else if (new_vm_rec.is_release_record()) { - // release part or whole memory range - if (new_vm_rec.base() == matched_rec->base() && - new_vm_rec.size() == matched_rec->size()) { - // release whole virtual memory range - assert(matched_rec->committed_size() == 0, "Sanity check"); - vm_snapshot_itr.remove(); - } else { - // partial release - matched_rec->partial_release(new_vm_rec.base(), new_vm_rec.size()); - } - } else { - // multiple reserve/commit on the same virtual memory range - assert((new_vm_rec.is_reserve_record() || new_vm_rec.is_commit_record()) && - (new_vm_rec.base() == matched_rec->base() && new_vm_rec.size() == matched_rec->size()), - "Sanity check"); - matched_rec->tag(new_vm_rec.flags()); - } - } else { - // no matched record - if (new_vm_rec.is_reserve_record()) { - if (matched_rec == NULL || matched_rec->base() > new_vm_rec.base()) { - if (!vm_snapshot_itr.insert(&new_vm_rec)) { - return false; - } - } else { - if (!vm_snapshot_itr.insert_after(&new_vm_rec)) { + assert(reserved_rec->is_reserved_region(), "Sanity check"); + if (new_rec->is_allocation_record()) { + if (!reserved_rec->is_same_region(new_rec)) { + // only deal with split a bigger reserved region into smaller regions. + // So far, CDS is the only use case. + if (!vm_snapshot_itr.split_reserved_region(reserved_rec, new_rec->addr(), new_rec->size())) { return false; } } - } else { - // throw out obsolete records, which are the commit/uncommit/release/tag records - // on memory regions that are already released. - } + } else if (new_rec->is_uncommit_record()) { + if (!vm_snapshot_itr.remove_uncommitted_region(new_rec)) { + return false; + } + } else if (new_rec->is_commit_record()) { + // insert or expand existing committed region to cover this + // newly committed region + if (!vm_snapshot_itr.add_committed_region(new_rec)) { + return false; + } + } else if (new_rec->is_deallocation_record()) { + // release part or all memory region + if (!vm_snapshot_itr.remove_released_region(new_rec)) { + return false; + } + } else if (new_rec->is_type_tagging_record()) { + // tag this reserved virtual memory range to a memory type. Can not re-tag a memory range + // to different type. + assert(FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) == mtNone || + FLAGS_TO_MEMORY_TYPE(reserved_rec->flags()) == FLAGS_TO_MEMORY_TYPE(new_rec->flags()), + "Sanity check"); + reserved_rec->tag(new_rec->flags()); + } else { + ShouldNotReachHere(); + } + } else { + /* + * The assertion failure indicates mis-matched virtual memory records. The likely + * scenario is, that some virtual memory operations are not going through os::xxxx_memory() + * api, which have to be tracked manually. (perfMemory is an example). 
+ */ + assert(new_rec->is_allocation_record(), "Sanity check"); + if (!vm_snapshot_itr.add_reserved_region(new_rec)) { + return false; + } } new_rec = (MemPointerRecord*)itr->next(); } @@ -433,5 +622,33 @@ cur = (MemPointerRecord*)vm_itr.next(); } } + +void MemSnapshot::dump_all_vm_pointers() { + MemPointerArrayIteratorImpl itr(_vm_ptrs); + VMMemRegion* ptr = (VMMemRegion*)itr.current(); + tty->print_cr("dump virtual memory pointers:"); + while (ptr != NULL) { + if (ptr->is_committed_region()) { + tty->print("\t"); + } + tty->print("[" PTR_FORMAT " - " PTR_FORMAT "] [%x]", ptr->addr(), + (ptr->addr() + ptr->size()), ptr->flags()); + + if (MemTracker::track_callsite()) { + VMMemRegionEx* ex = (VMMemRegionEx*)ptr; + if (ex->pc() != NULL) { + char buf[1024]; + if (os::dll_address_to_function_name(ex->pc(), buf, sizeof(buf), NULL)) { + tty->print_cr("\t%s", buf); + } else { + tty->print_cr(""); + } + } + } + + ptr = (VMMemRegion*)itr.next(); + } + tty->flush(); +} #endif // ASSERT diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memSnapshot.hpp --- a/src/share/vm/services/memSnapshot.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memSnapshot.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -111,33 +111,41 @@ MemPointerIterator(arr) { } - // locate an existing record that contains specified address, or - // the record, where the record with specified address, should - // be inserted. - // virtual memory record array is sorted in address order, so - // binary search is performed + // locate an existing reserved memory region that contains specified address, + // or the reserved region just above this address, where the incoming + // reserved region should be inserted. virtual MemPointer* locate(address addr) { - int index_low = 0; - int index_high = _array->length(); - int index_mid = (index_high + index_low) / 2; - int r = 1; - while (index_low < index_high && (r = compare(index_mid, addr)) != 0) { - if (r > 0) { - index_high = index_mid; - } else { - index_low = index_mid; + reset(); + VMMemRegion* reg = (VMMemRegion*)current(); + while (reg != NULL) { + if (reg->is_reserved_region()) { + if (reg->contains_address(addr) || addr < reg->base()) { + return reg; } - index_mid = (index_high + index_low) / 2; } - if (r == 0) { - // update current location - _pos = index_mid; - return _array->at(index_mid); - } else { + reg = (VMMemRegion*)next(); + } return NULL; } - } + + // following methods update virtual memory in the context + // of 'current' position, which is properly positioned by + // callers via locate method. 
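Note: the rewritten locate() above replaces the old binary search with a forward scan that skips committed entries and returns the first reserved region that either contains the address or starts above it, leaving the iterator positioned for the follow-up add/remove calls. A compact sketch of the same scan over an illustrative, address-ordered region vector:

#include <cstdint>
#include <vector>

struct VRegion {
  uintptr_t base; size_t size; bool reserved;
  bool contains(uintptr_t a) const { return base <= a && a < base + size; }
};

// Return the reserved region containing addr, or the nearest reserved region
// whose base is above addr; nullptr if neither exists.
const VRegion* locate(const std::vector<VRegion>& regions, uintptr_t addr) {
  for (const VRegion& r : regions) {
    if (r.reserved && (r.contains(addr) || addr < r.base)) {
      return &r;
    }
  }
  return nullptr;
}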
+ bool add_reserved_region(MemPointerRecord* rec); + bool add_committed_region(MemPointerRecord* rec); + bool remove_uncommitted_region(MemPointerRecord* rec); + bool remove_released_region(MemPointerRecord* rec); + // split a reserved region to create a new memory region with specified base and size + bool split_reserved_region(VMMemRegion* rgn, address new_rgn_addr, size_t new_rgn_size); + private: + bool insert_record(MemPointerRecord* rec); + bool insert_record_after(MemPointerRecord* rec); + + bool insert_reserved_region(MemPointerRecord* rec); + + // reset current position + inline void reset() { _pos = 0; } #ifdef ASSERT virtual bool is_dup_pointer(const MemPointer* ptr1, const MemPointer* ptr2) const { @@ -154,32 +162,17 @@ (p1->flags() & MemPointerRecord::tag_masks) == MemPointerRecord::tag_release; } #endif - // compare if an address falls into a memory region, - // return 0, if the address falls into a memory region at specified index - // return 1, if memory region pointed by specified index is higher than the address - // return -1, if memory region pointed by specified index is lower than the address - int compare(int index, address addr) const { - VMMemRegion* r = (VMMemRegion*)_array->at(index); - assert(r->is_reserve_record(), "Sanity check"); - if (r->addr() > addr) { - return 1; - } else if (r->addr() + r->reserved_size() <= addr) { - return -1; - } else { - return 0; - } - } }; class MallocRecordIterator : public MemPointerArrayIterator { - private: + protected: MemPointerArrayIteratorImpl _itr; public: MallocRecordIterator(MemPointerArray* arr) : _itr(arr) { } - MemPointer* current() const { + virtual MemPointer* current() const { MemPointerRecord* cur = (MemPointerRecord*)_itr.current(); assert(cur == NULL || !cur->is_vm_pointer(), "seek error"); MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next(); @@ -194,7 +187,7 @@ } } - MemPointer* next() { + virtual MemPointer* next() { MemPointerRecord* cur = (MemPointerRecord*)_itr.current(); assert(cur == NULL || !cur->is_vm_pointer(), "Sanity check"); MemPointerRecord* next = (MemPointerRecord*)_itr.next(); @@ -214,6 +207,63 @@ bool insert_after(MemPointer* ptr) { ShouldNotReachHere(); return false; } }; +// collapse duplicated records. Eliminating duplicated records here, is much +// cheaper than during promotion phase. However, it does have limitation - it +// can only eliminate duplicated records within the generation, there are +// still chances seeing duplicated records during promotion. +// We want to use the record with higher sequence number, because it has +// more accurate callsite pc. 
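Note: the comment above describes the policy VMRecordIterator implements just below: records arrive pre-sorted by sequence number, and adjacent records that describe the same region with the same flags are collapsed so the caller sees only the newest one, which carries the more accurate callsite pc. A self-contained sketch of that skip, with illustrative types:

#include <cstdint>
#include <vector>

struct SeqRec { uintptr_t addr; size_t size; int flags; int seq; };

bool is_duplicate(const SeqRec& a, const SeqRec& b) {
  return a.addr == b.addr && a.size == b.size && a.flags == b.flags;
}

// Starting at index i of a seq-ordered record list, advance over duplicates so
// the index returned refers to the record with the highest sequence number.
size_t skip_duplicates(const std::vector<SeqRec>& recs, size_t i) {
  while (i + 1 < recs.size() && is_duplicate(recs[i], recs[i + 1])) {
    ++i;   // the later record has a more accurate callsite pc
  }
  return i;
}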
+class VMRecordIterator : public MallocRecordIterator { + public: + VMRecordIterator(MemPointerArray* arr) : MallocRecordIterator(arr) { + MemPointerRecord* cur = (MemPointerRecord*)_itr.current(); + MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next(); + while (next != NULL) { + assert(cur != NULL, "Sanity check"); + assert(((SeqMemPointerRecord*)next)->seq() > ((SeqMemPointerRecord*)cur)->seq(), + "pre-sort order"); + + if (is_duplicated_record(cur, next)) { + _itr.next(); + next = (MemPointerRecord*)_itr.peek_next(); + } else { + break; + } + } + } + + virtual MemPointer* current() const { + return _itr.current(); + } + + // get next record, but skip the duplicated records + virtual MemPointer* next() { + MemPointerRecord* cur = (MemPointerRecord*)_itr.next(); + MemPointerRecord* next = (MemPointerRecord*)_itr.peek_next(); + while (next != NULL) { + assert(cur != NULL, "Sanity check"); + assert(((SeqMemPointerRecord*)next)->seq() > ((SeqMemPointerRecord*)cur)->seq(), + "pre-sort order"); + + if (is_duplicated_record(cur, next)) { + _itr.next(); + cur = next; + next = (MemPointerRecord*)_itr.peek_next(); + } else { + break; + } + } + return cur; + } + + private: + bool is_duplicated_record(MemPointerRecord* p1, MemPointerRecord* p2) const { + bool ret = (p1->addr() == p2->addr() && p1->size() == p2->size() && p1->flags() == p2->flags()); + assert(!(ret && FLAGS_TO_MEMORY_TYPE(p1->flags()) == mtThreadStack), "dup on stack record"); + return ret; + } +}; + class StagingArea : public _ValueObj { private: MemPointerArray* _malloc_data; @@ -233,7 +283,8 @@ return MallocRecordIterator(malloc_data()); } - MemPointerArrayIteratorImpl virtual_memory_record_walker(); + VMRecordIterator virtual_memory_record_walker(); + bool init(); void clear() { assert(_malloc_data != NULL && _vm_data != NULL, "Just check"); @@ -293,6 +344,8 @@ NOT_PRODUCT(void check_staging_data();) NOT_PRODUCT(void check_malloc_pointers();) NOT_PRODUCT(bool has_allocation_record(address addr);) + // dump all virtual memory pointers in snapshot + DEBUG_ONLY( void dump_all_vm_pointers();) private: // copy pointer data from src to dest @@ -302,5 +355,4 @@ bool promote_virtual_memory_records(MemPointerArrayIterator* itr); }; - #endif // SHARE_VM_SERVICES_MEM_SNAPSHOT_HPP diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memTracker.cpp --- a/src/share/vm/services/memTracker.cpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memTracker.cpp Fri Oct 26 14:09:52 2012 -0700 @@ -364,7 +364,7 @@ if (thread != NULL) { if (thread->is_Java_thread() && ((JavaThread*)thread)->is_safepoint_visible()) { - JavaThread* java_thread = static_cast(thread); + JavaThread* java_thread = (JavaThread*)thread; JavaThreadState state = java_thread->thread_state(); if (SafepointSynchronize::safepoint_safe(java_thread, state)) { // JavaThreads that are safepoint safe, can run through safepoint, @@ -472,6 +472,8 @@ // it should guarantee that NMT is fully sync-ed. 
ThreadCritical tc; + SequenceGenerator::reset(); + // walk all JavaThreads to collect recorders SyncThreadRecorderClosure stc; Threads::threads_do(&stc); @@ -484,11 +486,12 @@ pending_recorders = _global_recorder; _global_recorder = NULL; } - SequenceGenerator::reset(); // check _worker_thread with lock to avoid racing condition if (_worker_thread != NULL) { _worker_thread->at_sync_point(pending_recorders); } + + assert(SequenceGenerator::peek() == 1, "Should not have memory activities during sync-point"); } } diff -r 556dd9e475c6 -r dc16fe422c53 src/share/vm/services/memTracker.hpp --- a/src/share/vm/services/memTracker.hpp Thu Oct 25 09:53:16 2012 -0700 +++ b/src/share/vm/services/memTracker.hpp Fri Oct 26 14:09:52 2012 -0700 @@ -113,8 +113,10 @@ #include "thread_solaris.inline.hpp" #endif -#ifdef _DEBUG - #define DEBUG_CALLER_PC os::get_caller_pc(3) +extern bool NMT_track_callsite; + +#ifdef ASSERT + #define DEBUG_CALLER_PC (NMT_track_callsite ? os::get_caller_pc(2) : 0) #else #define DEBUG_CALLER_PC 0 #endif @@ -261,7 +263,7 @@ // record a 'malloc' call static inline void record_malloc(address addr, size_t size, MEMFLAGS flags, address pc = 0, Thread* thread = NULL) { - if (NMT_CAN_TRACK(flags)) { + if (is_on() && NMT_CAN_TRACK(flags)) { assert(size > 0, "Sanity check"); create_memory_record(addr, (flags|MemPointerRecord::malloc_tag()), size, pc, thread); } @@ -275,7 +277,7 @@ // record a 'realloc' call static inline void record_realloc(address old_addr, address new_addr, size_t size, MEMFLAGS flags, address pc = 0, Thread* thread = NULL) { - if (is_on()) { + if (is_on() && NMT_CAN_TRACK(flags)) { assert(size > 0, "Sanity check"); record_free(old_addr, flags, thread); record_malloc(new_addr, size, flags, pc, thread); @@ -317,6 +319,7 @@ static inline void release_thread_stack(address addr, size_t size, Thread* thr) { if (is_on()) { assert(size > 0 && thr != NULL, "Sanity check"); + assert(!thr->is_Java_thread(), "too early"); create_memory_record(addr, MemPointerRecord::virtual_memory_uncommit_tag() | mtThreadStack, size, DEBUG_CALLER_PC, thr); create_memory_record(addr, MemPointerRecord::virtual_memory_release_tag() | mtThreadStack, @@ -326,11 +329,11 @@ // record a virtual memory 'commit' call static inline void record_virtual_memory_commit(address addr, size_t size, - address pc = 0, Thread* thread = NULL) { + address pc, Thread* thread = NULL) { if (is_on()) { assert(size > 0, "Sanity check"); create_memory_record(addr, MemPointerRecord::virtual_memory_commit_tag(), - size, DEBUG_CALLER_PC, thread); + size, pc, thread); } } diff -r 556dd9e475c6 -r dc16fe422c53 test/compiler/6340864/TestByteVect.java --- a/test/compiler/6340864/TestByteVect.java Thu Oct 25 09:53:16 2012 -0700 +++ b/test/compiler/6340864/TestByteVect.java Fri Oct 26 14:09:52 2012 -0700 @@ -33,7 +33,7 @@ public class TestByteVect { private static final int ARRLEN = 997; private static final int ITERS = 11000; - private static final int ADD_INIT = 0; + private static final int ADD_INIT = 63; private static final int BIT_MASK = 0xB7; private static final int VALUE = 3; private static final int SHIFT = 8; @@ -76,6 +76,7 @@ test_subc(a0, a1); test_subv(a0, a1, (byte)VALUE); test_suba(a0, a1, a2); + test_mulc(a0, a1); test_mulv(a0, a1, (byte)VALUE); test_mula(a0, a1, a2); @@ -88,6 +89,7 @@ test_divc_n(a0, a1); test_divv(a0, a1, (byte)-VALUE); test_diva(a0, a1, a3); + test_andc(a0, a1); test_andv(a0, a1, (byte)BIT_MASK); test_anda(a0, a1, a4); @@ -97,30 +99,49 @@ test_xorc(a0, a1); test_xorv(a0, a1, (byte)BIT_MASK); 
test_xora(a0, a1, a4); + test_sllc(a0, a1); test_sllv(a0, a1, VALUE); test_srlc(a0, a1); test_srlv(a0, a1, VALUE); test_srac(a0, a1); test_srav(a0, a1, VALUE); + test_sllc_n(a0, a1); test_sllv(a0, a1, -VALUE); test_srlc_n(a0, a1); test_srlv(a0, a1, -VALUE); test_srac_n(a0, a1); test_srav(a0, a1, -VALUE); + test_sllc_o(a0, a1); test_sllv(a0, a1, SHIFT); test_srlc_o(a0, a1); test_srlv(a0, a1, SHIFT); test_srac_o(a0, a1); test_srav(a0, a1, SHIFT); + test_sllc_on(a0, a1); test_sllv(a0, a1, -SHIFT); test_srlc_on(a0, a1); test_srlv(a0, a1, -SHIFT); test_srac_on(a0, a1); test_srav(a0, a1, -SHIFT); + + test_sllc_add(a0, a1); + test_sllv_add(a0, a1, ADD_INIT); + test_srlc_add(a0, a1); + test_srlv_add(a0, a1, ADD_INIT); + test_srac_add(a0, a1); + test_srav_add(a0, a1, ADD_INIT); + + test_sllc_and(a0, a1); + test_sllv_and(a0, a1, BIT_MASK); + test_srlc_and(a0, a1); + test_srlv_and(a0, a1, BIT_MASK); + test_srac_and(a0, a1); + test_srav_and(a0, a1, BIT_MASK); + test_pack2(p2, a1); test_unpack2(a0, p2); test_pack2_swap(p2, a1); @@ -369,6 +390,60 @@ errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT))); } + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + test_pack2(p2, a1); for (int i=0; i>>b); } } + static void test_srlc_add(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & b)>>>VALUE); + } + } static void test_srac(byte[] a0, byte[] a1) { for (int i = 0; i < a0.length; i+=1) { @@ -1088,6 +1281,26 @@ a0[i] = (byte)(a1[i]>>b); } } + static void test_srac_add(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & b)>>VALUE); + } + } static void test_pack2(short[] p2, byte[] a1) { if (p2.length*2 > a1.length) return; diff -r 556dd9e475c6 -r dc16fe422c53 test/compiler/6340864/TestIntVect.java --- a/test/compiler/6340864/TestIntVect.java Thu Oct 25 09:53:16 2012 -0700 +++ b/test/compiler/6340864/TestIntVect.java Fri Oct 26 14:09:52 2012 -0700 @@ -74,6 +74,7 @@ test_subc(a0, a1); test_subv(a0, a1, (int)VALUE); test_suba(a0, a1, a2); + test_mulc(a0, a1); test_mulv(a0, a1, (int)VALUE); test_mula(a0, a1, a2); @@ -86,6 +87,7 @@ test_divc_n(a0, a1); test_divv(a0, a1, (int)-VALUE); test_diva(a0, 
a1, a3); + test_andc(a0, a1); test_andv(a0, a1, (int)BIT_MASK); test_anda(a0, a1, a4); @@ -95,30 +97,49 @@ test_xorc(a0, a1); test_xorv(a0, a1, (int)BIT_MASK); test_xora(a0, a1, a4); + test_sllc(a0, a1); test_sllv(a0, a1, VALUE); test_srlc(a0, a1); test_srlv(a0, a1, VALUE); test_srac(a0, a1); test_srav(a0, a1, VALUE); + test_sllc_n(a0, a1); test_sllv(a0, a1, -VALUE); test_srlc_n(a0, a1); test_srlv(a0, a1, -VALUE); test_srac_n(a0, a1); test_srav(a0, a1, -VALUE); + test_sllc_o(a0, a1); test_sllv(a0, a1, SHIFT); test_srlc_o(a0, a1); test_srlv(a0, a1, SHIFT); test_srac_o(a0, a1); test_srav(a0, a1, SHIFT); + test_sllc_on(a0, a1); test_sllv(a0, a1, -SHIFT); test_srlc_on(a0, a1); test_srlv(a0, a1, -SHIFT); test_srac_on(a0, a1); test_srav(a0, a1, -SHIFT); + + test_sllc_add(a0, a1); + test_sllv_add(a0, a1, ADD_INIT); + test_srlc_add(a0, a1); + test_srlv_add(a0, a1, ADD_INIT); + test_srac_add(a0, a1); + test_srav_add(a0, a1, ADD_INIT); + + test_sllc_and(a0, a1); + test_sllv_and(a0, a1, BIT_MASK); + test_srlc_and(a0, a1); + test_srlv_and(a0, a1, BIT_MASK); + test_srac_and(a0, a1); + test_srav_and(a0, a1, BIT_MASK); + test_pack2(p2, a1); test_unpack2(a0, p2); test_pack2_swap(p2, a1); @@ -359,6 +380,60 @@ errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT))); } + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + test_pack2(p2, a1); for (int i=0; i>>b); } } + static void test_srlc_add(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] & b)>>>VALUE); + } + } static void test_srac(int[] a0, int[] a1) { for (int i = 0; i < a0.length; i+=1) { @@ -960,6 +1153,26 @@ a0[i] = (int)(a1[i]>>b); } } + static void test_srac_add(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)((a1[i] & b)>>VALUE); + } + } static void test_pack2(long[] p2, int[] a1) { if (p2.length*2 > a1.length) return; diff -r 556dd9e475c6 -r dc16fe422c53 test/compiler/6340864/TestLongVect.java --- a/test/compiler/6340864/TestLongVect.java Thu Oct 25 09:53:16 2012 -0700 +++ b/test/compiler/6340864/TestLongVect.java Fri Oct 26 14:09:52 2012 -0700 @@ -73,6 +73,7 @@ test_subc(a0, a1); test_subv(a0, a1, (long)VALUE); test_suba(a0, a1, a2); + test_mulc(a0, a1); 
test_mulv(a0, a1, (long)VALUE); test_mula(a0, a1, a2); @@ -85,6 +86,7 @@ test_divc_n(a0, a1); test_divv(a0, a1, (long)-VALUE); test_diva(a0, a1, a3); + test_andc(a0, a1); test_andv(a0, a1, (long)BIT_MASK); test_anda(a0, a1, a4); @@ -94,30 +96,48 @@ test_xorc(a0, a1); test_xorv(a0, a1, (long)BIT_MASK); test_xora(a0, a1, a4); + test_sllc(a0, a1); test_sllv(a0, a1, VALUE); test_srlc(a0, a1); test_srlv(a0, a1, VALUE); test_srac(a0, a1); test_srav(a0, a1, VALUE); + test_sllc_n(a0, a1); test_sllv(a0, a1, -VALUE); test_srlc_n(a0, a1); test_srlv(a0, a1, -VALUE); test_srac_n(a0, a1); test_srav(a0, a1, -VALUE); + test_sllc_o(a0, a1); test_sllv(a0, a1, SHIFT); test_srlc_o(a0, a1); test_srlv(a0, a1, SHIFT); test_srac_o(a0, a1); test_srav(a0, a1, SHIFT); + test_sllc_on(a0, a1); test_sllv(a0, a1, -SHIFT); test_srlc_on(a0, a1); test_srlv(a0, a1, -SHIFT); test_srac_on(a0, a1); test_srav(a0, a1, -SHIFT); + + test_sllc_add(a0, a1); + test_sllv_add(a0, a1, ADD_INIT); + test_srlc_add(a0, a1); + test_srlv_add(a0, a1, ADD_INIT); + test_srac_add(a0, a1); + test_srav_add(a0, a1, ADD_INIT); + + test_sllc_and(a0, a1); + test_sllv_and(a0, a1, BIT_MASK); + test_srlc_and(a0, a1); + test_srlv_and(a0, a1, BIT_MASK); + test_srac_and(a0, a1); + test_srav_and(a0, a1, BIT_MASK); } // Test and verify results System.out.println("Verification"); @@ -354,6 +374,60 @@ errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT))); } + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + } if (errn > 0) @@ -696,6 +770,84 @@ end = System.currentTimeMillis(); System.out.println("test_srav_on: " + (end - start)); + start = System.currentTimeMillis(); + for (int i=0; i>>b); } } + static void test_srlc_add(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(long[] a0, long[] a1, long b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(long[] a0, long[] a1, long b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] & b)>>>VALUE); + } + } static void test_srac(long[] a0, long[] a1) { for (int i = 0; i < a0.length; i+=1) { @@ -906,6 +1098,26 @@ a0[i] = (long)(a1[i]>>b); } } + static void test_srac_add(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(long[] a0, long[] a1, long b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(long[] a0, long[] a1, long b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)((a1[i] & b)>>VALUE); + } + } static int verify(String text, int i, long elem, long val) { if (elem != val) { diff -r 556dd9e475c6 -r dc16fe422c53 
test/compiler/6340864/TestShortVect.java --- a/test/compiler/6340864/TestShortVect.java Thu Oct 25 09:53:16 2012 -0700 +++ b/test/compiler/6340864/TestShortVect.java Fri Oct 26 14:09:52 2012 -0700 @@ -75,6 +75,7 @@ test_subc(a0, a1); test_subv(a0, a1, (short)VALUE); test_suba(a0, a1, a2); + test_mulc(a0, a1); test_mulv(a0, a1, (short)VALUE); test_mula(a0, a1, a2); @@ -87,6 +88,7 @@ test_divc_n(a0, a1); test_divv(a0, a1, (short)-VALUE); test_diva(a0, a1, a3); + test_andc(a0, a1); test_andv(a0, a1, (short)BIT_MASK); test_anda(a0, a1, a4); @@ -96,30 +98,49 @@ test_xorc(a0, a1); test_xorv(a0, a1, (short)BIT_MASK); test_xora(a0, a1, a4); + test_sllc(a0, a1); test_sllv(a0, a1, VALUE); test_srlc(a0, a1); test_srlv(a0, a1, VALUE); test_srac(a0, a1); test_srav(a0, a1, VALUE); + test_sllc_n(a0, a1); test_sllv(a0, a1, -VALUE); test_srlc_n(a0, a1); test_srlv(a0, a1, -VALUE); test_srac_n(a0, a1); test_srav(a0, a1, -VALUE); + test_sllc_o(a0, a1); test_sllv(a0, a1, SHIFT); test_srlc_o(a0, a1); test_srlv(a0, a1, SHIFT); test_srac_o(a0, a1); test_srav(a0, a1, SHIFT); + test_sllc_on(a0, a1); test_sllv(a0, a1, -SHIFT); test_srlc_on(a0, a1); test_srlv(a0, a1, -SHIFT); test_srac_on(a0, a1); test_srav(a0, a1, -SHIFT); + + test_sllc_add(a0, a1); + test_sllv_add(a0, a1, ADD_INIT); + test_srlc_add(a0, a1); + test_srlv_add(a0, a1, ADD_INIT); + test_srac_add(a0, a1); + test_srav_add(a0, a1, ADD_INIT); + + test_sllc_and(a0, a1); + test_sllv_and(a0, a1, BIT_MASK); + test_srlc_and(a0, a1); + test_srlv_and(a0, a1, BIT_MASK); + test_srac_and(a0, a1); + test_srav_and(a0, a1, BIT_MASK); + test_pack2(p2, a1); test_unpack2(a0, p2); test_pack2_swap(p2, a1); @@ -364,6 +385,60 @@ errn += verify("test_srav_on: ", i, a0[i], (short)((short)(ADD_INIT+i)>>(-SHIFT))); } + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + test_pack2(p2, a1); for (int i=0; i>>b); } } + static void test_srlc_add(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] & b)>>>VALUE); + } + } static void test_srac(short[] a0, short[] a1) { for (int i = 0; i < a0.length; i+=1) { @@ -1020,6 +1213,26 @@ a0[i] = (short)(a1[i]>>b); } } + static void test_srac_add(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(short[] a0, short[] a1, int b) { + 
for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)((a1[i] & b)>>VALUE); + } + } static void test_pack2(int[] p2, short[] a1) { if (p2.length*2 > a1.length) return; diff -r 556dd9e475c6 -r dc16fe422c53 test/compiler/7184394/TestAESBase.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7184394/TestAESBase.java Fri Oct 26 14:09:52 2012 -0700 @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @author Tom Deneau + */ + +import javax.crypto.Cipher; +import javax.crypto.KeyGenerator; +import javax.crypto.SecretKey; +import javax.crypto.spec.IvParameterSpec; +import javax.crypto.spec.SecretKeySpec; +import java.security.AlgorithmParameters; + +import java.util.Random; +import java.util.Arrays; + +abstract public class TestAESBase { + int msgSize = Integer.getInteger("msgSize", 646); + boolean checkOutput = Boolean.getBoolean("checkOutput"); + boolean noReinit = Boolean.getBoolean("noReinit"); + int keySize = Integer.getInteger("keySize", 128); + String algorithm = System.getProperty("algorithm", "AES"); + String mode = System.getProperty("mode", "CBC"); + byte[] input; + byte[] encode; + byte[] expectedEncode; + byte[] decode; + byte[] expectedDecode; + Random random = new Random(0); + Cipher cipher; + Cipher dCipher; + String paddingStr = "PKCS5Padding"; + AlgorithmParameters algParams; + SecretKey key; + int ivLen; + + static int numThreads = 0; + int threadId; + static synchronized int getThreadId() { + int id = numThreads; + numThreads++; + return id; + } + + abstract public void run(); + + public void prepare() { + try { + System.out.println("\nmsgSize=" + msgSize + ", key size=" + keySize + ", reInit=" + !noReinit + ", checkOutput=" + checkOutput); + + int keyLenBytes = (keySize == 0 ? 16 : keySize/8); + byte keyBytes[] = new byte[keyLenBytes]; + if (keySize == 128) + keyBytes = new byte[] {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; + else + random.nextBytes(keyBytes); + + key = new SecretKeySpec(keyBytes, algorithm); + if (threadId == 0) { + System.out.println("Algorithm: " + key.getAlgorithm() + "(" + + key.getEncoded().length * 8 + "bit)"); + } + input = new byte[msgSize]; + for (int i=0; i 0 ? 
Integer.valueOf(args[0]) : 1000000); + System.out.println(iters + " iterations"); + TestAESEncode etest = new TestAESEncode(); + etest.prepare(); + long start = System.nanoTime(); + for (int i=0; i> 16; } + static int loadUS_signExt_1 (char[] ca) { return (ca[0] << 16) >> 16; } + + static long loadB2L_mask8 (byte[] ba) { return ba[0] & 0x55; } + static long loadB2L_mask8_1 (byte[] ba) { return ba[0] & 0x55; } + + public static void main(String[] args) { + for (int i = Byte.MIN_VALUE; i < Byte.MAX_VALUE; i++) { + byte[] ba = new byte[] { (byte) i}; + + { long v1 = loadB2L_mask8(ba); + long v2 = loadB2L_mask8_1(ba); + if (v1 != v2) + throw new InternalError(String.format("loadB2L_mask8 failed: %x != %x", v1, v2)); } + } + + for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) { + short[] sa = new short[] { (short)i }; + char[] ca = new char[] { (char)i }; + + { long v1 = loadS2LmaskFF(sa); + long v2 = loadS2LmaskFF_1(sa); + if (v1 != v2) + throw new InternalError(String.format("loadS2LmaskFF failed: %x != %x", v1, v2)); } + + { long v1 = loadS2Lmask16(sa); + long v2 = loadS2Lmask16_1(sa); + if (v1 != v2) + throw new InternalError(String.format("loadS2Lmask16 failed: %x != %x", v1, v2)); } + + { long v1 = loadS2Lmask13(sa); + long v2 = loadS2Lmask13_1(sa); + if (v1 != v2) + throw new InternalError(String.format("loadS2Lmask13 failed: %x != %x", v1, v2)); } + + { int v1 = loadUS_signExt(ca); + int v2 = loadUS_signExt_1(ca); + if (v1 != v2) + throw new InternalError(String.format("loadUS_signExt failed: %x != %x", v1, v2)); } + } + + System.out.println("TEST PASSED."); + } +} diff -r 556dd9e475c6 -r dc16fe422c53 test/compiler/8001183/TestCharVect.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/8001183/TestCharVect.java Fri Oct 26 14:09:52 2012 -0700 @@ -0,0 +1,1332 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/** + * @test + * @bug 8001183 + * @summary incorrect results of char vectors right shift operaiton + * + * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestCharVect + */ + +public class TestCharVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int ADD_INIT = Character.MAX_VALUE-500; + private static final int BIT_MASK = 0xB731; + private static final int VALUE = 7; + private static final int SHIFT = 16; + + public static void main(String args[]) { + System.out.println("Testing Char vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + char[] a0 = new char[ARRLEN]; + char[] a1 = new char[ARRLEN]; + short[] a2 = new short[ARRLEN]; + short[] a3 = new short[ARRLEN]; + short[] a4 = new short[ARRLEN]; + int[] p2 = new int[ARRLEN/2]; + long[] p4 = new long[ARRLEN/4]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + + test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>>b); + } + } + static void test_srlc_add(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; 
i+=1) { + a0[i] = (char)((a1[i] & BIT_MASK)>>>VALUE); + } + } + static void test_srlv_and(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] & b)>>>VALUE); + } + } + + static void test_srac(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>VALUE); + } + } + static void test_srac_n(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>SHIFT); + } + } + static void test_srac_on(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)(a1[i]>>b); + } + } + static void test_srac_add(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] + ADD_INIT)>>VALUE); + } + } + static void test_srav_add(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] + b)>>VALUE); + } + } + static void test_srac_and(char[] a0, char[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] & BIT_MASK)>>VALUE); + } + } + static void test_srav_and(char[] a0, char[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (char)((a1[i] & b)>>VALUE); + } + } + + static void test_pack2(int[] p2, char[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l0 = (int)a1[i*2+0]; + int l1 = (int)a1[i*2+1]; + p2[i] = (l1 << 16) | (l0 & 0xFFFF); + } + } + static void test_unpack2(char[] a0, int[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l = p2[i]; + a0[i*2+0] = (char)(l & 0xFFFF); + a0[i*2+1] = (char)(l >> 16); + } + } + static void test_pack2_swap(int[] p2, char[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l0 = (int)a1[i*2+0]; + int l1 = (int)a1[i*2+1]; + p2[i] = (l0 << 16) | (l1 & 0xFFFF); + } + } + static void test_unpack2_swap(char[] a0, int[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l = p2[i]; + a0[i*2+0] = (char)(l >> 16); + a0[i*2+1] = (char)(l & 0xFFFF); + } + } + + static void test_pack4(long[] p4, char[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l0 = (long)a1[i*4+0]; + long l1 = (long)a1[i*4+1]; + long l2 = (long)a1[i*4+2]; + long l3 = (long)a1[i*4+3]; + p4[i] = (l0 & 0xFFFFl) | + ((l1 & 0xFFFFl) << 16) | + ((l2 & 0xFFFFl) << 32) | + ((l3 & 0xFFFFl) << 48); + } + } + static void test_unpack4(char[] a0, long[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l = p4[i]; + a0[i*4+0] = (char)(l & 0xFFFFl); + a0[i*4+1] = (char)(l >> 16); + a0[i*4+2] = (char)(l >> 32); + a0[i*4+3] = (char)(l >> 48); + } + } + static void test_pack4_swap(long[] p4, char[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l0 = (long)a1[i*4+0]; + long l1 = (long)a1[i*4+1]; + long l2 = (long)a1[i*4+2]; + long l3 = (long)a1[i*4+3]; + p4[i] = (l3 & 0xFFFFl) | + ((l2 & 0xFFFFl) << 16) | + ((l1 & 0xFFFFl) << 32) | + ((l0 & 0xFFFFl) << 48); + } + } + static void test_unpack4_swap(char[] a0, long[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l = p4[i]; + a0[i*4+0] = (char)(l 
>> 48); + a0[i*4+1] = (char)(l >> 32); + a0[i*4+2] = (char)(l >> 16); + a0[i*4+3] = (char)(l & 0xFFFFl); + } + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); + return 1; + } + return 0; + } +}
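
The bulk of the test changes above add the same two loop shapes to TestByteVect, TestIntVect, TestLongVect, TestShortVect and the new TestCharVect: a right or left shift whose operand has first been combined with a loop-invariant value, either by an add (the test_*_add cases) or by an and (the test_*_and cases), with every element then checked against the identical expression written out in scalar form. The condensed sketch below is not part of the changeset; the class name and the ADD_INIT/BIT_MASK/VALUE constants are illustrative, with only ARRLEN (997) and the 11000-iteration warm-up mirroring figures visible in the tests themselves.

// MiniShiftVectSketch.java -- illustrative sketch, not part of this changeset.
// Reproduces the shape of the new test_*_add / test_*_and cases: the loop
// bodies are simple enough for C2's auto-vectorizer, and each result is
// verified against the same expression evaluated element by element.
public class MiniShiftVectSketch {
  private static final int ARRLEN   = 997;    // same array length as the real tests
  private static final int ITERS    = 11000;  // same warm-up count as the real tests
  private static final int ADD_INIT = -500;   // illustrative constants
  private static final int BIT_MASK = 0xEC;
  private static final int VALUE    = 7;

  public static void main(String[] args) {
    byte[] a0 = new byte[ARRLEN];
    byte[] a1 = new byte[ARRLEN];
    for (int i = 0; i < ARRLEN; i++) {
      a1[i] = (byte) (ADD_INIT + i);
    }

    // Warm up so the test methods get JIT-compiled before verification.
    for (int w = 0; w < ITERS; w++) {
      test_srlc_add(a0, a1);
      test_srac_and(a0, a1);
    }

    int errn = 0;

    // Logical shift of (element + invariant).
    test_srlc_add(a0, a1);
    for (int i = 0; i < ARRLEN; i++) {
      errn += verify("test_srlc_add: ", i, a0[i], (byte) ((a1[i] + ADD_INIT) >>> VALUE));
    }

    // Arithmetic shift of (element & invariant).
    test_srac_and(a0, a1);
    for (int i = 0; i < ARRLEN; i++) {
      errn += verify("test_srac_and: ", i, a0[i], (byte) ((a1[i] & BIT_MASK) >> VALUE));
    }

    if (errn > 0) {
      System.err.println("FAILED: " + errn + " errors");
      System.exit(97);
    }
    System.out.println("PASSED");
  }

  static void test_srlc_add(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i += 1) {
      a0[i] = (byte) ((a1[i] + ADD_INIT) >>> VALUE);
    }
  }

  static void test_srac_and(byte[] a0, byte[] a1) {
    for (int i = 0; i < a0.length; i += 1) {
      a0[i] = (byte) ((a1[i] & BIT_MASK) >> VALUE);
    }
  }

  static int verify(String text, int i, byte elem, byte val) {
    if (elem != val) {
      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
      return 1;
    }
    return 0;
  }
}

Like the real tests, the sketch is meant to be run with -Xbatch so compilation of the test methods completes before the verification pass.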
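
The new TestAESBase/TestAESEncode/TestAESDecode files drive the AES code paths through the standard javax.crypto API: prepare() builds a cipher pair from a fixed 128-bit key and a msgSize-byte random message, and the encode/decode subclasses then call doFinal in a timed loop. A minimal stand-alone round trip in the same style is sketched below; it is not part of the changeset, the class name is illustrative, and only the msgSize default (646), the fixed 128-bit key bytes, and the AES/CBC/PKCS5Padding defaults are taken from the TestAESBase fields visible above.

// AesCbcRoundTripSketch.java -- illustrative sketch, not part of this changeset.
import javax.crypto.Cipher;
import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec;
import java.util.Arrays;
import java.util.Random;

public class AesCbcRoundTripSketch {
  public static void main(String[] args) throws Exception {
    int msgSize = Integer.getInteger("msgSize", 646);  // same default as TestAESBase
    byte[] keyBytes = {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7};
    byte[] input = new byte[msgSize];
    new Random(0).nextBytes(input);

    SecretKeySpec key = new SecretKeySpec(keyBytes, "AES");

    // Encrypt with AES in CBC mode; the cipher chooses the IV.
    Cipher enc = Cipher.getInstance("AES/CBC/PKCS5Padding");
    enc.init(Cipher.ENCRYPT_MODE, key);
    byte[] encoded = enc.doFinal(input);

    // Decrypt with the same key and the IV the encrypting cipher used.
    Cipher dec = Cipher.getInstance("AES/CBC/PKCS5Padding");
    dec.init(Cipher.DECRYPT_MODE, key, new IvParameterSpec(enc.getIV()));
    byte[] decoded = dec.doFinal(encoded);

    if (!Arrays.equals(input, decoded)) {
      throw new RuntimeException("AES/CBC round trip failed");
    }
    System.out.println("AES/CBC round trip OK, msgSize=" + msgSize);
  }
}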
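
TestCharVect also carries the pack2/unpack2 and pack4/unpack4 helpers shown above, which fold two (or four) adjacent chars into one int (or long) and split them back out. The round-trip property those cases rely on can be checked in isolation with the sketch below; it is not part of the changeset, and the class name and array length are illustrative, but the packing expressions are the same ones used by test_pack2 and test_unpack2.

// CharPackSketch.java -- illustrative sketch, not part of this changeset.
public class CharPackSketch {
  public static void main(String[] args) {
    char[] src = new char[996];               // even length: every element gets packed
    for (int i = 0; i < src.length; i++) {
      src[i] = (char) (i * 31);
    }
    int[] packed = new int[src.length / 2];
    char[] dst = new char[src.length];

    pack2(packed, src);
    unpack2(dst, packed);

    for (int i = 0; i < src.length; i++) {
      if (src[i] != dst[i]) {
        throw new RuntimeException("pack2/unpack2 mismatch at index " + i);
      }
    }
    System.out.println("pack2/unpack2 round trip OK");
  }

  // Same expression as test_pack2: element 0 in the low 16 bits, element 1 in the high 16 bits.
  static void pack2(int[] p2, char[] a1) {
    for (int i = 0; i < p2.length; i += 1) {
      int l0 = (int) a1[i * 2 + 0];
      int l1 = (int) a1[i * 2 + 1];
      p2[i] = (l1 << 16) | (l0 & 0xFFFF);
    }
  }

  // Same expression as test_unpack2: low half first, then the high half.
  static void unpack2(char[] a0, int[] p2) {
    for (int i = 0; i < p2.length; i += 1) {
      int l = p2[i];
      a0[i * 2 + 0] = (char) (l & 0xFFFF);
      a0[i * 2 + 1] = (char) (l >> 16);
    }
  }
}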