# HG changeset patch # User dholmes # Date 1338636741 14400 # Node ID 4434fdad6b37225491999d338ef8da56f9ea9da1 # Parent 6e2633440960afd711f294ab42e12a14fd521280# Parent fab99b17c1de94d65e22f91c0b83ecf39826a7a3 Merge diff -r fab99b17c1de -r 4434fdad6b37 .hgtags --- a/.hgtags Fri Jun 01 20:17:46 2012 +0200 +++ b/.hgtags Sat Jun 02 07:32:21 2012 -0400 @@ -248,3 +248,7 @@ 73147e6c48813b5fee904aa33f79a77103250ff4 hs24-b10 96a403721094ecdaf6a1f4f52ebd0a82e07df199 jdk8-b39 14b0e07ab9a6fa1662414496b7e07ac8450cf517 hs24-b11 +ff9decc8235d5af80ea45fda4ecbe643ea252564 jdk8-b40 +785573170238f0eae6dc8e22ecf1050fbc9ea055 hs24-b12 +37add4fa0296705f67481e1fd50e2900cd25e39b jdk8-b41 +bd568544be7fcd12a9327e6c448592198d57b043 hs24-b13 diff -r fab99b17c1de -r 4434fdad6b37 agent/src/share/classes/sun/jvm/hotspot/oops/AccessFlags.java --- a/agent/src/share/classes/sun/jvm/hotspot/oops/AccessFlags.java Fri Jun 01 20:17:46 2012 +0200 +++ b/agent/src/share/classes/sun/jvm/hotspot/oops/AccessFlags.java Sat Jun 02 07:32:21 2012 -0400 @@ -81,6 +81,7 @@ // field flags public boolean fieldAccessWatched () { return (flags & JVM_ACC_FIELD_ACCESS_WATCHED) != 0; } public boolean fieldModificationWatched() { return (flags & JVM_ACC_FIELD_MODIFICATION_WATCHED) != 0; } + public boolean fieldHasGenericSignature() { return (flags & JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE)!= 0; } public void printOn(PrintStream tty) { // prints only .class flags and not the hotspot internal flags diff -r fab99b17c1de -r 4434fdad6b37 agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java --- a/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java Fri Jun 01 20:17:46 2012 +0200 +++ b/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java Sat Jun 02 07:32:21 2012 -0400 @@ -50,7 +50,6 @@ private static int INITVAL_INDEX_OFFSET; private static int LOW_OFFSET; private static int HIGH_OFFSET; - private static int GENERIC_SIGNATURE_INDEX_OFFSET; private static int FIELD_SLOTS; // ClassState constants @@ 
-99,7 +98,6 @@ INITVAL_INDEX_OFFSET = db.lookupIntConstant("FieldInfo::initval_index_offset").intValue(); LOW_OFFSET = db.lookupIntConstant("FieldInfo::low_offset").intValue(); HIGH_OFFSET = db.lookupIntConstant("FieldInfo::high_offset").intValue(); - GENERIC_SIGNATURE_INDEX_OFFSET = db.lookupIntConstant("FieldInfo::generic_signature_offset").intValue(); FIELD_SLOTS = db.lookupIntConstant("FieldInfo::field_slots").intValue(); // read ClassState constants CLASS_STATE_UNPARSABLE_BY_GC = db.lookupIntConstant("instanceKlass::unparsable_by_gc").intValue(); @@ -279,7 +277,25 @@ } public short getFieldGenericSignatureIndex(int index) { - return getFields().getShortAt(index * FIELD_SLOTS + GENERIC_SIGNATURE_INDEX_OFFSET); + int len = (int)getFields().getLength(); + int allFieldsCount = getAllFieldsCount(); + int generic_signature_slot = allFieldsCount * FIELD_SLOTS; + for (int i = 0; i < allFieldsCount; i++) { + short flags = getFieldAccessFlags(i); + AccessFlags access = new AccessFlags(flags); + if (i == index) { + if (access.fieldHasGenericSignature()) { + return getFields().getShortAt(generic_signature_slot); + } else { + return 0; + } + } else { + if (access.fieldHasGenericSignature()) { + generic_signature_slot ++; + } + } + } + return 0; } public Symbol getFieldGenericSignature(int index) { @@ -309,7 +325,18 @@ public ObjArray getTransitiveInterfaces() { return (ObjArray) transitiveInterfaces.getValue(this); } public TypeArray getFields() { return (TypeArray) fields.getValue(this); } public int getJavaFieldsCount() { return (int) javaFieldsCount.getValue(this); } - public int getAllFieldsCount() { return (int)getFields().getLength() / FIELD_SLOTS; } + public int getAllFieldsCount() { + int len = (int)getFields().getLength(); + int allFieldsCount = 0; + for (; allFieldsCount*FIELD_SLOTS < len; allFieldsCount++) { + short flags = getFieldAccessFlags(allFieldsCount); + AccessFlags access = new AccessFlags(flags); + if (access.fieldHasGenericSignature()) { + len --; + } 
+ } + return allFieldsCount; + } public ConstantPool getConstants() { return (ConstantPool) constants.getValue(this); } public Oop getClassLoader() { return classLoader.getValue(this); } public Oop getProtectionDomain() { return protectionDomain.getValue(this); } diff -r fab99b17c1de -r 4434fdad6b37 agent/src/share/classes/sun/jvm/hotspot/runtime/ClassConstants.java --- a/agent/src/share/classes/sun/jvm/hotspot/runtime/ClassConstants.java Fri Jun 01 20:17:46 2012 +0200 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/ClassConstants.java Sat Jun 02 07:32:21 2012 -0400 @@ -153,6 +153,8 @@ public static final long JVM_ACC_FIELD_ACCESS_WATCHED = 0x00002000; // field modification is watched by JVMTI public static final long JVM_ACC_FIELD_MODIFICATION_WATCHED = 0x00008000; + // field has generic signature + public static final long JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE = 0x00000800; // flags accepted by set_field_flags public static final long JVM_ACC_FIELD_FLAGS = 0x00008000 | JVM_ACC_WRITTEN_FLAGS; diff -r fab99b17c1de -r 4434fdad6b37 make/hotspot_version --- a/make/hotspot_version Fri Jun 01 20:17:46 2012 +0200 +++ b/make/hotspot_version Sat Jun 02 07:32:21 2012 -0400 @@ -35,7 +35,7 @@ HS_MAJOR_VER=24 HS_MINOR_VER=0 -HS_BUILD_NUMBER=12 +HS_BUILD_NUMBER=14 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r fab99b17c1de -r 4434fdad6b37 make/jprt.properties --- a/make/jprt.properties Fri Jun 01 20:17:46 2012 +0200 +++ b/make/jprt.properties Sat Jun 02 07:32:21 2012 -0400 @@ -133,7 +133,8 @@ ${jprt.my.linux.x64}-{product|fastdebug}, \ ${jprt.my.macosx.x64}-{product|fastdebug|debug}, \ ${jprt.my.windows.i586}-{product|fastdebug|debug}, \ - ${jprt.my.windows.x64}-{product|fastdebug|debug} + ${jprt.my.windows.x64}-{product|fastdebug|debug}, \ + ${jprt.my.linux.armvfp}-{product|fastdebug} jprt.build.targets.open= \ ${jprt.my.solaris.i586}-{productOpen}, \ diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp 
Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -738,7 +738,8 @@ case vmIntrinsics::_dlog: // fall through case vmIntrinsics::_dsin: // fall through case vmIntrinsics::_dtan: // fall through - case vmIntrinsics::_dcos: { + case vmIntrinsics::_dcos: // fall through + case vmIntrinsics::_dexp: { assert(x->number_of_arguments() == 1, "wrong type"); address runtime_entry = NULL; @@ -758,12 +759,23 @@ case vmIntrinsics::_dlog10: runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); break; + case vmIntrinsics::_dexp: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; default: ShouldNotReachHere(); } LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL); set_result(x, result); + break; + } + case vmIntrinsics::_dpow: { + assert(x->number_of_arguments() == 2, "wrong type"); + address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL); + set_result(x, result); + break; } } } diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/sparc/vm/interpreter_sparc.cpp --- a/src/cpu/sparc/vm/interpreter_sparc.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/sparc/vm/interpreter_sparc.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -403,6 +403,8 @@ case Interpreter::java_lang_math_abs : break; case Interpreter::java_lang_math_log : break; case Interpreter::java_lang_math_log10 : break; + case Interpreter::java_lang_math_pow : break; + case Interpreter::java_lang_math_exp : break; case Interpreter::java_lang_ref_reference_get : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; default : ShouldNotReachHere(); break; diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/assembler_x86.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -3578,6 
+3578,21 @@ emit_byte(0xF1); } +void Assembler::frndint() { + emit_byte(0xD9); + emit_byte(0xFC); +} + +void Assembler::f2xm1() { + emit_byte(0xD9); + emit_byte(0xF0); +} + +void Assembler::fldl2e() { + emit_byte(0xD9); + emit_byte(0xEA); +} + // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. @@ -6868,6 +6883,243 @@ Assembler::fldcw(as_Address(src)); } +void MacroAssembler::pow_exp_core_encoding() { + // kills rax, rcx, rdx + subptr(rsp,sizeof(jdouble)); + // computes 2^X. Stack: X ... + // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and + // keep it on the thread's stack to compute 2^int(X) later + // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1 + // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) + fld_s(0); // Stack: X X ... + frndint(); // Stack: int(X) X ... + fsuba(1); // Stack: int(X) X-int(X) ... + fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... + f2xm1(); // Stack: 2^(X-int(X))-1 ... + fld1(); // Stack: 1 2^(X-int(X))-1 ... + faddp(1); // Stack: 2^(X-int(X)) + // computes 2^(int(X)): add exponent bias (1023) to int(X), then + // shift int(X)+1023 to exponent position. + // Exponent is limited to 11 bits: if int(X)+1023 does not fit in 11 + // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent + // values so detect them and set result to NaN. + movl(rax,Address(rsp,0)); + movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding + addl(rax, 1023); + movl(rdx,rax); + shll(rax,20); + // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. + addl(rdx,1); + // Check that 1 < int(X)+1023+1 < 2048 + // in 3 steps: + // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 + // 2- int(X)+1023+1 != 0 + // 3- int(X)+1023+1 != 1 + // Do 2- first because addl just updated the flags. 
+ cmov32(Assembler::equal,rax,rcx); + cmpl(rdx,1); + cmov32(Assembler::equal,rax,rcx); + testl(rdx,rcx); + cmov32(Assembler::notEqual,rax,rcx); + movl(Address(rsp,4),rax); + movl(Address(rsp,0),0); + fmul_d(Address(rsp,0)); // Stack: 2^X ... + addptr(rsp,sizeof(jdouble)); +} + +void MacroAssembler::fast_pow() { + // computes X^Y = 2^(Y * log2(X)) + // if fast computation is not possible, result is NaN. Requires + // fallback from user of this macro. + fyl2x(); // Stack: (Y*log2(X)) ... + pow_exp_core_encoding(); // Stack: exp(X) ... +} + +void MacroAssembler::fast_exp() { + // computes exp(X) = 2^(X * log2(e)) + // if fast computation is not possible, result is NaN. Requires + // fallback from user of this macro. + fldl2e(); // Stack: log2(e) X ... + fmulp(1); // Stack: (X*log2(e)) ... + pow_exp_core_encoding(); // Stack: exp(X) ... +} + +void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { + // kills rax, rcx, rdx + // pow and exp needs 2 extra registers on the fpu stack. + Label slow_case, done; + Register tmp = noreg; + if (!VM_Version::supports_cmov()) { + // fcmp needs a temporary so preserve rdx, + tmp = rdx; + } + Register tmp2 = rax; + Register tmp3 = rcx; + + if (is_exp) { + // Stack: X + fld_s(0); // duplicate argument for runtime call. Stack: X X + fast_exp(); // Stack: exp(X) X + fcmp(tmp, 0, false, false); // Stack: exp(X) X + // exp(X) not equal to itself: exp(X) is NaN go to slow case. + jcc(Assembler::parity, slow_case); + // get rid of duplicate argument. Stack: exp(X) + if (num_fpu_regs_in_use > 0) { + fxch(); + fpop(); + } else { + ffree(1); + } + jmp(done); + } else { + // Stack: X Y + Label x_negative, y_odd; + + fldz(); // Stack: 0 X Y + fcmp(tmp, 1, true, false); // Stack: X Y + jcc(Assembler::above, x_negative); + + // X >= 0 + + fld_s(1); // duplicate arguments for runtime call. 
Stack: Y X Y + fld_s(1); // Stack: X Y X Y + fast_pow(); // Stack: X^Y X Y + fcmp(tmp, 0, false, false); // Stack: X^Y X Y + // X^Y not equal to itself: X^Y is NaN go to slow case. + jcc(Assembler::parity, slow_case); + // get rid of duplicate arguments. Stack: X^Y + if (num_fpu_regs_in_use > 0) { + fxch(); fpop(); + fxch(); fpop(); + } else { + ffree(2); + ffree(1); + } + jmp(done); + + // X <= 0 + bind(x_negative); + + fld_s(1); // Stack: Y X Y + frndint(); // Stack: int(Y) X Y + fcmp(tmp, 2, false, false); // Stack: int(Y) X Y + jcc(Assembler::notEqual, slow_case); + + subptr(rsp, 8); + + // For X^Y, when X < 0, Y has to be an integer and the final + // result depends on whether it's odd or even. We just checked + // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit + // integer to test its parity. If int(Y) is huge and doesn't fit + // in the 64 bit integer range, the integer indefinite value will + // end up in the gp registers. Huge numbers are all even, the + // integer indefinite number is even so it's fine. + +#ifdef ASSERT + // Let's check we don't end up with an integer indefinite number + // when not expected. First test for huge numbers: check whether + // int(Y)+1 == int(Y) which is true for very large numbers and + // those are all even. A 64 bit integer is guaranteed to not + // overflow for numbers where y+1 != y (when precision is set to + // double precision). + Label y_not_huge; + + fld1(); // Stack: 1 int(Y) X Y + fadd(1); // Stack: 1+int(Y) int(Y) X Y + +#ifdef _LP64 + // trip to memory to force the precision down from double extended + // precision + fstp_d(Address(rsp, 0)); + fld_d(Address(rsp, 0)); +#endif + + fcmp(tmp, 1, true, false); // Stack: int(Y) X Y +#endif + + // move int(Y) as 64 bit integer to thread's stack + fistp_d(Address(rsp,0)); // Stack: X Y + +#ifdef ASSERT + jcc(Assembler::notEqual, y_not_huge); + + // Y is huge so we know it's even. 
It may not fit in a 64 bit + // integer and we don't want the debug code below to see the + // integer indefinite value so overwrite int(Y) on the thread's + // stack with 0. + movl(Address(rsp, 0), 0); + movl(Address(rsp, 4), 0); + + bind(y_not_huge); +#endif + + fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y + fld_s(1); // Stack: X Y X Y + fabs(); // Stack: abs(X) Y X Y + fast_pow(); // Stack: abs(X)^Y X Y + fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y + // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case. + + pop(tmp2); + NOT_LP64(pop(tmp3)); + jcc(Assembler::parity, slow_case); + +#ifdef ASSERT + // Check that int(Y) is not integer indefinite value (int + // overflow). Shouldn't happen because for values that would + // overflow, 1+int(Y)==Y which was tested earlier. +#ifndef _LP64 + { + Label integer; + testl(tmp2, tmp2); + jcc(Assembler::notZero, integer); + cmpl(tmp3, 0x80000000); + jcc(Assembler::notZero, integer); + stop("integer indefinite value shouldn't be seen here"); + bind(integer); + } +#else + { + Label integer; + mov(tmp3, tmp2); // preserve tmp2 for parity check below + shlq(tmp3, 1); + jcc(Assembler::carryClear, integer); + jcc(Assembler::notZero, integer); + stop("integer indefinite value shouldn't be seen here"); + bind(integer); + } +#endif +#endif + + // get rid of duplicate arguments. Stack: X^Y + if (num_fpu_regs_in_use > 0) { + fxch(); fpop(); + fxch(); fpop(); + } else { + ffree(2); + ffree(1); + } + + testl(tmp2, 1); + jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y + // X <= 0, Y odd: X^Y = -abs(X)^Y + + fchs(); // Stack: -abs(X)^Y + jmp(done); + } + + // slow case: runtime call + bind(slow_case); + + fpop(); // pop incorrect result or int(Y) + + fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), + is_exp ? 
1 : 2, num_fpu_regs_in_use); + + // Come here with result in F-TOS + bind(done); +} + void MacroAssembler::fpop() { ffree(); fincstp(); @@ -8045,6 +8297,144 @@ #endif } +void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { + pusha(); + + // if we are coming from c1, xmm registers may be live + if (UseSSE >= 1) { + subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); + } + int off = 0; + if (UseSSE == 1) { + movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); + } else if (UseSSE >= 2) { + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7); +#ifdef _LP64 + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14); + movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15); +#endif + } + + // Preserve registers across runtime call + int incoming_argument_and_return_value_offset = -1; + if (num_fpu_regs_in_use > 1) { + // Must preserve all other FPU regs (could alternatively convert + // SharedRuntime::dsin, dcos etc. 
into assembly routines known not to trash + // FPU state, but can not trust C compiler) + NEEDS_CLEANUP; + // NOTE that in this case we also push the incoming argument(s) to + // the stack and restore it later; we also use this stack slot to + // hold the return value from dsin, dcos etc. + for (int i = 0; i < num_fpu_regs_in_use; i++) { + subptr(rsp, sizeof(jdouble)); + fstp_d(Address(rsp, 0)); + } + incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); + for (int i = nb_args-1; i >= 0; i--) { + fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); + } + } + + subptr(rsp, nb_args*sizeof(jdouble)); + for (int i = 0; i < nb_args; i++) { + fstp_d(Address(rsp, i*sizeof(jdouble))); + } + +#ifdef _LP64 + if (nb_args > 0) { + movdbl(xmm0, Address(rsp, 0)); + } + if (nb_args > 1) { + movdbl(xmm1, Address(rsp, sizeof(jdouble))); + } + assert(nb_args <= 2, "unsupported number of args"); +#endif // _LP64 + + // NOTE: we must not use call_VM_leaf here because that requires a + // complete interpreter frame in debug mode -- same bug as 4387334 + // MacroAssembler::call_VM_leaf_base is perfectly safe and will + // do proper 64bit abi + + NEEDS_CLEANUP; + // Need to add stack banging before this runtime call if it needs to + // be taken; however, there is no generic stack banging routine at + // the MacroAssembler level + + MacroAssembler::call_VM_leaf_base(runtime_entry, 0); + +#ifdef _LP64 + movsd(Address(rsp, 0), xmm0); + fld_d(Address(rsp, 0)); +#endif // _LP64 + addptr(rsp, sizeof(jdouble) * nb_args); + if (num_fpu_regs_in_use > 1) { + // Must save return value to stack and then restore entire FPU + // stack except incoming arguments + fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); + for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { + fld_d(Address(rsp, 0)); + addptr(rsp, sizeof(jdouble)); + } + fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); + addptr(rsp, sizeof(jdouble) * nb_args); + } 
+ + off = 0; + if (UseSSE == 1) { + movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); + } else if (UseSSE >= 2) { + movdbl(xmm0, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm1, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm2, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm3, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm4, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm5, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm6, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm7, Address(rsp,off++*sizeof(jdouble))); +#ifdef _LP64 + movdbl(xmm8, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm9, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm10, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm11, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm12, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm13, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm14, Address(rsp,off++*sizeof(jdouble))); + movdbl(xmm15, Address(rsp,off++*sizeof(jdouble))); +#endif + } + if (UseSSE >= 1) { + addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); + } + popa(); +} + static const double pi_4 = 0.7853981633974483; void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { @@ -8092,73 +8482,27 @@ // slow case: runtime call bind(slow_case); - // Preserve registers across runtime call - pusha(); - int incoming_argument_and_return_value_offset = -1; - if (num_fpu_regs_in_use > 1) { - // Must preserve all other FPU regs (could alternatively convert - // SharedRuntime::dsin and dcos into assembly routines known not to trash - // FPU state, but can not trust C compiler) - NEEDS_CLEANUP; - // NOTE that in this case 
we also push the incoming argument to - // the stack and restore it later; we also use this stack slot to - // hold the return value from dsin or dcos. - for (int i = 0; i < num_fpu_regs_in_use; i++) { - subptr(rsp, sizeof(jdouble)); - fstp_d(Address(rsp, 0)); - } - incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); - fld_d(Address(rsp, incoming_argument_and_return_value_offset)); - } - subptr(rsp, sizeof(jdouble)); - fstp_d(Address(rsp, 0)); -#ifdef _LP64 - movdbl(xmm0, Address(rsp, 0)); -#endif // _LP64 - - // NOTE: we must not use call_VM_leaf here because that requires a - // complete interpreter frame in debug mode -- same bug as 4387334 - // MacroAssembler::call_VM_leaf_base is perfectly safe and will - // do proper 64bit abi - - NEEDS_CLEANUP; - // Need to add stack banging before this runtime call if it needs to - // be taken; however, there is no generic stack banging routine at - // the MacroAssembler level + switch(trig) { case 's': { - MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0); + fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); } break; case 'c': { - MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0); + fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); } break; case 't': { - MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0); + fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); } break; default: assert(false, "bad intrinsic"); break; } -#ifdef _LP64 - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); -#endif // _LP64 - addptr(rsp, sizeof(jdouble)); - if (num_fpu_regs_in_use > 1) { - // Must save return value to stack and then restore entire FPU stack - fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); - for (int i = 0; i < num_fpu_regs_in_use; i++) { - 
fld_d(Address(rsp, 0)); - addptr(rsp, sizeof(jdouble)); - } - } - popa(); // Come here with result in F-TOS bind(done); diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/assembler_x86.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -1148,6 +1148,9 @@ void fxsave(Address dst); void fyl2x(); + void frndint(); + void f2xm1(); + void fldl2e(); void hlt(); @@ -2387,7 +2390,28 @@ void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } void ldmxcsr(AddressLiteral src); + // compute pow(x,y) and exp(x) with x86 instructions. Don't cover + // all corner cases and may result in NaN and require fallback to a + // runtime call. + void fast_pow(); + void fast_exp(); + + // computes exp(x). Fallback to runtime call included. + void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); } + // computes pow(x,y). Fallback to runtime call included. + void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); } + private: + + // call runtime as a fallback for trig functions and pow/exp. + void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use); + + // computes 2^(Ylog2X); Ylog2X in ST(0) + void pow_exp_core_encoding(); + + // computes pow(x,y) or exp(x). Fallback to runtime call included. 
+ void pow_or_exp(bool is_exp, int num_fpu_regs_in_use); + // these are private because users should be doing movflt/movdbl void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -2446,6 +2446,12 @@ // Should consider not saving rbx, if not necessary __ trigfunc('t', op->as_Op2()->fpu_stack_size()); break; + case lir_exp : + __ exp_with_fallback(op->as_Op2()->fpu_stack_size()); + break; + case lir_pow : + __ pow_with_fallback(op->as_Op2()->fpu_stack_size()); + break; default : ShouldNotReachHere(); } } else { diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -823,7 +823,7 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { - assert(x->number_of_arguments() == 1, "wrong type"); + assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type"); LIRItem value(x->argument_at(0), this); bool use_fpu = false; @@ -834,6 +834,8 @@ case vmIntrinsics::_dtan: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: + case vmIntrinsics::_dexp: + case vmIntrinsics::_dpow: use_fpu = true; } } else { @@ -843,20 +845,37 @@ value.load_item(); LIR_Opr calc_input = value.result(); + LIR_Opr calc_input2 = NULL; + if (x->id() == vmIntrinsics::_dpow) { + LIRItem extra_arg(x->argument_at(1), this); + if (UseSSE < 2) { + extra_arg.set_destroys_register(); + } + extra_arg.load_item(); + calc_input2 = extra_arg.result(); + } LIR_Opr calc_result = rlock_result(x); - // sin and cos need two free fpu stack slots, so register two temporary operands + // sin, cos, pow and exp need two free fpu stack slots, so register 
+ // two temporary operands LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0); LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1); if (use_fpu) { LIR_Opr tmp = FrameMap::fpu0_double_opr; + int tmp_start = 1; + if (calc_input2 != NULL) { + __ move(calc_input2, tmp); + tmp_start = 2; + calc_input2 = tmp; + } __ move(calc_input, tmp); calc_input = tmp; calc_result = tmp; - tmp1 = FrameMap::caller_save_fpu_reg_at(1); - tmp2 = FrameMap::caller_save_fpu_reg_at(2); + + tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start); + tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1); } switch(x->id()) { @@ -867,6 +886,8 @@ case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break; case vmIntrinsics::_dlog: __ log (calc_input, calc_result, tmp1); break; case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break; + case vmIntrinsics::_dexp: __ exp (calc_input, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break; + case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break; default: ShouldNotReachHere(); } diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/c1_LinearScan_x86.cpp --- a/src/cpu/x86/vm/c1_LinearScan_x86.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/c1_LinearScan_x86.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -690,8 +690,8 @@ case lir_mul_strictfp: case lir_div_strictfp: { - assert(op2->tmp_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot"); - insert_free_if_dead(op2->tmp_opr()); + assert(op2->tmp1_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot"); + insert_free_if_dead(op2->tmp1_opr()); assert(sim()->stack_size() <= 7, "at least one stack slot must be free"); // fall-through: continue with the normal handling of lir_mul and lir_div } @@ -787,16 +787,17 @@ case lir_log: case lir_log10: { - // log and log10 needs one temporary fpu stack slot, so there 
is ontemporary - // registers stored in temp of the operation. - // the stack allocator must guarantee that the stack slots are really free, - // otherwise there might be a stack overflow. + // log and log10 need one temporary fpu stack slot, so + // there is one temporary register stored in temp of the + // operation. the stack allocator must guarantee that the stack + // slots are really free, otherwise there might be a stack + // overflow. assert(right->is_illegal(), "must be"); assert(left->is_fpu_register(), "must be"); assert(res->is_fpu_register(), "must be"); - assert(op2->tmp_opr()->is_fpu_register(), "must be"); + assert(op2->tmp1_opr()->is_fpu_register(), "must be"); - insert_free_if_dead(op2->tmp_opr()); + insert_free_if_dead(op2->tmp1_opr()); insert_free_if_dead(res, left); insert_exchange(left); do_rename(left, res); @@ -812,8 +813,9 @@ case lir_tan: case lir_sin: - case lir_cos: { + case lir_cos: + case lir_exp: { - // sin and cos need two temporary fpu stack slots, so there are two temporary + // sin, cos and exp need two temporary fpu stack slots, so there are two temporary // registers (stored in right and temp of the operation). // the stack allocator must guarantee that the stack slots are really free, // otherwise there might be a stack overflow. 
@@ -821,11 +823,11 @@ assert(res->is_fpu_register(), "must be"); // assert(left->is_last_use(), "old value gets destroyed"); assert(right->is_fpu_register(), "right is used as the first temporary register"); - assert(op2->tmp_opr()->is_fpu_register(), "temp is used as the second temporary register"); - assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp_opr()) && fpu_num(op2->tmp_opr()) != fpu_num(res), "need distinct temp registers"); + assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register"); + assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers"); insert_free_if_dead(right); - insert_free_if_dead(op2->tmp_opr()); + insert_free_if_dead(op2->tmp1_opr()); insert_free_if_dead(res, left); insert_exchange(left); @@ -839,6 +841,53 @@ break; } + case lir_pow: { + // pow needs two temporary fpu stack slots, so there are two temporary + // registers (stored in tmp1 and tmp2 of the operation). + // the stack allocator must guarantee that the stack slots are really free, + // otherwise there might be a stack overflow. 
+ assert(left->is_fpu_register(), "must be"); + assert(right->is_fpu_register(), "must be"); + assert(res->is_fpu_register(), "must be"); + + assert(op2->tmp1_opr()->is_fpu_register(), "tmp1 is the first temporary register"); + assert(op2->tmp2_opr()->is_fpu_register(), "tmp2 is the second temporary register"); + assert(fpu_num(left) != fpu_num(right) && fpu_num(left) != fpu_num(op2->tmp1_opr()) && fpu_num(left) != fpu_num(op2->tmp2_opr()) && fpu_num(left) != fpu_num(res), "need distinct temp registers"); + assert(fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(right) != fpu_num(op2->tmp2_opr()) && fpu_num(right) != fpu_num(res), "need distinct temp registers"); + assert(fpu_num(op2->tmp1_opr()) != fpu_num(op2->tmp2_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers"); + assert(fpu_num(op2->tmp2_opr()) != fpu_num(res), "need distinct temp registers"); + + insert_free_if_dead(op2->tmp1_opr()); + insert_free_if_dead(op2->tmp2_opr()); + + // Must bring both operands to top of stack with following operand ordering: + // * fpu stack before pow: ... right left + // * fpu stack after pow: ... 
left + + insert_free_if_dead(res, right); + + if (tos_offset(right) != 1) { + insert_exchange(right); + insert_exchange(1); + } + insert_exchange(left); + assert(tos_offset(right) == 1, "check"); + assert(tos_offset(left) == 0, "check"); + + new_left = to_fpu_stack_top(left); + new_right = to_fpu_stack(right); + + op2->set_fpu_stack_size(sim()->stack_size()); + assert(sim()->stack_size() <= 6, "at least two stack slots must be free"); + + sim()->pop(); + + do_rename(right, res); + + new_res = to_fpu_stack_top(res); + break; + } + default: { assert(false, "missed a fpu-operation"); } diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/interpreter_x86_32.cpp --- a/src/cpu/x86/vm/interpreter_x86_32.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/interpreter_x86_32.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -181,6 +181,19 @@ __ push_fTOS(); __ pop_fTOS(); break; + case Interpreter::java_lang_math_pow: + __ fld_d(Address(rsp, 3*wordSize)); // second argument + __ pow_with_fallback(0); + // Store to stack to convert 80bit precision back to 64bits + __ push_fTOS(); + __ pop_fTOS(); + break; + case Interpreter::java_lang_math_exp: + __ exp_with_fallback(0); + // Store to stack to convert 80bit precision back to 64bits + __ push_fTOS(); + __ pop_fTOS(); + break; default : ShouldNotReachHere(); } diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/interpreter_x86_64.cpp --- a/src/cpu/x86/vm/interpreter_x86_64.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/interpreter_x86_64.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -271,6 +271,14 @@ case Interpreter::java_lang_math_log10: __ flog10(); break; + case Interpreter::java_lang_math_pow: + __ fld_d(Address(rsp, 3*wordSize)); // second argument (one + // empty stack slot) + __ pow_with_fallback(0); + break; + case Interpreter::java_lang_math_exp: + __ exp_with_fallback(0); + break; default : ShouldNotReachHere(); } diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- 
a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -2136,11 +2136,23 @@ __ trigfunc('t'); __ ret(0); } + { + StubCodeMark mark(this, "StubRoutines", "exp"); + StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc(); - // The intrinsic version of these seem to return the same value as - // the strict version. - StubRoutines::_intrinsic_exp = SharedRuntime::dexp; - StubRoutines::_intrinsic_pow = SharedRuntime::dpow; + __ fld_d(Address(rsp, 4)); + __ exp_with_fallback(0); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "pow"); + StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); + + __ fld_d(Address(rsp, 12)); + __ fld_d(Address(rsp, 4)); + __ pow_with_fallback(0); + __ ret(0); + } } public: diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -2928,11 +2928,34 @@ __ addq(rsp, 8); __ ret(0); } - - // The intrinsic version of these seem to return the same value as - // the strict version. 
- StubRoutines::_intrinsic_exp = SharedRuntime::dexp; - StubRoutines::_intrinsic_pow = SharedRuntime::dpow; + { + StubCodeMark mark(this, "StubRoutines", "exp"); + StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ exp_with_fallback(0); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "pow"); + StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm1); + __ fld_d(Address(rsp, 0)); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ pow_with_fallback(0); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } } #undef __ diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -1518,7 +1518,9 @@ case Interpreter::java_lang_math_abs : // fall thru case Interpreter::java_lang_math_log : // fall thru case Interpreter::java_lang_math_log10 : // fall thru - case Interpreter::java_lang_math_sqrt : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; case Interpreter::java_lang_ref_reference_get : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; default : ShouldNotReachHere(); break; @@ -1540,7 +1542,9 @@ case Interpreter::java_lang_math_abs : // fall thru case Interpreter::java_lang_math_log : // fall thru case Interpreter::java_lang_math_log10 : // fall thru - case 
Interpreter::java_lang_math_sqrt : + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : return false; default: return true; diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -1534,7 +1534,9 @@ case Interpreter::java_lang_math_abs : // fall thru case Interpreter::java_lang_math_log : // fall thru case Interpreter::java_lang_math_log10 : // fall thru - case Interpreter::java_lang_math_sqrt : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind); break; + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind); break; case Interpreter::java_lang_ref_reference_get : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; default : ShouldNotReachHere(); break; @@ -1558,7 +1560,9 @@ case Interpreter::java_lang_math_abs : // fall thru case Interpreter::java_lang_math_log : // fall thru case Interpreter::java_lang_math_log10 : // fall thru - case Interpreter::java_lang_math_sqrt : + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : return false; default: return true; diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/x86_32.ad Sat Jun 02 07:32:21 2012 -0400 @@ -2536,45 +2536,6 @@ __ fld_d(Address(rsp, 0)); %} - // Compute X^Y using Intel's fast hardware instructions, if possible. - // Otherwise return a NaN. - enc_class pow_exp_core_encoding %{ - // FPR1 holds Y*ln2(X). 
Compute FPR1 = 2^(Y*ln2(X)) - emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q - emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q - emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q) - emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q) - emit_opcode(cbuf,0x1C); - emit_d8(cbuf,0x24); - emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1 - emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1 - emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q) - emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q) - encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false); - emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask - emit_rm(cbuf, 0x3, 0x0, ECX_enc); - emit_d32(cbuf,0xFFFFF800); - emit_opcode(cbuf,0x81); // add rax,1023 - the double exponent bias - emit_rm(cbuf, 0x3, 0x0, EAX_enc); - emit_d32(cbuf,1023); - emit_opcode(cbuf,0x8B); // mov rbx,eax - emit_rm(cbuf, 0x3, EBX_enc, EAX_enc); - emit_opcode(cbuf,0xC1); // shl rax,20 - Slide to exponent position - emit_rm(cbuf,0x3,0x4,EAX_enc); - emit_d8(cbuf,20); - emit_opcode(cbuf,0x85); // test rbx,ecx - check for overflow - emit_rm(cbuf, 0x3, EBX_enc, ECX_enc); - emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne rax,ecx - overflow; stuff NAN into EAX - emit_rm(cbuf, 0x3, EAX_enc, ECX_enc); - emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as part of double word - encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false); - emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1< $Y // KILL $rax, $rcx, $rdx" %} + ins_encode %{ + __ subptr(rsp, 8); + __ fld_s($X$$reg - 1); + __ fast_pow(); + __ addptr(rsp, 8); + %} + ins_pipe( pipe_slow ); +%} + +instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ predicate (UseSSE>=2); match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power - effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx 
); - format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" - "MOVSD [ESP],$src1\n\t" - "FLD FPR1,$src1\n\t" - "MOVSD [ESP],$src0\n\t" - "FLD FPR1,$src0\n\t" - "FYL2X \t\t\t# Q=Y*ln2(X)\n\t" - - "FDUP \t\t\t# Q Q\n\t" - "FRNDINT\t\t\t# int(Q) Q\n\t" - "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" - "FISTP dword [ESP]\n\t" - "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" - "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" - "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead - "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" - "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" - "ADD EAX,1023\t\t# Double exponent bias\n\t" - "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" - "SHL EAX,20\t\t# Shift exponent into place\n\t" - "TEST EBX,ECX\t\t# Check for overflow\n\t" - "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" - "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" - "MOV [ESP+0],0\n\t" - "FMUL ST(0),[ESP+0]\t# Scale\n\t" - - "FST_D [ESP]\n\t" - "MOVSD $dst,[ESP]\n\t" - "ADD ESP,8" - %} - ins_encode( push_stack_temp_qword, - push_xmm_to_fpr1(src1), - push_xmm_to_fpr1(src0), - Opcode(0xD9), Opcode(0xF1), // fyl2x - pow_exp_core_encoding, - Push_ResultD(dst) ); - ins_pipe( pipe_slow ); -%} - - -instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ + effect(KILL rax, KILL rdx, KILL rcx, KILL cr); + format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} + ins_encode %{ + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src1$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ movdbl(Address(rsp, 0), $src0$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ fast_pow(); + __ fstp_d(Address(rsp, 0)); + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, 8); + %} + ins_pipe( pipe_slow ); +%} + + +instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ predicate (UseSSE<=1); match(Set dpr1 (ExpD dpr1)); - effect(KILL rax, KILL rbx, KILL rcx); - format %{ "SUB ESP,8\t\t# Fast-path EXP encoding" - "FLDL2E \t\t\t# 
Ld log2(e) X\n\t" - "FMULP \t\t\t# Q=X*log2(e)\n\t" - - "FDUP \t\t\t# Q Q\n\t" - "FRNDINT\t\t\t# int(Q) Q\n\t" - "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" - "FISTP dword [ESP]\n\t" - "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" - "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" - "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead - "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" - "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" - "ADD EAX,1023\t\t# Double exponent bias\n\t" - "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" - "SHL EAX,20\t\t# Shift exponent into place\n\t" - "TEST EBX,ECX\t\t# Check for overflow\n\t" - "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" - "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" - "MOV [ESP+0],0\n\t" - "FMUL ST(0),[ESP+0]\t# Scale\n\t" - - "ADD ESP,8" - %} - ins_encode( push_stack_temp_qword, - Opcode(0xD9), Opcode(0xEA), // fldl2e - Opcode(0xDE), Opcode(0xC9), // fmulp - pow_exp_core_encoding, - pop_stack_temp_qword); - ins_pipe( pipe_slow ); -%} - -instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ + effect(KILL rax, KILL rcx, KILL rdx, KILL cr); + format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %} + ins_encode %{ + __ fast_exp(); + %} + ins_pipe( pipe_slow ); +%} + +instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ predicate (UseSSE>=2); match(Set dst (ExpD src)); - effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); - format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" - "MOVSD [ESP],$src\n\t" - "FLDL2E \t\t\t# Ld log2(e) X\n\t" - "FMULP \t\t\t# Q=X*log2(e) X\n\t" - - "FDUP \t\t\t# Q Q\n\t" - "FRNDINT\t\t\t# int(Q) Q\n\t" - "FSUB ST(1),ST(0)\t# int(Q) frac(Q)\n\t" - "FISTP dword [ESP]\n\t" - "F2XM1 \t\t\t# 2^frac(Q)-1 int(Q)\n\t" - "FLD1 \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t" - "FADDP \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead - "MOV EAX,[ESP]\t# Pick up int(Q)\n\t" - "MOV ECX,0xFFFFF800\t# Overflow mask\n\t" 
- "ADD EAX,1023\t\t# Double exponent bias\n\t" - "MOV EBX,EAX\t\t# Preshifted biased expo\n\t" - "SHL EAX,20\t\t# Shift exponent into place\n\t" - "TEST EBX,ECX\t\t# Check for overflow\n\t" - "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t" - "MOV [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t" - "MOV [ESP+0],0\n\t" - "FMUL ST(0),[ESP+0]\t# Scale\n\t" - - "FST_D [ESP]\n\t" - "MOVSD $dst,[ESP]\n\t" - "ADD ESP,8" - %} - ins_encode( Push_SrcD(src), - Opcode(0xD9), Opcode(0xEA), // fldl2e - Opcode(0xDE), Opcode(0xC9), // fmulp - pow_exp_core_encoding, - Push_ResultD(dst) ); - ins_pipe( pipe_slow ); -%} - - + effect(KILL rax, KILL rcx, KILL rdx, KILL cr); + format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %} + ins_encode %{ + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ fast_exp(); + __ fstp_d(Address(rsp, 0)); + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, 8); + %} + ins_pipe( pipe_slow ); +%} instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ predicate (UseSSE<=1); diff -r fab99b17c1de -r 4434fdad6b37 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Fri Jun 01 20:17:46 2012 +0200 +++ b/src/cpu/x86/vm/x86_64.ad Sat Jun 02 07:32:21 2012 -0400 @@ -9823,7 +9823,39 @@ ins_pipe( pipe_slow ); %} - +instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{ + match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power + effect(KILL rax, KILL rdx, KILL rcx, KILL cr); + format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} + ins_encode %{ + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src1$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ movdbl(Address(rsp, 0), $src0$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ fast_pow(); + __ fstp_d(Address(rsp, 0)); + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, 8); + %} + ins_pipe( pipe_slow ); +%} + +instruct expD_reg(regD dst, regD src, 
rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{ + match(Set dst (ExpD src)); + effect(KILL rax, KILL rcx, KILL rdx, KILL cr); + format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %} + ins_encode %{ + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ fast_exp(); + __ fstp_d(Address(rsp, 0)); + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, 8); + %} + ins_pipe( pipe_slow ); +%} //----------Arithmetic Conversion Instructions--------------------------------- diff -r fab99b17c1de -r 4434fdad6b37 src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp --- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -522,11 +522,12 @@ if ((sig == SIGSEGV || sig == SIGBUS) && os::is_poll_address((address)info->si_addr)) { stub = SharedRuntime::get_poll_stub(pc); -#if defined(__APPLE__) && !defined(AMD64) +#if defined(__APPLE__) // 32-bit Darwin reports a SIGBUS for nearly all memory access exceptions. + // 64-bit Darwin may also use a SIGBUS (seen with compressed oops). // Catching SIGBUS here prevents the implicit SIGBUS NULL check below from // being called, so only do so if the implicit NULL check is not necessary. 
- } else if (sig == SIGBUS && MacroAssembler::needs_explicit_null_check((int)info->si_addr)) { + } else if (sig == SIGBUS && MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { #else } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { #endif diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/c1/c1_GraphBuilder.cpp --- a/src/share/vm/c1/c1_GraphBuilder.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -2949,6 +2949,8 @@ case vmIntrinsics::_dtan : // fall through case vmIntrinsics::_dlog : // fall through case vmIntrinsics::_dlog10 : // fall through + case vmIntrinsics::_dexp : // fall through + case vmIntrinsics::_dpow : // fall through { // Compiles where the root method is an intrinsic need a special // compilation environment because the bytecodes for the method @@ -2969,6 +2971,9 @@ _state = start_block->state()->copy_for_parsing(); _last = start_block; load_local(doubleType, 0); + if (scope->method()->intrinsic_id() == vmIntrinsics::_dpow) { + load_local(doubleType, 2); + } // Emit the intrinsic node. 
bool result = try_inline_intrinsics(scope->method()); @@ -3182,6 +3187,8 @@ case vmIntrinsics::_dtan : // fall through case vmIntrinsics::_dlog : // fall through case vmIntrinsics::_dlog10 : // fall through + case vmIntrinsics::_dexp : // fall through + case vmIntrinsics::_dpow : // fall through if (!InlineMathNatives) return false; cantrap = false; preserves_state = true; diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/c1/c1_LIR.cpp --- a/src/share/vm/c1/c1_LIR.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/c1/c1_LIR.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -624,11 +624,13 @@ { assert(op->as_Op2() != NULL, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; + assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() && + op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); if (op2->_info) do_info(op2->_info); if (op2->_opr1->is_valid()) do_input(op2->_opr1); if (op2->_opr2->is_valid()) do_input(op2->_opr2); - if (op2->_tmp->is_valid()) do_temp(op2->_tmp); + if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); if (op2->_result->is_valid()) do_output(op2->_result); break; @@ -641,7 +643,8 @@ assert(op->as_Op2() != NULL, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; - assert(op2->_info == NULL && op2->_tmp->is_illegal(), "not used"); + assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && + op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); do_input(op2->_opr1); @@ -665,10 +668,12 @@ assert(op2->_opr1->is_valid(), "used"); assert(op2->_opr2->is_valid(), "used"); assert(op2->_result->is_valid(), "used"); + assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() && + op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); do_input(op2->_opr1); do_temp(op2->_opr1); do_input(op2->_opr2); do_temp(op2->_opr2); - if (op2->_tmp->is_valid()) do_temp(op2->_tmp); + if (op2->_tmp1->is_valid()) 
do_temp(op2->_tmp1); do_output(op2->_result); break; @@ -682,6 +687,8 @@ if (op2->_opr1->is_valid()) do_temp(op2->_opr1); if (op2->_opr2->is_valid()) do_input(op2->_opr2); // exception object is input parameter assert(op2->_result->is_illegal(), "no result"); + assert(op2->_tmp2->is_illegal() && op2->_tmp3->is_illegal() && + op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); break; } @@ -702,7 +709,8 @@ case lir_sin: case lir_cos: case lir_log: - case lir_log10: { + case lir_log10: + case lir_exp: { assert(op->as_Op2() != NULL, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; @@ -711,16 +719,47 @@ // Register input operand as temp to guarantee that it doesn't // overlap with the input. assert(op2->_info == NULL, "not used"); + assert(op2->_tmp5->is_illegal(), "not used"); + assert(op2->_tmp2->is_valid() == (op->code() == lir_exp), "not used"); + assert(op2->_tmp3->is_valid() == (op->code() == lir_exp), "not used"); + assert(op2->_tmp4->is_valid() == (op->code() == lir_exp), "not used"); assert(op2->_opr1->is_valid(), "used"); do_input(op2->_opr1); do_temp(op2->_opr1); if (op2->_opr2->is_valid()) do_temp(op2->_opr2); - if (op2->_tmp->is_valid()) do_temp(op2->_tmp); + if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); + if (op2->_tmp2->is_valid()) do_temp(op2->_tmp2); + if (op2->_tmp3->is_valid()) do_temp(op2->_tmp3); + if (op2->_tmp4->is_valid()) do_temp(op2->_tmp4); if (op2->_result->is_valid()) do_output(op2->_result); break; } + case lir_pow: { + assert(op->as_Op2() != NULL, "must be"); + LIR_Op2* op2 = (LIR_Op2*)op; + + // On x86 pow needs two temporary fpu stack slots: tmp1 and + // tmp2. Register input operands as temps to guarantee that they + // don't overlap with the temporary slots. 
+ assert(op2->_info == NULL, "not used"); + assert(op2->_opr1->is_valid() && op2->_opr2->is_valid(), "used"); + assert(op2->_tmp1->is_valid() && op2->_tmp2->is_valid() && op2->_tmp3->is_valid() + && op2->_tmp4->is_valid() && op2->_tmp5->is_valid(), "used"); + assert(op2->_result->is_valid(), "used"); + + do_input(op2->_opr1); do_temp(op2->_opr1); + do_input(op2->_opr2); do_temp(op2->_opr2); + do_temp(op2->_tmp1); + do_temp(op2->_tmp2); + do_temp(op2->_tmp3); + do_temp(op2->_tmp4); + do_temp(op2->_tmp5); + do_output(op2->_result); + + break; + } // LIR_Op3 case lir_idiv: @@ -1670,6 +1709,8 @@ case lir_tan: s = "tan"; break; case lir_log: s = "log"; break; case lir_log10: s = "log10"; break; + case lir_exp: s = "exp"; break; + case lir_pow: s = "pow"; break; case lir_logic_and: s = "logic_and"; break; case lir_logic_or: s = "logic_or"; break; case lir_logic_xor: s = "logic_xor"; break; @@ -1892,7 +1933,11 @@ } in_opr1()->print(out); out->print(" "); in_opr2()->print(out); out->print(" "); - if (tmp_opr()->is_valid()) { tmp_opr()->print(out); out->print(" "); } + if (tmp1_opr()->is_valid()) { tmp1_opr()->print(out); out->print(" "); } + if (tmp2_opr()->is_valid()) { tmp2_opr()->print(out); out->print(" "); } + if (tmp3_opr()->is_valid()) { tmp3_opr()->print(out); out->print(" "); } + if (tmp4_opr()->is_valid()) { tmp4_opr()->print(out); out->print(" "); } + if (tmp5_opr()->is_valid()) { tmp5_opr()->print(out); out->print(" "); } result_opr()->print(out); } diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/c1/c1_LIR.hpp --- a/src/share/vm/c1/c1_LIR.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/c1/c1_LIR.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -916,6 +916,8 @@ , lir_tan , lir_log , lir_log10 + , lir_exp + , lir_pow , lir_logic_and , lir_logic_or , lir_logic_xor @@ -1560,7 +1562,11 @@ LIR_Opr _opr1; LIR_Opr _opr2; BasicType _type; - LIR_Opr _tmp; + LIR_Opr _tmp1; + LIR_Opr _tmp2; + LIR_Opr _tmp3; + LIR_Opr _tmp4; + LIR_Opr _tmp5; LIR_Condition _condition; void 
verify() const; @@ -1573,7 +1579,11 @@ , _type(T_ILLEGAL) , _condition(condition) , _fpu_stack_size(0) - , _tmp(LIR_OprFact::illegalOpr) { + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { assert(code == lir_cmp, "code check"); } @@ -1584,7 +1594,11 @@ , _type(type) , _condition(condition) , _fpu_stack_size(0) - , _tmp(LIR_OprFact::illegalOpr) { + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { assert(code == lir_cmove, "code check"); assert(type != T_ILLEGAL, "cmove should have type"); } @@ -1597,25 +1611,38 @@ , _type(type) , _condition(lir_cond_unknown) , _fpu_stack_size(0) - , _tmp(LIR_OprFact::illegalOpr) { + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) + , _tmp5(LIR_OprFact::illegalOpr) { assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); } - LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp) + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, + LIR_Opr tmp3 = LIR_OprFact::illegalOpr, LIR_Opr tmp4 = LIR_OprFact::illegalOpr, LIR_Opr tmp5 = LIR_OprFact::illegalOpr) : LIR_Op(code, result, NULL) , _opr1(opr1) , _opr2(opr2) , _type(T_ILLEGAL) , _condition(lir_cond_unknown) , _fpu_stack_size(0) - , _tmp(tmp) { + , _tmp1(tmp1) + , _tmp2(tmp2) + , _tmp3(tmp3) + , _tmp4(tmp4) + , _tmp5(tmp5) { assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } LIR_Opr in_opr2() const { return _opr2; } BasicType type() const { return _type; } - LIR_Opr tmp_opr() const { return _tmp; } + LIR_Opr tmp1_opr() const { return _tmp1; } + LIR_Opr 
tmp2_opr() const { return _tmp2; } + LIR_Opr tmp3_opr() const { return _tmp3; } + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); return _condition; } @@ -2025,6 +2052,8 @@ void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); } void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); } void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); } + void exp (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_exp , from, tmp1, to, tmp2, tmp3, tmp4, tmp5)); } + void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); } void add (LIR_Opr left, LIR_Opr right, LIR_Opr res) { append(new LIR_Op2(lir_add, left, right, res)); } void sub (LIR_Opr left, LIR_Opr right, LIR_Opr res, CodeEmitInfo* info = NULL) { append(new LIR_Op2(lir_sub, left, right, res, info)); } diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/c1/c1_LIRAssembler.cpp --- a/src/share/vm/c1/c1_LIRAssembler.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/c1/c1_LIRAssembler.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -718,7 +718,7 @@ if (op->in_opr2()->is_constant()) { shift_op(op->code(), op->in_opr1(), op->in_opr2()->as_constant_ptr()->as_jint(), op->result_opr()); } else { - shift_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp_opr()); + shift_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); } break; @@ -746,6 +746,8 @@ case lir_cos: case lir_log: case lir_log10: + case lir_exp: + case lir_pow: 
intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); break; diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -2960,7 +2960,9 @@ case vmIntrinsics::_dsqrt: // fall through case vmIntrinsics::_dtan: // fall through case vmIntrinsics::_dsin : // fall through - case vmIntrinsics::_dcos : do_MathIntrinsic(x); break; + case vmIntrinsics::_dcos : // fall through + case vmIntrinsics::_dexp : // fall through + case vmIntrinsics::_dpow : do_MathIntrinsic(x); break; case vmIntrinsics::_arraycopy: do_ArrayCopy(x); break; // java.nio.Buffer.checkIndex diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/c1/c1_LinearScan.cpp --- a/src/share/vm/c1/c1_LinearScan.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/c1/c1_LinearScan.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -6579,6 +6579,8 @@ case lir_abs: case lir_log10: case lir_log: + case lir_pow: + case lir_exp: case lir_logic_and: case lir_logic_or: case lir_logic_xor: diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/classfile/classFileParser.cpp --- a/src/share/vm/classfile/classFileParser.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/classfile/classFileParser.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -1082,12 +1082,36 @@ int num_injected = 0; InjectedField* injected = JavaClasses::get_injected(class_name, &num_injected); - - // Tuples of shorts [access, name index, sig index, initial value index, byte offset, generic signature index] - typeArrayOop new_fields = oopFactory::new_permanent_shortArray((length + num_injected) * FieldInfo::field_slots, CHECK_(nullHandle)); - typeArrayHandle fields(THREAD, new_fields); + int total_fields = length + num_injected; + + // The field array starts with tuples of shorts + // [access, name index, sig index, initial value index, byte offset]. 
+ // A generic signature slot only exists for a field with a generic + // signature attribute. And the access flag is set with + // JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE for that field. The generic + // signature slots are at the end of the field array and after all + // other fields' data. + // + // f1: [access, name index, sig index, initial value index, low_offset, high_offset] + // f2: [access, name index, sig index, initial value index, low_offset, high_offset] + // ... + // fn: [access, name index, sig index, initial value index, low_offset, high_offset] + // [generic signature index] + // [generic signature index] + // ... + // + // Allocate a temporary resource array for field data. For each field, + // a slot is reserved in the temporary array for the generic signature + // index. After parsing all fields, the data are copied to a permanent + // array and any unused slots will be discarded. + ResourceMark rm(THREAD); + u2* fa = NEW_RESOURCE_ARRAY_IN_THREAD( + THREAD, u2, total_fields * (FieldInfo::field_slots + 1)); typeArrayHandle field_annotations; + // The generic signature slots start after all other fields' data. 
+ int generic_signature_slot = total_fields * FieldInfo::field_slots; + int num_generic_signature = 0; for (int n = 0; n < length; n++) { cfs->guarantee_more(8, CHECK_(nullHandle)); // access_flags, name_index, descriptor_index, attributes_count @@ -1135,14 +1159,19 @@ if (is_synthetic) { access_flags.set_is_synthetic(); } + if (generic_signature_index != 0) { + access_flags.set_field_has_generic_signature(); + fa[generic_signature_slot] = generic_signature_index; + generic_signature_slot ++; + num_generic_signature ++; + } } - FieldInfo* field = FieldInfo::from_field_array(fields(), n); + FieldInfo* field = FieldInfo::from_field_array(fa, n); field->initialize(access_flags.as_short(), name_index, signature_index, constantvalue_index, - generic_signature_index, 0); BasicType type = cp->basic_type_for_signature_at(signature_index); @@ -1155,8 +1184,8 @@ field->set_offset(atype); } + int index = length; if (num_injected != 0) { - int index = length; for (int n = 0; n < num_injected; n++) { // Check for duplicates if (injected[n].may_be_java) { @@ -1164,7 +1193,7 @@ Symbol* signature = injected[n].signature(); bool duplicate = false; for (int i = 0; i < length; i++) { - FieldInfo* f = FieldInfo::from_field_array(fields(), i); + FieldInfo* f = FieldInfo::from_field_array(fa, i); if (name == cp->symbol_at(f->name_index()) && signature == cp->symbol_at(f->signature_index())) { // Symbol is desclared in Java so skip this one @@ -1179,12 +1208,11 @@ } // Injected field - FieldInfo* field = FieldInfo::from_field_array(fields(), index); + FieldInfo* field = FieldInfo::from_field_array(fa, index); field->initialize(JVM_ACC_FIELD_INTERNAL, injected[n].name_index, injected[n].signature_index, 0, - 0, 0); BasicType type = FieldType::basic_type(injected[n].signature()); @@ -1197,17 +1225,27 @@ field->set_offset(atype); index++; } - - if (index < length + num_injected) { - // sometimes injected fields already exist in the Java source so - // the fields array could be too long. 
In that case trim the - // fields array. - new_fields = oopFactory::new_permanent_shortArray(index * FieldInfo::field_slots, CHECK_(nullHandle)); - for (int i = 0; i < index * FieldInfo::field_slots; i++) { - new_fields->short_at_put(i, fields->short_at(i)); - } - fields = new_fields; + } + + // Now copy the fields' data from the temporary resource array. + // Sometimes injected fields already exist in the Java source so + // the fields array could be too long. In that case the + // fields array is trimed. Also unused slots that were reserved + // for generic signature indexes are discarded. + typeArrayOop new_fields = oopFactory::new_permanent_shortArray( + index * FieldInfo::field_slots + num_generic_signature, + CHECK_(nullHandle)); + typeArrayHandle fields(THREAD, new_fields); + { + int i = 0; + for (; i < index * FieldInfo::field_slots; i++) { + new_fields->short_at_put(i, fa[i]); } + for (int j = total_fields * FieldInfo::field_slots; + j < generic_signature_slot; j++) { + new_fields->short_at_put(i++, fa[j]); + } + assert(i == new_fields->length(), ""); } if (_need_verify && length > 1) { diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/classfile/systemDictionary.cpp --- a/src/share/vm/classfile/systemDictionary.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/classfile/systemDictionary.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -2763,7 +2763,7 @@ class_size += ik->local_interfaces()->size(); class_size += ik->transitive_interfaces()->size(); // We do not have to count implementors, since we only store one! 
- class_size += ik->all_fields_count() * FieldInfo::field_slots; + class_size += ik->fields()->length(); } } diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -58,8 +58,11 @@ void CompactibleFreeListSpace::set_cms_values() { // Set CMS global values assert(MinChunkSize == 0, "already set"); - #define numQuanta(x,y) ((x+y-1)/y) - MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment; + + // MinChunkSize should be a multiple of MinObjAlignment and be large enough + // for chunks to contain a FreeChunk. + size_t min_chunk_size_in_bytes = align_size_up(sizeof(FreeChunk), MinObjAlignmentInBytes); + MinChunkSize = min_chunk_size_in_bytes / BytesPerWord; assert(IndexSetStart == 0 && IndexSetStride == 0, "already set"); IndexSetStart = MinChunkSize; @@ -2534,12 +2537,8 @@ " linear allocation buffers"); assert(BinaryTreeDictionary::min_tree_chunk_size*HeapWordSize == sizeof(TreeChunk), "else MIN_TREE_CHUNK_SIZE is wrong"); - assert((IndexSetStride == 2 && IndexSetStart == 4) || // 32-bit - (IndexSetStride == 1 && IndexSetStart == 3), "just checking"); // 64-bit - assert((IndexSetStride != 2) || (IndexSetStart % 2 == 0), - "Some for-loops may be incorrectly initialized"); - assert((IndexSetStride != 2) || (IndexSetSize % 2 == 1), - "For-loops that iterate over IndexSet with stride 2 may be wrong"); + assert(IndexSetStart != 0, "IndexSetStart not initialized"); + assert(IndexSetStride != 0, "IndexSetStride not initialized"); } #endif diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Jun 01 20:17:46 2012 +0200 +++ 
b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -952,9 +952,18 @@ } should_try_gc = false; } else { - // Read the GC count while still holding the Heap_lock. - gc_count_before = total_collections(); - should_try_gc = true; + // The GCLocker may not be active but the GCLocker initiated + // GC may not yet have been performed (GCLocker::needs_gc() + // returns true). In this case we do not try this GC and + // wait until the GCLocker initiated GC is performed, and + // then retry the allocation. + if (GC_locker::needs_gc()) { + should_try_gc = false; + } else { + // Read the GC count while still holding the Heap_lock. + gc_count_before = total_collections(); + should_try_gc = true; + } } } @@ -975,6 +984,9 @@ return NULL; } } else { + // The GCLocker is either active or the GCLocker initiated + // GC has not yet been performed. Stall until it is and + // then retry the allocation. GC_locker::stall_until_clear(); } @@ -1054,9 +1066,18 @@ if (GC_locker::is_active_and_needs_gc()) { should_try_gc = false; } else { - // Read the GC count while still holding the Heap_lock. - gc_count_before = total_collections(); - should_try_gc = true; + // The GCLocker may not be active but the GCLocker initiated + // GC may not yet have been performed (GCLocker::needs_gc() + // returns true). In this case we do not try this GC and + // wait until the GCLocker initiated GC is performed, and + // then retry the allocation. + if (GC_locker::needs_gc()) { + should_try_gc = false; + } else { + // Read the GC count while still holding the Heap_lock. + gc_count_before = total_collections(); + should_try_gc = true; + } } } @@ -1081,6 +1102,9 @@ return NULL; } } else { + // The GCLocker is either active or the GCLocker initiated + // GC has not yet been performed. Stall until it is and + // then retry the allocation. 
GC_locker::stall_until_clear(); } @@ -3906,12 +3930,6 @@ gc_epilogue(false); } - - if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) { - gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum); - print_tracing_info(); - vm_exit(-1); - } } // The closing of the inner scope, immediately above, will complete diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -133,12 +133,7 @@ ? ParallelGCThreads : 1), _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), - _all_pause_times_ms(new NumberSeq()), _stop_world_start(0.0), - _all_stop_world_times_ms(new NumberSeq()), - _all_yield_times_ms(new NumberSeq()), - - _summary(new Summary()), _cur_clear_ct_time_ms(0.0), _root_region_scan_wait_time_ms(0.0), @@ -154,12 +149,6 @@ _num_cc_clears(0L), #endif - _aux_num(10), - _all_aux_times_ms(new NumberSeq[_aux_num]), - _cur_aux_start_times_ms(new double[_aux_num]), - _cur_aux_times_ms(new double[_aux_num]), - _cur_aux_times_set(new bool[_aux_num]), - _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), @@ -185,8 +174,6 @@ _pause_time_target_ms((double) MaxGCPauseMillis), _gcs_are_young(true), - _young_pause_num(0), - _mixed_pause_num(0), _during_marking(false), _in_marking_window(false), @@ -197,8 +184,6 @@ _recent_avg_pause_time_ratio(0.0), - _all_full_gc_times_ms(new NumberSeq()), - _initiate_conc_mark_if_possible(false), _during_initial_mark_pause(false), _last_young_gc(false), @@ -851,7 +836,7 @@ double full_gc_time_sec = end_sec - _cur_collection_start_sec; double full_gc_time_ms = full_gc_time_sec * 1000.0; - _all_full_gc_times_ms->add(full_gc_time_ms); + 
_trace_gen1_time_data.record_full_collection(full_gc_time_ms); update_recent_gc_times(end_sec, full_gc_time_ms); @@ -900,7 +885,7 @@ _g1->used(), _g1->recalculate_used())); double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0; - _all_stop_world_times_ms->add(s_w_t_ms); + _trace_gen0_time_data.record_start_collection(s_w_t_ms); _stop_world_start = 0.0; _cur_collection_start_sec = start_time_sec; @@ -937,11 +922,6 @@ } #endif - for (int i = 0; i < _aux_num; ++i) { - _cur_aux_times_ms[i] = 0.0; - _cur_aux_times_set[i] = false; - } - // This is initialized to zero here and is set during the evacuation // pause if we actually waited for the root region scanning to finish. _root_region_scan_wait_time_ms = 0.0; @@ -990,7 +970,7 @@ void G1CollectorPolicy::record_concurrent_pause() { if (_stop_world_start > 0.0) { double yield_ms = (os::elapsedTime() - _stop_world_start) * 1000.0; - _all_yield_times_ms->add(yield_ms); + _trace_gen0_time_data.record_yield_time(yield_ms); } } @@ -1197,21 +1177,6 @@ _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0, end_time_sec, false); - // This assert is exempted when we're doing parallel collection pauses, - // because the fragmentation caused by the parallel GC allocation buffers - // can lead to more memory being used during collection than was used - // before. Best leave this out until the fragmentation problem is fixed. - // Pauses in which evacuation failed can also lead to negative - // collections, since no space is reclaimed from a region containing an - // object whose evacuation failed. - // Further, we're now always doing parallel collection. But I'm still - // leaving this here as a placeholder for a more precise assertion later. - // (DLD, 10/05.) - assert((true || parallel) // Always using GC LABs now. 
- || _g1->evacuation_failed() - || _cur_collection_pause_used_at_start_bytes >= cur_used_bytes, - "Negative collection"); - size_t freed_bytes = _cur_collection_pause_used_at_start_bytes - cur_used_bytes; size_t surviving_bytes = _collection_set_bytes_used_before - freed_bytes; @@ -1259,44 +1224,15 @@ other_time_ms -= _cur_clear_ct_time_ms; // TraceGen0Time and TraceGen1Time summary info updating. - _all_pause_times_ms->add(elapsed_ms); if (update_stats) { - _summary->record_total_time_ms(elapsed_ms); - _summary->record_other_time_ms(other_time_ms); - - MainBodySummary* body_summary = _summary->main_body_summary(); - assert(body_summary != NULL, "should not be null!"); - - body_summary->record_root_region_scan_wait_time_ms( - _root_region_scan_wait_time_ms); - body_summary->record_ext_root_scan_time_ms(ext_root_scan_time); - body_summary->record_satb_filtering_time_ms(satb_filtering_time); - body_summary->record_update_rs_time_ms(update_rs_time); - body_summary->record_scan_rs_time_ms(scan_rs_time); - body_summary->record_obj_copy_time_ms(obj_copy_time); - - if (parallel) { - body_summary->record_parallel_time_ms(_cur_collection_par_time_ms); - body_summary->record_termination_time_ms(termination_time); + double parallel_known_time = known_time + termination_time; + double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time; - double parallel_known_time = known_time + termination_time; - double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time; - body_summary->record_parallel_other_time_ms(parallel_other_time); - } - - body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms); - - // We exempt parallel collection from this check because Alloc Buffer - // fragmentation can produce negative collections. Same with evac - // failure. - // Further, we're now always doing parallel collection. But I'm still - // leaving this here as a placeholder for a more precise assertion later. - // (DLD, 10/05. 
- assert((true || parallel) - || _g1->evacuation_failed() - || surviving_bytes <= _collection_set_bytes_used_before, - "Or else negative collection!"); + _trace_gen0_time_data.record_end_collection( + elapsed_ms, other_time_ms, _root_region_scan_wait_time_ms, _cur_collection_par_time_ms, + ext_root_scan_time, satb_filtering_time, update_rs_time, scan_rs_time, obj_copy_time, + termination_time, parallel_other_time, _cur_clear_ct_time_ms); // this is where we update the allocation rate of the application double app_time_ms = @@ -1349,12 +1285,6 @@ } } - for (int i = 0; i < _aux_num; ++i) { - if (_cur_aux_times_set[i]) { - _all_aux_times_ms[i].add(_cur_aux_times_ms[i]); - } - } - if (G1Log::finer()) { bool print_marking_info = _g1->mark_in_progress() && !last_pause_included_initial_mark; @@ -1436,14 +1366,6 @@ print_stats(2, "Free CSet", (_recorded_young_free_cset_time_ms + _recorded_non_young_free_cset_time_ms)); - - for (int i = 0; i < _aux_num; ++i) { - if (_cur_aux_times_set[i]) { - char buffer[96]; - sprintf(buffer, "Aux%d", i); - print_stats(1, buffer, _cur_aux_times_ms[i]); - } - } } bool new_in_marking_window = _in_marking_window; @@ -1808,179 +1730,9 @@ _g1->collection_set_iterate(&cs_closure); } -void G1CollectorPolicy::print_summary(int level, - const char* str, - NumberSeq* seq) const { - double sum = seq->sum(); - LineBuffer(level + 1).append_and_print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)", - str, sum / 1000.0, seq->avg()); -} - -void G1CollectorPolicy::print_summary_sd(int level, - const char* str, - NumberSeq* seq) const { - print_summary(level, str, seq); - LineBuffer(level + 6).append_and_print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)", - seq->num(), seq->sd(), seq->maximum()); -} - -void G1CollectorPolicy::check_other_times(int level, - NumberSeq* other_times_ms, - NumberSeq* calc_other_times_ms) const { - bool should_print = false; - LineBuffer buf(level + 2); - - double max_sum = MAX2(fabs(other_times_ms->sum()), - 
fabs(calc_other_times_ms->sum())); - double min_sum = MIN2(fabs(other_times_ms->sum()), - fabs(calc_other_times_ms->sum())); - double sum_ratio = max_sum / min_sum; - if (sum_ratio > 1.1) { - should_print = true; - buf.append_and_print_cr("## CALCULATED OTHER SUM DOESN'T MATCH RECORDED ###"); - } - - double max_avg = MAX2(fabs(other_times_ms->avg()), - fabs(calc_other_times_ms->avg())); - double min_avg = MIN2(fabs(other_times_ms->avg()), - fabs(calc_other_times_ms->avg())); - double avg_ratio = max_avg / min_avg; - if (avg_ratio > 1.1) { - should_print = true; - buf.append_and_print_cr("## CALCULATED OTHER AVG DOESN'T MATCH RECORDED ###"); - } - - if (other_times_ms->sum() < -0.01) { - buf.append_and_print_cr("## RECORDED OTHER SUM IS NEGATIVE ###"); - } - - if (other_times_ms->avg() < -0.01) { - buf.append_and_print_cr("## RECORDED OTHER AVG IS NEGATIVE ###"); - } - - if (calc_other_times_ms->sum() < -0.01) { - should_print = true; - buf.append_and_print_cr("## CALCULATED OTHER SUM IS NEGATIVE ###"); - } - - if (calc_other_times_ms->avg() < -0.01) { - should_print = true; - buf.append_and_print_cr("## CALCULATED OTHER AVG IS NEGATIVE ###"); - } - - if (should_print) - print_summary(level, "Other(Calc)", calc_other_times_ms); -} - -void G1CollectorPolicy::print_summary(PauseSummary* summary) const { - bool parallel = G1CollectedHeap::use_parallel_gc_threads(); - MainBodySummary* body_summary = summary->main_body_summary(); - if (summary->get_total_seq()->num() > 0) { - print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq()); - if (body_summary != NULL) { - print_summary(1, "Root Region Scan Wait", body_summary->get_root_region_scan_wait_seq()); - if (parallel) { - print_summary(1, "Parallel Time", body_summary->get_parallel_seq()); - print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq()); - print_summary(2, "SATB Filtering", body_summary->get_satb_filtering_seq()); - print_summary(2, "Update RS", body_summary->get_update_rs_seq()); 
- print_summary(2, "Scan RS", body_summary->get_scan_rs_seq()); - print_summary(2, "Object Copy", body_summary->get_obj_copy_seq()); - print_summary(2, "Termination", body_summary->get_termination_seq()); - print_summary(2, "Parallel Other", body_summary->get_parallel_other_seq()); - { - NumberSeq* other_parts[] = { - body_summary->get_ext_root_scan_seq(), - body_summary->get_satb_filtering_seq(), - body_summary->get_update_rs_seq(), - body_summary->get_scan_rs_seq(), - body_summary->get_obj_copy_seq(), - body_summary->get_termination_seq() - }; - NumberSeq calc_other_times_ms(body_summary->get_parallel_seq(), - 6, other_parts); - check_other_times(2, body_summary->get_parallel_other_seq(), - &calc_other_times_ms); - } - } else { - print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq()); - print_summary(1, "SATB Filtering", body_summary->get_satb_filtering_seq()); - print_summary(1, "Update RS", body_summary->get_update_rs_seq()); - print_summary(1, "Scan RS", body_summary->get_scan_rs_seq()); - print_summary(1, "Object Copy", body_summary->get_obj_copy_seq()); - } - } - print_summary(1, "Clear CT", body_summary->get_clear_ct_seq()); - print_summary(1, "Other", summary->get_other_seq()); - { - if (body_summary != NULL) { - NumberSeq calc_other_times_ms; - if (parallel) { - // parallel - NumberSeq* other_parts[] = { - body_summary->get_root_region_scan_wait_seq(), - body_summary->get_parallel_seq(), - body_summary->get_clear_ct_seq() - }; - calc_other_times_ms = NumberSeq(summary->get_total_seq(), - 3, other_parts); - } else { - // serial - NumberSeq* other_parts[] = { - body_summary->get_root_region_scan_wait_seq(), - body_summary->get_update_rs_seq(), - body_summary->get_ext_root_scan_seq(), - body_summary->get_satb_filtering_seq(), - body_summary->get_scan_rs_seq(), - body_summary->get_obj_copy_seq() - }; - calc_other_times_ms = NumberSeq(summary->get_total_seq(), - 6, other_parts); - } - check_other_times(1, summary->get_other_seq(), 
&calc_other_times_ms); - } - } - } else { - LineBuffer(1).append_and_print_cr("none"); - } - LineBuffer(0).append_and_print_cr(""); -} - void G1CollectorPolicy::print_tracing_info() const { - if (TraceGen0Time) { - gclog_or_tty->print_cr("ALL PAUSES"); - print_summary_sd(0, "Total", _all_pause_times_ms); - gclog_or_tty->print_cr(""); - gclog_or_tty->print_cr(""); - gclog_or_tty->print_cr(" Young GC Pauses: %8d", _young_pause_num); - gclog_or_tty->print_cr(" Mixed GC Pauses: %8d", _mixed_pause_num); - gclog_or_tty->print_cr(""); - - gclog_or_tty->print_cr("EVACUATION PAUSES"); - print_summary(_summary); - - gclog_or_tty->print_cr("MISC"); - print_summary_sd(0, "Stop World", _all_stop_world_times_ms); - print_summary_sd(0, "Yields", _all_yield_times_ms); - for (int i = 0; i < _aux_num; ++i) { - if (_all_aux_times_ms[i].num() > 0) { - char buffer[96]; - sprintf(buffer, "Aux%d", i); - print_summary_sd(0, buffer, &_all_aux_times_ms[i]); - } - } - } - if (TraceGen1Time) { - if (_all_full_gc_times_ms->num() > 0) { - gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s", - _all_full_gc_times_ms->num(), - _all_full_gc_times_ms->sum() / 1000.0); - gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times_ms->avg()); - gclog_or_tty->print_cr(" [std. dev = %8.2f ms, max = %8.2f ms]", - _all_full_gc_times_ms->sd(), - _all_full_gc_times_ms->maximum()); - } - } + _trace_gen0_time_data.print(); + _trace_gen1_time_data.print(); } void G1CollectorPolicy::print_yg_surv_rate_info() const { @@ -2531,9 +2283,9 @@ _last_gc_was_young = gcs_are_young() ? 
true : false; if (_last_gc_was_young) { - ++_young_pause_num; + _trace_gen0_time_data.increment_young_collection_count(); } else { - ++_mixed_pause_num; + _trace_gen0_time_data.increment_mixed_collection_count(); } // The young list is laid with the survivor regions from the previous @@ -2690,3 +2442,133 @@ _recorded_non_young_cset_choice_time_ms = (non_young_end_time_sec - non_young_start_time_sec) * 1000.0; } + +void TraceGen0TimeData::record_start_collection(double time_to_stop_the_world_ms) { + if(TraceGen0Time) { + _all_stop_world_times_ms.add(time_to_stop_the_world_ms); + } +} + +void TraceGen0TimeData::record_yield_time(double yield_time_ms) { + if(TraceGen0Time) { + _all_yield_times_ms.add(yield_time_ms); + } +} + +void TraceGen0TimeData::record_end_collection( + double total_ms, + double other_ms, + double root_region_scan_wait_ms, + double parallel_ms, + double ext_root_scan_ms, + double satb_filtering_ms, + double update_rs_ms, + double scan_rs_ms, + double obj_copy_ms, + double termination_ms, + double parallel_other_ms, + double clear_ct_ms) +{ + if(TraceGen0Time) { + _total.add(total_ms); + _other.add(other_ms); + _root_region_scan_wait.add(root_region_scan_wait_ms); + _parallel.add(parallel_ms); + _ext_root_scan.add(ext_root_scan_ms); + _satb_filtering.add(satb_filtering_ms); + _update_rs.add(update_rs_ms); + _scan_rs.add(scan_rs_ms); + _obj_copy.add(obj_copy_ms); + _termination.add(termination_ms); + _parallel_other.add(parallel_other_ms); + _clear_ct.add(clear_ct_ms); + } +} + +void TraceGen0TimeData::increment_young_collection_count() { + if(TraceGen0Time) { + ++_young_pause_num; + } +} + +void TraceGen0TimeData::increment_mixed_collection_count() { + if(TraceGen0Time) { + ++_mixed_pause_num; + } +} + +void TraceGen0TimeData::print_summary(int level, + const char* str, + const NumberSeq* seq) const { + double sum = seq->sum(); + LineBuffer(level + 1).append_and_print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)", + str, sum / 1000.0, seq->avg()); +} + 
+void TraceGen0TimeData::print_summary_sd(int level, + const char* str, + const NumberSeq* seq) const { + print_summary(level, str, seq); + LineBuffer(level + 6).append_and_print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)", + seq->num(), seq->sd(), seq->maximum()); +} + +void TraceGen0TimeData::print() const { + if (!TraceGen0Time) { + return; + } + + gclog_or_tty->print_cr("ALL PAUSES"); + print_summary_sd(0, "Total", &_total); + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr(" Young GC Pauses: %8d", _young_pause_num); + gclog_or_tty->print_cr(" Mixed GC Pauses: %8d", _mixed_pause_num); + gclog_or_tty->print_cr(""); + + gclog_or_tty->print_cr("EVACUATION PAUSES"); + + if (_young_pause_num == 0 && _mixed_pause_num == 0) { + gclog_or_tty->print_cr("none"); + } else { + print_summary_sd(0, "Evacuation Pauses", &_total); + print_summary(1, "Root Region Scan Wait", &_root_region_scan_wait); + print_summary(1, "Parallel Time", &_parallel); + print_summary(2, "Ext Root Scanning", &_ext_root_scan); + print_summary(2, "SATB Filtering", &_satb_filtering); + print_summary(2, "Update RS", &_update_rs); + print_summary(2, "Scan RS", &_scan_rs); + print_summary(2, "Object Copy", &_obj_copy); + print_summary(2, "Termination", &_termination); + print_summary(2, "Parallel Other", &_parallel_other); + print_summary(1, "Clear CT", &_clear_ct); + print_summary(1, "Other", &_other); + } + gclog_or_tty->print_cr(""); + + gclog_or_tty->print_cr("MISC"); + print_summary_sd(0, "Stop World", &_all_stop_world_times_ms); + print_summary_sd(0, "Yields", &_all_yield_times_ms); +} + +void TraceGen1TimeData::record_full_collection(double full_gc_time_ms) { + if (TraceGen1Time) { + _all_full_gc_times.add(full_gc_time_ms); + } +} + +void TraceGen1TimeData::print() const { + if (!TraceGen1Time) { + return; + } + + if (_all_full_gc_times.num() > 0) { + gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s", + _all_full_gc_times.num(), + 
_all_full_gc_times.sum() / 1000.0); + gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times.avg()); + gclog_or_tty->print_cr(" [std. dev = %8.2f ms, max = %8.2f ms]", + _all_full_gc_times.sd(), + _all_full_gc_times.maximum()); + } +} diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -37,49 +37,62 @@ class HeapRegion; class CollectionSetChooser; -// Yes, this is a bit unpleasant... but it saves replicating the same thing -// over and over again and introducing subtle problems through small typos and -// cutting and pasting mistakes. The macros below introduces a number -// sequnce into the following two classes and the methods that access it. +// TraceGen0Time collects data on _both_ young and mixed evacuation pauses +// (the latter may contain non-young regions - i.e. regions that are +// technically in Gen1) while TraceGen1Time collects data about full GCs. 
+class TraceGen0TimeData : public CHeapObj { + private: + unsigned _young_pause_num; + unsigned _mixed_pause_num; + + NumberSeq _all_stop_world_times_ms; + NumberSeq _all_yield_times_ms; -#define define_num_seq(name) \ -private: \ - NumberSeq _all_##name##_times_ms; \ -public: \ - void record_##name##_time_ms(double ms) { \ - _all_##name##_times_ms.add(ms); \ - } \ - NumberSeq* get_##name##_seq() { \ - return &_all_##name##_times_ms; \ - } + NumberSeq _total; + NumberSeq _other; + NumberSeq _root_region_scan_wait; + NumberSeq _parallel; + NumberSeq _ext_root_scan; + NumberSeq _satb_filtering; + NumberSeq _update_rs; + NumberSeq _scan_rs; + NumberSeq _obj_copy; + NumberSeq _termination; + NumberSeq _parallel_other; + NumberSeq _clear_ct; -class MainBodySummary; - -class PauseSummary: public CHeapObj { - define_num_seq(total) - define_num_seq(other) + void print_summary (int level, const char* str, const NumberSeq* seq) const; + void print_summary_sd (int level, const char* str, const NumberSeq* seq) const; public: - virtual MainBodySummary* main_body_summary() { return NULL; } + TraceGen0TimeData() : _young_pause_num(0), _mixed_pause_num(0) {}; + void record_start_collection(double time_to_stop_the_world_ms); + void record_yield_time(double yield_time_ms); + void record_end_collection( + double total_ms, + double other_ms, + double root_region_scan_wait_ms, + double parallel_ms, + double ext_root_scan_ms, + double satb_filtering_ms, + double update_rs_ms, + double scan_rs_ms, + double obj_copy_ms, + double termination_ms, + double parallel_other_ms, + double clear_ct_ms); + void increment_young_collection_count(); + void increment_mixed_collection_count(); + void print() const; }; -class MainBodySummary: public CHeapObj { - define_num_seq(root_region_scan_wait) - define_num_seq(parallel) // parallel only - define_num_seq(ext_root_scan) - define_num_seq(satb_filtering) - define_num_seq(update_rs) - define_num_seq(scan_rs) - define_num_seq(obj_copy) - 
define_num_seq(termination) // parallel only - define_num_seq(parallel_other) // parallel only - define_num_seq(clear_ct) -}; +class TraceGen1TimeData : public CHeapObj { + private: + NumberSeq _all_full_gc_times; -class Summary: public PauseSummary, - public MainBodySummary { -public: - virtual MainBodySummary* main_body_summary() { return this; } + public: + void record_full_collection(double full_gc_time_ms); + void print() const; }; // There are three command line options related to the young gen size: @@ -199,19 +212,10 @@ TruncatedSeq* _concurrent_mark_remark_times_ms; TruncatedSeq* _concurrent_mark_cleanup_times_ms; - Summary* _summary; + TraceGen0TimeData _trace_gen0_time_data; + TraceGen1TimeData _trace_gen1_time_data; - NumberSeq* _all_pause_times_ms; - NumberSeq* _all_full_gc_times_ms; double _stop_world_start; - NumberSeq* _all_stop_world_times_ms; - NumberSeq* _all_yield_times_ms; - - int _aux_num; - NumberSeq* _all_aux_times_ms; - double* _cur_aux_start_times_ms; - double* _cur_aux_times_ms; - bool* _cur_aux_times_set; double* _par_last_gc_worker_start_times_ms; double* _par_last_ext_root_scan_times_ms; @@ -243,9 +247,6 @@ bool _last_gc_was_young; - unsigned _young_pause_num; - unsigned _mixed_pause_num; - bool _during_marking; bool _in_marking_window; bool _in_marking_window_im; @@ -557,15 +558,6 @@ void print_par_stats(int level, const char* str, double* data, bool showDecimals = true); - void check_other_times(int level, - NumberSeq* other_times_ms, - NumberSeq* calc_other_times_ms) const; - - void print_summary (PauseSummary* stats) const; - - void print_summary (int level, const char* str, NumberSeq* seq) const; - void print_summary_sd (int level, const char* str, NumberSeq* seq) const; - double avg_value (double* data); double max_value (double* data); double sum_of_values (double* data); @@ -745,10 +737,6 @@ return _bytes_in_collection_set_before_gc; } - unsigned calc_gc_alloc_time_stamp() { - return _all_pause_times_ms->num() + 1; - } - // 
This should be called after the heap is resized. void record_new_heap_size(uint new_number_of_regions); @@ -867,18 +855,6 @@ _cur_collection_code_root_fixup_time_ms = ms; } - void record_aux_start_time(int i) { - guarantee(i < _aux_num, "should be within range"); - _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0; - } - - void record_aux_end_time(int i) { - guarantee(i < _aux_num, "should be within range"); - double ms = os::elapsedTime() * 1000.0 - _cur_aux_start_times_ms[i]; - _cur_aux_times_set[i] = true; - _cur_aux_times_ms[i] += ms; - } - void record_ref_proc_time(double ms) { _cur_ref_proc_time_ms = ms; } diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -1106,7 +1106,8 @@ void HeapRegionRemSet::setup_remset_size() { // Setup sparse and fine-grain tables sizes. 
// table_size = base * (log(region_size / 1M) + 1) - int region_size_log_mb = MAX2((int)HeapRegion::LogOfHRGrainBytes - (int)LOG_M, 0); + const int LOG_M = 20; + int region_size_log_mb = MAX2(HeapRegion::LogOfHRGrainBytes - LOG_M, 0); if (FLAG_IS_DEFAULT(G1RSetSparseRegionEntries)) { G1RSetSparseRegionEntries = G1RSetSparseRegionEntriesBase * (region_size_log_mb + 1); } diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/interpreter/abstractInterpreter.hpp --- a/src/share/vm/interpreter/abstractInterpreter.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/interpreter/abstractInterpreter.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -107,6 +107,8 @@ java_lang_math_sqrt, // implementation of java.lang.Math.sqrt (x) java_lang_math_log, // implementation of java.lang.Math.log (x) java_lang_math_log10, // implementation of java.lang.Math.log10 (x) + java_lang_math_pow, // implementation of java.lang.Math.pow (x,y) + java_lang_math_exp, // implementation of java.lang.Math.exp (x) java_lang_ref_reference_get, // implementation of java.lang.ref.Reference.get() number_of_method_entries, invalid = -1 diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/interpreter/interpreter.cpp --- a/src/share/vm/interpreter/interpreter.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/interpreter/interpreter.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -221,6 +221,8 @@ case vmIntrinsics::_dsqrt : return java_lang_math_sqrt ; case vmIntrinsics::_dlog : return java_lang_math_log ; case vmIntrinsics::_dlog10: return java_lang_math_log10; + case vmIntrinsics::_dpow : return java_lang_math_pow ; + case vmIntrinsics::_dexp : return java_lang_math_exp ; case vmIntrinsics::_Reference_get: return java_lang_ref_reference_get; diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/interpreter/templateInterpreter.cpp --- a/src/share/vm/interpreter/templateInterpreter.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/interpreter/templateInterpreter.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -370,6 +370,8 @@ 
method_entry(java_lang_math_sqrt ) method_entry(java_lang_math_log ) method_entry(java_lang_math_log10) + method_entry(java_lang_math_exp ) + method_entry(java_lang_math_pow ) method_entry(java_lang_ref_reference_get) // all native method kinds (must be one contiguous block) diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/memory/genCollectedHeap.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -677,11 +677,6 @@ #ifdef TRACESPINNING ParallelTaskTerminator::print_termination_counts(); #endif - - if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) { - tty->print_cr("Stopping after GC #%d", ExitAfterGCNum); - vm_exit(-1); - } } HeapWord* GenCollectedHeap::satisfy_failed_allocation(size_t size, bool is_tlab) { diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/oops/fieldInfo.hpp --- a/src/share/vm/oops/fieldInfo.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/oops/fieldInfo.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -50,8 +50,7 @@ initval_index_offset = 3, low_offset = 4, high_offset = 5, - generic_signature_offset = 6, - field_slots = 7 + field_slots = 6 }; private: @@ -60,29 +59,28 @@ void set_name_index(u2 val) { _shorts[name_index_offset] = val; } void set_signature_index(u2 val) { _shorts[signature_index_offset] = val; } void set_initval_index(u2 val) { _shorts[initval_index_offset] = val; } - void set_generic_signature_index(u2 val) { _shorts[generic_signature_offset] = val; } u2 name_index() const { return _shorts[name_index_offset]; } u2 signature_index() const { return _shorts[signature_index_offset]; } u2 initval_index() const { return _shorts[initval_index_offset]; } - u2 generic_signature_index() const { return _shorts[generic_signature_offset]; } public: static FieldInfo* from_field_array(typeArrayOop fields, int index) { return ((FieldInfo*)fields->short_at_addr(index * field_slots)); } + static FieldInfo* from_field_array(u2* 
fields, int index) { + return ((FieldInfo*)(fields + index * field_slots)); + } void initialize(u2 access_flags, u2 name_index, u2 signature_index, u2 initval_index, - u2 generic_signature_index, u4 offset) { _shorts[access_flags_offset] = access_flags; _shorts[name_index_offset] = name_index; _shorts[signature_index_offset] = signature_index; _shorts[initval_index_offset] = initval_index; - _shorts[generic_signature_offset] = generic_signature_index; set_offset(offset); } @@ -105,14 +103,6 @@ return cp->symbol_at(index); } - Symbol* generic_signature(constantPoolHandle cp) const { - int index = generic_signature_index(); - if (index == 0) { - return NULL; - } - return cp->symbol_at(index); - } - void set_access_flags(u2 val) { _shorts[access_flags_offset] = val; } void set_offset(u4 val) { _shorts[low_offset] = extract_low_short_from_int(val); diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/oops/fieldStreams.hpp --- a/src/share/vm/oops/fieldStreams.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/oops/fieldStreams.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -42,21 +42,57 @@ constantPoolHandle _constants; int _index; int _limit; + int _generic_signature_slot; FieldInfo* field() const { return FieldInfo::from_field_array(_fields(), _index); } + int init_generic_signature_start_slot() { + int length = _fields->length(); + int num_fields = 0; + int skipped_generic_signature_slots = 0; + FieldInfo* fi; + AccessFlags flags; + /* Scan from 0 to the current _index. Count the number of generic + signature slots for field[0] to field[_index - 1]. */ + for (int i = 0; i < _index; i++) { + fi = FieldInfo::from_field_array(_fields(), i); + flags.set_flags(fi->access_flags()); + if (flags.field_has_generic_signature()) { + length --; + skipped_generic_signature_slots ++; + } + } + /* Scan from the current _index. 
*/ + for (int i = _index; i*FieldInfo::field_slots < length; i++) { + fi = FieldInfo::from_field_array(_fields(), i); + flags.set_flags(fi->access_flags()); + if (flags.field_has_generic_signature()) { + length --; + } + num_fields ++; + } + _generic_signature_slot = length + skipped_generic_signature_slots; + assert(_generic_signature_slot <= _fields->length(), ""); + return num_fields; + } + FieldStreamBase(typeArrayHandle fields, constantPoolHandle constants, int start, int limit) { _fields = fields; _constants = constants; _index = start; - _limit = limit; + int num_fields = init_generic_signature_start_slot(); + if (limit < start) { + _limit = num_fields; + } else { + _limit = limit; + } } FieldStreamBase(typeArrayHandle fields, constantPoolHandle constants) { _fields = fields; _constants = constants; _index = 0; - _limit = fields->length() / FieldInfo::field_slots; + _limit = init_generic_signature_start_slot(); } public: @@ -65,18 +101,26 @@ _constants = klass->constants(); _index = 0; _limit = klass->java_fields_count(); + init_generic_signature_start_slot(); } FieldStreamBase(instanceKlassHandle klass) { _fields = klass->fields(); _constants = klass->constants(); _index = 0; _limit = klass->java_fields_count(); + init_generic_signature_start_slot(); } // accessors int index() const { return _index; } - void next() { _index += 1; } + void next() { + if (access_flags().field_has_generic_signature()) { + _generic_signature_slot ++; + assert(_generic_signature_slot <= _fields->length(), ""); + } + _index += 1; + } bool done() const { return _index >= _limit; } // Accessors for current field @@ -103,7 +147,13 @@ } Symbol* generic_signature() const { - return field()->generic_signature(_constants); + if (access_flags().field_has_generic_signature()) { + assert(_generic_signature_slot < _fields->length(), "out of bounds"); + int index = _fields->short_at(_generic_signature_slot); + return _constants->symbol_at(index); + } else { + return NULL; + } } int offset() 
const { @@ -139,11 +189,19 @@ } int generic_signature_index() const { assert(!field()->is_internal(), "regular only"); - return field()->generic_signature_index(); + if (access_flags().field_has_generic_signature()) { + assert(_generic_signature_slot < _fields->length(), "out of bounds"); + return _fields->short_at(_generic_signature_slot); + } else { + return 0; + } } void set_generic_signature_index(int index) { assert(!field()->is_internal(), "regular only"); - field()->set_generic_signature_index(index); + if (access_flags().field_has_generic_signature()) { + assert(_generic_signature_slot < _fields->length(), "out of bounds"); + _fields->short_at_put(_generic_signature_slot, index); + } } int initval_index() const { assert(!field()->is_internal(), "regular only"); @@ -159,8 +217,8 @@ // Iterate over only the internal fields class InternalFieldStream : public FieldStreamBase { public: - InternalFieldStream(instanceKlass* k): FieldStreamBase(k->fields(), k->constants(), k->java_fields_count(), k->all_fields_count()) {} - InternalFieldStream(instanceKlassHandle k): FieldStreamBase(k->fields(), k->constants(), k->java_fields_count(), k->all_fields_count()) {} + InternalFieldStream(instanceKlass* k): FieldStreamBase(k->fields(), k->constants(), k->java_fields_count(), 0) {} + InternalFieldStream(instanceKlassHandle k): FieldStreamBase(k->fields(), k->constants(), k->java_fields_count(), 0) {} }; diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/oops/instanceKlass.hpp --- a/src/share/vm/oops/instanceKlass.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/oops/instanceKlass.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -168,8 +168,19 @@ objArrayOop _local_interfaces; // Interface (klassOops) this class implements transitively. objArrayOop _transitive_interfaces; - // Instance and static variable information, 5-tuples of shorts [access, name - // index, sig index, initval index, offset]. 
+ // Instance and static variable information, starts with 6-tuples of shorts + // [access, name index, sig index, initval index, low_offset, high_offset] + // for all fields, followed by the generic signature data at the end of + // the array. Only fields with generic signature attributes have the generic + // signature data set in the array. The fields array looks like following: + // + // f1: [access, name index, sig index, initial value index, low_offset, high_offset] + // f2: [access, name index, sig index, initial value index, low_offset, high_offset] + // ... + // fn: [access, name index, sig index, initial value index, low_offset, high_offset] + // [generic signature index] + // [generic signature index] + // ... typeArrayOop _fields; // Constant pool for this class. constantPoolOop _constants; @@ -351,9 +362,6 @@ // Number of Java declared fields int java_fields_count() const { return (int)_java_fields_count; } - // Number of fields including any injected fields - int all_fields_count() const { return _fields->length() / FieldInfo::field_slots; } - typeArrayOop fields() const { return _fields; } void set_fields(typeArrayOop f, u2 java_fields_count) { diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/oops/instanceRefKlass.cpp --- a/src/share/vm/oops/instanceRefKlass.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/oops/instanceRefKlass.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -515,6 +515,12 @@ void instanceRefKlass::acquire_pending_list_lock(BasicLock *pending_list_basic_lock) { // we may enter this with pending exception set PRESERVE_EXCEPTION_MARK; // exceptions are never thrown, needed for TRAPS argument + + // Create a HandleMark in case we retry a GC multiple times. + // Each time we attempt the GC, we allocate the handle below + // to hold the pending list lock. We want to free this handle. 
+ HandleMark hm; + Handle h_lock(THREAD, java_lang_ref_Reference::pending_list_lock()); ObjectSynchronizer::fast_enter(h_lock, pending_list_basic_lock, false, THREAD); assert(ObjectSynchronizer::current_thread_holds_lock( @@ -527,7 +533,12 @@ BasicLock *pending_list_basic_lock) { // we may enter this with pending exception set PRESERVE_EXCEPTION_MARK; // exceptions are never thrown, needed for TRAPS argument - // + + // Create a HandleMark in case we retry a GC multiple times. + // Each time we attempt the GC, we allocate the handle below + // to hold the pending list lock. We want to free this handle. + HandleMark hm; + Handle h_lock(THREAD, java_lang_ref_Reference::pending_list_lock()); assert(ObjectSynchronizer::current_thread_holds_lock( JavaThread::current(), h_lock), diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/opto/doCall.cpp --- a/src/share/vm/opto/doCall.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/opto/doCall.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -780,7 +780,7 @@ if( at_method_entry ) { // bump invocation counter if top method (for statistics) if (CountCompiledCalls && depth() == 1) { - const TypeInstPtr* addr_type = TypeInstPtr::make(method()); + const TypeOopPtr* addr_type = TypeOopPtr::make_from_constant(method()); Node* adr1 = makecon(addr_type); Node* adr2 = basic_plus_adr(adr1, adr1, in_bytes(methodOopDesc::compiled_invocation_counter_offset())); increment_counter(adr2); diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/opto/library_call.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -1557,9 +1557,6 @@ // every again. NaN results requires StrictMath.exp handling. if (too_many_traps(Deoptimization::Reason_intrinsic)) return false; - // Do not intrinsify on older platforms which lack cmove. 
- if (ConditionalMoveLimit == 0) return false; - _sp += arg_size(); // restore stack pointer Node *x = pop_math_arg(); Node *result = _gvn.transform(new (C, 2) ExpDNode(0,x)); @@ -1802,15 +1799,11 @@ case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false; case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_abs(id) : false; - // These intrinsics don't work on X86. The ad implementation doesn't - // handle NaN's properly. Instead of returning infinity, the ad - // implementation returns a NaN on overflow. See bug: 6304089 - // Once the ad implementations are fixed, change the code below - // to match the intrinsics above - case vmIntrinsics::_dexp: return + Matcher::has_match_rule(Op_ExpD) ? inline_exp(id) : runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); case vmIntrinsics::_dpow: return + Matcher::has_match_rule(Op_PowD) ? inline_pow(id) : runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); // These intrinsics are not yet correctly implemented diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/opto/subnode.cpp --- a/src/share/vm/opto/subnode.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/opto/subnode.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -1314,7 +1314,5 @@ if( t2->base() != Type::DoubleCon ) return Type::DOUBLE; double d1 = t1->getd(); double d2 = t2->getd(); - if( d1 < 0.0 ) return Type::DOUBLE; - if( d2 < 0.0 ) return Type::DOUBLE; return TypeD::make( StubRoutines::intrinsic_pow( d1, d2 ) ); } diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/runtime/fieldDescriptor.cpp --- a/src/share/vm/runtime/fieldDescriptor.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/runtime/fieldDescriptor.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -28,6 +28,7 @@ #include "memory/resourceArea.hpp" #include "memory/universe.inline.hpp" #include "oops/instanceKlass.hpp" +#include "oops/fieldStreams.hpp" #include 
"runtime/fieldDescriptor.hpp" #include "runtime/handles.inline.hpp" #include "runtime/signature.hpp" @@ -37,6 +38,20 @@ return instanceKlass::cast(_cp->pool_holder())->class_loader(); } +Symbol* fieldDescriptor::generic_signature() const { + int idx = 0; + instanceKlass* ik = instanceKlass::cast(field_holder()); + for (AllFieldStream fs(ik); !fs.done(); fs.next()) { + if (idx == _index) { + return fs.generic_signature(); + } else { + idx ++; + } + } + assert(false, "should never happen"); + return NULL; +} + typeArrayOop fieldDescriptor::annotations() const { instanceKlass* ik = instanceKlass::cast(field_holder()); objArrayOop md = ik->fields_annotations(); diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/runtime/fieldDescriptor.hpp --- a/src/share/vm/runtime/fieldDescriptor.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/runtime/fieldDescriptor.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -67,7 +67,7 @@ oop loader() const; // Offset (in words) of field from start of instanceOop / klassOop int offset() const { return field()->offset(); } - Symbol* generic_signature() const { return field()->generic_signature(_cp); } + Symbol* generic_signature() const; int index() const { return _index; } typeArrayOop annotations() const; diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/runtime/globals.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -3285,9 +3285,6 @@ diagnostic(intx, VerifyGCLevel, 0, \ "Generation level at which to start +VerifyBefore/AfterGC") \ \ - develop(uintx, ExitAfterGCNum, 0, \ - "If non-zero, exit after this GC.") \ - \ product(intx, MaxTenuringThreshold, 15, \ "Maximum value for tenuring threshold") \ \ diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/runtime/vmStructs.cpp --- a/src/share/vm/runtime/vmStructs.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/runtime/vmStructs.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -2352,7 +2352,6 @@ 
declare_constant(FieldInfo::initval_index_offset) \ declare_constant(FieldInfo::low_offset) \ declare_constant(FieldInfo::high_offset) \ - declare_constant(FieldInfo::generic_signature_offset) \ declare_constant(FieldInfo::field_slots) \ \ /************************************************/ \ diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/utilities/accessFlags.hpp --- a/src/share/vm/utilities/accessFlags.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/utilities/accessFlags.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -80,10 +80,12 @@ JVM_ACC_FIELD_ACCESS_WATCHED = 0x00002000, // field access is watched by JVMTI JVM_ACC_FIELD_MODIFICATION_WATCHED = 0x00008000, // field modification is watched by JVMTI JVM_ACC_FIELD_INTERNAL = 0x00000400, // internal field, same as JVM_ACC_ABSTRACT + JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE = 0x00000800, // field has generic signature JVM_ACC_FIELD_INTERNAL_FLAGS = JVM_ACC_FIELD_ACCESS_WATCHED | JVM_ACC_FIELD_MODIFICATION_WATCHED | - JVM_ACC_FIELD_INTERNAL, + JVM_ACC_FIELD_INTERNAL | + JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE, // flags accepted by set_field_flags() JVM_ACC_FIELD_FLAGS = JVM_RECOGNIZED_FIELD_MODIFIERS | JVM_ACC_FIELD_INTERNAL_FLAGS @@ -156,6 +158,8 @@ bool is_field_modification_watched() const { return (_flags & JVM_ACC_FIELD_MODIFICATION_WATCHED) != 0; } bool is_internal() const { return (_flags & JVM_ACC_FIELD_INTERNAL) != 0; } + bool field_has_generic_signature() const + { return (_flags & JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE) != 0; } // get .class file flags jint get_flags () const { return (_flags & JVM_ACC_WRITTEN_FLAGS); } @@ -225,6 +229,10 @@ atomic_clear_bits(JVM_ACC_FIELD_MODIFICATION_WATCHED); } } + void set_field_has_generic_signature() + { + atomic_set_bits(JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE); + } // Conversion jshort as_short() const { return (jshort)_flags; } diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/utilities/globalDefinitions.hpp --- a/src/share/vm/utilities/globalDefinitions.hpp Fri Jun 01 20:17:46 
2012 +0200 +++ b/src/share/vm/utilities/globalDefinitions.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -161,10 +161,6 @@ const size_t G = M*K; const size_t HWperKB = K / sizeof(HeapWord); -const size_t LOG_K = 10; -const size_t LOG_M = 2 * LOG_K; -const size_t LOG_G = 2 * LOG_M; - const jint min_jint = (jint)1 << (sizeof(jint)*BitsPerByte-1); // 0x80000000 == smallest jint const jint max_jint = (juint)min_jint - 1; // 0x7FFFFFFF == largest jint diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/utilities/numberSeq.cpp --- a/src/share/vm/utilities/numberSeq.cpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/utilities/numberSeq.cpp Sat Jun 02 07:32:21 2012 -0400 @@ -115,24 +115,6 @@ return true; } -NumberSeq::NumberSeq(NumberSeq *total, int n, NumberSeq **parts) { - guarantee(check_nums(total, n, parts), "all seq lengths should match"); - double sum = total->sum(); - for (int i = 0; i < n; ++i) { - if (parts[i] != NULL) - sum -= parts[i]->sum(); - } - - _num = total->num(); - _sum = sum; - - // we do not calculate these... - _sum_of_squares = -1.0; - _maximum = -1.0; - _davg = -1.0; - _dvariance = -1.0; -} - void NumberSeq::add(double val) { AbsSeq::add(val); diff -r fab99b17c1de -r 4434fdad6b37 src/share/vm/utilities/numberSeq.hpp --- a/src/share/vm/utilities/numberSeq.hpp Fri Jun 01 20:17:46 2012 +0200 +++ b/src/share/vm/utilities/numberSeq.hpp Sat Jun 02 07:32:21 2012 -0400 @@ -93,7 +93,6 @@ public: NumberSeq(double alpha = DEFAULT_ALPHA_VALUE); - NumberSeq(NumberSeq* total, int n_parts, NumberSeq** parts); virtual void add(double val); virtual double maximum() const { return _maximum; } diff -r fab99b17c1de -r 4434fdad6b37 test/gc/7168848/HumongousAlloc.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/7168848/HumongousAlloc.java Sat Jun 02 07:32:21 2012 -0400 @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test Humongous.java + * @bug 7168848 + * @summary G1: humongous object allocations should initiate marking cycles when necessary + * @run main/othervm -Xms100m -Xmx100m -XX:+PrintGC -XX:G1HeapRegionSize=1m -XX:+UseG1GC HumongousAlloc + * + */ +import java.lang.management.GarbageCollectorMXBean; +import java.lang.management.ManagementFactory; +import java.util.List; + +public class HumongousAlloc { + + public static byte[] dummy; + private static int sleepFreq = 40; + private static int sleepTime = 1000; + private static double size = 0.75; + private static int iterations = 50; + private static int MB = 1024 * 1024; + + public static void allocate(int size, int sleepTime, int sleepFreq) throws InterruptedException { + System.out.println("Will allocate objects of size: " + size + + " bytes and sleep for " + sleepTime + + " ms after every " + sleepFreq + "th allocation."); + int count = 0; + while (count < iterations) { + for (int i = 0; i < sleepFreq; i++) { + dummy = new byte[size - 16]; + } + Thread.sleep(sleepTime); + count++; + } + } + + 
public static void main(String[] args) throws InterruptedException { + allocate((int) (size * MB), sleepTime, sleepFreq); + List<GarbageCollectorMXBean> collectors = ManagementFactory.getGarbageCollectorMXBeans(); + for (GarbageCollectorMXBean collector : collectors) { + if (collector.getName().contains("G1 Old")) { + long count = collector.getCollectionCount(); + if (count > 0) { + throw new RuntimeException("Failed: FullGCs should not have happened. The number of FullGC run is " + count); + } + else { + System.out.println("Passed."); + } + } + } + } +} +