# HG changeset patch # User dcubed # Date 1345862742 25200 # Node ID b3602ff9c1b817f979466456be3eecd8f623625a # Parent 153776c4cb6f90d81428dfc48486e515b2f3cc3c# Parent be82ef2188725e95e65de4093c493c2b31764bea Merge diff -r be82ef218872 -r b3602ff9c1b8 .hgtags --- a/.hgtags Wed Aug 22 10:01:51 2012 +0200 +++ b/.hgtags Fri Aug 24 19:45:42 2012 -0700 @@ -269,3 +269,6 @@ 3b3ad16429701b2eb6712851c2f7c5a726eb2cbe hs24-b19 663fc23da8d51c4c0552cbcb17ffc85f5869d4fd jdk8-b51 4c8f2a12e757e7a808aa85827573e09f75d7459f hs24-b20 +6d0436885201db3f581523344a734793bb989549 jdk8-b52 +54240c1b8e87758f28da2c6a569a926fd9e0910a jdk8-b53 +9e3ae661284dc04185b029d85440fe7811f1ed07 hs24-b21 diff -r be82ef218872 -r b3602ff9c1b8 make/hotspot_version --- a/make/hotspot_version Wed Aug 22 10:01:51 2012 +0200 +++ b/make/hotspot_version Fri Aug 24 19:45:42 2012 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=24 HS_MINOR_VER=0 -HS_BUILD_NUMBER=21 +HS_BUILD_NUMBER=22 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r be82ef218872 -r b3602ff9c1b8 make/jprt.properties --- a/make/jprt.properties Wed Aug 22 10:01:51 2012 +0200 +++ b/make/jprt.properties Fri Aug 24 19:45:42 2012 -0700 @@ -54,77 +54,77 @@ # Define the Solaris platforms we want for the various releases jprt.my.solaris.sparc.jdk8=solaris_sparc_5.10 jprt.my.solaris.sparc.jdk7=solaris_sparc_5.10 -jprt.my.solaris.sparc.jdk7u6=${jprt.my.solaris.sparc.jdk7} +jprt.my.solaris.sparc.jdk7u8=${jprt.my.solaris.sparc.jdk7} jprt.my.solaris.sparc=${jprt.my.solaris.sparc.${jprt.tools.default.release}} jprt.my.solaris.sparcv9.jdk8=solaris_sparcv9_5.10 jprt.my.solaris.sparcv9.jdk7=solaris_sparcv9_5.10 -jprt.my.solaris.sparcv9.jdk7u6=${jprt.my.solaris.sparcv9.jdk7} +jprt.my.solaris.sparcv9.jdk7u8=${jprt.my.solaris.sparcv9.jdk7} jprt.my.solaris.sparcv9=${jprt.my.solaris.sparcv9.${jprt.tools.default.release}} jprt.my.solaris.i586.jdk8=solaris_i586_5.10 jprt.my.solaris.i586.jdk7=solaris_i586_5.10 -jprt.my.solaris.i586.jdk7u6=${jprt.my.solaris.i586.jdk7} 
+jprt.my.solaris.i586.jdk7u8=${jprt.my.solaris.i586.jdk7} jprt.my.solaris.i586=${jprt.my.solaris.i586.${jprt.tools.default.release}} jprt.my.solaris.x64.jdk8=solaris_x64_5.10 jprt.my.solaris.x64.jdk7=solaris_x64_5.10 -jprt.my.solaris.x64.jdk7u6=${jprt.my.solaris.x64.jdk7} +jprt.my.solaris.x64.jdk7u8=${jprt.my.solaris.x64.jdk7} jprt.my.solaris.x64=${jprt.my.solaris.x64.${jprt.tools.default.release}} jprt.my.linux.i586.jdk8=linux_i586_2.6 jprt.my.linux.i586.jdk7=linux_i586_2.6 -jprt.my.linux.i586.jdk7u6=${jprt.my.linux.i586.jdk7} +jprt.my.linux.i586.jdk7u8=${jprt.my.linux.i586.jdk7} jprt.my.linux.i586=${jprt.my.linux.i586.${jprt.tools.default.release}} jprt.my.linux.x64.jdk8=linux_x64_2.6 jprt.my.linux.x64.jdk7=linux_x64_2.6 -jprt.my.linux.x64.jdk7u6=${jprt.my.linux.x64.jdk7} +jprt.my.linux.x64.jdk7u8=${jprt.my.linux.x64.jdk7} jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}} jprt.my.linux.ppc.jdk8=linux_ppc_2.6 jprt.my.linux.ppc.jdk7=linux_ppc_2.6 -jprt.my.linux.ppc.jdk7u6=${jprt.my.linux.ppc.jdk7} +jprt.my.linux.ppc.jdk7u8=${jprt.my.linux.ppc.jdk7} jprt.my.linux.ppc=${jprt.my.linux.ppc.${jprt.tools.default.release}} jprt.my.linux.ppcv2.jdk8=linux_ppcv2_2.6 jprt.my.linux.ppcv2.jdk7=linux_ppcv2_2.6 -jprt.my.linux.ppcv2.jdk7u6=${jprt.my.linux.ppcv2.jdk7} +jprt.my.linux.ppcv2.jdk7u8=${jprt.my.linux.ppcv2.jdk7} jprt.my.linux.ppcv2=${jprt.my.linux.ppcv2.${jprt.tools.default.release}} jprt.my.linux.ppcsflt.jdk8=linux_ppcsflt_2.6 jprt.my.linux.ppcsflt.jdk7=linux_ppcsflt_2.6 -jprt.my.linux.ppcsflt.jdk7u6=${jprt.my.linux.ppcsflt.jdk7} +jprt.my.linux.ppcsflt.jdk7u8=${jprt.my.linux.ppcsflt.jdk7} jprt.my.linux.ppcsflt=${jprt.my.linux.ppcsflt.${jprt.tools.default.release}} jprt.my.linux.armvfp.jdk8=linux_armvfp_2.6 jprt.my.linux.armvfp.jdk7=linux_armvfp_2.6 -jprt.my.linux.armvfp.jdk7u6=${jprt.my.linux.armvfp.jdk7} +jprt.my.linux.armvfp.jdk7u8=${jprt.my.linux.armvfp.jdk7} jprt.my.linux.armvfp=${jprt.my.linux.armvfp.${jprt.tools.default.release}} 
jprt.my.linux.armv6.jdk8=linux_armv6_2.6 jprt.my.linux.armv6.jdk7=linux_armv6_2.6 -jprt.my.linux.armv6.jdk7u6=${jprt.my.linux.armv6.jdk7} +jprt.my.linux.armv6.jdk7u8=${jprt.my.linux.armv6.jdk7} jprt.my.linux.armv6=${jprt.my.linux.armv6.${jprt.tools.default.release}} jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6 jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6 -jprt.my.linux.armsflt.jdk7u6=${jprt.my.linux.armsflt.jdk7} +jprt.my.linux.armsflt.jdk7u8=${jprt.my.linux.armsflt.jdk7} jprt.my.linux.armsflt=${jprt.my.linux.armsflt.${jprt.tools.default.release}} jprt.my.macosx.x64.jdk8=macosx_x64_10.7 jprt.my.macosx.x64.jdk7=macosx_x64_10.7 -jprt.my.macosx.x64.jdk7u6=${jprt.my.macosx.x64.jdk7} +jprt.my.macosx.x64.jdk7u8=${jprt.my.macosx.x64.jdk7} jprt.my.macosx.x64=${jprt.my.macosx.x64.${jprt.tools.default.release}} jprt.my.windows.i586.jdk8=windows_i586_5.1 jprt.my.windows.i586.jdk7=windows_i586_5.1 -jprt.my.windows.i586.jdk7u6=${jprt.my.windows.i586.jdk7} +jprt.my.windows.i586.jdk7u8=${jprt.my.windows.i586.jdk7} jprt.my.windows.i586=${jprt.my.windows.i586.${jprt.tools.default.release}} jprt.my.windows.x64.jdk8=windows_x64_5.2 jprt.my.windows.x64.jdk7=windows_x64_5.2 -jprt.my.windows.x64.jdk7u6=${jprt.my.windows.x64.jdk7} +jprt.my.windows.x64.jdk7u8=${jprt.my.windows.x64.jdk7} jprt.my.windows.x64=${jprt.my.windows.x64.${jprt.tools.default.release}} # Standard list of jprt build targets for this source tree @@ -159,7 +159,7 @@ jprt.build.targets.jdk8=${jprt.build.targets.all} jprt.build.targets.jdk7=${jprt.build.targets.all} -jprt.build.targets.jdk7u6=${jprt.build.targets.all} +jprt.build.targets.jdk7u8=${jprt.build.targets.all} jprt.build.targets=${jprt.build.targets.${jprt.tools.default.release}} # Subset lists of test targets for this source tree @@ -452,7 +452,7 @@ jprt.test.targets.jdk8=${jprt.test.targets.standard} jprt.test.targets.jdk7=${jprt.test.targets.standard} -jprt.test.targets.jdk7u6=${jprt.test.targets.jdk7} +jprt.test.targets.jdk7u8=${jprt.test.targets.jdk7} 
jprt.test.targets=${jprt.test.targets.${jprt.tools.default.release}} # The default test/Makefile targets that should be run @@ -512,7 +512,7 @@ jprt.make.rule.test.targets.jdk8=${jprt.make.rule.test.targets.standard} jprt.make.rule.test.targets.jdk7=${jprt.make.rule.test.targets.standard} -jprt.make.rule.test.targets.jdk7u6=${jprt.make.rule.test.targets.jdk7} +jprt.make.rule.test.targets.jdk7u8=${jprt.make.rule.test.targets.jdk7} jprt.make.rule.test.targets=${jprt.make.rule.test.targets.${jprt.tools.default.release}} # 7155453: Work-around to prevent popups on OSX from blocking test completion diff -r be82ef218872 -r b3602ff9c1b8 src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -435,85 +435,6 @@ } -void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) { - // At this point we know that offset == referent_offset. - // - // So we might have to emit: - // if (src == null) goto continuation. - // - // and we definitely have to emit: - // if (klass(src).reference_type == REF_NONE) goto continuation - // if (!marking_active) goto continuation - // if (pre_val == null) goto continuation - // call pre_barrier(pre_val) - // goto continuation - // - __ bind(_entry); - - assert(src()->is_register(), "sanity"); - Register src_reg = src()->as_register(); - - if (gen_src_check()) { - // The original src operand was not a constant. - // Generate src == null? - if (__ is_in_wdisp16_range(_continuation)) { - __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation); - } else { - __ cmp(src_reg, G0); - __ brx(Assembler::equal, false, Assembler::pt, _continuation); - } - __ delayed()->nop(); - } - - // Generate src->_klass->_reference_type() == REF_NONE)? 
- assert(tmp()->is_register(), "sanity"); - Register tmp_reg = tmp()->as_register(); - - __ load_klass(src_reg, tmp_reg); - - Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset()); - __ ldub(ref_type_adr, tmp_reg); - - // _reference_type field is of type ReferenceType (enum) - assert(REF_NONE == 0, "check this code"); - __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt); - __ delayed()->nop(); - - // Is marking active? - assert(thread()->is_register(), "precondition"); - Register thread_reg = thread()->as_pointer_register(); - - Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_active())); - - if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { - __ ld(in_progress, tmp_reg); - } else { - assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); - __ ldsb(in_progress, tmp_reg); - } - - __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt); - __ delayed()->nop(); - - // val == null? 
- assert(val()->is_register(), "Precondition."); - Register val_reg = val()->as_register(); - - if (__ is_in_wdisp16_range(_continuation)) { - __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation); - } else { - __ cmp(val_reg, G0); - __ brx(Assembler::equal, false, Assembler::pt, _continuation); - } - __ delayed()->nop(); - - __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id)); - __ delayed()->mov(val_reg, G4); - __ br(Assembler::always, false, Assembler::pt, _continuation); - __ delayed()->nop(); -} - jbyte* G1PostBarrierStub::_byte_map_base = NULL; jbyte* G1PostBarrierStub::byte_map_base_slow() { diff -r be82ef218872 -r b3602ff9c1b8 src/cpu/sparc/vm/vm_version_sparc.cpp --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -106,10 +106,10 @@ if (FLAG_IS_DEFAULT(OptoLoopAlignment)) { FLAG_SET_DEFAULT(OptoLoopAlignment, 4); } - // When using CMS, we cannot use memset() in BOT updates because - // the sun4v/CMT version in libc_psr uses BIS which exposes - // "phantom zeros" to concurrent readers. See 6948537. - if (FLAG_IS_DEFAULT(UseMemSetInBOT) && UseConcMarkSweepGC) { + // When using CMS or G1, we cannot use memset() in BOT updates + // because the sun4v/CMT version in libc_psr uses BIS which + // exposes "phantom zeros" to concurrent readers. See 6948537. 
+ if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) { FLAG_SET_DEFAULT(UseMemSetInBOT, false); } #ifdef _LP64 diff -r be82ef218872 -r b3602ff9c1b8 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/cpu/x86/vm/assembler_x86.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -999,32 +999,22 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x58); - emit_byte(0xC0 | encode); + emit_simd_arith(0x58, dst, src, VEX_SIMD_F2); } void Assembler::addsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x58); - emit_operand(dst, src); + emit_simd_arith(0x58, dst, src, VEX_SIMD_F2); } void Assembler::addss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x58); - emit_byte(0xC0 | encode); + emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); } void Assembler::addss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x58); - emit_operand(dst, src); + emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); } void Assembler::andl(Address dst, int32_t imm32) { @@ -1052,36 +1042,6 @@ emit_arith(0x23, 0xC0, dst, src); } -void Assembler::andpd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66); - emit_byte(0x54); - emit_operand(dst, src); -} - -void Assembler::andpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); - emit_byte(0x54); - 
emit_byte(0xC0 | encode); -} - -void Assembler::andps(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_NONE); - emit_byte(0x54); - emit_operand(dst, src); -} - -void Assembler::andps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); - emit_byte(0x54); - emit_byte(0xC0 | encode); -} - void Assembler::bsfl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_byte(0x0F); @@ -1246,61 +1206,42 @@ // NOTE: dbx seems to decode this as comiss even though the // 0x66 is there. Strangly ucomisd comes out correct NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66); - emit_byte(0x2F); - emit_operand(dst, src); + emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); } void Assembler::comisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); - emit_byte(0x2F); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); } void Assembler::comiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_NONE); - emit_byte(0x2F); - emit_operand(dst, src); + emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE); } void Assembler::comiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); - emit_byte(0x2F); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE); } void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); - 
emit_byte(0xE6); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3); } void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); - emit_byte(0x5B); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE); } void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x5A); - emit_byte(0xC0 | encode); + emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); } void Assembler::cvtsd2ss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x5A); - emit_operand(dst, src); + emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); } void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { @@ -1312,10 +1253,7 @@ void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x2A); - emit_operand(dst, src); + emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2); } void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { @@ -1327,25 +1265,17 @@ void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x2A); - emit_operand(dst, src); + emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3); } void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x5A); - emit_byte(0xC0 | encode); + emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); } void Assembler::cvtss2sd(XMMRegister dst, Address src) { 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x5A); - emit_operand(dst, src); + emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); } @@ -1373,32 +1303,22 @@ void Assembler::divsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x5E); - emit_operand(dst, src); + emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); } void Assembler::divsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x5E); - emit_byte(0xC0 | encode); + emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); } void Assembler::divss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x5E); - emit_operand(dst, src); + emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); } void Assembler::divss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x5E); - emit_byte(0xC0 | encode); + emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); } void Assembler::emms() { @@ -1634,16 +1554,12 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); - emit_byte(0x28); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66); } void Assembler::movaps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); - emit_byte(0x28); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE); } void Assembler::movlhps(XMMRegister dst, XMMRegister src) { @@ 
-1712,24 +1628,17 @@ void Assembler::movdqa(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); - emit_byte(0x6F); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); } void Assembler::movdqu(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3); - emit_byte(0x6F); - emit_operand(dst, src); + emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); } void Assembler::movdqu(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); - emit_byte(0x6F); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); } void Assembler::movdqu(Address dst, XMMRegister src) { @@ -1810,10 +1719,7 @@ // The selection is done in MacroAssembler::movdbl() and movflt(). void Assembler::movlpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66); - emit_byte(0x12); - emit_operand(dst, src); + emit_simd_arith(0x12, dst, src, VEX_SIMD_66); } void Assembler::movq( MMXRegister dst, Address src ) { @@ -1870,17 +1776,12 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x10); - emit_byte(0xC0 | encode); + emit_simd_arith(0x10, dst, src, VEX_SIMD_F2); } void Assembler::movsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F2); - emit_byte(0x10); - emit_operand(dst, src); + emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2); } void Assembler::movsd(Address dst, XMMRegister src) { @@ -1893,17 +1794,12 @@ void Assembler::movss(XMMRegister 
dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x10); - emit_byte(0xC0 | encode); + emit_simd_arith(0x10, dst, src, VEX_SIMD_F3); } void Assembler::movss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_F3); - emit_byte(0x10); - emit_operand(dst, src); + emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3); } void Assembler::movss(Address dst, XMMRegister src) { @@ -2001,32 +1897,22 @@ void Assembler::mulsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x59); - emit_operand(dst, src); + emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); } void Assembler::mulsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x59); - emit_byte(0xC0 | encode); + emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); } void Assembler::mulss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x59); - emit_operand(dst, src); + emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); } void Assembler::mulss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x59); - emit_byte(0xC0 | encode); + emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); } void Assembler::negl(Register dst) { @@ -2315,17 +2201,12 @@ void Assembler::packuswb(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66); - 
emit_byte(0x67); - emit_operand(dst, src); + emit_simd_arith(0x67, dst, src, VEX_SIMD_66); } void Assembler::packuswb(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); - emit_byte(0x67); - emit_byte(0xC0 | encode); + emit_simd_arith(0x67, dst, src, VEX_SIMD_66); } void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { @@ -2339,7 +2220,7 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); emit_byte(0x61); emit_byte(0xC0 | encode); emit_byte(imm8); @@ -2355,7 +2236,7 @@ void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x30); emit_byte(0xC0 | encode); } @@ -2456,28 +2337,10 @@ a_byte(p); } -void Assembler::por(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); - emit_byte(0xEB); - emit_byte(0xC0 | encode); -} - -void Assembler::por(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66); - emit_byte(0xEB); - emit_operand(dst, src); -} - void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); - emit_byte(0x70); - emit_byte(0xC0 | encode); + 
emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); emit_byte(mode & 0xFF); } @@ -2496,9 +2359,7 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); - emit_byte(0x70); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2); emit_byte(mode & 0xFF); } @@ -2513,18 +2374,6 @@ emit_byte(mode & 0xFF); } -void Assembler::psrlq(XMMRegister dst, int shift) { - // Shift 64 bit value logically right by specified number of bits. - // HMM Table D-1 says sse2 or mmx. - // Do not confuse it with psrldq SSE2 instruction which - // shifts 128 bit value in xmm register by number of bytes. - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); - emit_byte(0x73); - emit_byte(0xC0 | encode); - emit_byte(shift); -} - void Assembler::psrldq(XMMRegister dst, int shift) { // Shift 128 bit value in xmm register by number of bytes. 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); @@ -2545,7 +2394,7 @@ void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x17); emit_byte(0xC0 | encode); } @@ -2553,40 +2402,28 @@ void Assembler::punpcklbw(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66); - emit_byte(0x60); - emit_operand(dst, src); + emit_simd_arith(0x60, dst, src, VEX_SIMD_66); } void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); - emit_byte(0x60); - emit_byte(0xC0 | encode); + emit_simd_arith(0x60, dst, src, VEX_SIMD_66); } void Assembler::punpckldq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66); - emit_byte(0x62); - emit_operand(dst, src); + emit_simd_arith(0x62, dst, src, VEX_SIMD_66); } void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); - emit_byte(0x62); - emit_byte(0xC0 | encode); + emit_simd_arith(0x62, dst, src, VEX_SIMD_66); } void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); - emit_byte(0x6C); - emit_byte(0xC0 | encode); + emit_simd_arith(0x6C, dst, src, VEX_SIMD_66); } void Assembler::push(int32_t imm32) { @@ -2616,22 +2453,6 @@ } 
#endif -void Assembler::pxor(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66); - emit_byte(0xEF); - emit_operand(dst, src); -} - -void Assembler::pxor(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); - emit_byte(0xEF); - emit_byte(0xC0 | encode); -} - void Assembler::rcll(Register dst, int imm8) { assert(isShiftCount(imm8), "illegal shift count"); int encode = prefix_and_encode(dst->encoding()); @@ -2790,32 +2611,22 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x51); - emit_byte(0xC0 | encode); + emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); } void Assembler::sqrtsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x51); - emit_operand(dst, src); + emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); } void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x51); - emit_byte(0xC0 | encode); + emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); } void Assembler::sqrtss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x51); - emit_operand(dst, src); + emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); } void Assembler::stmxcsr( Address dst) { @@ -2865,32 +2676,22 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = 
simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x5C); - emit_byte(0xC0 | encode); + emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); } void Assembler::subsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F2); - emit_byte(0x5C); - emit_operand(dst, src); + emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); } void Assembler::subss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x5C); - emit_byte(0xC0 | encode); + emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); } void Assembler::subss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_F3); - emit_byte(0x5C); - emit_operand(dst, src); + emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); } void Assembler::testb(Register dst, int imm8) { @@ -2928,32 +2729,22 @@ void Assembler::ucomisd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66); - emit_byte(0x2E); - emit_operand(dst, src); + emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); } void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); - emit_byte(0x2E); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); } void Assembler::ucomiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_NONE); - emit_byte(0x2E); - emit_operand(dst, src); + emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); } void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = 
simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); - emit_byte(0x2E); - emit_byte(0xC0 | encode); + emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); } @@ -2995,211 +2786,714 @@ emit_arith(0x33, 0xC0, dst, src); } -void Assembler::xorpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); - emit_byte(0x57); - emit_byte(0xC0 | encode); -} - -void Assembler::xorpd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_66); - emit_byte(0x57); - emit_operand(dst, src); -} - - -void Assembler::xorps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); - emit_byte(0x57); - emit_byte(0xC0 | encode); -} - -void Assembler::xorps(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - simd_prefix(dst, dst, src, VEX_SIMD_NONE); - emit_byte(0x57); - emit_operand(dst, src); -} - -// AVX 3-operands non destructive source instructions (encoded with VEX prefix) + +// AVX 3-operands scalar float-point arithmetic instructions void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_F2); - emit_byte(0x58); - emit_operand(dst, src); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); } void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); - emit_byte(0x58); - emit_byte(0xC0 | encode); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); } void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 
assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_F3); - emit_byte(0x58); - emit_operand(dst, src); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); } void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); - emit_byte(0x58); - emit_byte(0xC0 | encode); -} - -void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) { - assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector - emit_byte(0x54); - emit_operand(dst, src); -} - -void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) { - assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector - emit_byte(0x54); - emit_operand(dst, src); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); } void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_F2); - emit_byte(0x5E); - emit_operand(dst, src); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); } void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); - emit_byte(0x5E); - emit_byte(0xC0 | encode); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); } void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_F3); - emit_byte(0x5E); - emit_operand(dst, src); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); } void Assembler::vdivss(XMMRegister dst, 
XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); - emit_byte(0x5E); - emit_byte(0xC0 | encode); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); } void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_F2); - emit_byte(0x59); - emit_operand(dst, src); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); } void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); - emit_byte(0x59); - emit_byte(0xC0 | encode); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); } void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_F3); - emit_byte(0x59); - emit_operand(dst, src); + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); } void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); - emit_byte(0x59); - emit_byte(0xC0 | encode); -} - + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); +} void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_F2); - emit_byte(0x5C); - emit_operand(dst, src); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); } void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); - emit_byte(0x5C); 
- emit_byte(0xC0 | encode); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); } void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_F3); - emit_byte(0x5C); - emit_operand(dst, src); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); } void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); - emit_byte(0x5C); - emit_byte(0xC0 | encode); -} - -void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) { + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); +} + +//====================VECTOR ARITHMETIC===================================== + +// Float-point vector arithmetic + +void Assembler::addpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x58, dst, src, VEX_SIMD_66); +} + +void Assembler::addps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); +} + +void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + 
emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::subpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); +} + +void Assembler::subps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); +} + +void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::mulpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x59, dst, src, VEX_SIMD_66); +} + +void Assembler::mulps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); +} + +void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::vmulpd(XMMRegister dst, 
XMMRegister nds, Address src, bool vector256) { assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector - emit_byte(0x57); - emit_operand(dst, src); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::divpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); +} + +void Assembler::divps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); +} + +void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::andpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x54, dst, src, VEX_SIMD_66); +} + +void Assembler::andps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); +} + +void Assembler::andps(XMMRegister 
dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); +} + +void Assembler::andpd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x54, dst, src, VEX_SIMD_66); +} + +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::xorpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x57, dst, src, VEX_SIMD_66); +} + +void Assembler::xorps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); +} + +void Assembler::xorpd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0x57, dst, src, VEX_SIMD_66); +} + +void Assembler::xorps(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); } void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256); - emit_byte(0x57); - emit_byte(0xC0 | 
encode); -} - -void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { - assert(VM_Version::supports_avx(), ""); - InstructionMark im(this); - vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector - emit_byte(0x57); - emit_operand(dst, src); + emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); } void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { assert(VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256); - emit_byte(0x57); + emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); +} + +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); +} + + +// Integer vector arithmetic +void Assembler::paddb(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xFC, dst, src, VEX_SIMD_66); +} + +void Assembler::paddw(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xFD, dst, src, VEX_SIMD_66); +} + +void Assembler::paddd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xFE, dst, src, VEX_SIMD_66); +} + +void Assembler::paddq(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); +} + +void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, 
vector256); +} + +void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); +} + 
+void Assembler::psubb(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xF8, dst, src, VEX_SIMD_66); +} + +void Assembler::psubw(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xF9, dst, src, VEX_SIMD_66); +} + +void Assembler::psubd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); +} + +void Assembler::psubq(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); +} + +void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors 
requires AVX2"); + emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::pmullw(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xD5, dst, src, VEX_SIMD_66); +} + +void Assembler::pmulld(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x40); + emit_byte(0xC0 | encode); +} + +void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); + emit_byte(0x40); + emit_byte(0xC0 | encode); +} + +void 
Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + InstructionMark im(this); + int dst_enc = dst->encoding(); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); + emit_byte(0x40); + emit_operand(dst, src); +} + +// Shift packed integers left by specified number of bits. +void Assembler::psllw(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + // XMM6 is for /6 encoding: 66 0F 71 /6 ib + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); + emit_byte(0x71); + emit_byte(0xC0 | encode); + emit_byte(shift & 0xFF); +} + +void Assembler::pslld(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + // XMM6 is for /6 encoding: 66 0F 72 /6 ib + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); + emit_byte(0x72); + emit_byte(0xC0 | encode); + emit_byte(shift & 0xFF); +} + +void Assembler::psllq(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + // XMM6 is for /6 encoding: 66 0F 73 /6 ib + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); + emit_byte(0x73); emit_byte(0xC0 | encode); + emit_byte(shift & 0xFF); +} + +void Assembler::psllw(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66); +} + +void Assembler::pslld(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + 
emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); +} + +void Assembler::psllq(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); +} + +void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + // XMM6 is for /6 encoding: 66 0F 71 /6 ib + emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256); + emit_byte(shift & 0xFF); +} + +void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + // XMM6 is for /6 encoding: 66 0F 72 /6 ib + emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256); + emit_byte(shift & 0xFF); +} + +void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + // XMM6 is for /6 encoding: 66 0F 73 /6 ib + emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256); + emit_byte(shift & 0xFF); +} + +void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256); +} + +void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256); +} + +void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { + assert(VM_Version::supports_avx() && 
!vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256); +} + +// Shift packed integers logically right by specified number of bits. +void Assembler::psrlw(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + // XMM2 is for /2 encoding: 66 0F 71 /2 ib + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); + emit_byte(0x71); + emit_byte(0xC0 | encode); + emit_byte(shift & 0xFF); +} + +void Assembler::psrld(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + // XMM2 is for /2 encoding: 66 0F 72 /2 ib + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); + emit_byte(0x72); + emit_byte(0xC0 | encode); + emit_byte(shift & 0xFF); +} + +void Assembler::psrlq(XMMRegister dst, int shift) { + // Do not confuse it with psrldq SSE2 instruction which + // shifts 128 bit value in xmm register by number of bytes. + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + // XMM2 is for /2 encoding: 66 0F 73 /2 ib + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); + emit_byte(0x73); + emit_byte(0xC0 | encode); + emit_byte(shift & 0xFF); +} + +void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66); +} + +void Assembler::psrld(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66); +} + +void Assembler::psrlq(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66); +} + +void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + // XMM2 is for /2 encoding: 66 0F 71 /2 
ib + emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256); + emit_byte(shift & 0xFF); +} + +void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + // XMM2 is for /2 encoding: 66 0F 72 /2 ib + emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256); + emit_byte(shift & 0xFF); +} + +void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + // XMM2 is for /2 encoding: 66 0F 73 /2 ib + emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256); + emit_byte(shift & 0xFF); +} + +void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256); +} + +void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256); +} + +void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256); +} + +// Shift packed integers arithmetically right by specified number of bits. 
+void Assembler::psraw(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + // XMM4 is for /4 encoding: 66 0F 71 /4 ib + int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); + emit_byte(0x71); + emit_byte(0xC0 | encode); + emit_byte(shift & 0xFF); +} + +void Assembler::psrad(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + // XMM4 is for /4 encoding: 66 0F 72 /4 ib + int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); + emit_byte(0x72); + emit_byte(0xC0 | encode); + emit_byte(shift & 0xFF); +} + +void Assembler::psraw(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66); +} + +void Assembler::psrad(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66); +} + +void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + // XMM4 is for /4 encoding: 66 0F 71 /4 ib + emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256); + emit_byte(shift & 0xFF); +} + +void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + // XMM4 is for /4 encoding: 66 0F 71 /4 ib + emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256); + emit_byte(shift & 0xFF); +} + +void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256); +} + +void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister 
shift, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256); +} + + +// AND packed integers +void Assembler::pand(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); +} + +void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::por(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); +} + +void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::pxor(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); } void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - assert(VM_Version::supports_avx2() || 
(!vector256) && VM_Version::supports_avx(), ""); - int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256); - emit_byte(0xEF); - emit_byte(0xC0 | encode); -} + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); +} + +void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); + emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); +} + void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx(), ""); @@ -3805,6 +4099,49 @@ } } +void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { + InstructionMark im(this); + simd_prefix(dst, dst, src, pre); + emit_byte(opcode); + emit_operand(dst, src); +} + +void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { + int encode = simd_prefix_and_encode(dst, dst, src, pre); + emit_byte(opcode); + emit_byte(0xC0 | encode); +} + +// Versions with no second source register (non-destructive source). 
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { + InstructionMark im(this); + simd_prefix(dst, xnoreg, src, pre); + emit_byte(opcode); + emit_operand(dst, src); +} + +void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { + int encode = simd_prefix_and_encode(dst, xnoreg, src, pre); + emit_byte(opcode); + emit_byte(0xC0 | encode); +} + +// 3-operands AVX instructions +void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, + Address src, VexSimdPrefix pre, bool vector256) { + InstructionMark im(this); + vex_prefix(dst, nds, src, pre, vector256); + emit_byte(opcode); + emit_operand(dst, src); +} + +void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, + XMMRegister src, VexSimdPrefix pre, bool vector256) { + int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256); + emit_byte(opcode); + emit_byte(0xC0 | encode); +} + #ifndef _LP64 void Assembler::incl(Register dst) { @@ -7968,21 +8305,21 @@ } } -void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { +void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { if (reachable(src)) { - vandpd(dst, nds, as_Address(src)); + vandpd(dst, nds, as_Address(src), vector256); } else { lea(rscratch1, src); - vandpd(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + vandpd(dst, nds, Address(rscratch1, 0), vector256); + } +} + +void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { if (reachable(src)) { - vandps(dst, nds, as_Address(src)); + vandps(dst, nds, as_Address(src), vector256); } else { lea(rscratch1, src); - vandps(dst, nds, Address(rscratch1, 0)); + vandps(dst, nds, Address(rscratch1, 0), vector256); } } @@ -8040,21 +8377,21 @@ } } -void MacroAssembler::vxorpd(XMMRegister 
dst, XMMRegister nds, AddressLiteral src) { +void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { if (reachable(src)) { - vxorpd(dst, nds, as_Address(src)); + vxorpd(dst, nds, as_Address(src), vector256); } else { lea(rscratch1, src); - vxorpd(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + vxorpd(dst, nds, Address(rscratch1, 0), vector256); + } +} + +void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { if (reachable(src)) { - vxorps(dst, nds, as_Address(src)); + vxorps(dst, nds, as_Address(src), vector256); } else { lea(rscratch1, src); - vxorps(dst, nds, Address(rscratch1, 0)); + vxorps(dst, nds, Address(rscratch1, 0), vector256); } } diff -r be82ef218872 -r b3602ff9c1b8 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/cpu/x86/vm/assembler_x86.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -617,6 +617,7 @@ VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { simd_prefix(dst, xnoreg, src, pre, opc); } + void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { simd_prefix(src, dst, pre); } @@ -626,16 +627,10 @@ simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); } - int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, bool rex_w = false, bool vector256 = false); - int simd_prefix_and_encode(XMMRegister dst, XMMRegister src, - VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { - return simd_prefix_and_encode(dst, xnoreg, src, pre, opc); - } - // Move/convert 32-bit integer value. 
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, VexSimdPrefix pre) { @@ -677,6 +672,15 @@ void emit_arith(int op1, int op2, Register dst, jobject obj); void emit_arith(int op1, int op2, Register dst, Register src); + void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); + void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); + void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); + void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); + void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, + Address src, VexSimdPrefix pre, bool vector256); + void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, + XMMRegister src, VexSimdPrefix pre, bool vector256); + void emit_operand(Register reg, Register base, Register index, Address::ScaleFactor scale, int disp, @@ -891,12 +895,6 @@ void andq(Register dst, Address src); void andq(Register dst, Register src); - // Bitwise Logical AND of Packed Double-Precision Floating-Point Values - void andpd(XMMRegister dst, XMMRegister src); - - // Bitwise Logical AND of Packed Single-Precision Floating-Point Values - void andps(XMMRegister dst, XMMRegister src); - void bsfl(Register dst, Register src); void bsrl(Register dst, Register src); @@ -1436,10 +1434,6 @@ void prefetcht2(Address src); void prefetchw(Address src); - // POR - Bitwise logical OR - void por(XMMRegister dst, XMMRegister src); - void por(XMMRegister dst, Address src); - // Shuffle Packed Doublewords void pshufd(XMMRegister dst, XMMRegister src, int mode); void pshufd(XMMRegister dst, Address src, int mode); @@ -1448,9 +1442,6 @@ void pshuflw(XMMRegister dst, XMMRegister src, int mode); void pshuflw(XMMRegister dst, Address src, int mode); - // Shift Right by bits Logical Quadword Immediate - void psrlq(XMMRegister dst, int shift); - // Shift Right by bytes Logical DoubleQuadword Immediate 
void psrldq(XMMRegister dst, int shift); @@ -1475,10 +1466,6 @@ void pushq(Address src); - // Xor Packed Byte Integer Values - void pxor(XMMRegister dst, Address src); - void pxor(XMMRegister dst, XMMRegister src); - void rcll(Register dst, int imm8); void rclq(Register dst, int imm8); @@ -1601,15 +1588,10 @@ void xorq(Register dst, Address src); void xorq(Register dst, Register src); - // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values - void xorpd(XMMRegister dst, XMMRegister src); - - // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values - void xorps(XMMRegister dst, XMMRegister src); - void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 // AVX 3-operands scalar instructions (encoded with VEX prefix) + void vaddsd(XMMRegister dst, XMMRegister nds, Address src); void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); void vaddss(XMMRegister dst, XMMRegister nds, Address src); @@ -1627,14 +1609,147 @@ void vsubss(XMMRegister dst, XMMRegister nds, Address src); void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); - // AVX Vector instrucitons. 
- void vandpd(XMMRegister dst, XMMRegister nds, Address src); - void vandps(XMMRegister dst, XMMRegister nds, Address src); - void vxorpd(XMMRegister dst, XMMRegister nds, Address src); - void vxorps(XMMRegister dst, XMMRegister nds, Address src); + + //====================VECTOR ARITHMETIC===================================== + + // Add Packed Floating-Point Values + void addpd(XMMRegister dst, XMMRegister src); + void addps(XMMRegister dst, XMMRegister src); + void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Subtract Packed Floating-Point Values + void subpd(XMMRegister dst, XMMRegister src); + void subps(XMMRegister dst, XMMRegister src); + void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Multiply Packed Floating-Point Values + void mulpd(XMMRegister dst, XMMRegister src); + void mulps(XMMRegister dst, XMMRegister src); + void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Divide Packed Floating-Point Values + void divpd(XMMRegister dst, XMMRegister src); + void divps(XMMRegister dst, XMMRegister src); + void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool 
vector256); + void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Bitwise Logical AND of Packed Floating-Point Values + void andpd(XMMRegister dst, XMMRegister src); + void andps(XMMRegister dst, XMMRegister src); + void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Bitwise Logical XOR of Packed Floating-Point Values + void xorpd(XMMRegister dst, XMMRegister src); + void xorps(XMMRegister dst, XMMRegister src); void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Add packed integers + void paddb(XMMRegister dst, XMMRegister src); + void paddw(XMMRegister dst, XMMRegister src); + void paddd(XMMRegister dst, XMMRegister src); + void paddq(XMMRegister dst, XMMRegister src); + void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Sub packed integers 
+ void psubb(XMMRegister dst, XMMRegister src); + void psubw(XMMRegister dst, XMMRegister src); + void psubd(XMMRegister dst, XMMRegister src); + void psubq(XMMRegister dst, XMMRegister src); + void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Multiply packed integers (only shorts and ints) + void pmullw(XMMRegister dst, XMMRegister src); + void pmulld(XMMRegister dst, XMMRegister src); + void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Shift left packed integers + void psllw(XMMRegister dst, int shift); + void pslld(XMMRegister dst, int shift); + void psllq(XMMRegister dst, int shift); + void psllw(XMMRegister dst, XMMRegister shift); + void pslld(XMMRegister dst, XMMRegister shift); + void psllq(XMMRegister dst, XMMRegister shift); + void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256); + void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256); + void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256); + void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); + void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, 
bool vector256); + void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); + + // Logical shift right packed integers + void psrlw(XMMRegister dst, int shift); + void psrld(XMMRegister dst, int shift); + void psrlq(XMMRegister dst, int shift); + void psrlw(XMMRegister dst, XMMRegister shift); + void psrld(XMMRegister dst, XMMRegister shift); + void psrlq(XMMRegister dst, XMMRegister shift); + void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256); + void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256); + void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256); + void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); + void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); + void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); + + // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs) + void psraw(XMMRegister dst, int shift); + void psrad(XMMRegister dst, int shift); + void psraw(XMMRegister dst, XMMRegister shift); + void psrad(XMMRegister dst, XMMRegister shift); + void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256); + void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256); + void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); + void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); + + // And packed integers + void pand(XMMRegister dst, XMMRegister src); + void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Or packed integers + void por(XMMRegister dst, XMMRegister src); + void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Xor packed integers + void 
pxor(XMMRegister dst, XMMRegister src); void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); + void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); + + // Copy low 128bit into high 128bit of YMM registers. void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); @@ -2532,11 +2647,13 @@ void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); - void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); } - void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); } - void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src); + void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); } + void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); } + void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); + + void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); } + void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); } + void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } @@ -2565,12 +2682,12 @@ // AVX Vector instructions void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } - void 
vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); } - void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } + void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } - void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); } - void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src); + void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } + void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2 @@ -2578,6 +2695,12 @@ else Assembler::vxorpd(dst, nds, src, vector256); } + void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2 + Assembler::vpxor(dst, nds, src, vector256); + else + Assembler::vxorpd(dst, nds, src, vector256); + } // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector. void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { diff -r be82ef218872 -r b3602ff9c1b8 src/cpu/x86/vm/c1_CodeStubs_x86.cpp --- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -488,68 +488,6 @@ } -void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) { - // At this point we know that offset == referent_offset. - // - // So we might have to emit: - // if (src == null) goto continuation. 
- // - // and we definitely have to emit: - // if (klass(src).reference_type == REF_NONE) goto continuation - // if (!marking_active) goto continuation - // if (pre_val == null) goto continuation - // call pre_barrier(pre_val) - // goto continuation - // - __ bind(_entry); - - assert(src()->is_register(), "sanity"); - Register src_reg = src()->as_register(); - - if (gen_src_check()) { - // The original src operand was not a constant. - // Generate src == null? - __ cmpptr(src_reg, (int32_t) NULL_WORD); - __ jcc(Assembler::equal, _continuation); - } - - // Generate src->_klass->_reference_type == REF_NONE)? - assert(tmp()->is_register(), "sanity"); - Register tmp_reg = tmp()->as_register(); - - __ load_klass(tmp_reg, src_reg); - - Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset()); - __ cmpb(ref_type_adr, REF_NONE); - __ jcc(Assembler::equal, _continuation); - - // Is marking active? - assert(thread()->is_register(), "precondition"); - Register thread_reg = thread()->as_pointer_register(); - - Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_active())); - - if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { - __ cmpl(in_progress, 0); - } else { - assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); - __ cmpb(in_progress, 0); - } - __ jcc(Assembler::equal, _continuation); - - // val == null? 
- assert(val()->is_register(), "Precondition."); - Register val_reg = val()->as_register(); - - __ cmpptr(val_reg, (int32_t) NULL_WORD); - __ jcc(Assembler::equal, _continuation); - - ce->store_parameter(val()->as_register(), 0); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); - __ jmp(_continuation); -} - jbyte* G1PostBarrierStub::_byte_map_base = NULL; jbyte* G1PostBarrierStub::byte_map_base_slow() { diff -r be82ef218872 -r b3602ff9c1b8 src/cpu/x86/vm/x86.ad --- a/src/cpu/x86/vm/x86.ad Wed Aug 22 10:01:51 2012 +0200 +++ b/src/cpu/x86/vm/x86.ad Fri Aug 24 19:45:42 2012 -0700 @@ -500,6 +500,24 @@ 0 /*bottom*/ }; +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + case Op_PopCountI: + case Op_PopCountL: + if (!UsePopCountInstruction) + return false; + case Op_MulVI: + if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX + return false; + break; + } + + return true; // Per default match rules are supported. +} + // Max vector size in bytes. 0 if not supported. 
const int Matcher::vector_width_in_bytes(BasicType bt) { assert(is_java_primitive(bt), "only primitive type vectors"); @@ -1439,8 +1457,9 @@ ins_cost(150); format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} ins_encode %{ + bool vector256 = false; __ vandps($dst$$XMMRegister, $src$$XMMRegister, - ExternalAddress(float_signmask())); + ExternalAddress(float_signmask()), vector256); %} ins_pipe(pipe_slow); %} @@ -1464,8 +1483,9 @@ format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" "# abs double by sign masking" %} ins_encode %{ + bool vector256 = false; __ vandpd($dst$$XMMRegister, $src$$XMMRegister, - ExternalAddress(double_signmask())); + ExternalAddress(double_signmask()), vector256); %} ins_pipe(pipe_slow); %} @@ -1487,8 +1507,9 @@ ins_cost(150); format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} ins_encode %{ + bool vector256 = false; __ vxorps($dst$$XMMRegister, $src$$XMMRegister, - ExternalAddress(float_signflip())); + ExternalAddress(float_signflip()), vector256); %} ins_pipe(pipe_slow); %} @@ -1512,8 +1533,9 @@ format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" "# neg double by sign flipping" %} ins_encode %{ + bool vector256 = false; __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, - ExternalAddress(double_signflip())); + ExternalAddress(double_signflip()), vector256); %} ins_pipe(pipe_slow); %} @@ -2382,3 +2404,2416 @@ ins_pipe( fpu_reg_reg ); %} +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +// Bytes vector add +instruct vadd4B(vecS dst, vecS src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVB dst src)); + format %{ "paddb $dst,$src\t! 
add packed4B" %} + ins_encode %{ + __ paddb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} + ins_encode %{ + bool vector256 = false; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8B(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVB dst src)); + format %{ "paddb $dst,$src\t! add packed8B" %} + ins_encode %{ + __ paddb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} + ins_encode %{ + bool vector256 = false; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16B(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB dst src)); + format %{ "paddb $dst,$src\t! add packed16B" %} + ins_encode %{ + __ paddb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} + ins_encode %{ + bool vector256 = false; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 16); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! 
add packed16B" %} + ins_encode %{ + bool vector256 = false; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (AddVB src1 src2)); + format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} + ins_encode %{ + bool vector256 = true; + __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} + ins_encode %{ + bool vector256 = true; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Shorts/Chars vector add +instruct vadd2S(vecS dst, vecS src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVS dst src)); + format %{ "paddw $dst,$src\t! add packed2S" %} + ins_encode %{ + __ paddw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4S(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVS dst src)); + format %{ "paddw $dst,$src\t! 
add packed4S" %} + ins_encode %{ + __ paddw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8S(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS dst src)); + format %{ "paddw $dst,$src\t! add packed8S" %} + ins_encode %{ + __ paddw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (AddVS src1 src2)); + format %{ "vpaddw $dst,$src1,$src2\t! 
add packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Integers vector add +instruct vadd2I(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI dst src)); + format %{ "paddd $dst,$src\t! add packed2I" %} + ins_encode %{ + __ paddd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVI src1 src2)); + format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4I(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI dst src)); + format %{ "paddd $dst,$src\t! add packed4I" %} + ins_encode %{ + __ paddd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + format %{ "vpaddd $dst,$src1,$src2\t! 
add packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVI src (LoadVector mem))); + format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (AddVI src1 src2)); + format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (AddVI src (LoadVector mem))); + format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Longs vector add +instruct vadd2L(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL dst src)); + format %{ "paddq $dst,$src\t! add packed2L" %} + ins_encode %{ + __ paddq($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + format %{ "vpaddq $dst,$src1,$src2\t! 
add packed2L" %} + ins_encode %{ + bool vector256 = false; + __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVL src (LoadVector mem))); + format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} + ins_encode %{ + bool vector256 = false; + __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (AddVL src1 src2)); + format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} + ins_encode %{ + bool vector256 = true; + __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (AddVL src (LoadVector mem))); + format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} + ins_encode %{ + bool vector256 = true; + __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Floats vector add +instruct vadd2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF dst src)); + format %{ "addps $dst,$src\t! add packed2F" %} + ins_encode %{ + __ addps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + format %{ "vaddps $dst,$src1,$src2\t! 
add packed2F" %} + ins_encode %{ + bool vector256 = false; + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4F(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF dst src)); + format %{ "addps $dst,$src\t! add packed4F" %} + ins_encode %{ + __ addps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} + ins_encode %{ + bool vector256 = false; + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVF src (LoadVector mem))); + format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} + ins_encode %{ + bool vector256 = false; + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVF src1 src2)); + format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} + ins_encode %{ + bool vector256 = true; + __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVF src (LoadVector mem))); + format %{ "vaddps $dst,$src,$mem\t! 
add packed8F" %} + ins_encode %{ + bool vector256 = true; + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Doubles vector add +instruct vadd2D(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVD dst src)); + format %{ "addpd $dst,$src\t! add packed2D" %} + ins_encode %{ + __ addpd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVD src1 src2)); + format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} + ins_encode %{ + bool vector256 = false; + __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVD src (LoadVector mem))); + format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} + ins_encode %{ + bool vector256 = false; + __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVD src1 src2)); + format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} + ins_encode %{ + bool vector256 = true; + __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVD src (LoadVector mem))); + format %{ "vaddpd $dst,$src,$mem\t! 
add packed4D" %} + ins_encode %{ + bool vector256 = true; + __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- SUB -------------------------------------- + +// Bytes vector sub +instruct vsub4B(vecS dst, vecS src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVB dst src)); + format %{ "psubb $dst,$src\t! sub packed4B" %} + ins_encode %{ + __ psubb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} + ins_encode %{ + bool vector256 = false; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVB dst src)); + format %{ "psubb $dst,$src\t! sub packed8B" %} + ins_encode %{ + __ psubb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} + ins_encode %{ + bool vector256 = false; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB dst src)); + format %{ "psubb $dst,$src\t! 
sub packed16B" %} + ins_encode %{ + __ psubb($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} + ins_encode %{ + bool vector256 = false; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 16); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} + ins_encode %{ + bool vector256 = false; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (SubVB src1 src2)); + format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} + ins_encode %{ + bool vector256 = true; + __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 32); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} + ins_encode %{ + bool vector256 = true; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Shorts/Chars vector sub +instruct vsub2S(vecS dst, vecS src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVS dst src)); + format %{ "psubw $dst,$src\t! 
sub packed2S" %} + ins_encode %{ + __ psubw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4S(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVS dst src)); + format %{ "psubw $dst,$src\t! sub packed4S" %} + ins_encode %{ + __ psubw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8S(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS dst src)); + format %{ "psubw $dst,$src\t! sub packed8S" %} + ins_encode %{ + __ psubw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (SubVS src1 src2)); + format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Integers vector sub +instruct vsub2I(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI dst src)); + format %{ "psubd $dst,$src\t! sub packed2I" %} + ins_encode %{ + __ psubd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVI src1 src2)); + format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4I(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI dst src)); + format %{ "psubd $dst,$src\t! 
sub packed4I" %} + ins_encode %{ + __ psubd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVI src (LoadVector mem))); + format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (SubVI src1 src2)); + format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (SubVI src (LoadVector mem))); + format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Longs vector sub +instruct vsub2L(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL dst src)); + format %{ "psubq $dst,$src\t! 
sub packed2L" %} + ins_encode %{ + __ psubq($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} + ins_encode %{ + bool vector256 = false; + __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVL src (LoadVector mem))); + format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} + ins_encode %{ + bool vector256 = false; + __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (SubVL src1 src2)); + format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} + ins_encode %{ + bool vector256 = true; + __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (SubVL src (LoadVector mem))); + format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} + ins_encode %{ + bool vector256 = true; + __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Floats vector sub +instruct vsub2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF dst src)); + format %{ "subps $dst,$src\t! 
sub packed2F" %} + ins_encode %{ + __ subps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVF src1 src2)); + format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} + ins_encode %{ + bool vector256 = false; + __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4F(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVF dst src)); + format %{ "subps $dst,$src\t! sub packed4F" %} + ins_encode %{ + __ subps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVF src1 src2)); + format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} + ins_encode %{ + bool vector256 = false; + __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVF src (LoadVector mem))); + format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} + ins_encode %{ + bool vector256 = false; + __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (SubVF src1 src2)); + format %{ "vsubps $dst,$src1,$src2\t! 
sub packed8F" %} + ins_encode %{ + bool vector256 = true; + __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (SubVF src (LoadVector mem))); + format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} + ins_encode %{ + bool vector256 = true; + __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Doubles vector sub +instruct vsub2D(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVD dst src)); + format %{ "subpd $dst,$src\t! sub packed2D" %} + ins_encode %{ + __ subpd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVD src1 src2)); + format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} + ins_encode %{ + bool vector256 = false; + __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVD src (LoadVector mem))); + format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} + ins_encode %{ + bool vector256 = false; + __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVD src1 src2)); + format %{ "vsubpd $dst,$src1,$src2\t! 
sub packed4D" %} + ins_encode %{ + bool vector256 = true; + __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVD src (LoadVector mem))); + format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} + ins_encode %{ + bool vector256 = true; + __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- MUL -------------------------------------- + +// Shorts/Chars vector mul: pmullw is the two-operand form (dst is also the first source), vpmullw the three-operand form (UseAVX > 0; the 16-element vecY forms need UseAVX > 1) +instruct vmul2S(vecS dst, vecS src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVS dst src)); + format %{ "pmullw $dst,$src\t! mul packed2S" %} + ins_encode %{ + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVS dst src)); + format %{ "pmullw $dst,$src\t! mul packed4S" %} + ins_encode %{ + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8S(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVS dst src)); + format %{ "pmullw $dst,$src\t! mul packed8S" %} + ins_encode %{ + __ pmullw($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (MulVS src1 src2)); + format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! 
mul packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Integers vector mul (sse4_1): the pmulld forms are guarded by UseSSE > 3, the vpmulld forms by UseAVX > 0 (UseAVX > 1 for the 8-element vecY forms) +instruct vmul2I(vecD dst, vecD src) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 2); + match(Set dst (MulVI dst src)); + format %{ "pmulld $dst,$src\t! mul packed2I" %} + ins_encode %{ + __ pmulld($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVI src1 src2)); + format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4I(vecX dst, vecX src) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 4); + match(Set dst (MulVI dst src)); + format %{ "pmulld $dst,$src\t! mul packed4I" %} + ins_encode %{ + __ pmulld($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVI src (LoadVector mem))); + format %{ "vpmulld $dst,$src,$mem\t! 
mul packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (MulVI src1 src2)); + format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (MulVI src (LoadVector mem))); + format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Floats vector mul: mulps is the two-operand form, vmulps the three-operand form (UseAVX > 0, including the 8-element vecY forms) +instruct vmul2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF dst src)); + format %{ "mulps $dst,$src\t! mul packed2F" %} + ins_encode %{ + __ mulps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} + ins_encode %{ + bool vector256 = false; + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4F(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVF dst src)); + format %{ "mulps $dst,$src\t! 
mul packed4F" %} + ins_encode %{ + __ mulps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVF src1 src2)); + format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} + ins_encode %{ + bool vector256 = false; + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVF src (LoadVector mem))); + format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} + ins_encode %{ + bool vector256 = false; + __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (MulVF src1 src2)); + format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} + ins_encode %{ + bool vector256 = true; + __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (MulVF src (LoadVector mem))); + format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} + ins_encode %{ + bool vector256 = true; + __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Doubles vector mul: mulpd is the two-operand form, vmulpd the three-operand form (UseAVX > 0, including the 4-element vecY forms) +instruct vmul2D(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVD dst src)); + format %{ "mulpd $dst,$src\t! 
mul packed2D" %} + ins_encode %{ + __ mulpd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVD src1 src2)); + format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} + ins_encode %{ + bool vector256 = false; + __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVD src (LoadVector mem))); + format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} + ins_encode %{ + bool vector256 = false; + __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVD src1 src2)); + format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} + ins_encode %{ + bool vector256 = true; + __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVD src (LoadVector mem))); + format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} + ins_encode %{ + bool vector256 = true; + __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- DIV -------------------------------------- + +// Floats vector div: divps is the two-operand form, vdivps the three-operand form (UseAVX > 0, including the 8-element vecY forms) +instruct vdiv2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVF dst src)); + format %{ "divps $dst,$src\t! 
div packed2F" %} + ins_encode %{ + __ divps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (DivVF src1 src2)); + format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} + ins_encode %{ + bool vector256 = false; + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv4F(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVF dst src)); + format %{ "divps $dst,$src\t! div packed4F" %} + ins_encode %{ + __ divps($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (DivVF src1 src2)); + format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} + ins_encode %{ + bool vector256 = false; + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (DivVF src (LoadVector mem))); + format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} + ins_encode %{ + bool vector256 = false; + __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (DivVF src1 src2)); + format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %} + ins_encode %{ + bool vector256 = true; + __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (DivVF src (LoadVector mem))); + format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} + ins_encode %{ + bool vector256 = true; + __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Doubles vector div: divpd is the two-operand form, vdivpd the three-operand form (UseAVX > 0, including the 4-element vecY forms) +instruct vdiv2D(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVD dst src)); + format %{ "divpd $dst,$src\t! div packed2D" %} + ins_encode %{ + __ divpd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (DivVD src1 src2)); + format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} + ins_encode %{ + bool vector256 = false; + __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (DivVD src (LoadVector mem))); + format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} + ins_encode %{ + bool vector256 = false; + __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (DivVD src1 src2)); + format %{ "vdivpd $dst,$src1,$src2\t! 
div packed4D" %} + ins_encode %{ + bool vector256 = true; + __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (DivVD src (LoadVector mem))); + format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} + ins_encode %{ + bool vector256 = true; + __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------ LeftShift ----------------------------------- + +// Shorts/Chars vector left shift: the shift count is either a regF register or an immI8 constant; the 16-element vecY forms need UseAVX > 1 +instruct vsll2S(vecS dst, regF shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed2S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_imm(vecS dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed2S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S(vecD dst, regF shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed4S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_imm(vecD dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed4S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8S(vecX dst, regF shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! left shift packed8S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8S_imm(vecX dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS dst shift)); + format %{ "psllw $dst,$shift\t! 
left shift packed8S" %} + ins_encode %{ + __ psllw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Integers vector left shift: same regF / immI8 count forms; the 8-element vecY forms need UseAVX > 1 +instruct vsll2I(vecD dst, regF shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI dst shift)); + format %{ "pslld $dst,$shift\t! 
left shift packed2I" %} + ins_encode %{ + __ pslld($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2I_imm(vecD dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI dst shift)); + format %{ "pslld $dst,$shift\t! left shift packed2I" %} + ins_encode %{ + __ pslld($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4I(vecX dst, regF shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI dst shift)); + format %{ "pslld $dst,$shift\t! left shift packed4I" %} + ins_encode %{ + __ pslld($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4I_imm(vecX dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI dst shift)); + format %{ "pslld $dst,$shift\t! left shift packed4I" %} + ins_encode %{ + __ pslld($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Longs vector left shift: same regF / immI8 count forms; the 4-element vecY forms need UseAVX > 1 +instruct vsll2L(vecX dst, regF shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL dst shift)); + format %{ "psllq $dst,$shift\t! left shift packed2L" %} + ins_encode %{ + __ psllq($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2L_imm(vecX dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL dst shift)); + format %{ "psllq $dst,$shift\t! 
left shift packed2L" %} + ins_encode %{ + __ psllq($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} + ins_encode %{ + bool vector256 = false; + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} + ins_encode %{ + bool vector256 = false; + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} + ins_encode %{ + bool vector256 = true; + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} + ins_encode %{ + bool vector256 = true; + __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// ----------------------- LogicalRightShift ----------------------------------- + +// Shorts/Chars vector logical right shift produces an incorrect Java result +// for negative data because Java code converts a short value into an int with +// sign extension before the shift. 
+ +// Integers vector logical right shift: the shift count is either a regF register or an immI8 constant; the 8-element vecY forms need UseAVX > 1 +instruct vsrl2I(vecD dst, regF shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI dst shift)); + format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} + ins_encode %{ + __ psrld($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2I_imm(vecD dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI dst shift)); + format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} + ins_encode %{ + __ psrld($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4I(vecX dst, regF shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI dst shift)); + format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} + ins_encode %{ + __ psrld($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4I_imm(vecX dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI dst shift)); + format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} + ins_encode %{ + __ psrld($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (URShiftVI src shift)); + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (URShiftVI src shift)); + format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Longs vector logical right shift: same regF / immI8 count forms; the 4-element vecY forms need UseAVX > 1 +instruct vsrl2L(vecX dst, regF shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL dst shift)); + format %{ "psrlq $dst,$shift\t! 
logical right shift packed2L" %} + ins_encode %{ + __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2L_imm(vecX dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL dst shift)); + format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} + ins_encode %{ + __ psrlq($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} + ins_encode %{ + bool vector256 = false; + __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} + ins_encode %{ + bool vector256 = false; + __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (URShiftVL src shift)); + format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} + ins_encode %{ + bool vector256 = true; + __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 4); + match(Set dst (URShiftVL src shift)); + format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed4L" %} + ins_encode %{ + bool vector256 = true; + __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------- ArithmeticRightShift ----------------------------------- + +// Shorts/Chars vector arithmetic right shift: the shift count is either a regF register or an immI8 constant; the 16-element vecY forms need UseAVX > 1 +instruct vsra2S(vecS dst, regF shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVS dst shift)); + format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + __ psraw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_imm(vecS dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVS dst shift)); + format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + __ psraw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} + ins_encode %{ + bool vector256 = false; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S(vecD dst, regF shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVS dst shift)); + format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed4S" %} + ins_encode %{ + __ psraw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_imm(vecD dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVS dst shift)); + format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + __ psraw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} + ins_encode %{ + bool vector256 = false; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S(vecX dst, regF shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS dst shift)); + format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + __ psraw($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_imm(vecX dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS dst shift)); + format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %} + ins_encode %{ + __ psraw($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} + ins_encode %{ + bool vector256 = false; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} + ins_encode %{ + bool vector256 = true; + __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// Integers vector arithmetic right shift: same regF / immI8 count forms; the 8-element vecY forms need UseAVX > 1 +instruct vsra2I(vecD dst, regF shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI dst shift)); + format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} + ins_encode %{ + __ psrad($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2I_imm(vecD dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI dst shift)); + format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} + ins_encode %{ + __ psrad($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} + ins_encode %{ + bool vector256 = false; + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4I(vecX dst, regF shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI dst shift)); + format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed4I" %} + ins_encode %{ + __ psrad($dst$$XMMRegister, $shift$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4I_imm(vecX dst, immI8 shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI dst shift)); + format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed4I" %} + ins_encode %{ + __ psrad($dst$$XMMRegister, (int)$shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} + ins_encode %{ + bool vector256 = false; + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (RShiftVI src shift)); + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (RShiftVI src shift)); + format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} + ins_encode %{ + bool vector256 = true; + __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// There are no vector arithmetic right shift instructions for longs.
+ + +// --------------------------------- AND -------------------------------------- + +instruct vand4B(vecS dst, vecS src) %{ + predicate(n->as_Vector()->length_in_bytes() == 4); + match(Set dst (AndV dst src)); + format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} + ins_encode %{ + __ pand($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); + match(Set dst (AndV src1 src2)); + format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand8B(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV dst src)); + format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} + ins_encode %{ + __ pand($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV src1 src2)); + format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand16B(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV dst src)); + format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} + ins_encode %{ + __ pand($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src1 src2)); + format %{ "vpand $dst,$src1,$src2\t! 
and vectors (16 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src (LoadVector mem))); + format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); + match(Set dst (AndV src1 src2)); + format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} + ins_encode %{ + bool vector256 = true; + __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); + match(Set dst (AndV src (LoadVector mem))); + format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} + ins_encode %{ + bool vector256 = true; + __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- OR --------------------------------------- + +instruct vor4B(vecS dst, vecS src) %{ + predicate(n->as_Vector()->length_in_bytes() == 4); + match(Set dst (OrV dst src)); + format %{ "por $dst,$src\t! or vectors (4 bytes)" %} + ins_encode %{ + __ por($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); + match(Set dst (OrV src1 src2)); + format %{ "vpor $dst,$src1,$src2\t! 
or vectors (4 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor8B(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (OrV dst src)); + format %{ "por $dst,$src\t! or vectors (8 bytes)" %} + ins_encode %{ + __ por($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); + match(Set dst (OrV src1 src2)); + format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor16B(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV dst src)); + format %{ "por $dst,$src\t! or vectors (16 bytes)" %} + ins_encode %{ + __ por($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 src2)); + format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src (LoadVector mem))); + format %{ "vpor $dst,$src,$mem\t! 
or vectors (16 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); + match(Set dst (OrV src1 src2)); + format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} + ins_encode %{ + bool vector256 = true; + __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); + match(Set dst (OrV src (LoadVector mem))); + format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} + ins_encode %{ + bool vector256 = true; + __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- XOR -------------------------------------- + +instruct vxor4B(vecS dst, vecS src) %{ + predicate(n->as_Vector()->length_in_bytes() == 4); + match(Set dst (XorV dst src)); + format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); + match(Set dst (XorV src1 src2)); + format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor8B(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV dst src)); + format %{ "pxor $dst,$src\t! 
xor vectors (8 bytes)" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV src1 src2)); + format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor16B(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV dst src)); + format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src1 src2)); + format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src (LoadVector mem))); + format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} + ins_encode %{ + bool vector256 = false; + __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ + predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); + match(Set dst (XorV src1 src2)); + format %{ "vpxor $dst,$src1,$src2\t! 
xor vectors (32 bytes)" %} + ins_encode %{ + bool vector256 = true; + __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); + %} + ins_pipe( pipe_slow ); +%} + +instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); + match(Set dst (XorV src (LoadVector mem))); + format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} + ins_encode %{ + bool vector256 = true; + __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); + %} + ins_pipe( pipe_slow ); +%} + diff -r be82ef218872 -r b3602ff9c1b8 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Wed Aug 22 10:01:51 2012 +0200 +++ b/src/cpu/x86/vm/x86_32.ad Fri Aug 24 19:45:42 2012 -0700 @@ -1367,22 +1367,6 @@ return offset; } - -const bool Matcher::match_rule_supported(int opcode) { - if (!has_match_rule(opcode)) - return false; - - switch (opcode) { - case Op_PopCountI: - case Op_PopCountL: - if (!UsePopCountInstruction) - return false; - break; - } - - return true; // Per default match rules are supported. -} - int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk } diff -r be82ef218872 -r b3602ff9c1b8 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Wed Aug 22 10:01:51 2012 +0200 +++ b/src/cpu/x86/vm/x86_64.ad Fri Aug 24 19:45:42 2012 -0700 @@ -1513,22 +1513,6 @@ return offset; } - -const bool Matcher::match_rule_supported(int opcode) { - if (!has_match_rule(opcode)) - return false; - - switch (opcode) { - case Op_PopCountI: - case Op_PopCountL: - if (!UsePopCountInstruction) - return false; - break; - } - - return true; // Per default match rules are supported. 
-} - int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk @@ -6427,6 +6411,31 @@ ins_pipe(ialu_reg_reg); // XXX %} +// Convert oop into int for vectors alignment masking +instruct convP2I(rRegI dst, rRegP src) +%{ + match(Set dst (ConvL2I (CastP2X src))); + + format %{ "movl $dst, $src\t# ptr -> int" %} + ins_encode %{ + __ movl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg_reg); // XXX +%} + +// Convert compressed oop into int for vectors alignment masking +// in case of 32bit oops (heap < 4Gb). +instruct convN2I(rRegI dst, rRegN src) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + format %{ "movl $dst, $src\t# compressed ptr -> int" %} + ins_encode %{ + __ movl($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg_reg); // XXX +%} // Convert oop pointer into compressed form instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{ @@ -10049,11 +10058,10 @@ ins_pipe( pipe_slow ); %} -// The next instructions have long latency and use Int unit. Set high cost. instruct MoveI2F_reg_reg(regF dst, rRegI src) %{ match(Set dst (MoveI2F src)); effect(DEF dst, USE src); - ins_cost(300); + ins_cost(100); format %{ "movd $dst,$src\t# MoveI2F" %} ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); @@ -10064,7 +10072,7 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{ match(Set dst (MoveL2D src)); effect(DEF dst, USE src); - ins_cost(300); + ins_cost(100); format %{ "movd $dst,$src\t# MoveL2D" %} ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/c1/c1_CodeStubs.hpp --- a/src/share/vm/c1/c1_CodeStubs.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/c1/c1_CodeStubs.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -574,71 +574,6 @@ #endif // PRODUCT }; -// This G1 barrier code stub is used in Unsafe.getObject. 
-// It generates a sequence of guards around the SATB -// barrier code that are used to detect when we have -// the referent field of a Reference object. -// The first check is assumed to have been generated -// in the code generated for Unsafe.getObject(). - -class G1UnsafeGetObjSATBBarrierStub: public CodeStub { - private: - LIR_Opr _val; - LIR_Opr _src; - - LIR_Opr _tmp; - LIR_Opr _thread; - - bool _gen_src_check; - - public: - // A G1 barrier that is guarded by generated guards that determine whether - // val (which is the result of Unsafe.getObject() should be recorded in an - // SATB log buffer. We could be reading the referent field of a Reference object - // using Unsafe.getObject() and we need to record the referent. - // - // * val is the operand returned by the unsafe.getObject routine. - // * src is the base object - // * tmp is a temp used to load the klass of src, and then reference type - // * thread is the thread object. - - G1UnsafeGetObjSATBBarrierStub(LIR_Opr val, LIR_Opr src, - LIR_Opr tmp, LIR_Opr thread, - bool gen_src_check) : - _val(val), _src(src), - _tmp(tmp), _thread(thread), - _gen_src_check(gen_src_check) - { - assert(_val->is_register(), "should have already been loaded"); - assert(_src->is_register(), "should have already been loaded"); - - assert(_tmp->is_register(), "should be a temporary register"); - } - - LIR_Opr val() const { return _val; } - LIR_Opr src() const { return _src; } - - LIR_Opr tmp() const { return _tmp; } - LIR_Opr thread() const { return _thread; } - - bool gen_src_check() const { return _gen_src_check; } - - virtual void emit_code(LIR_Assembler* e); - - virtual void visit(LIR_OpVisitState* visitor) { - visitor->do_slow_case(); - visitor->do_input(_val); - visitor->do_input(_src); - visitor->do_input(_thread); - - visitor->do_temp(_tmp); - } - -#ifndef PRODUCT - virtual void print_name(outputStream* out) const { out->print("G1UnsafeGetObjSATBBarrierStub"); } -#endif // PRODUCT -}; - class G1PostBarrierStub: public 
CodeStub { private: LIR_Opr _addr; diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/c1/c1_GraphBuilder.cpp --- a/src/share/vm/c1/c1_GraphBuilder.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -1646,10 +1646,6 @@ void GraphBuilder::invoke(Bytecodes::Code code) { - const bool has_receiver = - code == Bytecodes::_invokespecial || - code == Bytecodes::_invokevirtual || - code == Bytecodes::_invokeinterface; const bool is_invokedynamic = (code == Bytecodes::_invokedynamic); bool will_link; @@ -1690,8 +1686,12 @@ // convert them directly to an invokespecial or invokestatic. if (target->is_loaded() && !target->is_abstract() && target->can_be_statically_bound()) { switch (bc_raw) { - case Bytecodes::_invokevirtual: code = Bytecodes::_invokespecial; break; - case Bytecodes::_invokehandle: code = Bytecodes::_invokestatic; break; + case Bytecodes::_invokevirtual: + code = Bytecodes::_invokespecial; + break; + case Bytecodes::_invokehandle: + code = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokespecial; + break; } } @@ -1878,11 +1878,13 @@ // inlining not successful => standard invoke bool is_loaded = target->is_loaded(); ValueType* result_type = as_ValueType(target->return_type()); - - // We require the debug info to be the "state before" because - // invokedynamics may deoptimize. - ValueStack* state_before = is_invokedynamic ? copy_state_before() : copy_state_exhandling(); - + ValueStack* state_before = copy_state_exhandling(); + + // The bytecode (code) might change in this method so we are checking this very late. + const bool has_receiver = + code == Bytecodes::_invokespecial || + code == Bytecodes::_invokevirtual || + code == Bytecodes::_invokeinterface; Values* args = state()->pop_arguments(target->arg_size_no_receiver()); Value recv = has_receiver ? 
apop() : NULL; int vtable_index = methodOopDesc::invalid_vtable_index; @@ -3058,7 +3060,7 @@ case vmIntrinsics::_Reference_get: { - if (UseG1GC) { + { // With java.lang.ref.reference.get() we must go through the // intrinsic - when G1 is enabled - even when get() is the root // method of the compile so that, if necessary, the value in @@ -3070,6 +3072,9 @@ // object removed from the list of discovered references during // reference processing. + // Also we need intrinsic to prevent commoning reads from this field + // across safepoint since GC can change its value. + // Set up a stream so that appending instructions works properly. ciBytecodeStream s(scope->method()); s.reset_to_bci(0); @@ -3226,7 +3231,6 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) { - if (!InlineNatives ) INLINE_BAILOUT("intrinsic method inlining disabled"); if (callee->is_synchronized()) { // We don't currently support any synchronized intrinsics return false; @@ -3234,9 +3238,13 @@ // callee seems like a good candidate // determine id + vmIntrinsics::ID id = callee->intrinsic_id(); + if (!InlineNatives && id != vmIntrinsics::_Reference_get) { + // InlineNatives does not control Reference.get + INLINE_BAILOUT("intrinsic method inlining disabled"); + } bool preserves_state = false; bool cantrap = true; - vmIntrinsics::ID id = callee->intrinsic_id(); switch (id) { case vmIntrinsics::_arraycopy: if (!InlineArrayCopy) return false; @@ -3376,11 +3384,10 @@ return true; case vmIntrinsics::_Reference_get: - // It is only when G1 is enabled that we absolutely - // need to use the intrinsic version of Reference.get() - // so that the value in the referent field, if necessary, - // can be registered by the pre-barrier code. - if (!UseG1GC) return false; + // Use the intrinsic version of Reference.get() so that the value in + // the referent field can be registered by the G1 pre-barrier code. 
+ // Also to prevent commoning reads from this field across safepoint + // since GC can change its value. preserves_state = true; break; diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/c1/c1_Instruction.cpp --- a/src/share/vm/c1/c1_Instruction.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/c1/c1_Instruction.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -369,9 +369,6 @@ _signature = new BasicTypeList(number_of_arguments() + (has_receiver() ? 1 : 0)); if (has_receiver()) { _signature->append(as_BasicType(receiver()->type())); - } else if (is_invokedynamic()) { - // Add the synthetic MethodHandle argument to the signature. - _signature->append(T_OBJECT); } for (int i = 0; i < number_of_arguments(); i++) { ValueType* t = argument_at(i)->type(); diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/c1/c1_LIRAssembler.cpp --- a/src/share/vm/c1/c1_LIRAssembler.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/c1/c1_LIRAssembler.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -448,10 +448,10 @@ switch (op->code()) { case lir_static_call: + case lir_dynamic_call: call(op, relocInfo::static_call_type); break; case lir_optvirtual_call: - case lir_dynamic_call: call(op, relocInfo::opt_virtual_call_type); break; case lir_icvirtual_call: @@ -460,7 +460,9 @@ case lir_virtual_call: vtable_call(op); break; - default: ShouldNotReachHere(); + default: + fatal(err_msg_res("unexpected op code: %s", op->name())); + break; } // JSR 292 diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -920,7 +920,8 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { - assert(type2size[t] == type2size[value->type()], "size mismatch"); + assert(type2size[t] == type2size[value->type()], + err_msg_res("size mismatch: t=%s, value->type()=%s", type2name(t), type2name(value->type()))); if (!value->is_register()) { // force into a 
register LIR_Opr r = new_register(value->type()); @@ -2176,9 +2177,9 @@ off.load_item(); src.load_item(); - LIR_Opr reg = rlock_result(x, x->basic_type()); - - get_Object_unsafe(reg, src.result(), off.result(), type, x->is_volatile()); + LIR_Opr value = rlock_result(x, x->basic_type()); + + get_Object_unsafe(value, src.result(), off.result(), type, x->is_volatile()); #ifndef SERIALGC // We might be reading the value of the referent field of a @@ -2191,19 +2192,16 @@ // if (offset == java_lang_ref_Reference::referent_offset) { // if (src != NULL) { // if (klass(src)->reference_type() != REF_NONE) { - // pre_barrier(..., reg, ...); + // pre_barrier(..., value, ...); // } // } // } - // - // The first non-constant check of either the offset or - // the src operand will be done here; the remainder - // will take place in the generated code stub. if (UseG1GC && type == T_OBJECT) { - bool gen_code_stub = true; // Assume we need to generate the slow code stub. - bool gen_offset_check = true; // Assume the code stub has to generate the offset guard. - bool gen_source_check = true; // Assume the code stub has to check the src object for null. + bool gen_pre_barrier = true; // Assume we need to generate pre_barrier. + bool gen_offset_check = true; // Assume we need to generate the offset guard. + bool gen_source_check = true; // Assume we need to check the src object for null. + bool gen_type_check = true; // Assume we need to check the reference_type. if (off.is_constant()) { jlong off_con = (off.type()->is_int() ? @@ -2215,7 +2213,7 @@ // The constant offset is something other than referent_offset. // We can skip generating/checking the remaining guards and // skip generation of the code stub. - gen_code_stub = false; + gen_pre_barrier = false; } else { // The constant offset is the same as referent_offset - // we do not need to generate a runtime offset check. 
@@ -2224,11 +2222,11 @@ } // We don't need to generate stub if the source object is an array - if (gen_code_stub && src.type()->is_array()) { - gen_code_stub = false; + if (gen_pre_barrier && src.type()->is_array()) { + gen_pre_barrier = false; } - if (gen_code_stub) { + if (gen_pre_barrier) { // We still need to continue with the checks. if (src.is_constant()) { ciObject* src_con = src.get_jobject_constant(); @@ -2236,7 +2234,7 @@ if (src_con->is_null_object()) { // The constant src object is null - We can skip // generating the code stub. - gen_code_stub = false; + gen_pre_barrier = false; } else { // Non-null constant source object. We still have to generate // the slow stub - but we don't need to generate the runtime @@ -2245,20 +2243,28 @@ } } } - - if (gen_code_stub) { - // Temoraries. - LIR_Opr src_klass = new_register(T_OBJECT); - - // Get the thread pointer for the pre-barrier - LIR_Opr thread = getThreadPointer(); - - CodeStub* stub; + if (gen_pre_barrier && !PatchALot) { + // Can the klass of object be statically determined to be + // a sub-class of Reference? + ciType* type = src.value()->declared_type(); + if ((type != NULL) && type->is_loaded()) { + if (type->is_subtype_of(compilation()->env()->Reference_klass())) { + gen_type_check = false; + } else if (type->is_klass() && + !compilation()->env()->Object_klass()->is_subtype_of(type->as_klass())) { + // Not Reference and not Object klass. + gen_pre_barrier = false; + } + } + } + + if (gen_pre_barrier) { + LabelObj* Lcont = new LabelObj(); // We can have generate one runtime check here. Let's start with // the offset check. 
if (gen_offset_check) { - // if (offset == referent_offset) -> slow code stub + // if (offset != referent_offset) -> continue // If offset is an int then we can do the comparison with the // referent_offset constant; otherwise we need to move // referent_offset into a temporary register and generate @@ -2273,43 +2279,36 @@ referent_off = new_register(T_LONG); __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); } - - __ cmp(lir_cond_equal, off.result(), referent_off); - - // Optionally generate "src == null" check. - stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(), - src_klass, thread, - gen_source_check); - - __ branch(lir_cond_equal, as_BasicType(off.type()), stub); - } else { - if (gen_source_check) { - // offset is a const and equals referent offset - // if (source != null) -> slow code stub - __ cmp(lir_cond_notEqual, src.result(), LIR_OprFact::oopConst(NULL)); - - // Since we are generating the "if src == null" guard here, - // there is no need to generate the "src == null" check again. - stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(), - src_klass, thread, - false); - - __ branch(lir_cond_notEqual, T_OBJECT, stub); - } else { - // We have statically determined that offset == referent_offset - // && src != null so we unconditionally branch to code stub - // to perform the guards and record reg in the SATB log buffer. 
- - stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(), - src_klass, thread, - false); - - __ branch(lir_cond_always, T_ILLEGAL, stub); - } + __ cmp(lir_cond_notEqual, off.result(), referent_off); + __ branch(lir_cond_notEqual, as_BasicType(off.type()), Lcont->label()); + } + if (gen_source_check) { + // offset is a const and equals referent offset + // if (source == null) -> continue + __ cmp(lir_cond_equal, src.result(), LIR_OprFact::oopConst(NULL)); + __ branch(lir_cond_equal, T_OBJECT, Lcont->label()); } - - // Continuation point - __ branch_destination(stub->continuation()); + LIR_Opr src_klass = new_register(T_OBJECT); + if (gen_type_check) { + // We have determined that offset == referent_offset && src != null. + // if (src->_klass->_reference_type == REF_NONE) -> continue + __ move(new LIR_Address(src.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), src_klass); + LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(instanceKlass::reference_type_offset()), T_BYTE); + LIR_Opr reference_type = new_register(T_INT); + __ move(reference_type_addr, reference_type); + __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); + __ branch(lir_cond_equal, T_INT, Lcont->label()); + } + { + // We have determined that src->_klass->_reference_type != REF_NONE + // so register the value in the referent field with the pre-barrier. + pre_barrier(LIR_OprFact::illegalOpr /* addr_opr */, + value /* pre_val */, + false /* do_load */, + false /* patch */, + NULL /* info */); + } + __ branch_destination(Lcont->label()); } } #endif // SERIALGC @@ -2664,8 +2663,9 @@ void LIRGenerator::invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR_OprList* arg_list) { - int i = (x->has_receiver() || x->is_invokedynamic()) ? 1 : 0; - for (; i < args->length(); i++) { + assert(args->length() == arg_list->length(), + err_msg_res("args=%d, arg_list=%d", args->length(), arg_list->length())); + for (int i = x->has_receiver() ? 
1 : 0; i < args->length(); i++) { LIRItem* param = args->at(i); LIR_Opr loc = arg_list->at(i); if (loc->is_register()) { @@ -2705,15 +2705,9 @@ LIRItem* receiver = new LIRItem(x->receiver(), this); argument_items->append(receiver); } - if (x->is_invokedynamic()) { - // Insert a dummy for the synthetic MethodHandle argument. - argument_items->append(NULL); - } - int idx = x->has_receiver() ? 1 : 0; for (int i = 0; i < x->number_of_arguments(); i++) { LIRItem* param = new LIRItem(x->argument_at(i), this); argument_items->append(param); - idx += (param->type()->is_double_word() ? 2 : 1); } return argument_items; } @@ -2758,9 +2752,6 @@ CodeEmitInfo* info = state_for(x, x->state()); - // invokedynamics can deoptimize. - CodeEmitInfo* deopt_info = x->is_invokedynamic() ? state_for(x, x->state_before()) : NULL; - invoke_load_arguments(x, args, arg_list); if (x->has_receiver()) { @@ -2809,41 +2800,8 @@ } break; case Bytecodes::_invokedynamic: { - ciBytecodeStream bcs(x->scope()->method()); - bcs.force_bci(x->state()->bci()); - assert(bcs.cur_bc() == Bytecodes::_invokedynamic, "wrong stream"); - ciCPCache* cpcache = bcs.get_cpcache(); - - // Get CallSite offset from constant pool cache pointer. - int index = bcs.get_method_index(); - size_t call_site_offset = cpcache->get_f1_offset(index); - - // Load CallSite object from constant pool cache. - LIR_Opr call_site = new_register(objectType); - __ oop2reg(cpcache->constant_encoding(), call_site); - __ move_wide(new LIR_Address(call_site, call_site_offset, T_OBJECT), call_site); - - // If this invokedynamic call site hasn't been executed yet in - // the interpreter, the CallSite object in the constant pool - // cache is still null and we need to deoptimize. - if (cpcache->is_f1_null_at(index)) { - // Only deoptimize if the CallSite object is still null; we don't - // recompile methods in C1 after deoptimization so this call site - // might be resolved the next time we execute it after OSR. 
- DeoptimizeStub* deopt_stub = new DeoptimizeStub(deopt_info); - __ cmp(lir_cond_equal, call_site, LIR_OprFact::oopConst(NULL)); - __ branch(lir_cond_equal, T_OBJECT, deopt_stub); - } - - // Use the receiver register for the synthetic MethodHandle - // argument. - receiver = LIR_Assembler::receiverOpr(); - - // Load target MethodHandle from CallSite object. - __ load(new LIR_Address(call_site, java_lang_invoke_CallSite::target_offset_in_bytes(), T_OBJECT), receiver); - __ call_dynamic(target, receiver, result_register, - SharedRuntime::get_resolve_opt_virtual_call_stub(), + SharedRuntime::get_resolve_static_call_stub(), arg_list, info); break; } diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/c1/c1_ValueMap.cpp --- a/src/share/vm/c1/c1_ValueMap.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/c1/c1_ValueMap.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -190,7 +190,7 @@ LoadField* lf = value->as_LoadField(); \ bool must_kill = lf != NULL \ && lf->field()->holder() == field->holder() \ - && lf->field()->offset() == field->offset(); + && (all_offsets || lf->field()->offset() == field->offset()); #define MUST_KILL_EXCEPTION(must_kill, entry, value) \ assert(entry->nesting() < nesting(), "must not find bigger nesting than current"); \ @@ -205,7 +205,7 @@ GENERIC_KILL_VALUE(MUST_KILL_ARRAY); } -void ValueMap::kill_field(ciField* field) { +void ValueMap::kill_field(ciField* field, bool all_offsets) { GENERIC_KILL_VALUE(MUST_KILL_FIELD); } @@ -280,9 +280,9 @@ ValueMap* value_map_of(BlockBegin* block) { return _gvn->value_map_of(block); } // implementation for abstract methods of ValueNumberingVisitor - void kill_memory() { _too_complicated_loop = true; } - void kill_field(ciField* field) { current_map()->kill_field(field); }; - void kill_array(ValueType* type) { current_map()->kill_array(type); }; + void kill_memory() { _too_complicated_loop = true; } + void kill_field(ciField* field, bool all_offsets) { current_map()->kill_field(field, all_offsets); }; + void 
kill_array(ValueType* type) { current_map()->kill_array(type); }; public: ShortLoopOptimizer(GlobalValueNumbering* gvn) diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/c1/c1_ValueMap.hpp --- a/src/share/vm/c1/c1_ValueMap.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/c1/c1_ValueMap.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -114,7 +114,7 @@ Value find_insert(Value x); void kill_memory(); - void kill_field(ciField* field); + void kill_field(ciField* field, bool all_offsets); void kill_array(ValueType* type); void kill_exception(); void kill_map(ValueMap* map); @@ -136,7 +136,7 @@ protected: // called by visitor functions for instructions that kill values virtual void kill_memory() = 0; - virtual void kill_field(ciField* field) = 0; + virtual void kill_field(ciField* field, bool all_offsets) = 0; virtual void kill_array(ValueType* type) = 0; // visitor functions @@ -148,7 +148,7 @@ x->field()->is_volatile()) { kill_memory(); } else { - kill_field(x->field()); + kill_field(x->field(), x->needs_patching()); } } void do_StoreIndexed (StoreIndexed* x) { kill_array(x->type()); } @@ -214,9 +214,9 @@ public: // implementation for abstract methods of ValueNumberingVisitor - void kill_memory() { _map->kill_memory(); } - void kill_field(ciField* field) { _map->kill_field(field); } - void kill_array(ValueType* type) { _map->kill_array(type); } + void kill_memory() { _map->kill_memory(); } + void kill_field(ciField* field, bool all_offsets) { _map->kill_field(field, all_offsets); } + void kill_array(ValueType* type) { _map->kill_array(type); } ValueNumberingEffects(ValueMap* map): _map(map) {} }; @@ -234,9 +234,9 @@ void set_value_map_of(BlockBegin* block, ValueMap* map) { assert(value_map_of(block) == NULL, ""); _value_maps.at_put(block->linear_scan_number(), map); } // implementation for abstract methods of ValueNumberingVisitor - void kill_memory() { current_map()->kill_memory(); } - void kill_field(ciField* field) { current_map()->kill_field(field); } - void 
kill_array(ValueType* type) { current_map()->kill_array(type); } + void kill_memory() { current_map()->kill_memory(); } + void kill_field(ciField* field, bool all_offsets) { current_map()->kill_field(field, all_offsets); } + void kill_array(ValueType* type) { current_map()->kill_array(type); } // main entry point that performs global value numbering GlobalValueNumbering(IR* ir); diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp --- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -159,14 +159,30 @@ "right address out of range"); assert(left < right, "Heap addresses out of order"); size_t num_cards = pointer_delta(right, left) >> LogN_words; - memset(&_offset_array[index_for(left)], offset, num_cards); + if (UseMemSetInBOT) { + memset(&_offset_array[index_for(left)], offset, num_cards); + } else { + size_t i = index_for(left); + const size_t end = i + num_cards; + for (; i < end; i++) { + _offset_array[i] = offset; + } + } } void set_offset_array(size_t left, size_t right, u_char offset) { assert(right < _vs.committed_size(), "right address out of range"); - assert(left <= right, "indexes out of order"); + assert(left <= right, "indexes out of order"); size_t num_cards = right - left + 1; - memset(&_offset_array[left], offset, num_cards); + if (UseMemSetInBOT) { + memset(&_offset_array[left], offset, num_cards); + } else { + size_t i = left; + const size_t end = i + num_cards; + for (; i < end; i++) { + _offset_array[i] = offset; + } + } } void check_offset_array(size_t 
index, HeapWord* high, HeapWord* low) const { diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -1891,6 +1891,8 @@ _young_list(new YoungList(this)), _gc_time_stamp(0), _retained_old_gc_alloc_region(NULL), + _survivor_plab_stats(YoungPLABSize, PLABWeight), + _old_plab_stats(OldPLABSize, PLABWeight), _expand_heap_after_alloc_failure(true), _surviving_young_words(NULL), _old_marking_cycles_started(0), @@ -1932,6 +1934,14 @@ clear_cset_start_regions(); guarantee(_task_queues != NULL, "task_queues allocation failure."); +#ifdef SPARC + // Issue a stern warning, but allow use for experimentation and debugging. + if (VM_Version::is_sun4v() && UseMemSetInBOT) { + assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error"); + warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability" + " on sun4v; please understand that you are using at your own risk!"); + } +#endif } jint G1CollectedHeap::initialize() { @@ -3580,15 +3590,11 @@ DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); size_t buffer_size = dcqs.buffer_size(); size_t buffer_num = dcqs.completed_buffers_num(); - return buffer_size * buffer_num + extra_cards; -} - -size_t G1CollectedHeap::max_pending_card_num() { - DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - size_t buffer_size = dcqs.buffer_size(); - size_t buffer_num = dcqs.completed_buffers_num(); - int thread_num = Threads::number_of_threads(); - return (buffer_num + thread_num) * buffer_size; + + // PtrQueueSet::buffer_size() and PtrQueue:size() return sizes + // in bytes - not the number of 'entries'. We need to convert + // into a number of cards. 
+ return (buffer_size * buffer_num + extra_cards) / oopSize; } size_t G1CollectedHeap::cards_scanned() { @@ -4099,17 +4105,22 @@ size_t gclab_word_size; switch (purpose) { case GCAllocForSurvived: - gclab_word_size = YoungPLABSize; + gclab_word_size = _survivor_plab_stats.desired_plab_sz(); break; case GCAllocForTenured: - gclab_word_size = OldPLABSize; + gclab_word_size = _old_plab_stats.desired_plab_sz(); break; default: assert(false, "unknown GCAllocPurpose"); - gclab_word_size = OldPLABSize; + gclab_word_size = _old_plab_stats.desired_plab_sz(); break; } - return gclab_word_size; + + // Prevent humongous PLAB sizes for two reasons: + // * PLABs are allocated using a similar paths as oops, but should + // never be in a humongous region + // * Allowing humongous PLABs needlessly churns the region free lists + return MIN2(_humongous_object_threshold_in_words, gclab_word_size); } void G1CollectedHeap::init_mutator_alloc_region() { @@ -4165,6 +4176,11 @@ // want either way so no reason to check explicitly for either // condition. _retained_old_gc_alloc_region = _old_gc_alloc_region.release(); + + if (ResizePLAB) { + _survivor_plab_stats.adjust_desired_plab_sz(); + _old_plab_stats.adjust_desired_plab_sz(); + } } void G1CollectedHeap::abandon_gc_alloc_regions() { diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -33,7 +33,7 @@ #include "gc_implementation/g1/heapRegionSeq.hpp" #include "gc_implementation/g1/heapRegionSets.hpp" #include "gc_implementation/shared/hSpaceCounters.hpp" -#include "gc_implementation/parNew/parGCAllocBuffer.hpp" +#include "gc_implementation/shared/parGCAllocBuffer.hpp" #include "memory/barrierSet.hpp" #include "memory/memRegion.hpp" #include "memory/sharedHeap.hpp" @@ -278,10 +278,33 @@ // survivor objects. 
SurvivorGCAllocRegion _survivor_gc_alloc_region; + // PLAB sizing policy for survivors. + PLABStats _survivor_plab_stats; + // Alloc region used to satisfy allocation requests by the GC for // old objects. OldGCAllocRegion _old_gc_alloc_region; + // PLAB sizing policy for tenured objects. + PLABStats _old_plab_stats; + + PLABStats* stats_for_purpose(GCAllocPurpose purpose) { + PLABStats* stats = NULL; + + switch (purpose) { + case GCAllocForSurvived: + stats = &_survivor_plab_stats; + break; + case GCAllocForTenured: + stats = &_old_plab_stats; + break; + default: + assert(false, "unrecognized GCAllocPurpose"); + } + + return stats; + } + // The last old region we allocated to during the last GC. // Typically, it is not full so we should re-use it during the next GC. HeapRegion* _retained_old_gc_alloc_region; @@ -314,7 +337,7 @@ G1MonitoringSupport* _g1mm; // Determines PLAB size for a particular allocation purpose. - static size_t desired_plab_sz(GCAllocPurpose purpose); + size_t desired_plab_sz(GCAllocPurpose purpose); // Outside of GC pauses, the number of bytes used in all regions other // than the current allocation region. 
@@ -1683,7 +1706,6 @@ void stop_conc_gc_threads(); size_t pending_card_num(); - size_t max_pending_card_num(); size_t cards_scanned(); protected: @@ -1811,19 +1833,19 @@ } HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) { - HeapWord* obj = NULL; size_t gclab_word_size = _g1h->desired_plab_sz(purpose); if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) { G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose); - assert(gclab_word_size == alloc_buf->word_sz(), - "dynamic resizing is not supported"); add_to_alloc_buffer_waste(alloc_buf->words_remaining()); - alloc_buf->retire(false, false); + alloc_buf->flush_stats_and_retire(_g1h->stats_for_purpose(purpose), + false /* end_of_gc */, + false /* retain */); HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size); if (buf == NULL) return NULL; // Let caller handle allocation failure. // Otherwise. + alloc_buf->set_word_size(gclab_word_size); alloc_buf->set_buf(buf); obj = alloc_buf->allocate(word_sz); @@ -1908,7 +1930,9 @@ for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { size_t waste = _alloc_buffers[ap]->words_remaining(); add_to_alloc_buffer_waste(waste); - _alloc_buffers[ap]->retire(true, false); + _alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap), + true /* end_of_gc */, + false /* retain */); } } diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -90,7 +90,6 @@ _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _prev_collection_pause_end_ms(0.0), - _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)), _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), @@ 
-197,7 +196,6 @@ int index = MIN2(_parallel_gc_threads - 1, 7); - _pending_card_diff_seq->add(0.0); _rs_length_diff_seq->add(rs_length_diff_defaults[index]); _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]); _young_cards_per_entry_ratio_seq->add( @@ -657,7 +655,7 @@ for (HeapRegion * r = _recorded_survivor_head; r != NULL && r != _recorded_survivor_tail->get_next_young_region(); r = r->get_next_young_region()) { - survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true); + survivor_regions_evac_time += predict_region_elapsed_time_ms(r, gcs_are_young()); } return survivor_regions_evac_time; } @@ -801,9 +799,8 @@ _cur_collection_pause_used_at_start_bytes = start_used; _cur_collection_pause_used_regions_at_start = _g1->used_regions(); _pending_cards = _g1->pending_card_num(); - _max_pending_cards = _g1->max_pending_card_num(); - _bytes_in_collection_set_before_gc = 0; + _collection_set_bytes_used_before = 0; _bytes_copied_during_gc = 0; YoungList* young_list = _g1->young_list(); @@ -1036,12 +1033,6 @@ // do that for any other surv rate groupsx if (update_stats) { - size_t diff = 0; - if (_max_pending_cards >= _pending_cards) { - diff = _max_pending_cards - _pending_cards; - } - _pending_card_diff_seq->add((double) diff); - double cost_per_card_ms = 0.0; if (_pending_cards > 0) { cost_per_card_ms = phase_times()->_update_rs_time / (double) _pending_cards; @@ -1126,9 +1117,9 @@ _constant_other_time_ms_seq->add(constant_other_time_ms); double survival_ratio = 0.0; - if (_bytes_in_collection_set_before_gc > 0) { + if (_collection_set_bytes_used_before > 0) { survival_ratio = (double) _bytes_copied_during_gc / - (double) _bytes_in_collection_set_before_gc; + (double) _collection_set_bytes_used_before; } _pending_cards_seq->add((double) _pending_cards); @@ -1229,18 +1220,6 @@ } double -G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) { - size_t rs_length = predict_rs_length_diff(); - size_t card_num; - if (gcs_are_young()) { - 
card_num = predict_young_card_num(rs_length); - } else { - card_num = predict_non_young_card_num(rs_length); - } - return predict_base_elapsed_time_ms(pending_cards, card_num); -} - -double G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards, size_t scanned_cards) { return @@ -1250,27 +1229,15 @@ } double -G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr, - bool young) { - size_t rs_length = hr->rem_set()->occupied(); +G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) { + size_t rs_length = predict_rs_length_diff(); size_t card_num; if (gcs_are_young()) { card_num = predict_young_card_num(rs_length); } else { card_num = predict_non_young_card_num(rs_length); } - size_t bytes_to_copy = predict_bytes_to_copy(hr); - - double region_elapsed_time_ms = - predict_rs_scan_time_ms(card_num) + - predict_object_copy_time_ms(bytes_to_copy); - - if (young) - region_elapsed_time_ms += predict_young_other_time_ms(1); - else - region_elapsed_time_ms += predict_non_young_other_time_ms(1); - - return region_elapsed_time_ms; + return predict_base_elapsed_time_ms(pending_cards, card_num); } size_t G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) { @@ -1286,6 +1253,35 @@ return bytes_to_copy; } +double +G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr, + bool for_young_gc) { + size_t rs_length = hr->rem_set()->occupied(); + size_t card_num; + + // Predicting the number of cards is based on which type of GC + // we're predicting for. + if (for_young_gc) { + card_num = predict_young_card_num(rs_length); + } else { + card_num = predict_non_young_card_num(rs_length); + } + size_t bytes_to_copy = predict_bytes_to_copy(hr); + + double region_elapsed_time_ms = + predict_rs_scan_time_ms(card_num) + + predict_object_copy_time_ms(bytes_to_copy); + + // The prediction of the "other" time for this region is based + // upon the region type and NOT the GC type. 
+ if (hr->is_young()) { + region_elapsed_time_ms += predict_young_other_time_ms(1); + } else { + region_elapsed_time_ms += predict_non_young_other_time_ms(1); + } + return region_elapsed_time_ms; +} + void G1CollectorPolicy::init_cset_region_lengths(uint eden_cset_region_length, uint survivor_cset_region_length) { @@ -1342,22 +1338,6 @@ } } -class CountCSClosure: public HeapRegionClosure { - G1CollectorPolicy* _g1_policy; -public: - CountCSClosure(G1CollectorPolicy* g1_policy) : - _g1_policy(g1_policy) {} - bool doHeapRegion(HeapRegion* r) { - _g1_policy->_bytes_in_collection_set_before_gc += r->used(); - return false; - } -}; - -void G1CollectorPolicy::count_CS_bytes_used() { - CountCSClosure cs_closure(this); - _g1->collection_set_iterate(&cs_closure); -} - void G1CollectorPolicy::print_tracing_info() const { _trace_gen0_time_data.print(); _trace_gen1_time_data.print(); @@ -1696,7 +1676,7 @@ // retiring the current allocation region) or a concurrent // refine thread (RSet sampling). - double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true); + double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young()); size_t used_bytes = hr->used(); _inc_cset_recorded_rs_lengths += rs_length; _inc_cset_predicted_elapsed_time_ms += region_elapsed_time_ms; @@ -1731,7 +1711,7 @@ _inc_cset_recorded_rs_lengths_diffs += rs_lengths_diff; double old_elapsed_time_ms = hr->predicted_elapsed_time_ms(); - double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true); + double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young()); double elapsed_ms_diff = new_region_elapsed_time_ms - old_elapsed_time_ms; _inc_cset_predicted_elapsed_time_ms_diffs += elapsed_ms_diff; @@ -1854,8 +1834,7 @@ } void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) { - // Set this here - in case we're not doing young collections. 
- double non_young_start_time_sec = os::elapsedTime(); + double young_start_time_sec = os::elapsedTime(); YoungList* young_list = _g1->young_list(); finalize_incremental_cset_building(); @@ -1869,17 +1848,14 @@ double predicted_pause_time_ms = base_time_ms; double time_remaining_ms = target_pause_time_ms - base_time_ms; - ergo_verbose3(ErgoCSetConstruction | ErgoHigh, + ergo_verbose4(ErgoCSetConstruction | ErgoHigh, "start choosing CSet", + ergo_format_size("_pending_cards") ergo_format_ms("predicted base time") ergo_format_ms("remaining time") ergo_format_ms("target pause time"), - base_time_ms, time_remaining_ms, target_pause_time_ms); + _pending_cards, base_time_ms, time_remaining_ms, target_pause_time_ms); - HeapRegion* hr; - double young_start_time_sec = os::elapsedTime(); - - _collection_set_bytes_used_before = 0; _last_gc_was_young = gcs_are_young() ? true : false; if (_last_gc_was_young) { @@ -1895,7 +1871,8 @@ uint survivor_region_length = young_list->survivor_length(); uint eden_region_length = young_list->length() - survivor_region_length; init_cset_region_lengths(eden_region_length, survivor_region_length); - hr = young_list->first_survivor_region(); + + HeapRegion* hr = young_list->first_survivor_region(); while (hr != NULL) { assert(hr->is_survivor(), "badly formed young list"); hr->set_young(); @@ -1926,8 +1903,8 @@ phase_times()->_recorded_young_cset_choice_time_ms = (young_end_time_sec - young_start_time_sec) * 1000.0; - // We are doing young collections so reset this. - non_young_start_time_sec = young_end_time_sec; + // Set the start of the non-young choice time. 
+ double non_young_start_time_sec = young_end_time_sec; if (!gcs_are_young()) { CollectionSetChooser* cset_chooser = _collectionSetChooser; @@ -1937,6 +1914,7 @@ uint expensive_region_num = 0; bool check_time_remaining = adaptive_young_list_length(); + HeapRegion* hr = cset_chooser->peek(); while (hr != NULL) { if (old_cset_region_length() >= max_old_cset_length) { @@ -1950,7 +1928,7 @@ break; } - double predicted_time_ms = predict_region_elapsed_time_ms(hr, false); + double predicted_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young()); if (check_time_remaining) { if (predicted_time_ms > time_remaining_ms) { // Too expensive for the current CSet. @@ -2025,8 +2003,6 @@ stop_incremental_cset_building(); - count_CS_bytes_used(); - ergo_verbose5(ErgoCSetConstruction, "finish choosing CSet", ergo_format_region("eden") diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -228,7 +228,6 @@ TruncatedSeq* _alloc_rate_ms_seq; double _prev_collection_pause_end_ms; - TruncatedSeq* _pending_card_diff_seq; TruncatedSeq* _rs_length_diff_seq; TruncatedSeq* _cost_per_card_ms_seq; TruncatedSeq* _young_cards_per_entry_ratio_seq; @@ -295,7 +294,6 @@ double _pause_time_target_ms; size_t _pending_cards; - size_t _max_pending_cards; public: // Accessors @@ -325,28 +323,6 @@ _max_rs_lengths = rs_lengths; } - size_t predict_pending_card_diff() { - double prediction = get_new_neg_prediction(_pending_card_diff_seq); - if (prediction < 0.00001) { - return 0; - } else { - return (size_t) prediction; - } - } - - size_t predict_pending_cards() { - size_t max_pending_card_num = _g1->max_pending_card_num(); - size_t diff = predict_pending_card_diff(); - size_t prediction; - if (diff > max_pending_card_num) { - prediction = max_pending_card_num; - } else { - 
prediction = max_pending_card_num - diff; - } - - return prediction; - } - size_t predict_rs_length_diff() { return (size_t) get_new_prediction(_rs_length_diff_seq); } @@ -439,7 +415,7 @@ double predict_base_elapsed_time_ms(size_t pending_cards, size_t scanned_cards); size_t predict_bytes_to_copy(HeapRegion* hr); - double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); + double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc); void set_recorded_rs_lengths(size_t rs_lengths); @@ -495,12 +471,6 @@ } private: - size_t _bytes_in_collection_set_before_gc; - size_t _bytes_copied_during_gc; - - // Used to count used bytes in CS. - friend class CountCSClosure; - // Statistics kept per GC stoppage, pause or full. TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec; @@ -514,9 +484,13 @@ // The number of bytes in the collection set before the pause. Set from // the incrementally built collection set at the start of an evacuation - // pause. + // pause, and incremented in finalize_cset() when adding old regions + // (if any) to the collection set. size_t _collection_set_bytes_used_before; + // The number of bytes copied during the GC. + size_t _bytes_copied_during_gc; + // The associated information that is maintained while the incremental // collection set is being built with young regions. Used to populate // the recorded info for the evacuation pause. @@ -646,9 +620,6 @@ bool predict_will_fit(uint young_length, double base_time_ms, uint base_free_regions, double target_pause_time_ms); - // Count the number of bytes used in the CS. - void count_CS_bytes_used(); - public: G1CollectorPolicy(); @@ -666,10 +637,6 @@ // higher, recalculate the young list target length prediction. void revise_young_list_target_length_if_necessary(); - size_t bytes_in_collection_set() { - return _bytes_in_collection_set_before_gc; - } - // This should be called after the heap is resized. 
void record_new_heap_size(uint new_number_of_regions); diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp --- a/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -125,6 +125,7 @@ #define ergo_format_double(_name_) ", " _name_ ": %1.2f" #define ergo_format_perc(_name_) ", " _name_ ": %1.2f %%" #define ergo_format_ms(_name_) ", " _name_ ": %1.2f ms" +#define ergo_format_size(_name_) ", " _name_ ": "SIZE_FORMAT // Double parameter format strings #define ergo_format_byte_perc(_name_) \ diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -287,17 +287,17 @@ "The number of times we'll force an overflow during " \ "concurrent marking") \ \ - develop(uintx, G1DefaultMinNewGenPercent, 20, \ + experimental(uintx, G1DefaultMinNewGenPercent, 20, \ "Percentage (0-100) of the heap size to use as minimum " \ "young gen size.") \ \ - develop(uintx, G1DefaultMaxNewGenPercent, 80, \ + experimental(uintx, G1DefaultMaxNewGenPercent, 80, \ "Percentage (0-100) of the heap size to use as maximum " \ "young gen size.") \ \ - develop(uintx, G1OldCSetRegionLiveThresholdPercent, 90, \ + experimental(uintx, G1OldCSetRegionLiveThresholdPercent, 90, \ "Threshold for regions to be added to the collection set. 
" \ - "Regions with more live bytes that this will not be collected.") \ + "Regions with more live bytes than this will not be collected.") \ \ product(uintx, G1HeapWastePercent, 5, \ "Amount of space, expressed as a percentage of the heap size, " \ @@ -306,7 +306,7 @@ product(uintx, G1MixedGCCountTarget, 4, \ "The target number of mixed GCs after a marking cycle.") \ \ - develop(uintx, G1OldCSetRegionThresholdPercent, 10, \ + experimental(uintx, G1OldCSetRegionThresholdPercent, 10, \ "An upper bound for the number of old CSet regions expressed " \ "as a percentage of the heap size.") \ \ diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/g1/heapRegion.cpp --- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -384,10 +384,17 @@ } void HeapRegion::calc_gc_efficiency() { + // GC efficiency is the ratio of how much space would be + // reclaimed over how long we predict it would take to reclaim it. G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectorPolicy* g1p = g1h->g1_policy(); - _gc_efficiency = (double) reclaimable_bytes() / - g1p->predict_region_elapsed_time_ms(this, false); + + // Retrieve a prediction of the elapsed time for this region for + // a mixed gc because the region will only be evacuated during a + // mixed gc. + double region_elapsed_time_ms = + g1p->predict_region_elapsed_time_ms(this, false /* for_young_gc */); + _gc_efficiency = (double) reclaimable_bytes() / region_elapsed_time_ms; } void HeapRegion::set_startsHumongous(HeapWord* new_top, HeapWord* new_end) { diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp --- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp Wed Aug 22 10:01:51 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "gc_implementation/parNew/parGCAllocBuffer.hpp" -#include "memory/sharedHeap.hpp" -#include "oops/arrayOop.hpp" -#include "oops/oop.inline.hpp" - -ParGCAllocBuffer::ParGCAllocBuffer(size_t desired_plab_sz_) : - _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL), - _end(NULL), _hard_end(NULL), - _retained(false), _retained_filler(), - _allocated(0), _wasted(0) -{ - assert (min_size() > AlignmentReserve, "Inconsistency!"); - // arrayOopDesc::header_size depends on command line initialization. - FillerHeaderSize = align_object_size(arrayOopDesc::header_size(T_INT)); - AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? FillerHeaderSize : 0; -} - -size_t ParGCAllocBuffer::FillerHeaderSize; - -// If the minimum object size is greater than MinObjAlignment, we can -// end up with a shard at the end of the buffer that's smaller than -// the smallest object. 
We can't allow that because the buffer must -// look like it's full of objects when we retire it, so we make -// sure we have enough space for a filler int array object. -size_t ParGCAllocBuffer::AlignmentReserve; - -void ParGCAllocBuffer::retire(bool end_of_gc, bool retain) { - assert(!retain || end_of_gc, "Can only retain at GC end."); - if (_retained) { - // If the buffer had been retained shorten the previous filler object. - assert(_retained_filler.end() <= _top, "INVARIANT"); - CollectedHeap::fill_with_object(_retained_filler); - // Wasted space book-keeping, otherwise (normally) done in invalidate() - _wasted += _retained_filler.word_size(); - _retained = false; - } - assert(!end_of_gc || !_retained, "At this point, end_of_gc ==> !_retained."); - if (_top < _hard_end) { - CollectedHeap::fill_with_object(_top, _hard_end); - if (!retain) { - invalidate(); - } else { - // Is there wasted space we'd like to retain for the next GC? - if (pointer_delta(_end, _top) > FillerHeaderSize) { - _retained = true; - _retained_filler = MemRegion(_top, FillerHeaderSize); - _top = _top + FillerHeaderSize; - } else { - invalidate(); - } - } - } -} - -void ParGCAllocBuffer::flush_stats(PLABStats* stats) { - assert(ResizePLAB, "Wasted work"); - stats->add_allocated(_allocated); - stats->add_wasted(_wasted); - stats->add_unused(pointer_delta(_end, _top)); -} - -// Compute desired plab size and latch result for later -// use. This should be called once at the end of parallel -// scavenge; it clears the sensor accumulators. 
-void PLABStats::adjust_desired_plab_sz() { - assert(ResizePLAB, "Not set"); - if (_allocated == 0) { - assert(_unused == 0, "Inconsistency in PLAB stats"); - _allocated = 1; - } - double wasted_frac = (double)_unused/(double)_allocated; - size_t target_refills = (size_t)((wasted_frac*TargetSurvivorRatio)/ - TargetPLABWastePct); - if (target_refills == 0) { - target_refills = 1; - } - _used = _allocated - _wasted - _unused; - size_t plab_sz = _used/(target_refills*ParallelGCThreads); - if (PrintPLAB) gclog_or_tty->print(" (plab_sz = %d ", plab_sz); - // Take historical weighted average - _filter.sample(plab_sz); - // Clip from above and below, and align to object boundary - plab_sz = MAX2(min_size(), (size_t)_filter.average()); - plab_sz = MIN2(max_size(), plab_sz); - plab_sz = align_object_size(plab_sz); - // Latch the result - if (PrintPLAB) gclog_or_tty->print(" desired_plab_sz = %d) ", plab_sz); - if (ResizePLAB) { - _desired_plab_sz = plab_sz; - } - // Now clear the accumulators for next round: - // note this needs to be fixed in the case where we - // are retaining across scavenges. FIX ME !!! 
XXX - _allocated = 0; - _wasted = 0; - _unused = 0; -} - -#ifndef PRODUCT -void ParGCAllocBuffer::print() { - gclog_or_tty->print("parGCAllocBuffer: _bottom: %p _top: %p _end: %p _hard_end: %p" - "_retained: %c _retained_filler: [%p,%p)\n", - _bottom, _top, _end, _hard_end, - "FT"[_retained], _retained_filler.start(), _retained_filler.end()); -} -#endif // !PRODUCT - -const size_t ParGCAllocBufferWithBOT::ChunkSizeInWords = -MIN2(CardTableModRefBS::par_chunk_heapword_alignment(), - ((size_t)Generation::GenGrain)/HeapWordSize); -const size_t ParGCAllocBufferWithBOT::ChunkSizeInBytes = -MIN2(CardTableModRefBS::par_chunk_heapword_alignment() * HeapWordSize, - (size_t)Generation::GenGrain); - -ParGCAllocBufferWithBOT::ParGCAllocBufferWithBOT(size_t word_sz, - BlockOffsetSharedArray* bsa) : - ParGCAllocBuffer(word_sz), - _bsa(bsa), - _bt(bsa, MemRegion(_bottom, _hard_end)), - _true_end(_hard_end) -{} - -// The buffer comes with its own BOT, with a shared (obviously) underlying -// BlockOffsetSharedArray. We manipulate this BOT in the normal way -// as we would for any contiguous space. However, on accasion we -// need to do some buffer surgery at the extremities before we -// start using the body of the buffer for allocations. Such surgery -// (as explained elsewhere) is to prevent allocation on a card that -// is in the process of being walked concurrently by another GC thread. -// When such surgery happens at a point that is far removed (to the -// right of the current allocation point, top), we use the "contig" -// parameter below to directly manipulate the shared array without -// modifying the _next_threshold state in the BOT. 
-void ParGCAllocBufferWithBOT::fill_region_with_block(MemRegion mr, - bool contig) { - CollectedHeap::fill_with_object(mr); - if (contig) { - _bt.alloc_block(mr.start(), mr.end()); - } else { - _bt.BlockOffsetArray::alloc_block(mr.start(), mr.end()); - } -} - -HeapWord* ParGCAllocBufferWithBOT::allocate_slow(size_t word_sz) { - HeapWord* res = NULL; - if (_true_end > _hard_end) { - assert((HeapWord*)align_size_down(intptr_t(_hard_end), - ChunkSizeInBytes) == _hard_end, - "or else _true_end should be equal to _hard_end"); - assert(_retained, "or else _true_end should be equal to _hard_end"); - assert(_retained_filler.end() <= _top, "INVARIANT"); - CollectedHeap::fill_with_object(_retained_filler); - if (_top < _hard_end) { - fill_region_with_block(MemRegion(_top, _hard_end), true); - } - HeapWord* next_hard_end = MIN2(_true_end, _hard_end + ChunkSizeInWords); - _retained_filler = MemRegion(_hard_end, FillerHeaderSize); - _bt.alloc_block(_retained_filler.start(), _retained_filler.word_size()); - _top = _retained_filler.end(); - _hard_end = next_hard_end; - _end = _hard_end - AlignmentReserve; - res = ParGCAllocBuffer::allocate(word_sz); - if (res != NULL) { - _bt.alloc_block(res, word_sz); - } - } - return res; -} - -void -ParGCAllocBufferWithBOT::undo_allocation(HeapWord* obj, size_t word_sz) { - ParGCAllocBuffer::undo_allocation(obj, word_sz); - // This may back us up beyond the previous threshold, so reset. - _bt.set_region(MemRegion(_top, _hard_end)); - _bt.initialize_threshold(); -} - -void ParGCAllocBufferWithBOT::retire(bool end_of_gc, bool retain) { - assert(!retain || end_of_gc, "Can only retain at GC end."); - if (_retained) { - // We're about to make the retained_filler into a block. 
- _bt.BlockOffsetArray::alloc_block(_retained_filler.start(), - _retained_filler.end()); - } - // Reset _hard_end to _true_end (and update _end) - if (retain && _hard_end != NULL) { - assert(_hard_end <= _true_end, "Invariant."); - _hard_end = _true_end; - _end = MAX2(_top, _hard_end - AlignmentReserve); - assert(_end <= _hard_end, "Invariant."); - } - _true_end = _hard_end; - HeapWord* pre_top = _top; - - ParGCAllocBuffer::retire(end_of_gc, retain); - // Now any old _retained_filler is cut back to size, the free part is - // filled with a filler object, and top is past the header of that - // object. - - if (retain && _top < _end) { - assert(end_of_gc && retain, "Or else retain should be false."); - // If the lab does not start on a card boundary, we don't want to - // allocate onto that card, since that might lead to concurrent - // allocation and card scanning, which we don't support. So we fill - // the first card with a garbage object. - size_t first_card_index = _bsa->index_for(pre_top); - HeapWord* first_card_start = _bsa->address_for_index(first_card_index); - if (first_card_start < pre_top) { - HeapWord* second_card_start = - _bsa->inc_by_region_size(first_card_start); - - // Ensure enough room to fill with the smallest block - second_card_start = MAX2(second_card_start, pre_top + AlignmentReserve); - - // If the end is already in the first card, don't go beyond it! - // Or if the remainder is too small for a filler object, gobble it up. 
- if (_hard_end < second_card_start || - pointer_delta(_hard_end, second_card_start) < AlignmentReserve) { - second_card_start = _hard_end; - } - if (pre_top < second_card_start) { - MemRegion first_card_suffix(pre_top, second_card_start); - fill_region_with_block(first_card_suffix, true); - } - pre_top = second_card_start; - _top = pre_top; - _end = MAX2(_top, _hard_end - AlignmentReserve); - } - - // If the lab does not end on a card boundary, we don't want to - // allocate onto that card, since that might lead to concurrent - // allocation and card scanning, which we don't support. So we fill - // the last card with a garbage object. - size_t last_card_index = _bsa->index_for(_hard_end); - HeapWord* last_card_start = _bsa->address_for_index(last_card_index); - if (last_card_start < _hard_end) { - - // Ensure enough room to fill with the smallest block - last_card_start = MIN2(last_card_start, _hard_end - AlignmentReserve); - - // If the top is already in the last card, don't go back beyond it! - // Or if the remainder is too small for a filler object, gobble it up. - if (_top > last_card_start || - pointer_delta(last_card_start, _top) < AlignmentReserve) { - last_card_start = _top; - } - if (last_card_start < _hard_end) { - MemRegion last_card_prefix(last_card_start, _hard_end); - fill_region_with_block(last_card_prefix, false); - } - _hard_end = last_card_start; - _end = MAX2(_top, _hard_end - AlignmentReserve); - _true_end = _hard_end; - assert(_end <= _hard_end, "Invariant."); - } - - // At this point: - // 1) we had a filler object from the original top to hard_end. - // 2) We've filled in any partial cards at the front and back. - if (pre_top < _hard_end) { - // Now we can reset the _bt to do allocation in the given area. - MemRegion new_filler(pre_top, _hard_end); - fill_region_with_block(new_filler, false); - _top = pre_top + ParGCAllocBuffer::FillerHeaderSize; - // If there's no space left, don't retain. 
- if (_top >= _end) { - _retained = false; - invalidate(); - return; - } - _retained_filler = MemRegion(pre_top, _top); - _bt.set_region(MemRegion(_top, _hard_end)); - _bt.initialize_threshold(); - assert(_bt.threshold() > _top, "initialize_threshold failed!"); - - // There may be other reasons for queries into the middle of the - // filler object. When such queries are done in parallel with - // allocation, bad things can happen, if the query involves object - // iteration. So we ensure that such queries do not involve object - // iteration, by putting another filler object on the boundaries of - // such queries. One such is the object spanning a parallel card - // chunk boundary. - - // "chunk_boundary" is the address of the first chunk boundary less - // than "hard_end". - HeapWord* chunk_boundary = - (HeapWord*)align_size_down(intptr_t(_hard_end-1), ChunkSizeInBytes); - assert(chunk_boundary < _hard_end, "Or else above did not work."); - assert(pointer_delta(_true_end, chunk_boundary) >= AlignmentReserve, - "Consequence of last card handling above."); - - if (_top <= chunk_boundary) { - assert(_true_end == _hard_end, "Invariant."); - while (_top <= chunk_boundary) { - assert(pointer_delta(_hard_end, chunk_boundary) >= AlignmentReserve, - "Consequence of last card handling above."); - _bt.BlockOffsetArray::alloc_block(chunk_boundary, _hard_end); - CollectedHeap::fill_with_object(chunk_boundary, _hard_end); - _hard_end = chunk_boundary; - chunk_boundary -= ChunkSizeInWords; - } - _end = _hard_end - AlignmentReserve; - assert(_top <= _end, "Invariant."); - // Now reset the initial filler chunk so it doesn't overlap with - // the one(s) inserted above. 
- MemRegion new_filler(pre_top, _hard_end); - fill_region_with_block(new_filler, false); - } - } else { - _retained = false; - invalidate(); - } - } else { - assert(!end_of_gc || - (!_retained && _true_end == _hard_end), "Checking."); - } - assert(_end <= _hard_end, "Invariant."); - assert(_top < _end || _top == _hard_end, "Invariant"); -} diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp --- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp Wed Aug 22 10:01:51 2012 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP -#define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP - -#include "memory/allocation.hpp" -#include "memory/blockOffsetTable.hpp" -#include "memory/threadLocalAllocBuffer.hpp" -#include "utilities/globalDefinitions.hpp" - -// Forward decl. 
- -class PLABStats; - -// A per-thread allocation buffer used during GC. -class ParGCAllocBuffer: public CHeapObj { -protected: - char head[32]; - size_t _word_sz; // in HeapWord units - HeapWord* _bottom; - HeapWord* _top; - HeapWord* _end; // last allocatable address + 1 - HeapWord* _hard_end; // _end + AlignmentReserve - bool _retained; // whether we hold a _retained_filler - MemRegion _retained_filler; - // In support of ergonomic sizing of PLAB's - size_t _allocated; // in HeapWord units - size_t _wasted; // in HeapWord units - char tail[32]; - static size_t FillerHeaderSize; - static size_t AlignmentReserve; - -public: - // Initializes the buffer to be empty, but with the given "word_sz". - // Must get initialized with "set_buf" for an allocation to succeed. - ParGCAllocBuffer(size_t word_sz); - - static const size_t min_size() { - return ThreadLocalAllocBuffer::min_size(); - } - - static const size_t max_size() { - return ThreadLocalAllocBuffer::max_size(); - } - - // If an allocation of the given "word_sz" can be satisfied within the - // buffer, do the allocation, returning a pointer to the start of the - // allocated block. If the allocation request cannot be satisfied, - // return NULL. - HeapWord* allocate(size_t word_sz) { - HeapWord* res = _top; - if (pointer_delta(_end, _top) >= word_sz) { - _top = _top + word_sz; - return res; - } else { - return NULL; - } - } - - // Undo the last allocation in the buffer, which is required to be of the - // "obj" of the given "word_sz". - void undo_allocation(HeapWord* obj, size_t word_sz) { - assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo"); - assert(pointer_delta(_top, obj) == word_sz, "Bad undo"); - _top = obj; - } - - // The total (word) size of the buffer, including both allocated and - // unallocted space. - size_t word_sz() { return _word_sz; } - - // Should only be done if we are about to reset with a new buffer of the - // given size. 
- void set_word_size(size_t new_word_sz) { - assert(new_word_sz > AlignmentReserve, "Too small"); - _word_sz = new_word_sz; - } - - // The number of words of unallocated space remaining in the buffer. - size_t words_remaining() { - assert(_end >= _top, "Negative buffer"); - return pointer_delta(_end, _top, HeapWordSize); - } - - bool contains(void* addr) { - return (void*)_bottom <= addr && addr < (void*)_hard_end; - } - - // Sets the space of the buffer to be [buf, space+word_sz()). - void set_buf(HeapWord* buf) { - _bottom = buf; - _top = _bottom; - _hard_end = _bottom + word_sz(); - _end = _hard_end - AlignmentReserve; - assert(_end >= _top, "Negative buffer"); - // In support of ergonomic sizing - _allocated += word_sz(); - } - - // Flush the stats supporting ergonomic sizing of PLAB's - void flush_stats(PLABStats* stats); - void flush_stats_and_retire(PLABStats* stats, bool retain) { - // We flush the stats first in order to get a reading of - // unused space in the last buffer. - if (ResizePLAB) { - flush_stats(stats); - } - // Retire the last allocation buffer. - retire(true, retain); - } - - // Force future allocations to fail and queries for contains() - // to return false - void invalidate() { - assert(!_retained, "Shouldn't retain an invalidated buffer."); - _end = _hard_end; - _wasted += pointer_delta(_end, _top); // unused space - _top = _end; // force future allocations to fail - _bottom = _end; // force future contains() queries to return false - } - - // Fills in the unallocated portion of the buffer with a garbage object. - // If "end_of_gc" is TRUE, is after the last use in the GC. IF "retain" - // is true, attempt to re-use the unused portion in the next GC. 
- void retire(bool end_of_gc, bool retain); - - void print() PRODUCT_RETURN; -}; - -// PLAB stats book-keeping -class PLABStats VALUE_OBJ_CLASS_SPEC { - size_t _allocated; // total allocated - size_t _wasted; // of which wasted (internal fragmentation) - size_t _unused; // Unused in last buffer - size_t _used; // derived = allocated - wasted - unused - size_t _desired_plab_sz;// output of filter (below), suitably trimmed and quantized - AdaptiveWeightedAverage - _filter; // integrator with decay - - public: - PLABStats(size_t desired_plab_sz_, unsigned wt) : - _allocated(0), - _wasted(0), - _unused(0), - _used(0), - _desired_plab_sz(desired_plab_sz_), - _filter(wt) - { - size_t min_sz = min_size(); - size_t max_sz = max_size(); - size_t aligned_min_sz = align_object_size(min_sz); - size_t aligned_max_sz = align_object_size(max_sz); - assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz && - min_sz <= max_sz, - "PLAB clipping computation in adjust_desired_plab_sz()" - " may be incorrect"); - } - - static const size_t min_size() { - return ParGCAllocBuffer::min_size(); - } - - static const size_t max_size() { - return ParGCAllocBuffer::max_size(); - } - - size_t desired_plab_sz() { - return _desired_plab_sz; - } - - void adjust_desired_plab_sz(); // filter computation, latches output to - // _desired_plab_sz, clears sensor accumulators - - void add_allocated(size_t v) { - Atomic::add_ptr(v, &_allocated); - } - - void add_unused(size_t v) { - Atomic::add_ptr(v, &_unused); - } - - void add_wasted(size_t v) { - Atomic::add_ptr(v, &_wasted); - } -}; - -class ParGCAllocBufferWithBOT: public ParGCAllocBuffer { - BlockOffsetArrayContigSpace _bt; - BlockOffsetSharedArray* _bsa; - HeapWord* _true_end; // end of the whole ParGCAllocBuffer - - static const size_t ChunkSizeInWords; - static const size_t ChunkSizeInBytes; - HeapWord* allocate_slow(size_t word_sz); - - void fill_region_with_block(MemRegion mr, bool contig); - -public: - ParGCAllocBufferWithBOT(size_t 
word_sz, BlockOffsetSharedArray* bsa); - - HeapWord* allocate(size_t word_sz) { - HeapWord* res = ParGCAllocBuffer::allocate(word_sz); - if (res != NULL) { - _bt.alloc_block(res, word_sz); - } else { - res = allocate_slow(word_sz); - } - return res; - } - - void undo_allocation(HeapWord* obj, size_t word_sz); - - void set_buf(HeapWord* buf_start) { - ParGCAllocBuffer::set_buf(buf_start); - _true_end = _hard_end; - _bt.set_region(MemRegion(buf_start, word_sz())); - _bt.initialize_threshold(); - } - - void retire(bool end_of_gc, bool retain); - - MemRegion range() { - return MemRegion(_top, _true_end); - } -}; - -#endif // SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -24,11 +24,11 @@ #include "precompiled.hpp" #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp" -#include "gc_implementation/parNew/parGCAllocBuffer.hpp" #include "gc_implementation/parNew/parNewGeneration.hpp" #include "gc_implementation/parNew/parOopClosures.inline.hpp" #include "gc_implementation/shared/adaptiveSizePolicy.hpp" #include "gc_implementation/shared/ageTable.hpp" +#include "gc_implementation/shared/parGCAllocBuffer.hpp" #include "gc_implementation/shared/spaceDecorator.hpp" #include "memory/defNewGeneration.inline.hpp" #include "memory/genCollectedHeap.hpp" @@ -453,7 +453,8 @@ // retire the last buffer. par_scan_state.to_space_alloc_buffer()-> flush_stats_and_retire(_gen.plab_stats(), - false /* !retain */); + true /* end_of_gc */, + false /* retain */); // Every thread has its own age table. We need to merge // them all into one. 
diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/parNew/parNewGeneration.hpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ #ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARNEWGENERATION_HPP #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARNEWGENERATION_HPP -#include "gc_implementation/parNew/parGCAllocBuffer.hpp" +#include "gc_implementation/shared/parGCAllocBuffer.hpp" #include "memory/defNewGeneration.hpp" #include "utilities/taskqueue.hpp" diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc_implementation/shared/parGCAllocBuffer.hpp" +#include "memory/sharedHeap.hpp" +#include "oops/arrayOop.hpp" +#include "oops/oop.inline.hpp" + +ParGCAllocBuffer::ParGCAllocBuffer(size_t desired_plab_sz_) : + _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL), + _end(NULL), _hard_end(NULL), + _retained(false), _retained_filler(), + _allocated(0), _wasted(0) +{ + assert (min_size() > AlignmentReserve, "Inconsistency!"); + // arrayOopDesc::header_size depends on command line initialization. + FillerHeaderSize = align_object_size(arrayOopDesc::header_size(T_INT)); + AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? FillerHeaderSize : 0; +} + +size_t ParGCAllocBuffer::FillerHeaderSize; + +// If the minimum object size is greater than MinObjAlignment, we can +// end up with a shard at the end of the buffer that's smaller than +// the smallest object. We can't allow that because the buffer must +// look like it's full of objects when we retire it, so we make +// sure we have enough space for a filler int array object. +size_t ParGCAllocBuffer::AlignmentReserve; + +void ParGCAllocBuffer::retire(bool end_of_gc, bool retain) { + assert(!retain || end_of_gc, "Can only retain at GC end."); + if (_retained) { + // If the buffer had been retained shorten the previous filler object. 
+ assert(_retained_filler.end() <= _top, "INVARIANT"); + CollectedHeap::fill_with_object(_retained_filler); + // Wasted space book-keeping, otherwise (normally) done in invalidate() + _wasted += _retained_filler.word_size(); + _retained = false; + } + assert(!end_of_gc || !_retained, "At this point, end_of_gc ==> !_retained."); + if (_top < _hard_end) { + CollectedHeap::fill_with_object(_top, _hard_end); + if (!retain) { + invalidate(); + } else { + // Is there wasted space we'd like to retain for the next GC? + if (pointer_delta(_end, _top) > FillerHeaderSize) { + _retained = true; + _retained_filler = MemRegion(_top, FillerHeaderSize); + _top = _top + FillerHeaderSize; + } else { + invalidate(); + } + } + } +} + +void ParGCAllocBuffer::flush_stats(PLABStats* stats) { + assert(ResizePLAB, "Wasted work"); + stats->add_allocated(_allocated); + stats->add_wasted(_wasted); + stats->add_unused(pointer_delta(_end, _top)); +} + +// Compute desired plab size and latch result for later +// use. This should be called once at the end of parallel +// scavenge; it clears the sensor accumulators. 
+void PLABStats::adjust_desired_plab_sz() { + assert(ResizePLAB, "Not set"); + if (_allocated == 0) { + assert(_unused == 0, "Inconsistency in PLAB stats"); + _allocated = 1; + } + double wasted_frac = (double)_unused/(double)_allocated; + size_t target_refills = (size_t)((wasted_frac*TargetSurvivorRatio)/ + TargetPLABWastePct); + if (target_refills == 0) { + target_refills = 1; + } + _used = _allocated - _wasted - _unused; + size_t plab_sz = _used/(target_refills*ParallelGCThreads); + if (PrintPLAB) gclog_or_tty->print(" (plab_sz = %d ", plab_sz); + // Take historical weighted average + _filter.sample(plab_sz); + // Clip from above and below, and align to object boundary + plab_sz = MAX2(min_size(), (size_t)_filter.average()); + plab_sz = MIN2(max_size(), plab_sz); + plab_sz = align_object_size(plab_sz); + // Latch the result + if (PrintPLAB) gclog_or_tty->print(" desired_plab_sz = %d) ", plab_sz); + _desired_plab_sz = plab_sz; + // Now clear the accumulators for next round: + // note this needs to be fixed in the case where we + // are retaining across scavenges. FIX ME !!! 
XXX + _allocated = 0; + _wasted = 0; + _unused = 0; +} + +#ifndef PRODUCT +void ParGCAllocBuffer::print() { + gclog_or_tty->print("parGCAllocBuffer: _bottom: %p _top: %p _end: %p _hard_end: %p" + "_retained: %c _retained_filler: [%p,%p)\n", + _bottom, _top, _end, _hard_end, + "FT"[_retained], _retained_filler.start(), _retained_filler.end()); +} +#endif // !PRODUCT + +const size_t ParGCAllocBufferWithBOT::ChunkSizeInWords = +MIN2(CardTableModRefBS::par_chunk_heapword_alignment(), + ((size_t)Generation::GenGrain)/HeapWordSize); +const size_t ParGCAllocBufferWithBOT::ChunkSizeInBytes = +MIN2(CardTableModRefBS::par_chunk_heapword_alignment() * HeapWordSize, + (size_t)Generation::GenGrain); + +ParGCAllocBufferWithBOT::ParGCAllocBufferWithBOT(size_t word_sz, + BlockOffsetSharedArray* bsa) : + ParGCAllocBuffer(word_sz), + _bsa(bsa), + _bt(bsa, MemRegion(_bottom, _hard_end)), + _true_end(_hard_end) +{} + +// The buffer comes with its own BOT, with a shared (obviously) underlying +// BlockOffsetSharedArray. We manipulate this BOT in the normal way +// as we would for any contiguous space. However, on occasion we +// need to do some buffer surgery at the extremities before we +// start using the body of the buffer for allocations. Such surgery +// (as explained elsewhere) is to prevent allocation on a card that +// is in the process of being walked concurrently by another GC thread. +// When such surgery happens at a point that is far removed (to the +// right of the current allocation point, top), we use the "contig" +// parameter below to directly manipulate the shared array without +// modifying the _next_threshold state in the BOT. 
+void ParGCAllocBufferWithBOT::fill_region_with_block(MemRegion mr, + bool contig) { + CollectedHeap::fill_with_object(mr); + if (contig) { + _bt.alloc_block(mr.start(), mr.end()); + } else { + _bt.BlockOffsetArray::alloc_block(mr.start(), mr.end()); + } +} + +HeapWord* ParGCAllocBufferWithBOT::allocate_slow(size_t word_sz) { + HeapWord* res = NULL; + if (_true_end > _hard_end) { + assert((HeapWord*)align_size_down(intptr_t(_hard_end), + ChunkSizeInBytes) == _hard_end, + "or else _true_end should be equal to _hard_end"); + assert(_retained, "or else _true_end should be equal to _hard_end"); + assert(_retained_filler.end() <= _top, "INVARIANT"); + CollectedHeap::fill_with_object(_retained_filler); + if (_top < _hard_end) { + fill_region_with_block(MemRegion(_top, _hard_end), true); + } + HeapWord* next_hard_end = MIN2(_true_end, _hard_end + ChunkSizeInWords); + _retained_filler = MemRegion(_hard_end, FillerHeaderSize); + _bt.alloc_block(_retained_filler.start(), _retained_filler.word_size()); + _top = _retained_filler.end(); + _hard_end = next_hard_end; + _end = _hard_end - AlignmentReserve; + res = ParGCAllocBuffer::allocate(word_sz); + if (res != NULL) { + _bt.alloc_block(res, word_sz); + } + } + return res; +} + +void +ParGCAllocBufferWithBOT::undo_allocation(HeapWord* obj, size_t word_sz) { + ParGCAllocBuffer::undo_allocation(obj, word_sz); + // This may back us up beyond the previous threshold, so reset. + _bt.set_region(MemRegion(_top, _hard_end)); + _bt.initialize_threshold(); +} + +void ParGCAllocBufferWithBOT::retire(bool end_of_gc, bool retain) { + assert(!retain || end_of_gc, "Can only retain at GC end."); + if (_retained) { + // We're about to make the retained_filler into a block. 
+ _bt.BlockOffsetArray::alloc_block(_retained_filler.start(), + _retained_filler.end()); + } + // Reset _hard_end to _true_end (and update _end) + if (retain && _hard_end != NULL) { + assert(_hard_end <= _true_end, "Invariant."); + _hard_end = _true_end; + _end = MAX2(_top, _hard_end - AlignmentReserve); + assert(_end <= _hard_end, "Invariant."); + } + _true_end = _hard_end; + HeapWord* pre_top = _top; + + ParGCAllocBuffer::retire(end_of_gc, retain); + // Now any old _retained_filler is cut back to size, the free part is + // filled with a filler object, and top is past the header of that + // object. + + if (retain && _top < _end) { + assert(end_of_gc && retain, "Or else retain should be false."); + // If the lab does not start on a card boundary, we don't want to + // allocate onto that card, since that might lead to concurrent + // allocation and card scanning, which we don't support. So we fill + // the first card with a garbage object. + size_t first_card_index = _bsa->index_for(pre_top); + HeapWord* first_card_start = _bsa->address_for_index(first_card_index); + if (first_card_start < pre_top) { + HeapWord* second_card_start = + _bsa->inc_by_region_size(first_card_start); + + // Ensure enough room to fill with the smallest block + second_card_start = MAX2(second_card_start, pre_top + AlignmentReserve); + + // If the end is already in the first card, don't go beyond it! + // Or if the remainder is too small for a filler object, gobble it up. 
+ if (_hard_end < second_card_start || + pointer_delta(_hard_end, second_card_start) < AlignmentReserve) { + second_card_start = _hard_end; + } + if (pre_top < second_card_start) { + MemRegion first_card_suffix(pre_top, second_card_start); + fill_region_with_block(first_card_suffix, true); + } + pre_top = second_card_start; + _top = pre_top; + _end = MAX2(_top, _hard_end - AlignmentReserve); + } + + // If the lab does not end on a card boundary, we don't want to + // allocate onto that card, since that might lead to concurrent + // allocation and card scanning, which we don't support. So we fill + // the last card with a garbage object. + size_t last_card_index = _bsa->index_for(_hard_end); + HeapWord* last_card_start = _bsa->address_for_index(last_card_index); + if (last_card_start < _hard_end) { + + // Ensure enough room to fill with the smallest block + last_card_start = MIN2(last_card_start, _hard_end - AlignmentReserve); + + // If the top is already in the last card, don't go back beyond it! + // Or if the remainder is too small for a filler object, gobble it up. + if (_top > last_card_start || + pointer_delta(last_card_start, _top) < AlignmentReserve) { + last_card_start = _top; + } + if (last_card_start < _hard_end) { + MemRegion last_card_prefix(last_card_start, _hard_end); + fill_region_with_block(last_card_prefix, false); + } + _hard_end = last_card_start; + _end = MAX2(_top, _hard_end - AlignmentReserve); + _true_end = _hard_end; + assert(_end <= _hard_end, "Invariant."); + } + + // At this point: + // 1) we had a filler object from the original top to hard_end. + // 2) We've filled in any partial cards at the front and back. + if (pre_top < _hard_end) { + // Now we can reset the _bt to do allocation in the given area. + MemRegion new_filler(pre_top, _hard_end); + fill_region_with_block(new_filler, false); + _top = pre_top + ParGCAllocBuffer::FillerHeaderSize; + // If there's no space left, don't retain. 
+ if (_top >= _end) { + _retained = false; + invalidate(); + return; + } + _retained_filler = MemRegion(pre_top, _top); + _bt.set_region(MemRegion(_top, _hard_end)); + _bt.initialize_threshold(); + assert(_bt.threshold() > _top, "initialize_threshold failed!"); + + // There may be other reasons for queries into the middle of the + // filler object. When such queries are done in parallel with + // allocation, bad things can happen, if the query involves object + // iteration. So we ensure that such queries do not involve object + // iteration, by putting another filler object on the boundaries of + // such queries. One such is the object spanning a parallel card + // chunk boundary. + + // "chunk_boundary" is the address of the first chunk boundary less + // than "hard_end". + HeapWord* chunk_boundary = + (HeapWord*)align_size_down(intptr_t(_hard_end-1), ChunkSizeInBytes); + assert(chunk_boundary < _hard_end, "Or else above did not work."); + assert(pointer_delta(_true_end, chunk_boundary) >= AlignmentReserve, + "Consequence of last card handling above."); + + if (_top <= chunk_boundary) { + assert(_true_end == _hard_end, "Invariant."); + while (_top <= chunk_boundary) { + assert(pointer_delta(_hard_end, chunk_boundary) >= AlignmentReserve, + "Consequence of last card handling above."); + _bt.BlockOffsetArray::alloc_block(chunk_boundary, _hard_end); + CollectedHeap::fill_with_object(chunk_boundary, _hard_end); + _hard_end = chunk_boundary; + chunk_boundary -= ChunkSizeInWords; + } + _end = _hard_end - AlignmentReserve; + assert(_top <= _end, "Invariant."); + // Now reset the initial filler chunk so it doesn't overlap with + // the one(s) inserted above. 
+ MemRegion new_filler(pre_top, _hard_end); + fill_region_with_block(new_filler, false); + } + } else { + _retained = false; + invalidate(); + } + } else { + assert(!end_of_gc || + (!_retained && _true_end == _hard_end), "Checking."); + } + assert(_end <= _hard_end, "Invariant."); + assert(_top < _end || _top == _hard_end, "Invariant"); +} diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP +#define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP + +#include "memory/allocation.hpp" +#include "memory/blockOffsetTable.hpp" +#include "memory/threadLocalAllocBuffer.hpp" +#include "utilities/globalDefinitions.hpp" + +// Forward decl. 
+ +class PLABStats; + +// A per-thread allocation buffer used during GC. +class ParGCAllocBuffer: public CHeapObj { +protected: + char head[32]; + size_t _word_sz; // in HeapWord units + HeapWord* _bottom; + HeapWord* _top; + HeapWord* _end; // last allocatable address + 1 + HeapWord* _hard_end; // _end + AlignmentReserve + bool _retained; // whether we hold a _retained_filler + MemRegion _retained_filler; + // In support of ergonomic sizing of PLAB's + size_t _allocated; // in HeapWord units + size_t _wasted; // in HeapWord units + char tail[32]; + static size_t FillerHeaderSize; + static size_t AlignmentReserve; + +public: + // Initializes the buffer to be empty, but with the given "word_sz". + // Must get initialized with "set_buf" for an allocation to succeed. + ParGCAllocBuffer(size_t word_sz); + + static const size_t min_size() { + return ThreadLocalAllocBuffer::min_size(); + } + + static const size_t max_size() { + return ThreadLocalAllocBuffer::max_size(); + } + + // If an allocation of the given "word_sz" can be satisfied within the + // buffer, do the allocation, returning a pointer to the start of the + // allocated block. If the allocation request cannot be satisfied, + // return NULL. + HeapWord* allocate(size_t word_sz) { + HeapWord* res = _top; + if (pointer_delta(_end, _top) >= word_sz) { + _top = _top + word_sz; + return res; + } else { + return NULL; + } + } + + // Undo the last allocation in the buffer, which is required to be of the + // "obj" of the given "word_sz". + void undo_allocation(HeapWord* obj, size_t word_sz) { + assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo"); + assert(pointer_delta(_top, obj) == word_sz, "Bad undo"); + _top = obj; + } + + // The total (word) size of the buffer, including both allocated and + // unallocted space. + size_t word_sz() { return _word_sz; } + + // Should only be done if we are about to reset with a new buffer of the + // given size. 
+ void set_word_size(size_t new_word_sz) { + assert(new_word_sz > AlignmentReserve, "Too small"); + _word_sz = new_word_sz; + } + + // The number of words of unallocated space remaining in the buffer. + size_t words_remaining() { + assert(_end >= _top, "Negative buffer"); + return pointer_delta(_end, _top, HeapWordSize); + } + + bool contains(void* addr) { + return (void*)_bottom <= addr && addr < (void*)_hard_end; + } + + // Sets the space of the buffer to be [buf, space+word_sz()). + void set_buf(HeapWord* buf) { + _bottom = buf; + _top = _bottom; + _hard_end = _bottom + word_sz(); + _end = _hard_end - AlignmentReserve; + assert(_end >= _top, "Negative buffer"); + // In support of ergonomic sizing + _allocated += word_sz(); + } + + // Flush the stats supporting ergonomic sizing of PLAB's + void flush_stats(PLABStats* stats); + void flush_stats_and_retire(PLABStats* stats, bool end_of_gc, bool retain) { + // We flush the stats first in order to get a reading of + // unused space in the last buffer. + if (ResizePLAB) { + flush_stats(stats); + } + // Retire the last allocation buffer. + retire(end_of_gc, retain); + } + + // Force future allocations to fail and queries for contains() + // to return false + void invalidate() { + assert(!_retained, "Shouldn't retain an invalidated buffer."); + _end = _hard_end; + _wasted += pointer_delta(_end, _top); // unused space + _top = _end; // force future allocations to fail + _bottom = _end; // force future contains() queries to return false + } + + // Fills in the unallocated portion of the buffer with a garbage object. + // If "end_of_gc" is TRUE, is after the last use in the GC. IF "retain" + // is true, attempt to re-use the unused portion in the next GC. 
+ void retire(bool end_of_gc, bool retain); + + void print() PRODUCT_RETURN; +}; + +// PLAB stats book-keeping +class PLABStats VALUE_OBJ_CLASS_SPEC { + size_t _allocated; // total allocated + size_t _wasted; // of which wasted (internal fragmentation) + size_t _unused; // Unused in last buffer + size_t _used; // derived = allocated - wasted - unused + size_t _desired_plab_sz;// output of filter (below), suitably trimmed and quantized + AdaptiveWeightedAverage + _filter; // integrator with decay + + public: + PLABStats(size_t desired_plab_sz_, unsigned wt) : + _allocated(0), + _wasted(0), + _unused(0), + _used(0), + _desired_plab_sz(desired_plab_sz_), + _filter(wt) + { + size_t min_sz = min_size(); + size_t max_sz = max_size(); + size_t aligned_min_sz = align_object_size(min_sz); + size_t aligned_max_sz = align_object_size(max_sz); + assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz && + min_sz <= max_sz, + "PLAB clipping computation in adjust_desired_plab_sz()" + " may be incorrect"); + } + + static const size_t min_size() { + return ParGCAllocBuffer::min_size(); + } + + static const size_t max_size() { + return ParGCAllocBuffer::max_size(); + } + + size_t desired_plab_sz() { + return _desired_plab_sz; + } + + void adjust_desired_plab_sz(); // filter computation, latches output to + // _desired_plab_sz, clears sensor accumulators + + void add_allocated(size_t v) { + Atomic::add_ptr(v, &_allocated); + } + + void add_unused(size_t v) { + Atomic::add_ptr(v, &_unused); + } + + void add_wasted(size_t v) { + Atomic::add_ptr(v, &_wasted); + } +}; + +class ParGCAllocBufferWithBOT: public ParGCAllocBuffer { + BlockOffsetArrayContigSpace _bt; + BlockOffsetSharedArray* _bsa; + HeapWord* _true_end; // end of the whole ParGCAllocBuffer + + static const size_t ChunkSizeInWords; + static const size_t ChunkSizeInBytes; + HeapWord* allocate_slow(size_t word_sz); + + void fill_region_with_block(MemRegion mr, bool contig); + +public: + ParGCAllocBufferWithBOT(size_t 
word_sz, BlockOffsetSharedArray* bsa); + + HeapWord* allocate(size_t word_sz) { + HeapWord* res = ParGCAllocBuffer::allocate(word_sz); + if (res != NULL) { + _bt.alloc_block(res, word_sz); + } else { + res = allocate_slow(word_sz); + } + return res; + } + + void undo_allocation(HeapWord* obj, size_t word_sz); + + void set_buf(HeapWord* buf_start) { + ParGCAllocBuffer::set_buf(buf_start); + _true_end = _hard_end; + _bt.set_region(MemRegion(buf_start, word_sz())); + _bt.initialize_threshold(); + } + + void retire(bool end_of_gc, bool retain); + + MemRegion range() { + return MemRegion(_top, _true_end); + } +}; + +#endif // SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/memory/tenuredGeneration.cpp --- a/src/share/vm/memory/tenuredGeneration.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/memory/tenuredGeneration.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "gc_implementation/parNew/parGCAllocBuffer.hpp" #include "gc_implementation/shared/collectorCounters.hpp" +#include "gc_implementation/shared/parGCAllocBuffer.hpp" #include "memory/allocation.inline.hpp" #include "memory/blockOffsetTable.inline.hpp" #include "memory/generation.inline.hpp" diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/callGenerator.cpp --- a/src/share/vm/opto/callGenerator.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/callGenerator.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -158,74 +158,6 @@ return kit.transfer_exceptions_into_jvms(); } -//---------------------------DynamicCallGenerator----------------------------- -// Internal class which handles all out-of-line invokedynamic calls. -class DynamicCallGenerator : public CallGenerator { -public: - DynamicCallGenerator(ciMethod* method) - : CallGenerator(method) - { - } - virtual JVMState* generate(JVMState* jvms); -}; - -JVMState* DynamicCallGenerator::generate(JVMState* jvms) { - GraphKit kit(jvms); - Compile* C = kit.C; - PhaseGVN& gvn = kit.gvn(); - - if (C->log() != NULL) { - C->log()->elem("dynamic_call bci='%d'", jvms->bci()); - } - - // Get the constant pool cache from the caller class. - ciMethod* caller_method = jvms->method(); - ciBytecodeStream str(caller_method); - str.force_bci(jvms->bci()); // Set the stream to the invokedynamic bci. - assert(str.cur_bc() == Bytecodes::_invokedynamic, "wrong place to issue a dynamic call!"); - ciCPCache* cpcache = str.get_cpcache(); - - // Get the offset of the CallSite from the constant pool cache - // pointer. - int index = str.get_method_index(); - size_t call_site_offset = cpcache->get_f1_offset(index); - - // Load the CallSite object from the constant pool cache. 
- const TypeOopPtr* cpcache_type = TypeOopPtr::make_from_constant(cpcache); // returns TypeAryPtr of type T_OBJECT - const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass()); - Node* cpcache_adr = kit.makecon(cpcache_type); - Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset); - // The oops in the constant pool cache are not compressed; load then as raw pointers. - Node* call_site = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw); - - // Load the target MethodHandle from the CallSite object. - const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass()); - Node* target_mh_adr = kit.basic_plus_adr(call_site, java_lang_invoke_CallSite::target_offset_in_bytes()); - Node* target_mh = kit.make_load(kit.control(), target_mh_adr, target_type, T_OBJECT); - - address resolve_stub = SharedRuntime::get_resolve_opt_virtual_call_stub(); - - CallStaticJavaNode* call = new (C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), resolve_stub, method(), kit.bci()); - // invokedynamic is treated as an optimized invokevirtual. - call->set_optimized_virtual(true); - // Take extra care (in the presence of argument motion) not to trash the SP: - call->set_method_handle_invoke(true); - - // Pass the target MethodHandle as first argument and shift the - // other arguments. - call->init_req(0 + TypeFunc::Parms, target_mh); - uint nargs = call->method()->arg_size(); - for (uint i = 1; i < nargs; i++) { - Node* arg = kit.argument(i - 1); - call->init_req(i + TypeFunc::Parms, arg); - } - - kit.set_edges_for_java_call(call); - Node* ret = kit.set_results_for_java_call(call); - kit.push_node(method()->return_type()->basic_type(), ret); - return kit.transfer_exceptions_into_jvms(); -} - //--------------------------VirtualCallGenerator------------------------------ // Internal class which handles all out-of-line calls checking receiver type. 
class VirtualCallGenerator : public CallGenerator { @@ -328,12 +260,6 @@ return new VirtualCallGenerator(m, vtable_index); } -CallGenerator* CallGenerator::for_dynamic_call(ciMethod* m) { - assert(m->is_compiled_lambda_form(), "for_dynamic_call mismatch"); - //@@ FIXME: this should be done via a direct call - return new DynamicCallGenerator(m); -} - // Allow inlining decisions to be delayed class LateInlineCallGenerator : public DirectCallGenerator { CallGenerator* _inline_cg; @@ -347,7 +273,7 @@ // Convert the CallStaticJava into an inline virtual void do_late_inline(); - JVMState* generate(JVMState* jvms) { + virtual JVMState* generate(JVMState* jvms) { // Record that this call site should be revisited once the main // parse is finished. Compile::current()->add_late_inline(this); diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/chaitin.cpp --- a/src/share/vm/opto/chaitin.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/chaitin.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -484,24 +484,33 @@ if (_names[i]) { // Live range associated with Node? LRG &lrg = lrgs(_names[i]); if (!lrg.alive()) { - _node_regs[i].set_bad(); + set_bad(i); } else if (lrg.num_regs() == 1) { - _node_regs[i].set1(lrg.reg()); - } else { // Must be a register-pair - if (!lrg._fat_proj) { // Must be aligned adjacent register pair + set1(i, lrg.reg()); + } else { // Must be a register-set + if (!lrg._fat_proj) { // Must be aligned adjacent register set // Live ranges record the highest register in their mask. // We want the low register for the AD file writer's convenience. - _node_regs[i].set2( OptoReg::add(lrg.reg(),(1-lrg.num_regs())) ); + OptoReg::Name hi = lrg.reg(); // Get hi register + OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo + // We have to use pair [lo,lo+1] even for wide vectors because + // the rest of code generation works only with pairs. It is safe + // since for registers encoding only 'lo' is used. 
+ // Second reg from pair is used in ScheduleAndBundle on SPARC where + // vector max size is 8 which corresponds to registers pair. + // It is also used in BuildOopMaps but oop operations are not + // vectorized. + set2(i, lo); } else { // Misaligned; extract 2 bits OptoReg::Name hi = lrg.reg(); // Get hi register lrg.Remove(hi); // Yank from mask int lo = lrg.mask().find_first_elem(); // Find lo - _node_regs[i].set_pair( hi, lo ); + set_pair(i, hi, lo); } } if( lrg._is_oop ) _node_oops.set(i); } else { - _node_regs[i].set_bad(); + set_bad(i); } } @@ -1121,6 +1130,33 @@ } +//------------------------------is_legal_reg----------------------------------- +// Is 'reg' register legal for 'lrg'? +static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { + if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) && + lrg.mask().Member(OptoReg::add(reg,-chunk))) { + // RA uses OptoReg which represent the highest element of a registers set. + // For example, vectorX (128bit) on x86 uses [XMM,XMMb,XMMc,XMMd] set + // in which XMMd is used by RA to represent such vectors. A double value + // uses [XMM,XMMb] pairs and XMMb is used by RA for it. + // The register mask uses largest bits set of overlapping register sets. + // On x86 with AVX it uses 8 bits for each XMM registers set. + // + // The 'lrg' already has cleared-to-set register mask (done in Select() + // before calling choose_color()). Passing mask.Member(reg) check above + // indicates that the size (num_regs) of 'reg' set is less or equal to + // 'lrg' set size. + // For set size 1 any register which is member of 'lrg' mask is legal. + if (lrg.num_regs()==1) + return true; + // For larger sets only an aligned register with the same set size is legal. 
+ int mask = lrg.num_regs()-1; + if ((reg&mask) == mask) + return true; + } + return false; +} + //------------------------------bias_color------------------------------------- // Choose a color using the biasing heuristic OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { @@ -1137,10 +1173,7 @@ while ((datum = elements.next()) != 0) { OptoReg::Name reg = lrgs(datum).reg(); // If this LRG's register is legal for us, choose it - if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE && - lrg.mask().Member(OptoReg::add(reg,-chunk)) && - (lrg.num_regs()==1 || // either size 1 - (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs) + if (is_legal_reg(lrg, reg, chunk)) return reg; } } @@ -1151,10 +1184,7 @@ if( !(*(_ifg->_yanked))[copy_lrg] ) { OptoReg::Name reg = lrgs(copy_lrg).reg(); // And it is legal for you, - if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE && - lrg.mask().Member(OptoReg::add(reg,-chunk)) && - (lrg.num_regs()==1 || // either size 1 - (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs) + if (is_legal_reg(lrg, reg, chunk)) return reg; } else if( chunk == 0 ) { // Choose a color which is legal for him diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/classes.hpp --- a/src/share/vm/opto/classes.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/classes.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -256,6 +256,8 @@ macro(SubVL) macro(SubVF) macro(SubVD) +macro(MulVS) +macro(MulVI) macro(MulVF) macro(MulVD) macro(DivVF) @@ -263,9 +265,15 @@ macro(LShiftVB) macro(LShiftVS) macro(LShiftVI) +macro(LShiftVL) macro(RShiftVB) macro(RShiftVS) macro(RShiftVI) +macro(RShiftVL) +macro(URShiftVB) +macro(URShiftVS) +macro(URShiftVI) +macro(URShiftVL) macro(AndV) macro(OrV) macro(XorV) diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/compile.cpp Fri Aug 24 
19:45:42 2012 -0700 @@ -2604,7 +2604,7 @@ if (n->req()-1 > 2) { // Replace many operand PackNodes with a binary tree for matching PackNode* p = (PackNode*) n; - Node* btp = p->binaryTreePack(Compile::current(), 1, n->req()); + Node* btp = p->binary_tree_pack(Compile::current(), 1, n->req()); n->subsume_by(btp); } break; diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/idealKit.cpp --- a/src/share/vm/opto/idealKit.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/idealKit.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -295,7 +295,11 @@ if (_delay_all_transforms) { return delay_transform(n); } else { - return gvn().transform(n); + n = gvn().transform(n); + if (!gvn().is_IterGVN()) { + C->record_for_igvn(n); + } + return n; } } diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/library_call.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -171,7 +171,7 @@ // Helper for inline_unsafe_access. // Generates the guards that check whether the result of // Unsafe.getObject should be recorded in an SATB log buffer. 
- void insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val); + void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, int nargs, bool need_mem_bar); bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile); bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static); bool inline_unsafe_allocate(); @@ -291,6 +291,8 @@ case vmIntrinsics::_equals: case vmIntrinsics::_equalsC: break; // InlineNatives does not control String.compareTo + case vmIntrinsics::_Reference_get: + break; // InlineNatives does not control Reference.get default: return NULL; } @@ -361,11 +363,10 @@ break; case vmIntrinsics::_Reference_get: - // It is only when G1 is enabled that we absolutely - // need to use the intrinsic version of Reference.get() - // so that the value in the referent field, if necessary, - // can be registered by the pre-barrier code. - if (!UseG1GC) return NULL; + // Use the intrinsic version of Reference.get() so that the value in + // the referent field can be registered by the G1 pre-barrier code. + // Also add memory barrier to prevent commoning reads from this field + // across safepoint since GC can change it value. break; default: @@ -2195,14 +2196,17 @@ const static BasicType T_ADDRESS_HOLDER = T_LONG; -// Helper that guards and inserts a G1 pre-barrier. -void LibraryCallKit::insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val) { - assert(UseG1GC, "should not call this otherwise"); - +// Helper that guards and inserts a pre-barrier. +void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset, + Node* pre_val, int nargs, bool need_mem_bar) { // We could be accessing the referent field of a reference object. If so, when G1 // is enabled, we need to log the value in the referent field in an SATB buffer. // This routine performs some compile time filters and generates suitable // runtime filters that guard the pre-barrier code. 
+ // Also add memory barrier for non volatile load from the referent field + // to prevent commoning of loads across safepoint. + if (!UseG1GC && !need_mem_bar) + return; // Some compile time checks. @@ -2224,11 +2228,12 @@ const TypeInstPtr* itype = btype->isa_instptr(); if (itype != NULL) { - // Can the klass of base_oop be statically determined - // to be _not_ a sub-class of Reference? + // Can the klass of base_oop be statically determined to be + // _not_ a sub-class of Reference and _not_ Object? ciKlass* klass = itype->klass(); - if (klass->is_subtype_of(env()->Reference_klass()) && - !env()->Reference_klass()->is_subtype_of(klass)) { + if ( klass->is_loaded() && + !klass->is_subtype_of(env()->Reference_klass()) && + !env()->Object_klass()->is_subtype_of(klass)) { return; } } @@ -2238,10 +2243,8 @@ // we need to generate the following runtime filters // // if (offset == java_lang_ref_Reference::_reference_offset) { - // if (base != null) { - // if (instance_of(base, java.lang.ref.Reference)) { - // pre_barrier(_, pre_val, ...); - // } + // if (instance_of(base, java.lang.ref.Reference)) { + // pre_barrier(_, pre_val, ...); // } // } @@ -2254,19 +2257,19 @@ Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset); __ if_then(offset, BoolTest::eq, referent_off, unlikely); { - __ if_then(base_oop, BoolTest::ne, null(), likely); { - // Update graphKit memory and control from IdealKit. sync_kit(ideal); Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass())); + _sp += nargs; // gen_instanceof might do an uncommon trap Node* is_instof = gen_instanceof(base_oop, ref_klass_con); + _sp -= nargs; // Update IdealKit memory and control from graphKit. __ sync_kit(this); Node* one = __ ConI(1); - + // is_instof == 0 if base_oop == NULL __ if_then(is_instof, BoolTest::eq, one, unlikely); { // Update graphKit from IdeakKit. 
@@ -2278,12 +2281,15 @@ NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */, pre_val /* pre_val */, T_OBJECT); - + if (need_mem_bar) { + // Add memory barrier to prevent commoning reads from this field + // across safepoint since GC can change its value. + insert_mem_bar(Op_MemBarCPUOrder); + } // Update IdealKit from graphKit. __ sync_kit(this); } __ end_if(); // _ref_type != ref_none - } __ end_if(); // base != NULL } __ end_if(); // offset == referent_offset // Final sync IdealKit and GraphKit. @@ -2418,7 +2424,9 @@ // object (either by using Unsafe directly or through reflection) // then, if G1 is enabled, we need to record the referent in an // SATB log buffer using the pre-barrier mechanism. - bool need_read_barrier = UseG1GC && !is_native_ptr && !is_store && + // Also we need to add memory barrier to prevent commoning reads + // from this field across safepoint since GC can change its value. + bool need_read_barrier = !is_native_ptr && !is_store && offset != top() && heap_base_oop != top(); if (!is_store && type == T_OBJECT) { @@ -2508,7 +2516,7 @@ break; case T_OBJECT: if (need_read_barrier) { - insert_g1_pre_barrier(heap_base_oop, offset, p); + insert_pre_barrier(heap_base_oop, offset, p, nargs, !(is_volatile || need_mem_bar)); } push(p); break; @@ -5484,6 +5492,10 @@ result /* pre_val */, T_OBJECT); + // Add memory barrier to prevent commoning reads from this field + // across safepoint since GC can change its value. 
+ insert_mem_bar(Op_MemBarCPUOrder); + push(result); return true; } diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/loopnode.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -1773,6 +1773,8 @@ if (stride_con > 0) tty->print("+"); tty->print("%d", stride_con); + tty->print(" (%d iters) ", (int)cl->profile_trip_cnt()); + if (cl->is_pre_loop ()) tty->print(" pre" ); if (cl->is_main_loop()) tty->print(" main"); if (cl->is_post_loop()) tty->print(" post"); diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/output.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -1871,6 +1871,8 @@ if (!do_scheduling()) return; + assert(MaxVectorSize <= 8, "scheduling code works only with pairs"); + NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) // Create a data structure for all the scheduling information diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/superword.cpp --- a/src/share/vm/opto/superword.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/superword.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -1058,12 +1058,27 @@ return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0)); } +//------------------------------same_inputs-------------------------- +// For pack p, are all idx operands the same? +static bool same_inputs(Node_List* p, int idx) { + Node* p0 = p->at(0); + uint vlen = p->size(); + Node* p0_def = p0->in(idx); + for (uint i = 1; i < vlen; i++) { + Node* pi = p->at(i); + Node* pi_def = pi->in(idx); + if (p0_def != pi_def) + return false; + } + return true; +} + //------------------------------profitable--------------------------- // For pack p, are all operands and all uses (with in the block) vector? 
bool SuperWord::profitable(Node_List* p) { Node* p0 = p->at(0); uint start, end; - vector_opd_range(p0, &start, &end); + VectorNode::vector_operands(p0, &start, &end); // Return false if some input is not vector and inside block for (uint i = start; i < end; i++) { @@ -1071,15 +1086,20 @@ // For now, return false if not scalar promotion case (inputs are the same.) // Later, implement PackNode and allow differing, non-vector inputs // (maybe just the ones from outside the block.) - Node* p0_def = p0->in(i); - for (uint j = 1; j < p->size(); j++) { - Node* use = p->at(j); - Node* def = use->in(i); - if (p0_def != def) - return false; + if (!same_inputs(p, i)) { + return false; } } } + if (VectorNode::is_shift(p0)) { + // For now, return false if shift count is vector because + // hw does not support it. + if (is_vector_use(p0, 2)) + return false; + // For the same reason return false if different shift counts. + if (!same_inputs(p, 2)) + return false; + } if (!p0->is_Store()) { // For now, return false if not all uses are vector. // Later, implement ExtractNode and allow non-vector uses (maybe @@ -1357,6 +1377,12 @@ // Promote operands to vector Node* in1 = vector_opd(p, 1); Node* in2 = vector_opd(p, 2); + if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) { + // Move invariant vector input into second position to avoid register spilling. 
+ Node* tmp = in1; + in1 = in2; + in2 = tmp; + } vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n)); } else { ShouldNotReachHere(); @@ -1386,19 +1412,40 @@ uint vlen = p->size(); Node* opd = p0->in(opd_idx); - bool same_opd = true; - for (uint i = 1; i < vlen; i++) { - Node* pi = p->at(i); - Node* in = pi->in(opd_idx); - if (opd != in) { - same_opd = false; - break; + if (same_inputs(p, opd_idx)) { + if (opd->is_Vector() || opd->is_LoadVector()) { + assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector"); + return opd; // input is matching vector } - } - - if (same_opd) { - if (opd->is_Vector() || opd->is_LoadVector()) { - return opd; // input is matching vector + if ((opd_idx == 2) && VectorNode::is_shift(p0)) { + // No vector is needed for shift count. + // Vector instructions do not mask shift count, do it here. + Compile* C = _phase->C; + Node* cnt = opd; + juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1); + const TypeInt* t = opd->find_int_type(); + if (t != NULL && t->is_con()) { + juint shift = t->get_con(); + if (shift > mask) { // Unsigned cmp + cnt = ConNode::make(C, TypeInt::make(shift & mask)); + } + } else { + if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) { + cnt = ConNode::make(C, TypeInt::make(mask)); + _phase->_igvn.register_new_node_with_optimizer(cnt); + cnt = new (C, 3) AndINode(opd, cnt); + _phase->_igvn.register_new_node_with_optimizer(cnt); + _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); + } + assert(opd->bottom_type()->isa_int(), "int type only"); + // Move non constant shift count into XMM register. 
+ cnt = new (_phase->C, 2) MoveI2FNode(cnt); + } + if (cnt != opd) { + _phase->_igvn.register_new_node_with_optimizer(cnt); + _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); + } + return cnt; } assert(!opd->is_StoreVector(), "such vector is not expected here"); // Convert scalar input to vector with the same number of elements as @@ -1428,7 +1475,7 @@ Node* in = pi->in(opd_idx); assert(my_pack(in) == NULL, "Should already have been unpacked"); assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); - pk->add_opd(i, in); + pk->add_opd(in); } _phase->_igvn.register_new_node_with_optimizer(pk); _phase->set_ctrl(pk, _phase->get_ctrl(opd)); @@ -1718,37 +1765,27 @@ for (int i = _block.length() - 1; i >= 0; i--) { Node* n = _block.at(i); // Only integer types need be examined - if (n->bottom_type()->isa_int()) { + const Type* vt = velt_type(n); + if (vt->basic_type() == T_INT) { uint start, end; - vector_opd_range(n, &start, &end); + VectorNode::vector_operands(n, &start, &end); const Type* vt = velt_type(n); for (uint j = start; j < end; j++) { Node* in = n->in(j); - // Don't propagate through a type conversion - if (n->bottom_type() != in->bottom_type()) - continue; - switch(in->Opcode()) { - case Op_AddI: case Op_AddL: - case Op_SubI: case Op_SubL: - case Op_MulI: case Op_MulL: - case Op_AndI: case Op_AndL: - case Op_OrI: case Op_OrL: - case Op_XorI: case Op_XorL: - case Op_LShiftI: case Op_LShiftL: - case Op_CMoveI: case Op_CMoveL: - if (in_bb(in)) { - bool same_type = true; - for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { - Node *use = in->fast_out(k); - if (!in_bb(use) || !same_velt_type(use, n)) { - same_type = false; - break; - } + // Don't propagate through a memory + if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT && + data_size(n) < data_size(in)) { + bool same_type = true; + for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { + Node *use = in->fast_out(k); + if (!in_bb(use) || 
!same_velt_type(use, n)) { + same_type = false; + break; } - if (same_type) { - set_velt_type(in, vt); - } + } + if (same_type) { + set_velt_type(in, vt); } } } @@ -1792,10 +1829,8 @@ } const Type* t = _igvn.type(n); if (t->basic_type() == T_INT) { - if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL; - if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE; - if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR; - if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT; + // A narrow type of arithmetic operations will be determined by + // propagating the type of memory operations. return TypeInt::INT; } return t; @@ -1811,38 +1846,6 @@ return vt1 == vt2; } -//-------------------------vector_opd_range----------------------- -// (Start, end] half-open range defining which operands are vector -void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) { - switch (n->Opcode()) { - case Op_LoadB: case Op_LoadUB: - case Op_LoadS: case Op_LoadUS: - case Op_LoadI: case Op_LoadL: - case Op_LoadF: case Op_LoadD: - case Op_LoadP: - *start = 0; - *end = 0; - return; - case Op_StoreB: case Op_StoreC: - case Op_StoreI: case Op_StoreL: - case Op_StoreF: case Op_StoreD: - case Op_StoreP: - *start = MemNode::ValueIn; - *end = *start + 1; - return; - case Op_LShiftI: case Op_LShiftL: - *start = 1; - *end = 2; - return; - case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD: - *start = 2; - *end = n->req(); - return; - } - *start = 1; - *end = n->req(); // default is all operands -} - //------------------------------in_packset--------------------------- // Are s1 and s2 in a pack pair and ordered as s1,s2? 
bool SuperWord::in_packset(Node* s1, Node* s2) { @@ -1940,7 +1943,7 @@ // lim0 == original pre loop limit // V == v_align (power of 2) // invar == extra invariant piece of the address expression - // e == k [ +/- invar ] + // e == offset [ +/- invar ] // // When reassociating expressions involving '%' the basic rules are: // (a - b) % k == 0 => a % k == b % k @@ -1993,13 +1996,12 @@ int elt_size = align_to_ref_p.memory_size(); int v_align = vw / elt_size; assert(v_align > 1, "sanity"); - int k = align_to_ref_p.offset_in_bytes() / elt_size; - - Node *kn = _igvn.intcon(k); + int offset = align_to_ref_p.offset_in_bytes() / elt_size; + Node *offsn = _igvn.intcon(offset); - Node *e = kn; + Node *e = offsn; if (align_to_ref_p.invar() != NULL) { - // incorporate any extra invariant piece producing k +/- invar >>> log2(elt) + // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt) Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt); _phase->_igvn.register_new_node_with_optimizer(aref); @@ -2014,15 +2016,15 @@ } if (vw > ObjectAlignmentInBytes) { // incorporate base e +/- base && Mask >>> log2(elt) - Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw))); Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base()); _phase->_igvn.register_new_node_with_optimizer(xbase); - Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask); +#ifdef _LP64 + xbase = new (_phase->C, 2) ConvL2INode(xbase); + _phase->_igvn.register_new_node_with_optimizer(xbase); +#endif + Node* mask = _igvn.intcon(vw-1); + Node* masked_xbase = new (_phase->C, 3) AndINode(xbase, mask); _phase->_igvn.register_new_node_with_optimizer(masked_xbase); -#ifdef _LP64 - masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase); - _phase->_igvn.register_new_node_with_optimizer(masked_xbase); -#endif Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); Node* bref = new (_phase->C, 3) 
URShiftINode(masked_xbase, log2_elt); _phase->_igvn.register_new_node_with_optimizer(bref); diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/vectornode.cpp --- a/src/share/vm/opto/vectornode.cpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/vectornode.cpp Fri Aug 24 19:45:42 2012 -0700 @@ -31,7 +31,7 @@ // Return the vector operator for the specified scalar operation // and vector length. Also used to check if the code generator // supports the vector operation. -int VectorNode::opcode(int sopc, uint vlen, BasicType bt) { +int VectorNode::opcode(int sopc, BasicType bt) { switch (sopc) { case Op_AddI: switch (bt) { @@ -69,6 +69,15 @@ case Op_SubD: assert(bt == T_DOUBLE, "must be"); return Op_SubVD; + case Op_MulI: + switch (bt) { + case T_BOOLEAN: + case T_BYTE: return 0; // Unimplemented + case T_CHAR: + case T_SHORT: return Op_MulVS; + case T_INT: return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1 + } + ShouldNotReachHere(); case Op_MulF: assert(bt == T_FLOAT, "must be"); return Op_MulVF; @@ -90,6 +99,9 @@ case T_INT: return Op_LShiftVI; } ShouldNotReachHere(); + case Op_LShiftL: + assert(bt == T_LONG, "must be"); + return Op_LShiftVL; case Op_RShiftI: switch (bt) { case T_BOOLEAN: @@ -99,6 +111,21 @@ case T_INT: return Op_RShiftVI; } ShouldNotReachHere(); + case Op_RShiftL: + assert(bt == T_LONG, "must be"); + return Op_RShiftVL; + case Op_URShiftI: + switch (bt) { + case T_BOOLEAN: + case T_BYTE: return Op_URShiftVB; + case T_CHAR: + case T_SHORT: return Op_URShiftVS; + case T_INT: return Op_URShiftVI; + } + ShouldNotReachHere(); + case Op_URShiftL: + assert(bt == T_LONG, "must be"); + return Op_URShiftVL; case Op_AndI: case Op_AndL: return Op_AndV; @@ -134,16 +161,88 @@ if (is_java_primitive(bt) && (vlen > 1) && is_power_of_2(vlen) && Matcher::vector_size_supported(bt, vlen)) { - int vopc = VectorNode::opcode(opc, vlen, bt); + int vopc = VectorNode::opcode(opc, bt); return vopc > 0 && Matcher::has_match_rule(vopc); } return 
false; } +bool VectorNode::is_shift(Node* n) { + switch (n->Opcode()) { + case Op_LShiftI: + case Op_LShiftL: + case Op_RShiftI: + case Op_RShiftL: + case Op_URShiftI: + case Op_URShiftL: + return true; + } + return false; +} + +// Check if input is loop invariant vector. +bool VectorNode::is_invariant_vector(Node* n) { + // Only Replicate vector nodes are loop invariant for now. + switch (n->Opcode()) { + case Op_ReplicateB: + case Op_ReplicateS: + case Op_ReplicateI: + case Op_ReplicateL: + case Op_ReplicateF: + case Op_ReplicateD: + return true; + } + return false; +} + +// [Start, end) half-open range defining which operands are vectors +void VectorNode::vector_operands(Node* n, uint* start, uint* end) { + switch (n->Opcode()) { + case Op_LoadB: case Op_LoadUB: + case Op_LoadS: case Op_LoadUS: + case Op_LoadI: case Op_LoadL: + case Op_LoadF: case Op_LoadD: + case Op_LoadP: case Op_LoadN: + *start = 0; + *end = 0; // no vector operands + break; + case Op_StoreB: case Op_StoreC: + case Op_StoreI: case Op_StoreL: + case Op_StoreF: case Op_StoreD: + case Op_StoreP: case Op_StoreN: + *start = MemNode::ValueIn; + *end = MemNode::ValueIn + 1; // 1 vector operand + break; + case Op_LShiftI: case Op_LShiftL: + case Op_RShiftI: case Op_RShiftL: + case Op_URShiftI: case Op_URShiftL: + *start = 1; + *end = 2; // 1 vector operand + break; + case Op_AddI: case Op_AddL: case Op_AddF: case Op_AddD: + case Op_SubI: case Op_SubL: case Op_SubF: case Op_SubD: + case Op_MulI: case Op_MulL: case Op_MulF: case Op_MulD: + case Op_DivF: case Op_DivD: + case Op_AndI: case Op_AndL: + case Op_OrI: case Op_OrL: + case Op_XorI: case Op_XorL: + *start = 1; + *end = 3; // 2 vector operands + break; + case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD: + *start = 2; + *end = n->req(); + break; + default: + *start = 1; + *end = n->req(); // default is all operands + } +} + // Return the vector version of a scalar operation node. 
VectorNode* VectorNode::make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt) { const TypeVect* vt = TypeVect::make(bt, vlen); - int vopc = VectorNode::opcode(opc, vlen, bt); + int vopc = VectorNode::opcode(opc, bt); switch (vopc) { case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vt); @@ -160,6 +259,8 @@ case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vt); case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vt); + case Op_MulVS: return new (C, 3) MulVSNode(n1, n2, vt); + case Op_MulVI: return new (C, 3) MulVINode(n1, n2, vt); case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vt); case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vt); @@ -169,10 +270,17 @@ case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vt); case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vt); case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vt); + case Op_LShiftVL: return new (C, 3) LShiftVLNode(n1, n2, vt); case Op_RShiftVB: return new (C, 3) RShiftVBNode(n1, n2, vt); case Op_RShiftVS: return new (C, 3) RShiftVSNode(n1, n2, vt); case Op_RShiftVI: return new (C, 3) RShiftVINode(n1, n2, vt); + case Op_RShiftVL: return new (C, 3) RShiftVLNode(n1, n2, vt); + + case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vt); + case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vt); + case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vt); + case Op_URShiftVL: return new (C, 3) URShiftVLNode(n1, n2, vt); case Op_AndV: return new (C, 3) AndVNode(n1, n2, vt); case Op_OrV: return new (C, 3) OrVNode (n1, n2, vt); @@ -214,38 +322,39 @@ switch (bt) { case T_BOOLEAN: case T_BYTE: - return new (C, vlen+1) PackBNode(s, vt); + return new (C, 2) PackBNode(s, vt); case T_CHAR: case T_SHORT: - return new (C, vlen+1) PackSNode(s, vt); + return new (C, 2) PackSNode(s, vt); case T_INT: - return new (C, vlen+1) PackINode(s, vt); + return new (C, 2) PackINode(s, vt); case T_LONG: - return new (C, vlen+1) PackLNode(s, vt); + return new (C, 2) PackLNode(s, 
vt); case T_FLOAT: - return new (C, vlen+1) PackFNode(s, vt); + return new (C, 2) PackFNode(s, vt); case T_DOUBLE: - return new (C, vlen+1) PackDNode(s, vt); + return new (C, 2) PackDNode(s, vt); } ShouldNotReachHere(); return NULL; } // Create a binary tree form for Packs. [lo, hi) (half-open) range -Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) { +PackNode* PackNode::binary_tree_pack(Compile* C, int lo, int hi) { int ct = hi - lo; assert(is_power_of_2(ct), "power of 2"); if (ct == 2) { PackNode* pk = PackNode::make(C, in(lo), 2, vect_type()->element_basic_type()); - pk->add_opd(1, in(lo+1)); + pk->add_opd(in(lo+1)); return pk; } else { int mid = lo + ct/2; - Node* n1 = binaryTreePack(C, lo, mid); - Node* n2 = binaryTreePack(C, mid, hi ); + PackNode* n1 = binary_tree_pack(C, lo, mid); + PackNode* n2 = binary_tree_pack(C, mid, hi ); - BasicType bt = vect_type()->element_basic_type(); + BasicType bt = n1->vect_type()->element_basic_type(); + assert(bt == n2->vect_type()->element_basic_type(), "should be the same"); switch (bt) { case T_BOOLEAN: case T_BYTE: diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/opto/vectornode.hpp --- a/src/share/vm/opto/vectornode.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/opto/vectornode.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -46,6 +46,7 @@ const TypeVect* vect_type() const { return type()->is_vect(); } uint length() const { return vect_type()->length(); } // Vector length + uint length_in_bytes() const { return vect_type()->length_in_bytes(); } virtual int Opcode() const; @@ -55,9 +56,12 @@ static VectorNode* make(Compile* C, int opc, Node* n1, Node* n2, uint vlen, BasicType bt); - static int opcode(int opc, uint vlen, BasicType bt); + static int opcode(int opc, BasicType bt); static bool implemented(int opc, uint vlen, BasicType bt); - + static bool is_shift(Node* n); + static bool is_invariant_vector(Node* n); + // [Start, end) half-open range defining which operands are vectors + static void 
vector_operands(Node* n, uint* start, uint* end); }; //===========================Vector=ALU=Operations==================================== @@ -158,6 +162,22 @@ virtual int Opcode() const; }; +//------------------------------MulVSNode--------------------------------------- +// Vector multiply short +class MulVSNode : public VectorNode { + public: + MulVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} + virtual int Opcode() const; +}; + +//------------------------------MulVINode--------------------------------------- +// Vector multiply int +class MulVINode : public VectorNode { + public: + MulVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} + virtual int Opcode() const; +}; + //------------------------------MulVFNode--------------------------------------- // Vector multiply float class MulVFNode : public VectorNode { @@ -191,7 +211,7 @@ }; //------------------------------LShiftVBNode--------------------------------------- -// Vector lshift byte +// Vector left shift bytes class LShiftVBNode : public VectorNode { public: LShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} @@ -199,7 +219,7 @@ }; //------------------------------LShiftVSNode--------------------------------------- -// Vector lshift shorts +// Vector left shift shorts class LShiftVSNode : public VectorNode { public: LShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} @@ -207,39 +227,88 @@ }; //------------------------------LShiftVINode--------------------------------------- -// Vector lshift ints +// Vector left shift ints class LShiftVINode : public VectorNode { public: LShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; -//------------------------------URShiftVBNode--------------------------------------- -// Vector urshift bytes +//------------------------------LShiftVLNode--------------------------------------- +// Vector 
left shift longs +class LShiftVLNode : public VectorNode { + public: + LShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} + virtual int Opcode() const; +}; + +//------------------------------RShiftVBNode--------------------------------------- +// Vector right arithmetic (signed) shift bytes class RShiftVBNode : public VectorNode { public: RShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; -//------------------------------URShiftVSNode--------------------------------------- -// Vector urshift shorts +//------------------------------RShiftVSNode--------------------------------------- +// Vector right arithmetic (signed) shift shorts class RShiftVSNode : public VectorNode { public: RShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; -//------------------------------URShiftVINode--------------------------------------- -// Vector urshift ints +//------------------------------RShiftVINode--------------------------------------- +// Vector right arithmetic (signed) shift ints class RShiftVINode : public VectorNode { public: RShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} virtual int Opcode() const; }; +//------------------------------RShiftVLNode--------------------------------------- +// Vector right arithmetic (signed) shift longs +class RShiftVLNode : public VectorNode { + public: + RShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} + virtual int Opcode() const; +}; + +//------------------------------URShiftVBNode--------------------------------------- +// Vector right logical (unsigned) shift bytes +class URShiftVBNode : public VectorNode { + public: + URShiftVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} + virtual int Opcode() const; +}; + 
+//------------------------------URShiftVSNode--------------------------------------- +// Vector right logical (unsigned) shift shorts +class URShiftVSNode : public VectorNode { + public: + URShiftVSNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} + virtual int Opcode() const; +}; + +//------------------------------URShiftVINode--------------------------------------- +// Vector right logical (unsigned) shift ints +class URShiftVINode : public VectorNode { + public: + URShiftVINode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} + virtual int Opcode() const; +}; + +//------------------------------URShiftVLNode--------------------------------------- +// Vector right logical (unsigned) shift longs +class URShiftVLNode : public VectorNode { + public: + URShiftVLNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} + virtual int Opcode() const; +}; + + //------------------------------AndVNode--------------------------------------- -// Vector and +// Vector and integer class AndVNode : public VectorNode { public: AndVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} @@ -247,7 +316,7 @@ }; //------------------------------OrVNode--------------------------------------- -// Vector or +// Vector or integer class OrVNode : public VectorNode { public: OrVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} @@ -255,7 +324,7 @@ }; //------------------------------XorVNode--------------------------------------- -// Vector xor +// Vector xor integer class XorVNode : public VectorNode { public: XorVNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1,in2,vt) {} @@ -373,12 +442,12 @@ PackNode(Node* in1, Node* n2, const TypeVect* vt) : VectorNode(in1, n2, vt) {} virtual int Opcode() const; - void add_opd(uint i, Node* n) { - init_req(i+1, n); + void add_opd(Node* n) { + add_req(n); } // Create a binary tree form for Packs. 
[lo, hi) (half-open) range - Node* binaryTreePack(Compile* C, int lo, int hi); + PackNode* binary_tree_pack(Compile* C, int lo, int hi); static PackNode* make(Compile* C, Node* s, uint vlen, BasicType bt); }; diff -r be82ef218872 -r b3602ff9c1b8 src/share/vm/precompiled/precompiled.hpp --- a/src/share/vm/precompiled/precompiled.hpp Wed Aug 22 10:01:51 2012 +0200 +++ b/src/share/vm/precompiled/precompiled.hpp Fri Aug 24 19:45:42 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -306,7 +306,6 @@ # include "gc_implementation/g1/g1_specialized_oop_closures.hpp" # include "gc_implementation/g1/ptrQueue.hpp" # include "gc_implementation/g1/satbQueue.hpp" -# include "gc_implementation/parNew/parGCAllocBuffer.hpp" # include "gc_implementation/parNew/parOopClosures.hpp" # include "gc_implementation/parallelScavenge/objectStartArray.hpp" # include "gc_implementation/parallelScavenge/parMarkBitMap.hpp" @@ -322,6 +321,7 @@ # include "gc_implementation/parallelScavenge/psYoungGen.hpp" # include "gc_implementation/shared/gcAdaptivePolicyCounters.hpp" # include "gc_implementation/shared/gcPolicyCounters.hpp" +# include "gc_implementation/shared/parGCAllocBuffer.hpp" #endif // SERIALGC #endif // !DONT_USE_PRECOMPILED_HEADER diff -r be82ef218872 -r b3602ff9c1b8 test/compiler/6340864/TestByteVect.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6340864/TestByteVect.java Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,1274 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect + */ + +public class TestByteVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int ADD_INIT = 0; + private static final int BIT_MASK = 0xB7; + private static final int VALUE = 3; + private static final int SHIFT = 8; + + public static void main(String args[]) { + System.out.println("Testing Byte vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a0 = new byte[ARRLEN]; + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + byte[] a3 = new byte[ARRLEN]; + byte[] a4 = new byte[ARRLEN]; + short[] p2 = new short[ARRLEN/2]; + int[] p4 = new int[ARRLEN/4]; + long[] p8 = new long[ARRLEN/8]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, 
VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>b); + } + } + + static void test_srac(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>VALUE); + } + } + static void test_srac_n(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>SHIFT); + } + } + 
static void test_srac_on(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>b); + } + } + + static void test_pack2(short[] p2, byte[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + short l0 = (short)a1[i*2+0]; + short l1 = (short)a1[i*2+1]; + p2[i] = (short)((l1 << 8) | (l0 & 0xFF)); + } + } + static void test_unpack2(byte[] a0, short[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + short l = p2[i]; + a0[i*2+0] = (byte)(l & 0xFF); + a0[i*2+1] = (byte)(l >> 8); + } + } + static void test_pack2_swap(short[] p2, byte[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + short l0 = (short)a1[i*2+0]; + short l1 = (short)a1[i*2+1]; + p2[i] = (short)((l0 << 8) | (l1 & 0xFF)); + } + } + static void test_unpack2_swap(byte[] a0, short[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + short l = p2[i]; + a0[i*2+0] = (byte)(l >> 8); + a0[i*2+1] = (byte)(l & 0xFF); + } + } + + static void test_pack4(int[] p4, byte[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + int l0 = (int)a1[i*4+0]; + int l1 = (int)a1[i*4+1]; + int l2 = (int)a1[i*4+2]; + int l3 = (int)a1[i*4+3]; + p4[i] = (l0 & 0xFF) | + ((l1 & 0xFF) << 8) | + ((l2 & 0xFF) << 16) | + ((l3 & 0xFF) << 24); + } + } + static void test_unpack4(byte[] a0, int[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + int l = p4[i]; + a0[i*4+0] = (byte)(l & 0xFF); + a0[i*4+1] = (byte)(l >> 8); + a0[i*4+2] = (byte)(l >> 16); + a0[i*4+3] = (byte)(l >> 24); + } + } + static void test_pack4_swap(int[] p4, byte[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + int l0 = (int)a1[i*4+0]; + int l1 = 
(int)a1[i*4+1]; + int l2 = (int)a1[i*4+2]; + int l3 = (int)a1[i*4+3]; + p4[i] = (l3 & 0xFF) | + ((l2 & 0xFF) << 8) | + ((l1 & 0xFF) << 16) | + ((l0 & 0xFF) << 24); + } + } + static void test_unpack4_swap(byte[] a0, int[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + int l = p4[i]; + a0[i*4+0] = (byte)(l >> 24); + a0[i*4+1] = (byte)(l >> 16); + a0[i*4+2] = (byte)(l >> 8); + a0[i*4+3] = (byte)(l & 0xFF); + } + } + + static void test_pack8(long[] p8, byte[] a1) { + if (p8.length*8 > a1.length) return; + for (int i = 0; i < p8.length; i+=1) { + long l0 = (long)a1[i*8+0]; + long l1 = (long)a1[i*8+1]; + long l2 = (long)a1[i*8+2]; + long l3 = (long)a1[i*8+3]; + long l4 = (long)a1[i*8+4]; + long l5 = (long)a1[i*8+5]; + long l6 = (long)a1[i*8+6]; + long l7 = (long)a1[i*8+7]; + p8[i] = (l0 & 0xFFl) | + ((l1 & 0xFFl) << 8) | + ((l2 & 0xFFl) << 16) | + ((l3 & 0xFFl) << 24) | + ((l4 & 0xFFl) << 32) | + ((l5 & 0xFFl) << 40) | + ((l6 & 0xFFl) << 48) | + ((l7 & 0xFFl) << 56); + } + } + static void test_unpack8(byte[] a0, long[] p8) { + if (p8.length*8 > a0.length) return; + for (int i = 0; i < p8.length; i+=1) { + long l = p8[i]; + a0[i*8+0] = (byte)(l & 0xFFl); + a0[i*8+1] = (byte)(l >> 8); + a0[i*8+2] = (byte)(l >> 16); + a0[i*8+3] = (byte)(l >> 24); + a0[i*8+4] = (byte)(l >> 32); + a0[i*8+5] = (byte)(l >> 40); + a0[i*8+6] = (byte)(l >> 48); + a0[i*8+7] = (byte)(l >> 56); + } + } + static void test_pack8_swap(long[] p8, byte[] a1) { + if (p8.length*8 > a1.length) return; + for (int i = 0; i < p8.length; i+=1) { + long l0 = (long)a1[i*8+0]; + long l1 = (long)a1[i*8+1]; + long l2 = (long)a1[i*8+2]; + long l3 = (long)a1[i*8+3]; + long l4 = (long)a1[i*8+4]; + long l5 = (long)a1[i*8+5]; + long l6 = (long)a1[i*8+6]; + long l7 = (long)a1[i*8+7]; + p8[i] = (l7 & 0xFFl) | + ((l6 & 0xFFl) << 8) | + ((l5 & 0xFFl) << 16) | + ((l4 & 0xFFl) << 24) | + ((l3 & 0xFFl) << 32) | + ((l2 & 0xFFl) << 40) | + ((l1 & 0xFFl) << 48) | + ((l0 & 0xFFl) << 56); + } 
+ } + static void test_unpack8_swap(byte[] a0, long[] p8) { + if (p8.length*8 > a0.length) return; + for (int i = 0; i < p8.length; i+=1) { + long l = p8[i]; + a0[i*8+0] = (byte)(l >> 56); + a0[i*8+1] = (byte)(l >> 48); + a0[i*8+2] = (byte)(l >> 40); + a0[i*8+3] = (byte)(l >> 32); + a0[i*8+4] = (byte)(l >> 24); + a0[i*8+5] = (byte)(l >> 16); + a0[i*8+6] = (byte)(l >> 8); + a0[i*8+7] = (byte)(l & 0xFFl); + } + } + + static int verify(String text, int i, byte elem, byte val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val)); + return 1; + } + return 0; + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); + return 1; + } + return 0; + } +} diff -r be82ef218872 -r b3602ff9c1b8 test/compiler/6340864/TestDoubleVect.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6340864/TestDoubleVect.java Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,560 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestDoubleVect + */ + +public class TestDoubleVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final double ADD_INIT = -7500.; + private static final double VALUE = 15.; + + public static void main(String args[]) { + System.out.println("Testing Double vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + double[] a0 = new double[ARRLEN]; + double[] a1 = new double[ARRLEN]; + double[] a2 = new double[ARRLEN]; + double[] a3 = new double[ARRLEN]; + // Initialize + double gold_sum = 0; + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + float[] a0 = new float[ARRLEN]; + float[] a1 = new float[ARRLEN]; + float[] a2 = new float[ARRLEN]; + float[] a3 = new float[ARRLEN]; + // Initialize + float gold_sum = 0; + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + 
System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + int[] a0 = new int[ARRLEN]; + int[] a1 = new int[ARRLEN]; + int[] a2 = new int[ARRLEN]; + int[] a3 = new int[ARRLEN]; + int[] a4 = new int[ARRLEN]; + long[] p2 = new long[ARRLEN/2]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>>b); + } + } + + 
static void test_srac(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>VALUE); + } + } + static void test_srac_n(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>SHIFT); + } + } + static void test_srac_on(int[] a0, int[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(int[] a0, int[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (int)(a1[i]>>b); + } + } + + static void test_pack2(long[] p2, int[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + long l0 = (long)a1[i*2+0]; + long l1 = (long)a1[i*2+1]; + p2[i] = (l1 << 32) | (l0 & 0xFFFFFFFFl); + } + } + static void test_unpack2(int[] a0, long[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + long l = p2[i]; + a0[i*2+0] = (int)(l & 0xFFFFFFFFl); + a0[i*2+1] = (int)(l >> 32); + } + } + static void test_pack2_swap(long[] p2, int[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + long l0 = (long)a1[i*2+0]; + long l1 = (long)a1[i*2+1]; + p2[i] = (l0 << 32) | (l1 & 0xFFFFFFFFl); + } + } + static void test_unpack2_swap(int[] a0, long[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + long l = p2[i]; + a0[i*2+0] = (int)(l >> 32); + a0[i*2+1] = (int)(l & 0xFFFFFFFFl); + } + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); + return 1; + } + return 0; + } +} diff -r 
be82ef218872 -r b3602ff9c1b8 test/compiler/6340864/TestLongVect.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6340864/TestLongVect.java Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,917 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestLongVect + */ + +public class TestLongVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final long ADD_INIT = Long.MAX_VALUE-500; + private static final long BIT_MASK = 0xEC80F731EC80F731L; + private static final int VALUE = 31; + private static final int SHIFT = 64; + + public static void main(String args[]) { + System.out.println("Testing Long vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + long[] a0 = new long[ARRLEN]; + long[] a1 = new long[ARRLEN]; + long[] a2 = new long[ARRLEN]; + long[] a3 = new long[ARRLEN]; + long[] a4 = new long[ARRLEN]; + // Initialize + long gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + } + + if (errn > 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = 
System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(long[] a0, long[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>>b); + } + } + + static void test_srac(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>VALUE); + } + } + static void test_srac_n(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>SHIFT); + } + } + static void test_srac_on(long[] a0, long[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(long[] a0, long[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (long)(a1[i]>>b); + } + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } +} diff -r be82ef218872 -r b3602ff9c1b8 test/compiler/6340864/TestShortVect.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6340864/TestShortVect.java Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,1127 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestShortVect + */ + +public class TestShortVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int ADD_INIT = Short.MAX_VALUE-500; + private static final int BIT_MASK = 0xB731; + private static final int VALUE = 7; + private static final int SHIFT = 16; + + public static void main(String args[]) { + System.out.println("Testing Short vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + short[] a0 = new short[ARRLEN]; + short[] a1 = new short[ARRLEN]; + short[] a2 = new short[ARRLEN]; + short[] a3 = new short[ARRLEN]; + short[] a4 = new short[ARRLEN]; + int[] p2 = new int[ARRLEN/2]; + long[] p4 = new long[ARRLEN/4]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, 
VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for (int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>>b); + } + } + + static void test_srac(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>VALUE); + } + } + static void test_srac_n(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>(-VALUE)); + } + } + static void test_srac_o(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = 
(short)(a1[i]>>SHIFT); + } + } + static void test_srac_on(short[] a0, short[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>(-SHIFT)); + } + } + static void test_srav(short[] a0, short[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (short)(a1[i]>>b); + } + } + + static void test_pack2(int[] p2, short[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l0 = (int)a1[i*2+0]; + int l1 = (int)a1[i*2+1]; + p2[i] = (l1 << 16) | (l0 & 0xFFFF); + } + } + static void test_unpack2(short[] a0, int[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l = p2[i]; + a0[i*2+0] = (short)(l & 0xFFFF); + a0[i*2+1] = (short)(l >> 16); + } + } + static void test_pack2_swap(int[] p2, short[] a1) { + if (p2.length*2 > a1.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l0 = (int)a1[i*2+0]; + int l1 = (int)a1[i*2+1]; + p2[i] = (l0 << 16) | (l1 & 0xFFFF); + } + } + static void test_unpack2_swap(short[] a0, int[] p2) { + if (p2.length*2 > a0.length) return; + for (int i = 0; i < p2.length; i+=1) { + int l = p2[i]; + a0[i*2+0] = (short)(l >> 16); + a0[i*2+1] = (short)(l & 0xFFFF); + } + } + + static void test_pack4(long[] p4, short[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l0 = (long)a1[i*4+0]; + long l1 = (long)a1[i*4+1]; + long l2 = (long)a1[i*4+2]; + long l3 = (long)a1[i*4+3]; + p4[i] = (l0 & 0xFFFFl) | + ((l1 & 0xFFFFl) << 16) | + ((l2 & 0xFFFFl) << 32) | + ((l3 & 0xFFFFl) << 48); + } + } + static void test_unpack4(short[] a0, long[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l = p4[i]; + a0[i*4+0] = (short)(l & 0xFFFFl); + a0[i*4+1] = (short)(l >> 16); + a0[i*4+2] = (short)(l >> 32); + a0[i*4+3] = (short)(l >> 48); + } + } + static void test_pack4_swap(long[] p4, short[] a1) { + if (p4.length*4 > a1.length) return; + for (int i = 0; i < p4.length; 
i+=1) { + long l0 = (long)a1[i*4+0]; + long l1 = (long)a1[i*4+1]; + long l2 = (long)a1[i*4+2]; + long l3 = (long)a1[i*4+3]; + p4[i] = (l3 & 0xFFFFl) | + ((l2 & 0xFFFFl) << 16) | + ((l1 & 0xFFFFl) << 32) | + ((l0 & 0xFFFFl) << 48); + } + } + static void test_unpack4_swap(short[] a0, long[] p4) { + if (p4.length*4 > a0.length) return; + for (int i = 0; i < p4.length; i+=1) { + long l = p4[i]; + a0[i*4+0] = (short)(l >> 48); + a0[i*4+1] = (short)(l >> 32); + a0[i*4+2] = (short)(l >> 16); + a0[i*4+3] = (short)(l & 0xFFFFl); + } + } + + static int verify(String text, int i, short elem, short val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + elem + " != " + val); + return 1; + } + return 0; + } + + static int verify(String text, int i, int elem, int val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val)); + return 1; + } + return 0; + } + + static int verify(String text, int i, long elem, long val) { + if (elem != val) { + System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val)); + return 1; + } + return 0; + } +} diff -r be82ef218872 -r b3602ff9c1b8 test/compiler/7190310/Test7190310.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7190310/Test7190310.java Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* + * Manual test + */ + +import java.lang.ref.*; + +public class Test7190310 { + private static Object str = new Object() { + public String toString() { + return "The Object"; + } + + protected void finalize() throws Throwable { + System.out.println("The Object is being finalized"); + super.finalize(); + } + }; + private final static ReferenceQueue rq = + new ReferenceQueue(); + private final static WeakReference wr = + new WeakReference(str, rq); + + public static void main(String[] args) + throws InterruptedException { + Thread reader = new Thread() { + public void run() { + while (wr.get() != null) { + } + System.out.println("wr.get() returned null"); + } + }; + + Thread queueReader = new Thread() { + public void run() { + try { + Reference ref = rq.remove(); + System.out.println(ref); + System.out.println("queueReader returned, ref==wr is " + + (ref == wr)); + } catch (InterruptedException e) { + System.err.println("Sleep interrupted - exiting"); + } + } + }; + + reader.start(); + queueReader.start(); + + Thread.sleep(1000); + str = null; + System.gc(); + } +} + diff -r be82ef218872 -r b3602ff9c1b8 test/compiler/7190310/Test7190310_unsafe.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7190310/Test7190310_unsafe.java Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* + * @test + * @bug 7190310 + * @summary Inlining WeakReference.get(), and hoisting $referent may lead to non-terminating loops + * @run main/othervm -Xbatch Test7190310_unsafe + */ + +import java.lang.ref.*; +import java.lang.reflect.*; +import sun.misc.Unsafe; + +public class Test7190310_unsafe { + + static class TestObject { + public String toString() { + return "TestObject"; + } + }; + + private static TestObject str = new TestObject(); + private static final WeakReference ref = new WeakReference(str); + + private TestObject obj; + + public static void main(String[] args) throws Exception { + Class c = Test7190310_unsafe.class.getClassLoader().loadClass("sun.misc.Unsafe"); + Field f = c.getDeclaredField("theUnsafe"); + f.setAccessible(true); + Unsafe unsafe = (Unsafe)f.get(c); + + f = Reference.class.getDeclaredField("referent"); + f.setAccessible(true); + long referent_offset = unsafe.objectFieldOffset(f); + + Test7190310_unsafe t = new Test7190310_unsafe(); + TestObject o = 
new TestObject(); + t.obj = o; + + // Warmup (compile methods) + System.err.println("Warmup"); + Object obj = null; + for (int i = 0; i < 11000; i++) { + obj = getRef0(ref); + } + for (int i = 0; i < 11000; i++) { + obj = getRef1(unsafe, ref, referent_offset); + } + for (int i = 0; i < 11000; i++) { + obj = getRef2(unsafe, ref, referent_offset); + } + for (int i = 0; i < 11000; i++) { + obj = getRef3(unsafe, ref, referent_offset); + } + for (int i = 0; i < 11000; i++) { + obj = getRef4(unsafe, t, referent_offset); + } + + // Access verification + System.err.println("Verification"); + if (!verifyGet(referent_offset, unsafe)) { + System.exit(97); + } + + obj = getRef3(unsafe, t, referent_offset); + if (obj != o) { + System.out.println("FAILED: unsafe.getObject(Object, " + referent_offset + ") " + obj + " != " + o); + System.exit(97); + } + obj = getRef4(unsafe, t, referent_offset); + if (obj != o) { + System.out.println("FAILED: unsafe.getObject(Test7190310, " + referent_offset + ") " + obj + " != " + o); + System.exit(97); + } + } + + static boolean verifyGet(long referent_offset, Unsafe unsafe) throws Exception { + // Access verification + System.out.println("referent: " + str); + Object obj = getRef0(ref); + if (obj != str) { + System.out.println("FAILED: weakRef.get() " + obj + " != " + str); + return false; + } + obj = getRef1(unsafe, ref, referent_offset); + if (obj != str) { + System.out.println("FAILED: unsafe.getObject(weakRef, " + referent_offset + ") " + obj + " != " + str); + return false; + } + obj = getRef2(unsafe, ref, referent_offset); + if (obj != str) { + System.out.println("FAILED: unsafe.getObject(abstRef, " + referent_offset + ") " + obj + " != " + str); + return false; + } + obj = getRef3(unsafe, ref, referent_offset); + if (obj != str) { + System.out.println("FAILED: unsafe.getObject(Object, " + referent_offset + ") " + obj + " != " + str); + return false; + } + return true; + } + + static Object getRef0(WeakReference ref) throws Exception { + 
return ref.get(); + } + static Object getRef1(Unsafe unsafe, WeakReference ref, long referent_offset) throws Exception { + return unsafe.getObject(ref, referent_offset); + } + static Object getRef2(Unsafe unsafe, Reference ref, long referent_offset) throws Exception { + return unsafe.getObject(ref, referent_offset); + } + static Object getRef3(Unsafe unsafe, Object ref, long referent_offset) throws Exception { + return unsafe.getObject(ref, referent_offset); + } + static Object getRef4(Unsafe unsafe, Test7190310_unsafe ref, long referent_offset) throws Exception { + return unsafe.getObject(ref, referent_offset); + } +} + diff -r be82ef218872 -r b3602ff9c1b8 test/compiler/7192963/TestByteVect.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7192963/TestByteVect.java Fri Aug 24 19:45:42 2012 -0700 @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/** + * @test + * @bug 7192963 + * @summary assert(_in[req-1] == this) failed: Must pass arg count to 'new' + * + * @run main/othervm/timeout=400 -Xbatch -Xmx64m TestByteVect + */ + +public class TestByteVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + public static void main(String args[]) { + System.out.println("Testing Byte vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a0 = new byte[ARRLEN]; + byte[] a1 = new byte[ARRLEN]; + // Initialize + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + double[] a0 = new double[ARRLEN]; + double[] a1 = new double[ARRLEN]; + // Initialize + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + float[] a0 = new float[ARRLEN]; + float[] a1 = new float[ARRLEN]; + // Initialize + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + int[] a0 = new int[ARRLEN]; + int[] a1 = new int[ARRLEN]; + // Initialize + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + 
System.out.println("PASSED"); + } + + static int test() { + long[] a0 = new long[ARRLEN]; + long[] a1 = new long[ARRLEN]; + // Initialize + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + short[] a0 = new short[ARRLEN]; + short[] a1 = new short[ARRLEN]; + // Initialize + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i