# HG changeset patch
# User kvn
# Date 1224022226 25200
# Node ID 2649e5276dd74cb5cbdd9a63affa73835f9689c0
# Parent  78c058bc5cdc54e3391dd50638895cb70ab0703a
6532536: Optimize arraycopy stubs for Intel cpus
Summary: Use SSE2 movdqu in arraycopy stubs on the newest Intel CPUs
Reviewed-by: rasbold

diff -r 78c058bc5cdc -r 2649e5276dd7 src/cpu/x86/vm/assembler_x86.cpp
--- a/src/cpu/x86/vm/assembler_x86.cpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp  Tue Oct 14 15:10:26 2008 -0700
@@ -1575,6 +1575,35 @@
   emit_operand(src, dst);
 }
 
+void Assembler::movdqu(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(src, dst);
+  emit_byte(0x0F);
+  emit_byte(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_byte(0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_byte(0x0F);
+  emit_byte(0x6F);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::movdqu(Address dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  emit_byte(0xF3);
+  prefix(dst, src);
+  emit_byte(0x0F);
+  emit_byte(0x7F);
+  emit_operand(src, dst);
+}
+
 // Uses zero extension on 64bit
 
 void Assembler::movl(Register dst, int32_t imm32) {
diff -r 78c058bc5cdc -r 2649e5276dd7 src/cpu/x86/vm/assembler_x86.hpp
--- a/src/cpu/x86/vm/assembler_x86.hpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp  Tue Oct 14 15:10:26 2008 -0700
@@ -1055,6 +1055,11 @@
   void movdqa(XMMRegister dst, Address src);
   void movdqa(XMMRegister dst, XMMRegister src);
 
+  // Move Unaligned Double Quadword
+  void movdqu(Address dst, XMMRegister src);
+  void movdqu(XMMRegister dst, Address src);
+  void movdqu(XMMRegister dst, XMMRegister src);
+
   void movl(Register dst, int32_t imm32);
   void movl(Address dst, int32_t imm32);
   void movl(Register dst, Register src);
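For reference, the three overloads above emit the standard SSE2 encodings: F3 0F 6F /r for the loads (xmm from xmm/m128) and F3 0F 7F /r for the store (m128 from xmm). The F3 prefix, in place of movdqa's 66, is the only difference from the aligned form. A concrete byte sequence, with the operand pair chosen purely for illustration:

    ; movdqu xmm0, [esi]   -> F3 0F 6F 06   (load 16 unaligned bytes)
    ; movdqu [esi], xmm0   -> F3 0F 7F 06   (store 16 unaligned bytes)

Unlike movdqa, these forms never fault on a misaligned address, which is what lets the copy stubs below skip their alignment prologues.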
diff -r 78c058bc5cdc -r 2649e5276dd7 src/cpu/x86/vm/stubGenerator_x86_32.cpp
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp  Tue Oct 14 15:10:26 2008 -0700
@@ -791,6 +791,69 @@
     }
   }
 
+
+  // Copy 64 bytes chunks
+  //
+  // Inputs:
+  //   from        - source array address
+  //   to_from     - destination array address - from
+  //   qword_count - 8-bytes element count, negative
+  //
+  void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
+    assert( UseSSE >= 2, "supported cpu only" );
+    Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
+    // Copy 64-byte chunks
+    __ jmpb(L_copy_64_bytes);
+    __ align(16);
+  __ BIND(L_copy_64_bytes_loop);
+
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, 0));
+      __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
+      __ movdqu(xmm1, Address(from, 16));
+      __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
+      __ movdqu(xmm2, Address(from, 32));
+      __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
+      __ movdqu(xmm3, Address(from, 48));
+      __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
+
+    } else {
+      __ movq(xmm0, Address(from, 0));
+      __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
+      __ movq(xmm1, Address(from, 8));
+      __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
+      __ movq(xmm2, Address(from, 16));
+      __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
+      __ movq(xmm3, Address(from, 24));
+      __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
+      __ movq(xmm4, Address(from, 32));
+      __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
+      __ movq(xmm5, Address(from, 40));
+      __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
+      __ movq(xmm6, Address(from, 48));
+      __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
+      __ movq(xmm7, Address(from, 56));
+      __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
+    }
+
+    __ addl(from, 64);
+  __ BIND(L_copy_64_bytes);
+    __ subl(qword_count, 8);
+    __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
+    __ addl(qword_count, 8);
+    __ jccb(Assembler::zero, L_exit);
+    //
+    // length is too short, just copy qwords
+    //
+  __ BIND(L_copy_8_bytes);
+    __ movq(xmm0, Address(from, 0));
+    __ movq(Address(from, to_from, Address::times_1), xmm0);
+    __ addl(from, 8);
+    __ decrement(qword_count);
+    __ jcc(Assembler::greater, L_copy_8_bytes);
+  __ BIND(L_exit);
+  }
+
   // Copy 64 bytes chunks
   //
   // Inputs:
@@ -799,6 +862,7 @@
   //   qword_count - 8-bytes element count, negative
   //
   void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
+    assert( VM_Version::supports_mmx(), "supported cpu only" );
     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
     // Copy 64-byte chunks
     __ jmpb(L_copy_64_bytes);
@@ -876,7 +940,7 @@
     __ subptr(to, from); // to --> to_from
     __ cmpl(count, 2<<shift);
[...]
     __ subptr(to, from); // to --> to_from
     if (VM_Version::supports_mmx()) {
-      mmx_copy_forward(from, to_from, count);
+      if (UseXMMForArrayCopy) {
+        xmm_copy_forward(from, to_from, count);
+      } else {
+        mmx_copy_forward(from, to_from, count);
+      }
     } else {
       __ jmpb(L_copy_8_bytes);
       __ align(16);
@@ -1196,8 +1277,13 @@
     __ align(16);
   __ BIND(L_copy_8_bytes_loop);
     if (VM_Version::supports_mmx()) {
-      __ movq(mmx0, Address(from, count, Address::times_8));
-      __ movq(Address(to, count, Address::times_8), mmx0);
+      if (UseXMMForArrayCopy) {
+        __ movq(xmm0, Address(from, count, Address::times_8));
+        __ movq(Address(to, count, Address::times_8), xmm0);
+      } else {
+        __ movq(mmx0, Address(from, count, Address::times_8));
+        __ movq(Address(to, count, Address::times_8), mmx0);
+      }
     } else {
       __ fild_d(Address(from, count, Address::times_8));
       __ fistp_d(Address(to, count, Address::times_8));
@@ -1206,7 +1292,7 @@
     __ decrement(count);
     __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
 
-    if (VM_Version::supports_mmx()) {
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
       __ emms();
     }
     inc_copy_counter_np(T_LONG);
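To make the control flow of the new stub easier to follow, here is a standalone C++ sketch of the same forward copy using SSE2 compiler intrinsics. It is an illustration of the technique only; the stub emits this shape through the macro assembler, with a negative qword count and a to_from delta rather than two plain pointers:

    #include <emmintrin.h>  // SSE2 intrinsics: _mm_loadu_si128, _mm_storeu_si128
    #include <stddef.h>

    // Forward copy of 'qwords' 8-byte elements, shaped like xmm_copy_forward:
    // the main loop moves 64 bytes per iteration through four XMM registers
    // using unaligned 16-byte accesses; the tail copies one qword at a time.
    static void xmm_copy_forward_sketch(const char* from, char* to, size_t qwords) {
      size_t bytes = qwords * 8;
      size_t i = 0;
      for (; i + 64 <= bytes; i += 64) {                    // L_copy_64_bytes_loop
        __m128i x0 = _mm_loadu_si128((const __m128i*)(from + i));
        _mm_storeu_si128((__m128i*)(to + i), x0);
        __m128i x1 = _mm_loadu_si128((const __m128i*)(from + i + 16));
        _mm_storeu_si128((__m128i*)(to + i + 16), x1);
        __m128i x2 = _mm_loadu_si128((const __m128i*)(from + i + 32));
        _mm_storeu_si128((__m128i*)(to + i + 32), x2);
        __m128i x3 = _mm_loadu_si128((const __m128i*)(from + i + 48));
        _mm_storeu_si128((__m128i*)(to + i + 48), x3);
      }
      for (; i < bytes; i += 8) {                           // L_copy_8_bytes tail
        __m128i q = _mm_loadl_epi64((const __m128i*)(from + i));  // movq load
        _mm_storel_epi64((__m128i*)(to + i), q);                  // movq store
      }
    }

The movq fallback path works the same way but moves only 8 bytes per XMM register, which is why it cycles through xmm0..xmm7 to cover a 64-byte chunk; replacing the MMX path with it also removes the need for the emms cleanup that MMX imposes, as the emms hunk above shows. The 64-bit stubs in the next file follow the same pattern with 32-byte iterations (two movdqu pairs), plus a mirror-image backward loop for overlapping conjoint copies.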
diff -r 78c058bc5cdc -r 2649e5276dd7 src/cpu/x86/vm/stubGenerator_x86_64.cpp
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp  Tue Oct 14 15:10:26 2008 -0700
@@ -1251,6 +1251,7 @@
     }
   }
 
+
   // Copy big chunks forward
   //
   // Inputs:
@@ -1268,14 +1269,22 @@
     Label L_loop;
     __ align(16);
   __ BIND(L_loop);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
-    __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
-    __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
-    __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
-    __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
+      __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
+
+    } else {
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
+      __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
+      __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
+      __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
+      __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
+    }
   __ BIND(L_copy_32_bytes);
     __ addptr(qword_count, 4);
     __ jcc(Assembler::lessEqual, L_loop);
@@ -1301,14 +1310,22 @@
     Label L_loop;
     __ align(16);
   __ BIND(L_loop);
-    __ movq(to, Address(from, qword_count, Address::times_8, 24));
-    __ movq(Address(dest, qword_count, Address::times_8, 24), to);
-    __ movq(to, Address(from, qword_count, Address::times_8, 16));
-    __ movq(Address(dest, qword_count, Address::times_8, 16), to);
-    __ movq(to, Address(from, qword_count, Address::times_8, 8));
-    __ movq(Address(dest, qword_count, Address::times_8, 8), to);
-    __ movq(to, Address(from, qword_count, Address::times_8, 0));
-    __ movq(Address(dest, qword_count, Address::times_8, 0), to);
+    if(UseUnalignedLoadStores) {
+      __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
+      __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
+      __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
+      __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
+
+    } else {
+      __ movq(to, Address(from, qword_count, Address::times_8, 24));
+      __ movq(Address(dest, qword_count, Address::times_8, 24), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 16));
+      __ movq(Address(dest, qword_count, Address::times_8, 16), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 8));
+      __ movq(Address(dest, qword_count, Address::times_8, 8), to);
+      __ movq(to, Address(from, qword_count, Address::times_8, 0));
+      __ movq(Address(dest, qword_count, Address::times_8, 0), to);
+    }
  __ BIND(L_copy_32_bytes);
     __ subptr(qword_count, 4);
     __ jcc(Assembler::greaterEqual, L_loop);
diff -r 78c058bc5cdc -r 2649e5276dd7 src/cpu/x86/vm/vm_version_x86_32.cpp
--- a/src/cpu/x86/vm/vm_version_x86_32.cpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/cpu/x86/vm/vm_version_x86_32.cpp  Tue Oct 14 15:10:26 2008 -0700
@@ -242,9 +242,11 @@
   _supports_cx8 = supports_cmpxchg8();
   // if the OS doesn't support SSE, we can't use this feature even if the HW does
   if( !os::supports_sse())
-    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A);
-  if (UseSSE < 4)
-    _cpuFeatures &= ~CPU_SSE4;
+    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
   if (UseSSE < 3) {
     _cpuFeatures &= ~CPU_SSE3;
     _cpuFeatures &= ~CPU_SSSE3;
@@ -261,7 +263,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -272,7 +274,8 @@
                (supports_sse2() ? ", sse2" : ""),
                (supports_sse3() ? ", sse3" : ""),
                (supports_ssse3()? ", ssse3": ""),
-               (supports_sse4() ? ", sse4" : ""),
+               (supports_sse4_1() ? ", sse4.1" : ""),
+               (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow() ? ", 3dnow" : ""),
                (supports_3dnow2() ? ", 3dnowext" : ""),
@@ -285,7 +288,7 @@
   // older Pentiums which do not support it.
   if( UseSSE > 4 ) UseSSE=4;
   if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4() ) // Drop to 3 if no SSE4 support
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
@@ -375,6 +378,14 @@
       MaxLoopPad = 11;
     }
 #endif // COMPILER2
+    if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+      UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+    }
+    if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+      if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+        UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+      }
+    }
   }
 }
@@ -413,7 +424,7 @@
 
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per package: %u",
+    tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
diff -r 78c058bc5cdc -r 2649e5276dd7 src/cpu/x86/vm/vm_version_x86_32.hpp
--- a/src/cpu/x86/vm/vm_version_x86_32.hpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/cpu/x86/vm/vm_version_x86_32.hpp  Tue Oct 14 15:10:26 2008 -0700
@@ -68,9 +68,9 @@
                cmpxchg16: 1,
                         : 4,
                dca      : 1,
-                        : 4,
-               popcnt   : 1,
-                        : 8;
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
     } bits;
   };
 
@@ -177,8 +177,9 @@
      CPU_SSE2 = (1 << 7),
      CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX)
      CPU_SSSE3= (1 << 9),
-     CPU_SSE4 = (1 <<10),
-     CPU_SSE4A= (1 <<11)
+     CPU_SSE4A= (1 <<10),
+     CPU_SSE4_1 = (1 << 11),
+     CPU_SSE4_2 = (1 << 12)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -240,22 +241,14 @@
   static CpuidInfo _cpuid_info;
 
   // Extractors and predicates
-  static bool is_extended_cpu_family() {
-    const uint32_t Extended_Cpu_Family = 0xf;
-    return _cpuid_info.std_cpuid1_rax.bits.family == Extended_Cpu_Family;
-  }
   static uint32_t extended_cpu_family() {
     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
-    if (is_extended_cpu_family()) {
-      result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
-    }
+    result += _cpuid_info.std_cpuid1_rax.bits.ext_family;
     return result;
   }
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
-    if (is_extended_cpu_family()) {
-      result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
-    }
+    result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
     return result;
   }
   static uint32_t cpu_stepping() {
@@ -293,6 +286,10 @@
       result |= CPU_SSSE3;
     if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0)
       result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
     return result;
   }
 
@@ -380,7 +377,8 @@
   static bool supports_sse2()   { return (_cpuFeatures & CPU_SSE2) != 0; }
   static bool supports_sse3()   { return (_cpuFeatures & CPU_SSE3) != 0; }
   static bool supports_ssse3()  { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4()   { return (_cpuFeatures & CPU_SSE4) != 0; }
+  static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   //
   // AMD features
   //
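Dropping is_extended_cpu_family() means the ext_family and ext_model fields, both zero on older parts, are now always folded in. A worked example, assuming a 45 nm Core 2 (Penryn) sample reporting CPUID leaf 1 EAX = 0x00010676 (a signature chosen for illustration):

    // EAX = 0x00010676: stepping = 6, model = 0x7, family = 0x6,
    //                   ext_model = 0x1, ext_family = 0x0
    // extended_cpu_family() = 0x6 + 0x0        = 6
    // extended_cpu_model()  = 0x7 | (0x1 << 4) = 0x17

Penryn-class model 0x17 parts are the first Intel CPUs to set the new sse4_1 bit, so the folded model number is what distinguishes them from older family-6 cores.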
diff -r 78c058bc5cdc -r 2649e5276dd7 src/cpu/x86/vm/vm_version_x86_64.cpp
--- a/src/cpu/x86/vm/vm_version_x86_64.cpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/cpu/x86/vm/vm_version_x86_64.cpp  Tue Oct 14 15:10:26 2008 -0700
@@ -186,8 +186,10 @@
   if (!VM_Version::supports_sse2()) {
     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
   }
-  if (UseSSE < 4)
-    _cpuFeatures &= ~CPU_SSE4;
+  if (UseSSE < 4) {
+    _cpuFeatures &= ~CPU_SSE4_1;
+    _cpuFeatures &= ~CPU_SSE4_2;
+  }
   if (UseSSE < 3) {
     _cpuFeatures &= ~CPU_SSE3;
     _cpuFeatures &= ~CPU_SSSE3;
@@ -204,7 +206,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -215,7 +217,8 @@
                (supports_sse2() ? ", sse2" : ""),
                (supports_sse3() ? ", sse3" : ""),
                (supports_ssse3()? ", ssse3": ""),
-               (supports_sse4() ? ", sse4" : ""),
+               (supports_sse4_1() ? ", sse4.1" : ""),
+               (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow() ? ", 3dnow" : ""),
                (supports_3dnow2() ? ", 3dnowext" : ""),
@@ -228,7 +231,7 @@
   // older Pentiums which do not support it.
   if( UseSSE > 4 ) UseSSE=4;
   if( UseSSE < 0 ) UseSSE=0;
-  if( !supports_sse4() ) // Drop to 3 if no SSE4 support
    UseSSE = MIN2((intx)3,UseSSE);
+  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
     UseSSE = MIN2((intx)3,UseSSE);
   if( !supports_sse3() ) // Drop to 2 if no SSE3 support
     UseSSE = MIN2((intx)2,UseSSE);
@@ -314,6 +317,14 @@
       MaxLoopPad = 11;
     }
 #endif // COMPILER2
+    if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
+      UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
+    }
+    if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
+      if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
+        UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+      }
+    }
   }
 }
@@ -355,7 +366,7 @@
 
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print_cr("Logical CPUs per package: %u",
+    tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
    tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
diff -r 78c058bc5cdc -r 2649e5276dd7 src/cpu/x86/vm/vm_version_x86_64.hpp
--- a/src/cpu/x86/vm/vm_version_x86_64.hpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/cpu/x86/vm/vm_version_x86_64.hpp  Tue Oct 14 15:10:26 2008 -0700
@@ -68,9 +68,9 @@
                cmpxchg16: 1,
                         : 4,
                dca      : 1,
-                        : 4,
-               popcnt   : 1,
-                        : 8;
+               sse4_1   : 1,
+               sse4_2   : 1,
+                        : 11;
     } bits;
   };
 
@@ -177,8 +177,9 @@
      CPU_SSE2 = (1 << 7),
      CPU_SSE3 = (1 << 8),
      CPU_SSSE3= (1 << 9),
-     CPU_SSE4 = (1 <<10),
-     CPU_SSE4A= (1 <<11)
+     CPU_SSE4A= (1 <<10),
+     CPU_SSE4_1 = (1 << 11),
+     CPU_SSE4_2 = (1 << 12)
    } cpuFeatureFlags;
 
   // cpuid information block.  All info derived from executing cpuid with
@@ -240,22 +241,14 @@
   static CpuidInfo _cpuid_info;
 
   // Extractors and predicates
-  static bool is_extended_cpu_family() {
-    const uint32_t Extended_Cpu_Family = 0xf;
-    return _cpuid_info.std_cpuid1_eax.bits.family == Extended_Cpu_Family;
-  }
   static uint32_t extended_cpu_family() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
-    if (is_extended_cpu_family()) {
-      result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
-    }
+    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
     return result;
   }
   static uint32_t extended_cpu_model() {
     uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
-    if (is_extended_cpu_family()) {
-      result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
-    }
+    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
     return result;
   }
   static uint32_t cpu_stepping() {
@@ -293,6 +286,10 @@
       result |= CPU_SSSE3;
     if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
       result |= CPU_SSE4A;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
+      result |= CPU_SSE4_1;
+    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
+      result |= CPU_SSE4_2;
     return result;
   }
 
@@ -380,7 +377,8 @@
   static bool supports_sse2()   { return (_cpuFeatures & CPU_SSE2) != 0; }
   static bool supports_sse3()   { return (_cpuFeatures & CPU_SSE3) != 0; }
   static bool supports_ssse3()  { return (_cpuFeatures & CPU_SSSE3)!= 0; }
-  static bool supports_sse4()   { return (_cpuFeatures & CPU_SSE4) != 0; }
+  static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
+  static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   //
   // AMD features
   //
diff -r 78c058bc5cdc -r 2649e5276dd7 src/os/solaris/vm/os_solaris.cpp
--- a/src/os/solaris/vm/os_solaris.cpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp  Tue Oct 14 15:10:26 2008 -0700
@@ -3758,7 +3758,7 @@
     int maxClamped = MIN2(iaLimits.maxPrio, (int)iaInfo->ia_uprilim);
     iaInfo->ia_upri = scale_to_lwp_priority(iaLimits.minPrio, maxClamped, newPrio);
     iaInfo->ia_uprilim = IA_NOCHANGE;
-    iaInfo->ia_nice = IA_NOCHANGE;
+//  iaInfo->ia_nice = IA_NOCHANGE;
     iaInfo->ia_mode = IA_NOCHANGE;
     if (ThreadPriorityVerbose) {
       tty->print_cr ("IA: [%d...%d] %d->%d\n",
diff -r 78c058bc5cdc -r 2649e5276dd7 src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp  Tue Oct 14 06:58:58 2008 -0700
+++ b/src/share/vm/runtime/globals.hpp  Tue Oct 14 15:10:26 2008 -0700
@@ -991,6 +991,12 @@
   product(bool, UseXmmI2F, false,                                           \
           "Use SSE2 CVTDQ2PS instruction to convert Integer to Float")      \
                                                                             \
+  product(bool, UseXMMForArrayCopy, false,                                  \
+          "Use SSE2 MOVQ instruction for Arraycopy")                        \
+                                                                            \
+  product(bool, UseUnalignedLoadStores, false,                              \
+          "Use SSE2 MOVDQU instruction for Arraycopy")                      \
+                                                                            \
   product(intx, FieldsAllocationStyle, 1,                                   \
           "0 - type based with oops first, 1 - with oops last")             \
                                                                             \
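Both switches are ordinary product flags, so the new paths can be forced on or off from the command line for testing, independent of the CPU-based ergonomics above; the class name below is a placeholder:

    java -XX:+UseXMMForArrayCopy -XX:+UseUnalignedLoadStores ArrayCopyBench
    java -XX:-UseXMMForArrayCopy ArrayCopyBench    # 32-bit VM: fall back to the MMX/FPU stubs

Note that the ergonomics only default UseUnalignedLoadStores to true when UseXMMForArrayCopy is also set and the CPU reports both sse4.2 and HT.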