# HG changeset patch # User apangin # Date 1235687155 28800 # Node ID 7898caac2071a844fd35c5b3f73a65413ccee8a6 # Parent 1b68c738c0d9ef614f51ec8c19d35fd5a95a653a# Parent ef3b3df478b961647251a6c843842211c95f1c63 Merge diff -r 1b68c738c0d9 -r 7898caac2071 .hgtags --- a/.hgtags Sun Feb 22 17:21:13 2009 -0800 +++ b/.hgtags Thu Feb 26 14:25:55 2009 -0800 @@ -20,3 +20,5 @@ fc6a5ae3fef5ebacfa896dbb3ae37715e388e282 jdk7-b43 809e899c638bd9b21836abf9d09ab2a30ff3900b jdk7-b44 945bf754069766e76873c53102fae48abf04cf5b jdk7-b45 +16bb38eeda35b46268eefa4c1f829eb086e0ca46 jdk7-b46 +fcb923bad68e2b10380a030ea83a723f4dc3d4d6 jdk7-b47 diff -r 1b68c738c0d9 -r 7898caac2071 make/hotspot_version --- a/make/hotspot_version Sun Feb 22 17:21:13 2009 -0800 +++ b/make/hotspot_version Thu Feb 26 14:25:55 2009 -0800 @@ -35,7 +35,7 @@ HS_MAJOR_VER=15 HS_MINOR_VER=0 -HS_BUILD_NUMBER=01 +HS_BUILD_NUMBER=02 JDK_MAJOR_VER=1 JDK_MINOR_VER=7 diff -r 1b68c738c0d9 -r 7898caac2071 src/cpu/x86/vm/vm_version_x86.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Feb 26 14:25:55 2009 -0800 @@ -0,0 +1,514 @@ +/* + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_vm_version_x86.cpp.incl" + + +int VM_Version::_cpu; +int VM_Version::_model; +int VM_Version::_stepping; +int VM_Version::_cpuFeatures; +const char* VM_Version::_features_str = ""; +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; + +static BufferBlob* stub_blob; +static const int stub_size = 300; + +extern "C" { + typedef void (*getPsrInfo_stub_t)(void*); +} +static getPsrInfo_stub_t getPsrInfo_stub = NULL; + + +class VM_Version_StubGenerator: public StubCodeGenerator { + public: + + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} + + address generate_getPsrInfo() { + // Flags to test CPU type. + const uint32_t EFL_AC = 0x40000; + const uint32_t EFL_ID = 0x200000; + // Values for when we don't have a CPUID instruction. + const int CPU_FAMILY_SHIFT = 8; + const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); + const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); + + Label detect_486, cpu486, detect_586, std_cpuid1; + Label ext_cpuid1, ext_cpuid5, done; + + StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); +# define __ _masm-> + + address start = __ pc(); + + // + // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); + // + // LP64: rcx and rdx are first and second argument registers on windows + + __ push(rbp); +#ifdef _LP64 + __ mov(rbp, c_rarg0); // cpuid_info address +#else + __ movptr(rbp, Address(rsp, 8)); // cpuid_info address +#endif + __ push(rbx); + __ push(rsi); + __ pushf(); // preserve rbx, and flags + __ pop(rax); + __ push(rax); + __ mov(rcx, rax); + // + // if we are unable to change the AC flag, we have a 386 + // + __ xorl(rax, EFL_AC); + __ push(rax); + __ popf(); + __ pushf(); + __ pop(rax); + __ cmpptr(rax, rcx); + __ jccb(Assembler::notEqual, detect_486); + + __ movl(rax, CPU_FAMILY_386); + __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); + __ jmp(done); + + // + // If we are unable to change the ID flag, we have a 486 which does + // not support the "cpuid" instruction. + // + __ bind(detect_486); + __ mov(rax, rcx); + __ xorl(rax, EFL_ID); + __ push(rax); + __ popf(); + __ pushf(); + __ pop(rax); + __ cmpptr(rcx, rax); + __ jccb(Assembler::notEqual, detect_586); + + __ bind(cpu486); + __ movl(rax, CPU_FAMILY_486); + __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); + __ jmp(done); + + // + // At this point, we have a chip which supports the "cpuid" instruction + // + __ bind(detect_586); + __ xorl(rax, rax); + __ cpuid(); + __ orl(rax, rax); + __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input + // value of at least 1, we give up and + // assume a 486 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + __ cmpl(rax, 3); // Is cpuid(0x4) supported? + __ jccb(Assembler::belowEqual, std_cpuid1); + + // + // cpuid(0x4) Deterministic cache params + // + __ movl(rax, 4); + __ xorl(rcx, rcx); // L1 cache + __ cpuid(); + __ push(rax); + __ andl(rax, 0x1f); // Determine if valid cache parameters used + __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache + __ pop(rax); + __ jccb(Assembler::equal, std_cpuid1); + + __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + // + // Standard cpuid(0x1) + // + __ bind(std_cpuid1); + __ movl(rax, 1); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + __ movl(rax, 0x80000000); + __ cpuid(); + __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? + __ jcc(Assembler::belowEqual, done); + __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? + __ jccb(Assembler::belowEqual, ext_cpuid1); + __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? + __ jccb(Assembler::belowEqual, ext_cpuid5); + // + // Extended cpuid(0x80000008) + // + __ movl(rax, 0x80000008); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + // + // Extended cpuid(0x80000005) + // + __ bind(ext_cpuid5); + __ movl(rax, 0x80000005); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + // + // Extended cpuid(0x80000001) + // + __ bind(ext_cpuid1); + __ movl(rax, 0x80000001); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + // + // return + // + __ bind(done); + __ popf(); + __ pop(rsi); + __ pop(rbx); + __ pop(rbp); + __ ret(0); + +# undef __ + + return start; + }; +}; + + +void VM_Version::get_processor_features() { + + _cpu = 4; // 486 by default + _model = 0; + _stepping = 0; + _cpuFeatures = 0; + _logical_processors_per_package = 1; + + if (!Use486InstrsOnly) { + // Get raw processor info + getPsrInfo_stub(&_cpuid_info); + assert_is_initialized(); + _cpu = extended_cpu_family(); + _model = extended_cpu_model(); + _stepping = cpu_stepping(); + + if (cpu_family() > 4) { // it supports CPUID + _cpuFeatures = feature_flags(); + // Logical processors are only available on P4s and above, + // and only if hyperthreading is available. + _logical_processors_per_package = logical_processor_count(); + } + } + + _supports_cx8 = supports_cmpxchg8(); + +#ifdef _LP64 + // OS should support SSE for x64 and hardware should support at least SSE2. + if (!VM_Version::supports_sse2()) { + vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); + } +#endif + + // If the OS doesn't support SSE, we can't use this feature even if the HW does + if (!os::supports_sse()) + _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); + + if (UseSSE < 4) { + _cpuFeatures &= ~CPU_SSE4_1; + _cpuFeatures &= ~CPU_SSE4_2; + } + + if (UseSSE < 3) { + _cpuFeatures &= ~CPU_SSE3; + _cpuFeatures &= ~CPU_SSSE3; + _cpuFeatures &= ~CPU_SSE4A; + } + + if (UseSSE < 2) + _cpuFeatures &= ~CPU_SSE2; + + if (UseSSE < 1) + _cpuFeatures &= ~CPU_SSE; + + if (logical_processors_per_package() == 1) { + // HT processor could be installed on a system which doesn't support HT. + _cpuFeatures &= ~CPU_HT; + } + + char buf[256]; + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + cores_per_cpu(), threads_per_core(), + cpu_family(), _model, _stepping, + (supports_cmov() ? ", cmov" : ""), + (supports_cmpxchg8() ? ", cx8" : ""), + (supports_fxsr() ? ", fxsr" : ""), + (supports_mmx() ? ", mmx" : ""), + (supports_sse() ? ", sse" : ""), + (supports_sse2() ? ", sse2" : ""), + (supports_sse3() ? ", sse3" : ""), + (supports_ssse3()? ", ssse3": ""), + (supports_sse4_1() ? ", sse4.1" : ""), + (supports_sse4_2() ? ", sse4.2" : ""), + (supports_mmx_ext() ? ", mmxext" : ""), + (supports_3dnow() ? ", 3dnow" : ""), + (supports_3dnow2() ? ", 3dnowext" : ""), + (supports_sse4a() ? ", sse4a": ""), + (supports_ht() ? ", ht": "")); + _features_str = strdup(buf); + + // UseSSE is set to the smaller of what hardware supports and what + // the command line requires. I.e., you cannot set UseSSE to 2 on + // older Pentiums which do not support it. + if( UseSSE > 4 ) UseSSE=4; + if( UseSSE < 0 ) UseSSE=0; + if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support + UseSSE = MIN2((intx)3,UseSSE); + if( !supports_sse3() ) // Drop to 2 if no SSE3 support + UseSSE = MIN2((intx)2,UseSSE); + if( !supports_sse2() ) // Drop to 1 if no SSE2 support + UseSSE = MIN2((intx)1,UseSSE); + if( !supports_sse () ) // Drop to 0 if no SSE support + UseSSE = 0; + + // On new cpus instructions which update whole XMM register should be used + // to prevent partial register stall due to dependencies on high half. + // + // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) + // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) + // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). + // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). + + if( is_amd() ) { // AMD cpus specific settings + if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { + // Use it on new AMD cpus starting from Opteron. + UseAddressNop = true; + } + if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { + // Use it on new AMD cpus starting from Opteron. + UseNewLongLShift = true; + } + if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { + if( supports_sse4a() ) { + UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron + } else { + UseXmmLoadAndClearUpper = false; + } + } + if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { + if( supports_sse4a() ) { + UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' + } else { + UseXmmRegToRegMoveAll = false; + } + } + if( FLAG_IS_DEFAULT(UseXmmI2F) ) { + if( supports_sse4a() ) { + UseXmmI2F = true; + } else { + UseXmmI2F = false; + } + } + if( FLAG_IS_DEFAULT(UseXmmI2D) ) { + if( supports_sse4a() ) { + UseXmmI2D = true; + } else { + UseXmmI2D = false; + } + } + } + + if( is_intel() ) { // Intel cpus specific settings + if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { + UseStoreImmI16 = false; // don't use it on Intel cpus + } + if( cpu_family() == 6 || cpu_family() == 15 ) { + if( FLAG_IS_DEFAULT(UseAddressNop) ) { + // Use it on all Intel cpus starting from PentiumPro + UseAddressNop = true; + } + } + if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { + UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus + } + if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { + if( supports_sse3() ) { + UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus + } else { + UseXmmRegToRegMoveAll = false; + } + } + if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus +#ifdef COMPILER2 + if( FLAG_IS_DEFAULT(MaxLoopPad) ) { + // For new Intel cpus do the next optimization: + // don't align the beginning of a loop if there are enough instructions + // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) + // in current fetch line (OptoLoopAlignment) or the padding + // is big (> MaxLoopPad). + // Set MaxLoopPad to 11 for new Intel cpus to reduce number of + // generated NOP instructions. 11 is the largest size of one + // address NOP instruction '0F 1F' (see Assembler::nop(i)). + MaxLoopPad = 11; + } +#endif // COMPILER2 + if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { + UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus + } + if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus + if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { + UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus + } + } + } + } + + assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); + assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); + + // set valid Prefetch instruction + if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; + if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; + if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; + if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; + + if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; + if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; + if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; + if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; + + // Allocation prefetch settings + intx cache_line_size = L1_data_cache_line_size(); + if( cache_line_size > AllocatePrefetchStepSize ) + AllocatePrefetchStepSize = cache_line_size; + if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) + AllocatePrefetchLines = 3; // Optimistic value + assert(AllocatePrefetchLines > 0, "invalid value"); + if( AllocatePrefetchLines < 1 ) // set valid value in product VM + AllocatePrefetchLines = 1; // Conservative value + + AllocatePrefetchDistance = allocate_prefetch_distance(); + AllocatePrefetchStyle = allocate_prefetch_style(); + + if( AllocatePrefetchStyle == 2 && is_intel() && + cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core +#ifdef _LP64 + AllocatePrefetchDistance = 384; +#else + AllocatePrefetchDistance = 320; +#endif + } + assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); + +#ifdef _LP64 + // Prefetch settings + PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); + PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); + PrefetchFieldsAhead = prefetch_fields_ahead(); +#endif + +#ifndef PRODUCT + if (PrintMiscellaneous && Verbose) { + tty->print_cr("Logical CPUs per core: %u", + logical_processors_per_package()); + tty->print_cr("UseSSE=%d",UseSSE); + tty->print("Allocation: "); + if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { + tty->print_cr("no prefetching"); + } else { + if (UseSSE == 0 && supports_3dnow()) { + tty->print("PREFETCHW"); + } else if (UseSSE >= 1) { + if (AllocatePrefetchInstr == 0) { + tty->print("PREFETCHNTA"); + } else if (AllocatePrefetchInstr == 1) { + tty->print("PREFETCHT0"); + } else if (AllocatePrefetchInstr == 2) { + tty->print("PREFETCHT2"); + } else if (AllocatePrefetchInstr == 3) { + tty->print("PREFETCHW"); + } + } + if (AllocatePrefetchLines > 1) { + tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); + } else { + tty->print_cr(" %d, one line", AllocatePrefetchDistance); + } + } + + if (PrefetchCopyIntervalInBytes > 0) { + tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); + } + if (PrefetchScanIntervalInBytes > 0) { + tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); + } + if (PrefetchFieldsAhead > 0) { + tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); + } + } +#endif // !PRODUCT +} + +void VM_Version::initialize() { + ResourceMark rm; + // Making this stub must be FIRST use of assembler + + stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); + } + CodeBuffer c(stub_blob->instructions_begin(), + stub_blob->instructions_size()); + VM_Version_StubGenerator g(&c); + getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, + g.generate_getPsrInfo()); + + get_processor_features(); +} diff -r 1b68c738c0d9 -r 7898caac2071 src/cpu/x86/vm/vm_version_x86.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Thu Feb 26 14:25:55 2009 -0800 @@ -0,0 +1,459 @@ +/* + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class VM_Version : public Abstract_VM_Version { +public: + // cpuid result register layouts. These are all unions of a uint32_t + // (in case anyone wants access to the register as a whole) and a bitfield. + + union StdCpuid1Eax { + uint32_t value; + struct { + uint32_t stepping : 4, + model : 4, + family : 4, + proc_type : 2, + : 2, + ext_model : 4, + ext_family : 8, + : 4; + } bits; + }; + + union StdCpuid1Ebx { // example, unused + uint32_t value; + struct { + uint32_t brand_id : 8, + clflush_size : 8, + threads_per_cpu : 8, + apic_id : 8; + } bits; + }; + + union StdCpuid1Ecx { + uint32_t value; + struct { + uint32_t sse3 : 1, + : 2, + monitor : 1, + : 1, + vmx : 1, + : 1, + est : 1, + : 1, + ssse3 : 1, + cid : 1, + : 2, + cmpxchg16: 1, + : 4, + dca : 1, + sse4_1 : 1, + sse4_2 : 1, + : 11; + } bits; + }; + + union StdCpuid1Edx { + uint32_t value; + struct { + uint32_t : 4, + tsc : 1, + : 3, + cmpxchg8 : 1, + : 6, + cmov : 1, + : 7, + mmx : 1, + fxsr : 1, + sse : 1, + sse2 : 1, + : 1, + ht : 1, + : 3; + } bits; + }; + + union DcpCpuid4Eax { + uint32_t value; + struct { + uint32_t cache_type : 5, + : 21, + cores_per_cpu : 6; + } bits; + }; + + union DcpCpuid4Ebx { + uint32_t value; + struct { + uint32_t L1_line_size : 12, + partitions : 10, + associativity : 10; + } bits; + }; + + union ExtCpuid1Ecx { + uint32_t value; + struct { + uint32_t LahfSahf : 1, + CmpLegacy : 1, + : 4, + abm : 1, + sse4a : 1, + misalignsse : 1, + prefetchw : 1, + : 22; + } bits; + }; + + union ExtCpuid1Edx { + uint32_t value; + struct { + uint32_t : 22, + mmx_amd : 1, + mmx : 1, + fxsr : 1, + : 4, + long_mode : 1, + tdnow2 : 1, + tdnow : 1; + } bits; + }; + + union ExtCpuid5Ex { + uint32_t value; + struct { + uint32_t L1_line_size : 8, + L1_tag_lines : 8, + L1_assoc : 8, + L1_size : 8; + } bits; + }; + + union ExtCpuid8Ecx { + uint32_t value; + struct { + uint32_t cores_per_cpu : 8, + : 24; + } bits; + }; + +protected: + static int _cpu; + static int _model; + static int _stepping; + static int _cpuFeatures; // features returned by the "cpuid" instruction + // 0 if this instruction is not available + static const char* _features_str; + + enum { + CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) + CPU_CMOV = (1 << 1), + CPU_FXSR = (1 << 2), + CPU_HT = (1 << 3), + CPU_MMX = (1 << 4), + CPU_3DNOW = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) + CPU_SSE = (1 << 6), + CPU_SSE2 = (1 << 7), + CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) + CPU_SSSE3 = (1 << 9), + CPU_SSE4A = (1 << 10), + CPU_SSE4_1 = (1 << 11), + CPU_SSE4_2 = (1 << 12) + } cpuFeatureFlags; + + // cpuid information block. All info derived from executing cpuid with + // various function numbers is stored here. Intel and AMD info is + // merged in this block: accessor methods disentangle it. + // + // The info block is laid out in subblocks of 4 dwords corresponding to + // eax, ebx, ecx and edx, whether or not they contain anything useful. + struct CpuidInfo { + // cpuid function 0 + uint32_t std_max_function; + uint32_t std_vendor_name_0; + uint32_t std_vendor_name_1; + uint32_t std_vendor_name_2; + + // cpuid function 1 + StdCpuid1Eax std_cpuid1_eax; + StdCpuid1Ebx std_cpuid1_ebx; + StdCpuid1Ecx std_cpuid1_ecx; + StdCpuid1Edx std_cpuid1_edx; + + // cpuid function 4 (deterministic cache parameters) + DcpCpuid4Eax dcp_cpuid4_eax; + DcpCpuid4Ebx dcp_cpuid4_ebx; + uint32_t dcp_cpuid4_ecx; // unused currently + uint32_t dcp_cpuid4_edx; // unused currently + + // cpuid function 0x80000000 // example, unused + uint32_t ext_max_function; + uint32_t ext_vendor_name_0; + uint32_t ext_vendor_name_1; + uint32_t ext_vendor_name_2; + + // cpuid function 0x80000001 + uint32_t ext_cpuid1_eax; // reserved + uint32_t ext_cpuid1_ebx; // reserved + ExtCpuid1Ecx ext_cpuid1_ecx; + ExtCpuid1Edx ext_cpuid1_edx; + + // cpuid functions 0x80000002 thru 0x80000004: example, unused + uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; + uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; + uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; + + // cpuid function 0x80000005 //AMD L1, Intel reserved + uint32_t ext_cpuid5_eax; // unused currently + uint32_t ext_cpuid5_ebx; // reserved + ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) + ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) + + // cpuid function 0x80000008 + uint32_t ext_cpuid8_eax; // unused currently + uint32_t ext_cpuid8_ebx; // reserved + ExtCpuid8Ecx ext_cpuid8_ecx; + uint32_t ext_cpuid8_edx; // reserved + }; + + // The actual cpuid info block + static CpuidInfo _cpuid_info; + + // Extractors and predicates + static uint32_t extended_cpu_family() { + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; + result += _cpuid_info.std_cpuid1_eax.bits.ext_family; + return result; + } + static uint32_t extended_cpu_model() { + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; + result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; + return result; + } + static uint32_t cpu_stepping() { + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; + return result; + } + static uint logical_processor_count() { + uint result = threads_per_core(); + return result; + } + static uint32_t feature_flags() { + uint32_t result = 0; + if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) + result |= CPU_CX8; + if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) + result |= CPU_CMOV; + if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && + _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) + result |= CPU_FXSR; + // HT flag is set for multi-core processors also. + if (threads_per_core() > 1) + result |= CPU_HT; + if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && + _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) + result |= CPU_MMX; + if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) + result |= CPU_3DNOW; + if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) + result |= CPU_SSE; + if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) + result |= CPU_SSE2; + if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) + result |= CPU_SSE3; + if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) + result |= CPU_SSSE3; + if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) + result |= CPU_SSE4A; + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) + result |= CPU_SSE4_1; + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) + result |= CPU_SSE4_2; + return result; + } + + static void get_processor_features(); + +public: + // Offsets for cpuid asm stub + static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } + static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } + static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } + static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } + static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } + static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } + + // Initialization + static void initialize(); + + // Asserts + static void assert_is_initialized() { + assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); + } + + // + // Processor family: + // 3 - 386 + // 4 - 486 + // 5 - Pentium + // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, + // Pentium M, Core Solo, Core Duo, Core2 Duo + // family 6 model: 9, 13, 14, 15 + // 0x0f - Pentium 4, Opteron + // + // Note: The cpu family should be used to select between + // instruction sequences which are valid on all Intel + // processors. Use the feature test functions below to + // determine whether a particular instruction is supported. + // + static int cpu_family() { return _cpu;} + static bool is_P6() { return cpu_family() >= 6; } + + static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' + static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' + + static uint cores_per_cpu() { + uint result = 1; + if (is_intel()) { + result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); + } else if (is_amd()) { + result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); + } + return result; + } + + static uint threads_per_core() { + uint result = 1; + if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / + cores_per_cpu(); + } + return result; + } + + static intx L1_data_cache_line_size() { + intx result = 0; + if (is_intel()) { + result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); + } else if (is_amd()) { + result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; + } + if (result < 32) // not defined ? + result = 32; // 32 bytes by default on x86 and other x64 + return result; + } + + // + // Feature identification + // + static bool supports_cpuid() { return _cpuFeatures != 0; } + static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } + static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } + static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } + static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } + static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } + static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } + static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } + static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } + static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } + static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } + static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } + // + // AMD features + // + static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } + static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } + static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } + static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } + + static bool supports_compare_and_exchange() { return true; } + + static const char* cpu_features() { return _features_str; } + + static intx allocate_prefetch_distance() { + // This method should be called before allocate_prefetch_style(). + // + // Hardware prefetching (distance/size in bytes): + // Pentium 3 - 64 / 32 + // Pentium 4 - 256 / 128 + // Athlon - 64 / 32 ???? + // Opteron - 128 / 64 only when 2 sequential cache lines accessed + // Core - 128 / 64 + // + // Software prefetching (distance in bytes / instruction with best score): + // Pentium 3 - 128 / prefetchnta + // Pentium 4 - 512 / prefetchnta + // Athlon - 128 / prefetchnta + // Opteron - 256 / prefetchnta + // Core - 256 / prefetchnta + // It will be used only when AllocatePrefetchStyle > 0 + + intx count = AllocatePrefetchDistance; + if (count < 0) { // default ? + if (is_amd()) { // AMD + if (supports_sse2()) + count = 256; // Opteron + else + count = 128; // Athlon + } else { // Intel + if (supports_sse2()) + if (cpu_family() == 6) { + count = 256; // Pentium M, Core, Core2 + } else { + count = 512; // Pentium 4 + } + else + count = 128; // Pentium 3 (and all other old CPUs) + } + } + return count; + } + static intx allocate_prefetch_style() { + assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); + // Return 0 if AllocatePrefetchDistance was not defined. + return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; + } + + // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from + // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. + // Tested intervals from 128 to 2048 in increments of 64 == one cache line. + // 256 bytes (4 dcache lines) was the nearest runner-up to 576. + + // gc copy/scan is disabled if prefetchw isn't supported, because + // Prefetch::write emits an inlined prefetchw on Linux. + // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. + // The used prefetcht0 instruction works for both amd64 and em64t. + static intx prefetch_copy_interval_in_bytes() { + intx interval = PrefetchCopyIntervalInBytes; + return interval >= 0 ? interval : 576; + } + static intx prefetch_scan_interval_in_bytes() { + intx interval = PrefetchScanIntervalInBytes; + return interval >= 0 ? interval : 576; + } + static intx prefetch_fields_ahead() { + intx count = PrefetchFieldsAhead; + return count >= 0 ? count : 1; + } +}; diff -r 1b68c738c0d9 -r 7898caac2071 src/cpu/x86/vm/vm_version_x86_32.cpp --- a/src/cpu/x86/vm/vm_version_x86_32.cpp Sun Feb 22 17:21:13 2009 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,472 +0,0 @@ -/* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -# include "incls/_precompiled.incl" -# include "incls/_vm_version_x86_32.cpp.incl" - - -int VM_Version::_cpu; -int VM_Version::_model; -int VM_Version::_stepping; -int VM_Version::_cpuFeatures; -const char* VM_Version::_features_str = ""; -VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; - -static BufferBlob* stub_blob; -static const int stub_size = 300; - -extern "C" { - typedef void (*getPsrInfo_stub_t)(void*); -} -static getPsrInfo_stub_t getPsrInfo_stub = NULL; - - -class VM_Version_StubGenerator: public StubCodeGenerator { - public: - - VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} - - address generate_getPsrInfo() { - // Flags to test CPU type. - const uint32_t EFL_AC = 0x40000; - const uint32_t EFL_ID = 0x200000; - // Values for when we don't have a CPUID instruction. - const int CPU_FAMILY_SHIFT = 8; - const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); - const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); - - Label detect_486, cpu486, detect_586, std_cpuid1; - Label ext_cpuid1, ext_cpuid5, done; - - StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); -# define __ _masm-> - - address start = __ pc(); - - // - // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); - // - __ push(rbp); - __ movptr(rbp, Address(rsp, 8)); // cpuid_info address - __ push(rbx); - __ push(rsi); - __ pushf(); // preserve rbx, and flags - __ pop(rax); - __ push(rax); - __ mov(rcx, rax); - // - // if we are unable to change the AC flag, we have a 386 - // - __ xorl(rax, EFL_AC); - __ push(rax); - __ popf(); - __ pushf(); - __ pop(rax); - __ cmpptr(rax, rcx); - __ jccb(Assembler::notEqual, detect_486); - - __ movl(rax, CPU_FAMILY_386); - __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); - __ jmp(done); - - // - // If we are unable to change the ID flag, we have a 486 which does - // not support the "cpuid" instruction. - // - __ bind(detect_486); - __ mov(rax, rcx); - __ xorl(rax, EFL_ID); - __ push(rax); - __ popf(); - __ pushf(); - __ pop(rax); - __ cmpptr(rcx, rax); - __ jccb(Assembler::notEqual, detect_586); - - __ bind(cpu486); - __ movl(rax, CPU_FAMILY_486); - __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); - __ jmp(done); - - // - // at this point, we have a chip which supports the "cpuid" instruction - // - __ bind(detect_586); - __ xorptr(rax, rax); - __ cpuid(); - __ orptr(rax, rax); - __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input - // value of at least 1, we give up and - // assume a 486 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - __ cmpl(rax, 3); // Is cpuid(0x4) supported? - __ jccb(Assembler::belowEqual, std_cpuid1); - - // - // cpuid(0x4) Deterministic cache params - // - __ movl(rax, 4); // and rcx already set to 0x0 - __ xorl(rcx, rcx); - __ cpuid(); - __ push(rax); - __ andl(rax, 0x1f); // Determine if valid cache parameters used - __ orl(rax, rax); // rax,[4:0] == 0 indicates invalid cache - __ pop(rax); - __ jccb(Assembler::equal, std_cpuid1); - - __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Standard cpuid(0x1) - // - __ bind(std_cpuid1); - __ movl(rax, 1); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - __ movl(rax, 0x80000000); - __ cpuid(); - __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? - __ jcc(Assembler::belowEqual, done); - __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? - __ jccb(Assembler::belowEqual, ext_cpuid1); - __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? - __ jccb(Assembler::belowEqual, ext_cpuid5); - // - // Extended cpuid(0x80000008) - // - __ movl(rax, 0x80000008); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Extended cpuid(0x80000005) - // - __ bind(ext_cpuid5); - __ movl(rax, 0x80000005); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Extended cpuid(0x80000001) - // - __ bind(ext_cpuid1); - __ movl(rax, 0x80000001); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // return - // - __ bind(done); - __ popf(); - __ pop(rsi); - __ pop(rbx); - __ pop(rbp); - __ ret(0); - -# undef __ - - return start; - }; -}; - - -void VM_Version::get_processor_features() { - - _cpu = 4; // 486 by default - _model = 0; - _stepping = 0; - _cpuFeatures = 0; - _logical_processors_per_package = 1; - if (!Use486InstrsOnly) { - // Get raw processor info - getPsrInfo_stub(&_cpuid_info); - assert_is_initialized(); - _cpu = extended_cpu_family(); - _model = extended_cpu_model(); - _stepping = cpu_stepping(); - if (cpu_family() > 4) { // it supports CPUID - _cpuFeatures = feature_flags(); - // Logical processors are only available on P4s and above, - // and only if hyperthreading is available. - _logical_processors_per_package = logical_processor_count(); - } - } - _supports_cx8 = supports_cmpxchg8(); - // if the OS doesn't support SSE, we can't use this feature even if the HW does - if( !os::supports_sse()) - _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); - if (UseSSE < 4) { - _cpuFeatures &= ~CPU_SSE4_1; - _cpuFeatures &= ~CPU_SSE4_2; - } - if (UseSSE < 3) { - _cpuFeatures &= ~CPU_SSE3; - _cpuFeatures &= ~CPU_SSSE3; - _cpuFeatures &= ~CPU_SSE4A; - } - if (UseSSE < 2) - _cpuFeatures &= ~CPU_SSE2; - if (UseSSE < 1) - _cpuFeatures &= ~CPU_SSE; - - if (logical_processors_per_package() == 1) { - // HT processor could be installed on a system which doesn't support HT. - _cpuFeatures &= ~CPU_HT; - } - - char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - cores_per_cpu(), threads_per_core(), - cpu_family(), _model, _stepping, - (supports_cmov() ? ", cmov" : ""), - (supports_cmpxchg8() ? ", cx8" : ""), - (supports_fxsr() ? ", fxsr" : ""), - (supports_mmx() ? ", mmx" : ""), - (supports_sse() ? ", sse" : ""), - (supports_sse2() ? ", sse2" : ""), - (supports_sse3() ? ", sse3" : ""), - (supports_ssse3()? ", ssse3": ""), - (supports_sse4_1() ? ", sse4.1" : ""), - (supports_sse4_2() ? ", sse4.2" : ""), - (supports_mmx_ext() ? ", mmxext" : ""), - (supports_3dnow() ? ", 3dnow" : ""), - (supports_3dnow2() ? ", 3dnowext" : ""), - (supports_sse4a() ? ", sse4a": ""), - (supports_ht() ? ", ht": "")); - _features_str = strdup(buf); - - // UseSSE is set to the smaller of what hardware supports and what - // the command line requires. I.e., you cannot set UseSSE to 2 on - // older Pentiums which do not support it. - if( UseSSE > 4 ) UseSSE=4; - if( UseSSE < 0 ) UseSSE=0; - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support - UseSSE = MIN2((intx)3,UseSSE); - if( !supports_sse3() ) // Drop to 2 if no SSE3 support - UseSSE = MIN2((intx)2,UseSSE); - if( !supports_sse2() ) // Drop to 1 if no SSE2 support - UseSSE = MIN2((intx)1,UseSSE); - if( !supports_sse () ) // Drop to 0 if no SSE support - UseSSE = 0; - - // On new cpus instructions which update whole XMM register should be used - // to prevent partial register stall due to dependencies on high half. - // - // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) - // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) - // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). - // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). - - if( is_amd() ) { // AMD cpus specific settings - if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { - // Use it on new AMD cpus starting from Opteron. - UseAddressNop = true; - } - if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { - // Use it on new AMD cpus starting from Opteron. - UseNewLongLShift = true; - } - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - if( supports_sse4a() ) { - UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron - } else { - UseXmmLoadAndClearUpper = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { - if( supports_sse4a() ) { - UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' - } else { - UseXmmRegToRegMoveAll = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmI2F) ) { - if( supports_sse4a() ) { - UseXmmI2F = true; - } else { - UseXmmI2F = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmI2D) ) { - if( supports_sse4a() ) { - UseXmmI2D = true; - } else { - UseXmmI2D = false; - } - } - } - - if( is_intel() ) { // Intel cpus specific settings - if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { - UseStoreImmI16 = false; // don't use it on Intel cpus - } - if( cpu_family() == 6 || cpu_family() == 15 ) { - if( FLAG_IS_DEFAULT(UseAddressNop) ) { - // Use it on all Intel cpus starting from PentiumPro - UseAddressNop = true; - } - } - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus - } - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { - if( supports_sse3() ) { - UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus - } else { - UseXmmRegToRegMoveAll = false; - } - } - if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus -#ifdef COMPILER2 - if( FLAG_IS_DEFAULT(MaxLoopPad) ) { - // For new Intel cpus do the next optimization: - // don't align the beginning of a loop if there are enough instructions - // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) - // in current fetch line (OptoLoopAlignment) or the padding - // is big (> MaxLoopPad). - // Set MaxLoopPad to 11 for new Intel cpus to reduce number of - // generated NOP instructions. 11 is the largest size of one - // address NOP instruction '0F 1F' (see Assembler::nop(i)). - MaxLoopPad = 11; - } -#endif // COMPILER2 - if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { - UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus - } - if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { - UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus - } - } - } - } - - assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); - assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); - - // set valid Prefetch instruction - if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; - if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; - if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; - if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; - - if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; - if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; - if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; - if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; - - // Allocation prefetch settings - intx cache_line_size = L1_data_cache_line_size(); - if( cache_line_size > AllocatePrefetchStepSize ) - AllocatePrefetchStepSize = cache_line_size; - if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) - AllocatePrefetchLines = 3; // Optimistic value - assert(AllocatePrefetchLines > 0, "invalid value"); - if( AllocatePrefetchLines < 1 ) // set valid value in product VM - AllocatePrefetchLines = 1; // Conservative value - - AllocatePrefetchDistance = allocate_prefetch_distance(); - AllocatePrefetchStyle = allocate_prefetch_style(); - - if( AllocatePrefetchStyle == 2 && is_intel() && - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core - AllocatePrefetchDistance = 320; - } - assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); - -#ifndef PRODUCT - if (PrintMiscellaneous && Verbose) { - tty->print_cr("Logical CPUs per core: %u", - logical_processors_per_package()); - tty->print_cr("UseSSE=%d",UseSSE); - tty->print("Allocation: "); - if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { - tty->print_cr("no prefetching"); - } else { - if (UseSSE == 0 && supports_3dnow()) { - tty->print("PREFETCHW"); - } else if (UseSSE >= 1) { - if (AllocatePrefetchInstr == 0) { - tty->print("PREFETCHNTA"); - } else if (AllocatePrefetchInstr == 1) { - tty->print("PREFETCHT0"); - } else if (AllocatePrefetchInstr == 2) { - tty->print("PREFETCHT2"); - } else if (AllocatePrefetchInstr == 3) { - tty->print("PREFETCHW"); - } - } - if (AllocatePrefetchLines > 1) { - tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); - } else { - tty->print_cr(" %d, one line", AllocatePrefetchDistance); - } - } - } -#endif // !PRODUCT -} - -void VM_Version::initialize() { - ResourceMark rm; - // Making this stub must be FIRST use of assembler - - stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); - if (stub_blob == NULL) { - vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); - } - CodeBuffer c(stub_blob->instructions_begin(), - stub_blob->instructions_size()); - VM_Version_StubGenerator g(&c); - getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, - g.generate_getPsrInfo()); - - get_processor_features(); -} diff -r 1b68c738c0d9 -r 7898caac2071 src/cpu/x86/vm/vm_version_x86_32.hpp --- a/src/cpu/x86/vm/vm_version_x86_32.hpp Sun Feb 22 17:21:13 2009 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,439 +0,0 @@ -/* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -class VM_Version: public Abstract_VM_Version { -public: - // cpuid result register layouts. These are all unions of a uint32_t - // (in case anyone wants access to the register as a whole) and a bitfield. - - union StdCpuid1Eax { - uint32_t value; - struct { - uint32_t stepping : 4, - model : 4, - family : 4, - proc_type : 2, - : 2, - ext_model : 4, - ext_family : 8, - : 4; - } bits; - }; - - union StdCpuid1Ebx { // example, unused - uint32_t value; - struct { - uint32_t brand_id : 8, - clflush_size : 8, - threads_per_cpu : 8, - apic_id : 8; - } bits; - }; - - union StdCpuid1Ecx { - uint32_t value; - struct { - uint32_t sse3 : 1, - : 2, - monitor : 1, - : 1, - vmx : 1, - : 1, - est : 1, - : 1, - ssse3 : 1, - cid : 1, - : 2, - cmpxchg16: 1, - : 4, - dca : 1, - sse4_1 : 1, - sse4_2 : 1, - : 11; - } bits; - }; - - union StdCpuid1Edx { - uint32_t value; - struct { - uint32_t : 4, - tsc : 1, - : 3, - cmpxchg8 : 1, - : 6, - cmov : 1, - : 7, - mmx : 1, - fxsr : 1, - sse : 1, - sse2 : 1, - : 1, - ht : 1, - : 3; - } bits; - }; - - union DcpCpuid4Eax { - uint32_t value; - struct { - uint32_t cache_type : 5, - : 21, - cores_per_cpu : 6; - } bits; - }; - - union DcpCpuid4Ebx { - uint32_t value; - struct { - uint32_t L1_line_size : 12, - partitions : 10, - associativity : 10; - } bits; - }; - - union ExtCpuid1Ecx { - uint32_t value; - struct { - uint32_t LahfSahf : 1, - CmpLegacy : 1, - : 4, - abm : 1, - sse4a : 1, - misalignsse : 1, - prefetchw : 1, - : 22; - } bits; - }; - - union ExtCpuid1Edx { - uint32_t value; - struct { - uint32_t : 22, - mmx_amd : 1, - mmx : 1, - fxsr : 1, - : 4, - long_mode : 1, - tdnow2 : 1, - tdnow : 1; - } bits; - }; - - union ExtCpuid5Ex { - uint32_t value; - struct { - uint32_t L1_line_size : 8, - L1_tag_lines : 8, - L1_assoc : 8, - L1_size : 8; - } bits; - }; - - union ExtCpuid8Ecx { - uint32_t value; - struct { - uint32_t cores_per_cpu : 8, - : 24; - } bits; - }; - -protected: - static int _cpu; - static int _model; - static int _stepping; - static int _cpuFeatures; // features returned by the "cpuid" instruction - // 0 if this instruction is not available - static const char* _features_str; - - enum { - CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) - CPU_CMOV = (1 << 1), - CPU_FXSR = (1 << 2), - CPU_HT = (1 << 3), - CPU_MMX = (1 << 4), - CPU_3DNOW= (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) - CPU_SSE = (1 << 6), - CPU_SSE2 = (1 << 7), - CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX) - CPU_SSSE3= (1 << 9), - CPU_SSE4A= (1 <<10), - CPU_SSE4_1 = (1 << 11), - CPU_SSE4_2 = (1 << 12) - } cpuFeatureFlags; - - // cpuid information block. All info derived from executing cpuid with - // various function numbers is stored here. Intel and AMD info is - // merged in this block: accessor methods disentangle it. - // - // The info block is laid out in subblocks of 4 dwords corresponding to - // rax, rbx, rcx and rdx, whether or not they contain anything useful. - struct CpuidInfo { - // cpuid function 0 - uint32_t std_max_function; - uint32_t std_vendor_name_0; - uint32_t std_vendor_name_1; - uint32_t std_vendor_name_2; - - // cpuid function 1 - StdCpuid1Eax std_cpuid1_rax; - StdCpuid1Ebx std_cpuid1_rbx; - StdCpuid1Ecx std_cpuid1_rcx; - StdCpuid1Edx std_cpuid1_rdx; - - // cpuid function 4 (deterministic cache parameters) - DcpCpuid4Eax dcp_cpuid4_rax; - DcpCpuid4Ebx dcp_cpuid4_rbx; - uint32_t dcp_cpuid4_rcx; // unused currently - uint32_t dcp_cpuid4_rdx; // unused currently - - // cpuid function 0x80000000 // example, unused - uint32_t ext_max_function; - uint32_t ext_vendor_name_0; - uint32_t ext_vendor_name_1; - uint32_t ext_vendor_name_2; - - // cpuid function 0x80000001 - uint32_t ext_cpuid1_rax; // reserved - uint32_t ext_cpuid1_rbx; // reserved - ExtCpuid1Ecx ext_cpuid1_rcx; - ExtCpuid1Edx ext_cpuid1_rdx; - - // cpuid functions 0x80000002 thru 0x80000004: example, unused - uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; - uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; - uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; - - // cpuid function 0x80000005 //AMD L1, Intel reserved - uint32_t ext_cpuid5_rax; // unused currently - uint32_t ext_cpuid5_rbx; // reserved - ExtCpuid5Ex ext_cpuid5_rcx; // L1 data cache info (AMD) - ExtCpuid5Ex ext_cpuid5_rdx; // L1 instruction cache info (AMD) - - // cpuid function 0x80000008 - uint32_t ext_cpuid8_rax; // unused currently - uint32_t ext_cpuid8_rbx; // reserved - ExtCpuid8Ecx ext_cpuid8_rcx; - uint32_t ext_cpuid8_rdx; // reserved - }; - - // The actual cpuid info block - static CpuidInfo _cpuid_info; - - // Extractors and predicates - static uint32_t extended_cpu_family() { - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family; - result += _cpuid_info.std_cpuid1_rax.bits.ext_family; - return result; - } - static uint32_t extended_cpu_model() { - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model; - result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4; - return result; - } - static uint32_t cpu_stepping() { - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.stepping; - return result; - } - static uint logical_processor_count() { - uint result = threads_per_core(); - return result; - } - static uint32_t feature_flags() { - uint32_t result = 0; - if (_cpuid_info.std_cpuid1_rdx.bits.cmpxchg8 != 0) - result |= CPU_CX8; - if (_cpuid_info.std_cpuid1_rdx.bits.cmov != 0) - result |= CPU_CMOV; - if (_cpuid_info.std_cpuid1_rdx.bits.fxsr != 0 || is_amd() && - _cpuid_info.ext_cpuid1_rdx.bits.fxsr != 0) - result |= CPU_FXSR; - // HT flag is set for multi-core processors also. - if (threads_per_core() > 1) - result |= CPU_HT; - if (_cpuid_info.std_cpuid1_rdx.bits.mmx != 0 || is_amd() && - _cpuid_info.ext_cpuid1_rdx.bits.mmx != 0) - result |= CPU_MMX; - if (is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow != 0) - result |= CPU_3DNOW; - if (_cpuid_info.std_cpuid1_rdx.bits.sse != 0) - result |= CPU_SSE; - if (_cpuid_info.std_cpuid1_rdx.bits.sse2 != 0) - result |= CPU_SSE2; - if (_cpuid_info.std_cpuid1_rcx.bits.sse3 != 0) - result |= CPU_SSE3; - if (_cpuid_info.std_cpuid1_rcx.bits.ssse3 != 0) - result |= CPU_SSSE3; - if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0) - result |= CPU_SSE4A; - if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0) - result |= CPU_SSE4_1; - if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0) - result |= CPU_SSE4_2; - return result; - } - - static void get_processor_features(); - -public: - // Offsets for cpuid asm stub - static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } - static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_rax); } - static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_rax); } - static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_rax); } - static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_rax); } - static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_rax); } - - // Initialization - static void initialize(); - - // Asserts - static void assert_is_initialized() { - assert(_cpuid_info.std_cpuid1_rax.bits.family != 0, "VM_Version not initialized"); - } - - // - // Processor family: - // 3 - 386 - // 4 - 486 - // 5 - Pentium - // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, - // Pentium M, Core Solo, Core Duo, Core2 Duo - // family 6 model: 9, 13, 14, 15 - // 0x0f - Pentium 4, Opteron - // - // Note: The cpu family should be used to select between - // instruction sequences which are valid on all Intel - // processors. Use the feature test functions below to - // determine whether a particular instruction is supported. - // - static int cpu_family() { return _cpu;} - static bool is_P6() { return cpu_family() >= 6; } - - static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' - static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' - - static uint cores_per_cpu() { - uint result = 1; - if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_rax.bits.cores_per_cpu + 1); - } else if (is_amd()) { - result = (_cpuid_info.ext_cpuid8_rcx.bits.cores_per_cpu + 1); - } - return result; - } - - static uint threads_per_core() { - uint result = 1; - if (_cpuid_info.std_cpuid1_rdx.bits.ht != 0) { - result = _cpuid_info.std_cpuid1_rbx.bits.threads_per_cpu / - cores_per_cpu(); - } - return result; - } - - static intx L1_data_cache_line_size() { - intx result = 0; - if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_rbx.bits.L1_line_size + 1); - } else if (is_amd()) { - result = _cpuid_info.ext_cpuid5_rcx.bits.L1_line_size; - } - if (result < 32) // not defined ? - result = 32; // 32 bytes by default on x86 - return result; - } - - // - // Feature identification - // - static bool supports_cpuid() { return _cpuFeatures != 0; } - static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } - static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } - static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } - static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } - static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } - static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } - static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } - static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } - static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } - static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } - static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } - // - // AMD features - // - static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } - static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.mmx_amd != 0; } - static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow2 != 0; } - static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } - - static bool supports_compare_and_exchange() { return true; } - - static const char* cpu_features() { return _features_str; } - - static intx allocate_prefetch_distance() { - // This method should be called before allocate_prefetch_style(). - // - // Hardware prefetching (distance/size in bytes): - // Pentium 3 - 64 / 32 - // Pentium 4 - 256 / 128 - // Athlon - 64 / 32 ???? - // Opteron - 128 / 64 only when 2 sequential cache lines accessed - // Core - 128 / 64 - // - // Software prefetching (distance in bytes / instruction with best score): - // Pentium 3 - 128 / prefetchnta - // Pentium 4 - 512 / prefetchnta - // Athlon - 128 / prefetchnta - // Opteron - 256 / prefetchnta - // Core - 256 / prefetchnta - // It will be used only when AllocatePrefetchStyle > 0 - - intx count = AllocatePrefetchDistance; - if (count < 0) { // default ? - if (is_amd()) { // AMD - if (supports_sse2()) - count = 256; // Opteron - else - count = 128; // Athlon - } else { // Intel - if (supports_sse2()) - if (cpu_family() == 6) { - count = 256; // Pentium M, Core, Core2 - } else { - count = 512; // Pentium 4 - } - else - count = 128; // Pentium 3 (and all other old CPUs) - } - } - return count; - } - static intx allocate_prefetch_style() { - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); - // Return 0 if AllocatePrefetchDistance was not defined or - // prefetch instruction is not supported. - return (AllocatePrefetchDistance > 0 && - (supports_3dnow() || supports_sse())) ? AllocatePrefetchStyle : 0; - } -}; diff -r 1b68c738c0d9 -r 7898caac2071 src/cpu/x86/vm/vm_version_x86_64.cpp --- a/src/cpu/x86/vm/vm_version_x86_64.cpp Sun Feb 22 17:21:13 2009 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,419 +0,0 @@ -/* - * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -# include "incls/_precompiled.incl" -# include "incls/_vm_version_x86_64.cpp.incl" - -int VM_Version::_cpu; -int VM_Version::_model; -int VM_Version::_stepping; -int VM_Version::_cpuFeatures; -const char* VM_Version::_features_str = ""; -VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; - -static BufferBlob* stub_blob; -static const int stub_size = 300; - -extern "C" { - typedef void (*getPsrInfo_stub_t)(void*); -} -static getPsrInfo_stub_t getPsrInfo_stub = NULL; - - -class VM_Version_StubGenerator: public StubCodeGenerator { - public: - - VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} - - address generate_getPsrInfo() { - - Label std_cpuid1, ext_cpuid1, ext_cpuid5, done; - - StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); -# define __ _masm-> - - address start = __ pc(); - - // - // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); - // - // rcx and rdx are first and second argument registers on windows - - __ push(rbp); - __ mov(rbp, c_rarg0); // cpuid_info address - __ push(rbx); - __ push(rsi); - - // - // we have a chip which supports the "cpuid" instruction - // - __ xorl(rax, rax); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - __ cmpl(rax, 3); // Is cpuid(0x4) supported? - __ jccb(Assembler::belowEqual, std_cpuid1); - - // - // cpuid(0x4) Deterministic cache params - // - __ movl(rax, 4); - __ xorl(rcx, rcx); // L1 cache - __ cpuid(); - __ push(rax); - __ andl(rax, 0x1f); // Determine if valid cache parameters used - __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache - __ pop(rax); - __ jccb(Assembler::equal, std_cpuid1); - - __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Standard cpuid(0x1) - // - __ bind(std_cpuid1); - __ movl(rax, 1); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - __ movl(rax, 0x80000000); - __ cpuid(); - __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? - __ jcc(Assembler::belowEqual, done); - __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? - __ jccb(Assembler::belowEqual, ext_cpuid1); - __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? - __ jccb(Assembler::belowEqual, ext_cpuid5); - // - // Extended cpuid(0x80000008) - // - __ movl(rax, 0x80000008); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Extended cpuid(0x80000005) - // - __ bind(ext_cpuid5); - __ movl(rax, 0x80000005); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Extended cpuid(0x80000001) - // - __ bind(ext_cpuid1); - __ movl(rax, 0x80000001); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // return - // - __ bind(done); - __ pop(rsi); - __ pop(rbx); - __ pop(rbp); - __ ret(0); - -# undef __ - - return start; - }; -}; - - -void VM_Version::get_processor_features() { - - _logical_processors_per_package = 1; - // Get raw processor info - getPsrInfo_stub(&_cpuid_info); - assert_is_initialized(); - _cpu = extended_cpu_family(); - _model = extended_cpu_model(); - _stepping = cpu_stepping(); - _cpuFeatures = feature_flags(); - // Logical processors are only available on P4s and above, - // and only if hyperthreading is available. - _logical_processors_per_package = logical_processor_count(); - _supports_cx8 = supports_cmpxchg8(); - // OS should support SSE for x64 and hardware should support at least SSE2. - if (!VM_Version::supports_sse2()) { - vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); - } - if (UseSSE < 4) { - _cpuFeatures &= ~CPU_SSE4_1; - _cpuFeatures &= ~CPU_SSE4_2; - } - if (UseSSE < 3) { - _cpuFeatures &= ~CPU_SSE3; - _cpuFeatures &= ~CPU_SSSE3; - _cpuFeatures &= ~CPU_SSE4A; - } - if (UseSSE < 2) - _cpuFeatures &= ~CPU_SSE2; - if (UseSSE < 1) - _cpuFeatures &= ~CPU_SSE; - - if (logical_processors_per_package() == 1) { - // HT processor could be installed on a system which doesn't support HT. - _cpuFeatures &= ~CPU_HT; - } - - char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - cores_per_cpu(), threads_per_core(), - cpu_family(), _model, _stepping, - (supports_cmov() ? ", cmov" : ""), - (supports_cmpxchg8() ? ", cx8" : ""), - (supports_fxsr() ? ", fxsr" : ""), - (supports_mmx() ? ", mmx" : ""), - (supports_sse() ? ", sse" : ""), - (supports_sse2() ? ", sse2" : ""), - (supports_sse3() ? ", sse3" : ""), - (supports_ssse3()? ", ssse3": ""), - (supports_sse4_1() ? ", sse4.1" : ""), - (supports_sse4_2() ? ", sse4.2" : ""), - (supports_mmx_ext() ? ", mmxext" : ""), - (supports_3dnow() ? ", 3dnow" : ""), - (supports_3dnow2() ? ", 3dnowext" : ""), - (supports_sse4a() ? ", sse4a": ""), - (supports_ht() ? ", ht": "")); - _features_str = strdup(buf); - - // UseSSE is set to the smaller of what hardware supports and what - // the command line requires. I.e., you cannot set UseSSE to 2 on - // older Pentiums which do not support it. - if( UseSSE > 4 ) UseSSE=4; - if( UseSSE < 0 ) UseSSE=0; - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support - UseSSE = MIN2((intx)3,UseSSE); - if( !supports_sse3() ) // Drop to 2 if no SSE3 support - UseSSE = MIN2((intx)2,UseSSE); - if( !supports_sse2() ) // Drop to 1 if no SSE2 support - UseSSE = MIN2((intx)1,UseSSE); - if( !supports_sse () ) // Drop to 0 if no SSE support - UseSSE = 0; - - // On new cpus instructions which update whole XMM register should be used - // to prevent partial register stall due to dependencies on high half. - // - // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) - // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) - // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). - // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). - - if( is_amd() ) { // AMD cpus specific settings - if( FLAG_IS_DEFAULT(UseAddressNop) ) { - // Use it on all AMD cpus starting from Opteron (don't need - // a cpu check since only Opteron and new cpus support 64-bits mode). - UseAddressNop = true; - } - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - if( supports_sse4a() ) { - UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron - } else { - UseXmmLoadAndClearUpper = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { - if( supports_sse4a() ) { - UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' - } else { - UseXmmRegToRegMoveAll = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmI2F) ) { - if( supports_sse4a() ) { - UseXmmI2F = true; - } else { - UseXmmI2F = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmI2D) ) { - if( supports_sse4a() ) { - UseXmmI2D = true; - } else { - UseXmmI2D = false; - } - } - } - - if( is_intel() ) { // Intel cpus specific settings - if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { - UseStoreImmI16 = false; // don't use it on Intel cpus - } - if( FLAG_IS_DEFAULT(UseAddressNop) ) { - // Use it on all Intel cpus starting from PentiumPro - // (don't need a cpu check since only new cpus support 64-bits mode). - UseAddressNop = true; - } - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus - } - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { - if( supports_sse3() ) { - UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus - } else { - UseXmmRegToRegMoveAll = false; - } - } - if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus -#ifdef COMPILER2 - if( FLAG_IS_DEFAULT(MaxLoopPad) ) { - // For new Intel cpus do the next optimization: - // don't align the beginning of a loop if there are enough instructions - // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) - // in current fetch line (OptoLoopAlignment) or the padding - // is big (> MaxLoopPad). - // Set MaxLoopPad to 11 for new Intel cpus to reduce number of - // generated NOP instructions. 11 is the largest size of one - // address NOP instruction '0F 1F' (see Assembler::nop(i)). - MaxLoopPad = 11; - } -#endif // COMPILER2 - if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { - UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus - } - if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { - UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus - } - } - } - } - - assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); - assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); - - // set valid Prefetch instruction - if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; - if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; - if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; - - if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; - if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; - if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; - - // Allocation prefetch settings - intx cache_line_size = L1_data_cache_line_size(); - if( cache_line_size > AllocatePrefetchStepSize ) - AllocatePrefetchStepSize = cache_line_size; - if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) - AllocatePrefetchLines = 3; // Optimistic value - assert(AllocatePrefetchLines > 0, "invalid value"); - if( AllocatePrefetchLines < 1 ) // set valid value in product VM - AllocatePrefetchLines = 1; // Conservative value - - AllocatePrefetchDistance = allocate_prefetch_distance(); - AllocatePrefetchStyle = allocate_prefetch_style(); - - if( AllocatePrefetchStyle == 2 && is_intel() && - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core - AllocatePrefetchDistance = 384; - } - assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); - - // Prefetch settings - PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); - PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); - PrefetchFieldsAhead = prefetch_fields_ahead(); - -#ifndef PRODUCT - if (PrintMiscellaneous && Verbose) { - tty->print_cr("Logical CPUs per core: %u", - logical_processors_per_package()); - tty->print_cr("UseSSE=%d",UseSSE); - tty->print("Allocation: "); - if (AllocatePrefetchStyle <= 0) { - tty->print_cr("no prefetching"); - } else { - if (AllocatePrefetchInstr == 0) { - tty->print("PREFETCHNTA"); - } else if (AllocatePrefetchInstr == 1) { - tty->print("PREFETCHT0"); - } else if (AllocatePrefetchInstr == 2) { - tty->print("PREFETCHT2"); - } else if (AllocatePrefetchInstr == 3) { - tty->print("PREFETCHW"); - } - if (AllocatePrefetchLines > 1) { - tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); - } else { - tty->print_cr(" %d, one line", AllocatePrefetchDistance); - } - } - if (PrefetchCopyIntervalInBytes > 0) { - tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); - } - if (PrefetchScanIntervalInBytes > 0) { - tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); - } - if (PrefetchFieldsAhead > 0) { - tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); - } - } -#endif // !PRODUCT -} - -void VM_Version::initialize() { - ResourceMark rm; - // Making this stub must be FIRST use of assembler - - stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); - if (stub_blob == NULL) { - vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); - } - CodeBuffer c(stub_blob->instructions_begin(), - stub_blob->instructions_size()); - VM_Version_StubGenerator g(&c); - getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, - g.generate_getPsrInfo()); - - get_processor_features(); -} diff -r 1b68c738c0d9 -r 7898caac2071 src/cpu/x86/vm/vm_version_x86_64.hpp --- a/src/cpu/x86/vm/vm_version_x86_64.hpp Sun Feb 22 17:21:13 2009 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,449 +0,0 @@ -/* - * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -class VM_Version : public Abstract_VM_Version { -public: - // cpuid result register layouts. These are all unions of a uint32_t - // (in case anyone wants access to the register as a whole) and a bitfield. - - union StdCpuid1Eax { - uint32_t value; - struct { - uint32_t stepping : 4, - model : 4, - family : 4, - proc_type : 2, - : 2, - ext_model : 4, - ext_family : 8, - : 4; - } bits; - }; - - union StdCpuid1Ebx { // example, unused - uint32_t value; - struct { - uint32_t brand_id : 8, - clflush_size : 8, - threads_per_cpu : 8, - apic_id : 8; - } bits; - }; - - union StdCpuid1Ecx { - uint32_t value; - struct { - uint32_t sse3 : 1, - : 2, - monitor : 1, - : 1, - vmx : 1, - : 1, - est : 1, - : 1, - ssse3 : 1, - cid : 1, - : 2, - cmpxchg16: 1, - : 4, - dca : 1, - sse4_1 : 1, - sse4_2 : 1, - : 11; - } bits; - }; - - union StdCpuid1Edx { - uint32_t value; - struct { - uint32_t : 4, - tsc : 1, - : 3, - cmpxchg8 : 1, - : 6, - cmov : 1, - : 7, - mmx : 1, - fxsr : 1, - sse : 1, - sse2 : 1, - : 1, - ht : 1, - : 3; - } bits; - }; - - union DcpCpuid4Eax { - uint32_t value; - struct { - uint32_t cache_type : 5, - : 21, - cores_per_cpu : 6; - } bits; - }; - - union DcpCpuid4Ebx { - uint32_t value; - struct { - uint32_t L1_line_size : 12, - partitions : 10, - associativity : 10; - } bits; - }; - - union ExtCpuid1Edx { - uint32_t value; - struct { - uint32_t : 22, - mmx_amd : 1, - mmx : 1, - fxsr : 1, - : 4, - long_mode : 1, - tdnow2 : 1, - tdnow : 1; - } bits; - }; - - union ExtCpuid1Ecx { - uint32_t value; - struct { - uint32_t LahfSahf : 1, - CmpLegacy : 1, - : 4, - abm : 1, - sse4a : 1, - misalignsse : 1, - prefetchw : 1, - : 22; - } bits; - }; - - union ExtCpuid5Ex { - uint32_t value; - struct { - uint32_t L1_line_size : 8, - L1_tag_lines : 8, - L1_assoc : 8, - L1_size : 8; - } bits; - }; - - union ExtCpuid8Ecx { - uint32_t value; - struct { - uint32_t cores_per_cpu : 8, - : 24; - } bits; - }; - -protected: - static int _cpu; - static int _model; - static int _stepping; - static int _cpuFeatures; // features returned by the "cpuid" instruction - // 0 if this instruction is not available - static const char* _features_str; - - enum { - CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) - CPU_CMOV = (1 << 1), - CPU_FXSR = (1 << 2), - CPU_HT = (1 << 3), - CPU_MMX = (1 << 4), - CPU_3DNOW= (1 << 5), - CPU_SSE = (1 << 6), - CPU_SSE2 = (1 << 7), - CPU_SSE3 = (1 << 8), - CPU_SSSE3= (1 << 9), - CPU_SSE4A= (1 <<10), - CPU_SSE4_1 = (1 << 11), - CPU_SSE4_2 = (1 << 12) - } cpuFeatureFlags; - - // cpuid information block. All info derived from executing cpuid with - // various function numbers is stored here. Intel and AMD info is - // merged in this block: accessor methods disentangle it. - // - // The info block is laid out in subblocks of 4 dwords corresponding to - // eax, ebx, ecx and edx, whether or not they contain anything useful. - struct CpuidInfo { - // cpuid function 0 - uint32_t std_max_function; - uint32_t std_vendor_name_0; - uint32_t std_vendor_name_1; - uint32_t std_vendor_name_2; - - // cpuid function 1 - StdCpuid1Eax std_cpuid1_eax; - StdCpuid1Ebx std_cpuid1_ebx; - StdCpuid1Ecx std_cpuid1_ecx; - StdCpuid1Edx std_cpuid1_edx; - - // cpuid function 4 (deterministic cache parameters) - DcpCpuid4Eax dcp_cpuid4_eax; - DcpCpuid4Ebx dcp_cpuid4_ebx; - uint32_t dcp_cpuid4_ecx; // unused currently - uint32_t dcp_cpuid4_edx; // unused currently - - // cpuid function 0x80000000 // example, unused - uint32_t ext_max_function; - uint32_t ext_vendor_name_0; - uint32_t ext_vendor_name_1; - uint32_t ext_vendor_name_2; - - // cpuid function 0x80000001 - uint32_t ext_cpuid1_eax; // reserved - uint32_t ext_cpuid1_ebx; // reserved - ExtCpuid1Ecx ext_cpuid1_ecx; - ExtCpuid1Edx ext_cpuid1_edx; - - // cpuid functions 0x80000002 thru 0x80000004: example, unused - uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; - uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; - uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; - - // cpuid function 0x80000005 //AMD L1, Intel reserved - uint32_t ext_cpuid5_eax; // unused currently - uint32_t ext_cpuid5_ebx; // reserved - ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) - ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) - - // cpuid function 0x80000008 - uint32_t ext_cpuid8_eax; // unused currently - uint32_t ext_cpuid8_ebx; // reserved - ExtCpuid8Ecx ext_cpuid8_ecx; - uint32_t ext_cpuid8_edx; // reserved - }; - - // The actual cpuid info block - static CpuidInfo _cpuid_info; - - // Extractors and predicates - static uint32_t extended_cpu_family() { - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; - result += _cpuid_info.std_cpuid1_eax.bits.ext_family; - return result; - } - static uint32_t extended_cpu_model() { - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; - result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; - return result; - } - static uint32_t cpu_stepping() { - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; - return result; - } - static uint logical_processor_count() { - uint result = threads_per_core(); - return result; - } - static uint32_t feature_flags() { - uint32_t result = 0; - if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) - result |= CPU_CX8; - if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) - result |= CPU_CMOV; - if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && - _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) - result |= CPU_FXSR; - // HT flag is set for multi-core processors also. - if (threads_per_core() > 1) - result |= CPU_HT; - if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && - _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) - result |= CPU_MMX; - if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) - result |= CPU_3DNOW; - if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) - result |= CPU_SSE; - if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) - result |= CPU_SSE2; - if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) - result |= CPU_SSE3; - if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) - result |= CPU_SSSE3; - if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) - result |= CPU_SSE4A; - if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) - result |= CPU_SSE4_1; - if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) - result |= CPU_SSE4_2; - return result; - } - - static void get_processor_features(); - -public: - // Offsets for cpuid asm stub - static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } - static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } - static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } - static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } - static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } - static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } - - // Initialization - static void initialize(); - - // Asserts - static void assert_is_initialized() { - assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); - } - - // - // Processor family: - // 3 - 386 - // 4 - 486 - // 5 - Pentium - // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, - // Pentium M, Core Solo, Core Duo, Core2 Duo - // family 6 model: 9, 13, 14, 15 - // 0x0f - Pentium 4, Opteron - // - // Note: The cpu family should be used to select between - // instruction sequences which are valid on all Intel - // processors. Use the feature test functions below to - // determine whether a particular instruction is supported. - // - static int cpu_family() { return _cpu;} - static bool is_P6() { return cpu_family() >= 6; } - - static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' - static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' - - static uint cores_per_cpu() { - uint result = 1; - if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); - } else if (is_amd()) { - result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); - } - return result; - } - - static uint threads_per_core() { - uint result = 1; - if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / - cores_per_cpu(); - } - return result; - } - - static intx L1_data_cache_line_size() { - intx result = 0; - if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); - } else if (is_amd()) { - result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; - } - if (result < 32) // not defined ? - result = 32; // 32 bytes by default for other x64 - return result; - } - - // - // Feature identification - // - static bool supports_cpuid() { return _cpuFeatures != 0; } - static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } - static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } - static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } - static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } - static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } - static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } - static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } - static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } - static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } - static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } - static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } - // - // AMD features - // - static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } - static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } - static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } - static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } - - static bool supports_compare_and_exchange() { return true; } - - static const char* cpu_features() { return _features_str; } - - static intx allocate_prefetch_distance() { - // This method should be called before allocate_prefetch_style(). - // - // Hardware prefetching (distance/size in bytes): - // Pentium 4 - 256 / 128 - // Opteron - 128 / 64 only when 2 sequential cache lines accessed - // Core - 128 / 64 - // - // Software prefetching (distance in bytes / instruction with best score): - // Pentium 4 - 512 / prefetchnta - // Opteron - 256 / prefetchnta - // Core - 256 / prefetchnta - // It will be used only when AllocatePrefetchStyle > 0 - - intx count = AllocatePrefetchDistance; - if (count < 0) { // default ? - if (is_amd()) { // AMD - count = 256; // Opteron - } else { // Intel - if (cpu_family() == 6) { - count = 256;// Pentium M, Core, Core2 - } else { - count = 512;// Pentium 4 - } - } - } - return count; - } - static intx allocate_prefetch_style() { - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); - // Return 0 if AllocatePrefetchDistance was not defined. - return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; - } - - // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from - // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. - // Tested intervals from 128 to 2048 in increments of 64 == one cache line. - // 256 bytes (4 dcache lines) was the nearest runner-up to 576. - - // gc copy/scan is disabled if prefetchw isn't supported, because - // Prefetch::write emits an inlined prefetchw on Linux. - // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. - // The used prefetcht0 instruction works for both amd64 and em64t. - static intx prefetch_copy_interval_in_bytes() { - intx interval = PrefetchCopyIntervalInBytes; - return interval >= 0 ? interval : 576; - } - static intx prefetch_scan_interval_in_bytes() { - intx interval = PrefetchScanIntervalInBytes; - return interval >= 0 ? interval : 576; - } - static intx prefetch_fields_ahead() { - intx count = PrefetchFieldsAhead; - return count >= 0 ? count : 1; - } -}; diff -r 1b68c738c0d9 -r 7898caac2071 src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Sun Feb 22 17:21:13 2009 -0800 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Thu Feb 26 14:25:55 2009 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -299,14 +299,18 @@ } +#endif // AMD64 + bool os::supports_sse() { +#ifdef AMD64 + return true; +#else if (sse_status == SSE_UNKNOWN) check_for_sse_support(); return sse_status == SSE_SUPPORTED; +#endif // AMD64 } -#endif // AMD64 - bool os::is_allocatable(size_t bytes) { #ifdef AMD64 return true; diff -r 1b68c738c0d9 -r 7898caac2071 src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Sun Feb 22 17:21:13 2009 -0800 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Thu Feb 26 14:25:55 2009 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1999-2004 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,9 @@ static void fence_bootstrap (); static void setup_fpu(); +#endif // AMD64 + static bool supports_sse(); -#endif // AMD64 static bool is_allocatable(size_t bytes); diff -r 1b68c738c0d9 -r 7898caac2071 src/share/vm/includeDB_core --- a/src/share/vm/includeDB_core Sun Feb 22 17:21:13 2009 -0800 +++ b/src/share/vm/includeDB_core Thu Feb 26 14:25:55 2009 -0800 @@ -1,5 +1,5 @@ // -// Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. +// Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -176,7 +176,7 @@ arguments.cpp oop.inline.hpp arguments.cpp os_.inline.hpp arguments.cpp universe.inline.hpp -arguments.cpp vm_version_.hpp +arguments.cpp vm_version_.hpp arguments.hpp java.hpp arguments.hpp perfData.hpp @@ -241,7 +241,7 @@ assembler.hpp register_.hpp assembler.hpp relocInfo.hpp assembler.hpp top.hpp -assembler.hpp vm_version_.hpp +assembler.hpp vm_version_.hpp assembler.inline.hpp assembler.hpp assembler.inline.hpp codeBuffer.hpp @@ -280,7 +280,7 @@ atomic_.inline.hpp atomic.hpp atomic_.inline.hpp os.hpp -atomic_.inline.hpp vm_version_.hpp +atomic_.inline.hpp vm_version_.hpp // attachListener is jck optional, put cpp deps in includeDB_features @@ -2176,7 +2176,7 @@ interpreterRuntime.cpp threadCritical.hpp interpreterRuntime.cpp universe.inline.hpp interpreterRuntime.cpp vmSymbols.hpp -interpreterRuntime.cpp vm_version_.hpp +interpreterRuntime.cpp vm_version_.hpp interpreterRuntime.hpp bytecode.hpp interpreterRuntime.hpp frame.inline.hpp @@ -2279,7 +2279,7 @@ java.cpp universe.hpp java.cpp vmError.hpp java.cpp vm_operations.hpp -java.cpp vm_version_.hpp +java.cpp vm_version_.hpp java.cpp vtune.hpp java.hpp os.hpp @@ -3485,7 +3485,7 @@ register_.cpp register_.hpp register_.hpp register.hpp -register_.hpp vm_version_.hpp +register_.hpp vm_version_.hpp registerMap.hpp globalDefinitions.hpp registerMap.hpp register_.hpp @@ -3835,7 +3835,7 @@ statSampler.cpp statSampler.hpp statSampler.cpp systemDictionary.hpp statSampler.cpp vmSymbols.hpp -statSampler.cpp vm_version_.hpp +statSampler.cpp vm_version_.hpp statSampler.hpp perfData.hpp statSampler.hpp task.hpp @@ -4579,22 +4579,22 @@ vm_version.cpp arguments.hpp vm_version.cpp oop.inline.hpp vm_version.cpp universe.hpp -vm_version.cpp vm_version_.hpp +vm_version.cpp vm_version_.hpp vm_version.hpp allocation.hpp vm_version.hpp ostream.hpp -vm_version_.cpp assembler_.inline.hpp -vm_version_.cpp java.hpp -vm_version_.cpp os_.inline.hpp -vm_version_.cpp resourceArea.hpp -vm_version_.cpp stubCodeGenerator.hpp -vm_version_.cpp vm_version_.hpp - -vm_version_.hpp globals_extension.hpp -vm_version_.hpp vm_version.hpp - -vm_version_.cpp vm_version_.hpp +vm_version_.cpp assembler_.inline.hpp +vm_version_.cpp java.hpp +vm_version_.cpp os_.inline.hpp +vm_version_.cpp resourceArea.hpp +vm_version_.cpp stubCodeGenerator.hpp +vm_version_.cpp vm_version_.hpp + +vm_version_.hpp globals_extension.hpp +vm_version_.hpp vm_version.hpp + +vm_version_.cpp vm_version_.hpp vmreg.cpp assembler.hpp vmreg.cpp vmreg.hpp diff -r 1b68c738c0d9 -r 7898caac2071 src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Sun Feb 22 17:21:13 2009 -0800 +++ b/src/share/vm/opto/escape.cpp Thu Feb 26 14:25:55 2009 -0800 @@ -756,6 +756,16 @@ } else { break; } + } else if (result->Opcode() == Op_SCMemProj) { + assert(result->in(0)->is_LoadStore(), "sanity"); + const Type *at = phase->type(result->in(0)->in(MemNode::Address)); + if (at != Type::TOP) { + assert (at->isa_ptr() != NULL, "pointer type required."); + int idx = C->get_alias_index(at->is_ptr()); + assert(idx != alias_idx, "Object is not scalar replaceable if a LoadStore node access its field"); + break; + } + result = result->in(0)->in(MemNode::Memory); } } if (result->is_Phi()) { diff -r 1b68c738c0d9 -r 7898caac2071 src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Sun Feb 22 17:21:13 2009 -0800 +++ b/src/share/vm/opto/macro.cpp Thu Feb 26 14:25:55 2009 -0800 @@ -250,6 +250,15 @@ assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw"); } mem = mem->in(MemNode::Memory); + } else if (mem->Opcode() == Op_SCMemProj) { + assert(mem->in(0)->is_LoadStore(), "sanity"); + const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr(); + int adr_idx = Compile::current()->get_alias_index(atype); + if (adr_idx == alias_idx) { + assert(false, "Object is not scalar replaceable if a LoadStore node access its field"); + return NULL; + } + mem = mem->in(0)->in(MemNode::Memory); } else { return mem; } @@ -329,8 +338,15 @@ return NULL; } values.at_put(j, val); + } else if (val->Opcode() == Op_SCMemProj) { + assert(val->in(0)->is_LoadStore(), "sanity"); + assert(false, "Object is not scalar replaceable if a LoadStore node access its field"); + return NULL; } else { +#ifdef ASSERT + val->dump(); assert(false, "unknown node on this path"); +#endif return NULL; // unknown node on this path } } diff -r 1b68c738c0d9 -r 7898caac2071 src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Sun Feb 22 17:21:13 2009 -0800 +++ b/src/share/vm/opto/matcher.cpp Thu Feb 26 14:25:55 2009 -0800 @@ -1707,11 +1707,18 @@ void Matcher::find_shared( Node *n ) { // Allocate stack of size C->unique() * 2 to avoid frequent realloc MStack mstack(C->unique() * 2); + // Mark nodes as address_visited if they are inputs to an address expression + VectorSet address_visited(Thread::current()->resource_area()); mstack.push(n, Visit); // Don't need to pre-visit root node while (mstack.is_nonempty()) { n = mstack.node(); // Leave node on stack Node_State nstate = mstack.state(); + uint nop = n->Opcode(); if (nstate == Pre_Visit) { + if (address_visited.test(n->_idx)) { // Visited in address already? + // Flag as visited and shared now. + set_visited(n); + } if (is_visited(n)) { // Visited already? // Node is shared and has no reason to clone. Flag it as shared. // This causes it to match into a register for the sharing. @@ -1726,7 +1733,7 @@ set_visited(n); // Flag as visited now bool mem_op = false; - switch( n->Opcode() ) { // Handle some opcodes special + switch( nop ) { // Handle some opcodes special case Op_Phi: // Treat Phis as shared roots case Op_Parm: case Op_Proj: // All handled specially during matching @@ -1887,34 +1894,51 @@ // to have a single use so force sharing here. set_shared(m->in(AddPNode::Base)->in(1)); } + + // Some inputs for address expression are not put on stack + // to avoid marking them as shared and forcing them into register + // if they are used only in address expressions. + // But they should be marked as shared if there are other uses + // besides address expressions. + Node *off = m->in(AddPNode::Offset); - if( off->is_Con() ) { - set_visited(m); // Flag as visited now + if( off->is_Con() && + // When there are other uses besides address expressions + // put it on stack and mark as shared. + !is_visited(m) ) { + address_visited.test_set(m->_idx); // Flag as address_visited Node *adr = m->in(AddPNode::Address); // Intel, ARM and friends can handle 2 adds in addressing mode if( clone_shift_expressions && adr->is_AddP() && // AtomicAdd is not an addressing expression. // Cheap to find it by looking for screwy base. - !adr->in(AddPNode::Base)->is_top() ) { - set_visited(adr); // Flag as visited now + !adr->in(AddPNode::Base)->is_top() && + // Are there other uses besides address expressions? + !is_visited(adr) ) { + address_visited.set(adr->_idx); // Flag as address_visited Node *shift = adr->in(AddPNode::Offset); // Check for shift by small constant as well if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && - shift->in(2)->get_int() <= 3 ) { - set_visited(shift); // Flag as visited now + shift->in(2)->get_int() <= 3 && + // Are there other uses besides address expressions? + !is_visited(shift) ) { + address_visited.set(shift->_idx); // Flag as address_visited mstack.push(shift->in(2), Visit); + Node *conv = shift->in(1); #ifdef _LP64 // Allow Matcher to match the rule which bypass // ConvI2L operation for an array index on LP64 // if the index value is positive. - if( shift->in(1)->Opcode() == Op_ConvI2L && - shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) { - set_visited(shift->in(1)); // Flag as visited now - mstack.push(shift->in(1)->in(1), Pre_Visit); + if( conv->Opcode() == Op_ConvI2L && + conv->as_Type()->type()->is_long()->_lo >= 0 && + // Are there other uses besides address expressions? + !is_visited(conv) ) { + address_visited.set(conv->_idx); // Flag as address_visited + mstack.push(conv->in(1), Pre_Visit); } else #endif - mstack.push(shift->in(1), Pre_Visit); + mstack.push(conv, Pre_Visit); } else { mstack.push(shift, Pre_Visit); } diff -r 1b68c738c0d9 -r 7898caac2071 src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Sun Feb 22 17:21:13 2009 -0800 +++ b/src/share/vm/opto/memnode.cpp Thu Feb 26 14:25:55 2009 -0800 @@ -1066,11 +1066,11 @@ break; } } - LoadNode* load = NULL; - if (allocation != NULL && base->in(load_index)->is_Load()) { - load = base->in(load_index)->as_Load(); - } - if (load != NULL && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) { + bool has_load = ( allocation != NULL && + (base->in(load_index)->is_Load() || + base->in(load_index)->is_DecodeN() && + base->in(load_index)->in(1)->is_Load()) ); + if (has_load && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) { // Push the loads from the phi that comes from valueOf up // through it to allow elimination of the loads and the recovery // of the original value. @@ -1106,11 +1106,20 @@ result->set_req(load_index, in2); return result; } - } else if (base->is_Load()) { + } else if (base->is_Load() || + base->is_DecodeN() && base->in(1)->is_Load()) { + if (base->is_DecodeN()) { + // Get LoadN node which loads cached Integer object + base = base->in(1); + } // Eliminate the load of Integer.value for integers from the cache // array by deriving the value from the index into the array. // Capture the offset of the load and then reverse the computation. Node* load_base = base->in(Address)->in(AddPNode::Base); + if (load_base->is_DecodeN()) { + // Get LoadN node which loads IntegerCache.cache field + load_base = load_base->in(1); + } if (load_base != NULL) { Compile::AliasType* atp = phase->C->alias_type(load_base->adr_type()); intptr_t cache_offset;