Mercurial > hg > truffle
comparison src/cpu/x86/vm/vm_version_x86.cpp @ 4759:127b3692c168
7116452: Add support for AVX instructions
Summary: Added support for AVX extension to the x86 instruction set.
Reviewed-by: never
author | kvn |
---|---|
date | Wed, 14 Dec 2011 14:54:38 -0800 |
parents | f08d439fab8c |
children | 22cee0ee8927 |
comparison
legend: equal | deleted | inserted | replaced
4758:d725f0affb1a | 4759:127b3692c168 |
---|---|
48 int VM_Version::_cpuFeatures; | 48 int VM_Version::_cpuFeatures; |
49 const char* VM_Version::_features_str = ""; | 49 const char* VM_Version::_features_str = ""; |
50 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; | 50 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; |
51 | 51 |
52 static BufferBlob* stub_blob; | 52 static BufferBlob* stub_blob; |
53 static const int stub_size = 400; | 53 static const int stub_size = 500; |
54 | 54 |
55 extern "C" { | 55 extern "C" { |
56 typedef void (*getPsrInfo_stub_t)(void*); | 56 typedef void (*getPsrInfo_stub_t)(void*); |
57 } | 57 } |
58 static getPsrInfo_stub_t getPsrInfo_stub = NULL; | 58 static getPsrInfo_stub_t getPsrInfo_stub = NULL; |
71 const int CPU_FAMILY_SHIFT = 8; | 71 const int CPU_FAMILY_SHIFT = 8; |
72 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); | 72 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); |
73 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); | 73 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); |
74 | 74 |
75 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; | 75 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; |
76 Label ext_cpuid1, ext_cpuid5, done; | 76 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done; |
77 | 77 |
78 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); | 78 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); |
79 # define __ _masm-> | 79 # define __ _masm-> |
80 | 80 |
81 address start = __ pc(); | 81 address start = __ pc(); |
227 __ movl(Address(rsi, 0), rax); | 227 __ movl(Address(rsi, 0), rax); |
228 __ movl(Address(rsi, 4), rbx); | 228 __ movl(Address(rsi, 4), rbx); |
229 __ movl(Address(rsi, 8), rcx); | 229 __ movl(Address(rsi, 8), rcx); |
230 __ movl(Address(rsi,12), rdx); | 230 __ movl(Address(rsi,12), rdx); |
231 | 231 |
232 // | |
233 // Check if OS has enabled XGETBV instruction to access XCR0 | |
234 // (OSXSAVE feature flag) and CPU supports AVX | |
235 // | |
236 __ andl(rcx, 0x18000000); | |
237 __ cmpl(rcx, 0x18000000); | |
238 __ jccb(Assembler::notEqual, sef_cpuid); | |
239 | |
240 // | |
241 // XCR0, XFEATURE_ENABLED_MASK register | |
242 // | |
243 __ xorl(rcx, rcx); // zero for XCR0 register | |
244 __ xgetbv(); | |
245 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); | |
246 __ movl(Address(rsi, 0), rax); | |
247 __ movl(Address(rsi, 4), rdx); | |
248 | |
249 // | |
250 // cpuid(0x7) Structured Extended Features | |
251 // | |
252 __ bind(sef_cpuid); | |
253 __ movl(rax, 7); | |
254 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? | |
255 __ jccb(Assembler::greater, ext_cpuid); | |
256 | |
257 __ xorl(rcx, rcx); | |
258 __ cpuid(); | |
259 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); | |
260 __ movl(Address(rsi, 0), rax); | |
261 __ movl(Address(rsi, 4), rbx); | |
262 | |
263 // | |
264 // Extended cpuid(0x80000000) | |
265 // | |
266 __ bind(ext_cpuid); | |
232 __ movl(rax, 0x80000000); | 267 __ movl(rax, 0x80000000); |
233 __ cpuid(); | 268 __ cpuid(); |
234 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? | 269 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? |
235 __ jcc(Assembler::belowEqual, done); | 270 __ jcc(Assembler::belowEqual, done); |
236 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? | 271 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? |
357 _cpuFeatures &= ~CPU_SSE2; | 392 _cpuFeatures &= ~CPU_SSE2; |
358 | 393 |
359 if (UseSSE < 1) | 394 if (UseSSE < 1) |
360 _cpuFeatures &= ~CPU_SSE; | 395 _cpuFeatures &= ~CPU_SSE; |
361 | 396 |
397 if (UseAVX < 2) | |
398 _cpuFeatures &= ~CPU_AVX2; | |
399 | |
400 if (UseAVX < 1) | |
401 _cpuFeatures &= ~CPU_AVX; | |
402 | |
362 if (logical_processors_per_package() == 1) { | 403 if (logical_processors_per_package() == 1) { |
363 // HT processor could be installed on a system which doesn't support HT. | 404 // HT processor could be installed on a system which doesn't support HT. |
364 _cpuFeatures &= ~CPU_HT; | 405 _cpuFeatures &= ~CPU_HT; |
365 } | 406 } |
366 | 407 |
367 char buf[256]; | 408 char buf[256]; |
368 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", | 409 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", |
369 cores_per_cpu(), threads_per_core(), | 410 cores_per_cpu(), threads_per_core(), |
370 cpu_family(), _model, _stepping, | 411 cpu_family(), _model, _stepping, |
371 (supports_cmov() ? ", cmov" : ""), | 412 (supports_cmov() ? ", cmov" : ""), |
372 (supports_cmpxchg8() ? ", cx8" : ""), | 413 (supports_cmpxchg8() ? ", cx8" : ""), |
373 (supports_fxsr() ? ", fxsr" : ""), | 414 (supports_fxsr() ? ", fxsr" : ""), |
377 (supports_sse3() ? ", sse3" : ""), | 418 (supports_sse3() ? ", sse3" : ""), |
378 (supports_ssse3()? ", ssse3": ""), | 419 (supports_ssse3()? ", ssse3": ""), |
379 (supports_sse4_1() ? ", sse4.1" : ""), | 420 (supports_sse4_1() ? ", sse4.1" : ""), |
380 (supports_sse4_2() ? ", sse4.2" : ""), | 421 (supports_sse4_2() ? ", sse4.2" : ""), |
381 (supports_popcnt() ? ", popcnt" : ""), | 422 (supports_popcnt() ? ", popcnt" : ""), |
423 (supports_avx() ? ", avx" : ""), | |
424 (supports_avx2() ? ", avx2" : ""), | |
382 (supports_mmx_ext() ? ", mmxext" : ""), | 425 (supports_mmx_ext() ? ", mmxext" : ""), |
383 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), | 426 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), |
384 (supports_lzcnt() ? ", lzcnt": ""), | 427 (supports_lzcnt() ? ", lzcnt": ""), |
385 (supports_sse4a() ? ", sse4a": ""), | 428 (supports_sse4a() ? ", sse4a": ""), |
386 (supports_ht() ? ", ht": "")); | 429 (supports_ht() ? ", ht": "")); |
387 _features_str = strdup(buf); | 430 _features_str = strdup(buf); |
388 | 431 |
389 // UseSSE is set to the smaller of what hardware supports and what | 432 // UseSSE is set to the smaller of what hardware supports and what |
390 // the command line requires. I.e., you cannot set UseSSE to 2 on | 433 // the command line requires. I.e., you cannot set UseSSE to 2 on |
391 // older Pentiums which do not support it. | 434 // older Pentiums which do not support it. |
392 if( UseSSE > 4 ) UseSSE=4; | 435 if (UseSSE > 4) UseSSE=4; |
393 if( UseSSE < 0 ) UseSSE=0; | 436 if (UseSSE < 0) UseSSE=0; |
394 if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support | 437 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support |
395 UseSSE = MIN2((intx)3,UseSSE); | 438 UseSSE = MIN2((intx)3,UseSSE); |
396 if( !supports_sse3() ) // Drop to 2 if no SSE3 support | 439 if (!supports_sse3()) // Drop to 2 if no SSE3 support |
397 UseSSE = MIN2((intx)2,UseSSE); | 440 UseSSE = MIN2((intx)2,UseSSE); |
398 if( !supports_sse2() ) // Drop to 1 if no SSE2 support | 441 if (!supports_sse2()) // Drop to 1 if no SSE2 support |
399 UseSSE = MIN2((intx)1,UseSSE); | 442 UseSSE = MIN2((intx)1,UseSSE); |
400 if( !supports_sse () ) // Drop to 0 if no SSE support | 443 if (!supports_sse ()) // Drop to 0 if no SSE support |
401 UseSSE = 0; | 444 UseSSE = 0; |
445 | |
446 if (UseAVX > 2) UseAVX=2; | |
447 if (UseAVX < 0) UseAVX=0; | |
448 if (!supports_avx2()) // Drop to 1 if no AVX2 support | |
449 UseAVX = MIN2((intx)1,UseAVX); | |
450 if (!supports_avx ()) // Drop to 0 if no AVX support | |
451 UseAVX = 0; | |
402 | 452 |
403 // On new cpus instructions which update whole XMM register should be used | 453 // On new cpus instructions which update whole XMM register should be used |
404 // to prevent partial register stall due to dependencies on high half. | 454 // to prevent partial register stall due to dependencies on high half. |
405 // | 455 // |
406 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) | 456 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) |
532 // Use population count instruction if available. | 582 // Use population count instruction if available. |
533 if (supports_popcnt()) { | 583 if (supports_popcnt()) { |
534 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { | 584 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { |
535 UsePopCountInstruction = true; | 585 UsePopCountInstruction = true; |
536 } | 586 } |
587 } else if (UsePopCountInstruction) { | |
588 warning("POPCNT instruction is not available on this CPU"); | |
589 FLAG_SET_DEFAULT(UsePopCountInstruction, false); | |
537 } | 590 } |
538 | 591 |
539 #ifdef COMPILER2 | 592 #ifdef COMPILER2 |
540 if (UseFPUForSpilling) { | 593 if (UseFPUForSpilling) { |
541 if (UseSSE < 2) { | 594 if (UseSSE < 2) { |
603 | 656 |
604 #ifndef PRODUCT | 657 #ifndef PRODUCT |
605 if (PrintMiscellaneous && Verbose) { | 658 if (PrintMiscellaneous && Verbose) { |
606 tty->print_cr("Logical CPUs per core: %u", | 659 tty->print_cr("Logical CPUs per core: %u", |
607 logical_processors_per_package()); | 660 logical_processors_per_package()); |
608 tty->print_cr("UseSSE=%d",UseSSE); | 661 tty->print("UseSSE=%d",UseSSE); |
662 if (UseAVX > 0) { | |
663 tty->print(" UseAVX=%d",UseAVX); | |
664 } | |
665 tty->cr(); | |
609 tty->print("Allocation"); | 666 tty->print("Allocation"); |
610 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { | 667 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { |
611 tty->print_cr(": no prefetching"); | 668 tty->print_cr(": no prefetching"); |
612 } else { | 669 } else { |
613 tty->print(" prefetching: "); | 670 tty->print(" prefetching: "); |