comparison src/cpu/x86/vm/vm_version_x86.cpp @ 4759:127b3692c168

7116452: Add support for AVX instructions. Summary: Added support for the AVX extension to the x86 instruction set. Reviewed-by: never
author kvn
date Wed, 14 Dec 2011 14:54:38 -0800
parents f08d439fab8c
children 22cee0ee8927
comparison
equal deleted inserted replaced
4758:d725f0affb1a 4759:127b3692c168
48 int VM_Version::_cpuFeatures; 48 int VM_Version::_cpuFeatures;
49 const char* VM_Version::_features_str = ""; 49 const char* VM_Version::_features_str = "";
50 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 50 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
51 51
52 static BufferBlob* stub_blob; 52 static BufferBlob* stub_blob;
53 static const int stub_size = 400; 53 static const int stub_size = 500;
54 54
55 extern "C" { 55 extern "C" {
56 typedef void (*getPsrInfo_stub_t)(void*); 56 typedef void (*getPsrInfo_stub_t)(void*);
57 } 57 }
58 static getPsrInfo_stub_t getPsrInfo_stub = NULL; 58 static getPsrInfo_stub_t getPsrInfo_stub = NULL;
71 const int CPU_FAMILY_SHIFT = 8; 71 const int CPU_FAMILY_SHIFT = 8;
72 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 72 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
73 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 73 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
74 74
75 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; 75 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
76 Label ext_cpuid1, ext_cpuid5, done; 76 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done;
77 77
78 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 78 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
79 # define __ _masm-> 79 # define __ _masm->
80 80
81 address start = __ pc(); 81 address start = __ pc();
227 __ movl(Address(rsi, 0), rax); 227 __ movl(Address(rsi, 0), rax);
228 __ movl(Address(rsi, 4), rbx); 228 __ movl(Address(rsi, 4), rbx);
229 __ movl(Address(rsi, 8), rcx); 229 __ movl(Address(rsi, 8), rcx);
230 __ movl(Address(rsi,12), rdx); 230 __ movl(Address(rsi,12), rdx);
231 231
232 //
233 // Check if OS has enabled XGETBV instruction to access XCR0
234 // (OSXSAVE feature flag) and CPU supports AVX
235 //
236 __ andl(rcx, 0x18000000);
237 __ cmpl(rcx, 0x18000000);
238 __ jccb(Assembler::notEqual, sef_cpuid);
239
240 //
241 // XCR0, XFEATURE_ENABLED_MASK register
242 //
243 __ xorl(rcx, rcx); // zero for XCR0 register
244 __ xgetbv();
245 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
246 __ movl(Address(rsi, 0), rax);
247 __ movl(Address(rsi, 4), rdx);
248
249 //
250 // cpuid(0x7) Structured Extended Features
251 //
252 __ bind(sef_cpuid);
253 __ movl(rax, 7);
254 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
255 __ jccb(Assembler::greater, ext_cpuid);
256
257 __ xorl(rcx, rcx);
258 __ cpuid();
259 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
260 __ movl(Address(rsi, 0), rax);
261 __ movl(Address(rsi, 4), rbx);
262
263 //
264 // Extended cpuid(0x80000000)
265 //
266 __ bind(ext_cpuid);
232 __ movl(rax, 0x80000000); 267 __ movl(rax, 0x80000000);
233 __ cpuid(); 268 __ cpuid();
234 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 269 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
235 __ jcc(Assembler::belowEqual, done); 270 __ jcc(Assembler::belowEqual, done);
236 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 271 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
357 _cpuFeatures &= ~CPU_SSE2; 392 _cpuFeatures &= ~CPU_SSE2;
358 393
359 if (UseSSE < 1) 394 if (UseSSE < 1)
360 _cpuFeatures &= ~CPU_SSE; 395 _cpuFeatures &= ~CPU_SSE;
361 396
397 if (UseAVX < 2)
398 _cpuFeatures &= ~CPU_AVX2;
399
400 if (UseAVX < 1)
401 _cpuFeatures &= ~CPU_AVX;
402
362 if (logical_processors_per_package() == 1) { 403 if (logical_processors_per_package() == 1) {
363 // HT processor could be installed on a system which doesn't support HT. 404 // HT processor could be installed on a system which doesn't support HT.
364 _cpuFeatures &= ~CPU_HT; 405 _cpuFeatures &= ~CPU_HT;
365 } 406 }
366 407
367 char buf[256]; 408 char buf[256];
368 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 409 jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
369 cores_per_cpu(), threads_per_core(), 410 cores_per_cpu(), threads_per_core(),
370 cpu_family(), _model, _stepping, 411 cpu_family(), _model, _stepping,
371 (supports_cmov() ? ", cmov" : ""), 412 (supports_cmov() ? ", cmov" : ""),
372 (supports_cmpxchg8() ? ", cx8" : ""), 413 (supports_cmpxchg8() ? ", cx8" : ""),
373 (supports_fxsr() ? ", fxsr" : ""), 414 (supports_fxsr() ? ", fxsr" : ""),
377 (supports_sse3() ? ", sse3" : ""), 418 (supports_sse3() ? ", sse3" : ""),
378 (supports_ssse3()? ", ssse3": ""), 419 (supports_ssse3()? ", ssse3": ""),
379 (supports_sse4_1() ? ", sse4.1" : ""), 420 (supports_sse4_1() ? ", sse4.1" : ""),
380 (supports_sse4_2() ? ", sse4.2" : ""), 421 (supports_sse4_2() ? ", sse4.2" : ""),
381 (supports_popcnt() ? ", popcnt" : ""), 422 (supports_popcnt() ? ", popcnt" : ""),
423 (supports_avx() ? ", avx" : ""),
424 (supports_avx2() ? ", avx2" : ""),
382 (supports_mmx_ext() ? ", mmxext" : ""), 425 (supports_mmx_ext() ? ", mmxext" : ""),
383 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), 426 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
384 (supports_lzcnt() ? ", lzcnt": ""), 427 (supports_lzcnt() ? ", lzcnt": ""),
385 (supports_sse4a() ? ", sse4a": ""), 428 (supports_sse4a() ? ", sse4a": ""),
386 (supports_ht() ? ", ht": "")); 429 (supports_ht() ? ", ht": ""));
387 _features_str = strdup(buf); 430 _features_str = strdup(buf);
388 431
389 // UseSSE is set to the smaller of what hardware supports and what 432 // UseSSE is set to the smaller of what hardware supports and what
390 // the command line requires. I.e., you cannot set UseSSE to 2 on 433 // the command line requires. I.e., you cannot set UseSSE to 2 on
391 // older Pentiums which do not support it. 434 // older Pentiums which do not support it.
392 if( UseSSE > 4 ) UseSSE=4; 435 if (UseSSE > 4) UseSSE=4;
393 if( UseSSE < 0 ) UseSSE=0; 436 if (UseSSE < 0) UseSSE=0;
394 if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support 437 if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
395 UseSSE = MIN2((intx)3,UseSSE); 438 UseSSE = MIN2((intx)3,UseSSE);
396 if( !supports_sse3() ) // Drop to 2 if no SSE3 support 439 if (!supports_sse3()) // Drop to 2 if no SSE3 support
397 UseSSE = MIN2((intx)2,UseSSE); 440 UseSSE = MIN2((intx)2,UseSSE);
398 if( !supports_sse2() ) // Drop to 1 if no SSE2 support 441 if (!supports_sse2()) // Drop to 1 if no SSE2 support
399 UseSSE = MIN2((intx)1,UseSSE); 442 UseSSE = MIN2((intx)1,UseSSE);
400 if( !supports_sse () ) // Drop to 0 if no SSE support 443 if (!supports_sse ()) // Drop to 0 if no SSE support
401 UseSSE = 0; 444 UseSSE = 0;
445
446 if (UseAVX > 2) UseAVX=2;
447 if (UseAVX < 0) UseAVX=0;
448 if (!supports_avx2()) // Drop to 1 if no AVX2 support
449 UseAVX = MIN2((intx)1,UseAVX);
450 if (!supports_avx ()) // Drop to 0 if no AVX support
451 UseAVX = 0;
402 452
403 // On new cpus instructions which update whole XMM register should be used 453 // On new cpus instructions which update whole XMM register should be used
404 // to prevent partial register stall due to dependencies on high half. 454 // to prevent partial register stall due to dependencies on high half.
405 // 455 //
406 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 456 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
532 // Use population count instruction if available. 582 // Use population count instruction if available.
533 if (supports_popcnt()) { 583 if (supports_popcnt()) {
534 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 584 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
535 UsePopCountInstruction = true; 585 UsePopCountInstruction = true;
536 } 586 }
587 } else if (UsePopCountInstruction) {
588 warning("POPCNT instruction is not available on this CPU");
589 FLAG_SET_DEFAULT(UsePopCountInstruction, false);
537 } 590 }
538 591
539 #ifdef COMPILER2 592 #ifdef COMPILER2
540 if (UseFPUForSpilling) { 593 if (UseFPUForSpilling) {
541 if (UseSSE < 2) { 594 if (UseSSE < 2) {
603 656
604 #ifndef PRODUCT 657 #ifndef PRODUCT
605 if (PrintMiscellaneous && Verbose) { 658 if (PrintMiscellaneous && Verbose) {
606 tty->print_cr("Logical CPUs per core: %u", 659 tty->print_cr("Logical CPUs per core: %u",
607 logical_processors_per_package()); 660 logical_processors_per_package());
608 tty->print_cr("UseSSE=%d",UseSSE); 661 tty->print("UseSSE=%d",UseSSE);
662 if (UseAVX > 0) {
663 tty->print(" UseAVX=%d",UseAVX);
664 }
665 tty->cr();
609 tty->print("Allocation"); 666 tty->print("Allocation");
610 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { 667 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
611 tty->print_cr(": no prefetching"); 668 tty->print_cr(": no prefetching");
612 } else { 669 } else {
613 tty->print(" prefetching: "); 670 tty->print(" prefetching: ");