Mercurial > hg > graal-compiler
comparison src/cpu/x86/vm/vm_version_x86.cpp @ 6179:8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256 bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
author | kvn |
---|---|
date | Fri, 15 Jun 2012 01:25:19 -0700 |
parents | 0368109684cb |
children | 2c368ea3e844 |
comparison
equal
deleted
inserted
replaced
6146:eba1d5bce9e8 | 6179:8c92982cbbc4 |
---|---|
465 if (!supports_avx2()) // Drop to 1 if no AVX2 support | 465 if (!supports_avx2()) // Drop to 1 if no AVX2 support |
466 UseAVX = MIN2((intx)1,UseAVX); | 466 UseAVX = MIN2((intx)1,UseAVX); |
467 if (!supports_avx ()) // Drop to 0 if no AVX support | 467 if (!supports_avx ()) // Drop to 0 if no AVX support |
468 UseAVX = 0; | 468 UseAVX = 0; |
469 | 469 |
470 #ifdef COMPILER2 | |
471 if (UseFPUForSpilling) { | |
472 if (UseSSE < 2) { | |
473 // Only supported with SSE2+ | |
474 FLAG_SET_DEFAULT(UseFPUForSpilling, false); | |
475 } | |
476 } | |
477 if (MaxVectorSize > 0) { | |
478 if (!is_power_of_2(MaxVectorSize)) { | |
479 warning("MaxVectorSize must be a power of 2"); | |
480 FLAG_SET_DEFAULT(MaxVectorSize, 32); | |
481 } | |
482 if (MaxVectorSize > 32) { | |
483 FLAG_SET_DEFAULT(MaxVectorSize, 32); | |
484 } | |
485 if (MaxVectorSize > 16 && UseAVX == 0) { | |
486 // Only supported with AVX+ | |
487 FLAG_SET_DEFAULT(MaxVectorSize, 16); | |
488 } | |
489 if (UseSSE < 2) { | |
490 // Only supported with SSE2+ | |
491 FLAG_SET_DEFAULT(MaxVectorSize, 0); | |
492 } | |
493 } | |
494 #endif | |
495 | |
470 // On new cpus instructions which update whole XMM register should be used | 496 // On new cpus instructions which update whole XMM register should be used |
471 // to prevent partial register stall due to dependencies on high half. | 497 // to prevent partial register stall due to dependencies on high half. |
472 // | 498 // |
473 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) | 499 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) |
474 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) | 500 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) |
542 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { | 568 if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { |
543 UseUnalignedLoadStores = true; | 569 UseUnalignedLoadStores = true; |
544 } | 570 } |
545 } | 571 } |
546 | 572 |
573 #ifdef COMPILER2 | |
574 if (MaxVectorSize > 16) { | |
575 // Limit vectors size to 16 bytes on current AMD cpus. | |
576 FLAG_SET_DEFAULT(MaxVectorSize, 16); | |
577 } | |
578 #endif // COMPILER2 | |
547 } | 579 } |
548 | 580 |
549 if( is_intel() ) { // Intel cpus specific settings | 581 if( is_intel() ) { // Intel cpus specific settings |
550 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { | 582 if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { |
551 UseStoreImmI16 = false; // don't use it on Intel cpus | 583 UseStoreImmI16 = false; // don't use it on Intel cpus |
603 } | 635 } |
604 } else if (UsePopCountInstruction) { | 636 } else if (UsePopCountInstruction) { |
605 warning("POPCNT instruction is not available on this CPU"); | 637 warning("POPCNT instruction is not available on this CPU"); |
606 FLAG_SET_DEFAULT(UsePopCountInstruction, false); | 638 FLAG_SET_DEFAULT(UsePopCountInstruction, false); |
607 } | 639 } |
608 | |
609 #ifdef COMPILER2 | |
610 if (UseFPUForSpilling) { | |
611 if (UseSSE < 2) { | |
612 // Only supported with SSE2+ | |
613 FLAG_SET_DEFAULT(UseFPUForSpilling, false); | |
614 } | |
615 } | |
616 #endif | |
617 | 640 |
618 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); | 641 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); |
619 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); | 642 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); |
620 | 643 |
621 // set valid Prefetch instruction | 644 // set valid Prefetch instruction |