comparison src/cpu/x86/vm/x86_32.ad @ 4768:8940fd98d540

Merge
author kvn
date Thu, 29 Dec 2011 11:37:50 -0800
parents 1dc233a8c7fe
children e9a5e0a812c8
comparison of 4730:7faca6dfa2ed (left) with 4768:8940fd98d540 (right)
279 return 6; // fldcw 279 return 6; // fldcw
280 return 0; 280 return 0;
281 } 281 }
282 282
283 static int preserve_SP_size() { 283 static int preserve_SP_size() {
284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) 284 return 2; // op, rm(reg/reg)
285 } 285 }
286 286
287 // !!!!! Special hack to get all type of calls to specify the byte offset 287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 // from the start of the call to the point where the return address 288 // from the start of the call to the point where the return address
289 // will point. 289 // will point.
493 emit_opcode( cbuf, 0x8B ); 493 emit_opcode( cbuf, 0x8B );
494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); 494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
495 } 495 }
496 } 496 }
497 497
498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 498 void emit_cmpfp_fixup(MacroAssembler& _masm) {
499 if( dst_encoding == src_encoding ) { 499 Label exit;
500 // reg-reg copy, use an empty encoding 500 __ jccb(Assembler::noParity, exit);
501 } else { 501 __ pushf();
502 MacroAssembler _masm(&cbuf); 502 //
503 503 // comiss/ucomiss instructions set ZF,PF,CF flags and
504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); 504 // zero OF,AF,SF for NaN values.
505 } 505 // Fixup flags by zeroing ZF,PF so that compare of NaN
506 // values returns 'less than' result (CF is set).
507 // Leave the rest of flags unchanged.
508 //
509 // 7 6 5 4 3 2 1 0
510 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
511 // 0 0 1 0 1 0 1 1 (0x2B)
512 //
513 __ andl(Address(rsp, 0), 0xffffff2b);
514 __ popf();
515 __ bind(exit);
516 }
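The mask is easiest to see with concrete numbers: after comiss/ucomiss of a NaN the low flag byte is ZF|PF|CF = 0x45, and ANDing it with 0x2B leaves only CF = 0x01, so the restored flags read as 'below' (less than). A minimal sketch of that arithmetic in plain C++, not part of the changeset:

    #include <cstdint>
    #include <cassert>
    int main() {
      const uint32_t CF = 1u << 0, PF = 1u << 2, ZF = 1u << 6;
      uint32_t nan_flags = ZF | PF | CF;            // what comiss/ucomiss leaves for a NaN operand
      uint32_t fixed     = nan_flags & 0xffffff2bu; // the andl(Address(rsp, 0), 0xffffff2b) above
      assert(fixed == CF);                          // only CF survives: NaN compares as 'less than'
      return 0;
    }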
517
518 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
519 Label done;
520 __ movl(dst, -1);
521 __ jcc(Assembler::parity, done);
522 __ jcc(Assembler::below, done);
523 __ setb(Assembler::notEqual, dst);
524 __ movzbl(dst, dst);
525 __ bind(done);
506 } 526 }
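emit_cmpfp3 turns the flags of an SSE compare into the usual three-way integer result, with unordered (NaN) folded into -1. A sketch of the value it computes, in plain C++ rather than the emitted instruction sequence:

    // dst starts at -1; parity (unordered) or below keep it; otherwise dst = (not equal) ? 1 : 0
    static int cmpfp3_result(double a, double b) {
      if (a != a || b != b) return -1;   // unordered: a NaN operand
      if (a < b)            return -1;
      if (a == b)           return  0;
      return 1;
    }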
507 527
508 528
509 //============================================================================= 529 //=============================================================================
510 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; 530 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
790 } 810 }
791 811
792 // Helper for XMM registers. Extra opcode bits, limited syntax. 812 // Helper for XMM registers. Extra opcode bits, limited syntax.
793 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 813 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
794 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { 814 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
795 if( cbuf ) { 815 if (cbuf) {
796 if( reg_lo+1 == reg_hi ) { // double move? 816 MacroAssembler _masm(cbuf);
797 if( is_load && !UseXmmLoadAndClearUpper ) 817 if (reg_lo+1 == reg_hi) { // double move?
798 emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load 818 if (is_load) {
799 else 819 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
800 emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise 820 } else {
821 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
822 }
801 } else { 823 } else {
802 emit_opcode(*cbuf, 0xF3 ); 824 if (is_load) {
825 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
826 } else {
827 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
828 }
803 } 829 }
804 emit_opcode(*cbuf, 0x0F );
805 if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
806 emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
807 else
808 emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
809 encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
810 #ifndef PRODUCT 830 #ifndef PRODUCT
811 } else if( !do_size ) { 831 } else if (!do_size) {
812 if( size != 0 ) st->print("\n\t"); 832 if (size != 0) st->print("\n\t");
813 if( reg_lo+1 == reg_hi ) { // double move? 833 if (reg_lo+1 == reg_hi) { // double move?
814 if( is_load ) st->print("%s %s,[ESP + #%d]", 834 if (is_load) st->print("%s %s,[ESP + #%d]",
815 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 835 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
816 Matcher::regName[reg_lo], offset); 836 Matcher::regName[reg_lo], offset);
817 else st->print("MOVSD [ESP + #%d],%s", 837 else st->print("MOVSD [ESP + #%d],%s",
818 offset, Matcher::regName[reg_lo]); 838 offset, Matcher::regName[reg_lo]);
819 } else { 839 } else {
820 if( is_load ) st->print("MOVSS %s,[ESP + #%d]", 840 if (is_load) st->print("MOVSS %s,[ESP + #%d]",
821 Matcher::regName[reg_lo], offset); 841 Matcher::regName[reg_lo], offset);
822 else st->print("MOVSS [ESP + #%d],%s", 842 else st->print("MOVSS [ESP + #%d],%s",
823 offset, Matcher::regName[reg_lo]); 843 offset, Matcher::regName[reg_lo]);
824 } 844 }
825 #endif 845 #endif
826 } 846 }
827 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 847 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
848 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
828 return size+5+offset_size; 849 return size+5+offset_size;
829 } 850 }
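For the size bookkeeping: the SSE form is prefix + 0x0F + opcode + ModRM + SIB (an ESP-based address always needs a SIB byte), i.e. 5 fixed bytes plus a 0/1/4-byte displacement, and a 2-byte VEX prefix replaces the 2-byte prefix+0x0F pair, which is why the comment above says AVX leaves the total unchanged. A small sketch of that accounting, assuming only ESP-relative spill slots as in the helper:

    static int xmm_spill_size(int offset) {
      int prefix_escape = 2;  // 66/F2/F3 + 0F, or a 2-byte VEX prefix when UseAVX > 0
      int op = 1, modrm = 1, sib = 1;
      int disp = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
      return prefix_escape + op + modrm + sib + disp;  // == 5 + offset_size above
    }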
830 851
831 852
832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 853 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
833 int src_hi, int dst_hi, int size, outputStream* st ) { 854 int src_hi, int dst_hi, int size, outputStream* st ) {
834 if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers 855 if (cbuf) {
835 if( cbuf ) { 856 MacroAssembler _masm(cbuf);
836 if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) { 857 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
837 emit_opcode(*cbuf, 0x66 ); 858 __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
838 } 859 as_XMMRegister(Matcher::_regEncode[src_lo]));
839 emit_opcode(*cbuf, 0x0F ); 860 } else {
840 emit_opcode(*cbuf, 0x28 ); 861 __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
841 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); 862 as_XMMRegister(Matcher::_regEncode[src_lo]));
863 }
842 #ifndef PRODUCT 864 #ifndef PRODUCT
843 } else if( !do_size ) { 865 } else if (!do_size) {
844 if( size != 0 ) st->print("\n\t"); 866 if (size != 0) st->print("\n\t");
845 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 867 if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
868 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
846 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 869 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
847 } else { 870 } else {
848 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 871 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
849 } 872 }
850 #endif 873 } else {
851 }
852 return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
853 } else {
854 if( cbuf ) {
855 emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
856 emit_opcode(*cbuf, 0x0F );
857 emit_opcode(*cbuf, 0x10 );
858 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
859 #ifndef PRODUCT
860 } else if( !do_size ) {
861 if( size != 0 ) st->print("\n\t");
862 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 874 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
863 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 875 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
864 } else { 876 } else {
865 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 877 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
866 } 878 }
879 }
867 #endif 880 #endif
868 }
869 return size+4;
870 } 881 }
882 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
883 // Only MOVAPS SSE prefix uses 1 byte.
884 int sz = 4;
885 if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
886 UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
887 return size + sz;
871 } 888 }
872 889
873 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 890 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
874 int src_hi, int dst_hi, int size, outputStream* st ) { 891 int src_hi, int dst_hi, int size, outputStream* st ) {
875 // 32-bit 892 // 32-bit
876 if (cbuf) { 893 if (cbuf) {
877 emit_opcode(*cbuf, 0x66); 894 MacroAssembler _masm(cbuf);
878 emit_opcode(*cbuf, 0x0F); 895 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
879 emit_opcode(*cbuf, 0x6E); 896 as_Register(Matcher::_regEncode[src_lo]));
880 emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
881 #ifndef PRODUCT 897 #ifndef PRODUCT
882 } else if (!do_size) { 898 } else if (!do_size) {
883 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 899 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
884 #endif 900 #endif
885 } 901 }
889 905
890 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 906 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
891 int src_hi, int dst_hi, int size, outputStream* st ) { 907 int src_hi, int dst_hi, int size, outputStream* st ) {
892 // 32-bit 908 // 32-bit
893 if (cbuf) { 909 if (cbuf) {
894 emit_opcode(*cbuf, 0x66); 910 MacroAssembler _masm(cbuf);
895 emit_opcode(*cbuf, 0x0F); 911 __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
896 emit_opcode(*cbuf, 0x7E); 912 as_XMMRegister(Matcher::_regEncode[src_lo]));
897 emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
898 #ifndef PRODUCT 913 #ifndef PRODUCT
899 } else if (!do_size) { 914 } else if (!do_size) {
900 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 915 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
901 #endif 916 #endif
902 } 917 }
1758 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1773 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1759 $$$emit8$primary; 1774 $$$emit8$primary;
1760 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1775 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1761 %} 1776 %}
1762 1777
1763 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV 1778 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1764 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1779 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1765 emit_d8(cbuf, op >> 8 ); 1780 emit_d8(cbuf, op >> 8 );
1766 emit_d8(cbuf, op & 255); 1781 emit_d8(cbuf, op & 255);
1767 %} 1782 %}
1768 1783
1927 cbuf.set_insts_mark(); 1942 cbuf.set_insts_mark();
1928 $$$emit8$primary; 1943 $$$emit8$primary;
1929 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1944 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1930 emit_d8(cbuf, disp); // Displacement 1945 emit_d8(cbuf, disp); // Displacement
1931 1946
1932 %}
1933
1934 enc_class Xor_Reg (eRegI dst) %{
1935 emit_opcode(cbuf, 0x33);
1936 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1937 %} 1947 %}
1938 1948
1939 // Following encoding is no longer used, but may be restored if calling 1949 // Following encoding is no longer used, but may be restored if calling
1940 // convention changes significantly. 1950 // convention changes significantly.
1941 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1951 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
2011 emit_d32(cbuf, src_con); 2021 emit_d32(cbuf, src_con);
2012 } 2022 }
2013 %} 2023 %}
2014 2024
2015 2025
2016 enc_class MovI2X_reg(regX dst, eRegI src) %{
2017 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2018 emit_opcode(cbuf, 0x0F );
2019 emit_opcode(cbuf, 0x6E );
2020 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2021 %}
2022
2023 enc_class MovX2I_reg(eRegI dst, regX src) %{
2024 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2025 emit_opcode(cbuf, 0x0F );
2026 emit_opcode(cbuf, 0x7E );
2027 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2028 %}
2029
2030 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2031 { // MOVD $dst,$src.lo
2032 emit_opcode(cbuf,0x66);
2033 emit_opcode(cbuf,0x0F);
2034 emit_opcode(cbuf,0x6E);
2035 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036 }
2037 { // MOVD $tmp,$src.hi
2038 emit_opcode(cbuf,0x66);
2039 emit_opcode(cbuf,0x0F);
2040 emit_opcode(cbuf,0x6E);
2041 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2042 }
2043 { // PUNPCKLDQ $dst,$tmp
2044 emit_opcode(cbuf,0x66);
2045 emit_opcode(cbuf,0x0F);
2046 emit_opcode(cbuf,0x62);
2047 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
2048 }
2049 %}
2050
2051 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2052 { // MOVD $dst.lo,$src
2053 emit_opcode(cbuf,0x66);
2054 emit_opcode(cbuf,0x0F);
2055 emit_opcode(cbuf,0x7E);
2056 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2057 }
2058 { // PSHUFLW $tmp,$src,0x4E (01001110b)
2059 emit_opcode(cbuf,0xF2);
2060 emit_opcode(cbuf,0x0F);
2061 emit_opcode(cbuf,0x70);
2062 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2063 emit_d8(cbuf, 0x4E);
2064 }
2065 { // MOVD $dst.hi,$tmp
2066 emit_opcode(cbuf,0x66);
2067 emit_opcode(cbuf,0x0F);
2068 emit_opcode(cbuf,0x7E);
2069 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2070 }
2071 %}
2072
2073
2074 // Encode a reg-reg copy. If it is useless, then empty encoding. 2026 // Encode a reg-reg copy. If it is useless, then empty encoding.
2075 enc_class enc_Copy( eRegI dst, eRegI src ) %{ 2027 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2076 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2028 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2077 %} 2029 %}
2078 2030
2079 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{ 2031 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2080 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 2032 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2081 %} 2033 %}
2082 2034
2083 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
2084 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2085 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2086 %}
2087
2088 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) 2035 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2089 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2036 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2090 %} 2037 %}
2091 2038
2092 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 2039 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2114 enc_class Con32 (immI src) %{ // Con32(storeImmI) 2061 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2115 // Output immediate 2062 // Output immediate
2116 $$$emit32$src$$constant; 2063 $$$emit32$src$$constant;
2117 %} 2064 %}
2118 2065
2119 enc_class Con32F_as_bits(immF src) %{ // storeF_imm 2066 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm
2120 // Output Float immediate bits 2067 // Output Float immediate bits
2121 jfloat jf = $src$$constant; 2068 jfloat jf = $src$$constant;
2122 int jf_as_bits = jint_cast( jf ); 2069 int jf_as_bits = jint_cast( jf );
2123 emit_d32(cbuf, jf_as_bits); 2070 emit_d32(cbuf, jf_as_bits);
2124 %} 2071 %}
2125 2072
2126 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm 2073 enc_class Con32F_as_bits(immF src) %{ // storeX_imm
2127 // Output Float immediate bits 2074 // Output Float immediate bits
2128 jfloat jf = $src$$constant; 2075 jfloat jf = $src$$constant;
2129 int jf_as_bits = jint_cast( jf ); 2076 int jf_as_bits = jint_cast( jf );
2130 emit_d32(cbuf, jf_as_bits); 2077 emit_d32(cbuf, jf_as_bits);
2131 %} 2078 %}
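Both encodings emit the raw IEEE-754 bit pattern of the float constant as a 32-bit immediate; jint_cast here is a bit copy, not a numeric conversion. A sketch of the same operation in portable C++:

    #include <cstdint>
    #include <cstring>
    static int32_t float_as_bits(float f) {
      int32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));  // reinterpret the bits, no rounding or conversion
      return bits;                           // e.g. 1.0f -> 0x3F800000
    }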
2334 // move dst,src 2281 // move dst,src
2335 emit_opcode(cbuf,0x8B); 2282 emit_opcode(cbuf,0x8B);
2336 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 2283 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2337 %} 2284 %}
2338 2285
2339 enc_class enc_FP_store(memory mem, regD src) %{ 2286 enc_class enc_FPR_store(memory mem, regDPR src) %{
2340 // If src is FPR1, we can just FST to store it. 2287 // If src is FPR1, we can just FST to store it.
2341 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 2288 // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2342 int reg_encoding = 0x2; // Just store 2289 int reg_encoding = 0x2; // Just store
2343 int base = $mem$$base; 2290 int base = $mem$$base;
2344 int index = $mem$$index; 2291 int index = $mem$$index;
2483 %} 2430 %}
2484 2431
2485 2432
2486 // ----------------- Encodings for floating point unit ----------------- 2433 // ----------------- Encodings for floating point unit -----------------
2487 // May leave result in FPU-TOS or FPU reg depending on opcodes 2434 // May leave result in FPU-TOS or FPU reg depending on opcodes
2488 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV 2435 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV
2489 $$$emit8$primary; 2436 $$$emit8$primary;
2490 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 2437 emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2491 %} 2438 %}
2492 2439
2493 // Pop argument in FPR0 with FSTP ST(0) 2440 // Pop argument in FPR0 with FSTP ST(0)
2495 emit_opcode( cbuf, 0xDD ); 2442 emit_opcode( cbuf, 0xDD );
2496 emit_d8( cbuf, 0xD8 ); 2443 emit_d8( cbuf, 0xD8 );
2497 %} 2444 %}
2498 2445
2499 // !!!!! equivalent to Pop_Reg_F 2446 // !!!!! equivalent to Pop_Reg_F
2500 enc_class Pop_Reg_D( regD dst ) %{ 2447 enc_class Pop_Reg_DPR( regDPR dst ) %{
2501 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2448 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2502 emit_d8( cbuf, 0xD8+$dst$$reg ); 2449 emit_d8( cbuf, 0xD8+$dst$$reg );
2503 %} 2450 %}
2504 2451
2505 enc_class Push_Reg_D( regD dst ) %{ 2452 enc_class Push_Reg_DPR( regDPR dst ) %{
2506 emit_opcode( cbuf, 0xD9 ); 2453 emit_opcode( cbuf, 0xD9 );
2507 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 2454 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
2508 %} 2455 %}
2509 2456
2510 enc_class strictfp_bias1( regD dst ) %{ 2457 enc_class strictfp_bias1( regDPR dst ) %{
2511 emit_opcode( cbuf, 0xDB ); // FLD m80real 2458 emit_opcode( cbuf, 0xDB ); // FLD m80real
2512 emit_opcode( cbuf, 0x2D ); 2459 emit_opcode( cbuf, 0x2D );
2513 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 2460 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2514 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2461 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2515 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2462 emit_opcode( cbuf, 0xC8+$dst$$reg );
2516 %} 2463 %}
2517 2464
2518 enc_class strictfp_bias2( regD dst ) %{ 2465 enc_class strictfp_bias2( regDPR dst ) %{
2519 emit_opcode( cbuf, 0xDB ); // FLD m80real 2466 emit_opcode( cbuf, 0xDB ); // FLD m80real
2520 emit_opcode( cbuf, 0x2D ); 2467 emit_opcode( cbuf, 0x2D );
2521 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 2468 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2522 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 2469 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2523 emit_opcode( cbuf, 0xC8+$dst$$reg ); 2470 emit_opcode( cbuf, 0xC8+$dst$$reg );
2539 // Push the integer in stackSlot 'src' onto FP-stack 2486 // Push the integer in stackSlot 'src' onto FP-stack
2540 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2487 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
2541 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2488 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2542 %} 2489 %}
2543 2490
2544 // Push the float in stackSlot 'src' onto FP-stack
2545 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
2546 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
2547 %}
2548
2549 // Push the double in stackSlot 'src' onto FP-stack
2550 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
2551 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
2552 %}
2553
2554 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2491 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2555 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2492 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2556 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2493 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2557 %} 2494 %}
2558 2495
2559 // Same as Pop_Mem_F except for opcode 2496 // Same as Pop_Mem_F except for opcode
2560 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2497 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2561 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2498 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2562 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2499 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2563 %} 2500 %}
2564 2501
2565 enc_class Pop_Reg_F( regF dst ) %{ 2502 enc_class Pop_Reg_FPR( regFPR dst ) %{
2566 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2503 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2567 emit_d8( cbuf, 0xD8+$dst$$reg ); 2504 emit_d8( cbuf, 0xD8+$dst$$reg );
2568 %} 2505 %}
2569 2506
2570 enc_class Push_Reg_F( regF dst ) %{ 2507 enc_class Push_Reg_FPR( regFPR dst ) %{
2571 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2508 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2572 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2509 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2573 %} 2510 %}
2574 2511
2575 // Push FPU's float to a stack-slot, and pop FPU-stack 2512 // Push FPU's float to a stack-slot, and pop FPU-stack
2576 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{ 2513 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2577 int pop = 0x02; 2514 int pop = 0x02;
2578 if ($src$$reg != FPR1L_enc) { 2515 if ($src$$reg != FPR1L_enc) {
2579 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2516 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2580 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2517 emit_d8( cbuf, 0xC0-1+$src$$reg );
2581 pop = 0x03; 2518 pop = 0x03;
2582 } 2519 }
2583 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2520 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
2584 %} 2521 %}
2585 2522
2586 // Push FPU's double to a stack-slot, and pop FPU-stack 2523 // Push FPU's double to a stack-slot, and pop FPU-stack
2587 enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{ 2524 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2588 int pop = 0x02; 2525 int pop = 0x02;
2589 if ($src$$reg != FPR1L_enc) { 2526 if ($src$$reg != FPR1L_enc) {
2590 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2527 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2591 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2528 emit_d8( cbuf, 0xC0-1+$src$$reg );
2592 pop = 0x03; 2529 pop = 0x03;
2593 } 2530 }
2594 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2531 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
2595 %} 2532 %}
2596 2533
2597 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack 2534 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2598 enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{ 2535 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2599 int pop = 0xD0 - 1; // -1 since we skip FLD 2536 int pop = 0xD0 - 1; // -1 since we skip FLD
2600 if ($src$$reg != FPR1L_enc) { 2537 if ($src$$reg != FPR1L_enc) {
2601 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2538 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
2602 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2539 emit_d8( cbuf, 0xC0-1+$src$$reg );
2603 pop = 0xD8; 2540 pop = 0xD8;
2605 emit_opcode( cbuf, 0xDD ); 2542 emit_opcode( cbuf, 0xDD );
2606 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2543 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
2607 %} 2544 %}
2608 2545
2609 2546
2610 enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{ 2547 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2611 MacroAssembler masm(&cbuf);
2612 masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
2613 masm.fmul( $src2$$reg+0); // value at TOS
2614 masm.fadd( $src$$reg+0); // value at TOS
2615 masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
2616 %}
2617
2618
2619 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2620 // load dst in FPR0 2548 // load dst in FPR0
2621 emit_opcode( cbuf, 0xD9 ); 2549 emit_opcode( cbuf, 0xD9 );
2622 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2550 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2623 if ($src$$reg != FPR1L_enc) { 2551 if ($src$$reg != FPR1L_enc) {
2624 // fincstp 2552 // fincstp
2632 emit_opcode (cbuf, 0xD9); 2560 emit_opcode (cbuf, 0xD9);
2633 emit_opcode (cbuf, 0xF6); 2561 emit_opcode (cbuf, 0xF6);
2634 } 2562 }
2635 %} 2563 %}
2636 2564
2637 enc_class Push_ModD_encoding( regXD src0, regXD src1) %{ 2565 enc_class Push_ModD_encoding(regD src0, regD src1) %{
2638 // Allocate a word 2566 MacroAssembler _masm(&cbuf);
2639 emit_opcode(cbuf,0x83); // SUB ESP,8 2567 __ subptr(rsp, 8);
2640 emit_opcode(cbuf,0xEC); 2568 __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2641 emit_d8(cbuf,0x08); 2569 __ fld_d(Address(rsp, 0));
2642 2570 __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2643 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1 2571 __ fld_d(Address(rsp, 0));
2644 emit_opcode (cbuf, 0x0F ); 2572 %}
2645 emit_opcode (cbuf, 0x11 ); 2573
2646 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); 2574 enc_class Push_ModF_encoding(regF src0, regF src1) %{
2647 2575 MacroAssembler _masm(&cbuf);
2648 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 2576 __ subptr(rsp, 4);
2649 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 2577 __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2650 2578 __ fld_s(Address(rsp, 0));
2651 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0 2579 __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2652 emit_opcode (cbuf, 0x0F ); 2580 __ fld_s(Address(rsp, 0));
2653 emit_opcode (cbuf, 0x11 ); 2581 %}
2654 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); 2582
2655 2583 enc_class Push_ResultD(regD dst) %{
2656 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 2584 MacroAssembler _masm(&cbuf);
2657 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 2585 __ fstp_d(Address(rsp, 0));
2658 2586 __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2659 %} 2587 __ addptr(rsp, 8);
2660 2588 %}
2661 enc_class Push_ModX_encoding( regX src0, regX src1) %{ 2589
2662 // Allocate a word 2590 enc_class Push_ResultF(regF dst, immI d8) %{
2663 emit_opcode(cbuf,0x83); // SUB ESP,4 2591 MacroAssembler _masm(&cbuf);
2664 emit_opcode(cbuf,0xEC); 2592 __ fstp_s(Address(rsp, 0));
2665 emit_d8(cbuf,0x04); 2593 __ movflt($dst$$XMMRegister, Address(rsp, 0));
2666 2594 __ addptr(rsp, $d8$$constant);
2667 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1 2595 %}
2668 emit_opcode (cbuf, 0x0F ); 2596
2669 emit_opcode (cbuf, 0x11 ); 2597 enc_class Push_SrcD(regD src) %{
2670 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); 2598 MacroAssembler _masm(&cbuf);
2671 2599 __ subptr(rsp, 8);
2672 emit_opcode(cbuf,0xD9 ); // FLD [ESP] 2600 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2673 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 2601 __ fld_d(Address(rsp, 0));
2674
2675 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
2676 emit_opcode (cbuf, 0x0F );
2677 emit_opcode (cbuf, 0x11 );
2678 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2679
2680 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2681 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2682
2683 %}
2684
2685 enc_class Push_ResultXD(regXD dst) %{
2686 store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
2687
2688 // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
2689 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2690 emit_opcode (cbuf, 0x0F );
2691 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2692 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2693
2694 emit_opcode(cbuf,0x83); // ADD ESP,8
2695 emit_opcode(cbuf,0xC4);
2696 emit_d8(cbuf,0x08);
2697 %}
2698
2699 enc_class Push_ResultX(regX dst, immI d8) %{
2700 store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
2701
2702 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
2703 emit_opcode (cbuf, 0x0F );
2704 emit_opcode (cbuf, 0x10 );
2705 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2706
2707 emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
2708 emit_opcode(cbuf,0xC4);
2709 emit_d8(cbuf,$d8$$constant);
2710 %}
2711
2712 enc_class Push_SrcXD(regXD src) %{
2713 // Allocate a word
2714 emit_opcode(cbuf,0x83); // SUB ESP,8
2715 emit_opcode(cbuf,0xEC);
2716 emit_d8(cbuf,0x08);
2717
2718 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
2719 emit_opcode (cbuf, 0x0F );
2720 emit_opcode (cbuf, 0x11 );
2721 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
2722
2723 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2724 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2725 %} 2602 %}
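The rewritten Push_ModD/Push_ModF, Push_ResultD/Push_ResultF and Push_SrcD encodings all follow one pattern: reserve a stack temp, store the XMM value with movdbl/movflt, reload it onto the x87 stack with fld_d/fld_s, and reverse the trip with fstp, because the x87-only operations they feed (fprem and the transcendental helpers) cannot read XMM registers directly. A sketch of the full round trip in the same MacroAssembler style, where xmm_val is a placeholder register:

    __ subptr(rsp, 8);                     // reserve a 64-bit stack temp
    __ movdbl(Address(rsp, 0), xmm_val);   // spill the SSE value
    __ fld_d(Address(rsp, 0));             // load it onto the x87 stack (ST0)
    // ... x87-only work happens here ...
    __ fstp_d(Address(rsp, 0));            // store ST0 back to the temp and pop
    __ movdbl(xmm_val, Address(rsp, 0));   // reload the result into XMM
    __ addptr(rsp, 8);                     // release the temp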
2726 2603
2727 enc_class push_stack_temp_qword() %{ 2604 enc_class push_stack_temp_qword() %{
2728 emit_opcode(cbuf,0x83); // SUB ESP,8 2605 MacroAssembler _masm(&cbuf);
2729 emit_opcode(cbuf,0xEC); 2606 __ subptr(rsp, 8);
2730 emit_d8 (cbuf,0x08);
2731 %} 2607 %}
2732 2608
2733 enc_class pop_stack_temp_qword() %{ 2609 enc_class pop_stack_temp_qword() %{
2734 emit_opcode(cbuf,0x83); // ADD ESP,8 2610 MacroAssembler _masm(&cbuf);
2735 emit_opcode(cbuf,0xC4); 2611 __ addptr(rsp, 8);
2736 emit_d8 (cbuf,0x08); 2612 %}
2737 %} 2613
2738 2614 enc_class push_xmm_to_fpr1(regD src) %{
2739 enc_class push_xmm_to_fpr1( regXD xmm_src ) %{ 2615 MacroAssembler _masm(&cbuf);
2740 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src 2616 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2741 emit_opcode (cbuf, 0x0F ); 2617 __ fld_d(Address(rsp, 0));
2742 emit_opcode (cbuf, 0x11 );
2743 encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
2744
2745 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2746 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2747 %} 2618 %}
2748 2619
2749 // Compute X^Y using Intel's fast hardware instructions, if possible. 2620 // Compute X^Y using Intel's fast hardware instructions, if possible.
2750 // Otherwise return a NaN. 2621 // Otherwise return a NaN.
2751 enc_class pow_exp_core_encoding %{ 2622 enc_class pow_exp_core_encoding %{
2783 emit_d32(cbuf,0); 2654 emit_d32(cbuf,0);
2784 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q 2655 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2785 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); 2656 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2786 %} 2657 %}
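Only the tail of pow_exp_core_encoding is visible in this hunk; its final fmul combines 2^int(Q) with 2^frac(Q). The identity being exploited is X^Y = 2^Q with Q = Y*log2(X), split so the fractional part stays in the range the hardware handles while the integer part becomes a pure exponent scale. A sketch of the same identity in portable C++ (fast path only; the NaN fallback mentioned above and non-positive X are handled elsewhere):

    #include <cmath>
    static double pow_via_exp2(double x, double y) {   // assumes x > 0
      double q  = y * std::log2(x);                    // Q = Y*log2(X)
      double qi = std::trunc(q);                       // int(Q)
      double qf = q - qi;                              // frac(Q), in (-1, 1)
      return std::exp2(qf) * std::exp2(qi);            // 2^frac(Q) * 2^int(Q) = X^Y
    }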
2787 2658
2788 // enc_class Pop_Reg_Mod_D( regD dst, regD src) 2659 enc_class Push_Result_Mod_DPR( regDPR src) %{
2789 // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
2790
2791 enc_class Push_Result_Mod_D( regD src) %{
2792 if ($src$$reg != FPR1L_enc) { 2660 if ($src$$reg != FPR1L_enc) {
2793 // fincstp 2661 // fincstp
2794 emit_opcode (cbuf, 0xD9); 2662 emit_opcode (cbuf, 0xD9);
2795 emit_opcode (cbuf, 0xF7); 2663 emit_opcode (cbuf, 0xF7);
2796 // FXCH FPR1 with src 2664 // FXCH FPR1 with src
2815 // jnp ::skip 2683 // jnp ::skip
2816 emit_opcode( cbuf, 0x7B ); 2684 emit_opcode( cbuf, 0x7B );
2817 emit_opcode( cbuf, 0x05 ); 2685 emit_opcode( cbuf, 0x05 );
2818 %} 2686 %}
2819 2687
2820 enc_class emitModD() %{ 2688 enc_class emitModDPR() %{
2821 // fprem must be iterative 2689 // fprem must be iterative
2822 // :: loop 2690 // :: loop
2823 // fprem 2691 // fprem
2824 emit_opcode( cbuf, 0xD9 ); 2692 emit_opcode( cbuf, 0xD9 );
2825 emit_opcode( cbuf, 0xF8 ); 2693 emit_opcode( cbuf, 0xF8 );
2920 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2788 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2921 emit_d32( cbuf, 1 ); 2789 emit_d32( cbuf, 1 );
2922 %} 2790 %}
2923 2791
2924 2792
2925 // XMM version of CmpF_Result. Because the XMM compare
2926 // instructions set the EFLAGS directly. It becomes simpler than
2927 // the float version above.
2928 enc_class CmpX_Result(eRegI dst) %{
2929 MacroAssembler _masm(&cbuf);
2930 Label nan, inc, done;
2931
2932 __ jccb(Assembler::parity, nan);
2933 __ jccb(Assembler::equal, done);
2934 __ jccb(Assembler::above, inc);
2935 __ bind(nan);
2936 __ decrement(as_Register($dst$$reg)); // NO L qqq
2937 __ jmpb(done);
2938 __ bind(inc);
2939 __ increment(as_Register($dst$$reg)); // NO L qqq
2940 __ bind(done);
2941 %}
2942
2943 // Compare the longs and set flags 2793 // Compare the longs and set flags
2944 // BROKEN! Do Not use as-is 2794 // BROKEN! Do Not use as-is
2945 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2795 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2946 // CMP $src1.hi,$src2.hi 2796 // CMP $src1.hi,$src2.hi
2947 emit_opcode( cbuf, 0x3B ); 2797 emit_opcode( cbuf, 0x3B );
3158 emit_opcode(cbuf,0xF7); // NEG lo 3008 emit_opcode(cbuf,0xF7); // NEG lo
3159 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 3009 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
3160 emit_opcode(cbuf,0x83); // SBB hi,0 3010 emit_opcode(cbuf,0x83); // SBB hi,0
3161 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 3011 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
3162 emit_d8 (cbuf,0 ); 3012 emit_d8 (cbuf,0 );
3163 %}
3164
3165 enc_class movq_ld(regXD dst, memory mem) %{
3166 MacroAssembler _masm(&cbuf);
3167 __ movq($dst$$XMMRegister, $mem$$Address);
3168 %}
3169
3170 enc_class movq_st(memory mem, regXD src) %{
3171 MacroAssembler _masm(&cbuf);
3172 __ movq($mem$$Address, $src$$XMMRegister);
3173 %}
3174
3175 enc_class pshufd_8x8(regX dst, regX src) %{
3176 MacroAssembler _masm(&cbuf);
3177
3178 encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
3179 __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
3180 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
3181 %}
3182
3183 enc_class pshufd_4x16(regX dst, regX src) %{
3184 MacroAssembler _masm(&cbuf);
3185
3186 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
3187 %}
3188
3189 enc_class pshufd(regXD dst, regXD src, int mode) %{
3190 MacroAssembler _masm(&cbuf);
3191
3192 __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
3193 %}
3194
3195 enc_class pxor(regXD dst, regXD src) %{
3196 MacroAssembler _masm(&cbuf);
3197
3198 __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
3199 %}
3200
3201 enc_class mov_i2x(regXD dst, eRegI src) %{
3202 MacroAssembler _masm(&cbuf);
3203
3204 __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
3205 %} 3013 %}
3206 3014
3207 3015
3208 // Because the transitions from emitted code to the runtime 3016 // Because the transitions from emitted code to the runtime
3209 // monitorenter/exit helper stubs are so slow it's critical that 3017 // monitorenter/exit helper stubs are so slow it's critical that
3755 // Convert a double to an int. Java semantics require we do complex 3563 // Convert a double to an int. Java semantics require we do complex
3756 // manglelations in the corner cases. So we set the rounding mode to 3564 // manglelations in the corner cases. So we set the rounding mode to
3757 // 'zero', store the darned double down as an int, and reset the 3565 // 'zero', store the darned double down as an int, and reset the
3758 // rounding mode to 'nearest'. The hardware throws an exception which 3566 // rounding mode to 'nearest'. The hardware throws an exception which
3759 // patches up the correct value directly to the stack. 3567 // patches up the correct value directly to the stack.
3760 enc_class D2I_encoding( regD src ) %{ 3568 enc_class DPR2I_encoding( regDPR src ) %{
3761 // Flip to round-to-zero mode. We attempted to allow invalid-op 3569 // Flip to round-to-zero mode. We attempted to allow invalid-op
3762 // exceptions here, so that a NAN or other corner-case value will 3570 // exceptions here, so that a NAN or other corner-case value will
3763 // thrown an exception (but normal values get converted at full speed). 3571 // thrown an exception (but normal values get converted at full speed).
3764 // However, I2C adapters and other float-stack manglers leave pending 3572 // However, I2C adapters and other float-stack manglers leave pending
3765 // invalid-op exceptions hanging. We would have to clear them before 3573 // invalid-op exceptions hanging. We would have to clear them before
3798 emit_opcode(cbuf,0xE8); // Call into runtime 3606 emit_opcode(cbuf,0xE8); // Call into runtime
3799 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3607 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3800 // Carry on here... 3608 // Carry on here...
3801 %} 3609 %}
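The fast path above truncates with FIST and then compares the result against 0x80000000, the x87 'integer indefinite' pattern; anything that produced it (NaN, out-of-range values, or a genuine Integer.MIN_VALUE, which the compare cannot distinguish) is redone by StubRoutines::d2i_wrapper(). A sketch of the Java d2i semantics that slow path has to deliver, in portable C++:

    #include <cstdint>
    #include <cmath>
    #include <limits>
    static int32_t java_d2i(double d) {
      if (std::isnan(d)) return 0;                       // NaN -> 0
      if (d >= (double)std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();      // clamp high
      if (d <= (double)std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();      // clamp low
      return (int32_t)d;                                 // in range: truncate toward zero
    }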
3802 3610
3803 enc_class D2L_encoding( regD src ) %{ 3611 enc_class DPR2L_encoding( regDPR src ) %{
3804 emit_opcode(cbuf,0xD9); // FLDCW trunc 3612 emit_opcode(cbuf,0xD9); // FLDCW trunc
3805 emit_opcode(cbuf,0x2D); 3613 emit_opcode(cbuf,0x2D);
3806 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3614 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3807 // Allocate a word 3615 // Allocate a word
3808 emit_opcode(cbuf,0x83); // SUB ESP,8 3616 emit_opcode(cbuf,0x83); // SUB ESP,8
3840 emit_opcode(cbuf,0xE8); // Call into runtime 3648 emit_opcode(cbuf,0xE8); // Call into runtime
3841 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3649 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3842 // Carry on here... 3650 // Carry on here...
3843 %} 3651 %}
3844 3652
3845 enc_class X2L_encoding( regX src ) %{ 3653 enc_class FMul_ST_reg( eRegFPR src1 ) %{
3846 // Allocate a word
3847 emit_opcode(cbuf,0x83); // SUB ESP,8
3848 emit_opcode(cbuf,0xEC);
3849 emit_d8(cbuf,0x08);
3850
3851 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3852 emit_opcode (cbuf, 0x0F );
3853 emit_opcode (cbuf, 0x11 );
3854 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3855
3856 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3857 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3858
3859 emit_opcode(cbuf,0xD9); // FLDCW trunc
3860 emit_opcode(cbuf,0x2D);
3861 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3862
3863 // Encoding assumes a double has been pushed into FPR0.
3864 // Store down the double as a long, popping the FPU stack
3865 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3866 emit_opcode(cbuf,0x3C);
3867 emit_d8(cbuf,0x24);
3868
3869 // Restore the rounding mode; mask the exception
3870 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3871 emit_opcode(cbuf,0x2D);
3872 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3873 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3874 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3875
3876 // Load the converted int; adjust CPU stack
3877 emit_opcode(cbuf,0x58); // POP EAX
3878
3879 emit_opcode(cbuf,0x5A); // POP EDX
3880
3881 emit_opcode(cbuf,0x81); // CMP EDX,imm
3882 emit_d8 (cbuf,0xFA); // rdx
3883 emit_d32 (cbuf,0x80000000);// 0x80000000
3884
3885 emit_opcode(cbuf,0x75); // JNE around_slow_call
3886 emit_d8 (cbuf,0x13+4); // Size of slow_call
3887
3888 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3889 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3890
3891 emit_opcode(cbuf,0x75); // JNE around_slow_call
3892 emit_d8 (cbuf,0x13); // Size of slow_call
3893
3894 // Allocate a word
3895 emit_opcode(cbuf,0x83); // SUB ESP,4
3896 emit_opcode(cbuf,0xEC);
3897 emit_d8(cbuf,0x04);
3898
3899 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src
3900 emit_opcode (cbuf, 0x0F );
3901 emit_opcode (cbuf, 0x11 );
3902 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3903
3904 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
3905 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3906
3907 emit_opcode(cbuf,0x83); // ADD ESP,4
3908 emit_opcode(cbuf,0xC4);
3909 emit_d8(cbuf,0x04);
3910
3911 // CALL directly to the runtime
3912 cbuf.set_insts_mark();
3913 emit_opcode(cbuf,0xE8); // Call into runtime
3914 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3915 // Carry on here...
3916 %}
3917
3918 enc_class XD2L_encoding( regXD src ) %{
3919 // Allocate a word
3920 emit_opcode(cbuf,0x83); // SUB ESP,8
3921 emit_opcode(cbuf,0xEC);
3922 emit_d8(cbuf,0x08);
3923
3924 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
3925 emit_opcode (cbuf, 0x0F );
3926 emit_opcode (cbuf, 0x11 );
3927 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3928
3929 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
3930 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3931
3932 emit_opcode(cbuf,0xD9); // FLDCW trunc
3933 emit_opcode(cbuf,0x2D);
3934 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3935
3936 // Encoding assumes a double has been pushed into FPR0.
3937 // Store down the double as a long, popping the FPU stack
3938 emit_opcode(cbuf,0xDF); // FISTP [ESP]
3939 emit_opcode(cbuf,0x3C);
3940 emit_d8(cbuf,0x24);
3941
3942 // Restore the rounding mode; mask the exception
3943 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode
3944 emit_opcode(cbuf,0x2D);
3945 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3946 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3947 : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3948
3949 // Load the converted int; adjust CPU stack
3950 emit_opcode(cbuf,0x58); // POP EAX
3951
3952 emit_opcode(cbuf,0x5A); // POP EDX
3953
3954 emit_opcode(cbuf,0x81); // CMP EDX,imm
3955 emit_d8 (cbuf,0xFA); // rdx
3956 emit_d32 (cbuf,0x80000000); // 0x80000000
3957
3958 emit_opcode(cbuf,0x75); // JNE around_slow_call
3959 emit_d8 (cbuf,0x13+4); // Size of slow_call
3960
3961 emit_opcode(cbuf,0x85); // TEST EAX,EAX
3962 emit_opcode(cbuf,0xC0); // 2/rax,/rax,
3963
3964 emit_opcode(cbuf,0x75); // JNE around_slow_call
3965 emit_d8 (cbuf,0x13); // Size of slow_call
3966
3967 // Push src onto stack slow-path
3968 // Allocate a word
3969 emit_opcode(cbuf,0x83); // SUB ESP,8
3970 emit_opcode(cbuf,0xEC);
3971 emit_d8(cbuf,0x08);
3972
3973 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
3974 emit_opcode (cbuf, 0x0F );
3975 emit_opcode (cbuf, 0x11 );
3976 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3977
3978 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
3979 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3980
3981 emit_opcode(cbuf,0x83); // ADD ESP,8
3982 emit_opcode(cbuf,0xC4);
3983 emit_d8(cbuf,0x08);
3984
3985 // CALL directly to the runtime
3986 cbuf.set_insts_mark();
3987 emit_opcode(cbuf,0xE8); // Call into runtime
3988 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3989 // Carry on here...
3990 %}
3991
3992 enc_class D2X_encoding( regX dst, regD src ) %{
3993 // Allocate a word
3994 emit_opcode(cbuf,0x83); // SUB ESP,4
3995 emit_opcode(cbuf,0xEC);
3996 emit_d8(cbuf,0x04);
3997 int pop = 0x02;
3998 if ($src$$reg != FPR1L_enc) {
3999 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
4000 emit_d8( cbuf, 0xC0-1+$src$$reg );
4001 pop = 0x03;
4002 }
4003 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP]
4004
4005 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
4006 emit_opcode (cbuf, 0x0F );
4007 emit_opcode (cbuf, 0x10 );
4008 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4009
4010 emit_opcode(cbuf,0x83); // ADD ESP,4
4011 emit_opcode(cbuf,0xC4);
4012 emit_d8(cbuf,0x04);
4013 // Carry on here...
4014 %}
4015
4016 enc_class FX2I_encoding( regX src, eRegI dst ) %{
4017 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
4018
4019 // Compare the result to see if we need to go to the slow path
4020 emit_opcode(cbuf,0x81); // CMP dst,imm
4021 emit_rm (cbuf,0x3,0x7,$dst$$reg);
4022 emit_d32 (cbuf,0x80000000); // 0x80000000
4023
4024 emit_opcode(cbuf,0x75); // JNE around_slow_call
4025 emit_d8 (cbuf,0x13); // Size of slow_call
4026 // Store xmm to a temp memory
4027 // location and push it onto stack.
4028
4029 emit_opcode(cbuf,0x83); // SUB ESP,4
4030 emit_opcode(cbuf,0xEC);
4031 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4032
4033 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
4034 emit_opcode (cbuf, 0x0F );
4035 emit_opcode (cbuf, 0x11 );
4036 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4037
4038 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
4039 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4040
4041 emit_opcode(cbuf,0x83); // ADD ESP,4
4042 emit_opcode(cbuf,0xC4);
4043 emit_d8(cbuf, $primary ? 0x8 : 0x4);
4044
4045 // CALL directly to the runtime
4046 cbuf.set_insts_mark();
4047 emit_opcode(cbuf,0xE8); // Call into runtime
4048 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4049
4050 // Carry on here...
4051 %}
4052
4053 enc_class X2D_encoding( regD dst, regX src ) %{
4054 // Allocate a word
4055 emit_opcode(cbuf,0x83); // SUB ESP,4
4056 emit_opcode(cbuf,0xEC);
4057 emit_d8(cbuf,0x04);
4058
4059 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4060 emit_opcode (cbuf, 0x0F );
4061 emit_opcode (cbuf, 0x11 );
4062 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4063
4064 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
4065 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4066
4067 emit_opcode(cbuf,0x83); // ADD ESP,4
4068 emit_opcode(cbuf,0xC4);
4069 emit_d8(cbuf,0x04);
4070
4071 // Carry on here...
4072 %}
4073
4074 enc_class AbsXF_encoding(regX dst) %{
4075 address signmask_address=(address)float_signmask_pool;
4076 // andpd:\tANDPS $dst,[signconst]
4077 emit_opcode(cbuf, 0x0F);
4078 emit_opcode(cbuf, 0x54);
4079 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4080 emit_d32(cbuf, (int)signmask_address);
4081 %}
4082
4083 enc_class AbsXD_encoding(regXD dst) %{
4084 address signmask_address=(address)double_signmask_pool;
4085 // andpd:\tANDPD $dst,[signconst]
4086 emit_opcode(cbuf, 0x66);
4087 emit_opcode(cbuf, 0x0F);
4088 emit_opcode(cbuf, 0x54);
4089 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4090 emit_d32(cbuf, (int)signmask_address);
4091 %}
4092
4093 enc_class NegXF_encoding(regX dst) %{
4094 address signmask_address=(address)float_signflip_pool;
4095 // andpd:\tXORPS $dst,[signconst]
4096 emit_opcode(cbuf, 0x0F);
4097 emit_opcode(cbuf, 0x57);
4098 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4099 emit_d32(cbuf, (int)signmask_address);
4100 %}
4101
4102 enc_class NegXD_encoding(regXD dst) %{
4103 address signmask_address=(address)double_signflip_pool;
4104 // andpd:\tXORPD $dst,[signconst]
4105 emit_opcode(cbuf, 0x66);
4106 emit_opcode(cbuf, 0x0F);
4107 emit_opcode(cbuf, 0x57);
4108 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
4109 emit_d32(cbuf, (int)signmask_address);
4110 %}
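The removed AbsXF/AbsXD and NegXF/NegXD encodings implement abs and negate purely as bit operations: ANDPS/ANDPD against the signmask pool clears the sign bit, XORPS/XORPD against the signflip pool toggles it. A scalar sketch of the same trick (the pool constants hold the corresponding masks at XMM width):

    #include <cstdint>
    #include <cstring>
    static float abs_via_mask(float f) {
      uint32_t bits; std::memcpy(&bits, &f, sizeof bits);
      bits &= 0x7FFFFFFFu;                // clear the sign bit -> |f|
      std::memcpy(&f, &bits, sizeof f);
      return f;
    }
    static float neg_via_mask(float f) {
      uint32_t bits; std::memcpy(&bits, &f, sizeof bits);
      bits ^= 0x80000000u;                // flip the sign bit -> -f
      std::memcpy(&f, &bits, sizeof f);
      return f;
    }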
4111
4112 enc_class FMul_ST_reg( eRegF src1 ) %{
4113 // Operand was loaded from memory into fp ST (stack top) 3654 // Operand was loaded from memory into fp ST (stack top)
4114 // FMUL ST,$src /* D8 C8+i */ 3655 // FMUL ST,$src /* D8 C8+i */
4115 emit_opcode(cbuf, 0xD8); 3656 emit_opcode(cbuf, 0xD8);
4116 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3657 emit_opcode(cbuf, 0xC8 + $src1$$reg);
4117 %} 3658 %}
4118 3659
4119 enc_class FAdd_ST_reg( eRegF src2 ) %{ 3660 enc_class FAdd_ST_reg( eRegFPR src2 ) %{
4120 // FADDP ST,src2 /* D8 C0+i */ 3661 // FADDP ST,src2 /* D8 C0+i */
4121 emit_opcode(cbuf, 0xD8); 3662 emit_opcode(cbuf, 0xD8);
4122 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3663 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4123 //could use FADDP src2,fpST /* DE C0+i */ 3664 //could use FADDP src2,fpST /* DE C0+i */
4124 %} 3665 %}
4125 3666
4126 enc_class FAddP_reg_ST( eRegF src2 ) %{ 3667 enc_class FAddP_reg_ST( eRegFPR src2 ) %{
4127 // FADDP src2,ST /* DE C0+i */ 3668 // FADDP src2,ST /* DE C0+i */
4128 emit_opcode(cbuf, 0xDE); 3669 emit_opcode(cbuf, 0xDE);
4129 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3670 emit_opcode(cbuf, 0xC0 + $src2$$reg);
4130 %} 3671 %}
4131 3672
4132 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{ 3673 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
4133 // Operand has been loaded into fp ST (stack top) 3674 // Operand has been loaded into fp ST (stack top)
4134 // FSUB ST,$src1 3675 // FSUB ST,$src1
4135 emit_opcode(cbuf, 0xD8); 3676 emit_opcode(cbuf, 0xD8);
4136 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3677 emit_opcode(cbuf, 0xE0 + $src1$$reg);
4137 3678
4138 // FDIV 3679 // FDIV
4139 emit_opcode(cbuf, 0xD8); 3680 emit_opcode(cbuf, 0xD8);
4140 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3681 emit_opcode(cbuf, 0xF0 + $src2$$reg);
4141 %} 3682 %}
4142 3683
4143 enc_class MulFAddF (eRegF src1, eRegF src2) %{ 3684 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
4144 // Operand was loaded from memory into fp ST (stack top) 3685 // Operand was loaded from memory into fp ST (stack top)
4145 // FADD ST,$src /* D8 C0+i */ 3686 // FADD ST,$src /* D8 C0+i */
4146 emit_opcode(cbuf, 0xD8); 3687 emit_opcode(cbuf, 0xD8);
4147 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3688 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4148 3689
4150 emit_opcode(cbuf, 0xD8); 3691 emit_opcode(cbuf, 0xD8);
4151 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3692 emit_opcode(cbuf, 0xC8 + $src2$$reg);
4152 %} 3693 %}
4153 3694
4154 3695
4155 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{ 3696 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
4156 // Operand was loaded from memory into fp ST (stack top) 3697 // Operand was loaded from memory into fp ST (stack top)
4157 // FADD ST,$src /* D8 C0+i */ 3698 // FADD ST,$src /* D8 C0+i */
4158 emit_opcode(cbuf, 0xD8); 3699 emit_opcode(cbuf, 0xD8);
4159 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3700 emit_opcode(cbuf, 0xC0 + $src1$$reg);
4160 3701
4172 int scale = $mem$$scale; 3713 int scale = $mem$$scale;
4173 int displace = $mem$$disp; 3714 int displace = $mem$$disp;
4174 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 3715 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4175 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); 3716 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4176 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3717 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
4177 %}
4178
4179 enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
4180 { // Atomic long load
4181 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4182 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4183 emit_opcode(cbuf,0x0F);
4184 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4185 int base = $mem$$base;
4186 int index = $mem$$index;
4187 int scale = $mem$$scale;
4188 int displace = $mem$$disp;
4189 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4190 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4191 }
4192 { // MOVSD $dst,$tmp ! atomic long store
4193 emit_opcode(cbuf,0xF2);
4194 emit_opcode(cbuf,0x0F);
4195 emit_opcode(cbuf,0x11);
4196 int base = $dst$$base;
4197 int index = $dst$$index;
4198 int scale = $dst$$scale;
4199 int displace = $dst$$disp;
4200 bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4201 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4202 }
4203 %}
4204
4205 enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
4206 { // Atomic long load
4207 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4208 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4209 emit_opcode(cbuf,0x0F);
4210 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4211 int base = $mem$$base;
4212 int index = $mem$$index;
4213 int scale = $mem$$scale;
4214 int displace = $mem$$disp;
4215 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4216 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4217 }
4218 { // MOVD $dst.lo,$tmp
4219 emit_opcode(cbuf,0x66);
4220 emit_opcode(cbuf,0x0F);
4221 emit_opcode(cbuf,0x7E);
4222 emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4223 }
4224 { // PSRLQ $tmp,32
4225 emit_opcode(cbuf,0x66);
4226 emit_opcode(cbuf,0x0F);
4227 emit_opcode(cbuf,0x73);
4228 emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4229 emit_d8(cbuf, 0x20);
4230 }
4231 { // MOVD $dst.hi,$tmp
4232 emit_opcode(cbuf,0x66);
4233 emit_opcode(cbuf,0x0F);
4234 emit_opcode(cbuf,0x7E);
4235 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4236 }
4237 %} 3718 %}
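The removed enc_loadLX_volatile and enc_loadLX_reg_volatile exist because a volatile Java long must be read atomically, and on 32-bit x86 a pair of ordinary 32-bit loads is not: the value is fetched with one aligned 64-bit XMM move (movsd or movlpd) and then either parked in a stack slot or split into the GPR pair. A MacroAssembler-style sketch of the register-destination variant, with xmm_tmp, dst_lo, dst_hi and mem_addr as placeholders:

    __ movdbl(xmm_tmp, mem_addr);   // one 64-bit load of the long
    __ movdl(dst_lo, xmm_tmp);      // low 32 bits  -> dst.lo
    __ psrlq(xmm_tmp, 32);          // shift the high half down
    __ movdl(dst_hi, xmm_tmp);      // high 32 bits -> dst.hi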
4238 3719
4239 // Volatile Store Long. Must be atomic, so move it into 3720 // Volatile Store Long. Must be atomic, so move it into
4240 // the FP TOS and then do a 64-bit FIST. Has to probe the 3721 // the FP TOS and then do a 64-bit FIST. Has to probe the
4241 // target address before the store (for null-ptr checks) 3722 // target address before the store (for null-ptr checks)
4249 int index = $mem$$index; 3730 int index = $mem$$index;
4250 int scale = $mem$$scale; 3731 int scale = $mem$$scale;
4251 int displace = $mem$$disp; 3732 int displace = $mem$$disp;
4252 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 3733 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4253 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); 3734 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4254 %}
4255
4256 enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
4257 { // Atomic long load
4258 // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4259 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4260 emit_opcode(cbuf,0x0F);
4261 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4262 int base = $src$$base;
4263 int index = $src$$index;
4264 int scale = $src$$scale;
4265 int displace = $src$$disp;
4266 bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4267 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4268 }
4269 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4270 { // MOVSD $mem,$tmp ! atomic long store
4271 emit_opcode(cbuf,0xF2);
4272 emit_opcode(cbuf,0x0F);
4273 emit_opcode(cbuf,0x11);
4274 int base = $mem$$base;
4275 int index = $mem$$index;
4276 int scale = $mem$$scale;
4277 int displace = $mem$$disp;
4278 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4279 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4280 }
4281 %}
4282
4283 enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
4284 { // MOVD $tmp,$src.lo
4285 emit_opcode(cbuf,0x66);
4286 emit_opcode(cbuf,0x0F);
4287 emit_opcode(cbuf,0x6E);
4288 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4289 }
4290 { // MOVD $tmp2,$src.hi
4291 emit_opcode(cbuf,0x66);
4292 emit_opcode(cbuf,0x0F);
4293 emit_opcode(cbuf,0x6E);
4294 emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4295 }
4296 { // PUNPCKLDQ $tmp,$tmp2
4297 emit_opcode(cbuf,0x66);
4298 emit_opcode(cbuf,0x0F);
4299 emit_opcode(cbuf,0x62);
4300 emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4301 }
4302 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
4303 { // MOVSD $mem,$tmp ! atomic long store
4304 emit_opcode(cbuf,0xF2);
4305 emit_opcode(cbuf,0x0F);
4306 emit_opcode(cbuf,0x11);
4307 int base = $mem$$base;
4308 int index = $mem$$index;
4309 int scale = $mem$$scale;
4310 int displace = $mem$$disp;
4311 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4312 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4313 }
4314 %} 3735 %}
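The store side mirrors the load: the surviving enc_storeL_volatile pushes the long through the FPU and writes it with a 64-bit FIST, while the removed enc_storeLX_volatile / enc_storeLX_reg_volatile packed the value into an XMM register and issued a single MOVSD so the 64-bit store stays atomic. A MacroAssembler-style sketch of the register-source variant (the same MOVD + MOVD + PUNPCKLDQ packing the removed MovL2XD_reg encoding used earlier in this change), with placeholder names:

    __ movdl(xmm_tmp,  src_lo);        // xmm_tmp[31:0]  = src.lo
    __ movdl(xmm_tmp2, src_hi);        // xmm_tmp2[31:0] = src.hi
    __ punpckldq(xmm_tmp, xmm_tmp2);   // xmm_tmp[63:0]  = hi:lo
    __ movdbl(mem_addr, xmm_tmp);      // one 64-bit store to memory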
4315 3736
4316 // Safepoint Poll. This polls the safepoint page, and causes an 3737 // Safepoint Poll. This polls the safepoint page, and causes an
4317 // exception if it is not readable. Unfortunately, it kills the condition code 3738 // exception if it is not readable. Unfortunately, it kills the condition code
4318 // in the process 3739 // in the process
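A sketch of the poll this comment describes (MacroAssembler style, not the encoding that follows in the full file): the VM write-protects the polling page when it wants threads to stop, so the next poll faults and the signal handler parks the thread at the safepoint; reading the page with a TEST is what clobbers the condition codes.

    __ testl(rax, Address(rpoll_page, 0));   // rpoll_page stands in for the polling-page address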
4703 format %{ %} 4124 format %{ %}
4704 interface(CONST_INTER); 4125 interface(CONST_INTER);
4705 %} 4126 %}
4706 4127
4707 //Double Immediate zero 4128 //Double Immediate zero
4708 operand immD0() %{ 4129 operand immDPR0() %{
4709 // Do additional (and counter-intuitive) test against NaN to work around VC++ 4130 // Do additional (and counter-intuitive) test against NaN to work around VC++
4710 // bug that generates code such that NaNs compare equal to 0.0 4131 // bug that generates code such that NaNs compare equal to 0.0
4711 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 4132 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4712 match(ConD); 4133 match(ConD);
4713 4134
4715 format %{ %} 4136 format %{ %}
4716 interface(CONST_INTER); 4137 interface(CONST_INTER);
4717 %} 4138 %}
4718 4139
4719 // Double Immediate one 4140 // Double Immediate one
4720 operand immD1() %{ 4141 operand immDPR1() %{
4721 predicate( UseSSE<=1 && n->getd() == 1.0 ); 4142 predicate( UseSSE<=1 && n->getd() == 1.0 );
4722 match(ConD); 4143 match(ConD);
4723 4144
4724 op_cost(5); 4145 op_cost(5);
4725 format %{ %} 4146 format %{ %}
4726 interface(CONST_INTER); 4147 interface(CONST_INTER);
4727 %} 4148 %}
4728 4149
4729 // Double Immediate 4150 // Double Immediate
4730 operand immD() %{ 4151 operand immDPR() %{
4731 predicate(UseSSE<=1); 4152 predicate(UseSSE<=1);
4732 match(ConD); 4153 match(ConD);
4733 4154
4734 op_cost(5); 4155 op_cost(5);
4735 format %{ %} 4156 format %{ %}
4736 interface(CONST_INTER); 4157 interface(CONST_INTER);
4737 %} 4158 %}
4738 4159
4739 operand immXD() %{ 4160 operand immD() %{
4740 predicate(UseSSE>=2); 4161 predicate(UseSSE>=2);
4741 match(ConD); 4162 match(ConD);
4742 4163
4743 op_cost(5); 4164 op_cost(5);
4744 format %{ %} 4165 format %{ %}
4745 interface(CONST_INTER); 4166 interface(CONST_INTER);
4746 %} 4167 %}
4747 4168
4748 // Double Immediate zero 4169 // Double Immediate zero
4749 operand immXD0() %{ 4170 operand immD0() %{
4750 // Do additional (and counter-intuitive) test against NaN to work around VC++ 4171 // Do additional (and counter-intuitive) test against NaN to work around VC++
4751 // bug that generates code such that NaNs compare equal to 0.0 AND do not 4172 // bug that generates code such that NaNs compare equal to 0.0 AND do not
4752 // compare equal to -0.0. 4173 // compare equal to -0.0.
4753 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 4174 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4754 match(ConD); 4175 match(ConD);
4756 format %{ %} 4177 format %{ %}
4757 interface(CONST_INTER); 4178 interface(CONST_INTER);
4758 %} 4179 %}
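The immD0/immF0 predicates above deliberately compare the raw bit pattern (jlong_cast/jint_cast) rather than relying on a floating-point ==, so that -0.0 and NaN are rejected even where the compiler's comparison is unreliable. A minimal standalone C++ sketch of the same bit-pattern test (illustrative only; the helper name and the use of memcpy are not part of this file):

  // Standalone illustration of the bit-pattern test behind immD0/immF0.
  // jlong_cast/jint_cast in HotSpot reinterpret the value's bits; memcpy
  // is the portable stand-in used here.
  #include <cstdint>
  #include <cstring>
  #include <cmath>
  #include <cassert>

  static bool is_strict_plus_zero(double d) {
    int64_t bits;
    std::memcpy(&bits, &d, sizeof(bits));  // reinterpret bits, do not convert
    return bits == 0;                      // only +0.0 has an all-zero pattern
  }

  int main() {
    assert( is_strict_plus_zero(0.0));
    assert(!is_strict_plus_zero(-0.0));          // sign bit set -> rejected
    assert(!is_strict_plus_zero(std::nan("")));  // NaN bits are nonzero -> rejected
    return 0;
  }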
4759 4180
4760 // Float Immediate zero 4181 // Float Immediate zero
4761 operand immF0() %{ 4182 operand immFPR0() %{
4762 predicate(UseSSE == 0 && n->getf() == 0.0F); 4183 predicate(UseSSE == 0 && n->getf() == 0.0F);
4763 match(ConF); 4184 match(ConF);
4764 4185
4765 op_cost(5); 4186 op_cost(5);
4766 format %{ %} 4187 format %{ %}
4767 interface(CONST_INTER); 4188 interface(CONST_INTER);
4768 %} 4189 %}
4769 4190
4770 // Float Immediate one 4191 // Float Immediate one
4771 operand immF1() %{ 4192 operand immFPR1() %{
4772 predicate(UseSSE == 0 && n->getf() == 1.0F); 4193 predicate(UseSSE == 0 && n->getf() == 1.0F);
4773 match(ConF); 4194 match(ConF);
4774 4195
4775 op_cost(5); 4196 op_cost(5);
4776 format %{ %} 4197 format %{ %}
4777 interface(CONST_INTER); 4198 interface(CONST_INTER);
4778 %} 4199 %}
4779 4200
4780 // Float Immediate 4201 // Float Immediate
4781 operand immF() %{ 4202 operand immFPR() %{
4782 predicate( UseSSE == 0 ); 4203 predicate( UseSSE == 0 );
4783 match(ConF); 4204 match(ConF);
4784 4205
4785 op_cost(5); 4206 op_cost(5);
4786 format %{ %} 4207 format %{ %}
4787 interface(CONST_INTER); 4208 interface(CONST_INTER);
4788 %} 4209 %}
4789 4210
4790 // Float Immediate 4211 // Float Immediate
4791 operand immXF() %{ 4212 operand immF() %{
4792 predicate(UseSSE >= 1); 4213 predicate(UseSSE >= 1);
4793 match(ConF); 4214 match(ConF);
4794 4215
4795 op_cost(5); 4216 op_cost(5);
4796 format %{ %} 4217 format %{ %}
4797 interface(CONST_INTER); 4218 interface(CONST_INTER);
4798 %} 4219 %}
4799 4220
4800 // Float Immediate zero. Zero and not -0.0 4221 // Float Immediate zero. Zero and not -0.0
4801 operand immXF0() %{ 4222 operand immF0() %{
4802 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 4223 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4803 match(ConF); 4224 match(ConF);
4804 4225
4805 op_cost(5); 4226 op_cost(5);
4806 format %{ %} 4227 format %{ %}
5172 format %{ "FLAGS_LEGT" %} 4593 format %{ "FLAGS_LEGT" %}
5173 interface(REG_INTER); 4594 interface(REG_INTER);
5174 %} 4595 %}
5175 4596
5176 // Float register operands 4597 // Float register operands
5177 operand regD() %{ 4598 operand regDPR() %{
5178 predicate( UseSSE < 2 ); 4599 predicate( UseSSE < 2 );
5179 constraint(ALLOC_IN_RC(dbl_reg)); 4600 constraint(ALLOC_IN_RC(dbl_reg));
5180 match(RegD); 4601 match(RegD);
5181 match(regDPR1); 4602 match(regDPR1);
5182 match(regDPR2); 4603 match(regDPR2);
5183 format %{ %} 4604 format %{ %}
5184 interface(REG_INTER); 4605 interface(REG_INTER);
5185 %} 4606 %}
5186 4607
5187 operand regDPR1(regD reg) %{ 4608 operand regDPR1(regDPR reg) %{
5188 predicate( UseSSE < 2 ); 4609 predicate( UseSSE < 2 );
5189 constraint(ALLOC_IN_RC(dbl_reg0)); 4610 constraint(ALLOC_IN_RC(dbl_reg0));
5190 match(reg); 4611 match(reg);
5191 format %{ "FPR1" %} 4612 format %{ "FPR1" %}
5192 interface(REG_INTER); 4613 interface(REG_INTER);
5193 %} 4614 %}
5194 4615
5195 operand regDPR2(regD reg) %{ 4616 operand regDPR2(regDPR reg) %{
5196 predicate( UseSSE < 2 ); 4617 predicate( UseSSE < 2 );
5197 constraint(ALLOC_IN_RC(dbl_reg1)); 4618 constraint(ALLOC_IN_RC(dbl_reg1));
5198 match(reg); 4619 match(reg);
5199 format %{ "FPR2" %} 4620 format %{ "FPR2" %}
5200 interface(REG_INTER); 4621 interface(REG_INTER);
5201 %} 4622 %}
5202 4623
5203 operand regnotDPR1(regD reg) %{ 4624 operand regnotDPR1(regDPR reg) %{
5204 predicate( UseSSE < 2 ); 4625 predicate( UseSSE < 2 );
5205 constraint(ALLOC_IN_RC(dbl_notreg0)); 4626 constraint(ALLOC_IN_RC(dbl_notreg0));
5206 match(reg); 4627 match(reg);
5207 format %{ %} 4628 format %{ %}
5208 interface(REG_INTER); 4629 interface(REG_INTER);
5209 %} 4630 %}
5210 4631
5211 // XMM Double register operands 4632 // XMM Double register operands
5212 operand regXD() %{ 4633 operand regD() %{
5213 predicate( UseSSE>=2 ); 4634 predicate( UseSSE>=2 );
5214 constraint(ALLOC_IN_RC(xdb_reg)); 4635 constraint(ALLOC_IN_RC(xdb_reg));
5215 match(RegD); 4636 match(RegD);
5216 match(regXD6); 4637 match(regD6);
5217 match(regXD7); 4638 match(regD7);
5218 format %{ %} 4639 format %{ %}
5219 interface(REG_INTER); 4640 interface(REG_INTER);
5220 %} 4641 %}
5221 4642
5222 // XMM6 double register operands 4643 // XMM6 double register operands
5223 operand regXD6(regXD reg) %{ 4644 operand regD6(regD reg) %{
5224 predicate( UseSSE>=2 ); 4645 predicate( UseSSE>=2 );
5225 constraint(ALLOC_IN_RC(xdb_reg6)); 4646 constraint(ALLOC_IN_RC(xdb_reg6));
5226 match(reg); 4647 match(reg);
5227 format %{ "XMM6" %} 4648 format %{ "XMM6" %}
5228 interface(REG_INTER); 4649 interface(REG_INTER);
5229 %} 4650 %}
5230 4651
5231 // XMM7 double register operands 4652 // XMM7 double register operands
5232 operand regXD7(regXD reg) %{ 4653 operand regD7(regD reg) %{
5233 predicate( UseSSE>=2 ); 4654 predicate( UseSSE>=2 );
5234 constraint(ALLOC_IN_RC(xdb_reg7)); 4655 constraint(ALLOC_IN_RC(xdb_reg7));
5235 match(reg); 4656 match(reg);
5236 format %{ "XMM7" %} 4657 format %{ "XMM7" %}
5237 interface(REG_INTER); 4658 interface(REG_INTER);
5238 %} 4659 %}
5239 4660
5240 // Float register operands 4661 // Float register operands
5241 operand regF() %{ 4662 operand regFPR() %{
5242 predicate( UseSSE < 2 ); 4663 predicate( UseSSE < 2 );
5243 constraint(ALLOC_IN_RC(flt_reg)); 4664 constraint(ALLOC_IN_RC(flt_reg));
5244 match(RegF); 4665 match(RegF);
5245 match(regFPR1); 4666 match(regFPR1);
5246 format %{ %} 4667 format %{ %}
5247 interface(REG_INTER); 4668 interface(REG_INTER);
5248 %} 4669 %}
5249 4670
5250 // Float register operands 4671 // Float register operands
5251 operand regFPR1(regF reg) %{ 4672 operand regFPR1(regFPR reg) %{
5252 predicate( UseSSE < 2 ); 4673 predicate( UseSSE < 2 );
5253 constraint(ALLOC_IN_RC(flt_reg0)); 4674 constraint(ALLOC_IN_RC(flt_reg0));
5254 match(reg); 4675 match(reg);
5255 format %{ "FPR1" %} 4676 format %{ "FPR1" %}
5256 interface(REG_INTER); 4677 interface(REG_INTER);
5257 %} 4678 %}
5258 4679
5259 // XMM register operands 4680 // XMM register operands
5260 operand regX() %{ 4681 operand regF() %{
5261 predicate( UseSSE>=1 ); 4682 predicate( UseSSE>=1 );
5262 constraint(ALLOC_IN_RC(xmm_reg)); 4683 constraint(ALLOC_IN_RC(xmm_reg));
5263 match(RegF); 4684 match(RegF);
5264 format %{ %} 4685 format %{ %}
5265 interface(REG_INTER); 4686 interface(REG_INTER);
5999 cr : S3(read); 5420 cr : S3(read);
6000 DECODE : S0(2); // any 2 decoders 5421 DECODE : S0(2); // any 2 decoders
6001 %} 5422 %}
6002 5423
6003 // Conditional move double reg-reg 5424 // Conditional move double reg-reg
6004 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{ 5425 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
6005 single_instruction; 5426 single_instruction;
6006 dst : S4(write); 5427 dst : S4(write);
6007 src : S3(read); 5428 src : S3(read);
6008 cr : S3(read); 5429 cr : S3(read);
6009 DECODE : S0; // any decoder 5430 DECODE : S0; // any decoder
6010 %} 5431 %}
6011 5432
6012 // Float reg-reg operation 5433 // Float reg-reg operation
6013 pipe_class fpu_reg(regD dst) %{ 5434 pipe_class fpu_reg(regDPR dst) %{
6014 instruction_count(2); 5435 instruction_count(2);
6015 dst : S3(read); 5436 dst : S3(read);
6016 DECODE : S0(2); // any 2 decoders 5437 DECODE : S0(2); // any 2 decoders
6017 FPU : S3; 5438 FPU : S3;
6018 %} 5439 %}
6019 5440
6020 // Float reg-reg operation 5441 // Float reg-reg operation
6021 pipe_class fpu_reg_reg(regD dst, regD src) %{ 5442 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
6022 instruction_count(2); 5443 instruction_count(2);
6023 dst : S4(write); 5444 dst : S4(write);
6024 src : S3(read); 5445 src : S3(read);
6025 DECODE : S0(2); // any 2 decoders 5446 DECODE : S0(2); // any 2 decoders
6026 FPU : S3; 5447 FPU : S3;
6027 %} 5448 %}
6028 5449
6029 // Float reg-reg operation 5450 // Float reg-reg operation
6030 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{ 5451 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
6031 instruction_count(3); 5452 instruction_count(3);
6032 dst : S4(write); 5453 dst : S4(write);
6033 src1 : S3(read); 5454 src1 : S3(read);
6034 src2 : S3(read); 5455 src2 : S3(read);
6035 DECODE : S0(3); // any 3 decoders 5456 DECODE : S0(3); // any 3 decoders
6036 FPU : S3(2); 5457 FPU : S3(2);
6037 %} 5458 %}
6038 5459
6039 // Float reg-reg operation 5460 // Float reg-reg operation
6040 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ 5461 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
6041 instruction_count(4); 5462 instruction_count(4);
6042 dst : S4(write); 5463 dst : S4(write);
6043 src1 : S3(read); 5464 src1 : S3(read);
6044 src2 : S3(read); 5465 src2 : S3(read);
6045 src3 : S3(read); 5466 src3 : S3(read);
6046 DECODE : S0(4); // any 4 decoders 5467
6047 FPU : S3(2); 5468 FPU : S3(2);
6048 %} 5469 %}
6049 5470
6050 // Float reg-reg operation 5471 // Float reg-reg operation
6051 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{ 5472 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
6052 instruction_count(4); 5473 instruction_count(4);
6053 dst : S4(write); 5474 dst : S4(write);
6054 src1 : S3(read); 5475 src1 : S3(read);
6055 src2 : S3(read); 5476 src2 : S3(read);
6056 src3 : S3(read); 5477 src3 : S3(read);
6059 FPU : S3(2); 5480 FPU : S3(2);
6060 MEM : S3; 5481 MEM : S3;
6061 %} 5482 %}
6062 5483
6063 // Float reg-mem operation 5484 // Float reg-mem operation
6064 pipe_class fpu_reg_mem(regD dst, memory mem) %{ 5485 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
6065 instruction_count(2); 5486 instruction_count(2);
6066 dst : S5(write); 5487 dst : S5(write);
6067 mem : S3(read); 5488 mem : S3(read);
6068 D0 : S0; // big decoder only 5489 D0 : S0; // big decoder only
6069 DECODE : S1; // any decoder for FPU POP 5490 DECODE : S1; // any decoder for FPU POP
6070 FPU : S4; 5491 FPU : S4;
6071 MEM : S3; // any mem 5492 MEM : S3; // any mem
6072 %} 5493 %}
6073 5494
6074 // Float reg-mem operation 5495 // Float reg-mem operation
6075 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{ 5496 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
6076 instruction_count(3); 5497 instruction_count(3);
6077 dst : S5(write); 5498 dst : S5(write);
6078 src1 : S3(read); 5499 src1 : S3(read);
6079 mem : S3(read); 5500 mem : S3(read);
6080 D0 : S0; // big decoder only 5501 D0 : S0; // big decoder only
6082 FPU : S4; 5503 FPU : S4;
6083 MEM : S3; // any mem 5504 MEM : S3; // any mem
6084 %} 5505 %}
6085 5506
6086 // Float mem-reg operation 5507 // Float mem-reg operation
6087 pipe_class fpu_mem_reg(memory mem, regD src) %{ 5508 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
6088 instruction_count(2); 5509 instruction_count(2);
6089 src : S5(read); 5510 src : S5(read);
6090 mem : S3(read); 5511 mem : S3(read);
6091 DECODE : S0; // any decoder for FPU PUSH 5512 DECODE : S0; // any decoder for FPU PUSH
6092 D0 : S1; // big decoder only 5513 D0 : S1; // big decoder only
6093 FPU : S4; 5514 FPU : S4;
6094 MEM : S3; // any mem 5515 MEM : S3; // any mem
6095 %} 5516 %}
6096 5517
6097 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{ 5518 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
6098 instruction_count(3); 5519 instruction_count(3);
6099 src1 : S3(read); 5520 src1 : S3(read);
6100 src2 : S3(read); 5521 src2 : S3(read);
6101 mem : S3(read); 5522 mem : S3(read);
6102 DECODE : S0(2); // any decoder for FPU PUSH 5523 DECODE : S0(2); // any decoder for FPU PUSH
6103 D0 : S1; // big decoder only 5524 D0 : S1; // big decoder only
6104 FPU : S4; 5525 FPU : S4;
6105 MEM : S3; // any mem 5526 MEM : S3; // any mem
6106 %} 5527 %}
6107 5528
6108 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{ 5529 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
6109 instruction_count(3); 5530 instruction_count(3);
6110 src1 : S3(read); 5531 src1 : S3(read);
6111 src2 : S3(read); 5532 src2 : S3(read);
6112 mem : S4(read); 5533 mem : S4(read);
6113 DECODE : S0; // any decoder for FPU PUSH 5534 DECODE : S0; // any decoder for FPU PUSH
6132 D0 : S0(3); // big decoder only 5553 D0 : S0(3); // big decoder only
6133 FPU : S4; 5554 FPU : S4;
6134 MEM : S3(3); // any mem 5555 MEM : S3(3); // any mem
6135 %} 5556 %}
6136 5557
6137 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{ 5558 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
6138 instruction_count(3); 5559 instruction_count(3);
6139 src1 : S4(read); 5560 src1 : S4(read);
6140 mem : S4(read); 5561 mem : S4(read);
6141 DECODE : S0; // any decoder for FPU PUSH 5562 DECODE : S0; // any decoder for FPU PUSH
6142 D0 : S0(2); // big decoder only 5563 D0 : S0(2); // big decoder only
6143 FPU : S4; 5564 FPU : S4;
6144 MEM : S3(2); // any mem 5565 MEM : S3(2); // any mem
6145 %} 5566 %}
6146 5567
6147 // Float load constant 5568 // Float load constant
6148 pipe_class fpu_reg_con(regD dst) %{ 5569 pipe_class fpu_reg_con(regDPR dst) %{
6149 instruction_count(2); 5570 instruction_count(2);
6150 dst : S5(write); 5571 dst : S5(write);
6151 D0 : S0; // big decoder only for the load 5572 D0 : S0; // big decoder only for the load
6152 DECODE : S1; // any decoder for FPU POP 5573 DECODE : S1; // any decoder for FPU POP
6153 FPU : S4; 5574 FPU : S4;
6154 MEM : S3; // any mem 5575 MEM : S3; // any mem
6155 %} 5576 %}
6156 5577
6157 // Float load constant 5578 // Float load constant
6158 pipe_class fpu_reg_reg_con(regD dst, regD src) %{ 5579 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
6159 instruction_count(3); 5580 instruction_count(3);
6160 dst : S5(write); 5581 dst : S5(write);
6161 src : S3(read); 5582 src : S3(read);
6162 D0 : S0; // big decoder only for the load 5583 D0 : S0; // big decoder only for the load
6163 DECODE : S1(2); // any decoder for FPU POP 5584 DECODE : S1(2); // any decoder for FPU POP
6868 "FISTp $dst" %} 6289 "FISTp $dst" %}
6869 ins_encode(enc_loadL_volatile(mem,dst)); 6290 ins_encode(enc_loadL_volatile(mem,dst));
6870 ins_pipe( fpu_reg_mem ); 6291 ins_pipe( fpu_reg_mem );
6871 %} 6292 %}
6872 6293
6873 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ 6294 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
6874 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 6295 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6875 match(Set dst (LoadL mem)); 6296 match(Set dst (LoadL mem));
6876 effect(TEMP tmp); 6297 effect(TEMP tmp);
6877 ins_cost(180); 6298 ins_cost(180);
6878 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 6299 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6879 "MOVSD $dst,$tmp" %} 6300 "MOVSD $dst,$tmp" %}
6880 ins_encode(enc_loadLX_volatile(mem, dst, tmp)); 6301 ins_encode %{
6302 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6303 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
6304 %}
6881 ins_pipe( pipe_slow ); 6305 ins_pipe( pipe_slow );
6882 %} 6306 %}
6883 6307
6884 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ 6308 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
6885 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 6309 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6886 match(Set dst (LoadL mem)); 6310 match(Set dst (LoadL mem));
6887 effect(TEMP tmp); 6311 effect(TEMP tmp);
6888 ins_cost(160); 6312 ins_cost(160);
6889 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 6313 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
6890 "MOVD $dst.lo,$tmp\n\t" 6314 "MOVD $dst.lo,$tmp\n\t"
6891 "PSRLQ $tmp,32\n\t" 6315 "PSRLQ $tmp,32\n\t"
6892 "MOVD $dst.hi,$tmp" %} 6316 "MOVD $dst.hi,$tmp" %}
6893 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); 6317 ins_encode %{
6318 __ movdbl($tmp$$XMMRegister, $mem$$Address);
6319 __ movdl($dst$$Register, $tmp$$XMMRegister);
6320 __ psrlq($tmp$$XMMRegister, 32);
6321 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
6322 %}
6894 ins_pipe( pipe_slow ); 6323 ins_pipe( pipe_slow );
6895 %} 6324 %}
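On 32-bit x86 a long cannot be loaded atomically with two separate 32-bit moves, so loadLX_volatile/loadLX_reg_volatile above pull the whole value in with a single 64-bit MOVSD and then split it with MOVD/PSRLQ/MOVD. A hedged SSE2-intrinsics sketch of that split (illustrative only, not HotSpot code; the function name is made up):

  // Illustration of the MOVSD + MOVD/PSRLQ/MOVD sequence used by
  // loadLX_reg_volatile: one 64-bit XMM load, then extract the low and
  // high 32-bit halves. Requires SSE2 (<emmintrin.h>).
  #include <emmintrin.h>
  #include <cstdint>
  #include <cstring>
  #include <cstdio>

  static void load_long_atomic_split(const double* p, uint32_t* lo, uint32_t* hi) {
    __m128i v = _mm_castpd_si128(_mm_load_sd(p));  // MOVSD xmm, [p]  (single 64-bit load)
    *lo = (uint32_t)_mm_cvtsi128_si32(v);          // MOVD  lo, xmm
    v   = _mm_srli_epi64(v, 32);                   // PSRLQ xmm, 32
    *hi = (uint32_t)_mm_cvtsi128_si32(v);          // MOVD  hi, xmm
  }

  int main() {
    uint64_t value = 0x1122334455667788ULL;
    double as_double;
    std::memcpy(&as_double, &value, sizeof(as_double));  // reuse the same 64 bits
    uint32_t lo, hi;
    load_long_atomic_split(&as_double, &lo, &hi);
    std::printf("lo=%08x hi=%08x\n", lo, hi);            // lo=55667788 hi=11223344
    return 0;
  }

The single 64-bit access is what provides the atomicity here, assuming the location is suitably aligned.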
6896 6325
6897 // Load Range 6326 // Load Range
6898 instruct loadRange(eRegI dst, memory mem) %{ 6327 instruct loadRange(eRegI dst, memory mem) %{
6927 ins_encode( OpcP, RegMem(dst,mem)); 6356 ins_encode( OpcP, RegMem(dst,mem));
6928 ins_pipe( ialu_reg_mem ); 6357 ins_pipe( ialu_reg_mem );
6929 %} 6358 %}
6930 6359
6931 // Load Double 6360 // Load Double
6932 instruct loadD(regD dst, memory mem) %{ 6361 instruct loadDPR(regDPR dst, memory mem) %{
6933 predicate(UseSSE<=1); 6362 predicate(UseSSE<=1);
6934 match(Set dst (LoadD mem)); 6363 match(Set dst (LoadD mem));
6935 6364
6936 ins_cost(150); 6365 ins_cost(150);
6937 format %{ "FLD_D ST,$mem\n\t" 6366 format %{ "FLD_D ST,$mem\n\t"
6938 "FSTP $dst" %} 6367 "FSTP $dst" %}
6939 opcode(0xDD); /* DD /0 */ 6368 opcode(0xDD); /* DD /0 */
6940 ins_encode( OpcP, RMopc_Mem(0x00,mem), 6369 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6941 Pop_Reg_D(dst) ); 6370 Pop_Reg_DPR(dst) );
6942 ins_pipe( fpu_reg_mem ); 6371 ins_pipe( fpu_reg_mem );
6943 %} 6372 %}
6944 6373
6945 // Load Double to XMM 6374 // Load Double to XMM
6946 instruct loadXD(regXD dst, memory mem) %{ 6375 instruct loadD(regD dst, memory mem) %{
6947 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 6376 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
6948 match(Set dst (LoadD mem)); 6377 match(Set dst (LoadD mem));
6949 ins_cost(145); 6378 ins_cost(145);
6950 format %{ "MOVSD $dst,$mem" %} 6379 format %{ "MOVSD $dst,$mem" %}
6951 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); 6380 ins_encode %{
6381 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6382 %}
6952 ins_pipe( pipe_slow ); 6383 ins_pipe( pipe_slow );
6953 %} 6384 %}
6954 6385
6955 instruct loadXD_partial(regXD dst, memory mem) %{ 6386 instruct loadD_partial(regD dst, memory mem) %{
6956 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 6387 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
6957 match(Set dst (LoadD mem)); 6388 match(Set dst (LoadD mem));
6958 ins_cost(145); 6389 ins_cost(145);
6959 format %{ "MOVLPD $dst,$mem" %} 6390 format %{ "MOVLPD $dst,$mem" %}
6960 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem)); 6391 ins_encode %{
6392 __ movdbl ($dst$$XMMRegister, $mem$$Address);
6393 %}
6961 ins_pipe( pipe_slow ); 6394 ins_pipe( pipe_slow );
6962 %} 6395 %}
6963 6396
6964 // Load to XMM register (single-precision floating point) 6397 // Load to XMM register (single-precision floating point)
6965 // MOVSS instruction 6398 // MOVSS instruction
6966 instruct loadX(regX dst, memory mem) %{ 6399 instruct loadF(regF dst, memory mem) %{
6967 predicate(UseSSE>=1); 6400 predicate(UseSSE>=1);
6968 match(Set dst (LoadF mem)); 6401 match(Set dst (LoadF mem));
6969 ins_cost(145); 6402 ins_cost(145);
6970 format %{ "MOVSS $dst,$mem" %} 6403 format %{ "MOVSS $dst,$mem" %}
6971 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); 6404 ins_encode %{
6405 __ movflt ($dst$$XMMRegister, $mem$$Address);
6406 %}
6972 ins_pipe( pipe_slow ); 6407 ins_pipe( pipe_slow );
6973 %} 6408 %}
6974 6409
6975 // Load Float 6410 // Load Float
6976 instruct loadF(regF dst, memory mem) %{ 6411 instruct loadFPR(regFPR dst, memory mem) %{
6977 predicate(UseSSE==0); 6412 predicate(UseSSE==0);
6978 match(Set dst (LoadF mem)); 6413 match(Set dst (LoadF mem));
6979 6414
6980 ins_cost(150); 6415 ins_cost(150);
6981 format %{ "FLD_S ST,$mem\n\t" 6416 format %{ "FLD_S ST,$mem\n\t"
6982 "FSTP $dst" %} 6417 "FSTP $dst" %}
6983 opcode(0xD9); /* D9 /0 */ 6418 opcode(0xD9); /* D9 /0 */
6984 ins_encode( OpcP, RMopc_Mem(0x00,mem), 6419 ins_encode( OpcP, RMopc_Mem(0x00,mem),
6985 Pop_Reg_F(dst) ); 6420 Pop_Reg_FPR(dst) );
6986 ins_pipe( fpu_reg_mem ); 6421 ins_pipe( fpu_reg_mem );
6987 %} 6422 %}
6988 6423
6989 // Load Aligned Packed Byte to XMM register 6424 // Load Aligned Packed Byte to XMM register
6990 instruct loadA8B(regXD dst, memory mem) %{ 6425 instruct loadA8B(regD dst, memory mem) %{
6991 predicate(UseSSE>=1); 6426 predicate(UseSSE>=1);
6992 match(Set dst (Load8B mem)); 6427 match(Set dst (Load8B mem));
6993 ins_cost(125); 6428 ins_cost(125);
6994 format %{ "MOVQ $dst,$mem\t! packed8B" %} 6429 format %{ "MOVQ $dst,$mem\t! packed8B" %}
6995 ins_encode( movq_ld(dst, mem)); 6430 ins_encode %{
6431 __ movq($dst$$XMMRegister, $mem$$Address);
6432 %}
6996 ins_pipe( pipe_slow ); 6433 ins_pipe( pipe_slow );
6997 %} 6434 %}
6998 6435
6999 // Load Aligned Packed Short to XMM register 6436 // Load Aligned Packed Short to XMM register
7000 instruct loadA4S(regXD dst, memory mem) %{ 6437 instruct loadA4S(regD dst, memory mem) %{
7001 predicate(UseSSE>=1); 6438 predicate(UseSSE>=1);
7002 match(Set dst (Load4S mem)); 6439 match(Set dst (Load4S mem));
7003 ins_cost(125); 6440 ins_cost(125);
7004 format %{ "MOVQ $dst,$mem\t! packed4S" %} 6441 format %{ "MOVQ $dst,$mem\t! packed4S" %}
7005 ins_encode( movq_ld(dst, mem)); 6442 ins_encode %{
6443 __ movq($dst$$XMMRegister, $mem$$Address);
6444 %}
7006 ins_pipe( pipe_slow ); 6445 ins_pipe( pipe_slow );
7007 %} 6446 %}
7008 6447
7009 // Load Aligned Packed Char to XMM register 6448 // Load Aligned Packed Char to XMM register
7010 instruct loadA4C(regXD dst, memory mem) %{ 6449 instruct loadA4C(regD dst, memory mem) %{
7011 predicate(UseSSE>=1); 6450 predicate(UseSSE>=1);
7012 match(Set dst (Load4C mem)); 6451 match(Set dst (Load4C mem));
7013 ins_cost(125); 6452 ins_cost(125);
7014 format %{ "MOVQ $dst,$mem\t! packed4C" %} 6453 format %{ "MOVQ $dst,$mem\t! packed4C" %}
7015 ins_encode( movq_ld(dst, mem)); 6454 ins_encode %{
6455 __ movq($dst$$XMMRegister, $mem$$Address);
6456 %}
7016 ins_pipe( pipe_slow ); 6457 ins_pipe( pipe_slow );
7017 %} 6458 %}
7018 6459
7019 // Load Aligned Packed Integer to XMM register 6460 // Load Aligned Packed Integer to XMM register
7020 instruct load2IU(regXD dst, memory mem) %{ 6461 instruct load2IU(regD dst, memory mem) %{
7021 predicate(UseSSE>=1); 6462 predicate(UseSSE>=1);
7022 match(Set dst (Load2I mem)); 6463 match(Set dst (Load2I mem));
7023 ins_cost(125); 6464 ins_cost(125);
7024 format %{ "MOVQ $dst,$mem\t! packed2I" %} 6465 format %{ "MOVQ $dst,$mem\t! packed2I" %}
7025 ins_encode( movq_ld(dst, mem)); 6466 ins_encode %{
6467 __ movq($dst$$XMMRegister, $mem$$Address);
6468 %}
7026 ins_pipe( pipe_slow ); 6469 ins_pipe( pipe_slow );
7027 %} 6470 %}
7028 6471
7029 // Load Aligned Packed Single to XMM 6472 // Load Aligned Packed Single to XMM
7030 instruct loadA2F(regXD dst, memory mem) %{ 6473 instruct loadA2F(regD dst, memory mem) %{
7031 predicate(UseSSE>=1); 6474 predicate(UseSSE>=1);
7032 match(Set dst (Load2F mem)); 6475 match(Set dst (Load2F mem));
7033 ins_cost(145); 6476 ins_cost(145);
7034 format %{ "MOVQ $dst,$mem\t! packed2F" %} 6477 format %{ "MOVQ $dst,$mem\t! packed2F" %}
7035 ins_encode( movq_ld(dst, mem)); 6478 ins_encode %{
6479 __ movq($dst$$XMMRegister, $mem$$Address);
6480 %}
7036 ins_pipe( pipe_slow ); 6481 ins_pipe( pipe_slow );
7037 %} 6482 %}
7038 6483
7039 // Load Effective Address 6484 // Load Effective Address
7040 instruct leaP8(eRegP dst, indOffset8 mem) %{ 6485 instruct leaP8(eRegP dst, indOffset8 mem) %{
7137 opcode(0x33,0x33); 6582 opcode(0x33,0x33);
7138 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 6583 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
7139 ins_pipe( ialu_reg_long ); 6584 ins_pipe( ialu_reg_long );
7140 %} 6585 %}
7141 6586
6587 // The instruction usage is guarded by predicate in operand immFPR().
6588 instruct loadConFPR(regFPR dst, immFPR con) %{
6589 match(Set dst con);
6590 ins_cost(125);
6591 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6592 "FSTP $dst" %}
6593 ins_encode %{
6594 __ fld_s($constantaddress($con));
6595 __ fstp_d($dst$$reg);
6596 %}
6597 ins_pipe(fpu_reg_con);
6598 %}
6599
6600 // The instruction usage is guarded by predicate in operand immFPR0().
6601 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6602 match(Set dst con);
6603 ins_cost(125);
6604 format %{ "FLDZ ST\n\t"
6605 "FSTP $dst" %}
6606 ins_encode %{
6607 __ fldz();
6608 __ fstp_d($dst$$reg);
6609 %}
6610 ins_pipe(fpu_reg_con);
6611 %}
6612
6613 // The instruction usage is guarded by predicate in operand immFPR1().
6614 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6615 match(Set dst con);
6616 ins_cost(125);
6617 format %{ "FLD1 ST\n\t"
6618 "FSTP $dst" %}
6619 ins_encode %{
6620 __ fld1();
6621 __ fstp_d($dst$$reg);
6622 %}
6623 ins_pipe(fpu_reg_con);
6624 %}
6625
7142 // The instruction usage is guarded by predicate in operand immF(). 6626 // The instruction usage is guarded by predicate in operand immF().
7143 instruct loadConF(regF dst, immF con) %{ 6627 instruct loadConF(regF dst, immF con) %{
7144 match(Set dst con); 6628 match(Set dst con);
7145 ins_cost(125); 6629 ins_cost(125);
7146 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 6630 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
7147 "FSTP $dst" %} 6631 ins_encode %{
7148 ins_encode %{ 6632 __ movflt($dst$$XMMRegister, $constantaddress($con));
7149 __ fld_s($constantaddress($con)); 6633 %}
7150 __ fstp_d($dst$$reg); 6634 ins_pipe(pipe_slow);
7151 %}
7152 ins_pipe(fpu_reg_con);
7153 %} 6635 %}
7154 6636
7155 // The instruction usage is guarded by predicate in operand immF0(). 6637 // The instruction usage is guarded by predicate in operand immF0().
7156 instruct loadConF0(regF dst, immF0 con) %{ 6638 instruct loadConF0(regF dst, immF0 src) %{
7157 match(Set dst con);
7158 ins_cost(125);
7159 format %{ "FLDZ ST\n\t"
7160 "FSTP $dst" %}
7161 ins_encode %{
7162 __ fldz();
7163 __ fstp_d($dst$$reg);
7164 %}
7165 ins_pipe(fpu_reg_con);
7166 %}
7167
7168 // The instruction usage is guarded by predicate in operand immF1().
7169 instruct loadConF1(regF dst, immF1 con) %{
7170 match(Set dst con);
7171 ins_cost(125);
7172 format %{ "FLD1 ST\n\t"
7173 "FSTP $dst" %}
7174 ins_encode %{
7175 __ fld1();
7176 __ fstp_d($dst$$reg);
7177 %}
7178 ins_pipe(fpu_reg_con);
7179 %}
7180
7181 // The instruction usage is guarded by predicate in operand immXF().
7182 instruct loadConX(regX dst, immXF con) %{
7183 match(Set dst con);
7184 ins_cost(125);
7185 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
7186 ins_encode %{
7187 __ movflt($dst$$XMMRegister, $constantaddress($con));
7188 %}
7189 ins_pipe(pipe_slow);
7190 %}
7191
7192 // The instruction usage is guarded by predicate in operand immXF0().
7193 instruct loadConX0(regX dst, immXF0 src) %{
7194 match(Set dst src); 6639 match(Set dst src);
7195 ins_cost(100); 6640 ins_cost(100);
7196 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6641 format %{ "XORPS $dst,$dst\t# float 0.0" %}
7197 ins_encode %{ 6642 ins_encode %{
7198 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6643 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7199 %} 6644 %}
7200 ins_pipe(pipe_slow); 6645 ins_pipe(pipe_slow);
6646 %}
6647
6648 // The instruction usage is guarded by predicate in operand immDPR().
6649 instruct loadConDPR(regDPR dst, immDPR con) %{
6650 match(Set dst con);
6651 ins_cost(125);
6652
6653 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6654 "FSTP $dst" %}
6655 ins_encode %{
6656 __ fld_d($constantaddress($con));
6657 __ fstp_d($dst$$reg);
6658 %}
6659 ins_pipe(fpu_reg_con);
6660 %}
6661
6662 // The instruction usage is guarded by predicate in operand immDPR0().
6663 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6664 match(Set dst con);
6665 ins_cost(125);
6666
6667 format %{ "FLDZ ST\n\t"
6668 "FSTP $dst" %}
6669 ins_encode %{
6670 __ fldz();
6671 __ fstp_d($dst$$reg);
6672 %}
6673 ins_pipe(fpu_reg_con);
6674 %}
6675
6676 // The instruction usage is guarded by predicate in operand immDPR1().
6677 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6678 match(Set dst con);
6679 ins_cost(125);
6680
6681 format %{ "FLD1 ST\n\t"
6682 "FSTP $dst" %}
6683 ins_encode %{
6684 __ fld1();
6685 __ fstp_d($dst$$reg);
6686 %}
6687 ins_pipe(fpu_reg_con);
7201 %} 6688 %}
7202 6689
7203 // The instruction usage is guarded by predicate in operand immD(). 6690 // The instruction usage is guarded by predicate in operand immD().
7204 instruct loadConD(regD dst, immD con) %{ 6691 instruct loadConD(regD dst, immD con) %{
7205 match(Set dst con); 6692 match(Set dst con);
7206 ins_cost(125); 6693 ins_cost(125);
7207 6694 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
7208 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6695 ins_encode %{
7209 "FSTP $dst" %} 6696 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7210 ins_encode %{ 6697 %}
7211 __ fld_d($constantaddress($con)); 6698 ins_pipe(pipe_slow);
7212 __ fstp_d($dst$$reg);
7213 %}
7214 ins_pipe(fpu_reg_con);
7215 %} 6699 %}
7216 6700
7217 // The instruction usage is guarded by predicate in operand immD0(). 6701 // The instruction usage is guarded by predicate in operand immD0().
7218 instruct loadConD0(regD dst, immD0 con) %{ 6702 instruct loadConD0(regD dst, immD0 src) %{
7219 match(Set dst con);
7220 ins_cost(125);
7221
7222 format %{ "FLDZ ST\n\t"
7223 "FSTP $dst" %}
7224 ins_encode %{
7225 __ fldz();
7226 __ fstp_d($dst$$reg);
7227 %}
7228 ins_pipe(fpu_reg_con);
7229 %}
7230
7231 // The instruction usage is guarded by predicate in operand immD1().
7232 instruct loadConD1(regD dst, immD1 con) %{
7233 match(Set dst con);
7234 ins_cost(125);
7235
7236 format %{ "FLD1 ST\n\t"
7237 "FSTP $dst" %}
7238 ins_encode %{
7239 __ fld1();
7240 __ fstp_d($dst$$reg);
7241 %}
7242 ins_pipe(fpu_reg_con);
7243 %}
7244
7245 // The instruction usage is guarded by predicate in operand immXD().
7246 instruct loadConXD(regXD dst, immXD con) %{
7247 match(Set dst con);
7248 ins_cost(125);
7249 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
7250 ins_encode %{
7251 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7252 %}
7253 ins_pipe(pipe_slow);
7254 %}
7255
7256 // The instruction usage is guarded by predicate in operand immXD0().
7257 instruct loadConXD0(regXD dst, immXD0 src) %{
7258 match(Set dst src); 6703 match(Set dst src);
7259 ins_cost(100); 6704 ins_cost(100);
7260 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6705 format %{ "XORPD $dst,$dst\t# double 0.0" %}
7261 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); 6706 ins_encode %{
6707 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6708 %}
7262 ins_pipe( pipe_slow ); 6709 ins_pipe( pipe_slow );
7263 %} 6710 %}
7264 6711
7265 // Load Stack Slot 6712 // Load Stack Slot
7266 instruct loadSSI(eRegI dst, stackSlotI src) %{ 6713 instruct loadSSI(eRegI dst, stackSlotI src) %{
7294 ins_encode( OpcP, RegMem(dst,src)); 6741 ins_encode( OpcP, RegMem(dst,src));
7295 ins_pipe( ialu_reg_mem ); 6742 ins_pipe( ialu_reg_mem );
7296 %} 6743 %}
7297 6744
7298 // Load Stack Slot 6745 // Load Stack Slot
7299 instruct loadSSF(regF dst, stackSlotF src) %{ 6746 instruct loadSSF(regFPR dst, stackSlotF src) %{
7300 match(Set dst src); 6747 match(Set dst src);
7301 ins_cost(125); 6748 ins_cost(125);
7302 6749
7303 format %{ "FLD_S $src\n\t" 6750 format %{ "FLD_S $src\n\t"
7304 "FSTP $dst" %} 6751 "FSTP $dst" %}
7305 opcode(0xD9); /* D9 /0, FLD m32real */ 6752 opcode(0xD9); /* D9 /0, FLD m32real */
7306 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6753 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
7307 Pop_Reg_F(dst) ); 6754 Pop_Reg_FPR(dst) );
7308 ins_pipe( fpu_reg_mem ); 6755 ins_pipe( fpu_reg_mem );
7309 %} 6756 %}
7310 6757
7311 // Load Stack Slot 6758 // Load Stack Slot
7312 instruct loadSSD(regD dst, stackSlotD src) %{ 6759 instruct loadSSD(regDPR dst, stackSlotD src) %{
7313 match(Set dst src); 6760 match(Set dst src);
7314 ins_cost(125); 6761 ins_cost(125);
7315 6762
7316 format %{ "FLD_D $src\n\t" 6763 format %{ "FLD_D $src\n\t"
7317 "FSTP $dst" %} 6764 "FSTP $dst" %}
7318 opcode(0xDD); /* DD /0, FLD m64real */ 6765 opcode(0xDD); /* DD /0, FLD m64real */
7319 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6766 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
7320 Pop_Reg_D(dst) ); 6767 Pop_Reg_DPR(dst) );
7321 ins_pipe( fpu_reg_mem ); 6768 ins_pipe( fpu_reg_mem );
7322 %} 6769 %}
7323 6770
7324 // Prefetch instructions. 6771 // Prefetch instructions.
7325 // Must be safe to execute with invalid address (cannot fault). 6772 // Must be safe to execute with invalid address (cannot fault).
7550 opcode(0x3B); 6997 opcode(0x3B);
7551 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6998 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
7552 ins_pipe( fpu_reg_mem ); 6999 ins_pipe( fpu_reg_mem );
7553 %} 7000 %}
7554 7001
7555 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{ 7002 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
7556 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 7003 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7557 match(Set mem (StoreL mem src)); 7004 match(Set mem (StoreL mem src));
7558 effect( TEMP tmp, KILL cr ); 7005 effect( TEMP tmp, KILL cr );
7559 ins_cost(380); 7006 ins_cost(380);
7560 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 7007 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7561 "MOVSD $tmp,$src\n\t" 7008 "MOVSD $tmp,$src\n\t"
7562 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 7009 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7563 opcode(0x3B); 7010 ins_encode %{
7564 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp)); 7011 __ cmpl(rax, $mem$$Address);
7012 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
7013 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7014 %}
7565 ins_pipe( pipe_slow ); 7015 ins_pipe( pipe_slow );
7566 %} 7016 %}
7567 7017
7568 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{ 7018 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
7569 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 7019 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
7570 match(Set mem (StoreL mem src)); 7020 match(Set mem (StoreL mem src));
7571 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 7021 effect( TEMP tmp2 , TEMP tmp, KILL cr );
7572 ins_cost(360); 7022 ins_cost(360);
7573 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 7023 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
7574 "MOVD $tmp,$src.lo\n\t" 7024 "MOVD $tmp,$src.lo\n\t"
7575 "MOVD $tmp2,$src.hi\n\t" 7025 "MOVD $tmp2,$src.hi\n\t"
7576 "PUNPCKLDQ $tmp,$tmp2\n\t" 7026 "PUNPCKLDQ $tmp,$tmp2\n\t"
7577 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 7027 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
7578 opcode(0x3B); 7028 ins_encode %{
7579 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2)); 7029 __ cmpl(rax, $mem$$Address);
7030 __ movdl($tmp$$XMMRegister, $src$$Register);
7031 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
7032 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
7033 __ movdbl($mem$$Address, $tmp$$XMMRegister);
7034 %}
7580 ins_pipe( pipe_slow ); 7035 ins_pipe( pipe_slow );
7581 %} 7036 %}
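storeLX_reg_volatile goes the other way: the two 32-bit halves of the long are first combined in an XMM register (MOVD, MOVD, PUNPCKLDQ) so that the store to memory is a single 64-bit MOVSD. An SSE2-intrinsics sketch of the combining step (illustrative only; the helper name is made up, and this sketch stores with MOVQ rather than MOVSD):

  // Illustration of the MOVD/MOVD/PUNPCKLDQ/MOVSD pattern used by
  // storeLX_reg_volatile: build the 64-bit value in an XMM register and
  // write it with one 64-bit store. Requires SSE2 (<emmintrin.h>).
  #include <emmintrin.h>
  #include <cstdint>
  #include <cassert>

  static void store_long_atomic_combine(uint64_t* dst, uint32_t lo, uint32_t hi) {
    __m128i vlo = _mm_cvtsi32_si128((int)lo);    // MOVD xmm1, lo
    __m128i vhi = _mm_cvtsi32_si128((int)hi);    // MOVD xmm2, hi
    __m128i v   = _mm_unpacklo_epi32(vlo, vhi);  // PUNPCKLDQ xmm1, xmm2
    _mm_storel_epi64((__m128i*)dst, v);          // single 64-bit store (MOVQ here,
                                                 // MOVSD in the .ad encoding)
  }

  int main() {
    uint64_t out = 0;
    store_long_atomic_combine(&out, 0x55667788u, 0x11223344u);
    assert(out == 0x1122334455667788ULL);
    return 0;
  }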
7582 7037
7583 // Store Pointer; for storing unknown oops and raw pointers 7038 // Store Pointer; for storing unknown oops and raw pointers
7584 instruct storeP(memory mem, anyRegP src) %{ 7039 instruct storeP(memory mem, anyRegP src) %{
7636 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 7091 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7637 ins_pipe( ialu_mem_imm ); 7092 ins_pipe( ialu_mem_imm );
7638 %} 7093 %}
7639 7094
7640 // Store Aligned Packed Byte XMM register to memory 7095 // Store Aligned Packed Byte XMM register to memory
7641 instruct storeA8B(memory mem, regXD src) %{ 7096 instruct storeA8B(memory mem, regD src) %{
7642 predicate(UseSSE>=1); 7097 predicate(UseSSE>=1);
7643 match(Set mem (Store8B mem src)); 7098 match(Set mem (Store8B mem src));
7644 ins_cost(145); 7099 ins_cost(145);
7645 format %{ "MOVQ $mem,$src\t! packed8B" %} 7100 format %{ "MOVQ $mem,$src\t! packed8B" %}
7646 ins_encode( movq_st(mem, src)); 7101 ins_encode %{
7102 __ movq($mem$$Address, $src$$XMMRegister);
7103 %}
7647 ins_pipe( pipe_slow ); 7104 ins_pipe( pipe_slow );
7648 %} 7105 %}
7649 7106
7650 // Store Aligned Packed Char/Short XMM register to memory 7107 // Store Aligned Packed Char/Short XMM register to memory
7651 instruct storeA4C(memory mem, regXD src) %{ 7108 instruct storeA4C(memory mem, regD src) %{
7652 predicate(UseSSE>=1); 7109 predicate(UseSSE>=1);
7653 match(Set mem (Store4C mem src)); 7110 match(Set mem (Store4C mem src));
7654 ins_cost(145); 7111 ins_cost(145);
7655 format %{ "MOVQ $mem,$src\t! packed4C" %} 7112 format %{ "MOVQ $mem,$src\t! packed4C" %}
7656 ins_encode( movq_st(mem, src)); 7113 ins_encode %{
7114 __ movq($mem$$Address, $src$$XMMRegister);
7115 %}
7657 ins_pipe( pipe_slow ); 7116 ins_pipe( pipe_slow );
7658 %} 7117 %}
7659 7118
7660 // Store Aligned Packed Integer XMM register to memory 7119 // Store Aligned Packed Integer XMM register to memory
7661 instruct storeA2I(memory mem, regXD src) %{ 7120 instruct storeA2I(memory mem, regD src) %{
7662 predicate(UseSSE>=1); 7121 predicate(UseSSE>=1);
7663 match(Set mem (Store2I mem src)); 7122 match(Set mem (Store2I mem src));
7664 ins_cost(145); 7123 ins_cost(145);
7665 format %{ "MOVQ $mem,$src\t! packed2I" %} 7124 format %{ "MOVQ $mem,$src\t! packed2I" %}
7666 ins_encode( movq_st(mem, src)); 7125 ins_encode %{
7126 __ movq($mem$$Address, $src$$XMMRegister);
7127 %}
7667 ins_pipe( pipe_slow ); 7128 ins_pipe( pipe_slow );
7668 %} 7129 %}
7669 7130
7670 // Store CMS card-mark Immediate 7131 // Store CMS card-mark Immediate
7671 instruct storeImmCM(memory mem, immI8 src) %{ 7132 instruct storeImmCM(memory mem, immI8 src) %{
7677 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 7138 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
7678 ins_pipe( ialu_mem_imm ); 7139 ins_pipe( ialu_mem_imm );
7679 %} 7140 %}
7680 7141
7681 // Store Double 7142 // Store Double
7682 instruct storeD( memory mem, regDPR1 src) %{ 7143 instruct storeDPR( memory mem, regDPR1 src) %{
7683 predicate(UseSSE<=1); 7144 predicate(UseSSE<=1);
7684 match(Set mem (StoreD mem src)); 7145 match(Set mem (StoreD mem src));
7685 7146
7686 ins_cost(100); 7147 ins_cost(100);
7687 format %{ "FST_D $mem,$src" %} 7148 format %{ "FST_D $mem,$src" %}
7688 opcode(0xDD); /* DD /2 */ 7149 opcode(0xDD); /* DD /2 */
7689 ins_encode( enc_FP_store(mem,src) ); 7150 ins_encode( enc_FPR_store(mem,src) );
7690 ins_pipe( fpu_mem_reg ); 7151 ins_pipe( fpu_mem_reg );
7691 %} 7152 %}
7692 7153
7693 // Store double does rounding on x86 7154 // Store double does rounding on x86
7694 instruct storeD_rounded( memory mem, regDPR1 src) %{ 7155 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
7695 predicate(UseSSE<=1); 7156 predicate(UseSSE<=1);
7696 match(Set mem (StoreD mem (RoundDouble src))); 7157 match(Set mem (StoreD mem (RoundDouble src)));
7697 7158
7698 ins_cost(100); 7159 ins_cost(100);
7699 format %{ "FST_D $mem,$src\t# round" %} 7160 format %{ "FST_D $mem,$src\t# round" %}
7700 opcode(0xDD); /* DD /2 */ 7161 opcode(0xDD); /* DD /2 */
7701 ins_encode( enc_FP_store(mem,src) ); 7162 ins_encode( enc_FPR_store(mem,src) );
7702 ins_pipe( fpu_mem_reg ); 7163 ins_pipe( fpu_mem_reg );
7703 %} 7164 %}
7704 7165
7705 // Store XMM register to memory (double-precision floating points) 7166 // Store XMM register to memory (double-precision floating points)
7706 // MOVSD instruction 7167 // MOVSD instruction
7707 instruct storeXD(memory mem, regXD src) %{ 7168 instruct storeD(memory mem, regD src) %{
7708 predicate(UseSSE>=2); 7169 predicate(UseSSE>=2);
7709 match(Set mem (StoreD mem src)); 7170 match(Set mem (StoreD mem src));
7710 ins_cost(95); 7171 ins_cost(95);
7711 format %{ "MOVSD $mem,$src" %} 7172 format %{ "MOVSD $mem,$src" %}
7712 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); 7173 ins_encode %{
7174 __ movdbl($mem$$Address, $src$$XMMRegister);
7175 %}
7713 ins_pipe( pipe_slow ); 7176 ins_pipe( pipe_slow );
7714 %} 7177 %}
7715 7178
7716 // Store XMM register to memory (single-precision floating point) 7179 // Store XMM register to memory (single-precision floating point)
7717 // MOVSS instruction 7180 // MOVSS instruction
7718 instruct storeX(memory mem, regX src) %{ 7181 instruct storeF(memory mem, regF src) %{
7719 predicate(UseSSE>=1); 7182 predicate(UseSSE>=1);
7720 match(Set mem (StoreF mem src)); 7183 match(Set mem (StoreF mem src));
7721 ins_cost(95); 7184 ins_cost(95);
7722 format %{ "MOVSS $mem,$src" %} 7185 format %{ "MOVSS $mem,$src" %}
7723 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); 7186 ins_encode %{
7187 __ movflt($mem$$Address, $src$$XMMRegister);
7188 %}
7724 ins_pipe( pipe_slow ); 7189 ins_pipe( pipe_slow );
7725 %} 7190 %}
7726 7191
7727 // Store Aligned Packed Single Float XMM register to memory 7192 // Store Aligned Packed Single Float XMM register to memory
7728 instruct storeA2F(memory mem, regXD src) %{ 7193 instruct storeA2F(memory mem, regD src) %{
7729 predicate(UseSSE>=1); 7194 predicate(UseSSE>=1);
7730 match(Set mem (Store2F mem src)); 7195 match(Set mem (Store2F mem src));
7731 ins_cost(145); 7196 ins_cost(145);
7732 format %{ "MOVQ $mem,$src\t! packed2F" %} 7197 format %{ "MOVQ $mem,$src\t! packed2F" %}
7733 ins_encode( movq_st(mem, src)); 7198 ins_encode %{
7199 __ movq($mem$$Address, $src$$XMMRegister);
7200 %}
7734 ins_pipe( pipe_slow ); 7201 ins_pipe( pipe_slow );
7735 %} 7202 %}
7736 7203
7737 // Store Float 7204 // Store Float
7738 instruct storeF( memory mem, regFPR1 src) %{ 7205 instruct storeFPR( memory mem, regFPR1 src) %{
7739 predicate(UseSSE==0); 7206 predicate(UseSSE==0);
7740 match(Set mem (StoreF mem src)); 7207 match(Set mem (StoreF mem src));
7741 7208
7742 ins_cost(100); 7209 ins_cost(100);
7743 format %{ "FST_S $mem,$src" %} 7210 format %{ "FST_S $mem,$src" %}
7744 opcode(0xD9); /* D9 /2 */ 7211 opcode(0xD9); /* D9 /2 */
7745 ins_encode( enc_FP_store(mem,src) ); 7212 ins_encode( enc_FPR_store(mem,src) );
7746 ins_pipe( fpu_mem_reg ); 7213 ins_pipe( fpu_mem_reg );
7747 %} 7214 %}
7748 7215
7749 // Store Float does rounding on x86 7216 // Store Float does rounding on x86
7750 instruct storeF_rounded( memory mem, regFPR1 src) %{ 7217 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
7751 predicate(UseSSE==0); 7218 predicate(UseSSE==0);
7752 match(Set mem (StoreF mem (RoundFloat src))); 7219 match(Set mem (StoreF mem (RoundFloat src)));
7753 7220
7754 ins_cost(100); 7221 ins_cost(100);
7755 format %{ "FST_S $mem,$src\t# round" %} 7222 format %{ "FST_S $mem,$src\t# round" %}
7756 opcode(0xD9); /* D9 /2 */ 7223 opcode(0xD9); /* D9 /2 */
7757 ins_encode( enc_FP_store(mem,src) ); 7224 ins_encode( enc_FPR_store(mem,src) );
7758 ins_pipe( fpu_mem_reg ); 7225 ins_pipe( fpu_mem_reg );
7759 %} 7226 %}
7760 7227
7761 // Store Float does rounding on x86 7228 // Store Float does rounding on x86
7762 instruct storeF_Drounded( memory mem, regDPR1 src) %{ 7229 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
7763 predicate(UseSSE<=1); 7230 predicate(UseSSE<=1);
7764 match(Set mem (StoreF mem (ConvD2F src))); 7231 match(Set mem (StoreF mem (ConvD2F src)));
7765 7232
7766 ins_cost(100); 7233 ins_cost(100);
7767 format %{ "FST_S $mem,$src\t# D-round" %} 7234 format %{ "FST_S $mem,$src\t# D-round" %}
7768 opcode(0xD9); /* D9 /2 */ 7235 opcode(0xD9); /* D9 /2 */
7769 ins_encode( enc_FP_store(mem,src) ); 7236 ins_encode( enc_FPR_store(mem,src) );
7770 ins_pipe( fpu_mem_reg ); 7237 ins_pipe( fpu_mem_reg );
7771 %} 7238 %}
7772 7239
7773 // Store immediate Float value (it is faster than store from FPU register) 7240 // Store immediate Float value (it is faster than store from FPU register)
7241 // The instruction usage is guarded by predicate in operand immFPR().
7242 instruct storeFPR_imm( memory mem, immFPR src) %{
7243 match(Set mem (StoreF mem src));
7244
7245 ins_cost(50);
7246 format %{ "MOV $mem,$src\t# store float" %}
7247 opcode(0xC7); /* C7 /0 */
7248 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
7249 ins_pipe( ialu_mem_imm );
7250 %}
7251
7252 // Store immediate Float value (it is faster than store from XMM register)
7774 // The instruction usage is guarded by predicate in operand immF(). 7253 // The instruction usage is guarded by predicate in operand immF().
7775 instruct storeF_imm( memory mem, immF src) %{ 7254 instruct storeF_imm( memory mem, immF src) %{
7776 match(Set mem (StoreF mem src)); 7255 match(Set mem (StoreF mem src));
7777 7256
7778 ins_cost(50); 7257 ins_cost(50);
7779 format %{ "MOV $mem,$src\t# store float" %} 7258 format %{ "MOV $mem,$src\t# store float" %}
7780 opcode(0xC7); /* C7 /0 */ 7259 opcode(0xC7); /* C7 /0 */
7781 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 7260 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
7782 ins_pipe( ialu_mem_imm );
7783 %}
7784
7785 // Store immediate Float value (it is faster than store from XMM register)
7786 // The instruction usage is guarded by predicate in operand immXF().
7787 instruct storeX_imm( memory mem, immXF src) %{
7788 match(Set mem (StoreF mem src));
7789
7790 ins_cost(50);
7791 format %{ "MOV $mem,$src\t# store float" %}
7792 opcode(0xC7); /* C7 /0 */
7793 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
7794 ins_pipe( ialu_mem_imm ); 7261 ins_pipe( ialu_mem_imm );
7795 %} 7262 %}
7796 7263
7797 // Store Integer to stack slot 7264 // Store Integer to stack slot
7798 instruct storeSSI(stackSlotI dst, eRegI src) %{ 7265 instruct storeSSI(stackSlotI dst, eRegI src) %{
7895 predicate(Matcher::post_store_load_barrier(n)); 7362 predicate(Matcher::post_store_load_barrier(n));
7896 ins_cost(0); 7363 ins_cost(0);
7897 7364
7898 size(0); 7365 size(0);
7899 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 7366 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7367 ins_encode( );
7368 ins_pipe(empty);
7369 %}
7370
7371 instruct membar_storestore() %{
7372 match(MemBarStoreStore);
7373 ins_cost(0);
7374
7375 size(0);
7376 format %{ "MEMBAR-storestore (empty encoding)" %}
7900 ins_encode( ); 7377 ins_encode( );
7901 ins_pipe(empty); 7378 ins_pipe(empty);
7902 %} 7379 %}
7903 7380
7904 //----------Move Instructions-------------------------------------------------- 7381 //----------Move Instructions--------------------------------------------------
8086 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 7563 // ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8087 // ins_pipe( pipe_cmov_mem ); 7564 // ins_pipe( pipe_cmov_mem );
8088 //%} 7565 //%}
8089 7566
8090 // Conditional move 7567 // Conditional move
8091 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{ 7568 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
8092 predicate(UseSSE<=1); 7569 predicate(UseSSE<=1);
8093 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7570 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8094 ins_cost(200); 7571 ins_cost(200);
8095 format %{ "FCMOV$cop $dst,$src\t# double" %} 7572 format %{ "FCMOV$cop $dst,$src\t# double" %}
8096 opcode(0xDA); 7573 opcode(0xDA);
8097 ins_encode( enc_cmov_d(cop,src) ); 7574 ins_encode( enc_cmov_dpr(cop,src) );
8098 ins_pipe( pipe_cmovD_reg ); 7575 ins_pipe( pipe_cmovDPR_reg );
8099 %} 7576 %}
8100 7577
8101 // Conditional move 7578 // Conditional move
8102 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{ 7579 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
8103 predicate(UseSSE==0); 7580 predicate(UseSSE==0);
8104 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7581 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8105 ins_cost(200); 7582 ins_cost(200);
8106 format %{ "FCMOV$cop $dst,$src\t# float" %} 7583 format %{ "FCMOV$cop $dst,$src\t# float" %}
8107 opcode(0xDA); 7584 opcode(0xDA);
8108 ins_encode( enc_cmov_d(cop,src) ); 7585 ins_encode( enc_cmov_dpr(cop,src) );
8109 ins_pipe( pipe_cmovD_reg ); 7586 ins_pipe( pipe_cmovDPR_reg );
8110 %} 7587 %}
8111 7588
8112 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 7589 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8113 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 7590 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
8114 predicate(UseSSE<=1); 7591 predicate(UseSSE<=1);
8115 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7592 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8116 ins_cost(200); 7593 ins_cost(200);
8117 format %{ "Jn$cop skip\n\t" 7594 format %{ "Jn$cop skip\n\t"
8118 "MOV $dst,$src\t# double\n" 7595 "MOV $dst,$src\t# double\n"
8119 "skip:" %} 7596 "skip:" %}
8120 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 7597 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
8121 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) ); 7598 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
8122 ins_pipe( pipe_cmovD_reg ); 7599 ins_pipe( pipe_cmovDPR_reg );
8123 %} 7600 %}
8124 7601
8125 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 7602 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8126 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 7603 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
8127 predicate(UseSSE==0); 7604 predicate(UseSSE==0);
8128 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7605 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8129 ins_cost(200); 7606 ins_cost(200);
8130 format %{ "Jn$cop skip\n\t" 7607 format %{ "Jn$cop skip\n\t"
8131 "MOV $dst,$src\t# float\n" 7608 "MOV $dst,$src\t# float\n"
8132 "skip:" %} 7609 "skip:" %}
8133 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 7610 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
8134 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) ); 7611 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
8135 ins_pipe( pipe_cmovD_reg ); 7612 ins_pipe( pipe_cmovDPR_reg );
8136 %} 7613 %}
8137 7614
8138 // No CMOVE with SSE/SSE2 7615 // No CMOVE with SSE/SSE2
8139 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{ 7616 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
8140 predicate (UseSSE>=1); 7617 predicate (UseSSE>=1);
8141 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7618 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8142 ins_cost(200); 7619 ins_cost(200);
8143 format %{ "Jn$cop skip\n\t" 7620 format %{ "Jn$cop skip\n\t"
8144 "MOVSS $dst,$src\t# float\n" 7621 "MOVSS $dst,$src\t# float\n"
8152 %} 7629 %}
8153 ins_pipe( pipe_slow ); 7630 ins_pipe( pipe_slow );
8154 %} 7631 %}
8155 7632
8156 // No CMOVE with SSE/SSE2 7633 // No CMOVE with SSE/SSE2
8157 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{ 7634 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
8158 predicate (UseSSE>=2); 7635 predicate (UseSSE>=2);
8159 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7636 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8160 ins_cost(200); 7637 ins_cost(200);
8161 format %{ "Jn$cop skip\n\t" 7638 format %{ "Jn$cop skip\n\t"
8162 "MOVSD $dst,$src\t# float\n" 7639 "MOVSD $dst,$src\t# float\n"
8170 %} 7647 %}
8171 ins_pipe( pipe_slow ); 7648 ins_pipe( pipe_slow );
8172 %} 7649 %}
8173 7650
8174 // unsigned version 7651 // unsigned version
8175 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{ 7652 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
8176 predicate (UseSSE>=1); 7653 predicate (UseSSE>=1);
8177 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7654 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8178 ins_cost(200); 7655 ins_cost(200);
8179 format %{ "Jn$cop skip\n\t" 7656 format %{ "Jn$cop skip\n\t"
8180 "MOVSS $dst,$src\t# float\n" 7657 "MOVSS $dst,$src\t# float\n"
8187 __ bind(skip); 7664 __ bind(skip);
8188 %} 7665 %}
8189 ins_pipe( pipe_slow ); 7666 ins_pipe( pipe_slow );
8190 %} 7667 %}
8191 7668
8192 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{ 7669 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
8193 predicate (UseSSE>=1); 7670 predicate (UseSSE>=1);
8194 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 7671 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8195 ins_cost(200); 7672 ins_cost(200);
8196 expand %{ 7673 expand %{
8197 fcmovX_regU(cop, cr, dst, src); 7674 fcmovF_regU(cop, cr, dst, src);
8198 %} 7675 %}
8199 %} 7676 %}
8200 7677
8201 // unsigned version 7678 // unsigned version
8202 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{ 7679 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
8203 predicate (UseSSE>=2); 7680 predicate (UseSSE>=2);
8204 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7681 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8205 ins_cost(200); 7682 ins_cost(200);
8206 format %{ "Jn$cop skip\n\t" 7683 format %{ "Jn$cop skip\n\t"
8207 "MOVSD $dst,$src\t# float\n" 7684 "MOVSD $dst,$src\t# float\n"
8214 __ bind(skip); 7691 __ bind(skip);
8215 %} 7692 %}
8216 ins_pipe( pipe_slow ); 7693 ins_pipe( pipe_slow );
8217 %} 7694 %}
8218 7695
8219 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{ 7696 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
8220 predicate (UseSSE>=2); 7697 predicate (UseSSE>=2);
8221 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7698 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8222 ins_cost(200); 7699 ins_cost(200);
8223 expand %{ 7700 expand %{
8224 fcmovXD_regU(cop, cr, dst, src); 7701 fcmovD_regU(cop, cr, dst, src);
8225 %} 7702 %}
8226 %} 7703 %}
8227 7704
8228 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 7705 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
8229 predicate(VM_Version::supports_cmov() ); 7706 predicate(VM_Version::supports_cmov() );
8438 ins_encode( OpcP, RegMem(dst,mem)); 7915 ins_encode( OpcP, RegMem(dst,mem));
8439 ins_pipe( ialu_reg_mem ); 7916 ins_pipe( ialu_reg_mem );
8440 %} 7917 %}
8441 7918
8442 // LoadLong-locked - same as a volatile long load when used with compare-swap 7919 // LoadLong-locked - same as a volatile long load when used with compare-swap
8443 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{ 7920 instruct loadLLocked(stackSlotL dst, memory mem) %{
8444 predicate(UseSSE<=1); 7921 predicate(UseSSE<=1);
8445 match(Set dst (LoadLLocked mem)); 7922 match(Set dst (LoadLLocked mem));
8446 7923
8447 ins_cost(200); 7924 ins_cost(200);
8448 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 7925 format %{ "FILD $mem\t# Atomic volatile long load\n\t"
8449 "FISTp $dst" %} 7926 "FISTp $dst" %}
8450 ins_encode(enc_loadL_volatile(mem,dst)); 7927 ins_encode(enc_loadL_volatile(mem,dst));
8451 ins_pipe( fpu_reg_mem ); 7928 ins_pipe( fpu_reg_mem );
8452 %} 7929 %}
8453 7930
8454 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{ 7931 instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
8455 predicate(UseSSE>=2); 7932 predicate(UseSSE>=2);
8456 match(Set dst (LoadLLocked mem)); 7933 match(Set dst (LoadLLocked mem));
8457 effect(TEMP tmp); 7934 effect(TEMP tmp);
8458 ins_cost(180); 7935 ins_cost(180);
8459 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 7936 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8460 "MOVSD $dst,$tmp" %} 7937 "MOVSD $dst,$tmp" %}
8461 ins_encode(enc_loadLX_volatile(mem, dst, tmp)); 7938 ins_encode %{
7939 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7940 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
7941 %}
8462 ins_pipe( pipe_slow ); 7942 ins_pipe( pipe_slow );
8463 %} 7943 %}
8464 7944
8465 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{ 7945 instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
8466 predicate(UseSSE>=2); 7946 predicate(UseSSE>=2);
8467 match(Set dst (LoadLLocked mem)); 7947 match(Set dst (LoadLLocked mem));
8468 effect(TEMP tmp); 7948 effect(TEMP tmp);
8469 ins_cost(160); 7949 ins_cost(160);
8470 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 7950 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
8471 "MOVD $dst.lo,$tmp\n\t" 7951 "MOVD $dst.lo,$tmp\n\t"
8472 "PSRLQ $tmp,32\n\t" 7952 "PSRLQ $tmp,32\n\t"
8473 "MOVD $dst.hi,$tmp" %} 7953 "MOVD $dst.hi,$tmp" %}
8474 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); 7954 ins_encode %{
7955 __ movdbl($tmp$$XMMRegister, $mem$$Address);
7956 __ movdl($dst$$Register, $tmp$$XMMRegister);
7957 __ psrlq($tmp$$XMMRegister, 32);
7958 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
7959 %}
8475 ins_pipe( pipe_slow ); 7960 ins_pipe( pipe_slow );
8476 %} 7961 %}
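A hedged note on the two SSE2 LoadLLocked rules above (not part of the patch): a single MOVSD gives an atomic 64-bit load, and the MOVD/PSRLQ/MOVD sequence then splits the value into the 32-bit lo/hi register pair. The C++ sketch below mirrors that split; the function name is invented for illustration.

    #include <cstdint>

    // Mirrors "MOVD $dst.lo,$tmp ; PSRLQ $tmp,32 ; MOVD $dst.hi,$tmp" from the format above.
    static void split_long(uint64_t v, uint32_t& lo, uint32_t& hi) {
      lo = static_cast<uint32_t>(v);        // low 32 bits
      hi = static_cast<uint32_t>(v >> 32);  // high 32 bits exposed by the logical shift
    }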
8477 7962
8478 // Conditional-store of the updated heap-top. 7963 // Conditional-store of the updated heap-top.
8479 // Used during allocation of the shared heap. 7964 // Used during allocation of the shared heap.
10052 // Double Math 9537 // Double Math
10053 9538
10054 // Compare & branch 9539 // Compare & branch
10055 9540
10056 // P6 version of float compare, sets condition codes in EFLAGS 9541 // P6 version of float compare, sets condition codes in EFLAGS
10057 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 9542 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
10058 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9543 predicate(VM_Version::supports_cmov() && UseSSE <=1);
10059 match(Set cr (CmpD src1 src2)); 9544 match(Set cr (CmpD src1 src2));
10060 effect(KILL rax); 9545 effect(KILL rax);
10061 ins_cost(150); 9546 ins_cost(150);
10062 format %{ "FLD $src1\n\t" 9547 format %{ "FLD $src1\n\t"
10064 "JNP exit\n\t" 9549 "JNP exit\n\t"
10065 "MOV ah,1 // saw a NaN, set CF\n\t" 9550 "MOV ah,1 // saw a NaN, set CF\n\t"
10066 "SAHF\n" 9551 "SAHF\n"
10067 "exit:\tNOP // avoid branch to branch" %} 9552 "exit:\tNOP // avoid branch to branch" %}
10068 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9553 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10069 ins_encode( Push_Reg_D(src1), 9554 ins_encode( Push_Reg_DPR(src1),
10070 OpcP, RegOpc(src2), 9555 OpcP, RegOpc(src2),
10071 cmpF_P6_fixup ); 9556 cmpF_P6_fixup );
10072 ins_pipe( pipe_slow ); 9557 ins_pipe( pipe_slow );
10073 %} 9558 %}
10074 9559
10075 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9560 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
10076 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9561 predicate(VM_Version::supports_cmov() && UseSSE <=1);
10077 match(Set cr (CmpD src1 src2)); 9562 match(Set cr (CmpD src1 src2));
10078 ins_cost(150); 9563 ins_cost(150);
10079 format %{ "FLD $src1\n\t" 9564 format %{ "FLD $src1\n\t"
10080 "FUCOMIP ST,$src2 // P6 instruction" %} 9565 "FUCOMIP ST,$src2 // P6 instruction" %}
10081 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9566 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10082 ins_encode( Push_Reg_D(src1), 9567 ins_encode( Push_Reg_DPR(src1),
10083 OpcP, RegOpc(src2)); 9568 OpcP, RegOpc(src2));
10084 ins_pipe( pipe_slow ); 9569 ins_pipe( pipe_slow );
10085 %} 9570 %}
10086 9571
10087 // Compare & branch 9572 // Compare & branch
10088 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 9573 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
10089 predicate(UseSSE<=1); 9574 predicate(UseSSE<=1);
10090 match(Set cr (CmpD src1 src2)); 9575 match(Set cr (CmpD src1 src2));
10091 effect(KILL rax); 9576 effect(KILL rax);
10092 ins_cost(200); 9577 ins_cost(200);
10093 format %{ "FLD $src1\n\t" 9578 format %{ "FLD $src1\n\t"
10096 "TEST AX,0x400\n\t" 9581 "TEST AX,0x400\n\t"
10097 "JZ,s flags\n\t" 9582 "JZ,s flags\n\t"
10098 "MOV AH,1\t# unordered treat as LT\n" 9583 "MOV AH,1\t# unordered treat as LT\n"
10099 "flags:\tSAHF" %} 9584 "flags:\tSAHF" %}
10100 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9585 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10101 ins_encode( Push_Reg_D(src1), 9586 ins_encode( Push_Reg_DPR(src1),
10102 OpcP, RegOpc(src2), 9587 OpcP, RegOpc(src2),
10103 fpu_flags); 9588 fpu_flags);
10104 ins_pipe( pipe_slow ); 9589 ins_pipe( pipe_slow );
10105 %} 9590 %}
10106 9591
10107 // Compare vs zero into -1,0,1 9592 // Compare vs zero into -1,0,1
10108 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{ 9593 instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10109 predicate(UseSSE<=1); 9594 predicate(UseSSE<=1);
10110 match(Set dst (CmpD3 src1 zero)); 9595 match(Set dst (CmpD3 src1 zero));
10111 effect(KILL cr, KILL rax); 9596 effect(KILL cr, KILL rax);
10112 ins_cost(280); 9597 ins_cost(280);
10113 format %{ "FTSTD $dst,$src1" %} 9598 format %{ "FTSTD $dst,$src1" %}
10114 opcode(0xE4, 0xD9); 9599 opcode(0xE4, 0xD9);
10115 ins_encode( Push_Reg_D(src1), 9600 ins_encode( Push_Reg_DPR(src1),
10116 OpcS, OpcP, PopFPU, 9601 OpcS, OpcP, PopFPU,
10117 CmpF_Result(dst)); 9602 CmpF_Result(dst));
10118 ins_pipe( pipe_slow ); 9603 ins_pipe( pipe_slow );
10119 %} 9604 %}
10120 9605
10121 // Compare into -1,0,1 9606 // Compare into -1,0,1
10122 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ 9607 instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
10123 predicate(UseSSE<=1); 9608 predicate(UseSSE<=1);
10124 match(Set dst (CmpD3 src1 src2)); 9609 match(Set dst (CmpD3 src1 src2));
10125 effect(KILL cr, KILL rax); 9610 effect(KILL cr, KILL rax);
10126 ins_cost(300); 9611 ins_cost(300);
10127 format %{ "FCMPD $dst,$src1,$src2" %} 9612 format %{ "FCMPD $dst,$src1,$src2" %}
10128 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9613 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10129 ins_encode( Push_Reg_D(src1), 9614 ins_encode( Push_Reg_DPR(src1),
10130 OpcP, RegOpc(src2), 9615 OpcP, RegOpc(src2),
10131 CmpF_Result(dst)); 9616 CmpF_Result(dst));
10132 ins_pipe( pipe_slow ); 9617 ins_pipe( pipe_slow );
10133 %} 9618 %}
10134 9619
10135 // float compare and set condition codes in EFLAGS by XMM regs 9620 // float compare and set condition codes in EFLAGS by XMM regs
10136 instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{ 9621 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
10137 predicate(UseSSE>=2); 9622 predicate(UseSSE>=2);
10138 match(Set cr (CmpD dst src)); 9623 match(Set cr (CmpD src1 src2));
10139 effect(KILL rax); 9624 ins_cost(145);
10140 ins_cost(125); 9625 format %{ "UCOMISD $src1,$src2\n\t"
10141 format %{ "COMISD $dst,$src\n" 9626 "JNP,s exit\n\t"
10142 "\tJNP exit\n" 9627 "PUSHF\t# saw NaN, set CF\n\t"
10143 "\tMOV ah,1 // saw a NaN, set CF\n" 9628 "AND [rsp], #0xffffff2b\n\t"
10144 "\tSAHF\n" 9629 "POPF\n"
10145 "exit:\tNOP // avoid branch to branch" %} 9630 "exit:" %}
10146 opcode(0x66, 0x0F, 0x2F); 9631 ins_encode %{
10147 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup); 9632 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9633 emit_cmpfp_fixup(_masm);
9634 %}
10148 ins_pipe( pipe_slow ); 9635 ins_pipe( pipe_slow );
10149 %} 9636 %}
10150 9637
10151 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{ 9638 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
10152 predicate(UseSSE>=2); 9639 predicate(UseSSE>=2);
10153 match(Set cr (CmpD dst src)); 9640 match(Set cr (CmpD src1 src2));
10154 ins_cost(100); 9641 ins_cost(100);
10155 format %{ "COMISD $dst,$src" %} 9642 format %{ "UCOMISD $src1,$src2" %}
10156 opcode(0x66, 0x0F, 0x2F); 9643 ins_encode %{
10157 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 9644 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9645 %}
10158 ins_pipe( pipe_slow ); 9646 ins_pipe( pipe_slow );
10159 %} 9647 %}
10160 9648
10161 // float compare and set condition codes in EFLAGS by XMM regs 9649 // float compare and set condition codes in EFLAGS by XMM regs
10162 instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{ 9650 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
10163 predicate(UseSSE>=2); 9651 predicate(UseSSE>=2);
10164 match(Set cr (CmpD dst (LoadD src))); 9652 match(Set cr (CmpD src1 (LoadD src2)));
10165 effect(KILL rax);
10166 ins_cost(145); 9653 ins_cost(145);
10167 format %{ "COMISD $dst,$src\n" 9654 format %{ "UCOMISD $src1,$src2\n\t"
10168 "\tJNP exit\n" 9655 "JNP,s exit\n\t"
10169 "\tMOV ah,1 // saw a NaN, set CF\n" 9656 "PUSHF\t# saw NaN, set CF\n\t"
10170 "\tSAHF\n" 9657 "AND [rsp], #0xffffff2b\n\t"
10171 "exit:\tNOP // avoid branch to branch" %} 9658 "POPF\n"
10172 opcode(0x66, 0x0F, 0x2F); 9659 "exit:" %}
10173 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup); 9660 ins_encode %{
9661 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9662 emit_cmpfp_fixup(_masm);
9663 %}
10174 ins_pipe( pipe_slow ); 9664 ins_pipe( pipe_slow );
10175 %} 9665 %}
10176 9666
10177 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{ 9667 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
10178 predicate(UseSSE>=2); 9668 predicate(UseSSE>=2);
10179 match(Set cr (CmpD dst (LoadD src))); 9669 match(Set cr (CmpD src1 (LoadD src2)));
10180 ins_cost(100); 9670 ins_cost(100);
10181 format %{ "COMISD $dst,$src" %} 9671 format %{ "UCOMISD $src1,$src2" %}
10182 opcode(0x66, 0x0F, 0x2F); 9672 ins_encode %{
10183 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src)); 9673 __ ucomisd($src1$$XMMRegister, $src2$$Address);
9674 %}
10184 ins_pipe( pipe_slow ); 9675 ins_pipe( pipe_slow );
10185 %} 9676 %}
10186 9677
10187 // Compare into -1,0,1 in XMM 9678 // Compare into -1,0,1 in XMM
10188 instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ 9679 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
10189 predicate(UseSSE>=2); 9680 predicate(UseSSE>=2);
10190 match(Set dst (CmpD3 src1 src2)); 9681 match(Set dst (CmpD3 src1 src2));
10191 effect(KILL cr); 9682 effect(KILL cr);
10192 ins_cost(255); 9683 ins_cost(255);
10193 format %{ "XOR $dst,$dst\n" 9684 format %{ "UCOMISD $src1, $src2\n\t"
10194 "\tCOMISD $src1,$src2\n" 9685 "MOV $dst, #-1\n\t"
10195 "\tJP,s nan\n" 9686 "JP,s done\n\t"
10196 "\tJEQ,s exit\n" 9687 "JB,s done\n\t"
10197 "\tJA,s inc\n" 9688 "SETNE $dst\n\t"
10198 "nan:\tDEC $dst\n" 9689 "MOVZB $dst, $dst\n"
10199 "\tJMP,s exit\n" 9690 "done:" %}
10200 "inc:\tINC $dst\n" 9691 ins_encode %{
10201 "exit:" 9692 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
10202 %} 9693 emit_cmpfp3(_masm, $dst$$Register);
10203 opcode(0x66, 0x0F, 0x2F); 9694 %}
10204 ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
10205 CmpX_Result(dst));
10206 ins_pipe( pipe_slow ); 9695 ins_pipe( pipe_slow );
10207 %} 9696 %}
10208 9697
10209 // Compare into -1,0,1 in XMM and memory 9698 // Compare into -1,0,1 in XMM and memory
10210 instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{ 9699 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
10211 predicate(UseSSE>=2); 9700 predicate(UseSSE>=2);
10212 match(Set dst (CmpD3 src1 (LoadD mem))); 9701 match(Set dst (CmpD3 src1 (LoadD src2)));
10213 effect(KILL cr); 9702 effect(KILL cr);
10214 ins_cost(275); 9703 ins_cost(275);
10215 format %{ "COMISD $src1,$mem\n" 9704 format %{ "UCOMISD $src1, $src2\n\t"
10216 "\tMOV $dst,0\t\t# do not blow flags\n" 9705 "MOV $dst, #-1\n\t"
10217 "\tJP,s nan\n" 9706 "JP,s done\n\t"
10218 "\tJEQ,s exit\n" 9707 "JB,s done\n\t"
10219 "\tJA,s inc\n" 9708 "SETNE $dst\n\t"
10220 "nan:\tDEC $dst\n" 9709 "MOVZB $dst, $dst\n"
10221 "\tJMP,s exit\n" 9710 "done:" %}
10222 "inc:\tINC $dst\n" 9711 ins_encode %{
10223 "exit:" 9712 __ ucomisd($src1$$XMMRegister, $src2$$Address);
10224 %} 9713 emit_cmpfp3(_masm, $dst$$Register);
10225 opcode(0x66, 0x0F, 0x2F); 9714 %}
10226 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
10227 LdImmI(dst,0x0), CmpX_Result(dst));
10228 ins_pipe( pipe_slow ); 9715 ins_pipe( pipe_slow );
10229 %} 9716 %}
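A hedged rendering of the CmpD3 sequences above (not part of the patch): the dst register is preset to -1, a parity jump catches the unordered (NaN) case, a below jump catches "less than", and SETNE/MOVZB distinguish equal from greater. In C++ terms (function name invented for illustration):

    #include <cmath>

    static int cmp_d3(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) return -1;  // JP,s done -- dst stays at -1
      if (a < b) return -1;                           // JB,s done -- dst stays at -1
      return (a != b) ? 1 : 0;                        // SETNE $dst ; MOVZB $dst,$dst
    }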
10230 9717
10231 9718
10232 instruct subD_reg(regD dst, regD src) %{ 9719 instruct subDPR_reg(regDPR dst, regDPR src) %{
10233 predicate (UseSSE <=1); 9720 predicate (UseSSE <=1);
10234 match(Set dst (SubD dst src)); 9721 match(Set dst (SubD dst src));
10235 9722
10236 format %{ "FLD $src\n\t" 9723 format %{ "FLD $src\n\t"
10237 "DSUBp $dst,ST" %} 9724 "DSUBp $dst,ST" %}
10238 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9725 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
10239 ins_cost(150); 9726 ins_cost(150);
10240 ins_encode( Push_Reg_D(src), 9727 ins_encode( Push_Reg_DPR(src),
10241 OpcP, RegOpc(dst) ); 9728 OpcP, RegOpc(dst) );
10242 ins_pipe( fpu_reg_reg ); 9729 ins_pipe( fpu_reg_reg );
10243 %} 9730 %}
10244 9731
10245 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 9732 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10246 predicate (UseSSE <=1); 9733 predicate (UseSSE <=1);
10247 match(Set dst (RoundDouble (SubD src1 src2))); 9734 match(Set dst (RoundDouble (SubD src1 src2)));
10248 ins_cost(250); 9735 ins_cost(250);
10249 9736
10250 format %{ "FLD $src2\n\t" 9737 format %{ "FLD $src2\n\t"
10251 "DSUB ST,$src1\n\t" 9738 "DSUB ST,$src1\n\t"
10252 "FSTP_D $dst\t# D-round" %} 9739 "FSTP_D $dst\t# D-round" %}
10253 opcode(0xD8, 0x5); 9740 opcode(0xD8, 0x5);
10254 ins_encode( Push_Reg_D(src2), 9741 ins_encode( Push_Reg_DPR(src2),
10255 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 9742 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
10256 ins_pipe( fpu_mem_reg_reg ); 9743 ins_pipe( fpu_mem_reg_reg );
10257 %} 9744 %}
10258 9745
10259 9746
10260 instruct subD_reg_mem(regD dst, memory src) %{ 9747 instruct subDPR_reg_mem(regDPR dst, memory src) %{
10261 predicate (UseSSE <=1); 9748 predicate (UseSSE <=1);
10262 match(Set dst (SubD dst (LoadD src))); 9749 match(Set dst (SubD dst (LoadD src)));
10263 ins_cost(150); 9750 ins_cost(150);
10264 9751
10265 format %{ "FLD $src\n\t" 9752 format %{ "FLD $src\n\t"
10268 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9755 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10269 OpcP, RegOpc(dst) ); 9756 OpcP, RegOpc(dst) );
10270 ins_pipe( fpu_reg_mem ); 9757 ins_pipe( fpu_reg_mem );
10271 %} 9758 %}
10272 9759
10273 instruct absD_reg(regDPR1 dst, regDPR1 src) %{ 9760 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
10274 predicate (UseSSE<=1); 9761 predicate (UseSSE<=1);
10275 match(Set dst (AbsD src)); 9762 match(Set dst (AbsD src));
10276 ins_cost(100); 9763 ins_cost(100);
10277 format %{ "FABS" %} 9764 format %{ "FABS" %}
10278 opcode(0xE1, 0xD9); 9765 opcode(0xE1, 0xD9);
10279 ins_encode( OpcS, OpcP ); 9766 ins_encode( OpcS, OpcP );
10280 ins_pipe( fpu_reg_reg ); 9767 ins_pipe( fpu_reg_reg );
10281 %} 9768 %}
10282 9769
10283 instruct absXD_reg( regXD dst ) %{ 9770 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
10284 predicate(UseSSE>=2);
10285 match(Set dst (AbsD dst));
10286 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
10287 ins_encode( AbsXD_encoding(dst));
10288 ins_pipe( pipe_slow );
10289 %}
10290
10291 instruct negD_reg(regDPR1 dst, regDPR1 src) %{
10292 predicate(UseSSE<=1); 9771 predicate(UseSSE<=1);
10293 match(Set dst (NegD src)); 9772 match(Set dst (NegD src));
10294 ins_cost(100); 9773 ins_cost(100);
10295 format %{ "FCHS" %} 9774 format %{ "FCHS" %}
10296 opcode(0xE0, 0xD9); 9775 opcode(0xE0, 0xD9);
10297 ins_encode( OpcS, OpcP ); 9776 ins_encode( OpcS, OpcP );
10298 ins_pipe( fpu_reg_reg ); 9777 ins_pipe( fpu_reg_reg );
10299 %} 9778 %}
10300 9779
10301 instruct negXD_reg( regXD dst ) %{ 9780 instruct addDPR_reg(regDPR dst, regDPR src) %{
10302 predicate(UseSSE>=2);
10303 match(Set dst (NegD dst));
10304 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
10305 ins_encode %{
10306 __ xorpd($dst$$XMMRegister,
10307 ExternalAddress((address)double_signflip_pool));
10308 %}
10309 ins_pipe( pipe_slow );
10310 %}
10311
10312 instruct addD_reg(regD dst, regD src) %{
10313 predicate(UseSSE<=1); 9781 predicate(UseSSE<=1);
10314 match(Set dst (AddD dst src)); 9782 match(Set dst (AddD dst src));
10315 format %{ "FLD $src\n\t" 9783 format %{ "FLD $src\n\t"
10316 "DADD $dst,ST" %} 9784 "DADD $dst,ST" %}
10317 size(4); 9785 size(4);
10318 ins_cost(150); 9786 ins_cost(150);
10319 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9787 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10320 ins_encode( Push_Reg_D(src), 9788 ins_encode( Push_Reg_DPR(src),
10321 OpcP, RegOpc(dst) ); 9789 OpcP, RegOpc(dst) );
10322 ins_pipe( fpu_reg_reg ); 9790 ins_pipe( fpu_reg_reg );
10323 %} 9791 %}
10324 9792
10325 9793
10326 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 9794 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10327 predicate(UseSSE<=1); 9795 predicate(UseSSE<=1);
10328 match(Set dst (RoundDouble (AddD src1 src2))); 9796 match(Set dst (RoundDouble (AddD src1 src2)));
10329 ins_cost(250); 9797 ins_cost(250);
10330 9798
10331 format %{ "FLD $src2\n\t" 9799 format %{ "FLD $src2\n\t"
10332 "DADD ST,$src1\n\t" 9800 "DADD ST,$src1\n\t"
10333 "FSTP_D $dst\t# D-round" %} 9801 "FSTP_D $dst\t# D-round" %}
10334 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9802 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
10335 ins_encode( Push_Reg_D(src2), 9803 ins_encode( Push_Reg_DPR(src2),
10336 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 9804 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
10337 ins_pipe( fpu_mem_reg_reg ); 9805 ins_pipe( fpu_mem_reg_reg );
10338 %} 9806 %}
10339 9807
10340 9808
10341 instruct addD_reg_mem(regD dst, memory src) %{ 9809 instruct addDPR_reg_mem(regDPR dst, memory src) %{
10342 predicate(UseSSE<=1); 9810 predicate(UseSSE<=1);
10343 match(Set dst (AddD dst (LoadD src))); 9811 match(Set dst (AddD dst (LoadD src)));
10344 ins_cost(150); 9812 ins_cost(150);
10345 9813
10346 format %{ "FLD $src\n\t" 9814 format %{ "FLD $src\n\t"
10350 OpcP, RegOpc(dst) ); 9818 OpcP, RegOpc(dst) );
10351 ins_pipe( fpu_reg_mem ); 9819 ins_pipe( fpu_reg_mem );
10352 %} 9820 %}
10353 9821
10354 // add-to-memory 9822 // add-to-memory
10355 instruct addD_mem_reg(memory dst, regD src) %{ 9823 instruct addDPR_mem_reg(memory dst, regDPR src) %{
10356 predicate(UseSSE<=1); 9824 predicate(UseSSE<=1);
10357 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9825 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
10358 ins_cost(150); 9826 ins_cost(150);
10359 9827
10360 format %{ "FLD_D $dst\n\t" 9828 format %{ "FLD_D $dst\n\t"
10366 set_instruction_start, 9834 set_instruction_start,
10367 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9835 Opcode(0xDD), RMopc_Mem(0x03,dst) );
10368 ins_pipe( fpu_reg_mem ); 9836 ins_pipe( fpu_reg_mem );
10369 %} 9837 %}
10370 9838
10371 instruct addD_reg_imm1(regD dst, immD1 con) %{ 9839 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
10372 predicate(UseSSE<=1); 9840 predicate(UseSSE<=1);
10373 match(Set dst (AddD dst con)); 9841 match(Set dst (AddD dst con));
10374 ins_cost(125); 9842 ins_cost(125);
10375 format %{ "FLD1\n\t" 9843 format %{ "FLD1\n\t"
10376 "DADDp $dst,ST" %} 9844 "DADDp $dst,ST" %}
10379 __ faddp($dst$$reg); 9847 __ faddp($dst$$reg);
10380 %} 9848 %}
10381 ins_pipe(fpu_reg); 9849 ins_pipe(fpu_reg);
10382 %} 9850 %}
10383 9851
10384 instruct addD_reg_imm(regD dst, immD con) %{ 9852 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
10385 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9853 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10386 match(Set dst (AddD dst con)); 9854 match(Set dst (AddD dst con));
10387 ins_cost(200); 9855 ins_cost(200);
10388 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9856 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10389 "DADDp $dst,ST" %} 9857 "DADDp $dst,ST" %}
10392 __ faddp($dst$$reg); 9860 __ faddp($dst$$reg);
10393 %} 9861 %}
10394 ins_pipe(fpu_reg_mem); 9862 ins_pipe(fpu_reg_mem);
10395 %} 9863 %}
10396 9864
10397 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ 9865 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
10398 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9866 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
10399 match(Set dst (RoundDouble (AddD src con))); 9867 match(Set dst (RoundDouble (AddD src con)));
10400 ins_cost(200); 9868 ins_cost(200);
10401 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9869 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10402 "DADD ST,$src\n\t" 9870 "DADD ST,$src\n\t"
10407 __ fstp_d(Address(rsp, $dst$$disp)); 9875 __ fstp_d(Address(rsp, $dst$$disp));
10408 %} 9876 %}
10409 ins_pipe(fpu_mem_reg_con); 9877 ins_pipe(fpu_mem_reg_con);
10410 %} 9878 %}
10411 9879
10412 // Add two double precision floating point values in xmm 9880 instruct mulDPR_reg(regDPR dst, regDPR src) %{
10413 instruct addXD_reg(regXD dst, regXD src) %{
10414 predicate(UseSSE>=2);
10415 match(Set dst (AddD dst src));
10416 format %{ "ADDSD $dst,$src" %}
10417 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
10418 ins_pipe( pipe_slow );
10419 %}
10420
10421 instruct addXD_imm(regXD dst, immXD con) %{
10422 predicate(UseSSE>=2);
10423 match(Set dst (AddD dst con));
10424 format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10425 ins_encode %{
10426 __ addsd($dst$$XMMRegister, $constantaddress($con));
10427 %}
10428 ins_pipe(pipe_slow);
10429 %}
10430
10431 instruct addXD_mem(regXD dst, memory mem) %{
10432 predicate(UseSSE>=2);
10433 match(Set dst (AddD dst (LoadD mem)));
10434 format %{ "ADDSD $dst,$mem" %}
10435 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
10436 ins_pipe( pipe_slow );
10437 %}
10438
10439 // Sub two double precision floating point values in xmm
10440 instruct subXD_reg(regXD dst, regXD src) %{
10441 predicate(UseSSE>=2);
10442 match(Set dst (SubD dst src));
10443 format %{ "SUBSD $dst,$src" %}
10444 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
10445 ins_pipe( pipe_slow );
10446 %}
10447
10448 instruct subXD_imm(regXD dst, immXD con) %{
10449 predicate(UseSSE>=2);
10450 match(Set dst (SubD dst con));
10451 format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10452 ins_encode %{
10453 __ subsd($dst$$XMMRegister, $constantaddress($con));
10454 %}
10455 ins_pipe(pipe_slow);
10456 %}
10457
10458 instruct subXD_mem(regXD dst, memory mem) %{
10459 predicate(UseSSE>=2);
10460 match(Set dst (SubD dst (LoadD mem)));
10461 format %{ "SUBSD $dst,$mem" %}
10462 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
10463 ins_pipe( pipe_slow );
10464 %}
10465
10466 // Mul two double precision floating point values in xmm
10467 instruct mulXD_reg(regXD dst, regXD src) %{
10468 predicate(UseSSE>=2);
10469 match(Set dst (MulD dst src));
10470 format %{ "MULSD $dst,$src" %}
10471 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
10472 ins_pipe( pipe_slow );
10473 %}
10474
10475 instruct mulXD_imm(regXD dst, immXD con) %{
10476 predicate(UseSSE>=2);
10477 match(Set dst (MulD dst con));
10478 format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10479 ins_encode %{
10480 __ mulsd($dst$$XMMRegister, $constantaddress($con));
10481 %}
10482 ins_pipe(pipe_slow);
10483 %}
10484
10485 instruct mulXD_mem(regXD dst, memory mem) %{
10486 predicate(UseSSE>=2);
10487 match(Set dst (MulD dst (LoadD mem)));
10488 format %{ "MULSD $dst,$mem" %}
10489 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
10490 ins_pipe( pipe_slow );
10491 %}
10492
10493 // Div two double precision floating point values in xmm
10494 instruct divXD_reg(regXD dst, regXD src) %{
10495 predicate(UseSSE>=2);
10496 match(Set dst (DivD dst src));
10497 format %{ "DIVSD $dst,$src" %}
10498 opcode(0xF2, 0x0F, 0x5E);
10499 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
10500 ins_pipe( pipe_slow );
10501 %}
10502
10503 instruct divXD_imm(regXD dst, immXD con) %{
10504 predicate(UseSSE>=2);
10505 match(Set dst (DivD dst con));
10506 format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
10507 ins_encode %{
10508 __ divsd($dst$$XMMRegister, $constantaddress($con));
10509 %}
10510 ins_pipe(pipe_slow);
10511 %}
10512
10513 instruct divXD_mem(regXD dst, memory mem) %{
10514 predicate(UseSSE>=2);
10515 match(Set dst (DivD dst (LoadD mem)));
10516 format %{ "DIVSD $dst,$mem" %}
10517 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
10518 ins_pipe( pipe_slow );
10519 %}
10520
10521
10522 instruct mulD_reg(regD dst, regD src) %{
10523 predicate(UseSSE<=1); 9881 predicate(UseSSE<=1);
10524 match(Set dst (MulD dst src)); 9882 match(Set dst (MulD dst src));
10525 format %{ "FLD $src\n\t" 9883 format %{ "FLD $src\n\t"
10526 "DMULp $dst,ST" %} 9884 "DMULp $dst,ST" %}
10527 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9885 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10528 ins_cost(150); 9886 ins_cost(150);
10529 ins_encode( Push_Reg_D(src), 9887 ins_encode( Push_Reg_DPR(src),
10530 OpcP, RegOpc(dst) ); 9888 OpcP, RegOpc(dst) );
10531 ins_pipe( fpu_reg_reg ); 9889 ins_pipe( fpu_reg_reg );
10532 %} 9890 %}
10533 9891
10534 // Strict FP instruction biases argument before multiply then 9892 // Strict FP instruction biases argument before multiply then
10537 // scale arg1 by multiplying arg1 by 2^(-15360) 9895 // scale arg1 by multiplying arg1 by 2^(-15360)
10538 // load arg2 9896 // load arg2
10539 // multiply scaled arg1 by arg2 9897 // multiply scaled arg1 by arg2
10540 // rescale product by 2^(15360) 9898 // rescale product by 2^(15360)
10541 // 9899 //
10542 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ 9900 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10543 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9901 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10544 match(Set dst (MulD dst src)); 9902 match(Set dst (MulD dst src));
10545 ins_cost(1); // Select this instruction for all strict FP double multiplies 9903 ins_cost(1); // Select this instruction for all strict FP double multiplies
10546 9904
10547 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9905 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
10550 "DMULp $dst,ST\n\t" 9908 "DMULp $dst,ST\n\t"
10551 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9909 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10552 "DMULp $dst,ST\n\t" %} 9910 "DMULp $dst,ST\n\t" %}
10553 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9911 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10554 ins_encode( strictfp_bias1(dst), 9912 ins_encode( strictfp_bias1(dst),
10555 Push_Reg_D(src), 9913 Push_Reg_DPR(src),
10556 OpcP, RegOpc(dst), 9914 OpcP, RegOpc(dst),
10557 strictfp_bias2(dst) ); 9915 strictfp_bias2(dst) );
10558 ins_pipe( fpu_reg_reg ); 9916 ins_pipe( fpu_reg_reg );
10559 %} 9917 %}
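A hedged note on the strict FP bias above (not part of the patch): the x87 computes with an extended exponent range, so an unscaled product can dodge the overflow or underflow that 64-bit double semantics require. The bias sequence effectively computes

    result = ((arg1 * 2^-15360) * arg2) * 2^15360

and 15360 is presumably 16383 - 1023, the difference between the extended and double exponent biases, so the pre-scale forces overflow/underflow to happen where IEEE double says it must and the post-scale restores the magnitude.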
10560 9918
10561 instruct mulD_reg_imm(regD dst, immD con) %{ 9919 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
10562 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9920 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10563 match(Set dst (MulD dst con)); 9921 match(Set dst (MulD dst con));
10564 ins_cost(200); 9922 ins_cost(200);
10565 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9923 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
10566 "DMULp $dst,ST" %} 9924 "DMULp $dst,ST" %}
10570 %} 9928 %}
10571 ins_pipe(fpu_reg_mem); 9929 ins_pipe(fpu_reg_mem);
10572 %} 9930 %}
10573 9931
10574 9932
10575 instruct mulD_reg_mem(regD dst, memory src) %{ 9933 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
10576 predicate( UseSSE<=1 ); 9934 predicate( UseSSE<=1 );
10577 match(Set dst (MulD dst (LoadD src))); 9935 match(Set dst (MulD dst (LoadD src)));
10578 ins_cost(200); 9936 ins_cost(200);
10579 format %{ "FLD_D $src\n\t" 9937 format %{ "FLD_D $src\n\t"
10580 "DMULp $dst,ST" %} 9938 "DMULp $dst,ST" %}
10584 ins_pipe( fpu_reg_mem ); 9942 ins_pipe( fpu_reg_mem );
10585 %} 9943 %}
10586 9944
10587 // 9945 //
10588 // Cisc-alternate to reg-reg multiply 9946 // Cisc-alternate to reg-reg multiply
10589 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{ 9947 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
10590 predicate( UseSSE<=1 ); 9948 predicate( UseSSE<=1 );
10591 match(Set dst (MulD src (LoadD mem))); 9949 match(Set dst (MulD src (LoadD mem)));
10592 ins_cost(250); 9950 ins_cost(250);
10593 format %{ "FLD_D $mem\n\t" 9951 format %{ "FLD_D $mem\n\t"
10594 "DMUL ST,$src\n\t" 9952 "DMUL ST,$src\n\t"
10595 "FSTP_D $dst" %} 9953 "FSTP_D $dst" %}
10596 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9954 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
10597 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9955 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10598 OpcReg_F(src), 9956 OpcReg_FPR(src),
10599 Pop_Reg_D(dst) ); 9957 Pop_Reg_DPR(dst) );
10600 ins_pipe( fpu_reg_reg_mem ); 9958 ins_pipe( fpu_reg_reg_mem );
10601 %} 9959 %}
10602 9960
10603 9961
10604 // MACRO3 -- addD a mulD 9962 // MACRO3 -- addDPR a mulDPR
10605 // This instruction is a '2-address' instruction in that the result goes 9963 // This instruction is a '2-address' instruction in that the result goes
10606 // back to src2. This eliminates a move from the macro; possibly the 9964 // back to src2. This eliminates a move from the macro; possibly the
10607 // register allocator will have to add it back (and maybe not). 9965 // register allocator will have to add it back (and maybe not).
10608 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{ 9966 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
10609 predicate( UseSSE<=1 ); 9967 predicate( UseSSE<=1 );
10610 match(Set src2 (AddD (MulD src0 src1) src2)); 9968 match(Set src2 (AddD (MulD src0 src1) src2));
10611 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9969 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10612 "DMUL ST,$src1\n\t" 9970 "DMUL ST,$src1\n\t"
10613 "DADDp $src2,ST" %} 9971 "DADDp $src2,ST" %}
10614 ins_cost(250); 9972 ins_cost(250);
10615 opcode(0xDD); /* LoadD DD /0 */ 9973 opcode(0xDD); /* LoadD DD /0 */
10616 ins_encode( Push_Reg_F(src0), 9974 ins_encode( Push_Reg_FPR(src0),
10617 FMul_ST_reg(src1), 9975 FMul_ST_reg(src1),
10618 FAddP_reg_ST(src2) ); 9976 FAddP_reg_ST(src2) );
10619 ins_pipe( fpu_reg_reg_reg ); 9977 ins_pipe( fpu_reg_reg_reg );
10620 %} 9978 %}
10621 9979
10622 9980
10623 // MACRO3 -- subD a mulD 9981 // MACRO3 -- subDPR a mulDPR
10624 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{ 9982 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
10625 predicate( UseSSE<=1 ); 9983 predicate( UseSSE<=1 );
10626 match(Set src2 (SubD (MulD src0 src1) src2)); 9984 match(Set src2 (SubD (MulD src0 src1) src2));
10627 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9985 format %{ "FLD $src0\t# ===MACRO3d===\n\t"
10628 "DMUL ST,$src1\n\t" 9986 "DMUL ST,$src1\n\t"
10629 "DSUBRp $src2,ST" %} 9987 "DSUBRp $src2,ST" %}
10630 ins_cost(250); 9988 ins_cost(250);
10631 ins_encode( Push_Reg_F(src0), 9989 ins_encode( Push_Reg_FPR(src0),
10632 FMul_ST_reg(src1), 9990 FMul_ST_reg(src1),
10633 Opcode(0xDE), Opc_plus(0xE0,src2)); 9991 Opcode(0xDE), Opc_plus(0xE0,src2));
10634 ins_pipe( fpu_reg_reg_reg ); 9992 ins_pipe( fpu_reg_reg_reg );
10635 %} 9993 %}
10636 9994
10637 9995
10638 instruct divD_reg(regD dst, regD src) %{ 9996 instruct divDPR_reg(regDPR dst, regDPR src) %{
10639 predicate( UseSSE<=1 ); 9997 predicate( UseSSE<=1 );
10640 match(Set dst (DivD dst src)); 9998 match(Set dst (DivD dst src));
10641 9999
10642 format %{ "FLD $src\n\t" 10000 format %{ "FLD $src\n\t"
10643 "FDIVp $dst,ST" %} 10001 "FDIVp $dst,ST" %}
10644 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10002 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10645 ins_cost(150); 10003 ins_cost(150);
10646 ins_encode( Push_Reg_D(src), 10004 ins_encode( Push_Reg_DPR(src),
10647 OpcP, RegOpc(dst) ); 10005 OpcP, RegOpc(dst) );
10648 ins_pipe( fpu_reg_reg ); 10006 ins_pipe( fpu_reg_reg );
10649 %} 10007 %}
10650 10008
10651 // Strict FP instruction biases argument before division then 10009 // Strict FP instruction biases argument before division then
10654 // scale dividend by multiplying dividend by 2^(-15360) 10012 // scale dividend by multiplying dividend by 2^(-15360)
10655 // load divisor 10013 // load divisor
10656 // divide scaled dividend by divisor 10014 // divide scaled dividend by divisor
10657 // rescale quotient by 2^(15360) 10015 // rescale quotient by 2^(15360)
10658 // 10016 //
10659 instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{ 10017 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10660 predicate (UseSSE<=1); 10018 predicate (UseSSE<=1);
10661 match(Set dst (DivD dst src)); 10019 match(Set dst (DivD dst src));
10662 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 10020 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10663 ins_cost(01); 10021 ins_cost(01);
10664 10022
10668 "FDIVp $dst,ST\n\t" 10026 "FDIVp $dst,ST\n\t"
10669 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 10027 "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
10670 "DMULp $dst,ST\n\t" %} 10028 "DMULp $dst,ST\n\t" %}
10671 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10029 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10672 ins_encode( strictfp_bias1(dst), 10030 ins_encode( strictfp_bias1(dst),
10673 Push_Reg_D(src), 10031 Push_Reg_DPR(src),
10674 OpcP, RegOpc(dst), 10032 OpcP, RegOpc(dst),
10675 strictfp_bias2(dst) ); 10033 strictfp_bias2(dst) );
10676 ins_pipe( fpu_reg_reg ); 10034 ins_pipe( fpu_reg_reg );
10677 %} 10035 %}
10678 10036
10679 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 10037 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10680 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 10038 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10681 match(Set dst (RoundDouble (DivD src1 src2))); 10039 match(Set dst (RoundDouble (DivD src1 src2)));
10682 10040
10683 format %{ "FLD $src1\n\t" 10041 format %{ "FLD $src1\n\t"
10684 "FDIV ST,$src2\n\t" 10042 "FDIV ST,$src2\n\t"
10685 "FSTP_D $dst\t# D-round" %} 10043 "FSTP_D $dst\t# D-round" %}
10686 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 10044 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10687 ins_encode( Push_Reg_D(src1), 10045 ins_encode( Push_Reg_DPR(src1),
10688 OpcP, RegOpc(src2), Pop_Mem_D(dst) ); 10046 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10689 ins_pipe( fpu_mem_reg_reg ); 10047 ins_pipe( fpu_mem_reg_reg );
10690 %} 10048 %}
10691 10049
10692 10050
10693 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{ 10051 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10694 predicate(UseSSE<=1); 10052 predicate(UseSSE<=1);
10695 match(Set dst (ModD dst src)); 10053 match(Set dst (ModD dst src));
10696 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 10054 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10697 10055
10698 format %{ "DMOD $dst,$src" %} 10056 format %{ "DMOD $dst,$src" %}
10699 ins_cost(250); 10057 ins_cost(250);
10700 ins_encode(Push_Reg_Mod_D(dst, src), 10058 ins_encode(Push_Reg_Mod_DPR(dst, src),
10701 emitModD(), 10059 emitModDPR(),
10702 Push_Result_Mod_D(src), 10060 Push_Result_Mod_DPR(src),
10703 Pop_Reg_D(dst)); 10061 Pop_Reg_DPR(dst));
10704 ins_pipe( pipe_slow ); 10062 ins_pipe( pipe_slow );
10705 %} 10063 %}
10706 10064
10707 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{ 10065 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10708 predicate(UseSSE>=2); 10066 predicate(UseSSE>=2);
10709 match(Set dst (ModD src0 src1)); 10067 match(Set dst (ModD src0 src1));
10710 effect(KILL rax, KILL cr); 10068 effect(KILL rax, KILL cr);
10711 10069
10712 format %{ "SUB ESP,8\t # DMOD\n" 10070 format %{ "SUB ESP,8\t # DMOD\n"
10723 "\tMOVSD $dst,[ESP+0]\n" 10081 "\tMOVSD $dst,[ESP+0]\n"
10724 "\tADD ESP,8\n" 10082 "\tADD ESP,8\n"
10725 "\tFSTP ST0\t # Restore FPU Stack" 10083 "\tFSTP ST0\t # Restore FPU Stack"
10726 %} 10084 %}
10727 ins_cost(250); 10085 ins_cost(250);
10728 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); 10086 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10729 ins_pipe( pipe_slow ); 10087 ins_pipe( pipe_slow );
10730 %} 10088 %}
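A hedged note on the DMOD rules above (not part of the patch): SSE2 has no remainder instruction, so both rules go through the FPU, and emitModDPR() presumably wraps the usual x87 FPREM loop. The result is Java's truncating floating-point remainder, which is the value std::fmod returns; the sketch below is illustration only.

    #include <cmath>

    static double dmod(double a, double b) {
      return std::fmod(a, b);  // truncating remainder, the same convention DMOD/FPREM follows
    }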
10731 10089
10732 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ 10090 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
10733 predicate (UseSSE<=1); 10091 predicate (UseSSE<=1);
10734 match(Set dst (SinD src)); 10092 match(Set dst (SinD src));
10735 ins_cost(1800); 10093 ins_cost(1800);
10736 format %{ "DSIN $dst" %} 10094 format %{ "DSIN $dst" %}
10737 opcode(0xD9, 0xFE); 10095 opcode(0xD9, 0xFE);
10738 ins_encode( OpcP, OpcS ); 10096 ins_encode( OpcP, OpcS );
10739 ins_pipe( pipe_slow ); 10097 ins_pipe( pipe_slow );
10740 %} 10098 %}
10741 10099
10742 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ 10100 instruct sinD_reg(regD dst, eFlagsReg cr) %{
10743 predicate (UseSSE>=2); 10101 predicate (UseSSE>=2);
10744 match(Set dst (SinD dst)); 10102 match(Set dst (SinD dst));
10745 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10103 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10746 ins_cost(1800); 10104 ins_cost(1800);
10747 format %{ "DSIN $dst" %} 10105 format %{ "DSIN $dst" %}
10748 opcode(0xD9, 0xFE); 10106 opcode(0xD9, 0xFE);
10749 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 10107 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10750 ins_pipe( pipe_slow ); 10108 ins_pipe( pipe_slow );
10751 %} 10109 %}
10752 10110
10753 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ 10111 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
10754 predicate (UseSSE<=1); 10112 predicate (UseSSE<=1);
10755 match(Set dst (CosD src)); 10113 match(Set dst (CosD src));
10756 ins_cost(1800); 10114 ins_cost(1800);
10757 format %{ "DCOS $dst" %} 10115 format %{ "DCOS $dst" %}
10758 opcode(0xD9, 0xFF); 10116 opcode(0xD9, 0xFF);
10759 ins_encode( OpcP, OpcS ); 10117 ins_encode( OpcP, OpcS );
10760 ins_pipe( pipe_slow ); 10118 ins_pipe( pipe_slow );
10761 %} 10119 %}
10762 10120
10763 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{ 10121 instruct cosD_reg(regD dst, eFlagsReg cr) %{
10764 predicate (UseSSE>=2); 10122 predicate (UseSSE>=2);
10765 match(Set dst (CosD dst)); 10123 match(Set dst (CosD dst));
10766 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10124 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10767 ins_cost(1800); 10125 ins_cost(1800);
10768 format %{ "DCOS $dst" %} 10126 format %{ "DCOS $dst" %}
10769 opcode(0xD9, 0xFF); 10127 opcode(0xD9, 0xFF);
10770 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 10128 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10771 ins_pipe( pipe_slow ); 10129 ins_pipe( pipe_slow );
10772 %} 10130 %}
10773 10131
10774 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{ 10132 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
10775 predicate (UseSSE<=1); 10133 predicate (UseSSE<=1);
10776 match(Set dst(TanD src)); 10134 match(Set dst(TanD src));
10777 format %{ "DTAN $dst" %} 10135 format %{ "DTAN $dst" %}
10778 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan 10136 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
10779 Opcode(0xDD), Opcode(0xD8)); // fstp st 10137 Opcode(0xDD), Opcode(0xD8)); // fstp st
10780 ins_pipe( pipe_slow ); 10138 ins_pipe( pipe_slow );
10781 %} 10139 %}
10782 10140
10783 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{ 10141 instruct tanD_reg(regD dst, eFlagsReg cr) %{
10784 predicate (UseSSE>=2); 10142 predicate (UseSSE>=2);
10785 match(Set dst(TanD dst)); 10143 match(Set dst(TanD dst));
10786 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10144 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10787 format %{ "DTAN $dst" %} 10145 format %{ "DTAN $dst" %}
10788 ins_encode( Push_SrcXD(dst), 10146 ins_encode( Push_SrcD(dst),
10789 Opcode(0xD9), Opcode(0xF2), // fptan 10147 Opcode(0xD9), Opcode(0xF2), // fptan
10790 Opcode(0xDD), Opcode(0xD8), // fstp st 10148 Opcode(0xDD), Opcode(0xD8), // fstp st
10791 Push_ResultXD(dst) ); 10149 Push_ResultD(dst) );
10792 ins_pipe( pipe_slow ); 10150 ins_pipe( pipe_slow );
10793 %} 10151 %}
10794 10152
10795 instruct atanD_reg(regD dst, regD src) %{ 10153 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10796 predicate (UseSSE<=1); 10154 predicate (UseSSE<=1);
10797 match(Set dst(AtanD dst src)); 10155 match(Set dst(AtanD dst src));
10798 format %{ "DATA $dst,$src" %} 10156 format %{ "DATA $dst,$src" %}
10799 opcode(0xD9, 0xF3); 10157 opcode(0xD9, 0xF3);
10800 ins_encode( Push_Reg_D(src), 10158 ins_encode( Push_Reg_DPR(src),
10801 OpcP, OpcS, RegOpc(dst) ); 10159 OpcP, OpcS, RegOpc(dst) );
10802 ins_pipe( pipe_slow ); 10160 ins_pipe( pipe_slow );
10803 %} 10161 %}
10804 10162
10805 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10163 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10806 predicate (UseSSE>=2); 10164 predicate (UseSSE>=2);
10807 match(Set dst(AtanD dst src)); 10165 match(Set dst(AtanD dst src));
10808 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 10166 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10809 format %{ "DATA $dst,$src" %} 10167 format %{ "DATA $dst,$src" %}
10810 opcode(0xD9, 0xF3); 10168 opcode(0xD9, 0xF3);
10811 ins_encode( Push_SrcXD(src), 10169 ins_encode( Push_SrcD(src),
10812 OpcP, OpcS, Push_ResultXD(dst) ); 10170 OpcP, OpcS, Push_ResultD(dst) );
10813 ins_pipe( pipe_slow ); 10171 ins_pipe( pipe_slow );
10814 %} 10172 %}
10815 10173
10816 instruct sqrtD_reg(regD dst, regD src) %{ 10174 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10817 predicate (UseSSE<=1); 10175 predicate (UseSSE<=1);
10818 match(Set dst (SqrtD src)); 10176 match(Set dst (SqrtD src));
10819 format %{ "DSQRT $dst,$src" %} 10177 format %{ "DSQRT $dst,$src" %}
10820 opcode(0xFA, 0xD9); 10178 opcode(0xFA, 0xD9);
10821 ins_encode( Push_Reg_D(src), 10179 ins_encode( Push_Reg_DPR(src),
10822 OpcS, OpcP, Pop_Reg_D(dst) ); 10180 OpcS, OpcP, Pop_Reg_DPR(dst) );
10823 ins_pipe( pipe_slow ); 10181 ins_pipe( pipe_slow );
10824 %} 10182 %}
10825 10183
10826 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10184 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10827 predicate (UseSSE<=1); 10185 predicate (UseSSE<=1);
10828 match(Set Y (PowD X Y)); // Raise X to the Yth power 10186 match(Set Y (PowD X Y)); // Raise X to the Yth power
10829 effect(KILL rax, KILL rbx, KILL rcx); 10187 effect(KILL rax, KILL rbx, KILL rcx);
10830 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 10188 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10831 "FLD_D $X\n\t" 10189 "FLD_D $X\n\t"
10850 "FMUL ST(0),[ESP+0]\t# Scale\n\t" 10208 "FMUL ST(0),[ESP+0]\t# Scale\n\t"
10851 10209
10852 "ADD ESP,8" 10210 "ADD ESP,8"
10853 %} 10211 %}
10854 ins_encode( push_stack_temp_qword, 10212 ins_encode( push_stack_temp_qword,
10855 Push_Reg_D(X), 10213 Push_Reg_DPR(X),
10856 Opcode(0xD9), Opcode(0xF1), // fyl2x 10214 Opcode(0xD9), Opcode(0xF1), // fyl2x
10857 pow_exp_core_encoding, 10215 pow_exp_core_encoding,
10858 pop_stack_temp_qword); 10216 pop_stack_temp_qword);
10859 ins_pipe( pipe_slow ); 10217 ins_pipe( pipe_slow );
10860 %} 10218 %}
10861 10219
10862 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ 10220 instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10863 predicate (UseSSE>=2); 10221 predicate (UseSSE>=2);
10864 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 10222 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
10865 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); 10223 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10866 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" 10224 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t"
10867 "MOVSD [ESP],$src1\n\t" 10225 "MOVSD [ESP],$src1\n\t"
10895 ins_encode( push_stack_temp_qword, 10253 ins_encode( push_stack_temp_qword,
10896 push_xmm_to_fpr1(src1), 10254 push_xmm_to_fpr1(src1),
10897 push_xmm_to_fpr1(src0), 10255 push_xmm_to_fpr1(src0),
10898 Opcode(0xD9), Opcode(0xF1), // fyl2x 10256 Opcode(0xD9), Opcode(0xF1), // fyl2x
10899 pow_exp_core_encoding, 10257 pow_exp_core_encoding,
10900 Push_ResultXD(dst) ); 10258 Push_ResultD(dst) );
10901 ins_pipe( pipe_slow ); 10259 ins_pipe( pipe_slow );
10902 %} 10260 %}
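A hedged note on the fast-path POW encodings above (not part of the patch): FYL2X leaves exponent*log2(base) on the FPU stack and pow_exp_core_encoding (defined elsewhere in this file) then raises 2 to that value, i.e. base^exponent via the identity x^y = 2^(y*log2 x), which only holds for a positive base. In C++ terms (function name invented for illustration):

    #include <cmath>

    static double pow_via_log2(double x, double y) {
      return std::exp2(y * std::log2(x));  // matches the fast path only for x > 0
    }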
10903 10261
10904 10262
10905 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10263 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10906 predicate (UseSSE<=1); 10264 predicate (UseSSE<=1);
10907 match(Set dpr1 (ExpD dpr1)); 10265 match(Set dpr1 (ExpD dpr1));
10908 effect(KILL rax, KILL rbx, KILL rcx); 10266 effect(KILL rax, KILL rbx, KILL rcx);
10909 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding" 10267 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding"
10910 "FLDL2E \t\t\t# Ld log2(e) X\n\t" 10268 "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10936 pow_exp_core_encoding, 10294 pow_exp_core_encoding,
10937 pop_stack_temp_qword); 10295 pop_stack_temp_qword);
10938 ins_pipe( pipe_slow ); 10296 ins_pipe( pipe_slow );
10939 %} 10297 %}
10940 10298
10941 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 10299 instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10942 predicate (UseSSE>=2); 10300 predicate (UseSSE>=2);
10943 match(Set dst (ExpD src)); 10301 match(Set dst (ExpD src));
10944 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); 10302 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10945 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" 10303 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t"
10946 "MOVSD [ESP],$src\n\t" 10304 "MOVSD [ESP],$src\n\t"
10967 10325
10968 "FST_D [ESP]\n\t" 10326 "FST_D [ESP]\n\t"
10969 "MOVSD $dst,[ESP]\n\t" 10327 "MOVSD $dst,[ESP]\n\t"
10970 "ADD ESP,8" 10328 "ADD ESP,8"
10971 %} 10329 %}
10972 ins_encode( Push_SrcXD(src), 10330 ins_encode( Push_SrcD(src),
10973 Opcode(0xD9), Opcode(0xEA), // fldl2e 10331 Opcode(0xD9), Opcode(0xEA), // fldl2e
10974 Opcode(0xDE), Opcode(0xC9), // fmulp 10332 Opcode(0xDE), Opcode(0xC9), // fmulp
10975 pow_exp_core_encoding, 10333 pow_exp_core_encoding,
10976 Push_ResultXD(dst) ); 10334 Push_ResultD(dst) );
10977 ins_pipe( pipe_slow ); 10335 ins_pipe( pipe_slow );
10978 %} 10336 %}
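A hedged note on the EXP rules above (not part of the patch): they reuse the same pow_exp_core_encoding, seeded with FLDL2E, so e^x is computed as 2^(x*log2 e). In C++ terms (function name invented for illustration):

    #include <cmath>

    static double exp_via_log2(double x) {
      return std::exp2(x * 1.4426950408889634);  // 1.4426950408889634 ~ log2(e), the constant FLDL2E pushes
    }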
10979 10337
10980 10338
10981 10339
10982 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ 10340 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
10983 predicate (UseSSE<=1); 10341 predicate (UseSSE<=1);
10984 // The source Double operand on FPU stack 10342 // The source Double operand on FPU stack
10985 match(Set dst (Log10D src)); 10343 match(Set dst (Log10D src));
10986 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10344 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
10987 // fxch ; swap ST(0) with ST(1) 10345 // fxch ; swap ST(0) with ST(1)
10995 Opcode(0xD9), Opcode(0xF1)); // fyl2x 10353 Opcode(0xD9), Opcode(0xF1)); // fyl2x
10996 10354
10997 ins_pipe( pipe_slow ); 10355 ins_pipe( pipe_slow );
10998 %} 10356 %}
10999 10357
11000 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10358 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
11001 predicate (UseSSE>=2); 10359 predicate (UseSSE>=2);
11002 effect(KILL cr); 10360 effect(KILL cr);
11003 match(Set dst (Log10D src)); 10361 match(Set dst (Log10D src));
11004 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 10362 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
11005 // fyl2x ; compute log_10(2) * log_2(x) 10363 // fyl2x ; compute log_10(2) * log_2(x)
11006 format %{ "FLDLG2 \t\t\t#Log10\n\t" 10364 format %{ "FLDLG2 \t\t\t#Log10\n\t"
11007 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 10365 "FYL2X \t\t\t# Q=Log10*Log_2(x)"
11008 %} 10366 %}
11009 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 10367 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
11010 Push_SrcXD(src), 10368 Push_SrcD(src),
11011 Opcode(0xD9), Opcode(0xF1), // fyl2x 10369 Opcode(0xD9), Opcode(0xF1), // fyl2x
11012 Push_ResultXD(dst)); 10370 Push_ResultD(dst));
11013 10371
11014 ins_pipe( pipe_slow ); 10372 ins_pipe( pipe_slow );
11015 %} 10373 %}
11016 10374
11017 instruct logD_reg(regDPR1 dst, regDPR1 src) %{ 10375 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
11018 predicate (UseSSE<=1); 10376 predicate (UseSSE<=1);
11019 // The source Double operand on FPU stack 10377 // The source Double operand on FPU stack
11020 match(Set dst (LogD src)); 10378 match(Set dst (LogD src));
11021 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 10379 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
11022 // fxch ; swap ST(0) with ST(1) 10380 // fxch ; swap ST(0) with ST(1)
11030 Opcode(0xD9), Opcode(0xF1)); // fyl2x 10388 Opcode(0xD9), Opcode(0xF1)); // fyl2x
11031 10389
11032 ins_pipe( pipe_slow ); 10390 ins_pipe( pipe_slow );
11033 %} 10391 %}
11034 10392
11035 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 10393 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
11036 predicate (UseSSE>=2); 10394 predicate (UseSSE>=2);
11037 effect(KILL cr); 10395 effect(KILL cr);
11038 // The source and result Double operands in XMM registers 10396 // The source and result Double operands in XMM registers
11039 match(Set dst (LogD src)); 10397 match(Set dst (LogD src));
11040 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 10398 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number
11041 // fyl2x ; compute log_e(2) * log_2(x) 10399 // fyl2x ; compute log_e(2) * log_2(x)
11042 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 10400 format %{ "FLDLN2 \t\t\t#Log_e\n\t"
11043 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 10401 "FYL2X \t\t\t# Q=Log_e*Log_2(x)"
11044 %} 10402 %}
11045 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 10403 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
11046 Push_SrcXD(src), 10404 Push_SrcD(src),
11047 Opcode(0xD9), Opcode(0xF1), // fyl2x 10405 Opcode(0xD9), Opcode(0xF1), // fyl2x
11048 Push_ResultXD(dst)); 10406 Push_ResultD(dst));
11049 ins_pipe( pipe_slow ); 10407 ins_pipe( pipe_slow );
11050 %} 10408 %}
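A hedged note on the four log rules above (not part of the patch): they all reduce to FYL2X with a constant pushed first -- FLDLG2 pushes log10(2) so the product log10(2)*log2(x) is log10(x), and FLDLN2 pushes ln(2) so ln(2)*log2(x) is ln(x). In C++ terms (function names invented for illustration):

    #include <cmath>

    static double log10_via_log2(double x) { return 0.3010299956639812 * std::log2(x); }  // log10(2) * log2(x)
    static double ln_via_log2(double x)    { return 0.6931471805599453 * std::log2(x); }  // ln(2) * log2(x)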
11051 10409
11052 //-------------Float Instructions------------------------------- 10410 //-------------Float Instructions-------------------------------
11053 // Float Math 10411 // Float Math
11064 // jcc(Assembler::equal, exit); 10422 // jcc(Assembler::equal, exit);
11065 // movl(dst, greater_result); 10423 // movl(dst, greater_result);
11066 // exit: 10424 // exit:
11067 10425
11068 // P6 version of float compare, sets condition codes in EFLAGS 10426 // P6 version of float compare, sets condition codes in EFLAGS
11069 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 10427 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
11070 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10428 predicate(VM_Version::supports_cmov() && UseSSE == 0);
11071 match(Set cr (CmpF src1 src2)); 10429 match(Set cr (CmpF src1 src2));
11072 effect(KILL rax); 10430 effect(KILL rax);
11073 ins_cost(150); 10431 ins_cost(150);
11074 format %{ "FLD $src1\n\t" 10432 format %{ "FLD $src1\n\t"
11076 "JNP exit\n\t" 10434 "JNP exit\n\t"
11077 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10435 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
11078 "SAHF\n" 10436 "SAHF\n"
11079 "exit:\tNOP // avoid branch to branch" %} 10437 "exit:\tNOP // avoid branch to branch" %}
11080 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10438 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
11081 ins_encode( Push_Reg_D(src1), 10439 ins_encode( Push_Reg_DPR(src1),
11082 OpcP, RegOpc(src2), 10440 OpcP, RegOpc(src2),
11083 cmpF_P6_fixup ); 10441 cmpF_P6_fixup );
11084 ins_pipe( pipe_slow ); 10442 ins_pipe( pipe_slow );
11085 %} 10443 %}
11086 10444
11087 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10445 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
11088 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10446 predicate(VM_Version::supports_cmov() && UseSSE == 0);
11089 match(Set cr (CmpF src1 src2)); 10447 match(Set cr (CmpF src1 src2));
11090 ins_cost(100); 10448 ins_cost(100);
11091 format %{ "FLD $src1\n\t" 10449 format %{ "FLD $src1\n\t"
11092 "FUCOMIP ST,$src2 // P6 instruction" %} 10450 "FUCOMIP ST,$src2 // P6 instruction" %}
11093 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10451 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
11094 ins_encode( Push_Reg_D(src1), 10452 ins_encode( Push_Reg_DPR(src1),
11095 OpcP, RegOpc(src2)); 10453 OpcP, RegOpc(src2));
11096 ins_pipe( pipe_slow ); 10454 ins_pipe( pipe_slow );
11097 %} 10455 %}
11098 10456
11099 10457
11100 // Compare & branch 10458 // Compare & branch
11101 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 10459 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
11102 predicate(UseSSE == 0); 10460 predicate(UseSSE == 0);
11103 match(Set cr (CmpF src1 src2)); 10461 match(Set cr (CmpF src1 src2));
11104 effect(KILL rax); 10462 effect(KILL rax);
11105 ins_cost(200); 10463 ins_cost(200);
11106 format %{ "FLD $src1\n\t" 10464 format %{ "FLD $src1\n\t"
11109 "TEST AX,0x400\n\t" 10467 "TEST AX,0x400\n\t"
11110 "JZ,s flags\n\t" 10468 "JZ,s flags\n\t"
11111 "MOV AH,1\t# unordered treat as LT\n" 10469 "MOV AH,1\t# unordered treat as LT\n"
11112 "flags:\tSAHF" %} 10470 "flags:\tSAHF" %}
11113 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10471 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
11114 ins_encode( Push_Reg_D(src1), 10472 ins_encode( Push_Reg_DPR(src1),
11115 OpcP, RegOpc(src2), 10473 OpcP, RegOpc(src2),
11116 fpu_flags); 10474 fpu_flags);
11117 ins_pipe( pipe_slow ); 10475 ins_pipe( pipe_slow );
11118 %} 10476 %}
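// The TEST AX,0x400 / MOV AH,1 / SAHF dance above relies on the x87 status
// word having been stored into AX: C0/C2/C3 sit in bits 8/10/14 of AX, and
// SAHF loads AH into EFLAGS, so C0 becomes CF, C2 becomes PF and C3 becomes
// ZF.  TEST AX,0x400 checks C2 (the unordered bit); when it is set, AH is
// forced to 1 before the SAHF, leaving only CF set, which is how a NaN ends
// up comparing as 'below'.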
11119 10477
11120 // Compare vs zero into -1,0,1 10478 // Compare vs zero into -1,0,1
11121 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{ 10479 instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
11122 predicate(UseSSE == 0); 10480 predicate(UseSSE == 0);
11123 match(Set dst (CmpF3 src1 zero)); 10481 match(Set dst (CmpF3 src1 zero));
11124 effect(KILL cr, KILL rax); 10482 effect(KILL cr, KILL rax);
11125 ins_cost(280); 10483 ins_cost(280);
11126 format %{ "FTSTF $dst,$src1" %} 10484 format %{ "FTSTF $dst,$src1" %}
11127 opcode(0xE4, 0xD9); 10485 opcode(0xE4, 0xD9);
11128 ins_encode( Push_Reg_D(src1), 10486 ins_encode( Push_Reg_DPR(src1),
11129 OpcS, OpcP, PopFPU, 10487 OpcS, OpcP, PopFPU,
11130 CmpF_Result(dst)); 10488 CmpF_Result(dst));
11131 ins_pipe( pipe_slow ); 10489 ins_pipe( pipe_slow );
11132 %} 10490 %}
11133 10491
11134 // Compare into -1,0,1 10492 // Compare into -1,0,1
11135 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 10493 instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
11136 predicate(UseSSE == 0); 10494 predicate(UseSSE == 0);
11137 match(Set dst (CmpF3 src1 src2)); 10495 match(Set dst (CmpF3 src1 src2));
11138 effect(KILL cr, KILL rax); 10496 effect(KILL cr, KILL rax);
11139 ins_cost(300); 10497 ins_cost(300);
11140 format %{ "FCMPF $dst,$src1,$src2" %} 10498 format %{ "FCMPF $dst,$src1,$src2" %}
11141 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10499 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
11142 ins_encode( Push_Reg_D(src1), 10500 ins_encode( Push_Reg_DPR(src1),
11143 OpcP, RegOpc(src2), 10501 OpcP, RegOpc(src2),
11144 CmpF_Result(dst)); 10502 CmpF_Result(dst));
11145 ins_pipe( pipe_slow ); 10503 ins_pipe( pipe_slow );
11146 %} 10504 %}
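// As the format strings in this section suggest (the XMM forms below load -1
// first and jump straight to 'done' on parity), the CmpF3 patterns produce
// the 'l'-flavoured Java float compare: an unordered result collapses to -1.
// A plain C++ sketch of that contract (the helper name is made up here,
// purely for illustration):
static int cmpf3_result(float a, float b) {
  if (a != a || b != b) return -1;   // NaN on either side -> 'less than'
  if (a <  b) return -1;
  if (a == b) return  0;
  return  1;
}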
11147 10505
11148 // float compare and set condition codes in EFLAGS by XMM regs 10506 // float compare and set condition codes in EFLAGS by XMM regs
11149 instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{ 10507 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
11150 predicate(UseSSE>=1); 10508 predicate(UseSSE>=1);
11151 match(Set cr (CmpF dst src)); 10509 match(Set cr (CmpF src1 src2));
11152 effect(KILL rax);
11153 ins_cost(145); 10510 ins_cost(145);
11154 format %{ "COMISS $dst,$src\n" 10511 format %{ "UCOMISS $src1,$src2\n\t"
11155 "\tJNP exit\n" 10512 "JNP,s exit\n\t"
11156 "\tMOV ah,1 // saw a NaN, set CF\n" 10513 "PUSHF\t# saw NaN, set CF\n\t"
11157 "\tSAHF\n" 10514 "AND [rsp], #0xffffff2b\n\t"
11158 "exit:\tNOP // avoid branch to branch" %} 10515 "POPF\n"
11159 opcode(0x0F, 0x2F); 10516 "exit:" %}
11160 ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup); 10517 ins_encode %{
10518 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10519 emit_cmpfp_fixup(_masm);
10520 %}
11161 ins_pipe( pipe_slow ); 10521 ins_pipe( pipe_slow );
11162 %} 10522 %}
11163 10523
11164 instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{ 10524 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
11165 predicate(UseSSE>=1); 10525 predicate(UseSSE>=1);
11166 match(Set cr (CmpF dst src)); 10526 match(Set cr (CmpF src1 src2));
11167 ins_cost(100); 10527 ins_cost(100);
11168 format %{ "COMISS $dst,$src" %} 10528 format %{ "UCOMISS $src1,$src2" %}
11169 opcode(0x0F, 0x2F); 10529 ins_encode %{
11170 ins_encode(OpcP, OpcS, RegReg(dst, src)); 10530 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10531 %}
11171 ins_pipe( pipe_slow ); 10532 ins_pipe( pipe_slow );
11172 %} 10533 %}
11173 10534
11174 // float compare and set condition codes in EFLAGS by XMM regs 10535 // float compare and set condition codes in EFLAGS by XMM regs
11175 instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{ 10536 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
11176 predicate(UseSSE>=1); 10537 predicate(UseSSE>=1);
11177 match(Set cr (CmpF dst (LoadF src))); 10538 match(Set cr (CmpF src1 (LoadF src2)));
11178 effect(KILL rax);
11179 ins_cost(165); 10539 ins_cost(165);
11180 format %{ "COMISS $dst,$src\n" 10540 format %{ "UCOMISS $src1,$src2\n\t"
11181 "\tJNP exit\n" 10541 "JNP,s exit\n\t"
11182 "\tMOV ah,1 // saw a NaN, set CF\n" 10542 "PUSHF\t# saw NaN, set CF\n\t"
11183 "\tSAHF\n" 10543 "AND [rsp], #0xffffff2b\n\t"
11184 "exit:\tNOP // avoid branch to branch" %} 10544 "POPF\n"
11185 opcode(0x0F, 0x2F); 10545 "exit:" %}
11186 ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup); 10546 ins_encode %{
10547 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10548 emit_cmpfp_fixup(_masm);
10549 %}
11187 ins_pipe( pipe_slow ); 10550 ins_pipe( pipe_slow );
11188 %} 10551 %}
11189 10552
11190 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{ 10553 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
11191 predicate(UseSSE>=1); 10554 predicate(UseSSE>=1);
11192 match(Set cr (CmpF dst (LoadF src))); 10555 match(Set cr (CmpF src1 (LoadF src2)));
11193 ins_cost(100); 10556 ins_cost(100);
11194 format %{ "COMISS $dst,$src" %} 10557 format %{ "UCOMISS $src1,$src2" %}
11195 opcode(0x0F, 0x2F); 10558 ins_encode %{
11196 ins_encode(OpcP, OpcS, RegMem(dst, src)); 10559 __ ucomiss($src1$$XMMRegister, $src2$$Address);
10560 %}
11197 ins_pipe( pipe_slow ); 10561 ins_pipe( pipe_slow );
11198 %} 10562 %}
11199 10563
11200 // Compare into -1,0,1 in XMM 10564 // Compare into -1,0,1 in XMM
11201 instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{ 10565 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
11202 predicate(UseSSE>=1); 10566 predicate(UseSSE>=1);
11203 match(Set dst (CmpF3 src1 src2)); 10567 match(Set dst (CmpF3 src1 src2));
11204 effect(KILL cr); 10568 effect(KILL cr);
11205 ins_cost(255); 10569 ins_cost(255);
11206 format %{ "XOR $dst,$dst\n" 10570 format %{ "UCOMISS $src1, $src2\n\t"
11207 "\tCOMISS $src1,$src2\n" 10571 "MOV $dst, #-1\n\t"
11208 "\tJP,s nan\n" 10572 "JP,s done\n\t"
11209 "\tJEQ,s exit\n" 10573 "JB,s done\n\t"
11210 "\tJA,s inc\n" 10574 "SETNE $dst\n\t"
11211 "nan:\tDEC $dst\n" 10575 "MOVZB $dst, $dst\n"
11212 "\tJMP,s exit\n" 10576 "done:" %}
11213 "inc:\tINC $dst\n" 10577 ins_encode %{
11214 "exit:" 10578 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
11215 %} 10579 emit_cmpfp3(_masm, $dst$$Register);
11216 opcode(0x0F, 0x2F); 10580 %}
11217 ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
11218 ins_pipe( pipe_slow ); 10581 ins_pipe( pipe_slow );
11219 %} 10582 %}
11220 10583
11221 // Compare into -1,0,1 in XMM and memory 10584 // Compare into -1,0,1 in XMM and memory
11222 instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{ 10585 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
11223 predicate(UseSSE>=1); 10586 predicate(UseSSE>=1);
11224 match(Set dst (CmpF3 src1 (LoadF mem))); 10587 match(Set dst (CmpF3 src1 (LoadF src2)));
11225 effect(KILL cr); 10588 effect(KILL cr);
11226 ins_cost(275); 10589 ins_cost(275);
11227 format %{ "COMISS $src1,$mem\n" 10590 format %{ "UCOMISS $src1, $src2\n\t"
11228 "\tMOV $dst,0\t\t# do not blow flags\n" 10591 "MOV $dst, #-1\n\t"
11229 "\tJP,s nan\n" 10592 "JP,s done\n\t"
11230 "\tJEQ,s exit\n" 10593 "JB,s done\n\t"
11231 "\tJA,s inc\n" 10594 "SETNE $dst\n\t"
11232 "nan:\tDEC $dst\n" 10595 "MOVZB $dst, $dst\n"
11233 "\tJMP,s exit\n" 10596 "done:" %}
11234 "inc:\tINC $dst\n" 10597 ins_encode %{
11235 "exit:" 10598 __ ucomiss($src1$$XMMRegister, $src2$$Address);
11236 %} 10599 emit_cmpfp3(_masm, $dst$$Register);
11237 opcode(0x0F, 0x2F); 10600 %}
11238 ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
11239 ins_pipe( pipe_slow ); 10601 ins_pipe( pipe_slow );
11240 %} 10602 %}
11241 10603
11242 // Spill to obtain 24-bit precision 10604 // Spill to obtain 24-bit precision
11243 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{ 10605 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
11244 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10606 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11245 match(Set dst (SubF src1 src2)); 10607 match(Set dst (SubF src1 src2));
11246 10608
11247 format %{ "FSUB $dst,$src1 - $src2" %} 10609 format %{ "FSUB $dst,$src1 - $src2" %}
11248 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10610 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
11249 ins_encode( Push_Reg_F(src1), 10611 ins_encode( Push_Reg_FPR(src1),
11250 OpcReg_F(src2), 10612 OpcReg_FPR(src2),
11251 Pop_Mem_F(dst) ); 10613 Pop_Mem_FPR(dst) );
11252 ins_pipe( fpu_mem_reg_reg ); 10614 ins_pipe( fpu_mem_reg_reg );
11253 %} 10615 %}
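// Why the *24 forms spill to a stackSlotF: x87 arithmetic happens in the
// 80-bit registers, so forcing a genuine 24-bit result means pushing it
// through a 32-bit memory slot and picking it up again.  Roughly, in C++
// terms (a sketch assuming extended-precision x87 arithmetic, not code from
// this file):
static float round_via_spill(double wide_intermediate) {
  volatile float slot = (float)wide_intermediate;  // the FSTP_S to $dst
  return slot;                                     // reload yields a true float
}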
11254 // 10616 //
11255 // This instruction does not round to 24-bits 10617 // This instruction does not round to 24-bits
11256 instruct subF_reg(regF dst, regF src) %{ 10618 instruct subFPR_reg(regFPR dst, regFPR src) %{
11257 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10619 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11258 match(Set dst (SubF dst src)); 10620 match(Set dst (SubF dst src));
11259 10621
11260 format %{ "FSUB $dst,$src" %} 10622 format %{ "FSUB $dst,$src" %}
11261 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10623 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
11262 ins_encode( Push_Reg_F(src), 10624 ins_encode( Push_Reg_FPR(src),
11263 OpcP, RegOpc(dst) ); 10625 OpcP, RegOpc(dst) );
11264 ins_pipe( fpu_reg_reg ); 10626 ins_pipe( fpu_reg_reg );
11265 %} 10627 %}
11266 10628
11267 // Spill to obtain 24-bit precision 10629 // Spill to obtain 24-bit precision
11268 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{ 10630 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
11269 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10631 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11270 match(Set dst (AddF src1 src2)); 10632 match(Set dst (AddF src1 src2));
11271 10633
11272 format %{ "FADD $dst,$src1,$src2" %} 10634 format %{ "FADD $dst,$src1,$src2" %}
11273 opcode(0xD8, 0x0); /* D8 C0+i */ 10635 opcode(0xD8, 0x0); /* D8 C0+i */
11274 ins_encode( Push_Reg_F(src2), 10636 ins_encode( Push_Reg_FPR(src2),
11275 OpcReg_F(src1), 10637 OpcReg_FPR(src1),
11276 Pop_Mem_F(dst) ); 10638 Pop_Mem_FPR(dst) );
11277 ins_pipe( fpu_mem_reg_reg ); 10639 ins_pipe( fpu_mem_reg_reg );
11278 %} 10640 %}
11279 // 10641 //
11280 // This instruction does not round to 24-bits 10642 // This instruction does not round to 24-bits
11281 instruct addF_reg(regF dst, regF src) %{ 10643 instruct addFPR_reg(regFPR dst, regFPR src) %{
11282 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10644 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11283 match(Set dst (AddF dst src)); 10645 match(Set dst (AddF dst src));
11284 10646
11285 format %{ "FLD $src\n\t" 10647 format %{ "FLD $src\n\t"
11286 "FADDp $dst,ST" %} 10648 "FADDp $dst,ST" %}
11287 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10649 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
11288 ins_encode( Push_Reg_F(src), 10650 ins_encode( Push_Reg_FPR(src),
11289 OpcP, RegOpc(dst) ); 10651 OpcP, RegOpc(dst) );
11290 ins_pipe( fpu_reg_reg ); 10652 ins_pipe( fpu_reg_reg );
11291 %} 10653 %}
11292 10654
11293 // Add two single precision floating point values in xmm 10655 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
11294 instruct addX_reg(regX dst, regX src) %{
11295 predicate(UseSSE>=1);
11296 match(Set dst (AddF dst src));
11297 format %{ "ADDSS $dst,$src" %}
11298 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
11299 ins_pipe( pipe_slow );
11300 %}
11301
11302 instruct addX_imm(regX dst, immXF con) %{
11303 predicate(UseSSE>=1);
11304 match(Set dst (AddF dst con));
11305 format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11306 ins_encode %{
11307 __ addss($dst$$XMMRegister, $constantaddress($con));
11308 %}
11309 ins_pipe(pipe_slow);
11310 %}
11311
11312 instruct addX_mem(regX dst, memory mem) %{
11313 predicate(UseSSE>=1);
11314 match(Set dst (AddF dst (LoadF mem)));
11315 format %{ "ADDSS $dst,$mem" %}
11316 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
11317 ins_pipe( pipe_slow );
11318 %}
11319
11320 // Subtract two single precision floating point values in xmm
11321 instruct subX_reg(regX dst, regX src) %{
11322 predicate(UseSSE>=1);
11323 match(Set dst (SubF dst src));
11324 format %{ "SUBSS $dst,$src" %}
11325 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
11326 ins_pipe( pipe_slow );
11327 %}
11328
11329 instruct subX_imm(regX dst, immXF con) %{
11330 predicate(UseSSE>=1);
11331 match(Set dst (SubF dst con));
11332 format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11333 ins_encode %{
11334 __ subss($dst$$XMMRegister, $constantaddress($con));
11335 %}
11336 ins_pipe(pipe_slow);
11337 %}
11338
11339 instruct subX_mem(regX dst, memory mem) %{
11340 predicate(UseSSE>=1);
11341 match(Set dst (SubF dst (LoadF mem)));
11342 format %{ "SUBSS $dst,$mem" %}
11343 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
11344 ins_pipe( pipe_slow );
11345 %}
11346
11347 // Multiply two single precision floating point values in xmm
11348 instruct mulX_reg(regX dst, regX src) %{
11349 predicate(UseSSE>=1);
11350 match(Set dst (MulF dst src));
11351 format %{ "MULSS $dst,$src" %}
11352 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
11353 ins_pipe( pipe_slow );
11354 %}
11355
11356 instruct mulX_imm(regX dst, immXF con) %{
11357 predicate(UseSSE>=1);
11358 match(Set dst (MulF dst con));
11359 format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11360 ins_encode %{
11361 __ mulss($dst$$XMMRegister, $constantaddress($con));
11362 %}
11363 ins_pipe(pipe_slow);
11364 %}
11365
11366 instruct mulX_mem(regX dst, memory mem) %{
11367 predicate(UseSSE>=1);
11368 match(Set dst (MulF dst (LoadF mem)));
11369 format %{ "MULSS $dst,$mem" %}
11370 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
11371 ins_pipe( pipe_slow );
11372 %}
11373
11374 // Divide two single precision floating point values in xmm
11375 instruct divX_reg(regX dst, regX src) %{
11376 predicate(UseSSE>=1);
11377 match(Set dst (DivF dst src));
11378 format %{ "DIVSS $dst,$src" %}
11379 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
11380 ins_pipe( pipe_slow );
11381 %}
11382
11383 instruct divX_imm(regX dst, immXF con) %{
11384 predicate(UseSSE>=1);
11385 match(Set dst (DivF dst con));
11386 format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
11387 ins_encode %{
11388 __ divss($dst$$XMMRegister, $constantaddress($con));
11389 %}
11390 ins_pipe(pipe_slow);
11391 %}
11392
11393 instruct divX_mem(regX dst, memory mem) %{
11394 predicate(UseSSE>=1);
11395 match(Set dst (DivF dst (LoadF mem)));
11396 format %{ "DIVSS $dst,$mem" %}
11397 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
11398 ins_pipe( pipe_slow );
11399 %}
11400
11401 // Get the square root of a single precision floating point value in xmm
11402 instruct sqrtX_reg(regX dst, regX src) %{
11403 predicate(UseSSE>=1);
11404 match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
11405 format %{ "SQRTSS $dst,$src" %}
11406 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
11407 ins_pipe( pipe_slow );
11408 %}
11409
11410 instruct sqrtX_mem(regX dst, memory mem) %{
11411 predicate(UseSSE>=1);
11412 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
11413 format %{ "SQRTSS $dst,$mem" %}
11414 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11415 ins_pipe( pipe_slow );
11416 %}
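// The ConvD2F (SqrtD (ConvF2D src)) match above works because square root is
// correctly rounded and a 53-bit intermediate is wide enough that
// (float)sqrt((double)f) equals the correctly rounded single-precision sqrt
// for every float f, so the widen/narrow pair can be folded into one SQRTSS.
// A minimal check of that fact (sketch, not code from this file):
#include <cmath>
static bool sqrt_fold_ok(float f) {   // true for every finite f >= 0
  return (float)std::sqrt((double)f) == std::sqrt(f);
}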
11417
11418 // Get the square root of a double precision floating point value in xmm
11419 instruct sqrtXD_reg(regXD dst, regXD src) %{
11420 predicate(UseSSE>=2);
11421 match(Set dst (SqrtD src));
11422 format %{ "SQRTSD $dst,$src" %}
11423 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
11424 ins_pipe( pipe_slow );
11425 %}
11426
11427 instruct sqrtXD_mem(regXD dst, memory mem) %{
11428 predicate(UseSSE>=2);
11429 match(Set dst (SqrtD (LoadD mem)));
11430 format %{ "SQRTSD $dst,$mem" %}
11431 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11432 ins_pipe( pipe_slow );
11433 %}
11434
11435 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
11436 predicate(UseSSE==0); 10656 predicate(UseSSE==0);
11437 match(Set dst (AbsF src)); 10657 match(Set dst (AbsF src));
11438 ins_cost(100); 10658 ins_cost(100);
11439 format %{ "FABS" %} 10659 format %{ "FABS" %}
11440 opcode(0xE1, 0xD9); 10660 opcode(0xE1, 0xD9);
11441 ins_encode( OpcS, OpcP ); 10661 ins_encode( OpcS, OpcP );
11442 ins_pipe( fpu_reg_reg ); 10662 ins_pipe( fpu_reg_reg );
11443 %} 10663 %}
11444 10664
11445 instruct absX_reg(regX dst ) %{ 10665 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
11446 predicate(UseSSE>=1);
11447 match(Set dst (AbsF dst));
11448 format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11449 ins_encode( AbsXF_encoding(dst));
11450 ins_pipe( pipe_slow );
11451 %}
11452
11453 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11454 predicate(UseSSE==0); 10666 predicate(UseSSE==0);
11455 match(Set dst (NegF src)); 10667 match(Set dst (NegF src));
11456 ins_cost(100); 10668 ins_cost(100);
11457 format %{ "FCHS" %} 10669 format %{ "FCHS" %}
11458 opcode(0xE0, 0xD9); 10670 opcode(0xE0, 0xD9);
11459 ins_encode( OpcS, OpcP ); 10671 ins_encode( OpcS, OpcP );
11460 ins_pipe( fpu_reg_reg ); 10672 ins_pipe( fpu_reg_reg );
11461 %} 10673 %}
11462 10674
11463 instruct negX_reg( regX dst ) %{ 10675 // Cisc-alternate to addFPR_reg
11464 predicate(UseSSE>=1);
11465 match(Set dst (NegF dst));
11466 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
11467 ins_encode( NegXF_encoding(dst));
11468 ins_pipe( pipe_slow );
11469 %}
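// The SSE abs/neg forms above work purely on the bit pattern: AND with
// 0x7FFFFFFF clears the sign bit and XOR with 0x80000000 flips it, with no
// branches and no flags involved.  Equivalent C++ (a sketch, not code from
// this file):
#include <cstdint>
#include <cstring>
static float abs_by_mask(float x) {   // ANDPS $dst,[0x7FFFFFFF]
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits &= 0x7FFFFFFFu;                // clear the sign bit
  std::memcpy(&x, &bits, sizeof bits);
  return x;
}
static float neg_by_flip(float x) {   // XORPS $dst,[0x80000000]
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits ^= 0x80000000u;                // flip the sign bit
  std::memcpy(&x, &bits, sizeof bits);
  return x;
}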
11470
11471 // Cisc-alternate to addF_reg
11472 // Spill to obtain 24-bit precision 10676 // Spill to obtain 24-bit precision
11473 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 10677 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
11474 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10678 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11475 match(Set dst (AddF src1 (LoadF src2))); 10679 match(Set dst (AddF src1 (LoadF src2)));
11476 10680
11477 format %{ "FLD $src2\n\t" 10681 format %{ "FLD $src2\n\t"
11478 "FADD ST,$src1\n\t" 10682 "FADD ST,$src1\n\t"
11479 "FSTP_S $dst" %} 10683 "FSTP_S $dst" %}
11480 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10684 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11481 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10685 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11482 OpcReg_F(src1), 10686 OpcReg_FPR(src1),
11483 Pop_Mem_F(dst) ); 10687 Pop_Mem_FPR(dst) );
11484 ins_pipe( fpu_mem_reg_mem ); 10688 ins_pipe( fpu_mem_reg_mem );
11485 %} 10689 %}
11486 // 10690 //
11487 // Cisc-alternate to addF_reg 10691 // Cisc-alternate to addFPR_reg
11488 // This instruction does not round to 24-bits 10692 // This instruction does not round to 24-bits
11489 instruct addF_reg_mem(regF dst, memory src) %{ 10693 instruct addFPR_reg_mem(regFPR dst, memory src) %{
11490 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10694 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11491 match(Set dst (AddF dst (LoadF src))); 10695 match(Set dst (AddF dst (LoadF src)));
11492 10696
11493 format %{ "FADD $dst,$src" %} 10697 format %{ "FADD $dst,$src" %}
11494 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10698 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
11497 ins_pipe( fpu_reg_mem ); 10701 ins_pipe( fpu_reg_mem );
11498 %} 10702 %}
11499 10703
11500 // // Following two instructions for _222_mpegaudio 10704 // // Following two instructions for _222_mpegaudio
11501 // Spill to obtain 24-bit precision 10705 // Spill to obtain 24-bit precision
11502 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{ 10706 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
11503 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10707 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11504 match(Set dst (AddF src1 src2)); 10708 match(Set dst (AddF src1 src2));
11505 10709
11506 format %{ "FADD $dst,$src1,$src2" %} 10710 format %{ "FADD $dst,$src1,$src2" %}
11507 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10711 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11508 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10712 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11509 OpcReg_F(src2), 10713 OpcReg_FPR(src2),
11510 Pop_Mem_F(dst) ); 10714 Pop_Mem_FPR(dst) );
11511 ins_pipe( fpu_mem_reg_mem ); 10715 ins_pipe( fpu_mem_reg_mem );
11512 %} 10716 %}
11513 10717
11514 // Cisc-spill variant 10718 // Cisc-spill variant
11515 // Spill to obtain 24-bit precision 10719 // Spill to obtain 24-bit precision
11516 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10720 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11517 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10721 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11518 match(Set dst (AddF src1 (LoadF src2))); 10722 match(Set dst (AddF src1 (LoadF src2)));
11519 10723
11520 format %{ "FADD $dst,$src1,$src2 cisc" %} 10724 format %{ "FADD $dst,$src1,$src2 cisc" %}
11521 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10725 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
11522 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10726 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11523 set_instruction_start, 10727 set_instruction_start,
11524 OpcP, RMopc_Mem(secondary,src1), 10728 OpcP, RMopc_Mem(secondary,src1),
11525 Pop_Mem_F(dst) ); 10729 Pop_Mem_FPR(dst) );
11526 ins_pipe( fpu_mem_mem_mem ); 10730 ins_pipe( fpu_mem_mem_mem );
11527 %} 10731 %}
11528 10732
11529 // Spill to obtain 24-bit precision 10733 // Spill to obtain 24-bit precision
11530 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10734 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11531 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10735 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11532 match(Set dst (AddF src1 src2)); 10736 match(Set dst (AddF src1 src2));
11533 10737
11534 format %{ "FADD $dst,$src1,$src2" %} 10738 format %{ "FADD $dst,$src1,$src2" %}
11535 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10739 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
11536 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10740 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11537 set_instruction_start, 10741 set_instruction_start,
11538 OpcP, RMopc_Mem(secondary,src1), 10742 OpcP, RMopc_Mem(secondary,src1),
11539 Pop_Mem_F(dst) ); 10743 Pop_Mem_FPR(dst) );
11540 ins_pipe( fpu_mem_mem_mem ); 10744 ins_pipe( fpu_mem_mem_mem );
11541 %} 10745 %}
11542 10746
11543 10747
11544 // Spill to obtain 24-bit precision 10748 // Spill to obtain 24-bit precision
11545 instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 10749 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
11546 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10750 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11547 match(Set dst (AddF src con)); 10751 match(Set dst (AddF src con));
11548 format %{ "FLD $src\n\t" 10752 format %{ "FLD $src\n\t"
11549 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10753 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11550 "FSTP_S $dst" %} 10754 "FSTP_S $dst" %}
11555 %} 10759 %}
11556 ins_pipe(fpu_mem_reg_con); 10760 ins_pipe(fpu_mem_reg_con);
11557 %} 10761 %}
11558 // 10762 //
11559 // This instruction does not round to 24-bits 10763 // This instruction does not round to 24-bits
11560 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 10764 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
11561 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10765 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11562 match(Set dst (AddF src con)); 10766 match(Set dst (AddF src con));
11563 format %{ "FLD $src\n\t" 10767 format %{ "FLD $src\n\t"
11564 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10768 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11565 "FSTP $dst" %} 10769 "FSTP $dst" %}
11570 %} 10774 %}
11571 ins_pipe(fpu_reg_reg_con); 10775 ins_pipe(fpu_reg_reg_con);
11572 %} 10776 %}
11573 10777
11574 // Spill to obtain 24-bit precision 10778 // Spill to obtain 24-bit precision
11575 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{ 10779 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
11576 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10780 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11577 match(Set dst (MulF src1 src2)); 10781 match(Set dst (MulF src1 src2));
11578 10782
11579 format %{ "FLD $src1\n\t" 10783 format %{ "FLD $src1\n\t"
11580 "FMUL $src2\n\t" 10784 "FMUL $src2\n\t"
11581 "FSTP_S $dst" %} 10785 "FSTP_S $dst" %}
11582 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10786 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11583 ins_encode( Push_Reg_F(src1), 10787 ins_encode( Push_Reg_FPR(src1),
11584 OpcReg_F(src2), 10788 OpcReg_FPR(src2),
11585 Pop_Mem_F(dst) ); 10789 Pop_Mem_FPR(dst) );
11586 ins_pipe( fpu_mem_reg_reg ); 10790 ins_pipe( fpu_mem_reg_reg );
11587 %} 10791 %}
11588 // 10792 //
11589 // This instruction does not round to 24-bits 10793 // This instruction does not round to 24-bits
11590 instruct mulF_reg(regF dst, regF src1, regF src2) %{ 10794 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
11591 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10795 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11592 match(Set dst (MulF src1 src2)); 10796 match(Set dst (MulF src1 src2));
11593 10797
11594 format %{ "FLD $src1\n\t" 10798 format %{ "FLD $src1\n\t"
11595 "FMUL $src2\n\t" 10799 "FMUL $src2\n\t"
11596 "FSTP_S $dst" %} 10800 "FSTP_S $dst" %}
11597 opcode(0xD8, 0x1); /* D8 C8+i */ 10801 opcode(0xD8, 0x1); /* D8 C8+i */
11598 ins_encode( Push_Reg_F(src2), 10802 ins_encode( Push_Reg_FPR(src2),
11599 OpcReg_F(src1), 10803 OpcReg_FPR(src1),
11600 Pop_Reg_F(dst) ); 10804 Pop_Reg_FPR(dst) );
11601 ins_pipe( fpu_reg_reg_reg ); 10805 ins_pipe( fpu_reg_reg_reg );
11602 %} 10806 %}
11603 10807
11604 10808
11605 // Spill to obtain 24-bit precision 10809 // Spill to obtain 24-bit precision
11606 // Cisc-alternate to reg-reg multiply 10810 // Cisc-alternate to reg-reg multiply
11607 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 10811 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
11608 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10812 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11609 match(Set dst (MulF src1 (LoadF src2))); 10813 match(Set dst (MulF src1 (LoadF src2)));
11610 10814
11611 format %{ "FLD_S $src2\n\t" 10815 format %{ "FLD_S $src2\n\t"
11612 "FMUL $src1\n\t" 10816 "FMUL $src1\n\t"
11613 "FSTP_S $dst" %} 10817 "FSTP_S $dst" %}
11614 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10818 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
11615 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10819 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11616 OpcReg_F(src1), 10820 OpcReg_FPR(src1),
11617 Pop_Mem_F(dst) ); 10821 Pop_Mem_FPR(dst) );
11618 ins_pipe( fpu_mem_reg_mem ); 10822 ins_pipe( fpu_mem_reg_mem );
11619 %} 10823 %}
11620 // 10824 //
11621 // This instruction does not round to 24-bits 10825 // This instruction does not round to 24-bits
11622 // Cisc-alternate to reg-reg multiply 10826 // Cisc-alternate to reg-reg multiply
11623 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 10827 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
11624 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10828 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11625 match(Set dst (MulF src1 (LoadF src2))); 10829 match(Set dst (MulF src1 (LoadF src2)));
11626 10830
11627 format %{ "FMUL $dst,$src1,$src2" %} 10831 format %{ "FMUL $dst,$src1,$src2" %}
11628 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10832 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
11629 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10833 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11630 OpcReg_F(src1), 10834 OpcReg_FPR(src1),
11631 Pop_Reg_F(dst) ); 10835 Pop_Reg_FPR(dst) );
11632 ins_pipe( fpu_reg_reg_mem ); 10836 ins_pipe( fpu_reg_reg_mem );
11633 %} 10837 %}
11634 10838
11635 // Spill to obtain 24-bit precision 10839 // Spill to obtain 24-bit precision
11636 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10840 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11637 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10841 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11638 match(Set dst (MulF src1 src2)); 10842 match(Set dst (MulF src1 src2));
11639 10843
11640 format %{ "FMUL $dst,$src1,$src2" %} 10844 format %{ "FMUL $dst,$src1,$src2" %}
11641 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10845 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
11642 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10846 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11643 set_instruction_start, 10847 set_instruction_start,
11644 OpcP, RMopc_Mem(secondary,src1), 10848 OpcP, RMopc_Mem(secondary,src1),
11645 Pop_Mem_F(dst) ); 10849 Pop_Mem_FPR(dst) );
11646 ins_pipe( fpu_mem_mem_mem ); 10850 ins_pipe( fpu_mem_mem_mem );
11647 %} 10851 %}
11648 10852
11649 // Spill to obtain 24-bit precision 10853 // Spill to obtain 24-bit precision
11650 instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 10854 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
11651 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10855 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11652 match(Set dst (MulF src con)); 10856 match(Set dst (MulF src con));
11653 10857
11654 format %{ "FLD $src\n\t" 10858 format %{ "FLD $src\n\t"
11655 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10859 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11661 %} 10865 %}
11662 ins_pipe(fpu_mem_reg_con); 10866 ins_pipe(fpu_mem_reg_con);
11663 %} 10867 %}
11664 // 10868 //
11665 // This instruction does not round to 24-bits 10869 // This instruction does not round to 24-bits
11666 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 10870 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
11667 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10871 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11668 match(Set dst (MulF src con)); 10872 match(Set dst (MulF src con));
11669 10873
11670 format %{ "FLD $src\n\t" 10874 format %{ "FLD $src\n\t"
11671 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10875 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
11678 ins_pipe(fpu_reg_reg_con); 10882 ins_pipe(fpu_reg_reg_con);
11679 %} 10883 %}
11680 10884
11681 10885
11682 // 10886 //
11683 // MACRO1 -- subsume unshared load into mulF 10887 // MACRO1 -- subsume unshared load into mulFPR
11684 // This instruction does not round to 24-bits 10888 // This instruction does not round to 24-bits
11685 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{ 10889 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
11686 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10890 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11687 match(Set dst (MulF (LoadF mem1) src)); 10891 match(Set dst (MulF (LoadF mem1) src));
11688 10892
11689 format %{ "FLD $mem1 ===MACRO1===\n\t" 10893 format %{ "FLD $mem1 ===MACRO1===\n\t"
11690 "FMUL ST,$src\n\t" 10894 "FMUL ST,$src\n\t"
11691 "FSTP $dst" %} 10895 "FSTP $dst" %}
11692 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10896 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
11693 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10897 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11694 OpcReg_F(src), 10898 OpcReg_FPR(src),
11695 Pop_Reg_F(dst) ); 10899 Pop_Reg_FPR(dst) );
11696 ins_pipe( fpu_reg_reg_mem ); 10900 ins_pipe( fpu_reg_reg_mem );
11697 %} 10901 %}
11698 // 10902 //
11699 // MACRO2 -- addF a mulF which subsumed an unshared load 10903 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
11700 // This instruction does not round to 24-bits 10904 // This instruction does not round to 24-bits
11701 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{ 10905 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
11702 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10906 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11703 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10907 match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11704 ins_cost(95); 10908 ins_cost(95);
11705 10909
11706 format %{ "FLD $mem1 ===MACRO2===\n\t" 10910 format %{ "FLD $mem1 ===MACRO2===\n\t"
11707 "FMUL ST,$src1 subsume mulF left load\n\t" 10911 "FMUL ST,$src1 subsume mulFPR left load\n\t"
11708 "FADD ST,$src2\n\t" 10912 "FADD ST,$src2\n\t"
11709 "FSTP $dst" %} 10913 "FSTP $dst" %}
11710 opcode(0xD9); /* LoadF D9 /0 */ 10914 opcode(0xD9); /* LoadF D9 /0 */
11711 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10915 ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11712 FMul_ST_reg(src1), 10916 FMul_ST_reg(src1),
11713 FAdd_ST_reg(src2), 10917 FAdd_ST_reg(src2),
11714 Pop_Reg_F(dst) ); 10918 Pop_Reg_FPR(dst) );
11715 ins_pipe( fpu_reg_mem_reg_reg ); 10919 ins_pipe( fpu_reg_mem_reg_reg );
11716 %} 10920 %}
11717 10921
11718 // MACRO3 -- addF a mulF 10922 // MACRO3 -- addFPR a mulFPR
11719 // This instruction does not round to 24-bits. It is a '2-address' 10923 // This instruction does not round to 24-bits. It is a '2-address'
11720 // instruction in that the result goes back to src2. This eliminates 10924 // instruction in that the result goes back to src2. This eliminates
11721 // a move from the macro; possibly the register allocator will have 10925 // a move from the macro; possibly the register allocator will have
11722 // to add it back (and maybe not). 10926 // to add it back (and maybe not).
11723 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{ 10927 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
11724 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10928 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11725 match(Set src2 (AddF (MulF src0 src1) src2)); 10929 match(Set src2 (AddF (MulF src0 src1) src2));
11726 10930
11727 format %{ "FLD $src0 ===MACRO3===\n\t" 10931 format %{ "FLD $src0 ===MACRO3===\n\t"
11728 "FMUL ST,$src1\n\t" 10932 "FMUL ST,$src1\n\t"
11729 "FADDP $src2,ST" %} 10933 "FADDP $src2,ST" %}
11730 opcode(0xD9); /* LoadF D9 /0 */ 10934 opcode(0xD9); /* LoadF D9 /0 */
11731 ins_encode( Push_Reg_F(src0), 10935 ins_encode( Push_Reg_FPR(src0),
11732 FMul_ST_reg(src1), 10936 FMul_ST_reg(src1),
11733 FAddP_reg_ST(src2) ); 10937 FAddP_reg_ST(src2) );
11734 ins_pipe( fpu_reg_reg_reg ); 10938 ins_pipe( fpu_reg_reg_reg );
11735 %} 10939 %}
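// The source shapes MACRO2/MACRO3 are aimed at, in C terms (the names below
// are made up for illustration; the authoritative shapes are the match()
// clauses above):
//   acc += a[i] * scale;   // MACRO2: AddF (MulF (LoadF mem1) src1) src2
//   acc += x * y;          // MACRO3: result written back into src2 (acc)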
11736 10940
11737 // MACRO4 -- divF subF 10941 // MACRO4 -- divFPR subFPR
11738 // This instruction does not round to 24-bits 10942 // This instruction does not round to 24-bits
11739 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{ 10943 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
11740 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10944 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11741 match(Set dst (DivF (SubF src2 src1) src3)); 10945 match(Set dst (DivF (SubF src2 src1) src3));
11742 10946
11743 format %{ "FLD $src2 ===MACRO4===\n\t" 10947 format %{ "FLD $src2 ===MACRO4===\n\t"
11744 "FSUB ST,$src1\n\t" 10948 "FSUB ST,$src1\n\t"
11745 "FDIV ST,$src3\n\t" 10949 "FDIV ST,$src3\n\t"
11746 "FSTP $dst" %} 10950 "FSTP $dst" %}
11747 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10951 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11748 ins_encode( Push_Reg_F(src2), 10952 ins_encode( Push_Reg_FPR(src2),
11749 subF_divF_encode(src1,src3), 10953 subFPR_divFPR_encode(src1,src3),
11750 Pop_Reg_F(dst) ); 10954 Pop_Reg_FPR(dst) );
11751 ins_pipe( fpu_reg_reg_reg_reg ); 10955 ins_pipe( fpu_reg_reg_reg_reg );
11752 %} 10956 %}
11753 10957
11754 // Spill to obtain 24-bit precision 10958 // Spill to obtain 24-bit precision
11755 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{ 10959 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
11756 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10960 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11757 match(Set dst (DivF src1 src2)); 10961 match(Set dst (DivF src1 src2));
11758 10962
11759 format %{ "FDIV $dst,$src1,$src2" %} 10963 format %{ "FDIV $dst,$src1,$src2" %}
11760 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10964 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
11761 ins_encode( Push_Reg_F(src1), 10965 ins_encode( Push_Reg_FPR(src1),
11762 OpcReg_F(src2), 10966 OpcReg_FPR(src2),
11763 Pop_Mem_F(dst) ); 10967 Pop_Mem_FPR(dst) );
11764 ins_pipe( fpu_mem_reg_reg ); 10968 ins_pipe( fpu_mem_reg_reg );
11765 %} 10969 %}
11766 // 10970 //
11767 // This instruction does not round to 24-bits 10971 // This instruction does not round to 24-bits
11768 instruct divF_reg(regF dst, regF src) %{ 10972 instruct divFPR_reg(regFPR dst, regFPR src) %{
11769 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10973 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11770 match(Set dst (DivF dst src)); 10974 match(Set dst (DivF dst src));
11771 10975
11772 format %{ "FDIV $dst,$src" %} 10976 format %{ "FDIV $dst,$src" %}
11773 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10977 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11774 ins_encode( Push_Reg_F(src), 10978 ins_encode( Push_Reg_FPR(src),
11775 OpcP, RegOpc(dst) ); 10979 OpcP, RegOpc(dst) );
11776 ins_pipe( fpu_reg_reg ); 10980 ins_pipe( fpu_reg_reg );
11777 %} 10981 %}
11778 10982
11779 10983
11780 // Spill to obtain 24-bit precision 10984 // Spill to obtain 24-bit precision
11781 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 10985 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
11782 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10986 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11783 match(Set dst (ModF src1 src2)); 10987 match(Set dst (ModF src1 src2));
11784 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 10988 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
11785 10989
11786 format %{ "FMOD $dst,$src1,$src2" %} 10990 format %{ "FMOD $dst,$src1,$src2" %}
11787 ins_encode( Push_Reg_Mod_D(src1, src2), 10991 ins_encode( Push_Reg_Mod_DPR(src1, src2),
11788 emitModD(), 10992 emitModDPR(),
11789 Push_Result_Mod_D(src2), 10993 Push_Result_Mod_DPR(src2),
11790 Pop_Mem_F(dst)); 10994 Pop_Mem_FPR(dst));
11791 ins_pipe( pipe_slow ); 10995 ins_pipe( pipe_slow );
11792 %} 10996 %}
11793 // 10997 //
11794 // This instruction does not round to 24-bits 10998 // This instruction does not round to 24-bits
11795 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{ 10999 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
11796 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11000 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11797 match(Set dst (ModF dst src)); 11001 match(Set dst (ModF dst src));
11798 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 11002 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
11799 11003
11800 format %{ "FMOD $dst,$src" %} 11004 format %{ "FMOD $dst,$src" %}
11801 ins_encode(Push_Reg_Mod_D(dst, src), 11005 ins_encode(Push_Reg_Mod_DPR(dst, src),
11802 emitModD(), 11006 emitModDPR(),
11803 Push_Result_Mod_D(src), 11007 Push_Result_Mod_DPR(src),
11804 Pop_Reg_F(dst)); 11008 Pop_Reg_FPR(dst));
11805 ins_pipe( pipe_slow ); 11009 ins_pipe( pipe_slow );
11806 %} 11010 %}
11807 11011
11808 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{ 11012 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
11809 predicate(UseSSE>=1); 11013 predicate(UseSSE>=1);
11810 match(Set dst (ModF src0 src1)); 11014 match(Set dst (ModF src0 src1));
11811 effect(KILL rax, KILL cr); 11015 effect(KILL rax, KILL cr);
11812 format %{ "SUB ESP,4\t # FMOD\n" 11016 format %{ "SUB ESP,4\t # FMOD\n"
11813 "\tMOVSS [ESP+0],$src1\n" 11017 "\tMOVSS [ESP+0],$src1\n"
11823 "\tMOVSS $dst,[ESP+0]\n" 11027 "\tMOVSS $dst,[ESP+0]\n"
11824 "\tADD ESP,4\n" 11028 "\tADD ESP,4\n"
11825 "\tFSTP ST0\t # Restore FPU Stack" 11029 "\tFSTP ST0\t # Restore FPU Stack"
11826 %} 11030 %}
11827 ins_cost(250); 11031 ins_cost(250);
11828 ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU); 11032 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
11829 ins_pipe( pipe_slow ); 11033 ins_pipe( pipe_slow );
11830 %} 11034 %}
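// SSE has no remainder instruction at all, so every ModF pattern funnels
// through the x87 remainder loop in emitModDPR.  What it must compute is the
// truncating remainder that Java's '%' specifies, i.e. the same thing C's
// fmod produces, not the IEEE-754 remainder.  Rough statement of that
// contract (sketch, not code from this file):
#include <cmath>
static float java_frem(float x, float y) {
  return std::fmod(x, y);   // truncated-division remainder; sign follows x
}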
11831 11035
11832 11036
11833 //----------Arithmetic Conversion Instructions--------------------------------- 11037 //----------Arithmetic Conversion Instructions---------------------------------
11834 // The conversion operations are all Alpha sorted. Please keep it that way! 11038 // The conversion operations are all Alpha sorted. Please keep it that way!
11835 11039
11836 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{ 11040 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
11837 predicate(UseSSE==0); 11041 predicate(UseSSE==0);
11838 match(Set dst (RoundFloat src)); 11042 match(Set dst (RoundFloat src));
11839 ins_cost(125); 11043 ins_cost(125);
11840 format %{ "FST_S $dst,$src\t# F-round" %} 11044 format %{ "FST_S $dst,$src\t# F-round" %}
11841 ins_encode( Pop_Mem_Reg_F(dst, src) ); 11045 ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11842 ins_pipe( fpu_mem_reg ); 11046 ins_pipe( fpu_mem_reg );
11843 %} 11047 %}
11844 11048
11845 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{ 11049 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
11846 predicate(UseSSE<=1); 11050 predicate(UseSSE<=1);
11847 match(Set dst (RoundDouble src)); 11051 match(Set dst (RoundDouble src));
11848 ins_cost(125); 11052 ins_cost(125);
11849 format %{ "FST_D $dst,$src\t# D-round" %} 11053 format %{ "FST_D $dst,$src\t# D-round" %}
11850 ins_encode( Pop_Mem_Reg_D(dst, src) ); 11054 ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11851 ins_pipe( fpu_mem_reg ); 11055 ins_pipe( fpu_mem_reg );
11852 %} 11056 %}
11853 11057
11854 // Force rounding to 24-bit precision and 8-bit exponent 11058 // Force rounding to 24-bit precision and 8-bit exponent
11855 instruct convD2F_reg(stackSlotF dst, regD src) %{ 11059 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
11856 predicate(UseSSE==0); 11060 predicate(UseSSE==0);
11857 match(Set dst (ConvD2F src)); 11061 match(Set dst (ConvD2F src));
11858 format %{ "FST_S $dst,$src\t# F-round" %} 11062 format %{ "FST_S $dst,$src\t# F-round" %}
11859 expand %{ 11063 expand %{
11860 roundFloat_mem_reg(dst,src); 11064 roundFloat_mem_reg(dst,src);
11861 %} 11065 %}
11862 %} 11066 %}
11863 11067
11864 // Force rounding to 24-bit precision and 8-bit exponent 11068 // Force rounding to 24-bit precision and 8-bit exponent
11865 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{ 11069 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
11866 predicate(UseSSE==1); 11070 predicate(UseSSE==1);
11867 match(Set dst (ConvD2F src)); 11071 match(Set dst (ConvD2F src));
11868 effect( KILL cr ); 11072 effect( KILL cr );
11869 format %{ "SUB ESP,4\n\t" 11073 format %{ "SUB ESP,4\n\t"
11870 "FST_S [ESP],$src\t# F-round\n\t" 11074 "FST_S [ESP],$src\t# F-round\n\t"
11871 "MOVSS $dst,[ESP]\n\t" 11075 "MOVSS $dst,[ESP]\n\t"
11872 "ADD ESP,4" %} 11076 "ADD ESP,4" %}
11873 ins_encode( D2X_encoding(dst, src) ); 11077 ins_encode %{
11078 __ subptr(rsp, 4);
11079 if ($src$$reg != FPR1L_enc) {
11080 __ fld_s($src$$reg-1);
11081 __ fstp_s(Address(rsp, 0));
11082 } else {
11083 __ fst_s(Address(rsp, 0));
11084 }
11085 __ movflt($dst$$XMMRegister, Address(rsp, 0));
11086 __ addptr(rsp, 4);
11087 %}
11874 ins_pipe( pipe_slow ); 11088 ins_pipe( pipe_slow );
11875 %} 11089 %}
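// Presumably the FPR1L_enc check in the encoding above is there because the
// x87 can only store from the top of the stack: when $src is already ST(0) a
// plain FST_S suffices, otherwise the value is duplicated onto the top with
// FLD ST(i) (the $src$$reg-1 index) and stored/popped with FSTP_S.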
11876 11090
11877 // Force rounding double precision to single precision 11091 // Force rounding double precision to single precision
11878 instruct convXD2X_reg(regX dst, regXD src) %{ 11092 instruct convD2F_reg(regF dst, regD src) %{
11879 predicate(UseSSE>=2); 11093 predicate(UseSSE>=2);
11880 match(Set dst (ConvD2F src)); 11094 match(Set dst (ConvD2F src));
11881 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 11095 format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11882 opcode(0xF2, 0x0F, 0x5A); 11096 ins_encode %{
11883 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 11097 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11098 %}
11884 ins_pipe( pipe_slow ); 11099 ins_pipe( pipe_slow );
11885 %} 11100 %}
11886 11101
11887 instruct convF2D_reg_reg(regD dst, regF src) %{ 11102 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
11888 predicate(UseSSE==0); 11103 predicate(UseSSE==0);
11889 match(Set dst (ConvF2D src)); 11104 match(Set dst (ConvF2D src));
11890 format %{ "FST_S $dst,$src\t# D-round" %} 11105 format %{ "FST_S $dst,$src\t# D-round" %}
11891 ins_encode( Pop_Reg_Reg_D(dst, src)); 11106 ins_encode( Pop_Reg_Reg_DPR(dst, src));
11892 ins_pipe( fpu_reg_reg ); 11107 ins_pipe( fpu_reg_reg );
11893 %} 11108 %}
11894 11109
11895 instruct convF2D_reg(stackSlotD dst, regF src) %{ 11110 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
11896 predicate(UseSSE==1); 11111 predicate(UseSSE==1);
11897 match(Set dst (ConvF2D src)); 11112 match(Set dst (ConvF2D src));
11898 format %{ "FST_D $dst,$src\t# D-round" %} 11113 format %{ "FST_D $dst,$src\t# D-round" %}
11899 expand %{ 11114 expand %{
11900 roundDouble_mem_reg(dst,src); 11115 roundDouble_mem_reg(dst,src);
11901 %} 11116 %}
11902 %} 11117 %}
11903 11118
11904 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{ 11119 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
11905 predicate(UseSSE==1); 11120 predicate(UseSSE==1);
11906 match(Set dst (ConvF2D src)); 11121 match(Set dst (ConvF2D src));
11907 effect( KILL cr ); 11122 effect( KILL cr );
11908 format %{ "SUB ESP,4\n\t" 11123 format %{ "SUB ESP,4\n\t"
11909 "MOVSS [ESP] $src\n\t" 11124 "MOVSS [ESP] $src\n\t"
11910 "FLD_S [ESP]\n\t" 11125 "FLD_S [ESP]\n\t"
11911 "ADD ESP,4\n\t" 11126 "ADD ESP,4\n\t"
11912 "FSTP $dst\t# D-round" %} 11127 "FSTP $dst\t# D-round" %}
11913 ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst)); 11128 ins_encode %{
11129 __ subptr(rsp, 4);
11130 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11131 __ fld_s(Address(rsp, 0));
11132 __ addptr(rsp, 4);
11133 __ fstp_d($dst$$reg);
11134 %}
11914 ins_pipe( pipe_slow ); 11135 ins_pipe( pipe_slow );
11915 %} 11136 %}
11916 11137
11917 instruct convX2XD_reg(regXD dst, regX src) %{ 11138 instruct convF2D_reg(regD dst, regF src) %{
11918 predicate(UseSSE>=2); 11139 predicate(UseSSE>=2);
11919 match(Set dst (ConvF2D src)); 11140 match(Set dst (ConvF2D src));
11920 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 11141 format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11921 opcode(0xF3, 0x0F, 0x5A); 11142 ins_encode %{
11922 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 11143 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11144 %}
11923 ins_pipe( pipe_slow ); 11145 ins_pipe( pipe_slow );
11924 %} 11146 %}
11925 11147
11926 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 11148 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11927 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 11149 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
11928 predicate(UseSSE<=1); 11150 predicate(UseSSE<=1);
11929 match(Set dst (ConvD2I src)); 11151 match(Set dst (ConvD2I src));
11930 effect( KILL tmp, KILL cr ); 11152 effect( KILL tmp, KILL cr );
11931 format %{ "FLD $src\t# Convert double to int \n\t" 11153 format %{ "FLD $src\t# Convert double to int \n\t"
11932 "FLDCW trunc mode\n\t" 11154 "FLDCW trunc mode\n\t"
11937 "CMP EAX,0x80000000\n\t" 11159 "CMP EAX,0x80000000\n\t"
11938 "JNE,s fast\n\t" 11160 "JNE,s fast\n\t"
11939 "FLD_D $src\n\t" 11161 "FLD_D $src\n\t"
11940 "CALL d2i_wrapper\n" 11162 "CALL d2i_wrapper\n"
11941 "fast:" %} 11163 "fast:" %}
11942 ins_encode( Push_Reg_D(src), D2I_encoding(src) ); 11164 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
11943 ins_pipe( pipe_slow ); 11165 ins_pipe( pipe_slow );
11944 %} 11166 %}
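// Both the x87 path above and the CVTTSD2SI path that follows bail to
// d2i_wrapper whenever the raw conversion produces the 0x80000000 sentinel;
// the wrapper then has to supply the answer Java requires (JLS 5.1.3).
// A C++ sketch of that contract (illustrative only, not the wrapper's source):
#include <cstdint>
static int32_t java_d2i(double d) {
  if (d != d) return 0;                        // NaN -> 0
  if (d >=  2147483647.0) return INT32_MAX;    // saturate on overflow
  if (d <= -2147483648.0) return INT32_MIN;
  return (int32_t)d;                           // in range: truncate toward zero
}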
11945 11167
11946 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 11168 // Convert a double to an int. If the double is a NAN, stuff a zero in instead.
11947 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{ 11169 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11948 predicate(UseSSE>=2); 11170 predicate(UseSSE>=2);
11949 match(Set dst (ConvD2I src)); 11171 match(Set dst (ConvD2I src));
11950 effect( KILL tmp, KILL cr ); 11172 effect( KILL tmp, KILL cr );
11951 format %{ "CVTTSD2SI $dst, $src\n\t" 11173 format %{ "CVTTSD2SI $dst, $src\n\t"
11952 "CMP $dst,0x80000000\n\t" 11174 "CMP $dst,0x80000000\n\t"
11955 "MOVSD [ESP], $src\n\t" 11177 "MOVSD [ESP], $src\n\t"
11956 "FLD_D [ESP]\n\t" 11178 "FLD_D [ESP]\n\t"
11957 "ADD ESP, 8\n\t" 11179 "ADD ESP, 8\n\t"
11958 "CALL d2i_wrapper\n" 11180 "CALL d2i_wrapper\n"
11959 "fast:" %} 11181 "fast:" %}
11960 opcode(0x1); // double-precision conversion 11182 ins_encode %{
11961 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); 11183 Label fast;
11184 __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
11185 __ cmpl($dst$$Register, 0x80000000);
11186 __ jccb(Assembler::notEqual, fast);
11187 __ subptr(rsp, 8);
11188 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11189 __ fld_d(Address(rsp, 0));
11190 __ addptr(rsp, 8);
11191 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11192 __ bind(fast);
11193 %}
11962 ins_pipe( pipe_slow ); 11194 ins_pipe( pipe_slow );
11963 %} 11195 %}
11964 11196
11965 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 11197 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
11966 predicate(UseSSE<=1); 11198 predicate(UseSSE<=1);
11967 match(Set dst (ConvD2L src)); 11199 match(Set dst (ConvD2L src));
11968 effect( KILL cr ); 11200 effect( KILL cr );
11969 format %{ "FLD $src\t# Convert double to long\n\t" 11201 format %{ "FLD $src\t# Convert double to long\n\t"
11970 "FLDCW trunc mode\n\t" 11202 "FLDCW trunc mode\n\t"
11978 "TEST EAX,EAX\n\t" 11210 "TEST EAX,EAX\n\t"
11979 "JNE,s fast\n\t" 11211 "JNE,s fast\n\t"
11980 "FLD $src\n\t" 11212 "FLD $src\n\t"
11981 "CALL d2l_wrapper\n" 11213 "CALL d2l_wrapper\n"
11982 "fast:" %} 11214 "fast:" %}
11983 ins_encode( Push_Reg_D(src), D2L_encoding(src) ); 11215 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
11984 ins_pipe( pipe_slow ); 11216 ins_pipe( pipe_slow );
11985 %} 11217 %}
11986 11218
11987 // XMM lacks a float/double->long conversion, so use the old FPU stack. 11219 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11988 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{ 11220 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11989 predicate (UseSSE>=2); 11221 predicate (UseSSE>=2);
11990 match(Set dst (ConvD2L src)); 11222 match(Set dst (ConvD2L src));
11991 effect( KILL cr ); 11223 effect( KILL cr );
11992 format %{ "SUB ESP,8\t# Convert double to long\n\t" 11224 format %{ "SUB ESP,8\t# Convert double to long\n\t"
11993 "MOVSD [ESP],$src\n\t" 11225 "MOVSD [ESP],$src\n\t"
12002 "TEST EAX,EAX\n\t" 11234 "TEST EAX,EAX\n\t"
12003 "JNE,s fast\n\t" 11235 "JNE,s fast\n\t"
12004 "SUB ESP,8\n\t" 11236 "SUB ESP,8\n\t"
12005 "MOVSD [ESP],$src\n\t" 11237 "MOVSD [ESP],$src\n\t"
12006 "FLD_D [ESP]\n\t" 11238 "FLD_D [ESP]\n\t"
11239 "ADD ESP,8\n\t"
12007 "CALL d2l_wrapper\n" 11240 "CALL d2l_wrapper\n"
12008 "fast:" %} 11241 "fast:" %}
12009 ins_encode( XD2L_encoding(src) ); 11242 ins_encode %{
11243 Label fast;
11244 __ subptr(rsp, 8);
11245 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11246 __ fld_d(Address(rsp, 0));
11247 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11248 __ fistp_d(Address(rsp, 0));
11249 // Restore the rounding mode, mask the exception
11250 if (Compile::current()->in_24_bit_fp_mode()) {
11251 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11252 } else {
11253 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11254 }
11255 // Load the converted long, adjust CPU stack
11256 __ pop(rax);
11257 __ pop(rdx);
11258 __ cmpl(rdx, 0x80000000);
11259 __ jccb(Assembler::notEqual, fast);
11260 __ testl(rax, rax);
11261 __ jccb(Assembler::notEqual, fast);
11262 __ subptr(rsp, 8);
11263 __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11264 __ fld_d(Address(rsp, 0));
11265 __ addptr(rsp, 8);
11266 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11267 __ bind(fast);
11268 %}
12010 ins_pipe( pipe_slow ); 11269 ins_pipe( pipe_slow );
12011 %} 11270 %}
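// Note on the 0x80000000:00000000 test in the encoding above: on NaN or
// overflow FISTP stores the x87 "integer indefinite" value, which is exactly
// the LONG_MIN bit pattern, so that sentinel is the only case that gets sent
// to d2l_wrapper (a genuine -2^63 input also takes the slow call, but the
// wrapper simply hands the same value back).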
12012 11271
12013 // Convert a double to an int. Java semantics require we do complex 11272 // Convert a double to an int. Java semantics require we do complex
12014 // manglations in the corner cases. So we set the rounding mode to 11273 // manglations in the corner cases. So we set the rounding mode to
12015 // 'zero', store the darned double down as an int, and reset the 11274 // 'zero', store the darned double down as an int, and reset the
12016 // rounding mode to 'nearest'. The hardware stores a flag value down 11275 // rounding mode to 'nearest'. The hardware stores a flag value down
12017 // if we overflowed or converted a NAN; we check for this 11276 // if we overflowed or converted a NAN; we check for this
12018 // and go the slow path if needed. 11277 // and go the slow path if needed.
12019 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 11278 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
12020 predicate(UseSSE==0); 11279 predicate(UseSSE==0);
12021 match(Set dst (ConvF2I src)); 11280 match(Set dst (ConvF2I src));
12022 effect( KILL tmp, KILL cr ); 11281 effect( KILL tmp, KILL cr );
12023 format %{ "FLD $src\t# Convert float to int \n\t" 11282 format %{ "FLD $src\t# Convert float to int \n\t"
12024 "FLDCW trunc mode\n\t" 11283 "FLDCW trunc mode\n\t"
12029 "CMP EAX,0x80000000\n\t" 11288 "CMP EAX,0x80000000\n\t"
12030 "JNE,s fast\n\t" 11289 "JNE,s fast\n\t"
12031 "FLD $src\n\t" 11290 "FLD $src\n\t"
12032 "CALL d2i_wrapper\n" 11291 "CALL d2i_wrapper\n"
12033 "fast:" %} 11292 "fast:" %}
12034 // D2I_encoding works for F2I 11293 // DPR2I_encoding works for FPR2I
12035 ins_encode( Push_Reg_F(src), D2I_encoding(src) ); 11294 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
12036 ins_pipe( pipe_slow ); 11295 ins_pipe( pipe_slow );
12037 %} 11296 %}
12038 11297
12039 // Convert a float in xmm to an int reg. 11298 // Convert a float in xmm to an int reg.
12040 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{ 11299 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
12041 predicate(UseSSE>=1); 11300 predicate(UseSSE>=1);
12042 match(Set dst (ConvF2I src)); 11301 match(Set dst (ConvF2I src));
12043 effect( KILL tmp, KILL cr ); 11302 effect( KILL tmp, KILL cr );
12044 format %{ "CVTTSS2SI $dst, $src\n\t" 11303 format %{ "CVTTSS2SI $dst, $src\n\t"
12045 "CMP $dst,0x80000000\n\t" 11304 "CMP $dst,0x80000000\n\t"
12048 "MOVSS [ESP], $src\n\t" 11307 "MOVSS [ESP], $src\n\t"
12049 "FLD [ESP]\n\t" 11308 "FLD [ESP]\n\t"
12050 "ADD ESP, 4\n\t" 11309 "ADD ESP, 4\n\t"
12051 "CALL d2i_wrapper\n" 11310 "CALL d2i_wrapper\n"
12052 "fast:" %} 11311 "fast:" %}
12053 opcode(0x0); // single-precision conversion 11312 ins_encode %{
12054 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); 11313 Label fast;
11314 __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11315 __ cmpl($dst$$Register, 0x80000000);
11316 __ jccb(Assembler::notEqual, fast);
11317 __ subptr(rsp, 4);
11318 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11319 __ fld_s(Address(rsp, 0));
11320 __ addptr(rsp, 4);
11321 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11322 __ bind(fast);
11323 %}
12055 ins_pipe( pipe_slow ); 11324 ins_pipe( pipe_slow );
12056 %} 11325 %}
12057 11326
12058 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 11327 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
12059 predicate(UseSSE==0); 11328 predicate(UseSSE==0);
12060 match(Set dst (ConvF2L src)); 11329 match(Set dst (ConvF2L src));
12061 effect( KILL cr ); 11330 effect( KILL cr );
12062 format %{ "FLD $src\t# Convert float to long\n\t" 11331 format %{ "FLD $src\t# Convert float to long\n\t"
12063 "FLDCW trunc mode\n\t" 11332 "FLDCW trunc mode\n\t"
12071 "TEST EAX,EAX\n\t" 11340 "TEST EAX,EAX\n\t"
12072 "JNE,s fast\n\t" 11341 "JNE,s fast\n\t"
12073 "FLD $src\n\t" 11342 "FLD $src\n\t"
12074 "CALL d2l_wrapper\n" 11343 "CALL d2l_wrapper\n"
12075 "fast:" %} 11344 "fast:" %}
12076 // D2L_encoding works for F2L 11345 // DPR2L_encoding works for FPR2L
12077 ins_encode( Push_Reg_F(src), D2L_encoding(src) ); 11346 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
12078 ins_pipe( pipe_slow ); 11347 ins_pipe( pipe_slow );
12079 %} 11348 %}
12080 11349
12081 // XMM lacks a float/double->long conversion, so use the old FPU stack. 11350 // XMM lacks a float/double->long conversion, so use the old FPU stack.
12082 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{ 11351 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
12083 predicate (UseSSE>=1); 11352 predicate (UseSSE>=1);
12084 match(Set dst (ConvF2L src)); 11353 match(Set dst (ConvF2L src));
12085 effect( KILL cr ); 11354 effect( KILL cr );
12086 format %{ "SUB ESP,8\t# Convert float to long\n\t" 11355 format %{ "SUB ESP,8\t# Convert float to long\n\t"
12087 "MOVSS [ESP],$src\n\t" 11356 "MOVSS [ESP],$src\n\t"
12099 "MOVSS [ESP],$src\n\t" 11368 "MOVSS [ESP],$src\n\t"
12100 "FLD_S [ESP]\n\t" 11369 "FLD_S [ESP]\n\t"
12101 "ADD ESP,4\n\t" 11370 "ADD ESP,4\n\t"
12102 "CALL d2l_wrapper\n" 11371 "CALL d2l_wrapper\n"
12103 "fast:" %} 11372 "fast:" %}
12104 ins_encode( X2L_encoding(src) ); 11373 ins_encode %{
11374 Label fast;
11375 __ subptr(rsp, 8);
11376 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11377 __ fld_s(Address(rsp, 0));
11378 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11379 __ fistp_d(Address(rsp, 0));
11380 // Restore the rounding mode, mask the exception
11381 if (Compile::current()->in_24_bit_fp_mode()) {
11382 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11383 } else {
11384 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11385 }
11386 // Load the converted long, adjust CPU stack
11387 __ pop(rax);
11388 __ pop(rdx);
11389 __ cmpl(rdx, 0x80000000);
11390 __ jccb(Assembler::notEqual, fast);
11391 __ testl(rax, rax);
11392 __ jccb(Assembler::notEqual, fast);
11393 __ subptr(rsp, 4);
11394 __ movflt(Address(rsp, 0), $src$$XMMRegister);
11395 __ fld_s(Address(rsp, 0));
11396 __ addptr(rsp, 4);
11397 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11398 __ bind(fast);
11399 %}
12105 ins_pipe( pipe_slow ); 11400 ins_pipe( pipe_slow );
12106 %} 11401 %}
12107 11402
12108 instruct convI2D_reg(regD dst, stackSlotI src) %{ 11403 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
12109 predicate( UseSSE<=1 ); 11404 predicate( UseSSE<=1 );
12110 match(Set dst (ConvI2D src)); 11405 match(Set dst (ConvI2D src));
12111 format %{ "FILD $src\n\t" 11406 format %{ "FILD $src\n\t"
12112 "FSTP $dst" %} 11407 "FSTP $dst" %}
12113 opcode(0xDB, 0x0); /* DB /0 */ 11408 opcode(0xDB, 0x0); /* DB /0 */
12114 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst)); 11409 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
12115 ins_pipe( fpu_reg_mem ); 11410 ins_pipe( fpu_reg_mem );
12116 %} 11411 %}
12117 11412
12118 instruct convI2XD_reg(regXD dst, eRegI src) %{ 11413 instruct convI2D_reg(regD dst, eRegI src) %{
12119 predicate( UseSSE>=2 && !UseXmmI2D ); 11414 predicate( UseSSE>=2 && !UseXmmI2D );
12120 match(Set dst (ConvI2D src)); 11415 match(Set dst (ConvI2D src));
12121 format %{ "CVTSI2SD $dst,$src" %} 11416 format %{ "CVTSI2SD $dst,$src" %}
12122 opcode(0xF2, 0x0F, 0x2A); 11417 ins_encode %{
12123 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 11418 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11419 %}
12124 ins_pipe( pipe_slow ); 11420 ins_pipe( pipe_slow );
12125 %} 11421 %}
12126 11422
12127 instruct convI2XD_mem(regXD dst, memory mem) %{ 11423 instruct convI2D_mem(regD dst, memory mem) %{
12128 predicate( UseSSE>=2 ); 11424 predicate( UseSSE>=2 );
12129 match(Set dst (ConvI2D (LoadI mem))); 11425 match(Set dst (ConvI2D (LoadI mem)));
12130 format %{ "CVTSI2SD $dst,$mem" %} 11426 format %{ "CVTSI2SD $dst,$mem" %}
12131 opcode(0xF2, 0x0F, 0x2A); 11427 ins_encode %{
12132 ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem)); 11428 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11429 %}
12133 ins_pipe( pipe_slow ); 11430 ins_pipe( pipe_slow );
12134 %} 11431 %}
12135 11432
12136 instruct convXI2XD_reg(regXD dst, eRegI src) 11433 instruct convXI2D_reg(regD dst, eRegI src)
12137 %{ 11434 %{
12138 predicate( UseSSE>=2 && UseXmmI2D ); 11435 predicate( UseSSE>=2 && UseXmmI2D );
12139 match(Set dst (ConvI2D src)); 11436 match(Set dst (ConvI2D src));
12140 11437
12141 format %{ "MOVD $dst,$src\n\t" 11438 format %{ "MOVD $dst,$src\n\t"
12145 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11442 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
12146 %} 11443 %}
12147 ins_pipe(pipe_slow); // XXX 11444 ins_pipe(pipe_slow); // XXX
12148 %} 11445 %}
12149 11446
12150 instruct convI2D_mem(regD dst, memory mem) %{ 11447 instruct convI2DPR_mem(regDPR dst, memory mem) %{
12151 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11448 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
12152 match(Set dst (ConvI2D (LoadI mem))); 11449 match(Set dst (ConvI2D (LoadI mem)));
12153 format %{ "FILD $mem\n\t" 11450 format %{ "FILD $mem\n\t"
12154 "FSTP $dst" %} 11451 "FSTP $dst" %}
12155 opcode(0xDB); /* DB /0 */ 11452 opcode(0xDB); /* DB /0 */
12156 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11453 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12157 Pop_Reg_D(dst)); 11454 Pop_Reg_DPR(dst));
12158 ins_pipe( fpu_reg_mem ); 11455 ins_pipe( fpu_reg_mem );
12159 %} 11456 %}
12160 11457
12161 // Convert a byte to a float; no rounding step needed. 11458 // Convert a byte to a float; no rounding step needed.
12162 instruct conv24I2F_reg(regF dst, stackSlotI src) %{ 11459 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
12163 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11460 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
12164 match(Set dst (ConvI2F src)); 11461 match(Set dst (ConvI2F src));
12165 format %{ "FILD $src\n\t" 11462 format %{ "FILD $src\n\t"
12166 "FSTP $dst" %} 11463 "FSTP $dst" %}
12167 11464
12168 opcode(0xDB, 0x0); /* DB /0 */ 11465 opcode(0xDB, 0x0); /* DB /0 */
12169 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst)); 11466 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
12170 ins_pipe( fpu_reg_mem ); 11467 ins_pipe( fpu_reg_mem );
12171 %} 11468 %}
12172 11469
12173 // In 24-bit mode, force exponent rounding by storing back out 11470 // In 24-bit mode, force exponent rounding by storing back out
12174 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{ 11471 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
12175 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11472 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12176 match(Set dst (ConvI2F src)); 11473 match(Set dst (ConvI2F src));
12177 ins_cost(200); 11474 ins_cost(200);
12178 format %{ "FILD $src\n\t" 11475 format %{ "FILD $src\n\t"
12179 "FSTP_S $dst" %} 11476 "FSTP_S $dst" %}
12180 opcode(0xDB, 0x0); /* DB /0 */ 11477 opcode(0xDB, 0x0); /* DB /0 */
12181 ins_encode( Push_Mem_I(src), 11478 ins_encode( Push_Mem_I(src),
12182 Pop_Mem_F(dst)); 11479 Pop_Mem_FPR(dst));
12183 ins_pipe( fpu_mem_mem ); 11480 ins_pipe( fpu_mem_mem );
12184 %} 11481 %}
12185 11482
12186 // In 24-bit mode, force exponent rounding by storing back out 11483 // In 24-bit mode, force exponent rounding by storing back out
12187 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{ 11484 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
12188 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11485 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12189 match(Set dst (ConvI2F (LoadI mem))); 11486 match(Set dst (ConvI2F (LoadI mem)));
12190 ins_cost(200); 11487 ins_cost(200);
12191 format %{ "FILD $mem\n\t" 11488 format %{ "FILD $mem\n\t"
12192 "FSTP_S $dst" %} 11489 "FSTP_S $dst" %}
12193 opcode(0xDB); /* DB /0 */ 11490 opcode(0xDB); /* DB /0 */
12194 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11491 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12195 Pop_Mem_F(dst)); 11492 Pop_Mem_FPR(dst));
12196 ins_pipe( fpu_mem_mem ); 11493 ins_pipe( fpu_mem_mem );
12197 %} 11494 %}
12198 11495
12199 // This instruction does not round to 24 bits 11496 // This instruction does not round to 24 bits
12200 instruct convI2F_reg(regF dst, stackSlotI src) %{ 11497 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
12201 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11498 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12202 match(Set dst (ConvI2F src)); 11499 match(Set dst (ConvI2F src));
12203 format %{ "FILD $src\n\t" 11500 format %{ "FILD $src\n\t"
12204 "FSTP $dst" %} 11501 "FSTP $dst" %}
12205 opcode(0xDB, 0x0); /* DB /0 */ 11502 opcode(0xDB, 0x0); /* DB /0 */
12206 ins_encode( Push_Mem_I(src), 11503 ins_encode( Push_Mem_I(src),
12207 Pop_Reg_F(dst)); 11504 Pop_Reg_FPR(dst));
12208 ins_pipe( fpu_reg_mem ); 11505 ins_pipe( fpu_reg_mem );
12209 %} 11506 %}
12210 11507
12211 // This instruction does not round to 24 bits 11508 // This instruction does not round to 24 bits
12212 instruct convI2F_mem(regF dst, memory mem) %{ 11509 instruct convI2FPR_mem(regFPR dst, memory mem) %{
12213 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11510 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12214 match(Set dst (ConvI2F (LoadI mem))); 11511 match(Set dst (ConvI2F (LoadI mem)));
12215 format %{ "FILD $mem\n\t" 11512 format %{ "FILD $mem\n\t"
12216 "FSTP $dst" %} 11513 "FSTP $dst" %}
12217 opcode(0xDB); /* DB /0 */ 11514 opcode(0xDB); /* DB /0 */
12218 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11515 ins_encode( OpcP, RMopc_Mem(0x00,mem),
12219 Pop_Reg_F(dst)); 11516 Pop_Reg_FPR(dst));
12220 ins_pipe( fpu_reg_mem ); 11517 ins_pipe( fpu_reg_mem );
12221 %} 11518 %}
12222 11519
12223 // Convert an int to a float in xmm; no rounding step needed. 11520 // Convert an int to a float in xmm; no rounding step needed.
12224 instruct convI2X_reg(regX dst, eRegI src) %{ 11521 instruct convI2F_reg(regF dst, eRegI src) %{
12225 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11522 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
12226 match(Set dst (ConvI2F src)); 11523 match(Set dst (ConvI2F src));
12227 format %{ "CVTSI2SS $dst, $src" %} 11524 format %{ "CVTSI2SS $dst, $src" %}
12228 11525 ins_encode %{
12229 opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */ 11526 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
12230 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 11527 %}
12231 ins_pipe( pipe_slow ); 11528 ins_pipe( pipe_slow );
12232 %} 11529 %}
12233 11530
12234 instruct convXI2X_reg(regX dst, eRegI src) 11531 instruct convXI2F_reg(regF dst, eRegI src)
12235 %{ 11532 %{
12236 predicate( UseSSE>=2 && UseXmmI2F ); 11533 predicate( UseSSE>=2 && UseXmmI2F );
12237 match(Set dst (ConvI2F src)); 11534 match(Set dst (ConvI2F src));
12238 11535
12239 format %{ "MOVD $dst,$src\n\t" 11536 format %{ "MOVD $dst,$src\n\t"
12278 opcode(0x33); // XOR 11575 opcode(0x33); // XOR
12279 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11576 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12280 ins_pipe( ialu_reg_reg_long ); 11577 ins_pipe( ialu_reg_reg_long );
12281 %} 11578 %}
12282 11579
12283 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11580 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
12284 predicate (UseSSE<=1); 11581 predicate (UseSSE<=1);
12285 match(Set dst (ConvL2D src)); 11582 match(Set dst (ConvL2D src));
12286 effect( KILL cr ); 11583 effect( KILL cr );
12287 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11584 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12288 "PUSH $src.lo\n\t" 11585 "PUSH $src.lo\n\t"
12289 "FILD ST,[ESP + #0]\n\t" 11586 "FILD ST,[ESP + #0]\n\t"
12290 "ADD ESP,8\n\t" 11587 "ADD ESP,8\n\t"
12291 "FSTP_D $dst\t# D-round" %} 11588 "FSTP_D $dst\t# D-round" %}
12292 opcode(0xDF, 0x5); /* DF /5 */ 11589 opcode(0xDF, 0x5); /* DF /5 */
12293 ins_encode(convert_long_double(src), Pop_Mem_D(dst)); 11590 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
12294 ins_pipe( pipe_slow ); 11591 ins_pipe( pipe_slow );
12295 %} 11592 %}
12296 11593
12297 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{ 11594 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
12298 predicate (UseSSE>=2); 11595 predicate (UseSSE>=2);
12299 match(Set dst (ConvL2D src)); 11596 match(Set dst (ConvL2D src));
12300 effect( KILL cr ); 11597 effect( KILL cr );
12301 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11598 format %{ "PUSH $src.hi\t# Convert long to double\n\t"
12302 "PUSH $src.lo\n\t" 11599 "PUSH $src.lo\n\t"
12303 "FILD_D [ESP]\n\t" 11600 "FILD_D [ESP]\n\t"
12304 "FSTP_D [ESP]\n\t" 11601 "FSTP_D [ESP]\n\t"
12305 "MOVSD $dst,[ESP]\n\t" 11602 "MOVSD $dst,[ESP]\n\t"
12306 "ADD ESP,8" %} 11603 "ADD ESP,8" %}
12307 opcode(0xDF, 0x5); /* DF /5 */ 11604 opcode(0xDF, 0x5); /* DF /5 */
12308 ins_encode(convert_long_double2(src), Push_ResultXD(dst)); 11605 ins_encode(convert_long_double2(src), Push_ResultD(dst));
12309 ins_pipe( pipe_slow ); 11606 ins_pipe( pipe_slow );
12310 %} 11607 %}
12311 11608
12312 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{ 11609 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
12313 predicate (UseSSE>=1); 11610 predicate (UseSSE>=1);
12314 match(Set dst (ConvL2F src)); 11611 match(Set dst (ConvL2F src));
12315 effect( KILL cr ); 11612 effect( KILL cr );
12316 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11613 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12317 "PUSH $src.lo\n\t" 11614 "PUSH $src.lo\n\t"
12318 "FILD_D [ESP]\n\t" 11615 "FILD_D [ESP]\n\t"
12319 "FSTP_S [ESP]\n\t" 11616 "FSTP_S [ESP]\n\t"
12320 "MOVSS $dst,[ESP]\n\t" 11617 "MOVSS $dst,[ESP]\n\t"
12321 "ADD ESP,8" %} 11618 "ADD ESP,8" %}
12322 opcode(0xDF, 0x5); /* DF /5 */ 11619 opcode(0xDF, 0x5); /* DF /5 */
12323 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8)); 11620 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
12324 ins_pipe( pipe_slow ); 11621 ins_pipe( pipe_slow );
12325 %} 11622 %}
12326 11623
12327 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11624 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
12328 match(Set dst (ConvL2F src)); 11625 match(Set dst (ConvL2F src));
12329 effect( KILL cr ); 11626 effect( KILL cr );
12330 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11627 format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
12331 "PUSH $src.lo\n\t" 11628 "PUSH $src.lo\n\t"
12332 "FILD ST,[ESP + #0]\n\t" 11629 "FILD ST,[ESP + #0]\n\t"
12333 "ADD ESP,8\n\t" 11630 "ADD ESP,8\n\t"
12334 "FSTP_S $dst\t# F-round" %} 11631 "FSTP_S $dst\t# F-round" %}
12335 opcode(0xDF, 0x5); /* DF /5 */ 11632 opcode(0xDF, 0x5); /* DF /5 */
12336 ins_encode(convert_long_double(src), Pop_Mem_F(dst)); 11633 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
12337 ins_pipe( pipe_slow ); 11634 ins_pipe( pipe_slow );
12338 %} 11635 %}
12339 11636
12340 instruct convL2I_reg( eRegI dst, eRegL src ) %{ 11637 instruct convL2I_reg( eRegI dst, eRegL src ) %{
12341 match(Set dst (ConvL2I src)); 11638 match(Set dst (ConvL2I src));
12349 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{ 11646 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
12350 match(Set dst (MoveF2I src)); 11647 match(Set dst (MoveF2I src));
12351 effect( DEF dst, USE src ); 11648 effect( DEF dst, USE src );
12352 ins_cost(100); 11649 ins_cost(100);
12353 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11650 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
12354 opcode(0x8B); 11651 ins_encode %{
12355 ins_encode( OpcP, RegMem(dst,src)); 11652 __ movl($dst$$Register, Address(rsp, $src$$disp));
11653 %}
12356 ins_pipe( ialu_reg_mem ); 11654 ins_pipe( ialu_reg_mem );
12357 %} 11655 %}
12358 11656
12359 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ 11657 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
12360 predicate(UseSSE==0); 11658 predicate(UseSSE==0);
12361 match(Set dst (MoveF2I src)); 11659 match(Set dst (MoveF2I src));
12362 effect( DEF dst, USE src ); 11660 effect( DEF dst, USE src );
12363 11661
12364 ins_cost(125); 11662 ins_cost(125);
12365 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11663 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
12366 ins_encode( Pop_Mem_Reg_F(dst, src) ); 11664 ins_encode( Pop_Mem_Reg_FPR(dst, src) );
12367 ins_pipe( fpu_mem_reg ); 11665 ins_pipe( fpu_mem_reg );
12368 %} 11666 %}
12369 11667
12370 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{ 11668 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
12371 predicate(UseSSE>=1); 11669 predicate(UseSSE>=1);
12372 match(Set dst (MoveF2I src)); 11670 match(Set dst (MoveF2I src));
12373 effect( DEF dst, USE src ); 11671 effect( DEF dst, USE src );
12374 11672
12375 ins_cost(95); 11673 ins_cost(95);
12376 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11674 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
12377 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst)); 11675 ins_encode %{
11676 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11677 %}
12378 ins_pipe( pipe_slow ); 11678 ins_pipe( pipe_slow );
12379 %} 11679 %}
12380 11680
12381 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{ 11681 instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
12382 predicate(UseSSE>=2); 11682 predicate(UseSSE>=2);
12383 match(Set dst (MoveF2I src)); 11683 match(Set dst (MoveF2I src));
12384 effect( DEF dst, USE src ); 11684 effect( DEF dst, USE src );
12385 ins_cost(85); 11685 ins_cost(85);
12386 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11686 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
12387 ins_encode( MovX2I_reg(dst, src)); 11687 ins_encode %{
11688 __ movdl($dst$$Register, $src$$XMMRegister);
11689 %}
12388 ins_pipe( pipe_slow ); 11690 ins_pipe( pipe_slow );
12389 %} 11691 %}
12390 11692
12391 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{ 11693 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
12392 match(Set dst (MoveI2F src)); 11694 match(Set dst (MoveI2F src));
12393 effect( DEF dst, USE src ); 11695 effect( DEF dst, USE src );
12394 11696
12395 ins_cost(100); 11697 ins_cost(100);
12396 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 11698 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
12397 opcode(0x89); 11699 ins_encode %{
12398 ins_encode( OpcPRegSS( dst, src ) ); 11700 __ movl(Address(rsp, $dst$$disp), $src$$Register);
11701 %}
12399 ins_pipe( ialu_mem_reg ); 11702 ins_pipe( ialu_mem_reg );
12400 %} 11703 %}
12401 11704
12402 11705
12403 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ 11706 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
12404 predicate(UseSSE==0); 11707 predicate(UseSSE==0);
12405 match(Set dst (MoveI2F src)); 11708 match(Set dst (MoveI2F src));
12406 effect(DEF dst, USE src); 11709 effect(DEF dst, USE src);
12407 11710
12408 ins_cost(125); 11711 ins_cost(125);
12409 format %{ "FLD_S $src\n\t" 11712 format %{ "FLD_S $src\n\t"
12410 "FSTP $dst\t# MoveI2F_stack_reg" %} 11713 "FSTP $dst\t# MoveI2F_stack_reg" %}
12411 opcode(0xD9); /* D9 /0, FLD m32real */ 11714 opcode(0xD9); /* D9 /0, FLD m32real */
12412 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11715 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12413 Pop_Reg_F(dst) ); 11716 Pop_Reg_FPR(dst) );
12414 ins_pipe( fpu_reg_mem ); 11717 ins_pipe( fpu_reg_mem );
12415 %} 11718 %}
12416 11719
12417 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{ 11720 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
12418 predicate(UseSSE>=1); 11721 predicate(UseSSE>=1);
12419 match(Set dst (MoveI2F src)); 11722 match(Set dst (MoveI2F src));
12420 effect( DEF dst, USE src ); 11723 effect( DEF dst, USE src );
12421 11724
12422 ins_cost(95); 11725 ins_cost(95);
12423 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11726 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
12424 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); 11727 ins_encode %{
11728 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11729 %}
12425 ins_pipe( pipe_slow ); 11730 ins_pipe( pipe_slow );
12426 %} 11731 %}
12427 11732
12428 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{ 11733 instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
12429 predicate(UseSSE>=2); 11734 predicate(UseSSE>=2);
12430 match(Set dst (MoveI2F src)); 11735 match(Set dst (MoveI2F src));
12431 effect( DEF dst, USE src ); 11736 effect( DEF dst, USE src );
12432 11737
12433 ins_cost(85); 11738 ins_cost(85);
12434 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11739 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
12435 ins_encode( MovI2X_reg(dst, src) ); 11740 ins_encode %{
11741 __ movdl($dst$$XMMRegister, $src$$Register);
11742 %}
12436 ins_pipe( pipe_slow ); 11743 ins_pipe( pipe_slow );
12437 %} 11744 %}
12438 11745
12439 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11746 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
12440 match(Set dst (MoveD2L src)); 11747 match(Set dst (MoveD2L src));
12446 opcode(0x8B, 0x8B); 11753 opcode(0x8B, 0x8B);
12447 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11754 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12448 ins_pipe( ialu_mem_long_reg ); 11755 ins_pipe( ialu_mem_long_reg );
12449 %} 11756 %}
12450 11757
12451 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ 11758 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
12452 predicate(UseSSE<=1); 11759 predicate(UseSSE<=1);
12453 match(Set dst (MoveD2L src)); 11760 match(Set dst (MoveD2L src));
12454 effect(DEF dst, USE src); 11761 effect(DEF dst, USE src);
12455 11762
12456 ins_cost(125); 11763 ins_cost(125);
12457 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11764 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
12458 ins_encode( Pop_Mem_Reg_D(dst, src) ); 11765 ins_encode( Pop_Mem_Reg_DPR(dst, src) );
12459 ins_pipe( fpu_mem_reg ); 11766 ins_pipe( fpu_mem_reg );
12460 %} 11767 %}
12461 11768
12462 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{ 11769 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
12463 predicate(UseSSE>=2); 11770 predicate(UseSSE>=2);
12464 match(Set dst (MoveD2L src)); 11771 match(Set dst (MoveD2L src));
12465 effect(DEF dst, USE src); 11772 effect(DEF dst, USE src);
12466 ins_cost(95); 11773 ins_cost(95);
12467
12468 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11774 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
12469 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst)); 11775 ins_encode %{
11776 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11777 %}
12470 ins_pipe( pipe_slow ); 11778 ins_pipe( pipe_slow );
12471 %} 11779 %}
12472 11780
12473 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{ 11781 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
12474 predicate(UseSSE>=2); 11782 predicate(UseSSE>=2);
12475 match(Set dst (MoveD2L src)); 11783 match(Set dst (MoveD2L src));
12476 effect(DEF dst, USE src, TEMP tmp); 11784 effect(DEF dst, USE src, TEMP tmp);
12477 ins_cost(85); 11785 ins_cost(85);
12478 format %{ "MOVD $dst.lo,$src\n\t" 11786 format %{ "MOVD $dst.lo,$src\n\t"
12479 "PSHUFLW $tmp,$src,0x4E\n\t" 11787 "PSHUFLW $tmp,$src,0x4E\n\t"
12480 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11788 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12481 ins_encode( MovXD2L_reg(dst, src, tmp) ); 11789 ins_encode %{
11790 __ movdl($dst$$Register, $src$$XMMRegister);
11791 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11792 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11793 %}
12482 ins_pipe( pipe_slow ); 11794 ins_pipe( pipe_slow );
12483 %} 11795 %}
12484 11796
12485 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11797 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
12486 match(Set dst (MoveL2D src)); 11798 match(Set dst (MoveL2D src));
12493 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11805 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12494 ins_pipe( ialu_mem_long_reg ); 11806 ins_pipe( ialu_mem_long_reg );
12495 %} 11807 %}
12496 11808
12497 11809
12498 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ 11810 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
12499 predicate(UseSSE<=1); 11811 predicate(UseSSE<=1);
12500 match(Set dst (MoveL2D src)); 11812 match(Set dst (MoveL2D src));
12501 effect(DEF dst, USE src); 11813 effect(DEF dst, USE src);
12502 ins_cost(125); 11814 ins_cost(125);
12503 11815
12504 format %{ "FLD_D $src\n\t" 11816 format %{ "FLD_D $src\n\t"
12505 "FSTP $dst\t# MoveL2D_stack_reg" %} 11817 "FSTP $dst\t# MoveL2D_stack_reg" %}
12506 opcode(0xDD); /* DD /0, FLD m64real */ 11818 opcode(0xDD); /* DD /0, FLD m64real */
12507 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11819 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12508 Pop_Reg_D(dst) ); 11820 Pop_Reg_DPR(dst) );
12509 ins_pipe( fpu_reg_mem ); 11821 ins_pipe( fpu_reg_mem );
12510 %} 11822 %}
12511 11823
12512 11824
12513 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{ 11825 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
12514 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 11826 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12515 match(Set dst (MoveL2D src)); 11827 match(Set dst (MoveL2D src));
12516 effect(DEF dst, USE src); 11828 effect(DEF dst, USE src);
12517 11829
12518 ins_cost(95); 11830 ins_cost(95);
12519 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11831 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12520 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); 11832 ins_encode %{
11833 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11834 %}
12521 ins_pipe( pipe_slow ); 11835 ins_pipe( pipe_slow );
12522 %} 11836 %}
12523 11837
12524 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{ 11838 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
12525 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11839 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12526 match(Set dst (MoveL2D src)); 11840 match(Set dst (MoveL2D src));
12527 effect(DEF dst, USE src); 11841 effect(DEF dst, USE src);
12528 11842
12529 ins_cost(95); 11843 ins_cost(95);
12530 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11844 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
12531 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src)); 11845 ins_encode %{
11846 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11847 %}
12532 ins_pipe( pipe_slow ); 11848 ins_pipe( pipe_slow );
12533 %} 11849 %}
12534 11850
12535 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{ 11851 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
12536 predicate(UseSSE>=2); 11852 predicate(UseSSE>=2);
12537 match(Set dst (MoveL2D src)); 11853 match(Set dst (MoveL2D src));
12538 effect(TEMP dst, USE src, TEMP tmp); 11854 effect(TEMP dst, USE src, TEMP tmp);
12539 ins_cost(85); 11855 ins_cost(85);
12540 format %{ "MOVD $dst,$src.lo\n\t" 11856 format %{ "MOVD $dst,$src.lo\n\t"
12541 "MOVD $tmp,$src.hi\n\t" 11857 "MOVD $tmp,$src.hi\n\t"
12542 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11858 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12543 ins_encode( MovL2XD_reg(dst, src, tmp) ); 11859 ins_encode %{
11860 __ movdl($dst$$XMMRegister, $src$$Register);
11861 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11862 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11863 %}
12544 ins_pipe( pipe_slow ); 11864 ins_pipe( pipe_slow );
12545 %} 11865 %}
12546 11866
12547 // Replicate scalar to packed byte (1 byte) values in xmm 11867 // Replicate scalar to packed byte (1 byte) values in xmm
12548 instruct Repl8B_reg(regXD dst, regXD src) %{ 11868 instruct Repl8B_reg(regD dst, regD src) %{
12549 predicate(UseSSE>=2); 11869 predicate(UseSSE>=2);
12550 match(Set dst (Replicate8B src)); 11870 match(Set dst (Replicate8B src));
12551 format %{ "MOVDQA $dst,$src\n\t" 11871 format %{ "MOVDQA $dst,$src\n\t"
12552 "PUNPCKLBW $dst,$dst\n\t" 11872 "PUNPCKLBW $dst,$dst\n\t"
12553 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} 11873 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12554 ins_encode( pshufd_8x8(dst, src)); 11874 ins_encode %{
11875 if ($dst$$reg != $src$$reg) {
11876 __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
11877 }
11878 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
11879 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11880 %}
12555 ins_pipe( pipe_slow ); 11881 ins_pipe( pipe_slow );
12556 %} 11882 %}
12557 11883
12558 // Replicate scalar to packed byte (1 byte) values in xmm 11884 // Replicate scalar to packed byte (1 byte) values in xmm
12559 instruct Repl8B_eRegI(regXD dst, eRegI src) %{ 11885 instruct Repl8B_eRegI(regD dst, eRegI src) %{
12560 predicate(UseSSE>=2); 11886 predicate(UseSSE>=2);
12561 match(Set dst (Replicate8B src)); 11887 match(Set dst (Replicate8B src));
12562 format %{ "MOVD $dst,$src\n\t" 11888 format %{ "MOVD $dst,$src\n\t"
12563 "PUNPCKLBW $dst,$dst\n\t" 11889 "PUNPCKLBW $dst,$dst\n\t"
12564 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} 11890 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12565 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); 11891 ins_encode %{
11892 __ movdl($dst$$XMMRegister, $src$$Register);
11893 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
11894 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11895 %}
12566 ins_pipe( pipe_slow ); 11896 ins_pipe( pipe_slow );
12567 %} 11897 %}
12568 11898
12569 // Replicate scalar zero to packed byte (1 byte) values in xmm 11899 // Replicate scalar zero to packed byte (1 byte) values in xmm
12570 instruct Repl8B_immI0(regXD dst, immI0 zero) %{ 11900 instruct Repl8B_immI0(regD dst, immI0 zero) %{
12571 predicate(UseSSE>=2); 11901 predicate(UseSSE>=2);
12572 match(Set dst (Replicate8B zero)); 11902 match(Set dst (Replicate8B zero));
12573 format %{ "PXOR $dst,$dst\t! replicate8B" %} 11903 format %{ "PXOR $dst,$dst\t! replicate8B" %}
12574 ins_encode( pxor(dst, dst)); 11904 ins_encode %{
11905 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11906 %}
12575 ins_pipe( fpu_reg_reg ); 11907 ins_pipe( fpu_reg_reg );
12576 %} 11908 %}
12577 11909
12578 // Replicate scalar to packed short (2 byte) values in xmm 11910 // Replicate scalar to packed short (2 byte) values in xmm
12579 instruct Repl4S_reg(regXD dst, regXD src) %{ 11911 instruct Repl4S_reg(regD dst, regD src) %{
12580 predicate(UseSSE>=2); 11912 predicate(UseSSE>=2);
12581 match(Set dst (Replicate4S src)); 11913 match(Set dst (Replicate4S src));
12582 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} 11914 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12583 ins_encode( pshufd_4x16(dst, src)); 11915 ins_encode %{
11916 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11917 %}
12584 ins_pipe( fpu_reg_reg ); 11918 ins_pipe( fpu_reg_reg );
12585 %} 11919 %}
12586 11920
12587 // Replicate scalar to packed short (2 byte) values in xmm 11921 // Replicate scalar to packed short (2 byte) values in xmm
12588 instruct Repl4S_eRegI(regXD dst, eRegI src) %{ 11922 instruct Repl4S_eRegI(regD dst, eRegI src) %{
12589 predicate(UseSSE>=2); 11923 predicate(UseSSE>=2);
12590 match(Set dst (Replicate4S src)); 11924 match(Set dst (Replicate4S src));
12591 format %{ "MOVD $dst,$src\n\t" 11925 format %{ "MOVD $dst,$src\n\t"
12592 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} 11926 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12593 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); 11927 ins_encode %{
11928 __ movdl($dst$$XMMRegister, $src$$Register);
11929 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11930 %}
12594 ins_pipe( fpu_reg_reg ); 11931 ins_pipe( fpu_reg_reg );
12595 %} 11932 %}
12596 11933
12597 // Replicate scalar zero to packed short (2 byte) values in xmm 11934 // Replicate scalar zero to packed short (2 byte) values in xmm
12598 instruct Repl4S_immI0(regXD dst, immI0 zero) %{ 11935 instruct Repl4S_immI0(regD dst, immI0 zero) %{
12599 predicate(UseSSE>=2); 11936 predicate(UseSSE>=2);
12600 match(Set dst (Replicate4S zero)); 11937 match(Set dst (Replicate4S zero));
12601 format %{ "PXOR $dst,$dst\t! replicate4S" %} 11938 format %{ "PXOR $dst,$dst\t! replicate4S" %}
12602 ins_encode( pxor(dst, dst)); 11939 ins_encode %{
11940 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11941 %}
12603 ins_pipe( fpu_reg_reg ); 11942 ins_pipe( fpu_reg_reg );
12604 %} 11943 %}
12605 11944
12606 // Replicate scalar to packed char (2 byte) values in xmm 11945 // Replicate scalar to packed char (2 byte) values in xmm
12607 instruct Repl4C_reg(regXD dst, regXD src) %{ 11946 instruct Repl4C_reg(regD dst, regD src) %{
12608 predicate(UseSSE>=2); 11947 predicate(UseSSE>=2);
12609 match(Set dst (Replicate4C src)); 11948 match(Set dst (Replicate4C src));
12610 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} 11949 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12611 ins_encode( pshufd_4x16(dst, src)); 11950 ins_encode %{
11951 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11952 %}
12612 ins_pipe( fpu_reg_reg ); 11953 ins_pipe( fpu_reg_reg );
12613 %} 11954 %}
12614 11955
12615 // Replicate scalar to packed char (2 byte) values in xmm 11956 // Replicate scalar to packed char (2 byte) values in xmm
12616 instruct Repl4C_eRegI(regXD dst, eRegI src) %{ 11957 instruct Repl4C_eRegI(regD dst, eRegI src) %{
12617 predicate(UseSSE>=2); 11958 predicate(UseSSE>=2);
12618 match(Set dst (Replicate4C src)); 11959 match(Set dst (Replicate4C src));
12619 format %{ "MOVD $dst,$src\n\t" 11960 format %{ "MOVD $dst,$src\n\t"
12620 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} 11961 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12621 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); 11962 ins_encode %{
11963 __ movdl($dst$$XMMRegister, $src$$Register);
11964 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11965 %}
12622 ins_pipe( fpu_reg_reg ); 11966 ins_pipe( fpu_reg_reg );
12623 %} 11967 %}
12624 11968
12625 // Replicate scalar zero to packed char (2 byte) values in xmm 11969 // Replicate scalar zero to packed char (2 byte) values in xmm
12626 instruct Repl4C_immI0(regXD dst, immI0 zero) %{ 11970 instruct Repl4C_immI0(regD dst, immI0 zero) %{
12627 predicate(UseSSE>=2); 11971 predicate(UseSSE>=2);
12628 match(Set dst (Replicate4C zero)); 11972 match(Set dst (Replicate4C zero));
12629 format %{ "PXOR $dst,$dst\t! replicate4C" %} 11973 format %{ "PXOR $dst,$dst\t! replicate4C" %}
12630 ins_encode( pxor(dst, dst)); 11974 ins_encode %{
11975 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11976 %}
12631 ins_pipe( fpu_reg_reg ); 11977 ins_pipe( fpu_reg_reg );
12632 %} 11978 %}
12633 11979
12634 // Replicate scalar to packed integer (4 byte) values in xmm 11980 // Replicate scalar to packed integer (4 byte) values in xmm
12635 instruct Repl2I_reg(regXD dst, regXD src) %{ 11981 instruct Repl2I_reg(regD dst, regD src) %{
12636 predicate(UseSSE>=2); 11982 predicate(UseSSE>=2);
12637 match(Set dst (Replicate2I src)); 11983 match(Set dst (Replicate2I src));
12638 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} 11984 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12639 ins_encode( pshufd(dst, src, 0x00)); 11985 ins_encode %{
11986 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11987 %}
12640 ins_pipe( fpu_reg_reg ); 11988 ins_pipe( fpu_reg_reg );
12641 %} 11989 %}
12642 11990
12643 // Replicate scalar to packed integer (4 byte) values in xmm 11991 // Replicate scalar to packed integer (4 byte) values in xmm
12644 instruct Repl2I_eRegI(regXD dst, eRegI src) %{ 11992 instruct Repl2I_eRegI(regD dst, eRegI src) %{
12645 predicate(UseSSE>=2); 11993 predicate(UseSSE>=2);
12646 match(Set dst (Replicate2I src)); 11994 match(Set dst (Replicate2I src));
12647 format %{ "MOVD $dst,$src\n\t" 11995 format %{ "MOVD $dst,$src\n\t"
12648 "PSHUFD $dst,$dst,0x00\t! replicate2I" %} 11996 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12649 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); 11997 ins_encode %{
11998 __ movdl($dst$$XMMRegister, $src$$Register);
11999 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
12000 %}
12650 ins_pipe( fpu_reg_reg ); 12001 ins_pipe( fpu_reg_reg );
12651 %} 12002 %}
12652 12003
12653 // Replicate scalar zero to packed integer (4 byte) values in xmm 12004 // Replicate scalar zero to packed integer (4 byte) values in xmm
12654 instruct Repl2I_immI0(regXD dst, immI0 zero) %{ 12005 instruct Repl2I_immI0(regD dst, immI0 zero) %{
12655 predicate(UseSSE>=2); 12006 predicate(UseSSE>=2);
12656 match(Set dst (Replicate2I zero)); 12007 match(Set dst (Replicate2I zero));
12657 format %{ "PXOR $dst,$dst\t! replicate2I" %} 12008 format %{ "PXOR $dst,$dst\t! replicate2I" %}
12658 ins_encode( pxor(dst, dst)); 12009 ins_encode %{
12010 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12011 %}
12659 ins_pipe( fpu_reg_reg ); 12012 ins_pipe( fpu_reg_reg );
12660 %} 12013 %}
12661 12014
12662 // Replicate scalar to packed single precision floating point values in xmm 12015 // Replicate scalar to packed single precision floating point values in xmm
12663 instruct Repl2F_reg(regXD dst, regXD src) %{ 12016 instruct Repl2F_reg(regD dst, regD src) %{
12664 predicate(UseSSE>=2); 12017 predicate(UseSSE>=2);
12665 match(Set dst (Replicate2F src)); 12018 match(Set dst (Replicate2F src));
12666 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 12019 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12667 ins_encode( pshufd(dst, src, 0xe0)); 12020 ins_encode %{
12021 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12022 %}
12668 ins_pipe( fpu_reg_reg ); 12023 ins_pipe( fpu_reg_reg );
12669 %} 12024 %}
12670 12025
12671 // Replicate scalar to packed single precision floating point values in xmm 12026 // Replicate scalar to packed single precision floating point values in xmm
12672 instruct Repl2F_regX(regXD dst, regX src) %{ 12027 instruct Repl2F_regF(regD dst, regF src) %{
12673 predicate(UseSSE>=2); 12028 predicate(UseSSE>=2);
12674 match(Set dst (Replicate2F src)); 12029 match(Set dst (Replicate2F src));
12675 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 12030 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12676 ins_encode( pshufd(dst, src, 0xe0)); 12031 ins_encode %{
12032 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
12033 %}
12677 ins_pipe( fpu_reg_reg ); 12034 ins_pipe( fpu_reg_reg );
12678 %} 12035 %}
12679 12036
12680 // Replicate scalar to packed single precision floating point values in xmm 12037 // Replicate scalar to packed single precision floating point values in xmm
12681 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ 12038 instruct Repl2F_immF0(regD dst, immF0 zero) %{
12682 predicate(UseSSE>=2); 12039 predicate(UseSSE>=2);
12683 match(Set dst (Replicate2F zero)); 12040 match(Set dst (Replicate2F zero));
12684 format %{ "PXOR $dst,$dst\t! replicate2F" %} 12041 format %{ "PXOR $dst,$dst\t! replicate2F" %}
12685 ins_encode( pxor(dst, dst)); 12042 ins_encode %{
12043 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
12044 %}
12686 ins_pipe( fpu_reg_reg ); 12045 ins_pipe( fpu_reg_reg );
12687 %} 12046 %}
12688 12047
12689 // ======================================================================= 12048 // =======================================================================
12690 // fast clearing of an array 12049 // fast clearing of an array
12700 Opcode(0xF3), Opcode(0xAB) ); 12059 Opcode(0xF3), Opcode(0xAB) );
12701 ins_pipe( pipe_slow ); 12060 ins_pipe( pipe_slow );
12702 %} 12061 %}
12703 12062
12704 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 12063 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12705 eAXRegI result, regXD tmp1, eFlagsReg cr) %{ 12064 eAXRegI result, regD tmp1, eFlagsReg cr) %{
12706 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 12065 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12707 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 12066 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12708 12067
12709 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 12068 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
12710 ins_encode %{ 12069 ins_encode %{
12715 ins_pipe( pipe_slow ); 12074 ins_pipe( pipe_slow );
12716 %} 12075 %}
12717 12076
12718 // fast string equals 12077 // fast string equals
12719 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 12078 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12720 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 12079 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
12721 match(Set result (StrEquals (Binary str1 str2) cnt)); 12080 match(Set result (StrEquals (Binary str1 str2) cnt));
12722 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 12081 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12723 12082
12724 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12083 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
12725 ins_encode %{ 12084 ins_encode %{
12730 ins_pipe( pipe_slow ); 12089 ins_pipe( pipe_slow );
12731 %} 12090 %}
12732 12091
12733 // fast search of substring with known size. 12092 // fast search of substring with known size.
12734 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 12093 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12735 eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 12094 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12736 predicate(UseSSE42Intrinsics); 12095 predicate(UseSSE42Intrinsics);
12737 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 12096 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12738 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 12097 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12739 12098
12740 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 12099 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
12757 %} 12116 %}
12758 ins_pipe( pipe_slow ); 12117 ins_pipe( pipe_slow );
12759 %} 12118 %}
12760 12119
12761 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 12120 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12762 eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{ 12121 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
12763 predicate(UseSSE42Intrinsics); 12122 predicate(UseSSE42Intrinsics);
12764 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 12123 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12765 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 12124 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12766 12125
12767 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 12126 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
12774 ins_pipe( pipe_slow ); 12133 ins_pipe( pipe_slow );
12775 %} 12134 %}
12776 12135
12777 // fast array equals 12136 // fast array equals
12778 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12137 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12779 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12138 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12780 %{ 12139 %{
12781 match(Set result (AryEq ary1 ary2)); 12140 match(Set result (AryEq ary1 ary2));
12782 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12141 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12783 //ins_cost(300); 12142 //ins_cost(300);
12784 12143
13600 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 12959 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13601 ins_pipe( pipe_cmov_reg ); 12960 ins_pipe( pipe_cmov_reg );
13602 %} 12961 %}
13603 12962
13604 // Compare 2 longs and CMOVE doubles 12963 // Compare 2 longs and CMOVE doubles
13605 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 12964 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13606 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12965 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13607 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12966 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13608 ins_cost(200); 12967 ins_cost(200);
13609 expand %{ 12968 expand %{
13610 fcmovD_regS(cmp,flags,dst,src); 12969 fcmovDPR_regS(cmp,flags,dst,src);
13611 %} 12970 %}
13612 %} 12971 %}
13613 12972
13614 // Compare 2 longs and CMOVE doubles 12973 // Compare 2 longs and CMOVE doubles
13615 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{ 12974 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13616 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12975 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13617 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 12976 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13618 ins_cost(200); 12977 ins_cost(200);
13619 expand %{ 12978 expand %{
13620 fcmovXD_regS(cmp,flags,dst,src); 12979 fcmovD_regS(cmp,flags,dst,src);
13621 %} 12980 %}
13622 %} 12981 %}
13623 12982
13624 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 12983 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13625 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12984 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13626 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12985 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13627 ins_cost(200); 12986 ins_cost(200);
13628 expand %{ 12987 expand %{
13629 fcmovF_regS(cmp,flags,dst,src); 12988 fcmovFPR_regS(cmp,flags,dst,src);
13630 %} 12989 %}
13631 %} 12990 %}
13632 12991
13633 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{ 12992 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13634 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 12993 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13635 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 12994 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13636 ins_cost(200); 12995 ins_cost(200);
13637 expand %{ 12996 expand %{
13638 fcmovX_regS(cmp,flags,dst,src); 12997 fcmovF_regS(cmp,flags,dst,src);
13639 %} 12998 %}
13640 %} 12999 %}
13641 13000
13642 //====== 13001 //======
13643 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 13002 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
13728 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13087 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13729 ins_pipe( pipe_cmov_reg ); 13088 ins_pipe( pipe_cmov_reg );
13730 %} 13089 %}
13731 13090
13732 // Compare 2 longs and CMOVE doubles 13091 // Compare 2 longs and CMOVE doubles
13733 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 13092 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13734 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13093 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13735 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13094 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13736 ins_cost(200); 13095 ins_cost(200);
13737 expand %{ 13096 expand %{
13738 fcmovD_regS(cmp,flags,dst,src); 13097 fcmovDPR_regS(cmp,flags,dst,src);
13739 %} 13098 %}
13740 %} 13099 %}
13741 13100
13742 // Compare 2 longs and CMOVE doubles 13101 // Compare 2 longs and CMOVE doubles
13743 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{ 13102 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13744 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13103 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13745 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13104 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13746 ins_cost(200); 13105 ins_cost(200);
13747 expand %{ 13106 expand %{
13748 fcmovXD_regS(cmp,flags,dst,src); 13107 fcmovD_regS(cmp,flags,dst,src);
13749 %} 13108 %}
13750 %} 13109 %}
13751 13110
13752 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 13111 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13753 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13112 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13754 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13113 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13755 ins_cost(200); 13114 ins_cost(200);
13756 expand %{ 13115 expand %{
13757 fcmovF_regS(cmp,flags,dst,src); 13116 fcmovFPR_regS(cmp,flags,dst,src);
13758 %} 13117 %}
13759 %} 13118 %}
13760 13119
13761 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{ 13120 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13762 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13121 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13763 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13122 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13764 ins_cost(200); 13123 ins_cost(200);
13765 expand %{ 13124 expand %{
13766 fcmovX_regS(cmp,flags,dst,src); 13125 fcmovF_regS(cmp,flags,dst,src);
13767 %} 13126 %}
13768 %} 13127 %}
13769 13128
13770 //====== 13129 //======
13771 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 13130 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
13861 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13220 ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13862 ins_pipe( pipe_cmov_reg ); 13221 ins_pipe( pipe_cmov_reg );
13863 %} 13222 %}
13864 13223
13865 // Compare 2 longs and CMOVE doubles 13224 // Compare 2 longs and CMOVE doubles
13866 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ 13225 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13867 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 13226 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13868 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13227 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13869 ins_cost(200); 13228 ins_cost(200);
13870 expand %{ 13229 expand %{
13871 fcmovD_regS(cmp,flags,dst,src); 13230 fcmovDPR_regS(cmp,flags,dst,src);
13872 %} 13231 %}
13873 %} 13232 %}
13874 13233
13875 // Compare 2 longs and CMOVE doubles 13234 // Compare 2 longs and CMOVE doubles
13876 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{ 13235 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13877 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 13236 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13878 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13237 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13879 ins_cost(200); 13238 ins_cost(200);
13880 expand %{ 13239 expand %{
13881 fcmovXD_regS(cmp,flags,dst,src); 13240 fcmovD_regS(cmp,flags,dst,src);
13882 %} 13241 %}
13883 %} 13242 %}
13884 13243
13885 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ 13244 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13886 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 13245 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13887 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13246 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13888 ins_cost(200); 13247 ins_cost(200);
13889 expand %{ 13248 expand %{
13890 fcmovF_regS(cmp,flags,dst,src); 13249 fcmovFPR_regS(cmp,flags,dst,src);
13891 %} 13250 %}
13892 %} 13251 %}
13893 13252
13894 13253
13895 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{ 13254 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13896 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 13255 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13897 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13256 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13898 ins_cost(200); 13257 ins_cost(200);
13899 expand %{ 13258 expand %{
13900 fcmovX_regS(cmp,flags,dst,src); 13259 fcmovF_regS(cmp,flags,dst,src);
13901 %} 13260 %}
13902 %} 13261 %}
13903 13262
13904 13263
13905 // ============================================================================ 13264 // ============================================================================