Mercurial > hg > truffle
comparison src/cpu/x86/vm/x86_32.ad @ 4768:8940fd98d540
Merge
author | kvn |
---|---|
date | Thu, 29 Dec 2011 11:37:50 -0800 |
parents | 1dc233a8c7fe |
children | e9a5e0a812c8 |
comparison
equal
deleted
inserted
replaced
4730:7faca6dfa2ed | 4768:8940fd98d540 |
---|---|
279 return 6; // fldcw | 279 return 6; // fldcw |
280 return 0; | 280 return 0; |
281 } | 281 } |
282 | 282 |
283 static int preserve_SP_size() { | 283 static int preserve_SP_size() { |
284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) | 284 return 2; // op, rm(reg/reg) |
285 } | 285 } |
286 | 286 |
287 // !!!!! Special hack to get all type of calls to specify the byte offset | 287 // !!!!! Special hack to get all type of calls to specify the byte offset |
288 // from the start of the call to the point where the return address | 288 // from the start of the call to the point where the return address |
289 // will point. | 289 // will point. |
493 emit_opcode( cbuf, 0x8B ); | 493 emit_opcode( cbuf, 0x8B ); |
494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); | 494 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); |
495 } | 495 } |
496 } | 496 } |
497 | 497 |
498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { | 498 void emit_cmpfp_fixup(MacroAssembler& _masm) { |
499 if( dst_encoding == src_encoding ) { | 499 Label exit; |
500 // reg-reg copy, use an empty encoding | 500 __ jccb(Assembler::noParity, exit); |
501 } else { | 501 __ pushf(); |
502 MacroAssembler _masm(&cbuf); | 502 // |
503 | 503 // comiss/ucomiss instructions set ZF,PF,CF flags and |
504 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); | 504 // zero OF,AF,SF for NaN values. |
505 } | 505 // Fixup flags by zeroing ZF,PF so that compare of NaN |
506 // values returns 'less than' result (CF is set). | |
507 // Leave the rest of flags unchanged. | |
508 // | |
509 // 7 6 5 4 3 2 1 0 | |
510 // |S|Z|r|A|r|P|r|C| (r - reserved bit) | |
511 // 0 0 1 0 1 0 1 1 (0x2B) | |
512 // | |
513 __ andl(Address(rsp, 0), 0xffffff2b); | |
514 __ popf(); | |
515 __ bind(exit); | |
516 } | |
517 | |
518 void emit_cmpfp3(MacroAssembler& _masm, Register dst) { | |
519 Label done; | |
520 __ movl(dst, -1); | |
521 __ jcc(Assembler::parity, done); | |
522 __ jcc(Assembler::below, done); | |
523 __ setb(Assembler::notEqual, dst); | |
524 __ movzbl(dst, dst); | |
525 __ bind(done); | |
506 } | 526 } |
507 | 527 |
508 | 528 |
509 //============================================================================= | 529 //============================================================================= |
510 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; | 530 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; |
790 } | 810 } |
791 | 811 |
792 // Helper for XMM registers. Extra opcode bits, limited syntax. | 812 // Helper for XMM registers. Extra opcode bits, limited syntax. |
793 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, | 813 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, |
794 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { | 814 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { |
795 if( cbuf ) { | 815 if (cbuf) { |
796 if( reg_lo+1 == reg_hi ) { // double move? | 816 MacroAssembler _masm(cbuf); |
797 if( is_load && !UseXmmLoadAndClearUpper ) | 817 if (reg_lo+1 == reg_hi) { // double move? |
798 emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load | 818 if (is_load) { |
799 else | 819 __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); |
800 emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise | 820 } else { |
821 __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); | |
822 } | |
801 } else { | 823 } else { |
802 emit_opcode(*cbuf, 0xF3 ); | 824 if (is_load) { |
825 __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); | |
826 } else { | |
827 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); | |
828 } | |
803 } | 829 } |
804 emit_opcode(*cbuf, 0x0F ); | |
805 if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper ) | |
806 emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load | |
807 else | |
808 emit_opcode(*cbuf, is_load ? 0x10 : 0x11 ); | |
809 encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false); | |
810 #ifndef PRODUCT | 830 #ifndef PRODUCT |
811 } else if( !do_size ) { | 831 } else if (!do_size) { |
812 if( size != 0 ) st->print("\n\t"); | 832 if (size != 0) st->print("\n\t"); |
813 if( reg_lo+1 == reg_hi ) { // double move? | 833 if (reg_lo+1 == reg_hi) { // double move? |
814 if( is_load ) st->print("%s %s,[ESP + #%d]", | 834 if (is_load) st->print("%s %s,[ESP + #%d]", |
815 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", | 835 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", |
816 Matcher::regName[reg_lo], offset); | 836 Matcher::regName[reg_lo], offset); |
817 else st->print("MOVSD [ESP + #%d],%s", | 837 else st->print("MOVSD [ESP + #%d],%s", |
818 offset, Matcher::regName[reg_lo]); | 838 offset, Matcher::regName[reg_lo]); |
819 } else { | 839 } else { |
820 if( is_load ) st->print("MOVSS %s,[ESP + #%d]", | 840 if (is_load) st->print("MOVSS %s,[ESP + #%d]", |
821 Matcher::regName[reg_lo], offset); | 841 Matcher::regName[reg_lo], offset); |
822 else st->print("MOVSS [ESP + #%d],%s", | 842 else st->print("MOVSS [ESP + #%d],%s", |
823 offset, Matcher::regName[reg_lo]); | 843 offset, Matcher::regName[reg_lo]); |
824 } | 844 } |
825 #endif | 845 #endif |
826 } | 846 } |
827 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); | 847 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); |
848 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes. | |
828 return size+5+offset_size; | 849 return size+5+offset_size; |
829 } | 850 } |
830 | 851 |
831 | 852 |
832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, | 853 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
833 int src_hi, int dst_hi, int size, outputStream* st ) { | 854 int src_hi, int dst_hi, int size, outputStream* st ) { |
834 if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers | 855 if (cbuf) { |
835 if( cbuf ) { | 856 MacroAssembler _masm(cbuf); |
836 if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) { | 857 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? |
837 emit_opcode(*cbuf, 0x66 ); | 858 __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), |
838 } | 859 as_XMMRegister(Matcher::_regEncode[src_lo])); |
839 emit_opcode(*cbuf, 0x0F ); | 860 } else { |
840 emit_opcode(*cbuf, 0x28 ); | 861 __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), |
841 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); | 862 as_XMMRegister(Matcher::_regEncode[src_lo])); |
863 } | |
842 #ifndef PRODUCT | 864 #ifndef PRODUCT |
843 } else if( !do_size ) { | 865 } else if (!do_size) { |
844 if( size != 0 ) st->print("\n\t"); | 866 if (size != 0) st->print("\n\t"); |
845 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? | 867 if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers |
868 if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? | |
846 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); | 869 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
847 } else { | 870 } else { |
848 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); | 871 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
849 } | 872 } |
850 #endif | 873 } else { |
851 } | |
852 return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3); | |
853 } else { | |
854 if( cbuf ) { | |
855 emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 ); | |
856 emit_opcode(*cbuf, 0x0F ); | |
857 emit_opcode(*cbuf, 0x10 ); | |
858 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); | |
859 #ifndef PRODUCT | |
860 } else if( !do_size ) { | |
861 if( size != 0 ) st->print("\n\t"); | |
862 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? | 874 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? |
863 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); | 875 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
864 } else { | 876 } else { |
865 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); | 877 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
866 } | 878 } |
879 } | |
867 #endif | 880 #endif |
868 } | |
869 return size+4; | |
870 } | 881 } |
882 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes. | |
883 // Only MOVAPS SSE prefix uses 1 byte. | |
884 int sz = 4; | |
885 if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && | |
886 UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; | |
887 return size + sz; | |
871 } | 888 } |
872 | 889 |
873 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, | 890 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
874 int src_hi, int dst_hi, int size, outputStream* st ) { | 891 int src_hi, int dst_hi, int size, outputStream* st ) { |
875 // 32-bit | 892 // 32-bit |
876 if (cbuf) { | 893 if (cbuf) { |
877 emit_opcode(*cbuf, 0x66); | 894 MacroAssembler _masm(cbuf); |
878 emit_opcode(*cbuf, 0x0F); | 895 __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), |
879 emit_opcode(*cbuf, 0x6E); | 896 as_Register(Matcher::_regEncode[src_lo])); |
880 emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7); | |
881 #ifndef PRODUCT | 897 #ifndef PRODUCT |
882 } else if (!do_size) { | 898 } else if (!do_size) { |
883 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); | 899 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); |
884 #endif | 900 #endif |
885 } | 901 } |
889 | 905 |
890 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, | 906 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
891 int src_hi, int dst_hi, int size, outputStream* st ) { | 907 int src_hi, int dst_hi, int size, outputStream* st ) { |
892 // 32-bit | 908 // 32-bit |
893 if (cbuf) { | 909 if (cbuf) { |
894 emit_opcode(*cbuf, 0x66); | 910 MacroAssembler _masm(cbuf); |
895 emit_opcode(*cbuf, 0x0F); | 911 __ movdl(as_Register(Matcher::_regEncode[dst_lo]), |
896 emit_opcode(*cbuf, 0x7E); | 912 as_XMMRegister(Matcher::_regEncode[src_lo])); |
897 emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7); | |
898 #ifndef PRODUCT | 913 #ifndef PRODUCT |
899 } else if (!do_size) { | 914 } else if (!do_size) { |
900 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); | 915 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); |
901 #endif | 916 #endif |
902 } | 917 } |
1758 enc_class enc_cmov(cmpOp cop ) %{ // CMOV | 1773 enc_class enc_cmov(cmpOp cop ) %{ // CMOV |
1759 $$$emit8$primary; | 1774 $$$emit8$primary; |
1760 emit_cc(cbuf, $secondary, $cop$$cmpcode); | 1775 emit_cc(cbuf, $secondary, $cop$$cmpcode); |
1761 %} | 1776 %} |
1762 | 1777 |
1763 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV | 1778 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV |
1764 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); | 1779 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); |
1765 emit_d8(cbuf, op >> 8 ); | 1780 emit_d8(cbuf, op >> 8 ); |
1766 emit_d8(cbuf, op & 255); | 1781 emit_d8(cbuf, op & 255); |
1767 %} | 1782 %} |
1768 | 1783 |
1927 cbuf.set_insts_mark(); | 1942 cbuf.set_insts_mark(); |
1928 $$$emit8$primary; | 1943 $$$emit8$primary; |
1929 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte | 1944 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte |
1930 emit_d8(cbuf, disp); // Displacement | 1945 emit_d8(cbuf, disp); // Displacement |
1931 | 1946 |
1932 %} | |
1933 | |
1934 enc_class Xor_Reg (eRegI dst) %{ | |
1935 emit_opcode(cbuf, 0x33); | |
1936 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); | |
1937 %} | 1947 %} |
1938 | 1948 |
1939 // Following encoding is no longer used, but may be restored if calling | 1949 // Following encoding is no longer used, but may be restored if calling |
1940 // convention changes significantly. | 1950 // convention changes significantly. |
1941 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) | 1951 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) |
2011 emit_d32(cbuf, src_con); | 2021 emit_d32(cbuf, src_con); |
2012 } | 2022 } |
2013 %} | 2023 %} |
2014 | 2024 |
2015 | 2025 |
2016 enc_class MovI2X_reg(regX dst, eRegI src) %{ | |
2017 emit_opcode(cbuf, 0x66 ); // MOVD dst,src | |
2018 emit_opcode(cbuf, 0x0F ); | |
2019 emit_opcode(cbuf, 0x6E ); | |
2020 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); | |
2021 %} | |
2022 | |
2023 enc_class MovX2I_reg(eRegI dst, regX src) %{ | |
2024 emit_opcode(cbuf, 0x66 ); // MOVD dst,src | |
2025 emit_opcode(cbuf, 0x0F ); | |
2026 emit_opcode(cbuf, 0x7E ); | |
2027 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); | |
2028 %} | |
2029 | |
2030 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{ | |
2031 { // MOVD $dst,$src.lo | |
2032 emit_opcode(cbuf,0x66); | |
2033 emit_opcode(cbuf,0x0F); | |
2034 emit_opcode(cbuf,0x6E); | |
2035 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); | |
2036 } | |
2037 { // MOVD $tmp,$src.hi | |
2038 emit_opcode(cbuf,0x66); | |
2039 emit_opcode(cbuf,0x0F); | |
2040 emit_opcode(cbuf,0x6E); | |
2041 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); | |
2042 } | |
2043 { // PUNPCKLDQ $dst,$tmp | |
2044 emit_opcode(cbuf,0x66); | |
2045 emit_opcode(cbuf,0x0F); | |
2046 emit_opcode(cbuf,0x62); | |
2047 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg); | |
2048 } | |
2049 %} | |
2050 | |
2051 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{ | |
2052 { // MOVD $dst.lo,$src | |
2053 emit_opcode(cbuf,0x66); | |
2054 emit_opcode(cbuf,0x0F); | |
2055 emit_opcode(cbuf,0x7E); | |
2056 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); | |
2057 } | |
2058 { // PSHUFLW $tmp,$src,0x4E (01001110b) | |
2059 emit_opcode(cbuf,0xF2); | |
2060 emit_opcode(cbuf,0x0F); | |
2061 emit_opcode(cbuf,0x70); | |
2062 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); | |
2063 emit_d8(cbuf, 0x4E); | |
2064 } | |
2065 { // MOVD $dst.hi,$tmp | |
2066 emit_opcode(cbuf,0x66); | |
2067 emit_opcode(cbuf,0x0F); | |
2068 emit_opcode(cbuf,0x7E); | |
2069 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); | |
2070 } | |
2071 %} | |
2072 | |
2073 | |
2074 // Encode a reg-reg copy. If it is useless, then empty encoding. | 2026 // Encode a reg-reg copy. If it is useless, then empty encoding. |
2075 enc_class enc_Copy( eRegI dst, eRegI src ) %{ | 2027 enc_class enc_Copy( eRegI dst, eRegI src ) %{ |
2076 encode_Copy( cbuf, $dst$$reg, $src$$reg ); | 2028 encode_Copy( cbuf, $dst$$reg, $src$$reg ); |
2077 %} | 2029 %} |
2078 | 2030 |
2079 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{ | 2031 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{ |
2080 encode_Copy( cbuf, $dst$$reg, $src$$reg ); | 2032 encode_Copy( cbuf, $dst$$reg, $src$$reg ); |
2081 %} | 2033 %} |
2082 | 2034 |
2083 // Encode xmm reg-reg copy. If it is useless, then empty encoding. | |
2084 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{ | |
2085 encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); | |
2086 %} | |
2087 | |
2088 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) | 2035 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) |
2089 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); | 2036 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); |
2090 %} | 2037 %} |
2091 | 2038 |
2092 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) | 2039 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) |
2114 enc_class Con32 (immI src) %{ // Con32(storeImmI) | 2061 enc_class Con32 (immI src) %{ // Con32(storeImmI) |
2115 // Output immediate | 2062 // Output immediate |
2116 $$$emit32$src$$constant; | 2063 $$$emit32$src$$constant; |
2117 %} | 2064 %} |
2118 | 2065 |
2119 enc_class Con32F_as_bits(immF src) %{ // storeF_imm | 2066 enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm |
2120 // Output Float immediate bits | 2067 // Output Float immediate bits |
2121 jfloat jf = $src$$constant; | 2068 jfloat jf = $src$$constant; |
2122 int jf_as_bits = jint_cast( jf ); | 2069 int jf_as_bits = jint_cast( jf ); |
2123 emit_d32(cbuf, jf_as_bits); | 2070 emit_d32(cbuf, jf_as_bits); |
2124 %} | 2071 %} |
2125 | 2072 |
2126 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm | 2073 enc_class Con32F_as_bits(immF src) %{ // storeX_imm |
2127 // Output Float immediate bits | 2074 // Output Float immediate bits |
2128 jfloat jf = $src$$constant; | 2075 jfloat jf = $src$$constant; |
2129 int jf_as_bits = jint_cast( jf ); | 2076 int jf_as_bits = jint_cast( jf ); |
2130 emit_d32(cbuf, jf_as_bits); | 2077 emit_d32(cbuf, jf_as_bits); |
2131 %} | 2078 %} |
2334 // move dst,src | 2281 // move dst,src |
2335 emit_opcode(cbuf,0x8B); | 2282 emit_opcode(cbuf,0x8B); |
2336 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); | 2283 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); |
2337 %} | 2284 %} |
2338 | 2285 |
2339 enc_class enc_FP_store(memory mem, regD src) %{ | 2286 enc_class enc_FPR_store(memory mem, regDPR src) %{ |
2340 // If src is FPR1, we can just FST to store it. | 2287 // If src is FPR1, we can just FST to store it. |
2341 // Else we need to FLD it to FPR1, then FSTP to store/pop it. | 2288 // Else we need to FLD it to FPR1, then FSTP to store/pop it. |
2342 int reg_encoding = 0x2; // Just store | 2289 int reg_encoding = 0x2; // Just store |
2343 int base = $mem$$base; | 2290 int base = $mem$$base; |
2344 int index = $mem$$index; | 2291 int index = $mem$$index; |
2483 %} | 2430 %} |
2484 | 2431 |
2485 | 2432 |
2486 // ----------------- Encodings for floating point unit ----------------- | 2433 // ----------------- Encodings for floating point unit ----------------- |
2487 // May leave result in FPU-TOS or FPU reg depending on opcodes | 2434 // May leave result in FPU-TOS or FPU reg depending on opcodes |
2488 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV | 2435 enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV |
2489 $$$emit8$primary; | 2436 $$$emit8$primary; |
2490 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); | 2437 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); |
2491 %} | 2438 %} |
2492 | 2439 |
2493 // Pop argument in FPR0 with FSTP ST(0) | 2440 // Pop argument in FPR0 with FSTP ST(0) |
2495 emit_opcode( cbuf, 0xDD ); | 2442 emit_opcode( cbuf, 0xDD ); |
2496 emit_d8( cbuf, 0xD8 ); | 2443 emit_d8( cbuf, 0xD8 ); |
2497 %} | 2444 %} |
2498 | 2445 |
2499 // !!!!! equivalent to Pop_Reg_F | 2446 // !!!!! equivalent to Pop_Reg_F |
2500 enc_class Pop_Reg_D( regD dst ) %{ | 2447 enc_class Pop_Reg_DPR( regDPR dst ) %{ |
2501 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) | 2448 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) |
2502 emit_d8( cbuf, 0xD8+$dst$$reg ); | 2449 emit_d8( cbuf, 0xD8+$dst$$reg ); |
2503 %} | 2450 %} |
2504 | 2451 |
2505 enc_class Push_Reg_D( regD dst ) %{ | 2452 enc_class Push_Reg_DPR( regDPR dst ) %{ |
2506 emit_opcode( cbuf, 0xD9 ); | 2453 emit_opcode( cbuf, 0xD9 ); |
2507 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) | 2454 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) |
2508 %} | 2455 %} |
2509 | 2456 |
2510 enc_class strictfp_bias1( regD dst ) %{ | 2457 enc_class strictfp_bias1( regDPR dst ) %{ |
2511 emit_opcode( cbuf, 0xDB ); // FLD m80real | 2458 emit_opcode( cbuf, 0xDB ); // FLD m80real |
2512 emit_opcode( cbuf, 0x2D ); | 2459 emit_opcode( cbuf, 0x2D ); |
2513 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); | 2460 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); |
2514 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 | 2461 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 |
2515 emit_opcode( cbuf, 0xC8+$dst$$reg ); | 2462 emit_opcode( cbuf, 0xC8+$dst$$reg ); |
2516 %} | 2463 %} |
2517 | 2464 |
2518 enc_class strictfp_bias2( regD dst ) %{ | 2465 enc_class strictfp_bias2( regDPR dst ) %{ |
2519 emit_opcode( cbuf, 0xDB ); // FLD m80real | 2466 emit_opcode( cbuf, 0xDB ); // FLD m80real |
2520 emit_opcode( cbuf, 0x2D ); | 2467 emit_opcode( cbuf, 0x2D ); |
2521 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); | 2468 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); |
2522 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 | 2469 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0 |
2523 emit_opcode( cbuf, 0xC8+$dst$$reg ); | 2470 emit_opcode( cbuf, 0xC8+$dst$$reg ); |
2539 // Push the integer in stackSlot 'src' onto FP-stack | 2486 // Push the integer in stackSlot 'src' onto FP-stack |
2540 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] | 2487 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] |
2541 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); | 2488 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); |
2542 %} | 2489 %} |
2543 | 2490 |
2544 // Push the float in stackSlot 'src' onto FP-stack | |
2545 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src] | |
2546 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp ); | |
2547 %} | |
2548 | |
2549 // Push the double in stackSlot 'src' onto FP-stack | |
2550 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src] | |
2551 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp ); | |
2552 %} | |
2553 | |
2554 // Push FPU's TOS float to a stack-slot, and pop FPU-stack | 2491 // Push FPU's TOS float to a stack-slot, and pop FPU-stack |
2555 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst] | 2492 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] |
2556 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); | 2493 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); |
2557 %} | 2494 %} |
2558 | 2495 |
2559 // Same as Pop_Mem_F except for opcode | 2496 // Same as Pop_Mem_F except for opcode |
2560 // Push FPU's TOS double to a stack-slot, and pop FPU-stack | 2497 // Push FPU's TOS double to a stack-slot, and pop FPU-stack |
2561 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst] | 2498 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] |
2562 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); | 2499 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); |
2563 %} | 2500 %} |
2564 | 2501 |
2565 enc_class Pop_Reg_F( regF dst ) %{ | 2502 enc_class Pop_Reg_FPR( regFPR dst ) %{ |
2566 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) | 2503 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) |
2567 emit_d8( cbuf, 0xD8+$dst$$reg ); | 2504 emit_d8( cbuf, 0xD8+$dst$$reg ); |
2568 %} | 2505 %} |
2569 | 2506 |
2570 enc_class Push_Reg_F( regF dst ) %{ | 2507 enc_class Push_Reg_FPR( regFPR dst ) %{ |
2571 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) | 2508 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) |
2572 emit_d8( cbuf, 0xC0-1+$dst$$reg ); | 2509 emit_d8( cbuf, 0xC0-1+$dst$$reg ); |
2573 %} | 2510 %} |
2574 | 2511 |
2575 // Push FPU's float to a stack-slot, and pop FPU-stack | 2512 // Push FPU's float to a stack-slot, and pop FPU-stack |
2576 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{ | 2513 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ |
2577 int pop = 0x02; | 2514 int pop = 0x02; |
2578 if ($src$$reg != FPR1L_enc) { | 2515 if ($src$$reg != FPR1L_enc) { |
2579 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) | 2516 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) |
2580 emit_d8( cbuf, 0xC0-1+$src$$reg ); | 2517 emit_d8( cbuf, 0xC0-1+$src$$reg ); |
2581 pop = 0x03; | 2518 pop = 0x03; |
2582 } | 2519 } |
2583 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] | 2520 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] |
2584 %} | 2521 %} |
2585 | 2522 |
2586 // Push FPU's double to a stack-slot, and pop FPU-stack | 2523 // Push FPU's double to a stack-slot, and pop FPU-stack |
2587 enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{ | 2524 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ |
2588 int pop = 0x02; | 2525 int pop = 0x02; |
2589 if ($src$$reg != FPR1L_enc) { | 2526 if ($src$$reg != FPR1L_enc) { |
2590 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) | 2527 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) |
2591 emit_d8( cbuf, 0xC0-1+$src$$reg ); | 2528 emit_d8( cbuf, 0xC0-1+$src$$reg ); |
2592 pop = 0x03; | 2529 pop = 0x03; |
2593 } | 2530 } |
2594 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] | 2531 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] |
2595 %} | 2532 %} |
2596 | 2533 |
2597 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack | 2534 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack |
2598 enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{ | 2535 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ |
2599 int pop = 0xD0 - 1; // -1 since we skip FLD | 2536 int pop = 0xD0 - 1; // -1 since we skip FLD |
2600 if ($src$$reg != FPR1L_enc) { | 2537 if ($src$$reg != FPR1L_enc) { |
2601 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) | 2538 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) |
2602 emit_d8( cbuf, 0xC0-1+$src$$reg ); | 2539 emit_d8( cbuf, 0xC0-1+$src$$reg ); |
2603 pop = 0xD8; | 2540 pop = 0xD8; |
2605 emit_opcode( cbuf, 0xDD ); | 2542 emit_opcode( cbuf, 0xDD ); |
2606 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) | 2543 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) |
2607 %} | 2544 %} |
2608 | 2545 |
2609 | 2546 |
2610 enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{ | 2547 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ |
2611 MacroAssembler masm(&cbuf); | |
2612 masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg | |
2613 masm.fmul( $src2$$reg+0); // value at TOS | |
2614 masm.fadd( $src$$reg+0); // value at TOS | |
2615 masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store | |
2616 %} | |
2617 | |
2618 | |
2619 enc_class Push_Reg_Mod_D( regD dst, regD src) %{ | |
2620 // load dst in FPR0 | 2548 // load dst in FPR0 |
2621 emit_opcode( cbuf, 0xD9 ); | 2549 emit_opcode( cbuf, 0xD9 ); |
2622 emit_d8( cbuf, 0xC0-1+$dst$$reg ); | 2550 emit_d8( cbuf, 0xC0-1+$dst$$reg ); |
2623 if ($src$$reg != FPR1L_enc) { | 2551 if ($src$$reg != FPR1L_enc) { |
2624 // fincstp | 2552 // fincstp |
2632 emit_opcode (cbuf, 0xD9); | 2560 emit_opcode (cbuf, 0xD9); |
2633 emit_opcode (cbuf, 0xF6); | 2561 emit_opcode (cbuf, 0xF6); |
2634 } | 2562 } |
2635 %} | 2563 %} |
2636 | 2564 |
2637 enc_class Push_ModD_encoding( regXD src0, regXD src1) %{ | 2565 enc_class Push_ModD_encoding(regD src0, regD src1) %{ |
2638 // Allocate a word | 2566 MacroAssembler _masm(&cbuf); |
2639 emit_opcode(cbuf,0x83); // SUB ESP,8 | 2567 __ subptr(rsp, 8); |
2640 emit_opcode(cbuf,0xEC); | 2568 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); |
2641 emit_d8(cbuf,0x08); | 2569 __ fld_d(Address(rsp, 0)); |
2642 | 2570 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); |
2643 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1 | 2571 __ fld_d(Address(rsp, 0)); |
2644 emit_opcode (cbuf, 0x0F ); | 2572 %} |
2645 emit_opcode (cbuf, 0x11 ); | 2573 |
2646 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); | 2574 enc_class Push_ModF_encoding(regF src0, regF src1) %{ |
2647 | 2575 MacroAssembler _masm(&cbuf); |
2648 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] | 2576 __ subptr(rsp, 4); |
2649 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | 2577 __ movflt(Address(rsp, 0), $src1$$XMMRegister); |
2650 | 2578 __ fld_s(Address(rsp, 0)); |
2651 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0 | 2579 __ movflt(Address(rsp, 0), $src0$$XMMRegister); |
2652 emit_opcode (cbuf, 0x0F ); | 2580 __ fld_s(Address(rsp, 0)); |
2653 emit_opcode (cbuf, 0x11 ); | 2581 %} |
2654 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); | 2582 |
2655 | 2583 enc_class Push_ResultD(regD dst) %{ |
2656 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] | 2584 MacroAssembler _masm(&cbuf); |
2657 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | 2585 __ fstp_d(Address(rsp, 0)); |
2658 | 2586 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); |
2659 %} | 2587 __ addptr(rsp, 8); |
2660 | 2588 %} |
2661 enc_class Push_ModX_encoding( regX src0, regX src1) %{ | 2589 |
2662 // Allocate a word | 2590 enc_class Push_ResultF(regF dst, immI d8) %{ |
2663 emit_opcode(cbuf,0x83); // SUB ESP,4 | 2591 MacroAssembler _masm(&cbuf); |
2664 emit_opcode(cbuf,0xEC); | 2592 __ fstp_s(Address(rsp, 0)); |
2665 emit_d8(cbuf,0x04); | 2593 __ movflt($dst$$XMMRegister, Address(rsp, 0)); |
2666 | 2594 __ addptr(rsp, $d8$$constant); |
2667 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1 | 2595 %} |
2668 emit_opcode (cbuf, 0x0F ); | 2596 |
2669 emit_opcode (cbuf, 0x11 ); | 2597 enc_class Push_SrcD(regD src) %{ |
2670 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); | 2598 MacroAssembler _masm(&cbuf); |
2671 | 2599 __ subptr(rsp, 8); |
2672 emit_opcode(cbuf,0xD9 ); // FLD [ESP] | 2600 __ movdbl(Address(rsp, 0), $src$$XMMRegister); |
2673 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | 2601 __ fld_d(Address(rsp, 0)); |
2674 | |
2675 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0 | |
2676 emit_opcode (cbuf, 0x0F ); | |
2677 emit_opcode (cbuf, 0x11 ); | |
2678 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); | |
2679 | |
2680 emit_opcode(cbuf,0xD9 ); // FLD [ESP] | |
2681 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
2682 | |
2683 %} | |
2684 | |
2685 enc_class Push_ResultXD(regXD dst) %{ | |
2686 store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP] | |
2687 | |
2688 // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp] | |
2689 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); | |
2690 emit_opcode (cbuf, 0x0F ); | |
2691 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12); | |
2692 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); | |
2693 | |
2694 emit_opcode(cbuf,0x83); // ADD ESP,8 | |
2695 emit_opcode(cbuf,0xC4); | |
2696 emit_d8(cbuf,0x08); | |
2697 %} | |
2698 | |
2699 enc_class Push_ResultX(regX dst, immI d8) %{ | |
2700 store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP] | |
2701 | |
2702 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] | |
2703 emit_opcode (cbuf, 0x0F ); | |
2704 emit_opcode (cbuf, 0x10 ); | |
2705 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); | |
2706 | |
2707 emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8) | |
2708 emit_opcode(cbuf,0xC4); | |
2709 emit_d8(cbuf,$d8$$constant); | |
2710 %} | |
2711 | |
2712 enc_class Push_SrcXD(regXD src) %{ | |
2713 // Allocate a word | |
2714 emit_opcode(cbuf,0x83); // SUB ESP,8 | |
2715 emit_opcode(cbuf,0xEC); | |
2716 emit_d8(cbuf,0x08); | |
2717 | |
2718 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src | |
2719 emit_opcode (cbuf, 0x0F ); | |
2720 emit_opcode (cbuf, 0x11 ); | |
2721 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); | |
2722 | |
2723 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] | |
2724 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
2725 %} | 2602 %} |
2726 | 2603 |
2727 enc_class push_stack_temp_qword() %{ | 2604 enc_class push_stack_temp_qword() %{ |
2728 emit_opcode(cbuf,0x83); // SUB ESP,8 | 2605 MacroAssembler _masm(&cbuf); |
2729 emit_opcode(cbuf,0xEC); | 2606 __ subptr(rsp, 8); |
2730 emit_d8 (cbuf,0x08); | |
2731 %} | 2607 %} |
2732 | 2608 |
2733 enc_class pop_stack_temp_qword() %{ | 2609 enc_class pop_stack_temp_qword() %{ |
2734 emit_opcode(cbuf,0x83); // ADD ESP,8 | 2610 MacroAssembler _masm(&cbuf); |
2735 emit_opcode(cbuf,0xC4); | 2611 __ addptr(rsp, 8); |
2736 emit_d8 (cbuf,0x08); | 2612 %} |
2737 %} | 2613 |
2738 | 2614 enc_class push_xmm_to_fpr1(regD src) %{ |
2739 enc_class push_xmm_to_fpr1( regXD xmm_src ) %{ | 2615 MacroAssembler _masm(&cbuf); |
2740 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src | 2616 __ movdbl(Address(rsp, 0), $src$$XMMRegister); |
2741 emit_opcode (cbuf, 0x0F ); | 2617 __ fld_d(Address(rsp, 0)); |
2742 emit_opcode (cbuf, 0x11 ); | |
2743 encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false); | |
2744 | |
2745 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] | |
2746 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
2747 %} | 2618 %} |
2748 | 2619 |
2749 // Compute X^Y using Intel's fast hardware instructions, if possible. | 2620 // Compute X^Y using Intel's fast hardware instructions, if possible. |
2750 // Otherwise return a NaN. | 2621 // Otherwise return a NaN. |
2751 enc_class pow_exp_core_encoding %{ | 2622 enc_class pow_exp_core_encoding %{ |
2783 emit_d32(cbuf,0); | 2654 emit_d32(cbuf,0); |
2784 emit_opcode(cbuf,0xDC); // fmul qword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q | 2655 emit_opcode(cbuf,0xDC); // fmul qword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q |
2785 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); | 2656 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); |
2786 %} | 2657 %} |
2787 | 2658 |
2788 // enc_class Pop_Reg_Mod_D( regD dst, regD src) | 2659 enc_class Push_Result_Mod_DPR( regDPR src) %{ |
2789 // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X() | |
2790 | |
2791 enc_class Push_Result_Mod_D( regD src) %{ | |
2792 if ($src$$reg != FPR1L_enc) { | 2660 if ($src$$reg != FPR1L_enc) { |
2793 // fincstp | 2661 // fincstp |
2794 emit_opcode (cbuf, 0xD9); | 2662 emit_opcode (cbuf, 0xD9); |
2795 emit_opcode (cbuf, 0xF7); | 2663 emit_opcode (cbuf, 0xF7); |
2796 // FXCH FPR1 with src | 2664 // FXCH FPR1 with src |
2815 // jnp ::skip | 2683 // jnp ::skip |
2816 emit_opcode( cbuf, 0x7B ); | 2684 emit_opcode( cbuf, 0x7B ); |
2817 emit_opcode( cbuf, 0x05 ); | 2685 emit_opcode( cbuf, 0x05 ); |
2818 %} | 2686 %} |
2819 | 2687 |
2820 enc_class emitModD() %{ | 2688 enc_class emitModDPR() %{ |
2821 // fprem must be iterative | 2689 // fprem must be iterative |
2822 // :: loop | 2690 // :: loop |
2823 // fprem | 2691 // fprem |
2824 emit_opcode( cbuf, 0xD9 ); | 2692 emit_opcode( cbuf, 0xD9 ); |
2825 emit_opcode( cbuf, 0xF8 ); | 2693 emit_opcode( cbuf, 0xF8 ); |
2920 emit_opcode( cbuf, 0xB8 + $dst$$reg); | 2788 emit_opcode( cbuf, 0xB8 + $dst$$reg); |
2921 emit_d32( cbuf, 1 ); | 2789 emit_d32( cbuf, 1 ); |
2922 %} | 2790 %} |
2923 | 2791 |
2924 | 2792 |
2925 // XMM version of CmpF_Result. Because the XMM compare | |
2926 // instructions set the EFLAGS directly, it becomes simpler than | |
2927 // the float version above. | |
2928 enc_class CmpX_Result(eRegI dst) %{ | |
2929 MacroAssembler _masm(&cbuf); | |
2930 Label nan, inc, done; | |
2931 | |
2932 __ jccb(Assembler::parity, nan); | |
2933 __ jccb(Assembler::equal, done); | |
2934 __ jccb(Assembler::above, inc); | |
2935 __ bind(nan); | |
2936 __ decrement(as_Register($dst$$reg)); // NO L qqq | |
2937 __ jmpb(done); | |
2938 __ bind(inc); | |
2939 __ increment(as_Register($dst$$reg)); // NO L qqq | |
2940 __ bind(done); | |
2941 %} | |
2942 | |
2943 // Compare the longs and set flags | 2793 // Compare the longs and set flags |
2944 // BROKEN! Do Not use as-is | 2794 // BROKEN! Do Not use as-is |
2945 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ | 2795 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ |
2946 // CMP $src1.hi,$src2.hi | 2796 // CMP $src1.hi,$src2.hi |
2947 emit_opcode( cbuf, 0x3B ); | 2797 emit_opcode( cbuf, 0x3B ); |
3158 emit_opcode(cbuf,0xF7); // NEG lo | 3008 emit_opcode(cbuf,0xF7); // NEG lo |
3159 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); | 3009 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); |
3160 emit_opcode(cbuf,0x83); // SBB hi,0 | 3010 emit_opcode(cbuf,0x83); // SBB hi,0 |
3161 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); | 3011 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); |
3162 emit_d8 (cbuf,0 ); | 3012 emit_d8 (cbuf,0 ); |
3163 %} | |
3164 | |
3165 enc_class movq_ld(regXD dst, memory mem) %{ | |
3166 MacroAssembler _masm(&cbuf); | |
3167 __ movq($dst$$XMMRegister, $mem$$Address); | |
3168 %} | |
3169 | |
3170 enc_class movq_st(memory mem, regXD src) %{ | |
3171 MacroAssembler _masm(&cbuf); | |
3172 __ movq($mem$$Address, $src$$XMMRegister); | |
3173 %} | |
3174 | |
3175 enc_class pshufd_8x8(regX dst, regX src) %{ | |
3176 MacroAssembler _masm(&cbuf); | |
3177 | |
3178 encode_CopyXD(cbuf, $dst$$reg, $src$$reg); | |
3179 __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg)); | |
3180 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00); | |
3181 %} | |
3182 | |
3183 enc_class pshufd_4x16(regX dst, regX src) %{ | |
3184 MacroAssembler _masm(&cbuf); | |
3185 | |
3186 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00); | |
3187 %} | |
3188 | |
3189 enc_class pshufd(regXD dst, regXD src, int mode) %{ | |
3190 MacroAssembler _masm(&cbuf); | |
3191 | |
3192 __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode); | |
3193 %} | |
3194 | |
3195 enc_class pxor(regXD dst, regXD src) %{ | |
3196 MacroAssembler _masm(&cbuf); | |
3197 | |
3198 __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg)); | |
3199 %} | |
3200 | |
3201 enc_class mov_i2x(regXD dst, eRegI src) %{ | |
3202 MacroAssembler _masm(&cbuf); | |
3203 | |
3204 __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg)); | |
3205 %} | 3013 %} |
3206 | 3014 |
3207 | 3015 |
3208 // Because the transitions from emitted code to the runtime | 3016 // Because the transitions from emitted code to the runtime |
3209 // monitorenter/exit helper stubs are so slow it's critical that | 3017 // monitorenter/exit helper stubs are so slow it's critical that |
3755 // Convert a double to an int. Java semantics require we do complex | 3563 // Convert a double to an int. Java semantics require we do complex |
3756 // manglelations in the corner cases. So we set the rounding mode to | 3564 // manglelations in the corner cases. So we set the rounding mode to |
3757 // 'zero', store the darned double down as an int, and reset the | 3565 // 'zero', store the darned double down as an int, and reset the |
3758 // rounding mode to 'nearest'. The hardware throws an exception which | 3566 // rounding mode to 'nearest'. The hardware throws an exception which |
3759 // patches up the correct value directly to the stack. | 3567 // patches up the correct value directly to the stack. |
3760 enc_class D2I_encoding( regD src ) %{ | 3568 enc_class DPR2I_encoding( regDPR src ) %{ |
3761 // Flip to round-to-zero mode. We attempted to allow invalid-op | 3569 // Flip to round-to-zero mode. We attempted to allow invalid-op |
3762 // exceptions here, so that a NAN or other corner-case value will | 3570 // exceptions here, so that a NAN or other corner-case value will |
3763 // throw an exception (but normal values get converted at full speed). | 3571 // throw an exception (but normal values get converted at full speed). |
3764 // However, I2C adapters and other float-stack manglers leave pending | 3572 // However, I2C adapters and other float-stack manglers leave pending |
3765 // invalid-op exceptions hanging. We would have to clear them before | 3573 // invalid-op exceptions hanging. We would have to clear them before |
3798 emit_opcode(cbuf,0xE8); // Call into runtime | 3606 emit_opcode(cbuf,0xE8); // Call into runtime |
3799 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); | 3607 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); |
3800 // Carry on here... | 3608 // Carry on here... |
3801 %} | 3609 %} |
3802 | 3610 |
3803 enc_class D2L_encoding( regD src ) %{ | 3611 enc_class DPR2L_encoding( regDPR src ) %{ |
3804 emit_opcode(cbuf,0xD9); // FLDCW trunc | 3612 emit_opcode(cbuf,0xD9); // FLDCW trunc |
3805 emit_opcode(cbuf,0x2D); | 3613 emit_opcode(cbuf,0x2D); |
3806 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); | 3614 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); |
3807 // Allocate a word | 3615 // Allocate a word |
3808 emit_opcode(cbuf,0x83); // SUB ESP,8 | 3616 emit_opcode(cbuf,0x83); // SUB ESP,8 |
3840 emit_opcode(cbuf,0xE8); // Call into runtime | 3648 emit_opcode(cbuf,0xE8); // Call into runtime |
3841 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); | 3649 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); |
3842 // Carry on here... | 3650 // Carry on here... |
3843 %} | 3651 %} |
3844 | 3652 |
3845 enc_class X2L_encoding( regX src ) %{ | 3653 enc_class FMul_ST_reg( eRegFPR src1 ) %{ |
3846 // Allocate a word | |
3847 emit_opcode(cbuf,0x83); // SUB ESP,8 | |
3848 emit_opcode(cbuf,0xEC); | |
3849 emit_d8(cbuf,0x08); | |
3850 | |
3851 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src | |
3852 emit_opcode (cbuf, 0x0F ); | |
3853 emit_opcode (cbuf, 0x11 ); | |
3854 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); | |
3855 | |
3856 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] | |
3857 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
3858 | |
3859 emit_opcode(cbuf,0xD9); // FLDCW trunc | |
3860 emit_opcode(cbuf,0x2D); | |
3861 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); | |
3862 | |
3863 // Encoding assumes a double has been pushed into FPR0. | |
3864 // Store down the double as a long, popping the FPU stack | |
3865 emit_opcode(cbuf,0xDF); // FISTP [ESP] | |
3866 emit_opcode(cbuf,0x3C); | |
3867 emit_d8(cbuf,0x24); | |
3868 | |
3869 // Restore the rounding mode; mask the exception | |
3870 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode | |
3871 emit_opcode(cbuf,0x2D); | |
3872 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() | |
3873 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() | |
3874 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); | |
3875 | |
3876 // Load the converted int; adjust CPU stack | |
3877 emit_opcode(cbuf,0x58); // POP EAX | |
3878 | |
3879 emit_opcode(cbuf,0x5A); // POP EDX | |
3880 | |
3881 emit_opcode(cbuf,0x81); // CMP EDX,imm | |
3882 emit_d8 (cbuf,0xFA); // rdx | |
3883 emit_d32 (cbuf,0x80000000);// 0x80000000 | |
3884 | |
3885 emit_opcode(cbuf,0x75); // JNE around_slow_call | |
3886 emit_d8 (cbuf,0x13+4); // Size of slow_call | |
3887 | |
3888 emit_opcode(cbuf,0x85); // TEST EAX,EAX | |
3889 emit_opcode(cbuf,0xC0); // ModRM: mod=3, reg=EAX, rm=EAX | |
3890 | |
3891 emit_opcode(cbuf,0x75); // JNE around_slow_call | |
3892 emit_d8 (cbuf,0x13); // Size of slow_call | |
3893 | |
3894 // Allocate a word | |
3895 emit_opcode(cbuf,0x83); // SUB ESP,4 | |
3896 emit_opcode(cbuf,0xEC); | |
3897 emit_d8(cbuf,0x04); | |
3898 | |
3899 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src | |
3900 emit_opcode (cbuf, 0x0F ); | |
3901 emit_opcode (cbuf, 0x11 ); | |
3902 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); | |
3903 | |
3904 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] | |
3905 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
3906 | |
3907 emit_opcode(cbuf,0x83); // ADD ESP,4 | |
3908 emit_opcode(cbuf,0xC4); | |
3909 emit_d8(cbuf,0x04); | |
3910 | |
3911 // CALL directly to the runtime | |
3912 cbuf.set_insts_mark(); | |
3913 emit_opcode(cbuf,0xE8); // Call into runtime | |
3914 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); | |
3915 // Carry on here... | |
3916 %} | |
3917 | |
3918 enc_class XD2L_encoding( regXD src ) %{ | |
3919 // Allocate a word | |
3920 emit_opcode(cbuf,0x83); // SUB ESP,8 | |
3921 emit_opcode(cbuf,0xEC); | |
3922 emit_d8(cbuf,0x08); | |
3923 | |
3924 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src | |
3925 emit_opcode (cbuf, 0x0F ); | |
3926 emit_opcode (cbuf, 0x11 ); | |
3927 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); | |
3928 | |
3929 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] | |
3930 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
3931 | |
3932 emit_opcode(cbuf,0xD9); // FLDCW trunc | |
3933 emit_opcode(cbuf,0x2D); | |
3934 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); | |
3935 | |
3936 // Encoding assumes a double has been pushed into FPR0. | |
3937 // Store down the double as a long, popping the FPU stack | |
3938 emit_opcode(cbuf,0xDF); // FISTP [ESP] | |
3939 emit_opcode(cbuf,0x3C); | |
3940 emit_d8(cbuf,0x24); | |
3941 | |
3942 // Restore the rounding mode; mask the exception | |
3943 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode | |
3944 emit_opcode(cbuf,0x2D); | |
3945 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() | |
3946 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() | |
3947 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); | |
3948 | |
3949 // Load the converted int; adjust CPU stack | |
3950 emit_opcode(cbuf,0x58); // POP EAX | |
3951 | |
3952 emit_opcode(cbuf,0x5A); // POP EDX | |
3953 | |
3954 emit_opcode(cbuf,0x81); // CMP EDX,imm | |
3955 emit_d8 (cbuf,0xFA); // rdx | |
3956 emit_d32 (cbuf,0x80000000); // 0x80000000 | |
3957 | |
3958 emit_opcode(cbuf,0x75); // JNE around_slow_call | |
3959 emit_d8 (cbuf,0x13+4); // Size of slow_call | |
3960 | |
3961 emit_opcode(cbuf,0x85); // TEST EAX,EAX | |
3962 emit_opcode(cbuf,0xC0); // ModRM: mod=3, reg=EAX, rm=EAX | |
3963 | |
3964 emit_opcode(cbuf,0x75); // JNE around_slow_call | |
3965 emit_d8 (cbuf,0x13); // Size of slow_call | |
3966 | |
3967 // Push src onto stack slow-path | |
3968 // Allocate a word | |
3969 emit_opcode(cbuf,0x83); // SUB ESP,8 | |
3970 emit_opcode(cbuf,0xEC); | |
3971 emit_d8(cbuf,0x08); | |
3972 | |
3973 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src | |
3974 emit_opcode (cbuf, 0x0F ); | |
3975 emit_opcode (cbuf, 0x11 ); | |
3976 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); | |
3977 | |
3978 emit_opcode(cbuf,0xDD ); // FLD_D [ESP] | |
3979 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
3980 | |
3981 emit_opcode(cbuf,0x83); // ADD ESP,8 | |
3982 emit_opcode(cbuf,0xC4); | |
3983 emit_d8(cbuf,0x08); | |
3984 | |
3985 // CALL directly to the runtime | |
3986 cbuf.set_insts_mark(); | |
3987 emit_opcode(cbuf,0xE8); // Call into runtime | |
3988 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); | |
3989 // Carry on here... | |
3990 %} | |
3991 | |
3992 enc_class D2X_encoding( regX dst, regD src ) %{ | |
3993 // Allocate a word | |
3994 emit_opcode(cbuf,0x83); // SUB ESP,4 | |
3995 emit_opcode(cbuf,0xEC); | |
3996 emit_d8(cbuf,0x04); | |
3997 int pop = 0x02; | |
3998 if ($src$$reg != FPR1L_enc) { | |
3999 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) | |
4000 emit_d8( cbuf, 0xC0-1+$src$$reg ); | |
4001 pop = 0x03; | |
4002 } | |
4003 store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP] | |
4004 | |
4005 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] | |
4006 emit_opcode (cbuf, 0x0F ); | |
4007 emit_opcode (cbuf, 0x10 ); | |
4008 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); | |
4009 | |
4010 emit_opcode(cbuf,0x83); // ADD ESP,4 | |
4011 emit_opcode(cbuf,0xC4); | |
4012 emit_d8(cbuf,0x04); | |
4013 // Carry on here... | |
4014 %} | |
4015 | |
4016 enc_class FX2I_encoding( regX src, eRegI dst ) %{ | |
4017 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); | |
4018 | |
4019 // Compare the result to see if we need to go to the slow path | |
4020 emit_opcode(cbuf,0x81); // CMP dst,imm | |
4021 emit_rm (cbuf,0x3,0x7,$dst$$reg); | |
4022 emit_d32 (cbuf,0x80000000); // 0x80000000 | |
4023 | |
4024 emit_opcode(cbuf,0x75); // JNE around_slow_call | |
4025 emit_d8 (cbuf,0x13); // Size of slow_call | |
4026 // Store xmm to a temp memory | |
4027 // location and push it onto stack. | |
4028 | |
4029 emit_opcode(cbuf,0x83); // SUB ESP,4 (8 when $primary, i.e. double) | |
4030 emit_opcode(cbuf,0xEC); | |
4031 emit_d8(cbuf, $primary ? 0x8 : 0x4); | |
4032 | |
4033 emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSD (0xF2) or MOVSS (0xF3) [ESP], xmm | |
4034 emit_opcode (cbuf, 0x0F ); | |
4035 emit_opcode (cbuf, 0x11 ); | |
4036 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); | |
4037 | |
4038 emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP] | |
4039 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
4040 | |
4041 emit_opcode(cbuf,0x83); // ADD ESP,4 (8 when $primary, i.e. double) | |
4042 emit_opcode(cbuf,0xC4); | |
4043 emit_d8(cbuf, $primary ? 0x8 : 0x4); | |
4044 | |
4045 // CALL directly to the runtime | |
4046 cbuf.set_insts_mark(); | |
4047 emit_opcode(cbuf,0xE8); // Call into runtime | |
4048 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); | |
4049 | |
4050 // Carry on here... | |
4051 %} | |
4052 | |
4053 enc_class X2D_encoding( regD dst, regX src ) %{ | |
4054 // Allocate a word | |
4055 emit_opcode(cbuf,0x83); // SUB ESP,4 | |
4056 emit_opcode(cbuf,0xEC); | |
4057 emit_d8(cbuf,0x04); | |
4058 | |
4059 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm | |
4060 emit_opcode (cbuf, 0x0F ); | |
4061 emit_opcode (cbuf, 0x11 ); | |
4062 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); | |
4063 | |
4064 emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] | |
4065 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); | |
4066 | |
4067 emit_opcode(cbuf,0x83); // ADD ESP,4 | |
4068 emit_opcode(cbuf,0xC4); | |
4069 emit_d8(cbuf,0x04); | |
4070 | |
4071 // Carry on here... | |
4072 %} | |
4073 | |
4074 enc_class AbsXF_encoding(regX dst) %{ | |
4075 address signmask_address=(address)float_signmask_pool; | |
4076 // andps:\tANDPS $dst,[signconst] | |
4077 emit_opcode(cbuf, 0x0F); | |
4078 emit_opcode(cbuf, 0x54); | |
4079 emit_rm(cbuf, 0x0, $dst$$reg, 0x5); | |
4080 emit_d32(cbuf, (int)signmask_address); | |
4081 %} | |
4082 | |
4083 enc_class AbsXD_encoding(regXD dst) %{ | |
4084 address signmask_address=(address)double_signmask_pool; | |
4085 // andpd:\tANDPD $dst,[signconst] | |
4086 emit_opcode(cbuf, 0x66); | |
4087 emit_opcode(cbuf, 0x0F); | |
4088 emit_opcode(cbuf, 0x54); | |
4089 emit_rm(cbuf, 0x0, $dst$$reg, 0x5); | |
4090 emit_d32(cbuf, (int)signmask_address); | |
4091 %} | |
4092 | |
4093 enc_class NegXF_encoding(regX dst) %{ | |
4094 address signmask_address=(address)float_signflip_pool; | |
4095 // xorps:\tXORPS $dst,[signconst] | |
4096 emit_opcode(cbuf, 0x0F); | |
4097 emit_opcode(cbuf, 0x57); | |
4098 emit_rm(cbuf, 0x0, $dst$$reg, 0x5); | |
4099 emit_d32(cbuf, (int)signmask_address); | |
4100 %} | |
4101 | |
4102 enc_class NegXD_encoding(regXD dst) %{ | |
4103 address signmask_address=(address)double_signflip_pool; | |
4104 // xorpd:\tXORPD $dst,[signconst] | |
4105 emit_opcode(cbuf, 0x66); | |
4106 emit_opcode(cbuf, 0x0F); | |
4107 emit_opcode(cbuf, 0x57); | |
4108 emit_rm(cbuf, 0x0, $dst$$reg, 0x5); | |
4109 emit_d32(cbuf, (int)signmask_address); | |
4110 %} | |
4111 | |
4112 enc_class FMul_ST_reg( eRegF src1 ) %{ | |
4113 // Operand was loaded from memory into fp ST (stack top) | 3654 // Operand was loaded from memory into fp ST (stack top) |
4114 // FMUL ST,$src /* D8 C8+i */ | 3655 // FMUL ST,$src /* D8 C8+i */ |
4115 emit_opcode(cbuf, 0xD8); | 3656 emit_opcode(cbuf, 0xD8); |
4116 emit_opcode(cbuf, 0xC8 + $src1$$reg); | 3657 emit_opcode(cbuf, 0xC8 + $src1$$reg); |
4117 %} | 3658 %} |
4118 | 3659 |
4119 enc_class FAdd_ST_reg( eRegF src2 ) %{ | 3660 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ |
4120 // FADD ST,src2 /* D8 C0+i */ | 3661 // FADD ST,src2 /* D8 C0+i */ |
4121 emit_opcode(cbuf, 0xD8); | 3662 emit_opcode(cbuf, 0xD8); |
4122 emit_opcode(cbuf, 0xC0 + $src2$$reg); | 3663 emit_opcode(cbuf, 0xC0 + $src2$$reg); |
4123 //could use FADDP src2,fpST /* DE C0+i */ | 3664 //could use FADDP src2,fpST /* DE C0+i */ |
4124 %} | 3665 %} |
4125 | 3666 |
4126 enc_class FAddP_reg_ST( eRegF src2 ) %{ | 3667 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ |
4127 // FADDP src2,ST /* DE C0+i */ | 3668 // FADDP src2,ST /* DE C0+i */ |
4128 emit_opcode(cbuf, 0xDE); | 3669 emit_opcode(cbuf, 0xDE); |
4129 emit_opcode(cbuf, 0xC0 + $src2$$reg); | 3670 emit_opcode(cbuf, 0xC0 + $src2$$reg); |
4130 %} | 3671 %} |
4131 | 3672 |
4132 enc_class subF_divF_encode( eRegF src1, eRegF src2) %{ | 3673 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ |
4133 // Operand has been loaded into fp ST (stack top) | 3674 // Operand has been loaded into fp ST (stack top) |
4134 // FSUB ST,$src1 | 3675 // FSUB ST,$src1 |
4135 emit_opcode(cbuf, 0xD8); | 3676 emit_opcode(cbuf, 0xD8); |
4136 emit_opcode(cbuf, 0xE0 + $src1$$reg); | 3677 emit_opcode(cbuf, 0xE0 + $src1$$reg); |
4137 | 3678 |
4138 // FDIV | 3679 // FDIV |
4139 emit_opcode(cbuf, 0xD8); | 3680 emit_opcode(cbuf, 0xD8); |
4140 emit_opcode(cbuf, 0xF0 + $src2$$reg); | 3681 emit_opcode(cbuf, 0xF0 + $src2$$reg); |
4141 %} | 3682 %} |
4142 | 3683 |
4143 enc_class MulFAddF (eRegF src1, eRegF src2) %{ | 3684 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ |
4144 // Operand was loaded from memory into fp ST (stack top) | 3685 // Operand was loaded from memory into fp ST (stack top) |
4145 // FADD ST,$src /* D8 C0+i */ | 3686 // FADD ST,$src /* D8 C0+i */ |
4146 emit_opcode(cbuf, 0xD8); | 3687 emit_opcode(cbuf, 0xD8); |
4147 emit_opcode(cbuf, 0xC0 + $src1$$reg); | 3688 emit_opcode(cbuf, 0xC0 + $src1$$reg); |
4148 | 3689 |
4150 emit_opcode(cbuf, 0xD8); | 3691 emit_opcode(cbuf, 0xD8); |
4151 emit_opcode(cbuf, 0xC8 + $src2$$reg); | 3692 emit_opcode(cbuf, 0xC8 + $src2$$reg); |
4152 %} | 3693 %} |
4153 | 3694 |
4154 | 3695 |
4155 enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{ | 3696 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ |
4156 // Operand was loaded from memory into fp ST (stack top) | 3697 // Operand was loaded from memory into fp ST (stack top) |
4157 // FADD ST,$src /* D8 C0+i */ | 3698 // FADD ST,$src /* D8 C0+i */ |
4158 emit_opcode(cbuf, 0xD8); | 3699 emit_opcode(cbuf, 0xD8); |
4159 emit_opcode(cbuf, 0xC0 + $src1$$reg); | 3700 emit_opcode(cbuf, 0xC0 + $src1$$reg); |
4160 | 3701 |
4172 int scale = $mem$$scale; | 3713 int scale = $mem$$scale; |
4173 int displace = $mem$$disp; | 3714 int displace = $mem$$disp; |
4174 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals | 3715 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals |
4175 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); | 3716 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); |
4176 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); | 3717 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); |
4177 %} | |
4178 | |
4179 enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{ | |
4180 { // Atomic long load | |
4181 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem | |
4182 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); | |
4183 emit_opcode(cbuf,0x0F); | |
4184 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); | |
4185 int base = $mem$$base; | |
4186 int index = $mem$$index; | |
4187 int scale = $mem$$scale; | |
4188 int displace = $mem$$disp; | |
4189 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals | |
4190 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); | |
4191 } | |
4192 { // MOVSD $dst,$tmp ! atomic long store | |
4193 emit_opcode(cbuf,0xF2); | |
4194 emit_opcode(cbuf,0x0F); | |
4195 emit_opcode(cbuf,0x11); | |
4196 int base = $dst$$base; | |
4197 int index = $dst$$index; | |
4198 int scale = $dst$$scale; | |
4199 int displace = $dst$$disp; | |
4200 bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals | |
4201 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); | |
4202 } | |
4203 %} | |
4204 | |
4205 enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{ | |
4206 { // Atomic long load | |
4207 // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem | |
4208 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); | |
4209 emit_opcode(cbuf,0x0F); | |
4210 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); | |
4211 int base = $mem$$base; | |
4212 int index = $mem$$index; | |
4213 int scale = $mem$$scale; | |
4214 int displace = $mem$$disp; | |
4215 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals | |
4216 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); | |
4217 } | |
4218 { // MOVD $dst.lo,$tmp | |
4219 emit_opcode(cbuf,0x66); | |
4220 emit_opcode(cbuf,0x0F); | |
4221 emit_opcode(cbuf,0x7E); | |
4222 emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg); | |
4223 } | |
4224 { // PSRLQ $tmp,32 | |
4225 emit_opcode(cbuf,0x66); | |
4226 emit_opcode(cbuf,0x0F); | |
4227 emit_opcode(cbuf,0x73); | |
4228 emit_rm(cbuf, 0x3, 0x02, $tmp$$reg); | |
4229 emit_d8(cbuf, 0x20); | |
4230 } | |
4231 { // MOVD $dst.hi,$tmp | |
4232 emit_opcode(cbuf,0x66); | |
4233 emit_opcode(cbuf,0x0F); | |
4234 emit_opcode(cbuf,0x7E); | |
4235 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); | |
4236 } | |
4237 %} | 3718 %} |
4238 | 3719 |
4239 // Volatile Store Long. Must be atomic, so move it into | 3720 // Volatile Store Long. Must be atomic, so move it into |
4240 // the FP TOS and then do a 64-bit FIST. Has to probe the | 3721 // the FP TOS and then do a 64-bit FIST. Has to probe the |
4241 // target address before the store (for null-ptr checks) | 3722 // target address before the store (for null-ptr checks) |
4249 int index = $mem$$index; | 3730 int index = $mem$$index; |
4250 int scale = $mem$$scale; | 3731 int scale = $mem$$scale; |
4251 int displace = $mem$$disp; | 3732 int displace = $mem$$disp; |
4252 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals | 3733 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals |
4253 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); | 3734 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); |
4254 %} | |
4255 | |
4256 enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{ | |
4257 { // Atomic long load | |
4258 // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src] | |
4259 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); | |
4260 emit_opcode(cbuf,0x0F); | |
4261 emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12); | |
4262 int base = $src$$base; | |
4263 int index = $src$$index; | |
4264 int scale = $src$$scale; | |
4265 int displace = $src$$disp; | |
4266 bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals | |
4267 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); | |
4268 } | |
4269 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop | |
4270 { // MOVSD $mem,$tmp ! atomic long store | |
4271 emit_opcode(cbuf,0xF2); | |
4272 emit_opcode(cbuf,0x0F); | |
4273 emit_opcode(cbuf,0x11); | |
4274 int base = $mem$$base; | |
4275 int index = $mem$$index; | |
4276 int scale = $mem$$scale; | |
4277 int displace = $mem$$disp; | |
4278 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals | |
4279 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); | |
4280 } | |
4281 %} | |
4282 | |
4283 enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{ | |
4284 { // MOVD $tmp,$src.lo | |
4285 emit_opcode(cbuf,0x66); | |
4286 emit_opcode(cbuf,0x0F); | |
4287 emit_opcode(cbuf,0x6E); | |
4288 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); | |
4289 } | |
4290 { // MOVD $tmp2,$src.hi | |
4291 emit_opcode(cbuf,0x66); | |
4292 emit_opcode(cbuf,0x0F); | |
4293 emit_opcode(cbuf,0x6E); | |
4294 emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg)); | |
4295 } | |
4296 { // PUNPCKLDQ $tmp,$tmp2 | |
4297 emit_opcode(cbuf,0x66); | |
4298 emit_opcode(cbuf,0x0F); | |
4299 emit_opcode(cbuf,0x62); | |
4300 emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg); | |
4301 } | |
4302 cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop | |
4303 { // MOVSD $mem,$tmp ! atomic long store | |
4304 emit_opcode(cbuf,0xF2); | |
4305 emit_opcode(cbuf,0x0F); | |
4306 emit_opcode(cbuf,0x11); | |
4307 int base = $mem$$base; | |
4308 int index = $mem$$index; | |
4309 int scale = $mem$$scale; | |
4310 int displace = $mem$$disp; | |
4311 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals | |
4312 encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); | |
4313 } | |
4314 %} | 3735 %} |
4315 | 3736 |
4316 // Safepoint Poll. This polls the safepoint page, and causes an | 3737 // Safepoint Poll. This polls the safepoint page, and causes an |
4317 // exception if it is not readable. Unfortunately, it kills the condition code | 3738 // exception if it is not readable. Unfortunately, it kills the condition code |
4318 // in the process | 3739 // in the process |
4703 format %{ %} | 4124 format %{ %} |
4704 interface(CONST_INTER); | 4125 interface(CONST_INTER); |
4705 %} | 4126 %} |
4706 | 4127 |
4707 //Double Immediate zero | 4128 //Double Immediate zero |
4708 operand immD0() %{ | 4129 operand immDPR0() %{ |
4709 // Do additional (and counter-intuitive) test against NaN to work around VC++ | 4130 // Do additional (and counter-intuitive) test against NaN to work around VC++ |
4710 // bug that generates code such that NaNs compare equal to 0.0 | 4131 // bug that generates code such that NaNs compare equal to 0.0 |
4711 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); | 4132 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); |
4712 match(ConD); | 4133 match(ConD); |
4713 | 4134 |
4715 format %{ %} | 4136 format %{ %} |
4716 interface(CONST_INTER); | 4137 interface(CONST_INTER); |
4717 %} | 4138 %} |
4718 | 4139 |
4719 // Double Immediate one | 4140 // Double Immediate one |
4720 operand immD1() %{ | 4141 operand immDPR1() %{ |
4721 predicate( UseSSE<=1 && n->getd() == 1.0 ); | 4142 predicate( UseSSE<=1 && n->getd() == 1.0 ); |
4722 match(ConD); | 4143 match(ConD); |
4723 | 4144 |
4724 op_cost(5); | 4145 op_cost(5); |
4725 format %{ %} | 4146 format %{ %} |
4726 interface(CONST_INTER); | 4147 interface(CONST_INTER); |
4727 %} | 4148 %} |
4728 | 4149 |
4729 // Double Immediate | 4150 // Double Immediate |
4730 operand immD() %{ | 4151 operand immDPR() %{ |
4731 predicate(UseSSE<=1); | 4152 predicate(UseSSE<=1); |
4732 match(ConD); | 4153 match(ConD); |
4733 | 4154 |
4734 op_cost(5); | 4155 op_cost(5); |
4735 format %{ %} | 4156 format %{ %} |
4736 interface(CONST_INTER); | 4157 interface(CONST_INTER); |
4737 %} | 4158 %} |
4738 | 4159 |
4739 operand immXD() %{ | 4160 operand immD() %{ |
4740 predicate(UseSSE>=2); | 4161 predicate(UseSSE>=2); |
4741 match(ConD); | 4162 match(ConD); |
4742 | 4163 |
4743 op_cost(5); | 4164 op_cost(5); |
4744 format %{ %} | 4165 format %{ %} |
4745 interface(CONST_INTER); | 4166 interface(CONST_INTER); |
4746 %} | 4167 %} |
4747 | 4168 |
4748 // Double Immediate zero | 4169 // Double Immediate zero |
4749 operand immXD0() %{ | 4170 operand immD0() %{ |
4750 // Do additional (and counter-intuitive) test against NaN to work around VC++ | 4171 // Do additional (and counter-intuitive) test against NaN to work around VC++ |
4751 // bug that generates code such that NaNs compare equal to 0.0 AND do not | 4172 // bug that generates code such that NaNs compare equal to 0.0 AND do not |
4752 // compare equal to -0.0. | 4173 // compare equal to -0.0. |
4753 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); | 4174 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); |
4754 match(ConD); | 4175 match(ConD); |
4756 format %{ %} | 4177 format %{ %} |
4757 interface(CONST_INTER); | 4178 interface(CONST_INTER); |
4758 %} | 4179 %} |
4759 | 4180 |
4760 // Float Immediate zero | 4181 // Float Immediate zero |
4761 operand immF0() %{ | 4182 operand immFPR0() %{ |
4762 predicate(UseSSE == 0 && n->getf() == 0.0F); | 4183 predicate(UseSSE == 0 && n->getf() == 0.0F); |
4763 match(ConF); | 4184 match(ConF); |
4764 | 4185 |
4765 op_cost(5); | 4186 op_cost(5); |
4766 format %{ %} | 4187 format %{ %} |
4767 interface(CONST_INTER); | 4188 interface(CONST_INTER); |
4768 %} | 4189 %} |
4769 | 4190 |
4770 // Float Immediate one | 4191 // Float Immediate one |
4771 operand immF1() %{ | 4192 operand immFPR1() %{ |
4772 predicate(UseSSE == 0 && n->getf() == 1.0F); | 4193 predicate(UseSSE == 0 && n->getf() == 1.0F); |
4773 match(ConF); | 4194 match(ConF); |
4774 | 4195 |
4775 op_cost(5); | 4196 op_cost(5); |
4776 format %{ %} | 4197 format %{ %} |
4777 interface(CONST_INTER); | 4198 interface(CONST_INTER); |
4778 %} | 4199 %} |
4779 | 4200 |
4780 // Float Immediate | 4201 // Float Immediate |
4781 operand immF() %{ | 4202 operand immFPR() %{ |
4782 predicate( UseSSE == 0 ); | 4203 predicate( UseSSE == 0 ); |
4783 match(ConF); | 4204 match(ConF); |
4784 | 4205 |
4785 op_cost(5); | 4206 op_cost(5); |
4786 format %{ %} | 4207 format %{ %} |
4787 interface(CONST_INTER); | 4208 interface(CONST_INTER); |
4788 %} | 4209 %} |
4789 | 4210 |
4790 // Float Immediate | 4211 // Float Immediate |
4791 operand immXF() %{ | 4212 operand immF() %{ |
4792 predicate(UseSSE >= 1); | 4213 predicate(UseSSE >= 1); |
4793 match(ConF); | 4214 match(ConF); |
4794 | 4215 |
4795 op_cost(5); | 4216 op_cost(5); |
4796 format %{ %} | 4217 format %{ %} |
4797 interface(CONST_INTER); | 4218 interface(CONST_INTER); |
4798 %} | 4219 %} |
4799 | 4220 |
4800 // Float Immediate zero. Zero and not -0.0 | 4221 // Float Immediate zero. Zero and not -0.0 |
4801 operand immXF0() %{ | 4222 operand immF0() %{ |
4802 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); | 4223 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); |
4803 match(ConF); | 4224 match(ConF); |
4804 | 4225 |
4805 op_cost(5); | 4226 op_cost(5); |
4806 format %{ %} | 4227 format %{ %} |
5172 format %{ "FLAGS_LEGT" %} | 4593 format %{ "FLAGS_LEGT" %} |
5173 interface(REG_INTER); | 4594 interface(REG_INTER); |
5174 %} | 4595 %} |
5175 | 4596 |
5176 // Float register operands | 4597 // Float register operands |
5177 operand regD() %{ | 4598 operand regDPR() %{ |
5178 predicate( UseSSE < 2 ); | 4599 predicate( UseSSE < 2 ); |
5179 constraint(ALLOC_IN_RC(dbl_reg)); | 4600 constraint(ALLOC_IN_RC(dbl_reg)); |
5180 match(RegD); | 4601 match(RegD); |
5181 match(regDPR1); | 4602 match(regDPR1); |
5182 match(regDPR2); | 4603 match(regDPR2); |
5183 format %{ %} | 4604 format %{ %} |
5184 interface(REG_INTER); | 4605 interface(REG_INTER); |
5185 %} | 4606 %} |
5186 | 4607 |
5187 operand regDPR1(regD reg) %{ | 4608 operand regDPR1(regDPR reg) %{ |
5188 predicate( UseSSE < 2 ); | 4609 predicate( UseSSE < 2 ); |
5189 constraint(ALLOC_IN_RC(dbl_reg0)); | 4610 constraint(ALLOC_IN_RC(dbl_reg0)); |
5190 match(reg); | 4611 match(reg); |
5191 format %{ "FPR1" %} | 4612 format %{ "FPR1" %} |
5192 interface(REG_INTER); | 4613 interface(REG_INTER); |
5193 %} | 4614 %} |
5194 | 4615 |
5195 operand regDPR2(regD reg) %{ | 4616 operand regDPR2(regDPR reg) %{ |
5196 predicate( UseSSE < 2 ); | 4617 predicate( UseSSE < 2 ); |
5197 constraint(ALLOC_IN_RC(dbl_reg1)); | 4618 constraint(ALLOC_IN_RC(dbl_reg1)); |
5198 match(reg); | 4619 match(reg); |
5199 format %{ "FPR2" %} | 4620 format %{ "FPR2" %} |
5200 interface(REG_INTER); | 4621 interface(REG_INTER); |
5201 %} | 4622 %} |
5202 | 4623 |
5203 operand regnotDPR1(regD reg) %{ | 4624 operand regnotDPR1(regDPR reg) %{ |
5204 predicate( UseSSE < 2 ); | 4625 predicate( UseSSE < 2 ); |
5205 constraint(ALLOC_IN_RC(dbl_notreg0)); | 4626 constraint(ALLOC_IN_RC(dbl_notreg0)); |
5206 match(reg); | 4627 match(reg); |
5207 format %{ %} | 4628 format %{ %} |
5208 interface(REG_INTER); | 4629 interface(REG_INTER); |
5209 %} | 4630 %} |
5210 | 4631 |
5211 // XMM Double register operands | 4632 // XMM Double register operands |
5212 operand regXD() %{ | 4633 operand regD() %{ |
5213 predicate( UseSSE>=2 ); | 4634 predicate( UseSSE>=2 ); |
5214 constraint(ALLOC_IN_RC(xdb_reg)); | 4635 constraint(ALLOC_IN_RC(xdb_reg)); |
5215 match(RegD); | 4636 match(RegD); |
5216 match(regXD6); | 4637 match(regD6); |
5217 match(regXD7); | 4638 match(regD7); |
5218 format %{ %} | 4639 format %{ %} |
5219 interface(REG_INTER); | 4640 interface(REG_INTER); |
5220 %} | 4641 %} |
5221 | 4642 |
5222 // XMM6 double register operands | 4643 // XMM6 double register operands |
5223 operand regXD6(regXD reg) %{ | 4644 operand regD6(regD reg) %{ |
5224 predicate( UseSSE>=2 ); | 4645 predicate( UseSSE>=2 ); |
5225 constraint(ALLOC_IN_RC(xdb_reg6)); | 4646 constraint(ALLOC_IN_RC(xdb_reg6)); |
5226 match(reg); | 4647 match(reg); |
5227 format %{ "XMM6" %} | 4648 format %{ "XMM6" %} |
5228 interface(REG_INTER); | 4649 interface(REG_INTER); |
5229 %} | 4650 %} |
5230 | 4651 |
5231 // XMM7 double register operands | 4652 // XMM7 double register operands |
5232 operand regXD7(regXD reg) %{ | 4653 operand regD7(regD reg) %{ |
5233 predicate( UseSSE>=2 ); | 4654 predicate( UseSSE>=2 ); |
5234 constraint(ALLOC_IN_RC(xdb_reg7)); | 4655 constraint(ALLOC_IN_RC(xdb_reg7)); |
5235 match(reg); | 4656 match(reg); |
5236 format %{ "XMM7" %} | 4657 format %{ "XMM7" %} |
5237 interface(REG_INTER); | 4658 interface(REG_INTER); |
5238 %} | 4659 %} |
5239 | 4660 |
5240 // Float register operands | 4661 // Float register operands |
5241 operand regF() %{ | 4662 operand regFPR() %{ |
5242 predicate( UseSSE < 2 ); | 4663 predicate( UseSSE < 2 ); |
5243 constraint(ALLOC_IN_RC(flt_reg)); | 4664 constraint(ALLOC_IN_RC(flt_reg)); |
5244 match(RegF); | 4665 match(RegF); |
5245 match(regFPR1); | 4666 match(regFPR1); |
5246 format %{ %} | 4667 format %{ %} |
5247 interface(REG_INTER); | 4668 interface(REG_INTER); |
5248 %} | 4669 %} |
5249 | 4670 |
5250 // Float register operands | 4671 // Float register operands |
5251 operand regFPR1(regF reg) %{ | 4672 operand regFPR1(regFPR reg) %{ |
5252 predicate( UseSSE < 2 ); | 4673 predicate( UseSSE < 2 ); |
5253 constraint(ALLOC_IN_RC(flt_reg0)); | 4674 constraint(ALLOC_IN_RC(flt_reg0)); |
5254 match(reg); | 4675 match(reg); |
5255 format %{ "FPR1" %} | 4676 format %{ "FPR1" %} |
5256 interface(REG_INTER); | 4677 interface(REG_INTER); |
5257 %} | 4678 %} |
5258 | 4679 |
5259 // XMM register operands | 4680 // XMM register operands |
5260 operand regX() %{ | 4681 operand regF() %{ |
5261 predicate( UseSSE>=1 ); | 4682 predicate( UseSSE>=1 ); |
5262 constraint(ALLOC_IN_RC(xmm_reg)); | 4683 constraint(ALLOC_IN_RC(xmm_reg)); |
5263 match(RegF); | 4684 match(RegF); |
5264 format %{ %} | 4685 format %{ %} |
5265 interface(REG_INTER); | 4686 interface(REG_INTER); |
5999 cr : S3(read); | 5420 cr : S3(read); |
6000 DECODE : S0(2); // any 2 decoders | 5421 DECODE : S0(2); // any 2 decoders |
6001 %} | 5422 %} |
6002 | 5423 |
6003 // Conditional move double reg-reg | 5424 // Conditional move double reg-reg |
6004 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{ | 5425 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ |
6005 single_instruction; | 5426 single_instruction; |
6006 dst : S4(write); | 5427 dst : S4(write); |
6007 src : S3(read); | 5428 src : S3(read); |
6008 cr : S3(read); | 5429 cr : S3(read); |
6009 DECODE : S0; // any decoder | 5430 DECODE : S0; // any decoder |
6010 %} | 5431 %} |
6011 | 5432 |
6012 // Float reg-reg operation | 5433 // Float reg-reg operation |
6013 pipe_class fpu_reg(regD dst) %{ | 5434 pipe_class fpu_reg(regDPR dst) %{ |
6014 instruction_count(2); | 5435 instruction_count(2); |
6015 dst : S3(read); | 5436 dst : S3(read); |
6016 DECODE : S0(2); // any 2 decoders | 5437 DECODE : S0(2); // any 2 decoders |
6017 FPU : S3; | 5438 FPU : S3; |
6018 %} | 5439 %} |
6019 | 5440 |
6020 // Float reg-reg operation | 5441 // Float reg-reg operation |
6021 pipe_class fpu_reg_reg(regD dst, regD src) %{ | 5442 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ |
6022 instruction_count(2); | 5443 instruction_count(2); |
6023 dst : S4(write); | 5444 dst : S4(write); |
6024 src : S3(read); | 5445 src : S3(read); |
6025 DECODE : S0(2); // any 2 decoders | 5446 DECODE : S0(2); // any 2 decoders |
6026 FPU : S3; | 5447 FPU : S3; |
6027 %} | 5448 %} |
6028 | 5449 |
6029 // Float reg-reg operation | 5450 // Float reg-reg operation |
6030 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{ | 5451 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ |
6031 instruction_count(3); | 5452 instruction_count(3); |
6032 dst : S4(write); | 5453 dst : S4(write); |
6033 src1 : S3(read); | 5454 src1 : S3(read); |
6034 src2 : S3(read); | 5455 src2 : S3(read); |
6035 DECODE : S0(3); // any 3 decoders | 5456 DECODE : S0(3); // any 3 decoders |
6036 FPU : S3(2); | 5457 FPU : S3(2); |
6037 %} | 5458 %} |
6038 | 5459 |
6039 // Float reg-reg operation | 5460 // Float reg-reg operation |
6040 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ | 5461 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ |
6041 instruction_count(4); | 5462 instruction_count(4); |
6042 dst : S4(write); | 5463 dst : S4(write); |
6043 src1 : S3(read); | 5464 src1 : S3(read); |
6044 src2 : S3(read); | 5465 src2 : S3(read); |
6045 src3 : S3(read); | 5466 src3 : S3(read); |
6046 DECODE : S0(4); // any 4 decoders | 5467 DECODE : S0(4); // any 4 decoders |
6047 FPU : S3(2); | 5468 FPU : S3(2); |
6048 %} | 5469 %} |
6049 | 5470 |
6050 // Float reg-mem-reg-reg operation | 5471 // Float reg-mem-reg-reg operation |
6051 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{ | 5472 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ |
6052 instruction_count(4); | 5473 instruction_count(4); |
6053 dst : S4(write); | 5474 dst : S4(write); |
6054 src1 : S3(read); | 5475 src1 : S3(read); |
6055 src2 : S3(read); | 5476 src2 : S3(read); |
6056 src3 : S3(read); | 5477 src3 : S3(read); |
6059 FPU : S3(2); | 5480 FPU : S3(2); |
6060 MEM : S3; | 5481 MEM : S3; |
6061 %} | 5482 %} |
6062 | 5483 |
6063 // Float reg-mem operation | 5484 // Float reg-mem operation |
6064 pipe_class fpu_reg_mem(regD dst, memory mem) %{ | 5485 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ |
6065 instruction_count(2); | 5486 instruction_count(2); |
6066 dst : S5(write); | 5487 dst : S5(write); |
6067 mem : S3(read); | 5488 mem : S3(read); |
6068 D0 : S0; // big decoder only | 5489 D0 : S0; // big decoder only |
6069 DECODE : S1; // any decoder for FPU POP | 5490 DECODE : S1; // any decoder for FPU POP |
6070 FPU : S4; | 5491 FPU : S4; |
6071 MEM : S3; // any mem | 5492 MEM : S3; // any mem |
6072 %} | 5493 %} |
6073 | 5494 |
6074 // Float reg-mem operation | 5495 // Float reg-mem operation |
6075 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{ | 5496 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ |
6076 instruction_count(3); | 5497 instruction_count(3); |
6077 dst : S5(write); | 5498 dst : S5(write); |
6078 src1 : S3(read); | 5499 src1 : S3(read); |
6079 mem : S3(read); | 5500 mem : S3(read); |
6080 D0 : S0; // big decoder only | 5501 D0 : S0; // big decoder only |
6082 FPU : S4; | 5503 FPU : S4; |
6083 MEM : S3; // any mem | 5504 MEM : S3; // any mem |
6084 %} | 5505 %} |
6085 | 5506 |
6086 // Float mem-reg operation | 5507 // Float mem-reg operation |
6087 pipe_class fpu_mem_reg(memory mem, regD src) %{ | 5508 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ |
6088 instruction_count(2); | 5509 instruction_count(2); |
6089 src : S5(read); | 5510 src : S5(read); |
6090 mem : S3(read); | 5511 mem : S3(read); |
6091 DECODE : S0; // any decoder for FPU PUSH | 5512 DECODE : S0; // any decoder for FPU PUSH |
6092 D0 : S1; // big decoder only | 5513 D0 : S1; // big decoder only |
6093 FPU : S4; | 5514 FPU : S4; |
6094 MEM : S3; // any mem | 5515 MEM : S3; // any mem |
6095 %} | 5516 %} |
6096 | 5517 |
6097 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{ | 5518 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ |
6098 instruction_count(3); | 5519 instruction_count(3); |
6099 src1 : S3(read); | 5520 src1 : S3(read); |
6100 src2 : S3(read); | 5521 src2 : S3(read); |
6101 mem : S3(read); | 5522 mem : S3(read); |
6102 DECODE : S0(2); // any decoder for FPU PUSH | 5523 DECODE : S0(2); // any decoder for FPU PUSH |
6103 D0 : S1; // big decoder only | 5524 D0 : S1; // big decoder only |
6104 FPU : S4; | 5525 FPU : S4; |
6105 MEM : S3; // any mem | 5526 MEM : S3; // any mem |
6106 %} | 5527 %} |
6107 | 5528 |
6108 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{ | 5529 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ |
6109 instruction_count(3); | 5530 instruction_count(3); |
6110 src1 : S3(read); | 5531 src1 : S3(read); |
6111 src2 : S3(read); | 5532 src2 : S3(read); |
6112 mem : S4(read); | 5533 mem : S4(read); |
6113 DECODE : S0; // any decoder for FPU PUSH | 5534 DECODE : S0; // any decoder for FPU PUSH |
6132 D0 : S0(3); // big decoder only | 5553 D0 : S0(3); // big decoder only |
6133 FPU : S4; | 5554 FPU : S4; |
6134 MEM : S3(3); // any mem | 5555 MEM : S3(3); // any mem |
6135 %} | 5556 %} |
6136 | 5557 |
6137 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{ | 5558 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ |
6138 instruction_count(3); | 5559 instruction_count(3); |
6139 src1 : S4(read); | 5560 src1 : S4(read); |
6140 mem : S4(read); | 5561 mem : S4(read); |
6141 DECODE : S0; // any decoder for FPU PUSH | 5562 DECODE : S0; // any decoder for FPU PUSH |
6142 D0 : S0(2); // big decoder only | 5563 D0 : S0(2); // big decoder only |
6143 FPU : S4; | 5564 FPU : S4; |
6144 MEM : S3(2); // any mem | 5565 MEM : S3(2); // any mem |
6145 %} | 5566 %} |
6146 | 5567 |
6147 // Float load constant | 5568 // Float load constant |
6148 pipe_class fpu_reg_con(regD dst) %{ | 5569 pipe_class fpu_reg_con(regDPR dst) %{ |
6149 instruction_count(2); | 5570 instruction_count(2); |
6150 dst : S5(write); | 5571 dst : S5(write); |
6151 D0 : S0; // big decoder only for the load | 5572 D0 : S0; // big decoder only for the load |
6152 DECODE : S1; // any decoder for FPU POP | 5573 DECODE : S1; // any decoder for FPU POP |
6153 FPU : S4; | 5574 FPU : S4; |
6154 MEM : S3; // any mem | 5575 MEM : S3; // any mem |
6155 %} | 5576 %} |
6156 | 5577 |
6157 // Float load constant | 5578 // Float load constant |
6158 pipe_class fpu_reg_reg_con(regD dst, regD src) %{ | 5579 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ |
6159 instruction_count(3); | 5580 instruction_count(3); |
6160 dst : S5(write); | 5581 dst : S5(write); |
6161 src : S3(read); | 5582 src : S3(read); |
6162 D0 : S0; // big decoder only for the load | 5583 D0 : S0; // big decoder only for the load |
6163 DECODE : S1(2); // any decoder for FPU POP | 5584 DECODE : S1(2); // any decoder for FPU POP |
6868 "FISTp $dst" %} | 6289 "FISTp $dst" %} |
6869 ins_encode(enc_loadL_volatile(mem,dst)); | 6290 ins_encode(enc_loadL_volatile(mem,dst)); |
6870 ins_pipe( fpu_reg_mem ); | 6291 ins_pipe( fpu_reg_mem ); |
6871 %} | 6292 %} |
6872 | 6293 |
6873 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ | 6294 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ |
6874 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); | 6295 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); |
6875 match(Set dst (LoadL mem)); | 6296 match(Set dst (LoadL mem)); |
6876 effect(TEMP tmp); | 6297 effect(TEMP tmp); |
6877 ins_cost(180); | 6298 ins_cost(180); |
6878 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" | 6299 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" |
6879 "MOVSD $dst,$tmp" %} | 6300 "MOVSD $dst,$tmp" %} |
6880 ins_encode(enc_loadLX_volatile(mem, dst, tmp)); | 6301 ins_encode %{ |
6302 __ movdbl($tmp$$XMMRegister, $mem$$Address); | |
6303 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); | |
6304 %} | |
6881 ins_pipe( pipe_slow ); | 6305 ins_pipe( pipe_slow ); |
6882 %} | 6306 %} |
6883 | 6307 |
6884 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ | 6308 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ |
6885 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); | 6309 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); |
6886 match(Set dst (LoadL mem)); | 6310 match(Set dst (LoadL mem)); |
6887 effect(TEMP tmp); | 6311 effect(TEMP tmp); |
6888 ins_cost(160); | 6312 ins_cost(160); |
6889 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" | 6313 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" |
6890 "MOVD $dst.lo,$tmp\n\t" | 6314 "MOVD $dst.lo,$tmp\n\t" |
6891 "PSRLQ $tmp,32\n\t" | 6315 "PSRLQ $tmp,32\n\t" |
6892 "MOVD $dst.hi,$tmp" %} | 6316 "MOVD $dst.hi,$tmp" %} |
6893 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); | 6317 ins_encode %{ |
6318 __ movdbl($tmp$$XMMRegister, $mem$$Address); | |
6319 __ movdl($dst$$Register, $tmp$$XMMRegister); | |
6320 __ psrlq($tmp$$XMMRegister, 32); | |
6321 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); | |
6322 %} | |
6894 ins_pipe( pipe_slow ); | 6323 ins_pipe( pipe_slow ); |
6895 %} | 6324 %} |
6896 | 6325 |
6897 // Load Range | 6326 // Load Range |
6898 instruct loadRange(eRegI dst, memory mem) %{ | 6327 instruct loadRange(eRegI dst, memory mem) %{ |
6927 ins_encode( OpcP, RegMem(dst,mem)); | 6356 ins_encode( OpcP, RegMem(dst,mem)); |
6928 ins_pipe( ialu_reg_mem ); | 6357 ins_pipe( ialu_reg_mem ); |
6929 %} | 6358 %} |
6930 | 6359 |
6931 // Load Double | 6360 // Load Double |
6932 instruct loadD(regD dst, memory mem) %{ | 6361 instruct loadDPR(regDPR dst, memory mem) %{ |
6933 predicate(UseSSE<=1); | 6362 predicate(UseSSE<=1); |
6934 match(Set dst (LoadD mem)); | 6363 match(Set dst (LoadD mem)); |
6935 | 6364 |
6936 ins_cost(150); | 6365 ins_cost(150); |
6937 format %{ "FLD_D ST,$mem\n\t" | 6366 format %{ "FLD_D ST,$mem\n\t" |
6938 "FSTP $dst" %} | 6367 "FSTP $dst" %} |
6939 opcode(0xDD); /* DD /0 */ | 6368 opcode(0xDD); /* DD /0 */ |
6940 ins_encode( OpcP, RMopc_Mem(0x00,mem), | 6369 ins_encode( OpcP, RMopc_Mem(0x00,mem), |
6941 Pop_Reg_D(dst) ); | 6370 Pop_Reg_DPR(dst) ); |
6942 ins_pipe( fpu_reg_mem ); | 6371 ins_pipe( fpu_reg_mem ); |
6943 %} | 6372 %} |
6944 | 6373 |
6945 // Load Double to XMM | 6374 // Load Double to XMM |
6946 instruct loadXD(regXD dst, memory mem) %{ | 6375 instruct loadD(regD dst, memory mem) %{ |
6947 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); | 6376 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); |
6948 match(Set dst (LoadD mem)); | 6377 match(Set dst (LoadD mem)); |
6949 ins_cost(145); | 6378 ins_cost(145); |
6950 format %{ "MOVSD $dst,$mem" %} | 6379 format %{ "MOVSD $dst,$mem" %} |
6951 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); | 6380 ins_encode %{ |
6381 __ movdbl ($dst$$XMMRegister, $mem$$Address); | |
6382 %} | |
6952 ins_pipe( pipe_slow ); | 6383 ins_pipe( pipe_slow ); |
6953 %} | 6384 %} |
6954 | 6385 |
6955 instruct loadXD_partial(regXD dst, memory mem) %{ | 6386 instruct loadD_partial(regD dst, memory mem) %{ |
6956 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); | 6387 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); |
6957 match(Set dst (LoadD mem)); | 6388 match(Set dst (LoadD mem)); |
6958 ins_cost(145); | 6389 ins_cost(145); |
6959 format %{ "MOVLPD $dst,$mem" %} | 6390 format %{ "MOVLPD $dst,$mem" %} |
6960 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem)); | 6391 ins_encode %{ |
6392 __ movdbl ($dst$$XMMRegister, $mem$$Address); | |
6393 %} | |
6961 ins_pipe( pipe_slow ); | 6394 ins_pipe( pipe_slow ); |
6962 %} | 6395 %} |
6963 | 6396 |
6964 // Load to XMM register (single-precision floating point) | 6397 // Load to XMM register (single-precision floating point) |
6965 // MOVSS instruction | 6398 // MOVSS instruction |
6966 instruct loadX(regX dst, memory mem) %{ | 6399 instruct loadF(regF dst, memory mem) %{ |
6967 predicate(UseSSE>=1); | 6400 predicate(UseSSE>=1); |
6968 match(Set dst (LoadF mem)); | 6401 match(Set dst (LoadF mem)); |
6969 ins_cost(145); | 6402 ins_cost(145); |
6970 format %{ "MOVSS $dst,$mem" %} | 6403 format %{ "MOVSS $dst,$mem" %} |
6971 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); | 6404 ins_encode %{ |
6405 __ movflt ($dst$$XMMRegister, $mem$$Address); | |
6406 %} | |
6972 ins_pipe( pipe_slow ); | 6407 ins_pipe( pipe_slow ); |
6973 %} | 6408 %} |
6974 | 6409 |
6975 // Load Float | 6410 // Load Float |
6976 instruct loadF(regF dst, memory mem) %{ | 6411 instruct loadFPR(regFPR dst, memory mem) %{ |
6977 predicate(UseSSE==0); | 6412 predicate(UseSSE==0); |
6978 match(Set dst (LoadF mem)); | 6413 match(Set dst (LoadF mem)); |
6979 | 6414 |
6980 ins_cost(150); | 6415 ins_cost(150); |
6981 format %{ "FLD_S ST,$mem\n\t" | 6416 format %{ "FLD_S ST,$mem\n\t" |
6982 "FSTP $dst" %} | 6417 "FSTP $dst" %} |
6983 opcode(0xD9); /* D9 /0 */ | 6418 opcode(0xD9); /* D9 /0 */ |
6984 ins_encode( OpcP, RMopc_Mem(0x00,mem), | 6419 ins_encode( OpcP, RMopc_Mem(0x00,mem), |
6985 Pop_Reg_F(dst) ); | 6420 Pop_Reg_FPR(dst) ); |
6986 ins_pipe( fpu_reg_mem ); | 6421 ins_pipe( fpu_reg_mem ); |
6987 %} | 6422 %} |
6988 | 6423 |
6989 // Load Aligned Packed Byte to XMM register | 6424 // Load Aligned Packed Byte to XMM register |
6990 instruct loadA8B(regXD dst, memory mem) %{ | 6425 instruct loadA8B(regD dst, memory mem) %{ |
6991 predicate(UseSSE>=1); | 6426 predicate(UseSSE>=1); |
6992 match(Set dst (Load8B mem)); | 6427 match(Set dst (Load8B mem)); |
6993 ins_cost(125); | 6428 ins_cost(125); |
6994 format %{ "MOVQ $dst,$mem\t! packed8B" %} | 6429 format %{ "MOVQ $dst,$mem\t! packed8B" %} |
6995 ins_encode( movq_ld(dst, mem)); | 6430 ins_encode %{ |
6431 __ movq($dst$$XMMRegister, $mem$$Address); | |
6432 %} | |
6996 ins_pipe( pipe_slow ); | 6433 ins_pipe( pipe_slow ); |
6997 %} | 6434 %} |
6998 | 6435 |
6999 // Load Aligned Packed Short to XMM register | 6436 // Load Aligned Packed Short to XMM register |
7000 instruct loadA4S(regXD dst, memory mem) %{ | 6437 instruct loadA4S(regD dst, memory mem) %{ |
7001 predicate(UseSSE>=1); | 6438 predicate(UseSSE>=1); |
7002 match(Set dst (Load4S mem)); | 6439 match(Set dst (Load4S mem)); |
7003 ins_cost(125); | 6440 ins_cost(125); |
7004 format %{ "MOVQ $dst,$mem\t! packed4S" %} | 6441 format %{ "MOVQ $dst,$mem\t! packed4S" %} |
7005 ins_encode( movq_ld(dst, mem)); | 6442 ins_encode %{ |
6443 __ movq($dst$$XMMRegister, $mem$$Address); | |
6444 %} | |
7006 ins_pipe( pipe_slow ); | 6445 ins_pipe( pipe_slow ); |
7007 %} | 6446 %} |
7008 | 6447 |
7009 // Load Aligned Packed Char to XMM register | 6448 // Load Aligned Packed Char to XMM register |
7010 instruct loadA4C(regXD dst, memory mem) %{ | 6449 instruct loadA4C(regD dst, memory mem) %{ |
7011 predicate(UseSSE>=1); | 6450 predicate(UseSSE>=1); |
7012 match(Set dst (Load4C mem)); | 6451 match(Set dst (Load4C mem)); |
7013 ins_cost(125); | 6452 ins_cost(125); |
7014 format %{ "MOVQ $dst,$mem\t! packed4C" %} | 6453 format %{ "MOVQ $dst,$mem\t! packed4C" %} |
7015 ins_encode( movq_ld(dst, mem)); | 6454 ins_encode %{ |
6455 __ movq($dst$$XMMRegister, $mem$$Address); | |
6456 %} | |
7016 ins_pipe( pipe_slow ); | 6457 ins_pipe( pipe_slow ); |
7017 %} | 6458 %} |
7018 | 6459 |
7019 // Load Aligned Packed Integer to XMM register | 6460 // Load Aligned Packed Integer to XMM register |
7020 instruct load2IU(regXD dst, memory mem) %{ | 6461 instruct load2IU(regD dst, memory mem) %{ |
7021 predicate(UseSSE>=1); | 6462 predicate(UseSSE>=1); |
7022 match(Set dst (Load2I mem)); | 6463 match(Set dst (Load2I mem)); |
7023 ins_cost(125); | 6464 ins_cost(125); |
7024 format %{ "MOVQ $dst,$mem\t! packed2I" %} | 6465 format %{ "MOVQ $dst,$mem\t! packed2I" %} |
7025 ins_encode( movq_ld(dst, mem)); | 6466 ins_encode %{ |
6467 __ movq($dst$$XMMRegister, $mem$$Address); | |
6468 %} | |
7026 ins_pipe( pipe_slow ); | 6469 ins_pipe( pipe_slow ); |
7027 %} | 6470 %} |
7028 | 6471 |
7029 // Load Aligned Packed Single to XMM | 6472 // Load Aligned Packed Single to XMM |
7030 instruct loadA2F(regXD dst, memory mem) %{ | 6473 instruct loadA2F(regD dst, memory mem) %{ |
7031 predicate(UseSSE>=1); | 6474 predicate(UseSSE>=1); |
7032 match(Set dst (Load2F mem)); | 6475 match(Set dst (Load2F mem)); |
7033 ins_cost(145); | 6476 ins_cost(145); |
7034 format %{ "MOVQ $dst,$mem\t! packed2F" %} | 6477 format %{ "MOVQ $dst,$mem\t! packed2F" %} |
7035 ins_encode( movq_ld(dst, mem)); | 6478 ins_encode %{ |
6479 __ movq($dst$$XMMRegister, $mem$$Address); | |
6480 %} | |
7036 ins_pipe( pipe_slow ); | 6481 ins_pipe( pipe_slow ); |
7037 %} | 6482 %} |
7038 | 6483 |
7039 // Load Effective Address | 6484 // Load Effective Address |
7040 instruct leaP8(eRegP dst, indOffset8 mem) %{ | 6485 instruct leaP8(eRegP dst, indOffset8 mem) %{ |
7137 opcode(0x33,0x33); | 6582 opcode(0x33,0x33); |
7138 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); | 6583 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); |
7139 ins_pipe( ialu_reg_long ); | 6584 ins_pipe( ialu_reg_long ); |
7140 %} | 6585 %} |
7141 | 6586 |
6587 // The instruction usage is guarded by predicate in operand immFPR(). | |
6588 instruct loadConFPR(regFPR dst, immFPR con) %{ | |
6589 match(Set dst con); | |
6590 ins_cost(125); | |
6591 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" | |
6592 "FSTP $dst" %} | |
6593 ins_encode %{ | |
6594 __ fld_s($constantaddress($con)); | |
6595 __ fstp_d($dst$$reg); | |
6596 %} | |
6597 ins_pipe(fpu_reg_con); | |
6598 %} | |
6599 | |
6600 // The instruction usage is guarded by predicate in operand immFPR0(). | |
6601 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ | |
6602 match(Set dst con); | |
6603 ins_cost(125); | |
6604 format %{ "FLDZ ST\n\t" | |
6605 "FSTP $dst" %} | |
6606 ins_encode %{ | |
6607 __ fldz(); | |
6608 __ fstp_d($dst$$reg); | |
6609 %} | |
6610 ins_pipe(fpu_reg_con); | |
6611 %} | |
6612 | |
6613 // The instruction usage is guarded by predicate in operand immFPR1(). | |
6614 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ | |
6615 match(Set dst con); | |
6616 ins_cost(125); | |
6617 format %{ "FLD1 ST\n\t" | |
6618 "FSTP $dst" %} | |
6619 ins_encode %{ | |
6620 __ fld1(); | |
6621 __ fstp_d($dst$$reg); | |
6622 %} | |
6623 ins_pipe(fpu_reg_con); | |
6624 %} | |
6625 | |
7142 // The instruction usage is guarded by predicate in operand immF(). | 6626 // The instruction usage is guarded by predicate in operand immF(). |
7143 instruct loadConF(regF dst, immF con) %{ | 6627 instruct loadConF(regF dst, immF con) %{ |
7144 match(Set dst con); | 6628 match(Set dst con); |
7145 ins_cost(125); | 6629 ins_cost(125); |
7146 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" | 6630 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} |
7147 "FSTP $dst" %} | 6631 ins_encode %{ |
7148 ins_encode %{ | 6632 __ movflt($dst$$XMMRegister, $constantaddress($con)); |
7149 __ fld_s($constantaddress($con)); | 6633 %} |
7150 __ fstp_d($dst$$reg); | 6634 ins_pipe(pipe_slow); |
7151 %} | |
7152 ins_pipe(fpu_reg_con); | |
7153 %} | 6635 %} |
7154 | 6636 |
7155 // The instruction usage is guarded by predicate in operand immF0(). | 6637 // The instruction usage is guarded by predicate in operand immF0(). |
7156 instruct loadConF0(regF dst, immF0 con) %{ | 6638 instruct loadConF0(regF dst, immF0 src) %{ |
7157 match(Set dst con); | |
7158 ins_cost(125); | |
7159 format %{ "FLDZ ST\n\t" | |
7160 "FSTP $dst" %} | |
7161 ins_encode %{ | |
7162 __ fldz(); | |
7163 __ fstp_d($dst$$reg); | |
7164 %} | |
7165 ins_pipe(fpu_reg_con); | |
7166 %} | |
7167 | |
7168 // The instruction usage is guarded by predicate in operand immF1(). | |
7169 instruct loadConF1(regF dst, immF1 con) %{ | |
7170 match(Set dst con); | |
7171 ins_cost(125); | |
7172 format %{ "FLD1 ST\n\t" | |
7173 "FSTP $dst" %} | |
7174 ins_encode %{ | |
7175 __ fld1(); | |
7176 __ fstp_d($dst$$reg); | |
7177 %} | |
7178 ins_pipe(fpu_reg_con); | |
7179 %} | |
7180 | |
7181 // The instruction usage is guarded by predicate in operand immXF(). | |
7182 instruct loadConX(regX dst, immXF con) %{ | |
7183 match(Set dst con); | |
7184 ins_cost(125); | |
7185 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} | |
7186 ins_encode %{ | |
7187 __ movflt($dst$$XMMRegister, $constantaddress($con)); | |
7188 %} | |
7189 ins_pipe(pipe_slow); | |
7190 %} | |
7191 | |
7192 // The instruction usage is guarded by predicate in operand immXF0(). | |
7193 instruct loadConX0(regX dst, immXF0 src) %{ | |
7194 match(Set dst src); | 6639 match(Set dst src); |
7195 ins_cost(100); | 6640 ins_cost(100); |
7196 format %{ "XORPS $dst,$dst\t# float 0.0" %} | 6641 format %{ "XORPS $dst,$dst\t# float 0.0" %} |
7197 ins_encode %{ | 6642 ins_encode %{ |
7198 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); | 6643 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); |
7199 %} | 6644 %} |
7200 ins_pipe(pipe_slow); | 6645 ins_pipe(pipe_slow); |
6646 %} | |
6647 | |
6648 // The instruction usage is guarded by predicate in operand immDPR(). | |
6649 instruct loadConDPR(regDPR dst, immDPR con) %{ | |
6650 match(Set dst con); | |
6651 ins_cost(125); | |
6652 | |
6653 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" | |
6654 "FSTP $dst" %} | |
6655 ins_encode %{ | |
6656 __ fld_d($constantaddress($con)); | |
6657 __ fstp_d($dst$$reg); | |
6658 %} | |
6659 ins_pipe(fpu_reg_con); | |
6660 %} | |
6661 | |
6662 // The instruction usage is guarded by predicate in operand immDPR0(). | |
6663 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ | |
6664 match(Set dst con); | |
6665 ins_cost(125); | |
6666 | |
6667 format %{ "FLDZ ST\n\t" | |
6668 "FSTP $dst" %} | |
6669 ins_encode %{ | |
6670 __ fldz(); | |
6671 __ fstp_d($dst$$reg); | |
6672 %} | |
6673 ins_pipe(fpu_reg_con); | |
6674 %} | |
6675 | |
6676 // The instruction usage is guarded by predicate in operand immDPR1(). | |
6677 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ | |
6678 match(Set dst con); | |
6679 ins_cost(125); | |
6680 | |
6681 format %{ "FLD1 ST\n\t" | |
6682 "FSTP $dst" %} | |
6683 ins_encode %{ | |
6684 __ fld1(); | |
6685 __ fstp_d($dst$$reg); | |
6686 %} | |
6687 ins_pipe(fpu_reg_con); | |
7201 %} | 6688 %} |
7202 | 6689 |
7203 // The instruction usage is guarded by predicate in operand immD(). | 6690 // The instruction usage is guarded by predicate in operand immD(). |
7204 instruct loadConD(regD dst, immD con) %{ | 6691 instruct loadConD(regD dst, immD con) %{ |
7205 match(Set dst con); | 6692 match(Set dst con); |
7206 ins_cost(125); | 6693 ins_cost(125); |
7207 | 6694 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} |
7208 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" | 6695 ins_encode %{ |
7209 "FSTP $dst" %} | 6696 __ movdbl($dst$$XMMRegister, $constantaddress($con)); |
7210 ins_encode %{ | 6697 %} |
7211 __ fld_d($constantaddress($con)); | 6698 ins_pipe(pipe_slow); |
7212 __ fstp_d($dst$$reg); | |
7213 %} | |
7214 ins_pipe(fpu_reg_con); | |
7215 %} | 6699 %} |
7216 | 6700 |
7217 // The instruction usage is guarded by predicate in operand immD0(). | 6701 // The instruction usage is guarded by predicate in operand immD0(). |
7218 instruct loadConD0(regD dst, immD0 con) %{ | 6702 instruct loadConD0(regD dst, immD0 src) %{ |
7219 match(Set dst con); | |
7220 ins_cost(125); | |
7221 | |
7222 format %{ "FLDZ ST\n\t" | |
7223 "FSTP $dst" %} | |
7224 ins_encode %{ | |
7225 __ fldz(); | |
7226 __ fstp_d($dst$$reg); | |
7227 %} | |
7228 ins_pipe(fpu_reg_con); | |
7229 %} | |
7230 | |
7231 // The instruction usage is guarded by predicate in operand immD1(). | |
7232 instruct loadConD1(regD dst, immD1 con) %{ | |
7233 match(Set dst con); | |
7234 ins_cost(125); | |
7235 | |
7236 format %{ "FLD1 ST\n\t" | |
7237 "FSTP $dst" %} | |
7238 ins_encode %{ | |
7239 __ fld1(); | |
7240 __ fstp_d($dst$$reg); | |
7241 %} | |
7242 ins_pipe(fpu_reg_con); | |
7243 %} | |
7244 | |
7245 // The instruction usage is guarded by predicate in operand immXD(). | |
7246 instruct loadConXD(regXD dst, immXD con) %{ | |
7247 match(Set dst con); | |
7248 ins_cost(125); | |
7249 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} | |
7250 ins_encode %{ | |
7251 __ movdbl($dst$$XMMRegister, $constantaddress($con)); | |
7252 %} | |
7253 ins_pipe(pipe_slow); | |
7254 %} | |
7255 | |
7256 // The instruction usage is guarded by predicate in operand immXD0(). | |
7257 instruct loadConXD0(regXD dst, immXD0 src) %{ | |
7258 match(Set dst src); | 6703 match(Set dst src); |
7259 ins_cost(100); | 6704 ins_cost(100); |
7260 format %{ "XORPD $dst,$dst\t# double 0.0" %} | 6705 format %{ "XORPD $dst,$dst\t# double 0.0" %} |
7261 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); | 6706 ins_encode %{ |
6707 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); | |
6708 %} | |
7262 ins_pipe( pipe_slow ); | 6709 ins_pipe( pipe_slow ); |
7263 %} | 6710 %} |
7264 | 6711 |
7265 // Load Stack Slot | 6712 // Load Stack Slot |
7266 instruct loadSSI(eRegI dst, stackSlotI src) %{ | 6713 instruct loadSSI(eRegI dst, stackSlotI src) %{ |
7294 ins_encode( OpcP, RegMem(dst,src)); | 6741 ins_encode( OpcP, RegMem(dst,src)); |
7295 ins_pipe( ialu_reg_mem ); | 6742 ins_pipe( ialu_reg_mem ); |
7296 %} | 6743 %} |
7297 | 6744 |
7298 // Load Stack Slot | 6745 // Load Stack Slot |
7299 instruct loadSSF(regF dst, stackSlotF src) %{ | 6746 instruct loadSSF(regFPR dst, stackSlotF src) %{ |
7300 match(Set dst src); | 6747 match(Set dst src); |
7301 ins_cost(125); | 6748 ins_cost(125); |
7302 | 6749 |
7303 format %{ "FLD_S $src\n\t" | 6750 format %{ "FLD_S $src\n\t" |
7304 "FSTP $dst" %} | 6751 "FSTP $dst" %} |
7305 opcode(0xD9); /* D9 /0, FLD m32real */ | 6752 opcode(0xD9); /* D9 /0, FLD m32real */ |
7306 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), | 6753 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), |
7307 Pop_Reg_F(dst) ); | 6754 Pop_Reg_FPR(dst) ); |
7308 ins_pipe( fpu_reg_mem ); | 6755 ins_pipe( fpu_reg_mem ); |
7309 %} | 6756 %} |
7310 | 6757 |
7311 // Load Stack Slot | 6758 // Load Stack Slot |
7312 instruct loadSSD(regD dst, stackSlotD src) %{ | 6759 instruct loadSSD(regDPR dst, stackSlotD src) %{ |
7313 match(Set dst src); | 6760 match(Set dst src); |
7314 ins_cost(125); | 6761 ins_cost(125); |
7315 | 6762 |
7316 format %{ "FLD_D $src\n\t" | 6763 format %{ "FLD_D $src\n\t" |
7317 "FSTP $dst" %} | 6764 "FSTP $dst" %} |
7318 opcode(0xDD); /* DD /0, FLD m64real */ | 6765 opcode(0xDD); /* DD /0, FLD m64real */ |
7319 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), | 6766 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), |
7320 Pop_Reg_D(dst) ); | 6767 Pop_Reg_DPR(dst) ); |
7321 ins_pipe( fpu_reg_mem ); | 6768 ins_pipe( fpu_reg_mem ); |
7322 %} | 6769 %} |
7323 | 6770 |
7324 // Prefetch instructions. | 6771 // Prefetch instructions. |
7325 // Must be safe to execute with invalid address (cannot fault). | 6772 // Must be safe to execute with invalid address (cannot fault). |
7550 opcode(0x3B); | 6997 opcode(0x3B); |
7551 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); | 6998 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); |
7552 ins_pipe( fpu_reg_mem ); | 6999 ins_pipe( fpu_reg_mem ); |
7553 %} | 7000 %} |
7554 | 7001 |
7555 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{ | 7002 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ |
7556 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); | 7003 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); |
7557 match(Set mem (StoreL mem src)); | 7004 match(Set mem (StoreL mem src)); |
7558 effect( TEMP tmp, KILL cr ); | 7005 effect( TEMP tmp, KILL cr ); |
7559 ins_cost(380); | 7006 ins_cost(380); |
7560 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" | 7007 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" |
7561 "MOVSD $tmp,$src\n\t" | 7008 "MOVSD $tmp,$src\n\t" |
7562 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} | 7009 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} |
7563 opcode(0x3B); | 7010 ins_encode %{ |
7564 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp)); | 7011 __ cmpl(rax, $mem$$Address); |
7012 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); | |
7013 __ movdbl($mem$$Address, $tmp$$XMMRegister); | |
7014 %} | |
7565 ins_pipe( pipe_slow ); | 7015 ins_pipe( pipe_slow ); |
7566 %} | 7016 %} |
7567 | 7017 |
7568 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{ | 7018 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ |
7569 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); | 7019 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); |
7570 match(Set mem (StoreL mem src)); | 7020 match(Set mem (StoreL mem src)); |
7571 effect( TEMP tmp2 , TEMP tmp, KILL cr ); | 7021 effect( TEMP tmp2 , TEMP tmp, KILL cr ); |
7572 ins_cost(360); | 7022 ins_cost(360); |
7573 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" | 7023 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" |
7574 "MOVD $tmp,$src.lo\n\t" | 7024 "MOVD $tmp,$src.lo\n\t" |
7575 "MOVD $tmp2,$src.hi\n\t" | 7025 "MOVD $tmp2,$src.hi\n\t" |
7576 "PUNPCKLDQ $tmp,$tmp2\n\t" | 7026 "PUNPCKLDQ $tmp,$tmp2\n\t" |
7577 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} | 7027 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} |
7578 opcode(0x3B); | 7028 ins_encode %{ |
7579 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2)); | 7029 __ cmpl(rax, $mem$$Address); |
7030 __ movdl($tmp$$XMMRegister, $src$$Register); | |
7031 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); | |
7032 __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); | |
7033 __ movdbl($mem$$Address, $tmp$$XMMRegister); | |
7034 %} | |
7580 ins_pipe( pipe_slow ); | 7035 ins_pipe( pipe_slow ); |
7581 %} | 7036 %} |
7582 | 7037 |
7583 // Store Pointer; for storing unknown oops and raw pointers | 7038 // Store Pointer; for storing unknown oops and raw pointers |
7584 instruct storeP(memory mem, anyRegP src) %{ | 7039 instruct storeP(memory mem, anyRegP src) %{ |
7636 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); | 7091 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); |
7637 ins_pipe( ialu_mem_imm ); | 7092 ins_pipe( ialu_mem_imm ); |
7638 %} | 7093 %} |
7639 | 7094 |
7640 // Store Aligned Packed Byte XMM register to memory | 7095 // Store Aligned Packed Byte XMM register to memory |
7641 instruct storeA8B(memory mem, regXD src) %{ | 7096 instruct storeA8B(memory mem, regD src) %{ |
7642 predicate(UseSSE>=1); | 7097 predicate(UseSSE>=1); |
7643 match(Set mem (Store8B mem src)); | 7098 match(Set mem (Store8B mem src)); |
7644 ins_cost(145); | 7099 ins_cost(145); |
7645 format %{ "MOVQ $mem,$src\t! packed8B" %} | 7100 format %{ "MOVQ $mem,$src\t! packed8B" %} |
7646 ins_encode( movq_st(mem, src)); | 7101 ins_encode %{ |
7102 __ movq($mem$$Address, $src$$XMMRegister); | |
7103 %} | |
7647 ins_pipe( pipe_slow ); | 7104 ins_pipe( pipe_slow ); |
7648 %} | 7105 %} |
7649 | 7106 |
7650 // Store Aligned Packed Char/Short XMM register to memory | 7107 // Store Aligned Packed Char/Short XMM register to memory |
7651 instruct storeA4C(memory mem, regXD src) %{ | 7108 instruct storeA4C(memory mem, regD src) %{ |
7652 predicate(UseSSE>=1); | 7109 predicate(UseSSE>=1); |
7653 match(Set mem (Store4C mem src)); | 7110 match(Set mem (Store4C mem src)); |
7654 ins_cost(145); | 7111 ins_cost(145); |
7655 format %{ "MOVQ $mem,$src\t! packed4C" %} | 7112 format %{ "MOVQ $mem,$src\t! packed4C" %} |
7656 ins_encode( movq_st(mem, src)); | 7113 ins_encode %{ |
7114 __ movq($mem$$Address, $src$$XMMRegister); | |
7115 %} | |
7657 ins_pipe( pipe_slow ); | 7116 ins_pipe( pipe_slow ); |
7658 %} | 7117 %} |
7659 | 7118 |
7660 // Store Aligned Packed Integer XMM register to memory | 7119 // Store Aligned Packed Integer XMM register to memory |
7661 instruct storeA2I(memory mem, regXD src) %{ | 7120 instruct storeA2I(memory mem, regD src) %{ |
7662 predicate(UseSSE>=1); | 7121 predicate(UseSSE>=1); |
7663 match(Set mem (Store2I mem src)); | 7122 match(Set mem (Store2I mem src)); |
7664 ins_cost(145); | 7123 ins_cost(145); |
7665 format %{ "MOVQ $mem,$src\t! packed2I" %} | 7124 format %{ "MOVQ $mem,$src\t! packed2I" %} |
7666 ins_encode( movq_st(mem, src)); | 7125 ins_encode %{ |
7126 __ movq($mem$$Address, $src$$XMMRegister); | |
7127 %} | |
7667 ins_pipe( pipe_slow ); | 7128 ins_pipe( pipe_slow ); |
7668 %} | 7129 %} |
7669 | 7130 |
7670 // Store CMS card-mark Immediate | 7131 // Store CMS card-mark Immediate |
7671 instruct storeImmCM(memory mem, immI8 src) %{ | 7132 instruct storeImmCM(memory mem, immI8 src) %{ |
7677 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); | 7138 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); |
7678 ins_pipe( ialu_mem_imm ); | 7139 ins_pipe( ialu_mem_imm ); |
7679 %} | 7140 %} |
7680 | 7141 |
7681 // Store Double | 7142 // Store Double |
7682 instruct storeD( memory mem, regDPR1 src) %{ | 7143 instruct storeDPR( memory mem, regDPR1 src) %{ |
7683 predicate(UseSSE<=1); | 7144 predicate(UseSSE<=1); |
7684 match(Set mem (StoreD mem src)); | 7145 match(Set mem (StoreD mem src)); |
7685 | 7146 |
7686 ins_cost(100); | 7147 ins_cost(100); |
7687 format %{ "FST_D $mem,$src" %} | 7148 format %{ "FST_D $mem,$src" %} |
7688 opcode(0xDD); /* DD /2 */ | 7149 opcode(0xDD); /* DD /2 */ |
7689 ins_encode( enc_FP_store(mem,src) ); | 7150 ins_encode( enc_FPR_store(mem,src) ); |
7690 ins_pipe( fpu_mem_reg ); | 7151 ins_pipe( fpu_mem_reg ); |
7691 %} | 7152 %} |
7692 | 7153 |
7693 // Store double does rounding on x86 | 7154 // Store double does rounding on x86 |
7694 instruct storeD_rounded( memory mem, regDPR1 src) %{ | 7155 instruct storeDPR_rounded( memory mem, regDPR1 src) %{ |
7695 predicate(UseSSE<=1); | 7156 predicate(UseSSE<=1); |
7696 match(Set mem (StoreD mem (RoundDouble src))); | 7157 match(Set mem (StoreD mem (RoundDouble src))); |
7697 | 7158 |
7698 ins_cost(100); | 7159 ins_cost(100); |
7699 format %{ "FST_D $mem,$src\t# round" %} | 7160 format %{ "FST_D $mem,$src\t# round" %} |
7700 opcode(0xDD); /* DD /2 */ | 7161 opcode(0xDD); /* DD /2 */ |
7701 ins_encode( enc_FP_store(mem,src) ); | 7162 ins_encode( enc_FPR_store(mem,src) ); |
7702 ins_pipe( fpu_mem_reg ); | 7163 ins_pipe( fpu_mem_reg ); |
7703 %} | 7164 %} |
7704 | 7165 |
7705 // Store XMM register to memory (double-precision floating points) | 7166 // Store XMM register to memory (double-precision floating points) |
7706 // MOVSD instruction | 7167 // MOVSD instruction |
7707 instruct storeXD(memory mem, regXD src) %{ | 7168 instruct storeD(memory mem, regD src) %{ |
7708 predicate(UseSSE>=2); | 7169 predicate(UseSSE>=2); |
7709 match(Set mem (StoreD mem src)); | 7170 match(Set mem (StoreD mem src)); |
7710 ins_cost(95); | 7171 ins_cost(95); |
7711 format %{ "MOVSD $mem,$src" %} | 7172 format %{ "MOVSD $mem,$src" %} |
7712 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); | 7173 ins_encode %{ |
7174 __ movdbl($mem$$Address, $src$$XMMRegister); | |
7175 %} | |
7713 ins_pipe( pipe_slow ); | 7176 ins_pipe( pipe_slow ); |
7714 %} | 7177 %} |
7715 | 7178 |
7716 // Store XMM register to memory (single-precision floating point) | 7179 // Store XMM register to memory (single-precision floating point) |
7717 // MOVSS instruction | 7180 // MOVSS instruction |
7718 instruct storeX(memory mem, regX src) %{ | 7181 instruct storeF(memory mem, regF src) %{ |
7719 predicate(UseSSE>=1); | 7182 predicate(UseSSE>=1); |
7720 match(Set mem (StoreF mem src)); | 7183 match(Set mem (StoreF mem src)); |
7721 ins_cost(95); | 7184 ins_cost(95); |
7722 format %{ "MOVSS $mem,$src" %} | 7185 format %{ "MOVSS $mem,$src" %} |
7723 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); | 7186 ins_encode %{ |
7187 __ movflt($mem$$Address, $src$$XMMRegister); | |
7188 %} | |
7724 ins_pipe( pipe_slow ); | 7189 ins_pipe( pipe_slow ); |
7725 %} | 7190 %} |
7726 | 7191 |
7727 // Store Aligned Packed Single Float XMM register to memory | 7192 // Store Aligned Packed Single Float XMM register to memory |
7728 instruct storeA2F(memory mem, regXD src) %{ | 7193 instruct storeA2F(memory mem, regD src) %{ |
7729 predicate(UseSSE>=1); | 7194 predicate(UseSSE>=1); |
7730 match(Set mem (Store2F mem src)); | 7195 match(Set mem (Store2F mem src)); |
7731 ins_cost(145); | 7196 ins_cost(145); |
7732 format %{ "MOVQ $mem,$src\t! packed2F" %} | 7197 format %{ "MOVQ $mem,$src\t! packed2F" %} |
7733 ins_encode( movq_st(mem, src)); | 7198 ins_encode %{ |
7199 __ movq($mem$$Address, $src$$XMMRegister); | |
7200 %} | |
7734 ins_pipe( pipe_slow ); | 7201 ins_pipe( pipe_slow ); |
7735 %} | 7202 %} |
7736 | 7203 |
7737 // Store Float | 7204 // Store Float |
7738 instruct storeF( memory mem, regFPR1 src) %{ | 7205 instruct storeFPR( memory mem, regFPR1 src) %{ |
7739 predicate(UseSSE==0); | 7206 predicate(UseSSE==0); |
7740 match(Set mem (StoreF mem src)); | 7207 match(Set mem (StoreF mem src)); |
7741 | 7208 |
7742 ins_cost(100); | 7209 ins_cost(100); |
7743 format %{ "FST_S $mem,$src" %} | 7210 format %{ "FST_S $mem,$src" %} |
7744 opcode(0xD9); /* D9 /2 */ | 7211 opcode(0xD9); /* D9 /2 */ |
7745 ins_encode( enc_FP_store(mem,src) ); | 7212 ins_encode( enc_FPR_store(mem,src) ); |
7746 ins_pipe( fpu_mem_reg ); | 7213 ins_pipe( fpu_mem_reg ); |
7747 %} | 7214 %} |
7748 | 7215 |
7749 // Store Float does rounding on x86 | 7216 // Store Float does rounding on x86 |
7750 instruct storeF_rounded( memory mem, regFPR1 src) %{ | 7217 instruct storeFPR_rounded( memory mem, regFPR1 src) %{ |
7751 predicate(UseSSE==0); | 7218 predicate(UseSSE==0); |
7752 match(Set mem (StoreF mem (RoundFloat src))); | 7219 match(Set mem (StoreF mem (RoundFloat src))); |
7753 | 7220 |
7754 ins_cost(100); | 7221 ins_cost(100); |
7755 format %{ "FST_S $mem,$src\t# round" %} | 7222 format %{ "FST_S $mem,$src\t# round" %} |
7756 opcode(0xD9); /* D9 /2 */ | 7223 opcode(0xD9); /* D9 /2 */ |
7757 ins_encode( enc_FP_store(mem,src) ); | 7224 ins_encode( enc_FPR_store(mem,src) ); |
7758 ins_pipe( fpu_mem_reg ); | 7225 ins_pipe( fpu_mem_reg ); |
7759 %} | 7226 %} |
7760 | 7227 |
7761 // Store Float does rounding on x86 | 7228 // Store Float does rounding on x86 |
7762 instruct storeF_Drounded( memory mem, regDPR1 src) %{ | 7229 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ |
7763 predicate(UseSSE<=1); | 7230 predicate(UseSSE<=1); |
7764 match(Set mem (StoreF mem (ConvD2F src))); | 7231 match(Set mem (StoreF mem (ConvD2F src))); |
7765 | 7232 |
7766 ins_cost(100); | 7233 ins_cost(100); |
7767 format %{ "FST_S $mem,$src\t# D-round" %} | 7234 format %{ "FST_S $mem,$src\t# D-round" %} |
7768 opcode(0xD9); /* D9 /2 */ | 7235 opcode(0xD9); /* D9 /2 */ |
7769 ins_encode( enc_FP_store(mem,src) ); | 7236 ins_encode( enc_FPR_store(mem,src) ); |
7770 ins_pipe( fpu_mem_reg ); | 7237 ins_pipe( fpu_mem_reg ); |
7771 %} | 7238 %} |
7772 | 7239 |
7773 // Store immediate Float value (it is faster than store from FPU register) | 7240 // Store immediate Float value (it is faster than store from FPU register) |
7241 // The instruction usage is guarded by predicate in operand immFPR(). | |
7242 instruct storeFPR_imm( memory mem, immFPR src) %{ | |
7243 match(Set mem (StoreF mem src)); | |
7244 | |
7245 ins_cost(50); | |
7246 format %{ "MOV $mem,$src\t# store float" %} | |
7247 opcode(0xC7); /* C7 /0 */ | |
7248 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); | |
7249 ins_pipe( ialu_mem_imm ); | |
7250 %} | |
7251 | |
7252 // Store immediate Float value (it is faster than store from XMM register) | |
7774 // The instruction usage is guarded by predicate in operand immF(). | 7253 // The instruction usage is guarded by predicate in operand immF(). |
7775 instruct storeF_imm( memory mem, immF src) %{ | 7254 instruct storeF_imm( memory mem, immF src) %{ |
7776 match(Set mem (StoreF mem src)); | 7255 match(Set mem (StoreF mem src)); |
7777 | 7256 |
7778 ins_cost(50); | 7257 ins_cost(50); |
7779 format %{ "MOV $mem,$src\t# store float" %} | 7258 format %{ "MOV $mem,$src\t# store float" %} |
7780 opcode(0xC7); /* C7 /0 */ | 7259 opcode(0xC7); /* C7 /0 */ |
7781 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); | 7260 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); |
7782 ins_pipe( ialu_mem_imm ); | |
7783 %} | |
7784 | |
7785 // Store immediate Float value (it is faster than store from XMM register) | |
7786 // The instruction usage is guarded by predicate in operand immXF(). | |
7787 instruct storeX_imm( memory mem, immXF src) %{ | |
7788 match(Set mem (StoreF mem src)); | |
7789 | |
7790 ins_cost(50); | |
7791 format %{ "MOV $mem,$src\t# store float" %} | |
7792 opcode(0xC7); /* C7 /0 */ | |
7793 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src )); | |
7794 ins_pipe( ialu_mem_imm ); | 7261 ins_pipe( ialu_mem_imm ); |
7795 %} | 7262 %} |
7796 | 7263 |
7797 // Store Integer to stack slot | 7264 // Store Integer to stack slot |
7798 instruct storeSSI(stackSlotI dst, eRegI src) %{ | 7265 instruct storeSSI(stackSlotI dst, eRegI src) %{ |
7895 predicate(Matcher::post_store_load_barrier(n)); | 7362 predicate(Matcher::post_store_load_barrier(n)); |
7896 ins_cost(0); | 7363 ins_cost(0); |
7897 | 7364 |
7898 size(0); | 7365 size(0); |
7899 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} | 7366 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} |
7367 ins_encode( ); | |
7368 ins_pipe(empty); | |
7369 %} | |
7370 | |
7371 instruct membar_storestore() %{ | |
7372 match(MemBarStoreStore); | |
7373 ins_cost(0); | |
7374 | |
7375 size(0); | |
7376 format %{ "MEMBAR-storestore (empty encoding)" %} | |
7900 ins_encode( ); | 7377 ins_encode( ); |
7901 ins_pipe(empty); | 7378 ins_pipe(empty); |
7902 %} | 7379 %} |
7903 | 7380 |
7904 //----------Move Instructions-------------------------------------------------- | 7381 //----------Move Instructions-------------------------------------------------- |
8086 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); | 7563 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); |
8087 // ins_pipe( pipe_cmov_mem ); | 7564 // ins_pipe( pipe_cmov_mem ); |
8088 //%} | 7565 //%} |
8089 | 7566 |
8090 // Conditional move | 7567 // Conditional move |
8091 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{ | 7568 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ |
8092 predicate(UseSSE<=1); | 7569 predicate(UseSSE<=1); |
8093 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); | 7570 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
8094 ins_cost(200); | 7571 ins_cost(200); |
8095 format %{ "FCMOV$cop $dst,$src\t# double" %} | 7572 format %{ "FCMOV$cop $dst,$src\t# double" %} |
8096 opcode(0xDA); | 7573 opcode(0xDA); |
8097 ins_encode( enc_cmov_d(cop,src) ); | 7574 ins_encode( enc_cmov_dpr(cop,src) ); |
8098 ins_pipe( pipe_cmovD_reg ); | 7575 ins_pipe( pipe_cmovDPR_reg ); |
8099 %} | 7576 %} |
8100 | 7577 |
8101 // Conditional move | 7578 // Conditional move |
8102 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{ | 7579 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ |
8103 predicate(UseSSE==0); | 7580 predicate(UseSSE==0); |
8104 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); | 7581 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
8105 ins_cost(200); | 7582 ins_cost(200); |
8106 format %{ "FCMOV$cop $dst,$src\t# float" %} | 7583 format %{ "FCMOV$cop $dst,$src\t# float" %} |
8107 opcode(0xDA); | 7584 opcode(0xDA); |
8108 ins_encode( enc_cmov_d(cop,src) ); | 7585 ins_encode( enc_cmov_dpr(cop,src) ); |
8109 ins_pipe( pipe_cmovD_reg ); | 7586 ins_pipe( pipe_cmovDPR_reg ); |
8110 %} | 7587 %} |
8111 | 7588 |
8112 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. | 7589 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. |
8113 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ | 7590 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ |
8114 predicate(UseSSE<=1); | 7591 predicate(UseSSE<=1); |
8115 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); | 7592 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
8116 ins_cost(200); | 7593 ins_cost(200); |
8117 format %{ "Jn$cop skip\n\t" | 7594 format %{ "Jn$cop skip\n\t" |
8118 "MOV $dst,$src\t# double\n" | 7595 "MOV $dst,$src\t# double\n" |
8119 "skip:" %} | 7596 "skip:" %} |
8120 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ | 7597 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ |
8121 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) ); | 7598 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); |
8122 ins_pipe( pipe_cmovD_reg ); | 7599 ins_pipe( pipe_cmovDPR_reg ); |
8123 %} | 7600 %} |
8124 | 7601 |
8125 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. | 7602 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. |
8126 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ | 7603 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ |
8127 predicate(UseSSE==0); | 7604 predicate(UseSSE==0); |
8128 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); | 7605 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
8129 ins_cost(200); | 7606 ins_cost(200); |
8130 format %{ "Jn$cop skip\n\t" | 7607 format %{ "Jn$cop skip\n\t" |
8131 "MOV $dst,$src\t# float\n" | 7608 "MOV $dst,$src\t# float\n" |
8132 "skip:" %} | 7609 "skip:" %} |
8133 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ | 7610 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ |
8134 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) ); | 7611 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); |
8135 ins_pipe( pipe_cmovD_reg ); | 7612 ins_pipe( pipe_cmovDPR_reg ); |
8136 %} | 7613 %} |
8137 | 7614 |
8138 // No CMOVE with SSE/SSE2 | 7615 // No CMOVE with SSE/SSE2 |
8139 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{ | 7616 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ |
8140 predicate (UseSSE>=1); | 7617 predicate (UseSSE>=1); |
8141 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); | 7618 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
8142 ins_cost(200); | 7619 ins_cost(200); |
8143 format %{ "Jn$cop skip\n\t" | 7620 format %{ "Jn$cop skip\n\t" |
8144 "MOVSS $dst,$src\t# float\n" | 7621 "MOVSS $dst,$src\t# float\n" |
8152 %} | 7629 %} |
8153 ins_pipe( pipe_slow ); | 7630 ins_pipe( pipe_slow ); |
8154 %} | 7631 %} |
8155 | 7632 |
8156 // No CMOVE with SSE/SSE2 | 7633 // No CMOVE with SSE/SSE2 |
8157 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{ | 7634 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ |
8158 predicate (UseSSE>=2); | 7635 predicate (UseSSE>=2); |
8159 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); | 7636 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
8160 ins_cost(200); | 7637 ins_cost(200); |
8161 format %{ "Jn$cop skip\n\t" | 7638 format %{ "Jn$cop skip\n\t" |
8162 "MOVSD $dst,$src\t# float\n" | 7639 "MOVSD $dst,$src\t# float\n" |
8170 %} | 7647 %} |
8171 ins_pipe( pipe_slow ); | 7648 ins_pipe( pipe_slow ); |
8172 %} | 7649 %} |
8173 | 7650 |
8174 // unsigned version | 7651 // unsigned version |
8175 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{ | 7652 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ |
8176 predicate (UseSSE>=1); | 7653 predicate (UseSSE>=1); |
8177 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); | 7654 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
8178 ins_cost(200); | 7655 ins_cost(200); |
8179 format %{ "Jn$cop skip\n\t" | 7656 format %{ "Jn$cop skip\n\t" |
8180 "MOVSS $dst,$src\t# float\n" | 7657 "MOVSS $dst,$src\t# float\n" |
8187 __ bind(skip); | 7664 __ bind(skip); |
8188 %} | 7665 %} |
8189 ins_pipe( pipe_slow ); | 7666 ins_pipe( pipe_slow ); |
8190 %} | 7667 %} |
8191 | 7668 |
8192 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{ | 7669 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ |
8193 predicate (UseSSE>=1); | 7670 predicate (UseSSE>=1); |
8194 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); | 7671 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
8195 ins_cost(200); | 7672 ins_cost(200); |
8196 expand %{ | 7673 expand %{ |
8197 fcmovX_regU(cop, cr, dst, src); | 7674 fcmovF_regU(cop, cr, dst, src); |
8198 %} | 7675 %} |
8199 %} | 7676 %} |
8200 | 7677 |
8201 // unsigned version | 7678 // unsigned version |
8202 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{ | 7679 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ |
8203 predicate (UseSSE>=2); | 7680 predicate (UseSSE>=2); |
8204 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); | 7681 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
8205 ins_cost(200); | 7682 ins_cost(200); |
8206 format %{ "Jn$cop skip\n\t" | 7683 format %{ "Jn$cop skip\n\t" |
8207 "MOVSD $dst,$src\t# float\n" | 7684 "MOVSD $dst,$src\t# float\n" |
8214 __ bind(skip); | 7691 __ bind(skip); |
8215 %} | 7692 %} |
8216 ins_pipe( pipe_slow ); | 7693 ins_pipe( pipe_slow ); |
8217 %} | 7694 %} |
8218 | 7695 |
8219 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{ | 7696 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ |
8220 predicate (UseSSE>=2); | 7697 predicate (UseSSE>=2); |
8221 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); | 7698 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
8222 ins_cost(200); | 7699 ins_cost(200); |
8223 expand %{ | 7700 expand %{ |
8224 fcmovXD_regU(cop, cr, dst, src); | 7701 fcmovD_regU(cop, cr, dst, src); |
8225 %} | 7702 %} |
8226 %} | 7703 %} |
8227 | 7704 |
8228 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ | 7705 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ |
8229 predicate(VM_Version::supports_cmov() ); | 7706 predicate(VM_Version::supports_cmov() ); |
8438 ins_encode( OpcP, RegMem(dst,mem)); | 7915 ins_encode( OpcP, RegMem(dst,mem)); |
8439 ins_pipe( ialu_reg_mem ); | 7916 ins_pipe( ialu_reg_mem ); |
8440 %} | 7917 %} |
8441 | 7918 |
8442 // LoadLong-locked - same as a volatile long load when used with compare-swap | 7919 // LoadLong-locked - same as a volatile long load when used with compare-swap |
8443 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{ | 7920 instruct loadLLocked(stackSlotL dst, memory mem) %{ |
8444 predicate(UseSSE<=1); | 7921 predicate(UseSSE<=1); |
8445 match(Set dst (LoadLLocked mem)); | 7922 match(Set dst (LoadLLocked mem)); |
8446 | 7923 |
8447 ins_cost(200); | 7924 ins_cost(200); |
8448 format %{ "FILD $mem\t# Atomic volatile long load\n\t" | 7925 format %{ "FILD $mem\t# Atomic volatile long load\n\t" |
8449 "FISTp $dst" %} | 7926 "FISTp $dst" %} |
8450 ins_encode(enc_loadL_volatile(mem,dst)); | 7927 ins_encode(enc_loadL_volatile(mem,dst)); |
8451 ins_pipe( fpu_reg_mem ); | 7928 ins_pipe( fpu_reg_mem ); |
8452 %} | 7929 %} |
8453 | 7930 |
8454 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{ | 7931 instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{ |
8455 predicate(UseSSE>=2); | 7932 predicate(UseSSE>=2); |
8456 match(Set dst (LoadLLocked mem)); | 7933 match(Set dst (LoadLLocked mem)); |
8457 effect(TEMP tmp); | 7934 effect(TEMP tmp); |
8458 ins_cost(180); | 7935 ins_cost(180); |
8459 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" | 7936 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" |
8460 "MOVSD $dst,$tmp" %} | 7937 "MOVSD $dst,$tmp" %} |
8461 ins_encode(enc_loadLX_volatile(mem, dst, tmp)); | 7938 ins_encode %{ |
7939 __ movdbl($tmp$$XMMRegister, $mem$$Address); | |
7940 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); | |
7941 %} | |
8462 ins_pipe( pipe_slow ); | 7942 ins_pipe( pipe_slow ); |
8463 %} | 7943 %} |
8464 | 7944 |
8465 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{ | 7945 instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{ |
8466 predicate(UseSSE>=2); | 7946 predicate(UseSSE>=2); |
8467 match(Set dst (LoadLLocked mem)); | 7947 match(Set dst (LoadLLocked mem)); |
8468 effect(TEMP tmp); | 7948 effect(TEMP tmp); |
8469 ins_cost(160); | 7949 ins_cost(160); |
8470 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" | 7950 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" |
8471 "MOVD $dst.lo,$tmp\n\t" | 7951 "MOVD $dst.lo,$tmp\n\t" |
8472 "PSRLQ $tmp,32\n\t" | 7952 "PSRLQ $tmp,32\n\t" |
8473 "MOVD $dst.hi,$tmp" %} | 7953 "MOVD $dst.hi,$tmp" %} |
8474 ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); | 7954 ins_encode %{ |
7955 __ movdbl($tmp$$XMMRegister, $mem$$Address); | |
7956 __ movdl($dst$$Register, $tmp$$XMMRegister); | |
7957 __ psrlq($tmp$$XMMRegister, 32); | |
7958 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); | |
7959 %} | |
8475 ins_pipe( pipe_slow ); | 7960 ins_pipe( pipe_slow ); |
8476 %} | 7961 %} |
8477 | 7962 |
8478 // Conditional-store of the updated heap-top. | 7963 // Conditional-store of the updated heap-top. |
8479 // Used during allocation of the shared heap. | 7964 // Used during allocation of the shared heap. |
10052 // Double Math | 9537 // Double Math |
10053 | 9538 |
10054 // Compare & branch | 9539 // Compare & branch |
10055 | 9540 |
10056 // P6 version of float compare, sets condition codes in EFLAGS | 9541 // P6 version of float compare, sets condition codes in EFLAGS |
10057 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ | 9542 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ |
10058 predicate(VM_Version::supports_cmov() && UseSSE <=1); | 9543 predicate(VM_Version::supports_cmov() && UseSSE <=1); |
10059 match(Set cr (CmpD src1 src2)); | 9544 match(Set cr (CmpD src1 src2)); |
10060 effect(KILL rax); | 9545 effect(KILL rax); |
10061 ins_cost(150); | 9546 ins_cost(150); |
10062 format %{ "FLD $src1\n\t" | 9547 format %{ "FLD $src1\n\t" |
10064 "JNP exit\n\t" | 9549 "JNP exit\n\t" |
10065 "MOV ah,1 // saw a NaN, set CF\n\t" | 9550 "MOV ah,1 // saw a NaN, set CF\n\t" |
10066 "SAHF\n" | 9551 "SAHF\n" |
10067 "exit:\tNOP // avoid branch to branch" %} | 9552 "exit:\tNOP // avoid branch to branch" %} |
10068 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ | 9553 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ |
10069 ins_encode( Push_Reg_D(src1), | 9554 ins_encode( Push_Reg_DPR(src1), |
10070 OpcP, RegOpc(src2), | 9555 OpcP, RegOpc(src2), |
10071 cmpF_P6_fixup ); | 9556 cmpF_P6_fixup ); |
10072 ins_pipe( pipe_slow ); | 9557 ins_pipe( pipe_slow ); |
10073 %} | 9558 %} |
10074 | 9559 |
10075 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{ | 9560 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ |
10076 predicate(VM_Version::supports_cmov() && UseSSE <=1); | 9561 predicate(VM_Version::supports_cmov() && UseSSE <=1); |
10077 match(Set cr (CmpD src1 src2)); | 9562 match(Set cr (CmpD src1 src2)); |
10078 ins_cost(150); | 9563 ins_cost(150); |
10079 format %{ "FLD $src1\n\t" | 9564 format %{ "FLD $src1\n\t" |
10080 "FUCOMIP ST,$src2 // P6 instruction" %} | 9565 "FUCOMIP ST,$src2 // P6 instruction" %} |
10081 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ | 9566 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ |
10082 ins_encode( Push_Reg_D(src1), | 9567 ins_encode( Push_Reg_DPR(src1), |
10083 OpcP, RegOpc(src2)); | 9568 OpcP, RegOpc(src2)); |
10084 ins_pipe( pipe_slow ); | 9569 ins_pipe( pipe_slow ); |
10085 %} | 9570 %} |
10086 | 9571 |
10087 // Compare & branch | 9572 // Compare & branch |
10088 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ | 9573 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ |
10089 predicate(UseSSE<=1); | 9574 predicate(UseSSE<=1); |
10090 match(Set cr (CmpD src1 src2)); | 9575 match(Set cr (CmpD src1 src2)); |
10091 effect(KILL rax); | 9576 effect(KILL rax); |
10092 ins_cost(200); | 9577 ins_cost(200); |
10093 format %{ "FLD $src1\n\t" | 9578 format %{ "FLD $src1\n\t" |
10096 "TEST AX,0x400\n\t" | 9581 "TEST AX,0x400\n\t" |
10097 "JZ,s flags\n\t" | 9582 "JZ,s flags\n\t" |
10098 "MOV AH,1\t# unordered treat as LT\n" | 9583 "MOV AH,1\t# unordered treat as LT\n" |
10099 "flags:\tSAHF" %} | 9584 "flags:\tSAHF" %} |
10100 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ | 9585 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ |
10101 ins_encode( Push_Reg_D(src1), | 9586 ins_encode( Push_Reg_DPR(src1), |
10102 OpcP, RegOpc(src2), | 9587 OpcP, RegOpc(src2), |
10103 fpu_flags); | 9588 fpu_flags); |
10104 ins_pipe( pipe_slow ); | 9589 ins_pipe( pipe_slow ); |
10105 %} | 9590 %} |
10106 | 9591 |
10107 // Compare vs zero into -1,0,1 | 9592 // Compare vs zero into -1,0,1 |
10108 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{ | 9593 instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ |
10109 predicate(UseSSE<=1); | 9594 predicate(UseSSE<=1); |
10110 match(Set dst (CmpD3 src1 zero)); | 9595 match(Set dst (CmpD3 src1 zero)); |
10111 effect(KILL cr, KILL rax); | 9596 effect(KILL cr, KILL rax); |
10112 ins_cost(280); | 9597 ins_cost(280); |
10113 format %{ "FTSTD $dst,$src1" %} | 9598 format %{ "FTSTD $dst,$src1" %} |
10114 opcode(0xE4, 0xD9); | 9599 opcode(0xE4, 0xD9); |
10115 ins_encode( Push_Reg_D(src1), | 9600 ins_encode( Push_Reg_DPR(src1), |
10116 OpcS, OpcP, PopFPU, | 9601 OpcS, OpcP, PopFPU, |
10117 CmpF_Result(dst)); | 9602 CmpF_Result(dst)); |
10118 ins_pipe( pipe_slow ); | 9603 ins_pipe( pipe_slow ); |
10119 %} | 9604 %} |
10120 | 9605 |
10121 // Compare into -1,0,1 | 9606 // Compare into -1,0,1 |
10122 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ | 9607 instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ |
10123 predicate(UseSSE<=1); | 9608 predicate(UseSSE<=1); |
10124 match(Set dst (CmpD3 src1 src2)); | 9609 match(Set dst (CmpD3 src1 src2)); |
10125 effect(KILL cr, KILL rax); | 9610 effect(KILL cr, KILL rax); |
10126 ins_cost(300); | 9611 ins_cost(300); |
10127 format %{ "FCMPD $dst,$src1,$src2" %} | 9612 format %{ "FCMPD $dst,$src1,$src2" %} |
10128 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ | 9613 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ |
10129 ins_encode( Push_Reg_D(src1), | 9614 ins_encode( Push_Reg_DPR(src1), |
10130 OpcP, RegOpc(src2), | 9615 OpcP, RegOpc(src2), |
10131 CmpF_Result(dst)); | 9616 CmpF_Result(dst)); |
10132 ins_pipe( pipe_slow ); | 9617 ins_pipe( pipe_slow ); |
10133 %} | 9618 %} |
10134 | 9619 |
10135 // float compare and set condition codes in EFLAGS by XMM regs | 9620 // float compare and set condition codes in EFLAGS by XMM regs |
10136 instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{ | 9621 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ |
10137 predicate(UseSSE>=2); | 9622 predicate(UseSSE>=2); |
10138 match(Set cr (CmpD dst src)); | 9623 match(Set cr (CmpD src1 src2)); |
10139 effect(KILL rax); | 9624 ins_cost(145); |
10140 ins_cost(125); | 9625 format %{ "UCOMISD $src1,$src2\n\t" |
10141 format %{ "COMISD $dst,$src\n" | 9626 "JNP,s exit\n\t" |
10142 "\tJNP exit\n" | 9627 "PUSHF\t# saw NaN, set CF\n\t" |
10143 "\tMOV ah,1 // saw a NaN, set CF\n" | 9628 "AND [rsp], #0xffffff2b\n\t" |
10144 "\tSAHF\n" | 9629 "POPF\n" |
10145 "exit:\tNOP // avoid branch to branch" %} | 9630 "exit:" %} |
10146 opcode(0x66, 0x0F, 0x2F); | 9631 ins_encode %{ |
10147 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup); | 9632 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); |
9633 emit_cmpfp_fixup(_masm); | |
9634 %} | |
10148 ins_pipe( pipe_slow ); | 9635 ins_pipe( pipe_slow ); |
10149 %} | 9636 %} |
10150 | 9637 |
10151 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{ | 9638 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ |
10152 predicate(UseSSE>=2); | 9639 predicate(UseSSE>=2); |
10153 match(Set cr (CmpD dst src)); | 9640 match(Set cr (CmpD src1 src2)); |
10154 ins_cost(100); | 9641 ins_cost(100); |
10155 format %{ "COMISD $dst,$src" %} | 9642 format %{ "UCOMISD $src1,$src2" %} |
10156 opcode(0x66, 0x0F, 0x2F); | 9643 ins_encode %{ |
10157 ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); | 9644 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); |
9645 %} | |
10158 ins_pipe( pipe_slow ); | 9646 ins_pipe( pipe_slow ); |
10159 %} | 9647 %} |
10160 | 9648 |
10161 // float compare and set condition codes in EFLAGS by XMM regs | 9649 // float compare and set condition codes in EFLAGS by XMM regs |
10162 instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{ | 9650 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ |
10163 predicate(UseSSE>=2); | 9651 predicate(UseSSE>=2); |
10164 match(Set cr (CmpD dst (LoadD src))); | 9652 match(Set cr (CmpD src1 (LoadD src2))); |
10165 effect(KILL rax); | |
10166 ins_cost(145); | 9653 ins_cost(145); |
10167 format %{ "COMISD $dst,$src\n" | 9654 format %{ "UCOMISD $src1,$src2\n\t" |
10168 "\tJNP exit\n" | 9655 "JNP,s exit\n\t" |
10169 "\tMOV ah,1 // saw a NaN, set CF\n" | 9656 "PUSHF\t# saw NaN, set CF\n\t" |
10170 "\tSAHF\n" | 9657 "AND [rsp], #0xffffff2b\n\t" |
10171 "exit:\tNOP // avoid branch to branch" %} | 9658 "POPF\n" |
10172 opcode(0x66, 0x0F, 0x2F); | 9659 "exit:" %} |
10173 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup); | 9660 ins_encode %{ |
9661 __ ucomisd($src1$$XMMRegister, $src2$$Address); | |
9662 emit_cmpfp_fixup(_masm); | |
9663 %} | |
10174 ins_pipe( pipe_slow ); | 9664 ins_pipe( pipe_slow ); |
10175 %} | 9665 %} |
10176 | 9666 |
10177 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{ | 9667 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ |
10178 predicate(UseSSE>=2); | 9668 predicate(UseSSE>=2); |
10179 match(Set cr (CmpD dst (LoadD src))); | 9669 match(Set cr (CmpD src1 (LoadD src2))); |
10180 ins_cost(100); | 9670 ins_cost(100); |
10181 format %{ "COMISD $dst,$src" %} | 9671 format %{ "UCOMISD $src1,$src2" %} |
10182 opcode(0x66, 0x0F, 0x2F); | 9672 ins_encode %{ |
10183 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src)); | 9673 __ ucomisd($src1$$XMMRegister, $src2$$Address); |
9674 %} | |
10184 ins_pipe( pipe_slow ); | 9675 ins_pipe( pipe_slow ); |
10185 %} | 9676 %} |
10186 | 9677 |
10187 // Compare into -1,0,1 in XMM | 9678 // Compare into -1,0,1 in XMM |
10188 instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ | 9679 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ |
10189 predicate(UseSSE>=2); | 9680 predicate(UseSSE>=2); |
10190 match(Set dst (CmpD3 src1 src2)); | 9681 match(Set dst (CmpD3 src1 src2)); |
10191 effect(KILL cr); | 9682 effect(KILL cr); |
10192 ins_cost(255); | 9683 ins_cost(255); |
10193 format %{ "XOR $dst,$dst\n" | 9684 format %{ "UCOMISD $src1, $src2\n\t" |
10194 "\tCOMISD $src1,$src2\n" | 9685 "MOV $dst, #-1\n\t" |
10195 "\tJP,s nan\n" | 9686 "JP,s done\n\t" |
10196 "\tJEQ,s exit\n" | 9687 "JB,s done\n\t" |
10197 "\tJA,s inc\n" | 9688 "SETNE $dst\n\t" |
10198 "nan:\tDEC $dst\n" | 9689 "MOVZB $dst, $dst\n" |
10199 "\tJMP,s exit\n" | 9690 "done:" %} |
10200 "inc:\tINC $dst\n" | 9691 ins_encode %{ |
10201 "exit:" | 9692 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); |
10202 %} | 9693 emit_cmpfp3(_masm, $dst$$Register); |
10203 opcode(0x66, 0x0F, 0x2F); | 9694 %} |
10204 ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2), | |
10205 CmpX_Result(dst)); | |
10206 ins_pipe( pipe_slow ); | 9695 ins_pipe( pipe_slow ); |
10207 %} | 9696 %} |
10208 | 9697 |
10209 // Compare into -1,0,1 in XMM and memory | 9698 // Compare into -1,0,1 in XMM and memory |
10210 instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{ | 9699 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ |
10211 predicate(UseSSE>=2); | 9700 predicate(UseSSE>=2); |
10212 match(Set dst (CmpD3 src1 (LoadD mem))); | 9701 match(Set dst (CmpD3 src1 (LoadD src2))); |
10213 effect(KILL cr); | 9702 effect(KILL cr); |
10214 ins_cost(275); | 9703 ins_cost(275); |
10215 format %{ "COMISD $src1,$mem\n" | 9704 format %{ "UCOMISD $src1, $src2\n\t" |
10216 "\tMOV $dst,0\t\t# do not blow flags\n" | 9705 "MOV $dst, #-1\n\t" |
10217 "\tJP,s nan\n" | 9706 "JP,s done\n\t" |
10218 "\tJEQ,s exit\n" | 9707 "JB,s done\n\t" |
10219 "\tJA,s inc\n" | 9708 "SETNE $dst\n\t" |
10220 "nan:\tDEC $dst\n" | 9709 "MOVZB $dst, $dst\n" |
10221 "\tJMP,s exit\n" | 9710 "done:" %} |
10222 "inc:\tINC $dst\n" | 9711 ins_encode %{ |
10223 "exit:" | 9712 __ ucomisd($src1$$XMMRegister, $src2$$Address); |
10224 %} | 9713 emit_cmpfp3(_masm, $dst$$Register); |
10225 opcode(0x66, 0x0F, 0x2F); | 9714 %} |
10226 ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem), | |
10227 LdImmI(dst,0x0), CmpX_Result(dst)); | |
10228 ins_pipe( pipe_slow ); | 9715 ins_pipe( pipe_slow ); |
10229 %} | 9716 %} |
10230 | 9717 |
10231 | 9718 |
10232 instruct subD_reg(regD dst, regD src) %{ | 9719 instruct subDPR_reg(regDPR dst, regDPR src) %{ |
10233 predicate (UseSSE <=1); | 9720 predicate (UseSSE <=1); |
10234 match(Set dst (SubD dst src)); | 9721 match(Set dst (SubD dst src)); |
10235 | 9722 |
10236 format %{ "FLD $src\n\t" | 9723 format %{ "FLD $src\n\t" |
10237 "DSUBp $dst,ST" %} | 9724 "DSUBp $dst,ST" %} |
10238 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ | 9725 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ |
10239 ins_cost(150); | 9726 ins_cost(150); |
10240 ins_encode( Push_Reg_D(src), | 9727 ins_encode( Push_Reg_DPR(src), |
10241 OpcP, RegOpc(dst) ); | 9728 OpcP, RegOpc(dst) ); |
10242 ins_pipe( fpu_reg_reg ); | 9729 ins_pipe( fpu_reg_reg ); |
10243 %} | 9730 %} |
10244 | 9731 |
10245 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ | 9732 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ |
10246 predicate (UseSSE <=1); | 9733 predicate (UseSSE <=1); |
10247 match(Set dst (RoundDouble (SubD src1 src2))); | 9734 match(Set dst (RoundDouble (SubD src1 src2))); |
10248 ins_cost(250); | 9735 ins_cost(250); |
10249 | 9736 |
10250 format %{ "FLD $src2\n\t" | 9737 format %{ "FLD $src2\n\t" |
10251 "DSUB ST,$src1\n\t" | 9738 "DSUB ST,$src1\n\t" |
10252 "FSTP_D $dst\t# D-round" %} | 9739 "FSTP_D $dst\t# D-round" %} |
10253 opcode(0xD8, 0x5); | 9740 opcode(0xD8, 0x5); |
10254 ins_encode( Push_Reg_D(src2), | 9741 ins_encode( Push_Reg_DPR(src2), |
10255 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); | 9742 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); |
10256 ins_pipe( fpu_mem_reg_reg ); | 9743 ins_pipe( fpu_mem_reg_reg ); |
10257 %} | 9744 %} |
10258 | 9745 |
10259 | 9746 |
10260 instruct subD_reg_mem(regD dst, memory src) %{ | 9747 instruct subDPR_reg_mem(regDPR dst, memory src) %{ |
10261 predicate (UseSSE <=1); | 9748 predicate (UseSSE <=1); |
10262 match(Set dst (SubD dst (LoadD src))); | 9749 match(Set dst (SubD dst (LoadD src))); |
10263 ins_cost(150); | 9750 ins_cost(150); |
10264 | 9751 |
10265 format %{ "FLD $src\n\t" | 9752 format %{ "FLD $src\n\t" |
10268 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), | 9755 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), |
10269 OpcP, RegOpc(dst) ); | 9756 OpcP, RegOpc(dst) ); |
10270 ins_pipe( fpu_reg_mem ); | 9757 ins_pipe( fpu_reg_mem ); |
10271 %} | 9758 %} |
10272 | 9759 |
10273 instruct absD_reg(regDPR1 dst, regDPR1 src) %{ | 9760 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ |
10274 predicate (UseSSE<=1); | 9761 predicate (UseSSE<=1); |
10275 match(Set dst (AbsD src)); | 9762 match(Set dst (AbsD src)); |
10276 ins_cost(100); | 9763 ins_cost(100); |
10277 format %{ "FABS" %} | 9764 format %{ "FABS" %} |
10278 opcode(0xE1, 0xD9); | 9765 opcode(0xE1, 0xD9); |
10279 ins_encode( OpcS, OpcP ); | 9766 ins_encode( OpcS, OpcP ); |
10280 ins_pipe( fpu_reg_reg ); | 9767 ins_pipe( fpu_reg_reg ); |
10281 %} | 9768 %} |
10282 | 9769 |
10283 instruct absXD_reg( regXD dst ) %{ | 9770 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ |
10284 predicate(UseSSE>=2); | |
10285 match(Set dst (AbsD dst)); | |
10286 format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %} | |
10287 ins_encode( AbsXD_encoding(dst)); | |
10288 ins_pipe( pipe_slow ); | |
10289 %} | |
10290 | |
10291 instruct negD_reg(regDPR1 dst, regDPR1 src) %{ | |
10292 predicate(UseSSE<=1); | 9771 predicate(UseSSE<=1); |
10293 match(Set dst (NegD src)); | 9772 match(Set dst (NegD src)); |
10294 ins_cost(100); | 9773 ins_cost(100); |
10295 format %{ "FCHS" %} | 9774 format %{ "FCHS" %} |
10296 opcode(0xE0, 0xD9); | 9775 opcode(0xE0, 0xD9); |
10297 ins_encode( OpcS, OpcP ); | 9776 ins_encode( OpcS, OpcP ); |
10298 ins_pipe( fpu_reg_reg ); | 9777 ins_pipe( fpu_reg_reg ); |
10299 %} | 9778 %} |
10300 | 9779 |
10301 instruct negXD_reg( regXD dst ) %{ | 9780 instruct addDPR_reg(regDPR dst, regDPR src) %{ |
10302 predicate(UseSSE>=2); | |
10303 match(Set dst (NegD dst)); | |
10304 format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %} | |
10305 ins_encode %{ | |
10306 __ xorpd($dst$$XMMRegister, | |
10307 ExternalAddress((address)double_signflip_pool)); | |
10308 %} | |
10309 ins_pipe( pipe_slow ); | |
10310 %} | |
10311 | |
10312 instruct addD_reg(regD dst, regD src) %{ | |
10313 predicate(UseSSE<=1); | 9781 predicate(UseSSE<=1); |
10314 match(Set dst (AddD dst src)); | 9782 match(Set dst (AddD dst src)); |
10315 format %{ "FLD $src\n\t" | 9783 format %{ "FLD $src\n\t" |
10316 "DADD $dst,ST" %} | 9784 "DADD $dst,ST" %} |
10317 size(4); | 9785 size(4); |
10318 ins_cost(150); | 9786 ins_cost(150); |
10319 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ | 9787 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ |
10320 ins_encode( Push_Reg_D(src), | 9788 ins_encode( Push_Reg_DPR(src), |
10321 OpcP, RegOpc(dst) ); | 9789 OpcP, RegOpc(dst) ); |
10322 ins_pipe( fpu_reg_reg ); | 9790 ins_pipe( fpu_reg_reg ); |
10323 %} | 9791 %} |
10324 | 9792 |
10325 | 9793 |
10326 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{ | 9794 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ |
10327 predicate(UseSSE<=1); | 9795 predicate(UseSSE<=1); |
10328 match(Set dst (RoundDouble (AddD src1 src2))); | 9796 match(Set dst (RoundDouble (AddD src1 src2))); |
10329 ins_cost(250); | 9797 ins_cost(250); |
10330 | 9798 |
10331 format %{ "FLD $src2\n\t" | 9799 format %{ "FLD $src2\n\t" |
10332 "DADD ST,$src1\n\t" | 9800 "DADD ST,$src1\n\t" |
10333 "FSTP_D $dst\t# D-round" %} | 9801 "FSTP_D $dst\t# D-round" %} |
10334 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ | 9802 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ |
10335 ins_encode( Push_Reg_D(src2), | 9803 ins_encode( Push_Reg_DPR(src2), |
10336 OpcP, RegOpc(src1), Pop_Mem_D(dst) ); | 9804 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); |
10337 ins_pipe( fpu_mem_reg_reg ); | 9805 ins_pipe( fpu_mem_reg_reg ); |
10338 %} | 9806 %} |
10339 | 9807 |
10340 | 9808 |
10341 instruct addD_reg_mem(regD dst, memory src) %{ | 9809 instruct addDPR_reg_mem(regDPR dst, memory src) %{ |
10342 predicate(UseSSE<=1); | 9810 predicate(UseSSE<=1); |
10343 match(Set dst (AddD dst (LoadD src))); | 9811 match(Set dst (AddD dst (LoadD src))); |
10344 ins_cost(150); | 9812 ins_cost(150); |
10345 | 9813 |
10346 format %{ "FLD $src\n\t" | 9814 format %{ "FLD $src\n\t" |
10350 OpcP, RegOpc(dst) ); | 9818 OpcP, RegOpc(dst) ); |
10351 ins_pipe( fpu_reg_mem ); | 9819 ins_pipe( fpu_reg_mem ); |
10352 %} | 9820 %} |
10353 | 9821 |
10354 // add-to-memory | 9822 // add-to-memory |
10355 instruct addD_mem_reg(memory dst, regD src) %{ | 9823 instruct addDPR_mem_reg(memory dst, regDPR src) %{ |
10356 predicate(UseSSE<=1); | 9824 predicate(UseSSE<=1); |
10357 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); | 9825 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); |
10358 ins_cost(150); | 9826 ins_cost(150); |
10359 | 9827 |
10360 format %{ "FLD_D $dst\n\t" | 9828 format %{ "FLD_D $dst\n\t" |
10366 set_instruction_start, | 9834 set_instruction_start, |
10367 Opcode(0xDD), RMopc_Mem(0x03,dst) ); | 9835 Opcode(0xDD), RMopc_Mem(0x03,dst) ); |
10368 ins_pipe( fpu_reg_mem ); | 9836 ins_pipe( fpu_reg_mem ); |
10369 %} | 9837 %} |
10370 | 9838 |
10371 instruct addD_reg_imm1(regD dst, immD1 con) %{ | 9839 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ |
10372 predicate(UseSSE<=1); | 9840 predicate(UseSSE<=1); |
10373 match(Set dst (AddD dst con)); | 9841 match(Set dst (AddD dst con)); |
10374 ins_cost(125); | 9842 ins_cost(125); |
10375 format %{ "FLD1\n\t" | 9843 format %{ "FLD1\n\t" |
10376 "DADDp $dst,ST" %} | 9844 "DADDp $dst,ST" %} |
10379 __ faddp($dst$$reg); | 9847 __ faddp($dst$$reg); |
10380 %} | 9848 %} |
10381 ins_pipe(fpu_reg); | 9849 ins_pipe(fpu_reg); |
10382 %} | 9850 %} |
10383 | 9851 |
10384 instruct addD_reg_imm(regD dst, immD con) %{ | 9852 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ |
10385 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); | 9853 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); |
10386 match(Set dst (AddD dst con)); | 9854 match(Set dst (AddD dst con)); |
10387 ins_cost(200); | 9855 ins_cost(200); |
10388 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" | 9856 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" |
10389 "DADDp $dst,ST" %} | 9857 "DADDp $dst,ST" %} |
10392 __ faddp($dst$$reg); | 9860 __ faddp($dst$$reg); |
10393 %} | 9861 %} |
10394 ins_pipe(fpu_reg_mem); | 9862 ins_pipe(fpu_reg_mem); |
10395 %} | 9863 %} |
10396 | 9864 |
10397 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ | 9865 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ |
10398 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); | 9866 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); |
10399 match(Set dst (RoundDouble (AddD src con))); | 9867 match(Set dst (RoundDouble (AddD src con))); |
10400 ins_cost(200); | 9868 ins_cost(200); |
10401 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" | 9869 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" |
10402 "DADD ST,$src\n\t" | 9870 "DADD ST,$src\n\t" |
10407 __ fstp_d(Address(rsp, $dst$$disp)); | 9875 __ fstp_d(Address(rsp, $dst$$disp)); |
10408 %} | 9876 %} |
10409 ins_pipe(fpu_mem_reg_con); | 9877 ins_pipe(fpu_mem_reg_con); |
10410 %} | 9878 %} |
10411 | 9879 |
10412 // Add two double precision floating point values in xmm | 9880 instruct mulDPR_reg(regDPR dst, regDPR src) %{ |
10413 instruct addXD_reg(regXD dst, regXD src) %{ | |
10414 predicate(UseSSE>=2); | |
10415 match(Set dst (AddD dst src)); | |
10416 format %{ "ADDSD $dst,$src" %} | |
10417 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); | |
10418 ins_pipe( pipe_slow ); | |
10419 %} | |
10420 | |
10421 instruct addXD_imm(regXD dst, immXD con) %{ | |
10422 predicate(UseSSE>=2); | |
10423 match(Set dst (AddD dst con)); | |
10424 format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} | |
10425 ins_encode %{ | |
10426 __ addsd($dst$$XMMRegister, $constantaddress($con)); | |
10427 %} | |
10428 ins_pipe(pipe_slow); | |
10429 %} | |
10430 | |
10431 instruct addXD_mem(regXD dst, memory mem) %{ | |
10432 predicate(UseSSE>=2); | |
10433 match(Set dst (AddD dst (LoadD mem))); | |
10434 format %{ "ADDSD $dst,$mem" %} | |
10435 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem)); | |
10436 ins_pipe( pipe_slow ); | |
10437 %} | |
10438 | |
10439 // Sub two double precision floating point values in xmm | |
10440 instruct subXD_reg(regXD dst, regXD src) %{ | |
10441 predicate(UseSSE>=2); | |
10442 match(Set dst (SubD dst src)); | |
10443 format %{ "SUBSD $dst,$src" %} | |
10444 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); | |
10445 ins_pipe( pipe_slow ); | |
10446 %} | |
10447 | |
10448 instruct subXD_imm(regXD dst, immXD con) %{ | |
10449 predicate(UseSSE>=2); | |
10450 match(Set dst (SubD dst con)); | |
10451 format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} | |
10452 ins_encode %{ | |
10453 __ subsd($dst$$XMMRegister, $constantaddress($con)); | |
10454 %} | |
10455 ins_pipe(pipe_slow); | |
10456 %} | |
10457 | |
10458 instruct subXD_mem(regXD dst, memory mem) %{ | |
10459 predicate(UseSSE>=2); | |
10460 match(Set dst (SubD dst (LoadD mem))); | |
10461 format %{ "SUBSD $dst,$mem" %} | |
10462 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); | |
10463 ins_pipe( pipe_slow ); | |
10464 %} | |
10465 | |
10466 // Mul two double precision floating point values in xmm | |
10467 instruct mulXD_reg(regXD dst, regXD src) %{ | |
10468 predicate(UseSSE>=2); | |
10469 match(Set dst (MulD dst src)); | |
10470 format %{ "MULSD $dst,$src" %} | |
10471 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); | |
10472 ins_pipe( pipe_slow ); | |
10473 %} | |
10474 | |
10475 instruct mulXD_imm(regXD dst, immXD con) %{ | |
10476 predicate(UseSSE>=2); | |
10477 match(Set dst (MulD dst con)); | |
10478 format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} | |
10479 ins_encode %{ | |
10480 __ mulsd($dst$$XMMRegister, $constantaddress($con)); | |
10481 %} | |
10482 ins_pipe(pipe_slow); | |
10483 %} | |
10484 | |
10485 instruct mulXD_mem(regXD dst, memory mem) %{ | |
10486 predicate(UseSSE>=2); | |
10487 match(Set dst (MulD dst (LoadD mem))); | |
10488 format %{ "MULSD $dst,$mem" %} | |
10489 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); | |
10490 ins_pipe( pipe_slow ); | |
10491 %} | |
10492 | |
10493 // Div two double precision floating point values in xmm | |
10494 instruct divXD_reg(regXD dst, regXD src) %{ | |
10495 predicate(UseSSE>=2); | |
10496 match(Set dst (DivD dst src)); | |
10497 format %{ "DIVSD $dst,$src" %} | |
10498 opcode(0xF2, 0x0F, 0x5E); | |
10499 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); | |
10500 ins_pipe( pipe_slow ); | |
10501 %} | |
10502 | |
10503 instruct divXD_imm(regXD dst, immXD con) %{ | |
10504 predicate(UseSSE>=2); | |
10505 match(Set dst (DivD dst con)); | |
10506 format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} | |
10507 ins_encode %{ | |
10508 __ divsd($dst$$XMMRegister, $constantaddress($con)); | |
10509 %} | |
10510 ins_pipe(pipe_slow); | |
10511 %} | |
10512 | |
10513 instruct divXD_mem(regXD dst, memory mem) %{ | |
10514 predicate(UseSSE>=2); | |
10515 match(Set dst (DivD dst (LoadD mem))); | |
10516 format %{ "DIVSD $dst,$mem" %} | |
10517 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); | |
10518 ins_pipe( pipe_slow ); | |
10519 %} | |
10520 | |
10521 | |
10522 instruct mulD_reg(regD dst, regD src) %{ | |
10523 predicate(UseSSE<=1); | 9881 predicate(UseSSE<=1); |
10524 match(Set dst (MulD dst src)); | 9882 match(Set dst (MulD dst src)); |
10525 format %{ "FLD $src\n\t" | 9883 format %{ "FLD $src\n\t" |
10526 "DMULp $dst,ST" %} | 9884 "DMULp $dst,ST" %} |
10527 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ | 9885 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ |
10528 ins_cost(150); | 9886 ins_cost(150); |
10529 ins_encode( Push_Reg_D(src), | 9887 ins_encode( Push_Reg_DPR(src), |
10530 OpcP, RegOpc(dst) ); | 9888 OpcP, RegOpc(dst) ); |
10531 ins_pipe( fpu_reg_reg ); | 9889 ins_pipe( fpu_reg_reg ); |
10532 %} | 9890 %} |
10533 | 9891 |
10534 // Strict FP instruction biases argument before multiply then | 9892 // Strict FP instruction biases argument before multiply then |
10537 // scale arg1 by multiplying arg1 by 2^(-15360) | 9895 // scale arg1 by multiplying arg1 by 2^(-15360) |
10538 // load arg2 | 9896 // load arg2 |
10539 // multiply scaled arg1 by arg2 | 9897 // multiply scaled arg1 by arg2 |
10540 // rescale product by 2^(15360) | 9898 // rescale product by 2^(15360) |
10541 // | 9899 // |
10542 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ | 9900 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ |
10543 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); | 9901 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); |
10544 match(Set dst (MulD dst src)); | 9902 match(Set dst (MulD dst src)); |
10545 ins_cost(1); // Select this instruction for all strict FP double multiplies | 9903 ins_cost(1); // Select this instruction for all strict FP double multiplies |
10546 | 9904 |
10547 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" | 9905 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" |
10550 "DMULp $dst,ST\n\t" | 9908 "DMULp $dst,ST\n\t" |
10551 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" | 9909 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" |
10552 "DMULp $dst,ST\n\t" %} | 9910 "DMULp $dst,ST\n\t" %} |
10553 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ | 9911 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ |
10554 ins_encode( strictfp_bias1(dst), | 9912 ins_encode( strictfp_bias1(dst), |
10555 Push_Reg_D(src), | 9913 Push_Reg_DPR(src), |
10556 OpcP, RegOpc(dst), | 9914 OpcP, RegOpc(dst), |
10557 strictfp_bias2(dst) ); | 9915 strictfp_bias2(dst) ); |
10558 ins_pipe( fpu_reg_reg ); | 9916 ins_pipe( fpu_reg_reg ); |
10559 %} | 9917 %} |
10560 | 9918 |
10561 instruct mulD_reg_imm(regD dst, immD con) %{ | 9919 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ |
10562 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); | 9920 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); |
10563 match(Set dst (MulD dst con)); | 9921 match(Set dst (MulD dst con)); |
10564 ins_cost(200); | 9922 ins_cost(200); |
10565 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" | 9923 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" |
10566 "DMULp $dst,ST" %} | 9924 "DMULp $dst,ST" %} |
10570 %} | 9928 %} |
10571 ins_pipe(fpu_reg_mem); | 9929 ins_pipe(fpu_reg_mem); |
10572 %} | 9930 %} |
10573 | 9931 |
10574 | 9932 |
10575 instruct mulD_reg_mem(regD dst, memory src) %{ | 9933 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ |
10576 predicate( UseSSE<=1 ); | 9934 predicate( UseSSE<=1 ); |
10577 match(Set dst (MulD dst (LoadD src))); | 9935 match(Set dst (MulD dst (LoadD src))); |
10578 ins_cost(200); | 9936 ins_cost(200); |
10579 format %{ "FLD_D $src\n\t" | 9937 format %{ "FLD_D $src\n\t" |
10580 "DMULp $dst,ST" %} | 9938 "DMULp $dst,ST" %} |
10584 ins_pipe( fpu_reg_mem ); | 9942 ins_pipe( fpu_reg_mem ); |
10585 %} | 9943 %} |
10586 | 9944 |
10587 // | 9945 // |
10588 // Cisc-alternate to reg-reg multiply | 9946 // Cisc-alternate to reg-reg multiply |
10589 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{ | 9947 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ |
10590 predicate( UseSSE<=1 ); | 9948 predicate( UseSSE<=1 ); |
10591 match(Set dst (MulD src (LoadD mem))); | 9949 match(Set dst (MulD src (LoadD mem))); |
10592 ins_cost(250); | 9950 ins_cost(250); |
10593 format %{ "FLD_D $mem\n\t" | 9951 format %{ "FLD_D $mem\n\t" |
10594 "DMUL ST,$src\n\t" | 9952 "DMUL ST,$src\n\t" |
10595 "FSTP_D $dst" %} | 9953 "FSTP_D $dst" %} |
10596 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ | 9954 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ |
10597 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), | 9955 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), |
10598 OpcReg_F(src), | 9956 OpcReg_FPR(src), |
10599 Pop_Reg_D(dst) ); | 9957 Pop_Reg_DPR(dst) ); |
10600 ins_pipe( fpu_reg_reg_mem ); | 9958 ins_pipe( fpu_reg_reg_mem ); |
10601 %} | 9959 %} |
10602 | 9960 |
10603 | 9961 |
10604 // MACRO3 -- addD a mulD | 9962 // MACRO3 -- addDPR a mulDPR |
10605 // This instruction is a '2-address' instruction in that the result goes | 9963 // This instruction is a '2-address' instruction in that the result goes |
10606 // back to src2. This eliminates a move from the macro; possibly the | 9964 // back to src2. This eliminates a move from the macro; possibly the |
10607 // register allocator will have to add it back (and maybe not). | 9965 // register allocator will have to add it back (and maybe not). |
10608 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{ | 9966 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ |
10609 predicate( UseSSE<=1 ); | 9967 predicate( UseSSE<=1 ); |
10610 match(Set src2 (AddD (MulD src0 src1) src2)); | 9968 match(Set src2 (AddD (MulD src0 src1) src2)); |
10611 format %{ "FLD $src0\t# ===MACRO3d===\n\t" | 9969 format %{ "FLD $src0\t# ===MACRO3d===\n\t" |
10612 "DMUL ST,$src1\n\t" | 9970 "DMUL ST,$src1\n\t" |
10613 "DADDp $src2,ST" %} | 9971 "DADDp $src2,ST" %} |
10614 ins_cost(250); | 9972 ins_cost(250); |
10615 opcode(0xDD); /* LoadD DD /0 */ | 9973 opcode(0xDD); /* LoadD DD /0 */ |
10616 ins_encode( Push_Reg_F(src0), | 9974 ins_encode( Push_Reg_FPR(src0), |
10617 FMul_ST_reg(src1), | 9975 FMul_ST_reg(src1), |
10618 FAddP_reg_ST(src2) ); | 9976 FAddP_reg_ST(src2) ); |
10619 ins_pipe( fpu_reg_reg_reg ); | 9977 ins_pipe( fpu_reg_reg_reg ); |
10620 %} | 9978 %} |
10621 | 9979 |
10622 | 9980 |
10623 // MACRO3 -- subD a mulD | 9981 // MACRO3 -- subDPR a mulDPR |
10624 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{ | 9982 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ |
10625 predicate( UseSSE<=1 ); | 9983 predicate( UseSSE<=1 ); |
10626 match(Set src2 (SubD (MulD src0 src1) src2)); | 9984 match(Set src2 (SubD (MulD src0 src1) src2)); |
10627 format %{ "FLD $src0\t# ===MACRO3d===\n\t" | 9985 format %{ "FLD $src0\t# ===MACRO3d===\n\t" |
10628 "DMUL ST,$src1\n\t" | 9986 "DMUL ST,$src1\n\t" |
10629 "DSUBRp $src2,ST" %} | 9987 "DSUBRp $src2,ST" %} |
10630 ins_cost(250); | 9988 ins_cost(250); |
10631 ins_encode( Push_Reg_F(src0), | 9989 ins_encode( Push_Reg_FPR(src0), |
10632 FMul_ST_reg(src1), | 9990 FMul_ST_reg(src1), |
10633 Opcode(0xDE), Opc_plus(0xE0,src2)); | 9991 Opcode(0xDE), Opc_plus(0xE0,src2)); |
10634 ins_pipe( fpu_reg_reg_reg ); | 9992 ins_pipe( fpu_reg_reg_reg ); |
10635 %} | 9993 %} |
10636 | 9994 |
10637 | 9995 |
10638 instruct divD_reg(regD dst, regD src) %{ | 9996 instruct divDPR_reg(regDPR dst, regDPR src) %{ |
10639 predicate( UseSSE<=1 ); | 9997 predicate( UseSSE<=1 ); |
10640 match(Set dst (DivD dst src)); | 9998 match(Set dst (DivD dst src)); |
10641 | 9999 |
10642 format %{ "FLD $src\n\t" | 10000 format %{ "FLD $src\n\t" |
10643 "FDIVp $dst,ST" %} | 10001 "FDIVp $dst,ST" %} |
10644 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ | 10002 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ |
10645 ins_cost(150); | 10003 ins_cost(150); |
10646 ins_encode( Push_Reg_D(src), | 10004 ins_encode( Push_Reg_DPR(src), |
10647 OpcP, RegOpc(dst) ); | 10005 OpcP, RegOpc(dst) ); |
10648 ins_pipe( fpu_reg_reg ); | 10006 ins_pipe( fpu_reg_reg ); |
10649 %} | 10007 %} |
10650 | 10008 |
10651 // Strict FP instruction biases argument before division then | 10009 // Strict FP instruction biases argument before division then |
10654 // scale dividend by multiplying dividend by 2^(-15360) | 10012 // scale dividend by multiplying dividend by 2^(-15360) |
10655 // load divisor | 10013 // load divisor |
10656 // divide scaled dividend by divisor | 10014 // divide scaled dividend by divisor |
10657 // rescale quotient by 2^(15360) | 10015 // rescale quotient by 2^(15360) |
10658 // | 10016 // |
10659 instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{ | 10017 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ |
10660 predicate (UseSSE<=1); | 10018 predicate (UseSSE<=1); |
10661 match(Set dst (DivD dst src)); | 10019 match(Set dst (DivD dst src)); |
10662 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); | 10020 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); |
10663 ins_cost(01); | 10021 ins_cost(01); |
10664 | 10022 |
10668 "FDIVp $dst,ST\n\t" | 10026 "FDIVp $dst,ST\n\t" |
10669 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" | 10027 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" |
10670 "DMULp $dst,ST\n\t" %} | 10028 "DMULp $dst,ST\n\t" %} |
10671 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ | 10029 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ |
10672 ins_encode( strictfp_bias1(dst), | 10030 ins_encode( strictfp_bias1(dst), |
10673 Push_Reg_D(src), | 10031 Push_Reg_DPR(src), |
10674 OpcP, RegOpc(dst), | 10032 OpcP, RegOpc(dst), |
10675 strictfp_bias2(dst) ); | 10033 strictfp_bias2(dst) ); |
10676 ins_pipe( fpu_reg_reg ); | 10034 ins_pipe( fpu_reg_reg ); |
10677 %} | 10035 %} |
10678 | 10036 |
10679 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{ | 10037 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ |
10680 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); | 10038 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); |
10681 match(Set dst (RoundDouble (DivD src1 src2))); | 10039 match(Set dst (RoundDouble (DivD src1 src2))); |
10682 | 10040 |
10683 format %{ "FLD $src1\n\t" | 10041 format %{ "FLD $src1\n\t" |
10684 "FDIV ST,$src2\n\t" | 10042 "FDIV ST,$src2\n\t" |
10685 "FSTP_D $dst\t# D-round" %} | 10043 "FSTP_D $dst\t# D-round" %} |
10686 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ | 10044 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ |
10687 ins_encode( Push_Reg_D(src1), | 10045 ins_encode( Push_Reg_DPR(src1), |
10688 OpcP, RegOpc(src2), Pop_Mem_D(dst) ); | 10046 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); |
10689 ins_pipe( fpu_mem_reg_reg ); | 10047 ins_pipe( fpu_mem_reg_reg ); |
10690 %} | 10048 %} |
10691 | 10049 |
10692 | 10050 |
10693 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{ | 10051 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ |
10694 predicate(UseSSE<=1); | 10052 predicate(UseSSE<=1); |
10695 match(Set dst (ModD dst src)); | 10053 match(Set dst (ModD dst src)); |
10696 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS | 10054 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS |
10697 | 10055 |
10698 format %{ "DMOD $dst,$src" %} | 10056 format %{ "DMOD $dst,$src" %} |
10699 ins_cost(250); | 10057 ins_cost(250); |
10700 ins_encode(Push_Reg_Mod_D(dst, src), | 10058 ins_encode(Push_Reg_Mod_DPR(dst, src), |
10701 emitModD(), | 10059 emitModDPR(), |
10702 Push_Result_Mod_D(src), | 10060 Push_Result_Mod_DPR(src), |
10703 Pop_Reg_D(dst)); | 10061 Pop_Reg_DPR(dst)); |
10704 ins_pipe( pipe_slow ); | 10062 ins_pipe( pipe_slow ); |
10705 %} | 10063 %} |
10706 | 10064 |
10707 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{ | 10065 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ |
10708 predicate(UseSSE>=2); | 10066 predicate(UseSSE>=2); |
10709 match(Set dst (ModD src0 src1)); | 10067 match(Set dst (ModD src0 src1)); |
10710 effect(KILL rax, KILL cr); | 10068 effect(KILL rax, KILL cr); |
10711 | 10069 |
10712 format %{ "SUB ESP,8\t # DMOD\n" | 10070 format %{ "SUB ESP,8\t # DMOD\n" |
10723 "\tMOVSD $dst,[ESP+0]\n" | 10081 "\tMOVSD $dst,[ESP+0]\n" |
10724 "\tADD ESP,8\n" | 10082 "\tADD ESP,8\n" |
10725 "\tFSTP ST0\t # Restore FPU Stack" | 10083 "\tFSTP ST0\t # Restore FPU Stack" |
10726 %} | 10084 %} |
10727 ins_cost(250); | 10085 ins_cost(250); |
10728 ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); | 10086 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); |
10729 ins_pipe( pipe_slow ); | 10087 ins_pipe( pipe_slow ); |
10730 %} | 10088 %} |
10731 | 10089 |
10732 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ | 10090 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ |
10733 predicate (UseSSE<=1); | 10091 predicate (UseSSE<=1); |
10734 match(Set dst (SinD src)); | 10092 match(Set dst (SinD src)); |
10735 ins_cost(1800); | 10093 ins_cost(1800); |
10736 format %{ "DSIN $dst" %} | 10094 format %{ "DSIN $dst" %} |
10737 opcode(0xD9, 0xFE); | 10095 opcode(0xD9, 0xFE); |
10738 ins_encode( OpcP, OpcS ); | 10096 ins_encode( OpcP, OpcS ); |
10739 ins_pipe( pipe_slow ); | 10097 ins_pipe( pipe_slow ); |
10740 %} | 10098 %} |
10741 | 10099 |
10742 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ | 10100 instruct sinD_reg(regD dst, eFlagsReg cr) %{ |
10743 predicate (UseSSE>=2); | 10101 predicate (UseSSE>=2); |
10744 match(Set dst (SinD dst)); | 10102 match(Set dst (SinD dst)); |
10745 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" | 10103 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" |
10746 ins_cost(1800); | 10104 ins_cost(1800); |
10747 format %{ "DSIN $dst" %} | 10105 format %{ "DSIN $dst" %} |
10748 opcode(0xD9, 0xFE); | 10106 opcode(0xD9, 0xFE); |
10749 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); | 10107 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); |
10750 ins_pipe( pipe_slow ); | 10108 ins_pipe( pipe_slow ); |
10751 %} | 10109 %} |
10752 | 10110 |
10753 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ | 10111 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ |
10754 predicate (UseSSE<=1); | 10112 predicate (UseSSE<=1); |
10755 match(Set dst (CosD src)); | 10113 match(Set dst (CosD src)); |
10756 ins_cost(1800); | 10114 ins_cost(1800); |
10757 format %{ "DCOS $dst" %} | 10115 format %{ "DCOS $dst" %} |
10758 opcode(0xD9, 0xFF); | 10116 opcode(0xD9, 0xFF); |
10759 ins_encode( OpcP, OpcS ); | 10117 ins_encode( OpcP, OpcS ); |
10760 ins_pipe( pipe_slow ); | 10118 ins_pipe( pipe_slow ); |
10761 %} | 10119 %} |
10762 | 10120 |
10763 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{ | 10121 instruct cosD_reg(regD dst, eFlagsReg cr) %{ |
10764 predicate (UseSSE>=2); | 10122 predicate (UseSSE>=2); |
10765 match(Set dst (CosD dst)); | 10123 match(Set dst (CosD dst)); |
10766 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" | 10124 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" |
10767 ins_cost(1800); | 10125 ins_cost(1800); |
10768 format %{ "DCOS $dst" %} | 10126 format %{ "DCOS $dst" %} |
10769 opcode(0xD9, 0xFF); | 10127 opcode(0xD9, 0xFF); |
10770 ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); | 10128 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); |
10771 ins_pipe( pipe_slow ); | 10129 ins_pipe( pipe_slow ); |
10772 %} | 10130 %} |
10773 | 10131 |
10774 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{ | 10132 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ |
10775 predicate (UseSSE<=1); | 10133 predicate (UseSSE<=1); |
10776 match(Set dst(TanD src)); | 10134 match(Set dst(TanD src)); |
10777 format %{ "DTAN $dst" %} | 10135 format %{ "DTAN $dst" %} |
10778 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan | 10136 ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan |
10779 Opcode(0xDD), Opcode(0xD8)); // fstp st | 10137 Opcode(0xDD), Opcode(0xD8)); // fstp st |
10780 ins_pipe( pipe_slow ); | 10138 ins_pipe( pipe_slow ); |
10781 %} | 10139 %} |
10782 | 10140 |
10783 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{ | 10141 instruct tanD_reg(regD dst, eFlagsReg cr) %{ |
10784 predicate (UseSSE>=2); | 10142 predicate (UseSSE>=2); |
10785 match(Set dst(TanD dst)); | 10143 match(Set dst(TanD dst)); |
10786 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" | 10144 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" |
10787 format %{ "DTAN $dst" %} | 10145 format %{ "DTAN $dst" %} |
10788 ins_encode( Push_SrcXD(dst), | 10146 ins_encode( Push_SrcD(dst), |
10789 Opcode(0xD9), Opcode(0xF2), // fptan | 10147 Opcode(0xD9), Opcode(0xF2), // fptan |
10790 Opcode(0xDD), Opcode(0xD8), // fstp st | 10148 Opcode(0xDD), Opcode(0xD8), // fstp st |
10791 Push_ResultXD(dst) ); | 10149 Push_ResultD(dst) ); |
10792 ins_pipe( pipe_slow ); | 10150 ins_pipe( pipe_slow ); |
10793 %} | 10151 %} |
10794 | 10152 |
10795 instruct atanD_reg(regD dst, regD src) %{ | 10153 instruct atanDPR_reg(regDPR dst, regDPR src) %{ |
10796 predicate (UseSSE<=1); | 10154 predicate (UseSSE<=1); |
10797 match(Set dst(AtanD dst src)); | 10155 match(Set dst(AtanD dst src)); |
10798 format %{ "DATA $dst,$src" %} | 10156 format %{ "DATA $dst,$src" %} |
10799 opcode(0xD9, 0xF3); | 10157 opcode(0xD9, 0xF3); |
10800 ins_encode( Push_Reg_D(src), | 10158 ins_encode( Push_Reg_DPR(src), |
10801 OpcP, OpcS, RegOpc(dst) ); | 10159 OpcP, OpcS, RegOpc(dst) ); |
10802 ins_pipe( pipe_slow ); | 10160 ins_pipe( pipe_slow ); |
10803 %} | 10161 %} |
10804 | 10162 |
10805 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ | 10163 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ |
10806 predicate (UseSSE>=2); | 10164 predicate (UseSSE>=2); |
10807 match(Set dst(AtanD dst src)); | 10165 match(Set dst(AtanD dst src)); |
10808 effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" | 10166 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" |
10809 format %{ "DATA $dst,$src" %} | 10167 format %{ "DATA $dst,$src" %} |
10810 opcode(0xD9, 0xF3); | 10168 opcode(0xD9, 0xF3); |
10811 ins_encode( Push_SrcXD(src), | 10169 ins_encode( Push_SrcD(src), |
10812 OpcP, OpcS, Push_ResultXD(dst) ); | 10170 OpcP, OpcS, Push_ResultD(dst) ); |
10813 ins_pipe( pipe_slow ); | 10171 ins_pipe( pipe_slow ); |
10814 %} | 10172 %} |
10815 | 10173 |
10816 instruct sqrtD_reg(regD dst, regD src) %{ | 10174 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ |
10817 predicate (UseSSE<=1); | 10175 predicate (UseSSE<=1); |
10818 match(Set dst (SqrtD src)); | 10176 match(Set dst (SqrtD src)); |
10819 format %{ "DSQRT $dst,$src" %} | 10177 format %{ "DSQRT $dst,$src" %} |
10820 opcode(0xFA, 0xD9); | 10178 opcode(0xFA, 0xD9); |
10821 ins_encode( Push_Reg_D(src), | 10179 ins_encode( Push_Reg_DPR(src), |
10822 OpcS, OpcP, Pop_Reg_D(dst) ); | 10180 OpcS, OpcP, Pop_Reg_DPR(dst) ); |
10823 ins_pipe( pipe_slow ); | 10181 ins_pipe( pipe_slow ); |
10824 %} | 10182 %} |
10825 | 10183 |
10826 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ | 10184 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ |
10827 predicate (UseSSE<=1); | 10185 predicate (UseSSE<=1); |
10828 match(Set Y (PowD X Y)); // Raise X to the Yth power | 10186 match(Set Y (PowD X Y)); // Raise X to the Yth power |
10829 effect(KILL rax, KILL rbx, KILL rcx); | 10187 effect(KILL rax, KILL rbx, KILL rcx); |
10830 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" | 10188 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" |
10831 "FLD_D $X\n\t" | 10189 "FLD_D $X\n\t" |
10850 "FMUL ST(0),[ESP+0]\t# Scale\n\t" | 10208 "FMUL ST(0),[ESP+0]\t# Scale\n\t" |
10851 | 10209 |
10852 "ADD ESP,8" | 10210 "ADD ESP,8" |
10853 %} | 10211 %} |
10854 ins_encode( push_stack_temp_qword, | 10212 ins_encode( push_stack_temp_qword, |
10855 Push_Reg_D(X), | 10213 Push_Reg_DPR(X), |
10856 Opcode(0xD9), Opcode(0xF1), // fyl2x | 10214 Opcode(0xD9), Opcode(0xF1), // fyl2x |
10857 pow_exp_core_encoding, | 10215 pow_exp_core_encoding, |
10858 pop_stack_temp_qword); | 10216 pop_stack_temp_qword); |
10859 ins_pipe( pipe_slow ); | 10217 ins_pipe( pipe_slow ); |
10860 %} | 10218 %} |
10861 | 10219 |
10862 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ | 10220 instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ |
10863 predicate (UseSSE>=2); | 10221 predicate (UseSSE>=2); |
10864 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power | 10222 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power |
10865 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); | 10223 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); |
10866 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" | 10224 format %{ "SUB ESP,8\t\t# Fast-path POW encoding\n\t" |
10867 "MOVSD [ESP],$src1\n\t" | 10225 "MOVSD [ESP],$src1\n\t" |
10895 ins_encode( push_stack_temp_qword, | 10253 ins_encode( push_stack_temp_qword, |
10896 push_xmm_to_fpr1(src1), | 10254 push_xmm_to_fpr1(src1), |
10897 push_xmm_to_fpr1(src0), | 10255 push_xmm_to_fpr1(src0), |
10898 Opcode(0xD9), Opcode(0xF1), // fyl2x | 10256 Opcode(0xD9), Opcode(0xF1), // fyl2x |
10899 pow_exp_core_encoding, | 10257 pow_exp_core_encoding, |
10900 Push_ResultXD(dst) ); | 10258 Push_ResultD(dst) ); |
10901 ins_pipe( pipe_slow ); | 10259 ins_pipe( pipe_slow ); |
10902 %} | 10260 %} |
10903 | 10261 |
10904 | 10262 |
10905 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ | 10263 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ |
10906 predicate (UseSSE<=1); | 10264 predicate (UseSSE<=1); |
10907 match(Set dpr1 (ExpD dpr1)); | 10265 match(Set dpr1 (ExpD dpr1)); |
10908 effect(KILL rax, KILL rbx, KILL rcx); | 10266 effect(KILL rax, KILL rbx, KILL rcx); |
10909 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding" | 10267 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding" |
10910 "FLDL2E \t\t\t# Ld log2(e) X\n\t" | 10268 "FLDL2E \t\t\t# Ld log2(e) X\n\t" |
10936 pow_exp_core_encoding, | 10294 pow_exp_core_encoding, |
10937 pop_stack_temp_qword); | 10295 pop_stack_temp_qword); |
10938 ins_pipe( pipe_slow ); | 10296 ins_pipe( pipe_slow ); |
10939 %} | 10297 %} |
10940 | 10298 |
10941 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ | 10299 instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ |
10942 predicate (UseSSE>=2); | 10300 predicate (UseSSE>=2); |
10943 match(Set dst (ExpD src)); | 10301 match(Set dst (ExpD src)); |
10944 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); | 10302 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); |
10945 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" | 10303 format %{ "SUB ESP,8\t\t# Fast-path EXP encoding\n\t" |
10946 "MOVSD [ESP],$src\n\t" | 10304 "MOVSD [ESP],$src\n\t" |
10967 | 10325 |
10968 "FST_D [ESP]\n\t" | 10326 "FST_D [ESP]\n\t" |
10969 "MOVSD $dst,[ESP]\n\t" | 10327 "MOVSD $dst,[ESP]\n\t" |
10970 "ADD ESP,8" | 10328 "ADD ESP,8" |
10971 %} | 10329 %} |
10972 ins_encode( Push_SrcXD(src), | 10330 ins_encode( Push_SrcD(src), |
10973 Opcode(0xD9), Opcode(0xEA), // fldl2e | 10331 Opcode(0xD9), Opcode(0xEA), // fldl2e |
10974 Opcode(0xDE), Opcode(0xC9), // fmulp | 10332 Opcode(0xDE), Opcode(0xC9), // fmulp |
10975 pow_exp_core_encoding, | 10333 pow_exp_core_encoding, |
10976 Push_ResultXD(dst) ); | 10334 Push_ResultD(dst) ); |
10977 ins_pipe( pipe_slow ); | 10335 ins_pipe( pipe_slow ); |
10978 %} | 10336 %} |
10979 | 10337 |
10980 | 10338 |
10981 | 10339 |
10982 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ | 10340 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ |
10983 predicate (UseSSE<=1); | 10341 predicate (UseSSE<=1); |
10984 // The source Double operand on FPU stack | 10342 // The source Double operand on FPU stack |
10985 match(Set dst (Log10D src)); | 10343 match(Set dst (Log10D src)); |
10986 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number | 10344 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number |
10987 // fxch ; swap ST(0) with ST(1) | 10345 // fxch ; swap ST(0) with ST(1) |
10995 Opcode(0xD9), Opcode(0xF1)); // fyl2x | 10353 Opcode(0xD9), Opcode(0xF1)); // fyl2x |
10996 | 10354 |
10997 ins_pipe( pipe_slow ); | 10355 ins_pipe( pipe_slow ); |
10998 %} | 10356 %} |
10999 | 10357 |
11000 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{ | 10358 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ |
11001 predicate (UseSSE>=2); | 10359 predicate (UseSSE>=2); |
11002 effect(KILL cr); | 10360 effect(KILL cr); |
11003 match(Set dst (Log10D src)); | 10361 match(Set dst (Log10D src)); |
11004 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number | 10362 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number |
11005 // fyl2x ; compute log_10(2) * log_2(x) | 10363 // fyl2x ; compute log_10(2) * log_2(x) |
11006 format %{ "FLDLG2 \t\t\t#Log10\n\t" | 10364 format %{ "FLDLG2 \t\t\t#Log10\n\t" |
11007 "FYL2X \t\t\t# Q=Log10*Log_2(x)" | 10365 "FYL2X \t\t\t# Q=Log10*Log_2(x)" |
11008 %} | 10366 %} |
11009 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 | 10367 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 |
11010 Push_SrcXD(src), | 10368 Push_SrcD(src), |
11011 Opcode(0xD9), Opcode(0xF1), // fyl2x | 10369 Opcode(0xD9), Opcode(0xF1), // fyl2x |
11012 Push_ResultXD(dst)); | 10370 Push_ResultD(dst)); |
11013 | 10371 |
11014 ins_pipe( pipe_slow ); | 10372 ins_pipe( pipe_slow ); |
11015 %} | 10373 %} |
11016 | 10374 |
11017 instruct logD_reg(regDPR1 dst, regDPR1 src) %{ | 10375 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ |
11018 predicate (UseSSE<=1); | 10376 predicate (UseSSE<=1); |
11019 // The source Double operand on FPU stack | 10377 // The source Double operand on FPU stack |
11020 match(Set dst (LogD src)); | 10378 match(Set dst (LogD src)); |
11021 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number | 10379 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number |
11022 // fxch ; swap ST(0) with ST(1) | 10380 // fxch ; swap ST(0) with ST(1) |
11030 Opcode(0xD9), Opcode(0xF1)); // fyl2x | 10388 Opcode(0xD9), Opcode(0xF1)); // fyl2x |
11031 | 10389 |
11032 ins_pipe( pipe_slow ); | 10390 ins_pipe( pipe_slow ); |
11033 %} | 10391 %} |
11034 | 10392 |
11035 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ | 10393 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ |
11036 predicate (UseSSE>=2); | 10394 predicate (UseSSE>=2); |
11037 effect(KILL cr); | 10395 effect(KILL cr); |
11038 // The source and result Double operands in XMM registers | 10396 // The source and result Double operands in XMM registers |
11039 match(Set dst (LogD src)); | 10397 match(Set dst (LogD src)); |
11040 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number | 10398 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number |
11041 // fyl2x ; compute log_e(2) * log_2(x) | 10399 // fyl2x ; compute log_e(2) * log_2(x) |
11042 format %{ "FLDLN2 \t\t\t#Log_e\n\t" | 10400 format %{ "FLDLN2 \t\t\t#Log_e\n\t" |
11043 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" | 10401 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" |
11044 %} | 10402 %} |
11045 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 | 10403 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 |
11046 Push_SrcXD(src), | 10404 Push_SrcD(src), |
11047 Opcode(0xD9), Opcode(0xF1), // fyl2x | 10405 Opcode(0xD9), Opcode(0xF1), // fyl2x |
11048 Push_ResultXD(dst)); | 10406 Push_ResultD(dst)); |
11049 ins_pipe( pipe_slow ); | 10407 ins_pipe( pipe_slow ); |
11050 %} | 10408 %} |
11051 | 10409 |
11052 //-------------Float Instructions------------------------------- | 10410 //-------------Float Instructions------------------------------- |
11053 // Float Math | 10411 // Float Math |
11064 // jcc(Assembler::equal, exit); | 10422 // jcc(Assembler::equal, exit); |
11065 // movl(dst, greater_result); | 10423 // movl(dst, greater_result); |
11066 // exit: | 10424 // exit: |
11067 | 10425 |
11068 // P6 version of float compare, sets condition codes in EFLAGS | 10426 // P6 version of float compare, sets condition codes in EFLAGS |
11069 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ | 10427 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ |
11070 predicate(VM_Version::supports_cmov() && UseSSE == 0); | 10428 predicate(VM_Version::supports_cmov() && UseSSE == 0); |
11071 match(Set cr (CmpF src1 src2)); | 10429 match(Set cr (CmpF src1 src2)); |
11072 effect(KILL rax); | 10430 effect(KILL rax); |
11073 ins_cost(150); | 10431 ins_cost(150); |
11074 format %{ "FLD $src1\n\t" | 10432 format %{ "FLD $src1\n\t" |
11076 "JNP exit\n\t" | 10434 "JNP exit\n\t" |
11077 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" | 10435 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" |
11078 "SAHF\n" | 10436 "SAHF\n" |
11079 "exit:\tNOP // avoid branch to branch" %} | 10437 "exit:\tNOP // avoid branch to branch" %} |
11080 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ | 10438 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ |
11081 ins_encode( Push_Reg_D(src1), | 10439 ins_encode( Push_Reg_DPR(src1), |
11082 OpcP, RegOpc(src2), | 10440 OpcP, RegOpc(src2), |
11083 cmpF_P6_fixup ); | 10441 cmpF_P6_fixup ); |
11084 ins_pipe( pipe_slow ); | 10442 ins_pipe( pipe_slow ); |
11085 %} | 10443 %} |
11086 | 10444 |
11087 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{ | 10445 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ |
11088 predicate(VM_Version::supports_cmov() && UseSSE == 0); | 10446 predicate(VM_Version::supports_cmov() && UseSSE == 0); |
11089 match(Set cr (CmpF src1 src2)); | 10447 match(Set cr (CmpF src1 src2)); |
11090 ins_cost(100); | 10448 ins_cost(100); |
11091 format %{ "FLD $src1\n\t" | 10449 format %{ "FLD $src1\n\t" |
11092 "FUCOMIP ST,$src2 // P6 instruction" %} | 10450 "FUCOMIP ST,$src2 // P6 instruction" %} |
11093 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ | 10451 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ |
11094 ins_encode( Push_Reg_D(src1), | 10452 ins_encode( Push_Reg_DPR(src1), |
11095 OpcP, RegOpc(src2)); | 10453 OpcP, RegOpc(src2)); |
11096 ins_pipe( pipe_slow ); | 10454 ins_pipe( pipe_slow ); |
11097 %} | 10455 %} |
11098 | 10456 |
11099 | 10457 |
11100 // Compare & branch | 10458 // Compare & branch |
11101 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ | 10459 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ |
11102 predicate(UseSSE == 0); | 10460 predicate(UseSSE == 0); |
11103 match(Set cr (CmpF src1 src2)); | 10461 match(Set cr (CmpF src1 src2)); |
11104 effect(KILL rax); | 10462 effect(KILL rax); |
11105 ins_cost(200); | 10463 ins_cost(200); |
11106 format %{ "FLD $src1\n\t" | 10464 format %{ "FLD $src1\n\t" |
11109 "TEST AX,0x400\n\t" | 10467 "TEST AX,0x400\n\t" |
11110 "JZ,s flags\n\t" | 10468 "JZ,s flags\n\t" |
11111 "MOV AH,1\t# unordered treat as LT\n" | 10469 "MOV AH,1\t# unordered treat as LT\n" |
11112 "flags:\tSAHF" %} | 10470 "flags:\tSAHF" %} |
11113 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ | 10471 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ |
11114 ins_encode( Push_Reg_D(src1), | 10472 ins_encode( Push_Reg_DPR(src1), |
11115 OpcP, RegOpc(src2), | 10473 OpcP, RegOpc(src2), |
11116 fpu_flags); | 10474 fpu_flags); |
11117 ins_pipe( pipe_slow ); | 10475 ins_pipe( pipe_slow ); |
11118 %} | 10476 %} |
11119 | 10477 |
11120 // Compare vs zero into -1,0,1 | 10478 // Compare vs zero into -1,0,1 |
11121 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{ | 10479 instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ |
11122 predicate(UseSSE == 0); | 10480 predicate(UseSSE == 0); |
11123 match(Set dst (CmpF3 src1 zero)); | 10481 match(Set dst (CmpF3 src1 zero)); |
11124 effect(KILL cr, KILL rax); | 10482 effect(KILL cr, KILL rax); |
11125 ins_cost(280); | 10483 ins_cost(280); |
11126 format %{ "FTSTF $dst,$src1" %} | 10484 format %{ "FTSTF $dst,$src1" %} |
11127 opcode(0xE4, 0xD9); | 10485 opcode(0xE4, 0xD9); |
11128 ins_encode( Push_Reg_D(src1), | 10486 ins_encode( Push_Reg_DPR(src1), |
11129 OpcS, OpcP, PopFPU, | 10487 OpcS, OpcP, PopFPU, |
11130 CmpF_Result(dst)); | 10488 CmpF_Result(dst)); |
11131 ins_pipe( pipe_slow ); | 10489 ins_pipe( pipe_slow ); |
11132 %} | 10490 %} |
11133 | 10491 |
11134 // Compare into -1,0,1 | 10492 // Compare into -1,0,1 |
11135 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ | 10493 instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ |
11136 predicate(UseSSE == 0); | 10494 predicate(UseSSE == 0); |
11137 match(Set dst (CmpF3 src1 src2)); | 10495 match(Set dst (CmpF3 src1 src2)); |
11138 effect(KILL cr, KILL rax); | 10496 effect(KILL cr, KILL rax); |
11139 ins_cost(300); | 10497 ins_cost(300); |
11140 format %{ "FCMPF $dst,$src1,$src2" %} | 10498 format %{ "FCMPF $dst,$src1,$src2" %} |
11141 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ | 10499 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ |
11142 ins_encode( Push_Reg_D(src1), | 10500 ins_encode( Push_Reg_DPR(src1), |
11143 OpcP, RegOpc(src2), | 10501 OpcP, RegOpc(src2), |
11144 CmpF_Result(dst)); | 10502 CmpF_Result(dst)); |
11145 ins_pipe( pipe_slow ); | 10503 ins_pipe( pipe_slow ); |
11146 %} | 10504 %} |
11147 | 10505 |
11148 // float compare and set condition codes in EFLAGS by XMM regs | 10506 // float compare and set condition codes in EFLAGS by XMM regs |
11149 instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{ | 10507 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ |
11150 predicate(UseSSE>=1); | 10508 predicate(UseSSE>=1); |
11151 match(Set cr (CmpF dst src)); | 10509 match(Set cr (CmpF src1 src2)); |
11152 effect(KILL rax); | |
11153 ins_cost(145); | 10510 ins_cost(145); |
11154 format %{ "COMISS $dst,$src\n" | 10511 format %{ "UCOMISS $src1,$src2\n\t" |
11155 "\tJNP exit\n" | 10512 "JNP,s exit\n\t" |
11156 "\tMOV ah,1 // saw a NaN, set CF\n" | 10513 "PUSHF\t# saw NaN, set CF\n\t" |
11157 "\tSAHF\n" | 10514 "AND [rsp], #0xffffff2b\n\t" |
11158 "exit:\tNOP // avoid branch to branch" %} | 10515 "POPF\n" |
11159 opcode(0x0F, 0x2F); | 10516 "exit:" %} |
11160 ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup); | 10517 ins_encode %{ |
10518 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); | |
10519 emit_cmpfp_fixup(_masm); | |
10520 %} | |
11161 ins_pipe( pipe_slow ); | 10521 ins_pipe( pipe_slow ); |
11162 %} | 10522 %} |
11163 | 10523 |
11164 instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{ | 10524 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ |
11165 predicate(UseSSE>=1); | 10525 predicate(UseSSE>=1); |
11166 match(Set cr (CmpF dst src)); | 10526 match(Set cr (CmpF src1 src2)); |
11167 ins_cost(100); | 10527 ins_cost(100); |
11168 format %{ "COMISS $dst,$src" %} | 10528 format %{ "UCOMISS $src1,$src2" %} |
11169 opcode(0x0F, 0x2F); | 10529 ins_encode %{ |
11170 ins_encode(OpcP, OpcS, RegReg(dst, src)); | 10530 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); |
10531 %} | |
11171 ins_pipe( pipe_slow ); | 10532 ins_pipe( pipe_slow ); |
11172 %} | 10533 %} |
11173 | 10534 |
11174 // float compare and set condition codes in EFLAGS by XMM regs | 10535 // float compare and set condition codes in EFLAGS by XMM regs |
11175 instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{ | 10536 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ |
11176 predicate(UseSSE>=1); | 10537 predicate(UseSSE>=1); |
11177 match(Set cr (CmpF dst (LoadF src))); | 10538 match(Set cr (CmpF src1 (LoadF src2))); |
11178 effect(KILL rax); | |
11179 ins_cost(165); | 10539 ins_cost(165); |
11180 format %{ "COMISS $dst,$src\n" | 10540 format %{ "UCOMISS $src1,$src2\n\t" |
11181 "\tJNP exit\n" | 10541 "JNP,s exit\n\t" |
11182 "\tMOV ah,1 // saw a NaN, set CF\n" | 10542 "PUSHF\t# saw NaN, set CF\n\t" |
11183 "\tSAHF\n" | 10543 "AND [rsp], #0xffffff2b\n\t" |
11184 "exit:\tNOP // avoid branch to branch" %} | 10544 "POPF\n" |
11185 opcode(0x0F, 0x2F); | 10545 "exit:" %} |
11186 ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup); | 10546 ins_encode %{ |
10547 __ ucomiss($src1$$XMMRegister, $src2$$Address); | |
10548 emit_cmpfp_fixup(_masm); | |
10549 %} | |
11187 ins_pipe( pipe_slow ); | 10550 ins_pipe( pipe_slow ); |
11188 %} | 10551 %} |
11189 | 10552 |
11190 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{ | 10553 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ |
11191 predicate(UseSSE>=1); | 10554 predicate(UseSSE>=1); |
11192 match(Set cr (CmpF dst (LoadF src))); | 10555 match(Set cr (CmpF src1 (LoadF src2))); |
11193 ins_cost(100); | 10556 ins_cost(100); |
11194 format %{ "COMISS $dst,$src" %} | 10557 format %{ "UCOMISS $src1,$src2" %} |
11195 opcode(0x0F, 0x2F); | 10558 ins_encode %{ |
11196 ins_encode(OpcP, OpcS, RegMem(dst, src)); | 10559 __ ucomiss($src1$$XMMRegister, $src2$$Address); |
10560 %} | |
11197 ins_pipe( pipe_slow ); | 10561 ins_pipe( pipe_slow ); |
11198 %} | 10562 %} |
11199 | 10563 |
11200 // Compare into -1,0,1 in XMM | 10564 // Compare into -1,0,1 in XMM |
11201 instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{ | 10565 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ |
11202 predicate(UseSSE>=1); | 10566 predicate(UseSSE>=1); |
11203 match(Set dst (CmpF3 src1 src2)); | 10567 match(Set dst (CmpF3 src1 src2)); |
11204 effect(KILL cr); | 10568 effect(KILL cr); |
11205 ins_cost(255); | 10569 ins_cost(255); |
11206 format %{ "XOR $dst,$dst\n" | 10570 format %{ "UCOMISS $src1, $src2\n\t" |
11207 "\tCOMISS $src1,$src2\n" | 10571 "MOV $dst, #-1\n\t" |
11208 "\tJP,s nan\n" | 10572 "JP,s done\n\t" |
11209 "\tJEQ,s exit\n" | 10573 "JB,s done\n\t" |
11210 "\tJA,s inc\n" | 10574 "SETNE $dst\n\t" |
11211 "nan:\tDEC $dst\n" | 10575 "MOVZB $dst, $dst\n" |
11212 "\tJMP,s exit\n" | 10576 "done:" %} |
11213 "inc:\tINC $dst\n" | 10577 ins_encode %{ |
11214 "exit:" | 10578 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); |
11215 %} | 10579 emit_cmpfp3(_masm, $dst$$Register); |
11216 opcode(0x0F, 0x2F); | 10580 %} |
11217 ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst)); | |
11218 ins_pipe( pipe_slow ); | 10581 ins_pipe( pipe_slow ); |
11219 %} | 10582 %} |
11220 | 10583 |
11221 // Compare into -1,0,1 in XMM and memory | 10584 // Compare into -1,0,1 in XMM and memory |
11222 instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{ | 10585 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ |
11223 predicate(UseSSE>=1); | 10586 predicate(UseSSE>=1); |
11224 match(Set dst (CmpF3 src1 (LoadF mem))); | 10587 match(Set dst (CmpF3 src1 (LoadF src2))); |
11225 effect(KILL cr); | 10588 effect(KILL cr); |
11226 ins_cost(275); | 10589 ins_cost(275); |
11227 format %{ "COMISS $src1,$mem\n" | 10590 format %{ "UCOMISS $src1, $src2\n\t" |
11228 "\tMOV $dst,0\t\t# do not blow flags\n" | 10591 "MOV $dst, #-1\n\t" |
11229 "\tJP,s nan\n" | 10592 "JP,s done\n\t" |
11230 "\tJEQ,s exit\n" | 10593 "JB,s done\n\t" |
11231 "\tJA,s inc\n" | 10594 "SETNE $dst\n\t" |
11232 "nan:\tDEC $dst\n" | 10595 "MOVZB $dst, $dst\n" |
11233 "\tJMP,s exit\n" | 10596 "done:" %} |
11234 "inc:\tINC $dst\n" | 10597 ins_encode %{ |
11235 "exit:" | 10598 __ ucomiss($src1$$XMMRegister, $src2$$Address); |
11236 %} | 10599 emit_cmpfp3(_masm, $dst$$Register); |
11237 opcode(0x0F, 0x2F); | 10600 %} |
11238 ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst)); | |
11239 ins_pipe( pipe_slow ); | 10601 ins_pipe( pipe_slow ); |
11240 %} | 10602 %} |
11241 | 10603 |
11242 // Spill to obtain 24-bit precision | 10604 // Spill to obtain 24-bit precision |
11243 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{ | 10605 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ |
11244 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10606 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11245 match(Set dst (SubF src1 src2)); | 10607 match(Set dst (SubF src1 src2)); |
11246 | 10608 |
11247 format %{ "FSUB $dst,$src1 - $src2" %} | 10609 format %{ "FSUB $dst,$src1 - $src2" %} |
11248 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ | 10610 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ |
11249 ins_encode( Push_Reg_F(src1), | 10611 ins_encode( Push_Reg_FPR(src1), |
11250 OpcReg_F(src2), | 10612 OpcReg_FPR(src2), |
11251 Pop_Mem_F(dst) ); | 10613 Pop_Mem_FPR(dst) ); |
11252 ins_pipe( fpu_mem_reg_reg ); | 10614 ins_pipe( fpu_mem_reg_reg ); |
11253 %} | 10615 %} |
11254 // | 10616 // |
11255 // This instruction does not round to 24-bits | 10617 // This instruction does not round to 24-bits |
11256 instruct subF_reg(regF dst, regF src) %{ | 10618 instruct subFPR_reg(regFPR dst, regFPR src) %{ |
11257 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10619 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11258 match(Set dst (SubF dst src)); | 10620 match(Set dst (SubF dst src)); |
11259 | 10621 |
11260 format %{ "FSUB $dst,$src" %} | 10622 format %{ "FSUB $dst,$src" %} |
11261 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ | 10623 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ |
11262 ins_encode( Push_Reg_F(src), | 10624 ins_encode( Push_Reg_FPR(src), |
11263 OpcP, RegOpc(dst) ); | 10625 OpcP, RegOpc(dst) ); |
11264 ins_pipe( fpu_reg_reg ); | 10626 ins_pipe( fpu_reg_reg ); |
11265 %} | 10627 %} |
11266 | 10628 |
11267 // Spill to obtain 24-bit precision | 10629 // Spill to obtain 24-bit precision |
11268 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{ | 10630 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ |
11269 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10631 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11270 match(Set dst (AddF src1 src2)); | 10632 match(Set dst (AddF src1 src2)); |
11271 | 10633 |
11272 format %{ "FADD $dst,$src1,$src2" %} | 10634 format %{ "FADD $dst,$src1,$src2" %} |
11273 opcode(0xD8, 0x0); /* D8 C0+i */ | 10635 opcode(0xD8, 0x0); /* D8 C0+i */ |
11274 ins_encode( Push_Reg_F(src2), | 10636 ins_encode( Push_Reg_FPR(src2), |
11275 OpcReg_F(src1), | 10637 OpcReg_FPR(src1), |
11276 Pop_Mem_F(dst) ); | 10638 Pop_Mem_FPR(dst) ); |
11277 ins_pipe( fpu_mem_reg_reg ); | 10639 ins_pipe( fpu_mem_reg_reg ); |
11278 %} | 10640 %} |
11279 // | 10641 // |
11280 // This instruction does not round to 24-bits | 10642 // This instruction does not round to 24-bits |
11281 instruct addF_reg(regF dst, regF src) %{ | 10643 instruct addFPR_reg(regFPR dst, regFPR src) %{ |
11282 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10644 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11283 match(Set dst (AddF dst src)); | 10645 match(Set dst (AddF dst src)); |
11284 | 10646 |
11285 format %{ "FLD $src\n\t" | 10647 format %{ "FLD $src\n\t" |
11286 "FADDp $dst,ST" %} | 10648 "FADDp $dst,ST" %} |
11287 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ | 10649 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ |
11288 ins_encode( Push_Reg_F(src), | 10650 ins_encode( Push_Reg_FPR(src), |
11289 OpcP, RegOpc(dst) ); | 10651 OpcP, RegOpc(dst) ); |
11290 ins_pipe( fpu_reg_reg ); | 10652 ins_pipe( fpu_reg_reg ); |
11291 %} | 10653 %} |
11292 | 10654 |
11293 // Add two single precision floating point values in xmm | 10655 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ |
11294 instruct addX_reg(regX dst, regX src) %{ | |
11295 predicate(UseSSE>=1); | |
11296 match(Set dst (AddF dst src)); | |
11297 format %{ "ADDSS $dst,$src" %} | |
11298 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); | |
11299 ins_pipe( pipe_slow ); | |
11300 %} | |
11301 | |
11302 instruct addX_imm(regX dst, immXF con) %{ | |
11303 predicate(UseSSE>=1); | |
11304 match(Set dst (AddF dst con)); | |
11305 format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} | |
11306 ins_encode %{ | |
11307 __ addss($dst$$XMMRegister, $constantaddress($con)); | |
11308 %} | |
11309 ins_pipe(pipe_slow); | |
11310 %} | |
11311 | |
11312 instruct addX_mem(regX dst, memory mem) %{ | |
11313 predicate(UseSSE>=1); | |
11314 match(Set dst (AddF dst (LoadF mem))); | |
11315 format %{ "ADDSS $dst,$mem" %} | |
11316 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem)); | |
11317 ins_pipe( pipe_slow ); | |
11318 %} | |
11319 | |
11320 // Subtract two single precision floating point values in xmm | |
11321 instruct subX_reg(regX dst, regX src) %{ | |
11322 predicate(UseSSE>=1); | |
11323 match(Set dst (SubF dst src)); | |
11324 format %{ "SUBSS $dst,$src" %} | |
11325 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); | |
11326 ins_pipe( pipe_slow ); | |
11327 %} | |
11328 | |
11329 instruct subX_imm(regX dst, immXF con) %{ | |
11330 predicate(UseSSE>=1); | |
11331 match(Set dst (SubF dst con)); | |
11332 format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} | |
11333 ins_encode %{ | |
11334 __ subss($dst$$XMMRegister, $constantaddress($con)); | |
11335 %} | |
11336 ins_pipe(pipe_slow); | |
11337 %} | |
11338 | |
11339 instruct subX_mem(regX dst, memory mem) %{ | |
11340 predicate(UseSSE>=1); | |
11341 match(Set dst (SubF dst (LoadF mem))); | |
11342 format %{ "SUBSS $dst,$mem" %} | |
11343 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); | |
11344 ins_pipe( pipe_slow ); | |
11345 %} | |
11346 | |
11347 // Multiply two single precision floating point values in xmm | |
11348 instruct mulX_reg(regX dst, regX src) %{ | |
11349 predicate(UseSSE>=1); | |
11350 match(Set dst (MulF dst src)); | |
11351 format %{ "MULSS $dst,$src" %} | |
11352 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); | |
11353 ins_pipe( pipe_slow ); | |
11354 %} | |
11355 | |
11356 instruct mulX_imm(regX dst, immXF con) %{ | |
11357 predicate(UseSSE>=1); | |
11358 match(Set dst (MulF dst con)); | |
11359 format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} | |
11360 ins_encode %{ | |
11361 __ mulss($dst$$XMMRegister, $constantaddress($con)); | |
11362 %} | |
11363 ins_pipe(pipe_slow); | |
11364 %} | |
11365 | |
11366 instruct mulX_mem(regX dst, memory mem) %{ | |
11367 predicate(UseSSE>=1); | |
11368 match(Set dst (MulF dst (LoadF mem))); | |
11369 format %{ "MULSS $dst,$mem" %} | |
11370 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); | |
11371 ins_pipe( pipe_slow ); | |
11372 %} | |
11373 | |
11374 // Divide two single precision floating point values in xmm | |
11375 instruct divX_reg(regX dst, regX src) %{ | |
11376 predicate(UseSSE>=1); | |
11377 match(Set dst (DivF dst src)); | |
11378 format %{ "DIVSS $dst,$src" %} | |
11379 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); | |
11380 ins_pipe( pipe_slow ); | |
11381 %} | |
11382 | |
11383 instruct divX_imm(regX dst, immXF con) %{ | |
11384 predicate(UseSSE>=1); | |
11385 match(Set dst (DivF dst con)); | |
11386 format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} | |
11387 ins_encode %{ | |
11388 __ divss($dst$$XMMRegister, $constantaddress($con)); | |
11389 %} | |
11390 ins_pipe(pipe_slow); | |
11391 %} | |
11392 | |
11393 instruct divX_mem(regX dst, memory mem) %{ | |
11394 predicate(UseSSE>=1); | |
11395 match(Set dst (DivF dst (LoadF mem))); | |
11396 format %{ "DIVSS $dst,$mem" %} | |
11397 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); | |
11398 ins_pipe( pipe_slow ); | |
11399 %} | |
11400 | |
11401 // Get the square root of a single precision floating point values in xmm | |
11402 instruct sqrtX_reg(regX dst, regX src) %{ | |
11403 predicate(UseSSE>=1); | |
11404 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); | |
11405 format %{ "SQRTSS $dst,$src" %} | |
11406 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); | |
11407 ins_pipe( pipe_slow ); | |
11408 %} | |
11409 | |
11410 instruct sqrtX_mem(regX dst, memory mem) %{ | |
11411 predicate(UseSSE>=1); | |
11412 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem))))); | |
11413 format %{ "SQRTSS $dst,$mem" %} | |
11414 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); | |
11415 ins_pipe( pipe_slow ); | |
11416 %} | |
11417 | |
11418 // Get the square root of a double precision floating point values in xmm | |
11419 instruct sqrtXD_reg(regXD dst, regXD src) %{ | |
11420 predicate(UseSSE>=2); | |
11421 match(Set dst (SqrtD src)); | |
11422 format %{ "SQRTSD $dst,$src" %} | |
11423 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); | |
11424 ins_pipe( pipe_slow ); | |
11425 %} | |
11426 | |
11427 instruct sqrtXD_mem(regXD dst, memory mem) %{ | |
11428 predicate(UseSSE>=2); | |
11429 match(Set dst (SqrtD (LoadD mem))); | |
11430 format %{ "SQRTSD $dst,$mem" %} | |
11431 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); | |
11432 ins_pipe( pipe_slow ); | |
11433 %} | |
11434 | |
11435 instruct absF_reg(regFPR1 dst, regFPR1 src) %{ | |
11436 predicate(UseSSE==0); | 10656 predicate(UseSSE==0); |
11437 match(Set dst (AbsF src)); | 10657 match(Set dst (AbsF src)); |
11438 ins_cost(100); | 10658 ins_cost(100); |
11439 format %{ "FABS" %} | 10659 format %{ "FABS" %} |
11440 opcode(0xE1, 0xD9); | 10660 opcode(0xE1, 0xD9); |
11441 ins_encode( OpcS, OpcP ); | 10661 ins_encode( OpcS, OpcP ); |
11442 ins_pipe( fpu_reg_reg ); | 10662 ins_pipe( fpu_reg_reg ); |
11443 %} | 10663 %} |
11444 | 10664 |
11445 instruct absX_reg(regX dst ) %{ | 10665 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ |
11446 predicate(UseSSE>=1); | |
11447 match(Set dst (AbsF dst)); | |
11448 format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %} | |
11449 ins_encode( AbsXF_encoding(dst)); | |
11450 ins_pipe( pipe_slow ); | |
11451 %} | |
11452 | |
11453 instruct negF_reg(regFPR1 dst, regFPR1 src) %{ | |
11454 predicate(UseSSE==0); | 10666 predicate(UseSSE==0); |
11455 match(Set dst (NegF src)); | 10667 match(Set dst (NegF src)); |
11456 ins_cost(100); | 10668 ins_cost(100); |
11457 format %{ "FCHS" %} | 10669 format %{ "FCHS" %} |
11458 opcode(0xE0, 0xD9); | 10670 opcode(0xE0, 0xD9); |
11459 ins_encode( OpcS, OpcP ); | 10671 ins_encode( OpcS, OpcP ); |
11460 ins_pipe( fpu_reg_reg ); | 10672 ins_pipe( fpu_reg_reg ); |
11461 %} | 10673 %} |
11462 | 10674 |
11463 instruct negX_reg( regX dst ) %{ | 10675 // Cisc-alternate to addFPR_reg |
11464 predicate(UseSSE>=1); | |
11465 match(Set dst (NegF dst)); | |
11466 format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %} | |
11467 ins_encode( NegXF_encoding(dst)); | |
11468 ins_pipe( pipe_slow ); | |
11469 %} | |
11470 | |
11471 // Cisc-alternate to addF_reg | |
11472 // Spill to obtain 24-bit precision | 10676 // Spill to obtain 24-bit precision |
11473 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ | 10677 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ |
11474 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10678 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11475 match(Set dst (AddF src1 (LoadF src2))); | 10679 match(Set dst (AddF src1 (LoadF src2))); |
11476 | 10680 |
11477 format %{ "FLD $src2\n\t" | 10681 format %{ "FLD $src2\n\t" |
11478 "FADD ST,$src1\n\t" | 10682 "FADD ST,$src1\n\t" |
11479 "FSTP_S $dst" %} | 10683 "FSTP_S $dst" %} |
11480 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ | 10684 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ |
11481 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), | 10685 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
11482 OpcReg_F(src1), | 10686 OpcReg_FPR(src1), |
11483 Pop_Mem_F(dst) ); | 10687 Pop_Mem_FPR(dst) ); |
11484 ins_pipe( fpu_mem_reg_mem ); | 10688 ins_pipe( fpu_mem_reg_mem ); |
11485 %} | 10689 %} |
11486 // | 10690 // |
11487 // Cisc-alternate to addF_reg | 10691 // Cisc-alternate to addFPR_reg |
11488 // This instruction does not round to 24-bits | 10692 // This instruction does not round to 24-bits |
11489 instruct addF_reg_mem(regF dst, memory src) %{ | 10693 instruct addFPR_reg_mem(regFPR dst, memory src) %{ |
11490 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10694 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11491 match(Set dst (AddF dst (LoadF src))); | 10695 match(Set dst (AddF dst (LoadF src))); |
11492 | 10696 |
11493 format %{ "FADD $dst,$src" %} | 10697 format %{ "FADD $dst,$src" %} |
11494 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ | 10698 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ |
11497 ins_pipe( fpu_reg_mem ); | 10701 ins_pipe( fpu_reg_mem ); |
11498 %} | 10702 %} |
11499 | 10703 |
11500 // // Following two instructions for _222_mpegaudio | 10704 // // Following two instructions for _222_mpegaudio |
11501 // Spill to obtain 24-bit precision | 10705 // Spill to obtain 24-bit precision |
11502 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{ | 10706 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ |
11503 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10707 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11504 match(Set dst (AddF src1 src2)); | 10708 match(Set dst (AddF src1 src2)); |
11505 | 10709 |
11506 format %{ "FADD $dst,$src1,$src2" %} | 10710 format %{ "FADD $dst,$src1,$src2" %} |
11507 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ | 10711 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ |
11508 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), | 10712 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), |
11509 OpcReg_F(src2), | 10713 OpcReg_FPR(src2), |
11510 Pop_Mem_F(dst) ); | 10714 Pop_Mem_FPR(dst) ); |
11511 ins_pipe( fpu_mem_reg_mem ); | 10715 ins_pipe( fpu_mem_reg_mem ); |
11512 %} | 10716 %} |
11513 | 10717 |
11514 // Cisc-spill variant | 10718 // Cisc-spill variant |
11515 // Spill to obtain 24-bit precision | 10719 // Spill to obtain 24-bit precision |
11516 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ | 10720 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ |
11517 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10721 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11518 match(Set dst (AddF src1 (LoadF src2))); | 10722 match(Set dst (AddF src1 (LoadF src2))); |
11519 | 10723 |
11520 format %{ "FADD $dst,$src1,$src2 cisc" %} | 10724 format %{ "FADD $dst,$src1,$src2 cisc" %} |
11521 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ | 10725 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ |
11522 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), | 10726 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
11523 set_instruction_start, | 10727 set_instruction_start, |
11524 OpcP, RMopc_Mem(secondary,src1), | 10728 OpcP, RMopc_Mem(secondary,src1), |
11525 Pop_Mem_F(dst) ); | 10729 Pop_Mem_FPR(dst) ); |
11526 ins_pipe( fpu_mem_mem_mem ); | 10730 ins_pipe( fpu_mem_mem_mem ); |
11527 %} | 10731 %} |
11528 | 10732 |
11529 // Spill to obtain 24-bit precision | 10733 // Spill to obtain 24-bit precision |
11530 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ | 10734 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ |
11531 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10735 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11532 match(Set dst (AddF src1 src2)); | 10736 match(Set dst (AddF src1 src2)); |
11533 | 10737 |
11534 format %{ "FADD $dst,$src1,$src2" %} | 10738 format %{ "FADD $dst,$src1,$src2" %} |
11535 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ | 10739 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ |
11536 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), | 10740 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
11537 set_instruction_start, | 10741 set_instruction_start, |
11538 OpcP, RMopc_Mem(secondary,src1), | 10742 OpcP, RMopc_Mem(secondary,src1), |
11539 Pop_Mem_F(dst) ); | 10743 Pop_Mem_FPR(dst) ); |
11540 ins_pipe( fpu_mem_mem_mem ); | 10744 ins_pipe( fpu_mem_mem_mem ); |
11541 %} | 10745 %} |
11542 | 10746 |
11543 | 10747 |
11544 // Spill to obtain 24-bit precision | 10748 // Spill to obtain 24-bit precision |
11545 instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{ | 10749 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ |
11546 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10750 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11547 match(Set dst (AddF src con)); | 10751 match(Set dst (AddF src con)); |
11548 format %{ "FLD $src\n\t" | 10752 format %{ "FLD $src\n\t" |
11549 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" | 10753 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" |
11550 "FSTP_S $dst" %} | 10754 "FSTP_S $dst" %} |
11555 %} | 10759 %} |
11556 ins_pipe(fpu_mem_reg_con); | 10760 ins_pipe(fpu_mem_reg_con); |
11557 %} | 10761 %} |
11558 // | 10762 // |
11559 // This instruction does not round to 24-bits | 10763 // This instruction does not round to 24-bits |
11560 instruct addF_reg_imm(regF dst, regF src, immF con) %{ | 10764 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ |
11561 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10765 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11562 match(Set dst (AddF src con)); | 10766 match(Set dst (AddF src con)); |
11563 format %{ "FLD $src\n\t" | 10767 format %{ "FLD $src\n\t" |
11564 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" | 10768 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" |
11565 "FSTP $dst" %} | 10769 "FSTP $dst" %} |
11570 %} | 10774 %} |
11571 ins_pipe(fpu_reg_reg_con); | 10775 ins_pipe(fpu_reg_reg_con); |
11572 %} | 10776 %} |
11573 | 10777 |
11574 // Spill to obtain 24-bit precision | 10778 // Spill to obtain 24-bit precision |
11575 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{ | 10779 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ |
11576 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10780 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11577 match(Set dst (MulF src1 src2)); | 10781 match(Set dst (MulF src1 src2)); |
11578 | 10782 |
11579 format %{ "FLD $src1\n\t" | 10783 format %{ "FLD $src1\n\t" |
11580 "FMUL $src2\n\t" | 10784 "FMUL $src2\n\t" |
11581 "FSTP_S $dst" %} | 10785 "FSTP_S $dst" %} |
11582 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ | 10786 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ |
11583 ins_encode( Push_Reg_F(src1), | 10787 ins_encode( Push_Reg_FPR(src1), |
11584 OpcReg_F(src2), | 10788 OpcReg_FPR(src2), |
11585 Pop_Mem_F(dst) ); | 10789 Pop_Mem_FPR(dst) ); |
11586 ins_pipe( fpu_mem_reg_reg ); | 10790 ins_pipe( fpu_mem_reg_reg ); |
11587 %} | 10791 %} |
11588 // | 10792 // |
11589 // This instruction does not round to 24-bits | 10793 // This instruction does not round to 24-bits |
11590 instruct mulF_reg(regF dst, regF src1, regF src2) %{ | 10794 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ |
11591 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10795 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11592 match(Set dst (MulF src1 src2)); | 10796 match(Set dst (MulF src1 src2)); |
11593 | 10797 |
11594 format %{ "FLD $src1\n\t" | 10798 format %{ "FLD $src1\n\t" |
11595 "FMUL $src2\n\t" | 10799 "FMUL $src2\n\t" |
11596 "FSTP_S $dst" %} | 10800 "FSTP_S $dst" %} |
11597 opcode(0xD8, 0x1); /* D8 C8+i */ | 10801 opcode(0xD8, 0x1); /* D8 C8+i */ |
11598 ins_encode( Push_Reg_F(src2), | 10802 ins_encode( Push_Reg_FPR(src2), |
11599 OpcReg_F(src1), | 10803 OpcReg_FPR(src1), |
11600 Pop_Reg_F(dst) ); | 10804 Pop_Reg_FPR(dst) ); |
11601 ins_pipe( fpu_reg_reg_reg ); | 10805 ins_pipe( fpu_reg_reg_reg ); |
11602 %} | 10806 %} |
11603 | 10807 |
11604 | 10808 |
11605 // Spill to obtain 24-bit precision | 10809 // Spill to obtain 24-bit precision |
11606 // Cisc-alternate to reg-reg multiply | 10810 // Cisc-alternate to reg-reg multiply |
11607 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ | 10811 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ |
11608 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10812 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11609 match(Set dst (MulF src1 (LoadF src2))); | 10813 match(Set dst (MulF src1 (LoadF src2))); |
11610 | 10814 |
11611 format %{ "FLD_S $src2\n\t" | 10815 format %{ "FLD_S $src2\n\t" |
11612 "FMUL $src1\n\t" | 10816 "FMUL $src1\n\t" |
11613 "FSTP_S $dst" %} | 10817 "FSTP_S $dst" %} |
11614 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ | 10818 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ |
11615 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), | 10819 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
11616 OpcReg_F(src1), | 10820 OpcReg_FPR(src1), |
11617 Pop_Mem_F(dst) ); | 10821 Pop_Mem_FPR(dst) ); |
11618 ins_pipe( fpu_mem_reg_mem ); | 10822 ins_pipe( fpu_mem_reg_mem ); |
11619 %} | 10823 %} |
11620 // | 10824 // |
11621 // This instruction does not round to 24-bits | 10825 // This instruction does not round to 24-bits |
11622 // Cisc-alternate to reg-reg multiply | 10826 // Cisc-alternate to reg-reg multiply |
11623 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ | 10827 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ |
11624 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10828 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11625 match(Set dst (MulF src1 (LoadF src2))); | 10829 match(Set dst (MulF src1 (LoadF src2))); |
11626 | 10830 |
11627 format %{ "FMUL $dst,$src1,$src2" %} | 10831 format %{ "FMUL $dst,$src1,$src2" %} |
11628 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ | 10832 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ |
11629 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), | 10833 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
11630 OpcReg_F(src1), | 10834 OpcReg_FPR(src1), |
11631 Pop_Reg_F(dst) ); | 10835 Pop_Reg_FPR(dst) ); |
11632 ins_pipe( fpu_reg_reg_mem ); | 10836 ins_pipe( fpu_reg_reg_mem ); |
11633 %} | 10837 %} |
11634 | 10838 |
11635 // Spill to obtain 24-bit precision | 10839 // Spill to obtain 24-bit precision |
11636 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ | 10840 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ |
11637 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10841 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11638 match(Set dst (MulF src1 src2)); | 10842 match(Set dst (MulF src1 src2)); |
11639 | 10843 |
11640 format %{ "FMUL $dst,$src1,$src2" %} | 10844 format %{ "FMUL $dst,$src1,$src2" %} |
11641 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ | 10845 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ |
11642 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), | 10846 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
11643 set_instruction_start, | 10847 set_instruction_start, |
11644 OpcP, RMopc_Mem(secondary,src1), | 10848 OpcP, RMopc_Mem(secondary,src1), |
11645 Pop_Mem_F(dst) ); | 10849 Pop_Mem_FPR(dst) ); |
11646 ins_pipe( fpu_mem_mem_mem ); | 10850 ins_pipe( fpu_mem_mem_mem ); |
11647 %} | 10851 %} |
11648 | 10852 |
11649 // Spill to obtain 24-bit precision | 10853 // Spill to obtain 24-bit precision |
11650 instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{ | 10854 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ |
11651 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10855 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11652 match(Set dst (MulF src con)); | 10856 match(Set dst (MulF src con)); |
11653 | 10857 |
11654 format %{ "FLD $src\n\t" | 10858 format %{ "FLD $src\n\t" |
11655 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" | 10859 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" |
11661 %} | 10865 %} |
11662 ins_pipe(fpu_mem_reg_con); | 10866 ins_pipe(fpu_mem_reg_con); |
11663 %} | 10867 %} |
11664 // | 10868 // |
11665 // This instruction does not round to 24-bits | 10869 // This instruction does not round to 24-bits |
11666 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ | 10870 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ |
11667 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10871 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11668 match(Set dst (MulF src con)); | 10872 match(Set dst (MulF src con)); |
11669 | 10873 |
11670 format %{ "FLD $src\n\t" | 10874 format %{ "FLD $src\n\t" |
11671 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" | 10875 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" |
11678 ins_pipe(fpu_reg_reg_con); | 10882 ins_pipe(fpu_reg_reg_con); |
11679 %} | 10883 %} |
11680 | 10884 |
11681 | 10885 |
11682 // | 10886 // |
11683 // MACRO1 -- subsume unshared load into mulF | 10887 // MACRO1 -- subsume unshared load into mulFPR |
11684 // This instruction does not round to 24-bits | 10888 // This instruction does not round to 24-bits |
11685 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{ | 10889 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ |
11686 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10890 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11687 match(Set dst (MulF (LoadF mem1) src)); | 10891 match(Set dst (MulF (LoadF mem1) src)); |
11688 | 10892 |
11689 format %{ "FLD $mem1 ===MACRO1===\n\t" | 10893 format %{ "FLD $mem1 ===MACRO1===\n\t" |
11690 "FMUL ST,$src\n\t" | 10894 "FMUL ST,$src\n\t" |
11691 "FSTP $dst" %} | 10895 "FSTP $dst" %} |
11692 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ | 10896 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ |
11693 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), | 10897 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), |
11694 OpcReg_F(src), | 10898 OpcReg_FPR(src), |
11695 Pop_Reg_F(dst) ); | 10899 Pop_Reg_FPR(dst) ); |
11696 ins_pipe( fpu_reg_reg_mem ); | 10900 ins_pipe( fpu_reg_reg_mem ); |
11697 %} | 10901 %} |
11698 // | 10902 // |
11699 // MACRO2 -- addF a mulF which subsumed an unshared load | 10903 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load |
11700 // This instruction does not round to 24-bits | 10904 // This instruction does not round to 24-bits |
11701 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{ | 10905 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ |
11702 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10906 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11703 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); | 10907 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); |
11704 ins_cost(95); | 10908 ins_cost(95); |
11705 | 10909 |
11706 format %{ "FLD $mem1 ===MACRO2===\n\t" | 10910 format %{ "FLD $mem1 ===MACRO2===\n\t" |
11707 "FMUL ST,$src1 subsume mulF left load\n\t" | 10911 "FMUL ST,$src1 subsume mulFPR left load\n\t" |
11708 "FADD ST,$src2\n\t" | 10912 "FADD ST,$src2\n\t" |
11709 "FSTP $dst" %} | 10913 "FSTP $dst" %} |
11710 opcode(0xD9); /* LoadF D9 /0 */ | 10914 opcode(0xD9); /* LoadF D9 /0 */ |
11711 ins_encode( OpcP, RMopc_Mem(0x00,mem1), | 10915 ins_encode( OpcP, RMopc_Mem(0x00,mem1), |
11712 FMul_ST_reg(src1), | 10916 FMul_ST_reg(src1), |
11713 FAdd_ST_reg(src2), | 10917 FAdd_ST_reg(src2), |
11714 Pop_Reg_F(dst) ); | 10918 Pop_Reg_FPR(dst) ); |
11715 ins_pipe( fpu_reg_mem_reg_reg ); | 10919 ins_pipe( fpu_reg_mem_reg_reg ); |
11716 %} | 10920 %} |
11717 | 10921 |
11718 // MACRO3 -- addF a mulF | 10922 // MACRO3 -- addFPR a mulFPR |
11719 // This instruction does not round to 24-bits. It is a '2-address' | 10923 // This instruction does not round to 24-bits. It is a '2-address' |
11720 // instruction in that the result goes back to src2. This eliminates | 10924 // instruction in that the result goes back to src2. This eliminates |
11721 // a move from the macro; possibly the register allocator will have | 10925 // a move from the macro; possibly the register allocator will have |
11722 // to add it back (and maybe not). | 10926 // to add it back (and maybe not). |
11723 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{ | 10927 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ |
11724 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10928 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11725 match(Set src2 (AddF (MulF src0 src1) src2)); | 10929 match(Set src2 (AddF (MulF src0 src1) src2)); |
11726 | 10930 |
11727 format %{ "FLD $src0 ===MACRO3===\n\t" | 10931 format %{ "FLD $src0 ===MACRO3===\n\t" |
11728 "FMUL ST,$src1\n\t" | 10932 "FMUL ST,$src1\n\t" |
11729 "FADDP $src2,ST" %} | 10933 "FADDP $src2,ST" %} |
11730 opcode(0xD9); /* LoadF D9 /0 */ | 10934 opcode(0xD9); /* LoadF D9 /0 */ |
11731 ins_encode( Push_Reg_F(src0), | 10935 ins_encode( Push_Reg_FPR(src0), |
11732 FMul_ST_reg(src1), | 10936 FMul_ST_reg(src1), |
11733 FAddP_reg_ST(src2) ); | 10937 FAddP_reg_ST(src2) ); |
11734 ins_pipe( fpu_reg_reg_reg ); | 10938 ins_pipe( fpu_reg_reg_reg ); |
11735 %} | 10939 %} |
11736 | 10940 |
11737 // MACRO4 -- divF subF | 10941 // MACRO4 -- divFPR subFPR |
11738 // This instruction does not round to 24-bits | 10942 // This instruction does not round to 24-bits |
11739 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{ | 10943 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ |
11740 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10944 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11741 match(Set dst (DivF (SubF src2 src1) src3)); | 10945 match(Set dst (DivF (SubF src2 src1) src3)); |
11742 | 10946 |
11743 format %{ "FLD $src2 ===MACRO4===\n\t" | 10947 format %{ "FLD $src2 ===MACRO4===\n\t" |
11744 "FSUB ST,$src1\n\t" | 10948 "FSUB ST,$src1\n\t" |
11745 "FDIV ST,$src3\n\t" | 10949 "FDIV ST,$src3\n\t" |
11746 "FSTP $dst" %} | 10950 "FSTP $dst" %} |
11747 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ | 10951 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ |
11748 ins_encode( Push_Reg_F(src2), | 10952 ins_encode( Push_Reg_FPR(src2), |
11749 subF_divF_encode(src1,src3), | 10953 subFPR_divFPR_encode(src1,src3), |
11750 Pop_Reg_F(dst) ); | 10954 Pop_Reg_FPR(dst) ); |
11751 ins_pipe( fpu_reg_reg_reg_reg ); | 10955 ins_pipe( fpu_reg_reg_reg_reg ); |
11752 %} | 10956 %} |
11753 | 10957 |
11754 // Spill to obtain 24-bit precision | 10958 // Spill to obtain 24-bit precision |
11755 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{ | 10959 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ |
11756 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10960 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11757 match(Set dst (DivF src1 src2)); | 10961 match(Set dst (DivF src1 src2)); |
11758 | 10962 |
11759 format %{ "FDIV $dst,$src1,$src2" %} | 10963 format %{ "FDIV $dst,$src1,$src2" %} |
11760 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ | 10964 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ |
11761 ins_encode( Push_Reg_F(src1), | 10965 ins_encode( Push_Reg_FPR(src1), |
11762 OpcReg_F(src2), | 10966 OpcReg_FPR(src2), |
11763 Pop_Mem_F(dst) ); | 10967 Pop_Mem_FPR(dst) ); |
11764 ins_pipe( fpu_mem_reg_reg ); | 10968 ins_pipe( fpu_mem_reg_reg ); |
11765 %} | 10969 %} |
11766 // | 10970 // |
11767 // This instruction does not round to 24-bits | 10971 // This instruction does not round to 24-bits |
11768 instruct divF_reg(regF dst, regF src) %{ | 10972 instruct divFPR_reg(regFPR dst, regFPR src) %{ |
11769 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 10973 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11770 match(Set dst (DivF dst src)); | 10974 match(Set dst (DivF dst src)); |
11771 | 10975 |
11772 format %{ "FDIV $dst,$src" %} | 10976 format %{ "FDIV $dst,$src" %} |
11773 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ | 10977 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ |
11774 ins_encode( Push_Reg_F(src), | 10978 ins_encode( Push_Reg_FPR(src), |
11775 OpcP, RegOpc(dst) ); | 10979 OpcP, RegOpc(dst) ); |
11776 ins_pipe( fpu_reg_reg ); | 10980 ins_pipe( fpu_reg_reg ); |
11777 %} | 10981 %} |
11778 | 10982 |
11779 | 10983 |
11780 // Spill to obtain 24-bit precision | 10984 // Spill to obtain 24-bit precision |
11781 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ | 10985 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ |
11782 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); | 10986 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); |
11783 match(Set dst (ModF src1 src2)); | 10987 match(Set dst (ModF src1 src2)); |
11784 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS | 10988 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS |
11785 | 10989 |
11786 format %{ "FMOD $dst,$src1,$src2" %} | 10990 format %{ "FMOD $dst,$src1,$src2" %} |
11787 ins_encode( Push_Reg_Mod_D(src1, src2), | 10991 ins_encode( Push_Reg_Mod_DPR(src1, src2), |
11788 emitModD(), | 10992 emitModDPR(), |
11789 Push_Result_Mod_D(src2), | 10993 Push_Result_Mod_DPR(src2), |
11790 Pop_Mem_F(dst)); | 10994 Pop_Mem_FPR(dst)); |
11791 ins_pipe( pipe_slow ); | 10995 ins_pipe( pipe_slow ); |
11792 %} | 10996 %} |
11793 // | 10997 // |
11794 // This instruction does not round to 24-bits | 10998 // This instruction does not round to 24-bits |
11795 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{ | 10999 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ |
11796 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 11000 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
11797 match(Set dst (ModF dst src)); | 11001 match(Set dst (ModF dst src)); |
11798 effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS | 11002 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS |
11799 | 11003 |
11800 format %{ "FMOD $dst,$src" %} | 11004 format %{ "FMOD $dst,$src" %} |
11801 ins_encode(Push_Reg_Mod_D(dst, src), | 11005 ins_encode(Push_Reg_Mod_DPR(dst, src), |
11802 emitModD(), | 11006 emitModDPR(), |
11803 Push_Result_Mod_D(src), | 11007 Push_Result_Mod_DPR(src), |
11804 Pop_Reg_F(dst)); | 11008 Pop_Reg_FPR(dst)); |
11805 ins_pipe( pipe_slow ); | 11009 ins_pipe( pipe_slow ); |
11806 %} | 11010 %} |
11807 | 11011 |
11808 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{ | 11012 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ |
11809 predicate(UseSSE>=1); | 11013 predicate(UseSSE>=1); |
11810 match(Set dst (ModF src0 src1)); | 11014 match(Set dst (ModF src0 src1)); |
11811 effect(KILL rax, KILL cr); | 11015 effect(KILL rax, KILL cr); |
11812 format %{ "SUB ESP,4\t # FMOD\n" | 11016 format %{ "SUB ESP,4\t # FMOD\n" |
11813 "\tMOVSS [ESP+0],$src1\n" | 11017 "\tMOVSS [ESP+0],$src1\n" |
11823 "\tMOVSS $dst,[ESP+0]\n" | 11027 "\tMOVSS $dst,[ESP+0]\n" |
11824 "\tADD ESP,4\n" | 11028 "\tADD ESP,4\n" |
11825 "\tFSTP ST0\t # Restore FPU Stack" | 11029 "\tFSTP ST0\t # Restore FPU Stack" |
11826 %} | 11030 %} |
11827 ins_cost(250); | 11031 ins_cost(250); |
11828 ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU); | 11032 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); |
11829 ins_pipe( pipe_slow ); | 11033 ins_pipe( pipe_slow ); |
11830 %} | 11034 %} |
11831 | 11035 |
11832 | 11036 |
11833 //----------Arithmetic Conversion Instructions--------------------------------- | 11037 //----------Arithmetic Conversion Instructions--------------------------------- |
11834 // The conversions operations are all Alpha sorted. Please keep it that way! | 11038 // The conversions operations are all Alpha sorted. Please keep it that way! |
11835 | 11039 |
11836 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{ | 11040 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ |
11837 predicate(UseSSE==0); | 11041 predicate(UseSSE==0); |
11838 match(Set dst (RoundFloat src)); | 11042 match(Set dst (RoundFloat src)); |
11839 ins_cost(125); | 11043 ins_cost(125); |
11840 format %{ "FST_S $dst,$src\t# F-round" %} | 11044 format %{ "FST_S $dst,$src\t# F-round" %} |
11841 ins_encode( Pop_Mem_Reg_F(dst, src) ); | 11045 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); |
11842 ins_pipe( fpu_mem_reg ); | 11046 ins_pipe( fpu_mem_reg ); |
11843 %} | 11047 %} |
11844 | 11048 |
11845 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{ | 11049 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ |
11846 predicate(UseSSE<=1); | 11050 predicate(UseSSE<=1); |
11847 match(Set dst (RoundDouble src)); | 11051 match(Set dst (RoundDouble src)); |
11848 ins_cost(125); | 11052 ins_cost(125); |
11849 format %{ "FST_D $dst,$src\t# D-round" %} | 11053 format %{ "FST_D $dst,$src\t# D-round" %} |
11850 ins_encode( Pop_Mem_Reg_D(dst, src) ); | 11054 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); |
11851 ins_pipe( fpu_mem_reg ); | 11055 ins_pipe( fpu_mem_reg ); |
11852 %} | 11056 %} |
11853 | 11057 |
11854 // Force rounding to 24-bit precision and 6-bit exponent | 11058 // Force rounding to 24-bit precision and 6-bit exponent |
11855 instruct convD2F_reg(stackSlotF dst, regD src) %{ | 11059 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ |
11856 predicate(UseSSE==0); | 11060 predicate(UseSSE==0); |
11857 match(Set dst (ConvD2F src)); | 11061 match(Set dst (ConvD2F src)); |
11858 format %{ "FST_S $dst,$src\t# F-round" %} | 11062 format %{ "FST_S $dst,$src\t# F-round" %} |
11859 expand %{ | 11063 expand %{ |
11860 roundFloat_mem_reg(dst,src); | 11064 roundFloat_mem_reg(dst,src); |
11861 %} | 11065 %} |
11862 %} | 11066 %} |
11863 | 11067 |
11864 // Force rounding to 24-bit precision and 6-bit exponent | 11068 // Force rounding to 24-bit precision and 6-bit exponent |
11865 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{ | 11069 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ |
11866 predicate(UseSSE==1); | 11070 predicate(UseSSE==1); |
11867 match(Set dst (ConvD2F src)); | 11071 match(Set dst (ConvD2F src)); |
11868 effect( KILL cr ); | 11072 effect( KILL cr ); |
11869 format %{ "SUB ESP,4\n\t" | 11073 format %{ "SUB ESP,4\n\t" |
11870 "FST_S [ESP],$src\t# F-round\n\t" | 11074 "FST_S [ESP],$src\t# F-round\n\t" |
11871 "MOVSS $dst,[ESP]\n\t" | 11075 "MOVSS $dst,[ESP]\n\t" |
11872 "ADD ESP,4" %} | 11076 "ADD ESP,4" %} |
11873 ins_encode( D2X_encoding(dst, src) ); | 11077 ins_encode %{ |
11078 __ subptr(rsp, 4); | |
11079 if ($src$$reg != FPR1L_enc) { | |
11080 __ fld_s($src$$reg-1); | |
11081 __ fstp_s(Address(rsp, 0)); | |
11082 } else { | |
11083 __ fst_s(Address(rsp, 0)); | |
11084 } | |
11085 __ movflt($dst$$XMMRegister, Address(rsp, 0)); | |
11086 __ addptr(rsp, 4); | |
11087 %} | |
11874 ins_pipe( pipe_slow ); | 11088 ins_pipe( pipe_slow ); |
11875 %} | 11089 %} |
11876 | 11090 |
11877 // Force rounding double precision to single precision | 11091 // Force rounding double precision to single precision |
11878 instruct convXD2X_reg(regX dst, regXD src) %{ | 11092 instruct convD2F_reg(regF dst, regD src) %{ |
11879 predicate(UseSSE>=2); | 11093 predicate(UseSSE>=2); |
11880 match(Set dst (ConvD2F src)); | 11094 match(Set dst (ConvD2F src)); |
11881 format %{ "CVTSD2SS $dst,$src\t# F-round" %} | 11095 format %{ "CVTSD2SS $dst,$src\t# F-round" %} |
11882 opcode(0xF2, 0x0F, 0x5A); | 11096 ins_encode %{ |
11883 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); | 11097 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); |
11098 %} | |
11884 ins_pipe( pipe_slow ); | 11099 ins_pipe( pipe_slow ); |
11885 %} | 11100 %} |
11886 | 11101 |
11887 instruct convF2D_reg_reg(regD dst, regF src) %{ | 11102 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ |
11888 predicate(UseSSE==0); | 11103 predicate(UseSSE==0); |
11889 match(Set dst (ConvF2D src)); | 11104 match(Set dst (ConvF2D src)); |
11890 format %{ "FST_S $dst,$src\t# D-round" %} | 11105 format %{ "FST_S $dst,$src\t# D-round" %} |
11891 ins_encode( Pop_Reg_Reg_D(dst, src)); | 11106 ins_encode( Pop_Reg_Reg_DPR(dst, src)); |
11892 ins_pipe( fpu_reg_reg ); | 11107 ins_pipe( fpu_reg_reg ); |
11893 %} | 11108 %} |
11894 | 11109 |
11895 instruct convF2D_reg(stackSlotD dst, regF src) %{ | 11110 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ |
11896 predicate(UseSSE==1); | 11111 predicate(UseSSE==1); |
11897 match(Set dst (ConvF2D src)); | 11112 match(Set dst (ConvF2D src)); |
11898 format %{ "FST_D $dst,$src\t# D-round" %} | 11113 format %{ "FST_D $dst,$src\t# D-round" %} |
11899 expand %{ | 11114 expand %{ |
11900 roundDouble_mem_reg(dst,src); | 11115 roundDouble_mem_reg(dst,src); |
11901 %} | 11116 %} |
11902 %} | 11117 %} |
11903 | 11118 |
11904 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{ | 11119 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ |
11905 predicate(UseSSE==1); | 11120 predicate(UseSSE==1); |
11906 match(Set dst (ConvF2D src)); | 11121 match(Set dst (ConvF2D src)); |
11907 effect( KILL cr ); | 11122 effect( KILL cr ); |
11908 format %{ "SUB ESP,4\n\t" | 11123 format %{ "SUB ESP,4\n\t" |
11909 "MOVSS [ESP] $src\n\t" | 11124 "MOVSS [ESP] $src\n\t" |
11910 "FLD_S [ESP]\n\t" | 11125 "FLD_S [ESP]\n\t" |
11911 "ADD ESP,4\n\t" | 11126 "ADD ESP,4\n\t" |
11912 "FSTP $dst\t# D-round" %} | 11127 "FSTP $dst\t# D-round" %} |
11913 ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst)); | 11128 ins_encode %{ |
11129 __ subptr(rsp, 4); | |
11130 __ movflt(Address(rsp, 0), $src$$XMMRegister); | |
11131 __ fld_s(Address(rsp, 0)); | |
11132 __ addptr(rsp, 4); | |
11133 __ fstp_d($dst$$reg); | |
11134 %} | |
11914 ins_pipe( pipe_slow ); | 11135 ins_pipe( pipe_slow ); |
11915 %} | 11136 %} |
11916 | 11137 |
11917 instruct convX2XD_reg(regXD dst, regX src) %{ | 11138 instruct convF2D_reg(regD dst, regF src) %{ |
11918 predicate(UseSSE>=2); | 11139 predicate(UseSSE>=2); |
11919 match(Set dst (ConvF2D src)); | 11140 match(Set dst (ConvF2D src)); |
11920 format %{ "CVTSS2SD $dst,$src\t# D-round" %} | 11141 format %{ "CVTSS2SD $dst,$src\t# D-round" %} |
11921 opcode(0xF3, 0x0F, 0x5A); | 11142 ins_encode %{ |
11922 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); | 11143 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); |
11144 %} | |
11923 ins_pipe( pipe_slow ); | 11145 ins_pipe( pipe_slow ); |
11924 %} | 11146 %} |
11925 | 11147 |
11926 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. | 11148 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. |
11927 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ | 11149 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ |
11928 predicate(UseSSE<=1); | 11150 predicate(UseSSE<=1); |
11929 match(Set dst (ConvD2I src)); | 11151 match(Set dst (ConvD2I src)); |
11930 effect( KILL tmp, KILL cr ); | 11152 effect( KILL tmp, KILL cr ); |
11931 format %{ "FLD $src\t# Convert double to int \n\t" | 11153 format %{ "FLD $src\t# Convert double to int \n\t" |
11932 "FLDCW trunc mode\n\t" | 11154 "FLDCW trunc mode\n\t" |
11937 "CMP EAX,0x80000000\n\t" | 11159 "CMP EAX,0x80000000\n\t" |
11938 "JNE,s fast\n\t" | 11160 "JNE,s fast\n\t" |
11939 "FLD_D $src\n\t" | 11161 "FLD_D $src\n\t" |
11940 "CALL d2i_wrapper\n" | 11162 "CALL d2i_wrapper\n" |
11941 "fast:" %} | 11163 "fast:" %} |
11942 ins_encode( Push_Reg_D(src), D2I_encoding(src) ); | 11164 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); |
11943 ins_pipe( pipe_slow ); | 11165 ins_pipe( pipe_slow ); |
11944 %} | 11166 %} |
11945 | 11167 |
11946 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. | 11168 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. |
11947 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{ | 11169 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ |
11948 predicate(UseSSE>=2); | 11170 predicate(UseSSE>=2); |
11949 match(Set dst (ConvD2I src)); | 11171 match(Set dst (ConvD2I src)); |
11950 effect( KILL tmp, KILL cr ); | 11172 effect( KILL tmp, KILL cr ); |
11951 format %{ "CVTTSD2SI $dst, $src\n\t" | 11173 format %{ "CVTTSD2SI $dst, $src\n\t" |
11952 "CMP $dst,0x80000000\n\t" | 11174 "CMP $dst,0x80000000\n\t" |
11955 "MOVSD [ESP], $src\n\t" | 11177 "MOVSD [ESP], $src\n\t" |
11956 "FLD_D [ESP]\n\t" | 11178 "FLD_D [ESP]\n\t" |
11957 "ADD ESP, 8\n\t" | 11179 "ADD ESP, 8\n\t" |
11958 "CALL d2i_wrapper\n" | 11180 "CALL d2i_wrapper\n" |
11959 "fast:" %} | 11181 "fast:" %} |
11960 opcode(0x1); // double-precision conversion | 11182 ins_encode %{ |
11961 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); | 11183 Label fast; |
11184 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); | |
11185 __ cmpl($dst$$Register, 0x80000000); | |
11186 __ jccb(Assembler::notEqual, fast); | |
11187 __ subptr(rsp, 8); | |
11188 __ movdbl(Address(rsp, 0), $src$$XMMRegister); | |
11189 __ fld_d(Address(rsp, 0)); | |
11190 __ addptr(rsp, 8); | |
11191 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); | |
11192 __ bind(fast); | |
11193 %} | |
11962 ins_pipe( pipe_slow ); | 11194 ins_pipe( pipe_slow ); |
11963 %} | 11195 %} |
11964 | 11196 |
11965 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ | 11197 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ |
11966 predicate(UseSSE<=1); | 11198 predicate(UseSSE<=1); |
11967 match(Set dst (ConvD2L src)); | 11199 match(Set dst (ConvD2L src)); |
11968 effect( KILL cr ); | 11200 effect( KILL cr ); |
11969 format %{ "FLD $src\t# Convert double to long\n\t" | 11201 format %{ "FLD $src\t# Convert double to long\n\t" |
11970 "FLDCW trunc mode\n\t" | 11202 "FLDCW trunc mode\n\t" |
11978 "TEST EAX,EAX\n\t" | 11210 "TEST EAX,EAX\n\t" |
11979 "JNE,s fast\n\t" | 11211 "JNE,s fast\n\t" |
11980 "FLD $src\n\t" | 11212 "FLD $src\n\t" |
11981 "CALL d2l_wrapper\n" | 11213 "CALL d2l_wrapper\n" |
11982 "fast:" %} | 11214 "fast:" %} |
11983 ins_encode( Push_Reg_D(src), D2L_encoding(src) ); | 11215 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); |
11984 ins_pipe( pipe_slow ); | 11216 ins_pipe( pipe_slow ); |
11985 %} | 11217 %} |
11986 | 11218 |
11987 // XMM lacks a float/double->long conversion, so use the old FPU stack. | 11219 // XMM lacks a float/double->long conversion, so use the old FPU stack. |
11988 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{ | 11220 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ |
11989 predicate (UseSSE>=2); | 11221 predicate (UseSSE>=2); |
11990 match(Set dst (ConvD2L src)); | 11222 match(Set dst (ConvD2L src)); |
11991 effect( KILL cr ); | 11223 effect( KILL cr ); |
11992 format %{ "SUB ESP,8\t# Convert double to long\n\t" | 11224 format %{ "SUB ESP,8\t# Convert double to long\n\t" |
11993 "MOVSD [ESP],$src\n\t" | 11225 "MOVSD [ESP],$src\n\t" |
12002 "TEST EAX,EAX\n\t" | 11234 "TEST EAX,EAX\n\t" |
12003 "JNE,s fast\n\t" | 11235 "JNE,s fast\n\t" |
12004 "SUB ESP,8\n\t" | 11236 "SUB ESP,8\n\t" |
12005 "MOVSD [ESP],$src\n\t" | 11237 "MOVSD [ESP],$src\n\t" |
12006 "FLD_D [ESP]\n\t" | 11238 "FLD_D [ESP]\n\t" |
11239 "ADD ESP,8\n\t" | |
12007 "CALL d2l_wrapper\n" | 11240 "CALL d2l_wrapper\n" |
12008 "fast:" %} | 11241 "fast:" %} |
12009 ins_encode( XD2L_encoding(src) ); | 11242 ins_encode %{ |
11243 Label fast; | |
11244 __ subptr(rsp, 8); | |
11245 __ movdbl(Address(rsp, 0), $src$$XMMRegister); | |
11246 __ fld_d(Address(rsp, 0)); | |
11247 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); | |
11248 __ fistp_d(Address(rsp, 0)); | |
11249 // Restore the rounding mode, mask the exception | |
11250 if (Compile::current()->in_24_bit_fp_mode()) { | |
11251 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); | |
11252 } else { | |
11253 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); | |
11254 } | |
11255 // Load the converted long, adjust CPU stack | |
11256 __ pop(rax); | |
11257 __ pop(rdx); | |
11258 __ cmpl(rdx, 0x80000000); | |
11259 __ jccb(Assembler::notEqual, fast); | |
11260 __ testl(rax, rax); | |
11261 __ jccb(Assembler::notEqual, fast); | |
11262 __ subptr(rsp, 8); | |
11263 __ movdbl(Address(rsp, 0), $src$$XMMRegister); | |
11264 __ fld_d(Address(rsp, 0)); | |
11265 __ addptr(rsp, 8); | |
11266 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); | |
11267 __ bind(fast); | |
11268 %} | |
12010 ins_pipe( pipe_slow ); | 11269 ins_pipe( pipe_slow ); |
12011 %} | 11270 %} |
12012 | 11271 |
12013 // Convert a double to an int. Java semantics require we do complex | 11272 // Convert a double to an int. Java semantics require we do complex |
12014 // manglations in the corner cases. So we set the rounding mode to | 11273 // manglations in the corner cases. So we set the rounding mode to |
12015 // 'zero', store the darned double down as an int, and reset the | 11274 // 'zero', store the darned double down as an int, and reset the |
12016 // rounding mode to 'nearest'. The hardware stores a flag value down | 11275 // rounding mode to 'nearest'. The hardware stores a flag value down |
12017 // if we would overflow or converted a NAN; we check for this and | 11276 // if we would overflow or converted a NAN; we check for this and |
12018 // and go the slow path if needed. | 11277 // and go the slow path if needed. |
12019 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ | 11278 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ |
12020 predicate(UseSSE==0); | 11279 predicate(UseSSE==0); |
12021 match(Set dst (ConvF2I src)); | 11280 match(Set dst (ConvF2I src)); |
12022 effect( KILL tmp, KILL cr ); | 11281 effect( KILL tmp, KILL cr ); |
12023 format %{ "FLD $src\t# Convert float to int \n\t" | 11282 format %{ "FLD $src\t# Convert float to int \n\t" |
12024 "FLDCW trunc mode\n\t" | 11283 "FLDCW trunc mode\n\t" |
12029 "CMP EAX,0x80000000\n\t" | 11288 "CMP EAX,0x80000000\n\t" |
12030 "JNE,s fast\n\t" | 11289 "JNE,s fast\n\t" |
12031 "FLD $src\n\t" | 11290 "FLD $src\n\t" |
12032 "CALL d2i_wrapper\n" | 11291 "CALL d2i_wrapper\n" |
12033 "fast:" %} | 11292 "fast:" %} |
12034 // D2I_encoding works for F2I | 11293 // DPR2I_encoding works for FPR2I |
12035 ins_encode( Push_Reg_F(src), D2I_encoding(src) ); | 11294 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); |
12036 ins_pipe( pipe_slow ); | 11295 ins_pipe( pipe_slow ); |
12037 %} | 11296 %} |
12038 | 11297 |
12039 // Convert a float in xmm to an int reg. | 11298 // Convert a float in xmm to an int reg. |
12040 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{ | 11299 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ |
12041 predicate(UseSSE>=1); | 11300 predicate(UseSSE>=1); |
12042 match(Set dst (ConvF2I src)); | 11301 match(Set dst (ConvF2I src)); |
12043 effect( KILL tmp, KILL cr ); | 11302 effect( KILL tmp, KILL cr ); |
12044 format %{ "CVTTSS2SI $dst, $src\n\t" | 11303 format %{ "CVTTSS2SI $dst, $src\n\t" |
12045 "CMP $dst,0x80000000\n\t" | 11304 "CMP $dst,0x80000000\n\t" |
12048 "MOVSS [ESP], $src\n\t" | 11307 "MOVSS [ESP], $src\n\t" |
12049 "FLD [ESP]\n\t" | 11308 "FLD [ESP]\n\t" |
12050 "ADD ESP, 4\n\t" | 11309 "ADD ESP, 4\n\t" |
12051 "CALL d2i_wrapper\n" | 11310 "CALL d2i_wrapper\n" |
12052 "fast:" %} | 11311 "fast:" %} |
12053 opcode(0x0); // single-precision conversion | 11312 ins_encode %{ |
12054 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); | 11313 Label fast; |
11314 __ cvttss2sil($dst$$Register, $src$$XMMRegister); | |
11315 __ cmpl($dst$$Register, 0x80000000); | |
11316 __ jccb(Assembler::notEqual, fast); | |
11317 __ subptr(rsp, 4); | |
11318 __ movflt(Address(rsp, 0), $src$$XMMRegister); | |
11319 __ fld_s(Address(rsp, 0)); | |
11320 __ addptr(rsp, 4); | |
11321 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); | |
11322 __ bind(fast); | |
11323 %} | |
12055 ins_pipe( pipe_slow ); | 11324 ins_pipe( pipe_slow ); |
12056 %} | 11325 %} |
12057 | 11326 |
12058 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ | 11327 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ |
12059 predicate(UseSSE==0); | 11328 predicate(UseSSE==0); |
12060 match(Set dst (ConvF2L src)); | 11329 match(Set dst (ConvF2L src)); |
12061 effect( KILL cr ); | 11330 effect( KILL cr ); |
12062 format %{ "FLD $src\t# Convert float to long\n\t" | 11331 format %{ "FLD $src\t# Convert float to long\n\t" |
12063 "FLDCW trunc mode\n\t" | 11332 "FLDCW trunc mode\n\t" |
12071 "TEST EAX,EAX\n\t" | 11340 "TEST EAX,EAX\n\t" |
12072 "JNE,s fast\n\t" | 11341 "JNE,s fast\n\t" |
12073 "FLD $src\n\t" | 11342 "FLD $src\n\t" |
12074 "CALL d2l_wrapper\n" | 11343 "CALL d2l_wrapper\n" |
12075 "fast:" %} | 11344 "fast:" %} |
12076 // D2L_encoding works for F2L | 11345 // DPR2L_encoding works for FPR2L |
12077 ins_encode( Push_Reg_F(src), D2L_encoding(src) ); | 11346 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); |
12078 ins_pipe( pipe_slow ); | 11347 ins_pipe( pipe_slow ); |
12079 %} | 11348 %} |
12080 | 11349 |
12081 // XMM lacks a float/double->long conversion, so use the old FPU stack. | 11350 // XMM lacks a float/double->long conversion, so use the old FPU stack. |
12082 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{ | 11351 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ |
12083 predicate (UseSSE>=1); | 11352 predicate (UseSSE>=1); |
12084 match(Set dst (ConvF2L src)); | 11353 match(Set dst (ConvF2L src)); |
12085 effect( KILL cr ); | 11354 effect( KILL cr ); |
12086 format %{ "SUB ESP,8\t# Convert float to long\n\t" | 11355 format %{ "SUB ESP,8\t# Convert float to long\n\t" |
12087 "MOVSS [ESP],$src\n\t" | 11356 "MOVSS [ESP],$src\n\t" |
12099 "MOVSS [ESP],$src\n\t" | 11368 "MOVSS [ESP],$src\n\t" |
12100 "FLD_S [ESP]\n\t" | 11369 "FLD_S [ESP]\n\t" |
12101 "ADD ESP,4\n\t" | 11370 "ADD ESP,4\n\t" |
12102 "CALL d2l_wrapper\n" | 11371 "CALL d2l_wrapper\n" |
12103 "fast:" %} | 11372 "fast:" %} |
12104 ins_encode( X2L_encoding(src) ); | 11373 ins_encode %{ |
11374 Label fast; | |
11375 __ subptr(rsp, 8); | |
11376 __ movflt(Address(rsp, 0), $src$$XMMRegister); | |
11377 __ fld_s(Address(rsp, 0)); | |
11378 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); | |
11379 __ fistp_d(Address(rsp, 0)); | |
11380 // Restore the rounding mode, mask the exception | |
11381 if (Compile::current()->in_24_bit_fp_mode()) { | |
11382 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); | |
11383 } else { | |
11384 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); | |
11385 } | |
11386 // Load the converted long, adjust CPU stack | |
11387 __ pop(rax); | |
11388 __ pop(rdx); | |
11389 __ cmpl(rdx, 0x80000000); | |
11390 __ jccb(Assembler::notEqual, fast); | |
11391 __ testl(rax, rax); | |
11392 __ jccb(Assembler::notEqual, fast); | |
11393 __ subptr(rsp, 4); | |
11394 __ movflt(Address(rsp, 0), $src$$XMMRegister); | |
11395 __ fld_s(Address(rsp, 0)); | |
11396 __ addptr(rsp, 4); | |
11397 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); | |
11398 __ bind(fast); | |
11399 %} | |
12105 ins_pipe( pipe_slow ); | 11400 ins_pipe( pipe_slow ); |
12106 %} | 11401 %} |
12107 | 11402 |
12108 instruct convI2D_reg(regD dst, stackSlotI src) %{ | 11403 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ |
12109 predicate( UseSSE<=1 ); | 11404 predicate( UseSSE<=1 ); |
12110 match(Set dst (ConvI2D src)); | 11405 match(Set dst (ConvI2D src)); |
12111 format %{ "FILD $src\n\t" | 11406 format %{ "FILD $src\n\t" |
12112 "FSTP $dst" %} | 11407 "FSTP $dst" %} |
12113 opcode(0xDB, 0x0); /* DB /0 */ | 11408 opcode(0xDB, 0x0); /* DB /0 */ |
12114 ins_encode(Push_Mem_I(src), Pop_Reg_D(dst)); | 11409 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); |
12115 ins_pipe( fpu_reg_mem ); | 11410 ins_pipe( fpu_reg_mem ); |
12116 %} | 11411 %} |
12117 | 11412 |
12118 instruct convI2XD_reg(regXD dst, eRegI src) %{ | 11413 instruct convI2D_reg(regD dst, eRegI src) %{ |
12119 predicate( UseSSE>=2 && !UseXmmI2D ); | 11414 predicate( UseSSE>=2 && !UseXmmI2D ); |
12120 match(Set dst (ConvI2D src)); | 11415 match(Set dst (ConvI2D src)); |
12121 format %{ "CVTSI2SD $dst,$src" %} | 11416 format %{ "CVTSI2SD $dst,$src" %} |
12122 opcode(0xF2, 0x0F, 0x2A); | 11417 ins_encode %{ |
12123 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); | 11418 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); |
11419 %} | |
12124 ins_pipe( pipe_slow ); | 11420 ins_pipe( pipe_slow ); |
12125 %} | 11421 %} |
12126 | 11422 |
12127 instruct convI2XD_mem(regXD dst, memory mem) %{ | 11423 instruct convI2D_mem(regD dst, memory mem) %{ |
12128 predicate( UseSSE>=2 ); | 11424 predicate( UseSSE>=2 ); |
12129 match(Set dst (ConvI2D (LoadI mem))); | 11425 match(Set dst (ConvI2D (LoadI mem))); |
12130 format %{ "CVTSI2SD $dst,$mem" %} | 11426 format %{ "CVTSI2SD $dst,$mem" %} |
12131 opcode(0xF2, 0x0F, 0x2A); | 11427 ins_encode %{ |
12132 ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem)); | 11428 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); |
11429 %} | |
12133 ins_pipe( pipe_slow ); | 11430 ins_pipe( pipe_slow ); |
12134 %} | 11431 %} |
12135 | 11432 |
12136 instruct convXI2XD_reg(regXD dst, eRegI src) | 11433 instruct convXI2D_reg(regD dst, eRegI src) |
12137 %{ | 11434 %{ |
12138 predicate( UseSSE>=2 && UseXmmI2D ); | 11435 predicate( UseSSE>=2 && UseXmmI2D ); |
12139 match(Set dst (ConvI2D src)); | 11436 match(Set dst (ConvI2D src)); |
12140 | 11437 |
12141 format %{ "MOVD $dst,$src\n\t" | 11438 format %{ "MOVD $dst,$src\n\t" |
12145 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); | 11442 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); |
12146 %} | 11443 %} |
12147 ins_pipe(pipe_slow); // XXX | 11444 ins_pipe(pipe_slow); // XXX |
12148 %} | 11445 %} |
12149 | 11446 |
12150 instruct convI2D_mem(regD dst, memory mem) %{ | 11447 instruct convI2DPR_mem(regDPR dst, memory mem) %{ |
12151 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); | 11448 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); |
12152 match(Set dst (ConvI2D (LoadI mem))); | 11449 match(Set dst (ConvI2D (LoadI mem))); |
12153 format %{ "FILD $mem\n\t" | 11450 format %{ "FILD $mem\n\t" |
12154 "FSTP $dst" %} | 11451 "FSTP $dst" %} |
12155 opcode(0xDB); /* DB /0 */ | 11452 opcode(0xDB); /* DB /0 */ |
12156 ins_encode( OpcP, RMopc_Mem(0x00,mem), | 11453 ins_encode( OpcP, RMopc_Mem(0x00,mem), |
12157 Pop_Reg_D(dst)); | 11454 Pop_Reg_DPR(dst)); |
12158 ins_pipe( fpu_reg_mem ); | 11455 ins_pipe( fpu_reg_mem ); |
12159 %} | 11456 %} |
12160 | 11457 |
12161 // Convert a byte to a float; no rounding step needed. | 11458 // Convert a byte to a float; no rounding step needed. |
12162 instruct conv24I2F_reg(regF dst, stackSlotI src) %{ | 11459 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ |
12163 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); | 11460 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); |
12164 match(Set dst (ConvI2F src)); | 11461 match(Set dst (ConvI2F src)); |
12165 format %{ "FILD $src\n\t" | 11462 format %{ "FILD $src\n\t" |
12166 "FSTP $dst" %} | 11463 "FSTP $dst" %} |
12167 | 11464 |
12168 opcode(0xDB, 0x0); /* DB /0 */ | 11465 opcode(0xDB, 0x0); /* DB /0 */ |
12169 ins_encode(Push_Mem_I(src), Pop_Reg_F(dst)); | 11466 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); |
12170 ins_pipe( fpu_reg_mem ); | 11467 ins_pipe( fpu_reg_mem ); |
12171 %} | 11468 %} |
12172 | 11469 |
12173 // In 24-bit mode, force exponent rounding by storing back out | 11470 // In 24-bit mode, force exponent rounding by storing back out |
12174 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{ | 11471 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ |
12175 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); | 11472 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); |
12176 match(Set dst (ConvI2F src)); | 11473 match(Set dst (ConvI2F src)); |
12177 ins_cost(200); | 11474 ins_cost(200); |
12178 format %{ "FILD $src\n\t" | 11475 format %{ "FILD $src\n\t" |
12179 "FSTP_S $dst" %} | 11476 "FSTP_S $dst" %} |
12180 opcode(0xDB, 0x0); /* DB /0 */ | 11477 opcode(0xDB, 0x0); /* DB /0 */ |
12181 ins_encode( Push_Mem_I(src), | 11478 ins_encode( Push_Mem_I(src), |
12182 Pop_Mem_F(dst)); | 11479 Pop_Mem_FPR(dst)); |
12183 ins_pipe( fpu_mem_mem ); | 11480 ins_pipe( fpu_mem_mem ); |
12184 %} | 11481 %} |
12185 | 11482 |
12186 // In 24-bit mode, force exponent rounding by storing back out | 11483 // In 24-bit mode, force exponent rounding by storing back out |
12187 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{ | 11484 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ |
12188 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); | 11485 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); |
12189 match(Set dst (ConvI2F (LoadI mem))); | 11486 match(Set dst (ConvI2F (LoadI mem))); |
12190 ins_cost(200); | 11487 ins_cost(200); |
12191 format %{ "FILD $mem\n\t" | 11488 format %{ "FILD $mem\n\t" |
12192 "FSTP_S $dst" %} | 11489 "FSTP_S $dst" %} |
12193 opcode(0xDB); /* DB /0 */ | 11490 opcode(0xDB); /* DB /0 */ |
12194 ins_encode( OpcP, RMopc_Mem(0x00,mem), | 11491 ins_encode( OpcP, RMopc_Mem(0x00,mem), |
12195 Pop_Mem_F(dst)); | 11492 Pop_Mem_FPR(dst)); |
12196 ins_pipe( fpu_mem_mem ); | 11493 ins_pipe( fpu_mem_mem ); |
12197 %} | 11494 %} |
12198 | 11495 |
12199 // This instruction does not round to 24-bits | 11496 // This instruction does not round to 24-bits |
12200 instruct convI2F_reg(regF dst, stackSlotI src) %{ | 11497 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ |
12201 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 11498 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
12202 match(Set dst (ConvI2F src)); | 11499 match(Set dst (ConvI2F src)); |
12203 format %{ "FILD $src\n\t" | 11500 format %{ "FILD $src\n\t" |
12204 "FSTP $dst" %} | 11501 "FSTP $dst" %} |
12205 opcode(0xDB, 0x0); /* DB /0 */ | 11502 opcode(0xDB, 0x0); /* DB /0 */ |
12206 ins_encode( Push_Mem_I(src), | 11503 ins_encode( Push_Mem_I(src), |
12207 Pop_Reg_F(dst)); | 11504 Pop_Reg_FPR(dst)); |
12208 ins_pipe( fpu_reg_mem ); | 11505 ins_pipe( fpu_reg_mem ); |
12209 %} | 11506 %} |
12210 | 11507 |
12211 // This instruction does not round to 24-bits | 11508 // This instruction does not round to 24-bits |
12212 instruct convI2F_mem(regF dst, memory mem) %{ | 11509 instruct convI2FPR_mem(regFPR dst, memory mem) %{ |
12213 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); | 11510 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
12214 match(Set dst (ConvI2F (LoadI mem))); | 11511 match(Set dst (ConvI2F (LoadI mem))); |
12215 format %{ "FILD $mem\n\t" | 11512 format %{ "FILD $mem\n\t" |
12216 "FSTP $dst" %} | 11513 "FSTP $dst" %} |
12217 opcode(0xDB); /* DB /0 */ | 11514 opcode(0xDB); /* DB /0 */ |
12218 ins_encode( OpcP, RMopc_Mem(0x00,mem), | 11515 ins_encode( OpcP, RMopc_Mem(0x00,mem), |
12219 Pop_Reg_F(dst)); | 11516 Pop_Reg_FPR(dst)); |
12220 ins_pipe( fpu_reg_mem ); | 11517 ins_pipe( fpu_reg_mem ); |
12221 %} | 11518 %} |
12222 | 11519 |
12223 // Convert an int to a float in xmm; no rounding step needed. | 11520 // Convert an int to a float in xmm; no rounding step needed. |
12224 instruct convI2X_reg(regX dst, eRegI src) %{ | 11521 instruct convI2F_reg(regF dst, eRegI src) %{ |
12225 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); | 11522 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); |
12226 match(Set dst (ConvI2F src)); | 11523 match(Set dst (ConvI2F src)); |
12227 format %{ "CVTSI2SS $dst, $src" %} | 11524 format %{ "CVTSI2SS $dst, $src" %} |
12228 | 11525 ins_encode %{ |
12229 opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */ | 11526 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); |
12230 ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); | 11527 %} |
12231 ins_pipe( pipe_slow ); | 11528 ins_pipe( pipe_slow ); |
12232 %} | 11529 %} |
12233 | 11530 |
12234 instruct convXI2X_reg(regX dst, eRegI src) | 11531 instruct convXI2F_reg(regF dst, eRegI src) |
12235 %{ | 11532 %{ |
12236 predicate( UseSSE>=2 && UseXmmI2F ); | 11533 predicate( UseSSE>=2 && UseXmmI2F ); |
12237 match(Set dst (ConvI2F src)); | 11534 match(Set dst (ConvI2F src)); |
12238 | 11535 |
12239 format %{ "MOVD $dst,$src\n\t" | 11536 format %{ "MOVD $dst,$src\n\t" |
12278 opcode(0x33); // XOR | 11575 opcode(0x33); // XOR |
12279 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); | 11576 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); |
12280 ins_pipe( ialu_reg_reg_long ); | 11577 ins_pipe( ialu_reg_reg_long ); |
12281 %} | 11578 %} |
12282 | 11579 |
12283 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ | 11580 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ |
12284 predicate (UseSSE<=1); | 11581 predicate (UseSSE<=1); |
12285 match(Set dst (ConvL2D src)); | 11582 match(Set dst (ConvL2D src)); |
12286 effect( KILL cr ); | 11583 effect( KILL cr ); |
12287 format %{ "PUSH $src.hi\t# Convert long to double\n\t" | 11584 format %{ "PUSH $src.hi\t# Convert long to double\n\t" |
12288 "PUSH $src.lo\n\t" | 11585 "PUSH $src.lo\n\t" |
12289 "FILD ST,[ESP + #0]\n\t" | 11586 "FILD ST,[ESP + #0]\n\t" |
12290 "ADD ESP,8\n\t" | 11587 "ADD ESP,8\n\t" |
12291 "FSTP_D $dst\t# D-round" %} | 11588 "FSTP_D $dst\t# D-round" %} |
12292 opcode(0xDF, 0x5); /* DF /5 */ | 11589 opcode(0xDF, 0x5); /* DF /5 */ |
12293 ins_encode(convert_long_double(src), Pop_Mem_D(dst)); | 11590 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); |
12294 ins_pipe( pipe_slow ); | 11591 ins_pipe( pipe_slow ); |
12295 %} | 11592 %} |
12296 | 11593 |
12297 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{ | 11594 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ |
12298 predicate (UseSSE>=2); | 11595 predicate (UseSSE>=2); |
12299 match(Set dst (ConvL2D src)); | 11596 match(Set dst (ConvL2D src)); |
12300 effect( KILL cr ); | 11597 effect( KILL cr ); |
12301 format %{ "PUSH $src.hi\t# Convert long to double\n\t" | 11598 format %{ "PUSH $src.hi\t# Convert long to double\n\t" |
12302 "PUSH $src.lo\n\t" | 11599 "PUSH $src.lo\n\t" |
12303 "FILD_D [ESP]\n\t" | 11600 "FILD_D [ESP]\n\t" |
12304 "FSTP_D [ESP]\n\t" | 11601 "FSTP_D [ESP]\n\t" |
12305 "MOVSD $dst,[ESP]\n\t" | 11602 "MOVSD $dst,[ESP]\n\t" |
12306 "ADD ESP,8" %} | 11603 "ADD ESP,8" %} |
12307 opcode(0xDF, 0x5); /* DF /5 */ | 11604 opcode(0xDF, 0x5); /* DF /5 */ |
12308 ins_encode(convert_long_double2(src), Push_ResultXD(dst)); | 11605 ins_encode(convert_long_double2(src), Push_ResultD(dst)); |
12309 ins_pipe( pipe_slow ); | 11606 ins_pipe( pipe_slow ); |
12310 %} | 11607 %} |
12311 | 11608 |
12312 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{ | 11609 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ |
12313 predicate (UseSSE>=1); | 11610 predicate (UseSSE>=1); |
12314 match(Set dst (ConvL2F src)); | 11611 match(Set dst (ConvL2F src)); |
12315 effect( KILL cr ); | 11612 effect( KILL cr ); |
12316 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" | 11613 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" |
12317 "PUSH $src.lo\n\t" | 11614 "PUSH $src.lo\n\t" |
12318 "FILD_D [ESP]\n\t" | 11615 "FILD_D [ESP]\n\t" |
12319 "FSTP_S [ESP]\n\t" | 11616 "FSTP_S [ESP]\n\t" |
12320 "MOVSS $dst,[ESP]\n\t" | 11617 "MOVSS $dst,[ESP]\n\t" |
12321 "ADD ESP,8" %} | 11618 "ADD ESP,8" %} |
12322 opcode(0xDF, 0x5); /* DF /5 */ | 11619 opcode(0xDF, 0x5); /* DF /5 */ |
12323 ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8)); | 11620 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); |
12324 ins_pipe( pipe_slow ); | 11621 ins_pipe( pipe_slow ); |
12325 %} | 11622 %} |
12326 | 11623 |
12327 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ | 11624 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ |
12328 match(Set dst (ConvL2F src)); | 11625 match(Set dst (ConvL2F src)); |
12329 effect( KILL cr ); | 11626 effect( KILL cr ); |
12330 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" | 11627 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" |
12331 "PUSH $src.lo\n\t" | 11628 "PUSH $src.lo\n\t" |
12332 "FILD ST,[ESP + #0]\n\t" | 11629 "FILD ST,[ESP + #0]\n\t" |
12333 "ADD ESP,8\n\t" | 11630 "ADD ESP,8\n\t" |
12334 "FSTP_S $dst\t# F-round" %} | 11631 "FSTP_S $dst\t# F-round" %} |
12335 opcode(0xDF, 0x5); /* DF /5 */ | 11632 opcode(0xDF, 0x5); /* DF /5 */ |
12336 ins_encode(convert_long_double(src), Pop_Mem_F(dst)); | 11633 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); |
12337 ins_pipe( pipe_slow ); | 11634 ins_pipe( pipe_slow ); |
12338 %} | 11635 %} |
12339 | 11636 |
12340 instruct convL2I_reg( eRegI dst, eRegL src ) %{ | 11637 instruct convL2I_reg( eRegI dst, eRegL src ) %{ |
12341 match(Set dst (ConvL2I src)); | 11638 match(Set dst (ConvL2I src)); |
12349 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{ | 11646 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{ |
12350 match(Set dst (MoveF2I src)); | 11647 match(Set dst (MoveF2I src)); |
12351 effect( DEF dst, USE src ); | 11648 effect( DEF dst, USE src ); |
12352 ins_cost(100); | 11649 ins_cost(100); |
12353 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} | 11650 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} |
12354 opcode(0x8B); | 11651 ins_encode %{ |
12355 ins_encode( OpcP, RegMem(dst,src)); | 11652 __ movl($dst$$Register, Address(rsp, $src$$disp)); |
11653 %} | |
12356 ins_pipe( ialu_reg_mem ); | 11654 ins_pipe( ialu_reg_mem ); |
12357 %} | 11655 %} |
12358 | 11656 |
12359 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ | 11657 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ |
12360 predicate(UseSSE==0); | 11658 predicate(UseSSE==0); |
12361 match(Set dst (MoveF2I src)); | 11659 match(Set dst (MoveF2I src)); |
12362 effect( DEF dst, USE src ); | 11660 effect( DEF dst, USE src ); |
12363 | 11661 |
12364 ins_cost(125); | 11662 ins_cost(125); |
12365 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} | 11663 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} |
12366 ins_encode( Pop_Mem_Reg_F(dst, src) ); | 11664 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); |
12367 ins_pipe( fpu_mem_reg ); | 11665 ins_pipe( fpu_mem_reg ); |
12368 %} | 11666 %} |
12369 | 11667 |
12370 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{ | 11668 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ |
12371 predicate(UseSSE>=1); | 11669 predicate(UseSSE>=1); |
12372 match(Set dst (MoveF2I src)); | 11670 match(Set dst (MoveF2I src)); |
12373 effect( DEF dst, USE src ); | 11671 effect( DEF dst, USE src ); |
12374 | 11672 |
12375 ins_cost(95); | 11673 ins_cost(95); |
12376 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} | 11674 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} |
12377 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst)); | 11675 ins_encode %{ |
11676 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); | |
11677 %} | |
12378 ins_pipe( pipe_slow ); | 11678 ins_pipe( pipe_slow ); |
12379 %} | 11679 %} |
12380 | 11680 |
12381 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{ | 11681 instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{ |
12382 predicate(UseSSE>=2); | 11682 predicate(UseSSE>=2); |
12383 match(Set dst (MoveF2I src)); | 11683 match(Set dst (MoveF2I src)); |
12384 effect( DEF dst, USE src ); | 11684 effect( DEF dst, USE src ); |
12385 ins_cost(85); | 11685 ins_cost(85); |
12386 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} | 11686 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} |
12387 ins_encode( MovX2I_reg(dst, src)); | 11687 ins_encode %{ |
11688 __ movdl($dst$$Register, $src$$XMMRegister); | |
11689 %} | |
12388 ins_pipe( pipe_slow ); | 11690 ins_pipe( pipe_slow ); |
12389 %} | 11691 %} |
12390 | 11692 |
12391 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{ | 11693 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{ |
12392 match(Set dst (MoveI2F src)); | 11694 match(Set dst (MoveI2F src)); |
12393 effect( DEF dst, USE src ); | 11695 effect( DEF dst, USE src ); |
12394 | 11696 |
12395 ins_cost(100); | 11697 ins_cost(100); |
12396 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} | 11698 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} |
12397 opcode(0x89); | 11699 ins_encode %{ |
12398 ins_encode( OpcPRegSS( dst, src ) ); | 11700 __ movl(Address(rsp, $dst$$disp), $src$$Register); |
11701 %} | |
12399 ins_pipe( ialu_mem_reg ); | 11702 ins_pipe( ialu_mem_reg ); |
12400 %} | 11703 %} |
12401 | 11704 |
12402 | 11705 |
12403 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ | 11706 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ |
12404 predicate(UseSSE==0); | 11707 predicate(UseSSE==0); |
12405 match(Set dst (MoveI2F src)); | 11708 match(Set dst (MoveI2F src)); |
12406 effect(DEF dst, USE src); | 11709 effect(DEF dst, USE src); |
12407 | 11710 |
12408 ins_cost(125); | 11711 ins_cost(125); |
12409 format %{ "FLD_S $src\n\t" | 11712 format %{ "FLD_S $src\n\t" |
12410 "FSTP $dst\t# MoveI2F_stack_reg" %} | 11713 "FSTP $dst\t# MoveI2F_stack_reg" %} |
12411 opcode(0xD9); /* D9 /0, FLD m32real */ | 11714 opcode(0xD9); /* D9 /0, FLD m32real */ |
12412 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), | 11715 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), |
12413 Pop_Reg_F(dst) ); | 11716 Pop_Reg_FPR(dst) ); |
12414 ins_pipe( fpu_reg_mem ); | 11717 ins_pipe( fpu_reg_mem ); |
12415 %} | 11718 %} |
12416 | 11719 |
12417 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{ | 11720 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ |
12418 predicate(UseSSE>=1); | 11721 predicate(UseSSE>=1); |
12419 match(Set dst (MoveI2F src)); | 11722 match(Set dst (MoveI2F src)); |
12420 effect( DEF dst, USE src ); | 11723 effect( DEF dst, USE src ); |
12421 | 11724 |
12422 ins_cost(95); | 11725 ins_cost(95); |
12423 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} | 11726 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} |
12424 ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); | 11727 ins_encode %{ |
11728 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); | |
11729 %} | |
12425 ins_pipe( pipe_slow ); | 11730 ins_pipe( pipe_slow ); |
12426 %} | 11731 %} |
12427 | 11732 |
12428 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{ | 11733 instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{ |
12429 predicate(UseSSE>=2); | 11734 predicate(UseSSE>=2); |
12430 match(Set dst (MoveI2F src)); | 11735 match(Set dst (MoveI2F src)); |
12431 effect( DEF dst, USE src ); | 11736 effect( DEF dst, USE src ); |
12432 | 11737 |
12433 ins_cost(85); | 11738 ins_cost(85); |
12434 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} | 11739 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} |
12435 ins_encode( MovI2X_reg(dst, src) ); | 11740 ins_encode %{ |
11741 __ movdl($dst$$XMMRegister, $src$$Register); | |
11742 %} | |
12436 ins_pipe( pipe_slow ); | 11743 ins_pipe( pipe_slow ); |
12437 %} | 11744 %} |
12438 | 11745 |
12439 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ | 11746 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ |
12440 match(Set dst (MoveD2L src)); | 11747 match(Set dst (MoveD2L src)); |
12446 opcode(0x8B, 0x8B); | 11753 opcode(0x8B, 0x8B); |
12447 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); | 11754 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); |
12448 ins_pipe( ialu_mem_long_reg ); | 11755 ins_pipe( ialu_mem_long_reg ); |
12449 %} | 11756 %} |
12450 | 11757 |
12451 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ | 11758 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ |
12452 predicate(UseSSE<=1); | 11759 predicate(UseSSE<=1); |
12453 match(Set dst (MoveD2L src)); | 11760 match(Set dst (MoveD2L src)); |
12454 effect(DEF dst, USE src); | 11761 effect(DEF dst, USE src); |
12455 | 11762 |
12456 ins_cost(125); | 11763 ins_cost(125); |
12457 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} | 11764 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} |
12458 ins_encode( Pop_Mem_Reg_D(dst, src) ); | 11765 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); |
12459 ins_pipe( fpu_mem_reg ); | 11766 ins_pipe( fpu_mem_reg ); |
12460 %} | 11767 %} |
12461 | 11768 |
12462 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{ | 11769 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ |
12463 predicate(UseSSE>=2); | 11770 predicate(UseSSE>=2); |
12464 match(Set dst (MoveD2L src)); | 11771 match(Set dst (MoveD2L src)); |
12465 effect(DEF dst, USE src); | 11772 effect(DEF dst, USE src); |
12466 ins_cost(95); | 11773 ins_cost(95); |
12467 | |
12468 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} | 11774 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} |
12469 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst)); | 11775 ins_encode %{ |
11776 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); | |
11777 %} | |
12470 ins_pipe( pipe_slow ); | 11778 ins_pipe( pipe_slow ); |
12471 %} | 11779 %} |
12472 | 11780 |
12473 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{ | 11781 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ |
12474 predicate(UseSSE>=2); | 11782 predicate(UseSSE>=2); |
12475 match(Set dst (MoveD2L src)); | 11783 match(Set dst (MoveD2L src)); |
12476 effect(DEF dst, USE src, TEMP tmp); | 11784 effect(DEF dst, USE src, TEMP tmp); |
12477 ins_cost(85); | 11785 ins_cost(85); |
12478 format %{ "MOVD $dst.lo,$src\n\t" | 11786 format %{ "MOVD $dst.lo,$src\n\t" |
12479 "PSHUFLW $tmp,$src,0x4E\n\t" | 11787 "PSHUFLW $tmp,$src,0x4E\n\t" |
12480 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} | 11788 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} |
12481 ins_encode( MovXD2L_reg(dst, src, tmp) ); | 11789 ins_encode %{ |
11790 __ movdl($dst$$Register, $src$$XMMRegister); | |
11791 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); | |
11792 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); | |
11793 %} | |
12482 ins_pipe( pipe_slow ); | 11794 ins_pipe( pipe_slow ); |
12483 %} | 11795 %} |
12484 | 11796 |
12485 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ | 11797 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ |
12486 match(Set dst (MoveL2D src)); | 11798 match(Set dst (MoveL2D src)); |
12493 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); | 11805 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); |
12494 ins_pipe( ialu_mem_long_reg ); | 11806 ins_pipe( ialu_mem_long_reg ); |
12495 %} | 11807 %} |
12496 | 11808 |
12497 | 11809 |
12498 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ | 11810 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ |
12499 predicate(UseSSE<=1); | 11811 predicate(UseSSE<=1); |
12500 match(Set dst (MoveL2D src)); | 11812 match(Set dst (MoveL2D src)); |
12501 effect(DEF dst, USE src); | 11813 effect(DEF dst, USE src); |
12502 ins_cost(125); | 11814 ins_cost(125); |
12503 | 11815 |
12504 format %{ "FLD_D $src\n\t" | 11816 format %{ "FLD_D $src\n\t" |
12505 "FSTP $dst\t# MoveL2D_stack_reg" %} | 11817 "FSTP $dst\t# MoveL2D_stack_reg" %} |
12506 opcode(0xDD); /* DD /0, FLD m64real */ | 11818 opcode(0xDD); /* DD /0, FLD m64real */ |
12507 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), | 11819 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), |
12508 Pop_Reg_D(dst) ); | 11820 Pop_Reg_DPR(dst) ); |
12509 ins_pipe( fpu_reg_mem ); | 11821 ins_pipe( fpu_reg_mem ); |
12510 %} | 11822 %} |
12511 | 11823 |
12512 | 11824 |
12513 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{ | 11825 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ |
12514 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); | 11826 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); |
12515 match(Set dst (MoveL2D src)); | 11827 match(Set dst (MoveL2D src)); |
12516 effect(DEF dst, USE src); | 11828 effect(DEF dst, USE src); |
12517 | 11829 |
12518 ins_cost(95); | 11830 ins_cost(95); |
12519 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} | 11831 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} |
12520 ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); | 11832 ins_encode %{ |
11833 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); | |
11834 %} | |
12521 ins_pipe( pipe_slow ); | 11835 ins_pipe( pipe_slow ); |
12522 %} | 11836 %} |
12523 | 11837 |
12524 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{ | 11838 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ |
12525 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); | 11839 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); |
12526 match(Set dst (MoveL2D src)); | 11840 match(Set dst (MoveL2D src)); |
12527 effect(DEF dst, USE src); | 11841 effect(DEF dst, USE src); |
12528 | 11842 |
12529 ins_cost(95); | 11843 ins_cost(95); |
12530 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} | 11844 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} |
12531 ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src)); | 11845 ins_encode %{ |
11846 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); | |
11847 %} | |
12532 ins_pipe( pipe_slow ); | 11848 ins_pipe( pipe_slow ); |
12533 %} | 11849 %} |
12534 | 11850 |
12535 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{ | 11851 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ |
12536 predicate(UseSSE>=2); | 11852 predicate(UseSSE>=2); |
12537 match(Set dst (MoveL2D src)); | 11853 match(Set dst (MoveL2D src)); |
12538 effect(TEMP dst, USE src, TEMP tmp); | 11854 effect(TEMP dst, USE src, TEMP tmp); |
12539 ins_cost(85); | 11855 ins_cost(85); |
12540 format %{ "MOVD $dst,$src.lo\n\t" | 11856 format %{ "MOVD $dst,$src.lo\n\t" |
12541 "MOVD $tmp,$src.hi\n\t" | 11857 "MOVD $tmp,$src.hi\n\t" |
12542 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} | 11858 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} |
12543 ins_encode( MovL2XD_reg(dst, src, tmp) ); | 11859 ins_encode %{ |
11860 __ movdl($dst$$XMMRegister, $src$$Register); | |
11861 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); | |
11862 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); | |
11863 %} | |
12544 ins_pipe( pipe_slow ); | 11864 ins_pipe( pipe_slow ); |
12545 %} | 11865 %} |
12546 | 11866 |
12547 // Replicate scalar to packed byte (1 byte) values in xmm | 11867 // Replicate scalar to packed byte (1 byte) values in xmm |
12548 instruct Repl8B_reg(regXD dst, regXD src) %{ | 11868 instruct Repl8B_reg(regD dst, regD src) %{ |
12549 predicate(UseSSE>=2); | 11869 predicate(UseSSE>=2); |
12550 match(Set dst (Replicate8B src)); | 11870 match(Set dst (Replicate8B src)); |
12551 format %{ "MOVDQA $dst,$src\n\t" | 11871 format %{ "MOVDQA $dst,$src\n\t" |
12552 "PUNPCKLBW $dst,$dst\n\t" | 11872 "PUNPCKLBW $dst,$dst\n\t" |
12553 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} | 11873 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} |
12554 ins_encode( pshufd_8x8(dst, src)); | 11874 ins_encode %{ |
11875 if ($dst$$reg != $src$$reg) { | |
11876 __ movdqa($dst$$XMMRegister, $src$$XMMRegister); | |
11877 } | |
11878 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); | |
11879 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); | |
11880 %} | |
12555 ins_pipe( pipe_slow ); | 11881 ins_pipe( pipe_slow ); |
12556 %} | 11882 %} |
12557 | 11883 |
12558 // Replicate scalar to packed byte (1 byte) values in xmm | 11884 // Replicate scalar to packed byte (1 byte) values in xmm |
12559 instruct Repl8B_eRegI(regXD dst, eRegI src) %{ | 11885 instruct Repl8B_eRegI(regD dst, eRegI src) %{ |
12560 predicate(UseSSE>=2); | 11886 predicate(UseSSE>=2); |
12561 match(Set dst (Replicate8B src)); | 11887 match(Set dst (Replicate8B src)); |
12562 format %{ "MOVD $dst,$src\n\t" | 11888 format %{ "MOVD $dst,$src\n\t" |
12563 "PUNPCKLBW $dst,$dst\n\t" | 11889 "PUNPCKLBW $dst,$dst\n\t" |
12564 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} | 11890 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} |
12565 ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); | 11891 ins_encode %{ |
11892 __ movdl($dst$$XMMRegister, $src$$Register); | |
11893 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); | |
11894 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); | |
11895 %} | |
12566 ins_pipe( pipe_slow ); | 11896 ins_pipe( pipe_slow ); |
12567 %} | 11897 %} |
12568 | 11898 |
12569 // Replicate scalar zero to packed byte (1 byte) values in xmm | 11899 // Replicate scalar zero to packed byte (1 byte) values in xmm |
12570 instruct Repl8B_immI0(regXD dst, immI0 zero) %{ | 11900 instruct Repl8B_immI0(regD dst, immI0 zero) %{ |
12571 predicate(UseSSE>=2); | 11901 predicate(UseSSE>=2); |
12572 match(Set dst (Replicate8B zero)); | 11902 match(Set dst (Replicate8B zero)); |
12573 format %{ "PXOR $dst,$dst\t! replicate8B" %} | 11903 format %{ "PXOR $dst,$dst\t! replicate8B" %} |
12574 ins_encode( pxor(dst, dst)); | 11904 ins_encode %{ |
11905 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); | |
11906 %} | |
12575 ins_pipe( fpu_reg_reg ); | 11907 ins_pipe( fpu_reg_reg ); |
12576 %} | 11908 %} |
12577 | 11909 |
12578 // Replicate scalar to packed shore (2 byte) values in xmm | 11910 // Replicate scalar to packed shore (2 byte) values in xmm |
12579 instruct Repl4S_reg(regXD dst, regXD src) %{ | 11911 instruct Repl4S_reg(regD dst, regD src) %{ |
12580 predicate(UseSSE>=2); | 11912 predicate(UseSSE>=2); |
12581 match(Set dst (Replicate4S src)); | 11913 match(Set dst (Replicate4S src)); |
12582 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} | 11914 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} |
12583 ins_encode( pshufd_4x16(dst, src)); | 11915 ins_encode %{ |
11916 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); | |
11917 %} | |
12584 ins_pipe( fpu_reg_reg ); | 11918 ins_pipe( fpu_reg_reg ); |
12585 %} | 11919 %} |
12586 | 11920 |
12587 // Replicate scalar to packed shore (2 byte) values in xmm | 11921 // Replicate scalar to packed shore (2 byte) values in xmm |
12588 instruct Repl4S_eRegI(regXD dst, eRegI src) %{ | 11922 instruct Repl4S_eRegI(regD dst, eRegI src) %{ |
12589 predicate(UseSSE>=2); | 11923 predicate(UseSSE>=2); |
12590 match(Set dst (Replicate4S src)); | 11924 match(Set dst (Replicate4S src)); |
12591 format %{ "MOVD $dst,$src\n\t" | 11925 format %{ "MOVD $dst,$src\n\t" |
12592 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} | 11926 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} |
12593 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); | 11927 ins_encode %{ |
11928 __ movdl($dst$$XMMRegister, $src$$Register); | |
11929 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); | |
11930 %} | |
12594 ins_pipe( fpu_reg_reg ); | 11931 ins_pipe( fpu_reg_reg ); |
12595 %} | 11932 %} |
12596 | 11933 |
12597 // Replicate scalar zero to packed short (2 byte) values in xmm | 11934 // Replicate scalar zero to packed short (2 byte) values in xmm |
12598 instruct Repl4S_immI0(regXD dst, immI0 zero) %{ | 11935 instruct Repl4S_immI0(regD dst, immI0 zero) %{ |
12599 predicate(UseSSE>=2); | 11936 predicate(UseSSE>=2); |
12600 match(Set dst (Replicate4S zero)); | 11937 match(Set dst (Replicate4S zero)); |
12601 format %{ "PXOR $dst,$dst\t! replicate4S" %} | 11938 format %{ "PXOR $dst,$dst\t! replicate4S" %} |
12602 ins_encode( pxor(dst, dst)); | 11939 ins_encode %{ |
11940 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); | |
11941 %} | |
12603 ins_pipe( fpu_reg_reg ); | 11942 ins_pipe( fpu_reg_reg ); |
12604 %} | 11943 %} |
12605 | 11944 |
12606 // Replicate scalar to packed char (2 byte) values in xmm | 11945 // Replicate scalar to packed char (2 byte) values in xmm |
12607 instruct Repl4C_reg(regXD dst, regXD src) %{ | 11946 instruct Repl4C_reg(regD dst, regD src) %{ |
12608 predicate(UseSSE>=2); | 11947 predicate(UseSSE>=2); |
12609 match(Set dst (Replicate4C src)); | 11948 match(Set dst (Replicate4C src)); |
12610 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} | 11949 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} |
12611 ins_encode( pshufd_4x16(dst, src)); | 11950 ins_encode %{ |
11951 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); | |
11952 %} | |
12612 ins_pipe( fpu_reg_reg ); | 11953 ins_pipe( fpu_reg_reg ); |
12613 %} | 11954 %} |
12614 | 11955 |
12615 // Replicate scalar to packed char (2 byte) values in xmm | 11956 // Replicate scalar to packed char (2 byte) values in xmm |
12616 instruct Repl4C_eRegI(regXD dst, eRegI src) %{ | 11957 instruct Repl4C_eRegI(regD dst, eRegI src) %{ |
12617 predicate(UseSSE>=2); | 11958 predicate(UseSSE>=2); |
12618 match(Set dst (Replicate4C src)); | 11959 match(Set dst (Replicate4C src)); |
12619 format %{ "MOVD $dst,$src\n\t" | 11960 format %{ "MOVD $dst,$src\n\t" |
12620 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} | 11961 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} |
12621 ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); | 11962 ins_encode %{ |
11963 __ movdl($dst$$XMMRegister, $src$$Register); | |
11964 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); | |
11965 %} | |
12622 ins_pipe( fpu_reg_reg ); | 11966 ins_pipe( fpu_reg_reg ); |
12623 %} | 11967 %} |
12624 | 11968 |
12625 // Replicate scalar zero to packed char (2 byte) values in xmm | 11969 // Replicate scalar zero to packed char (2 byte) values in xmm |
12626 instruct Repl4C_immI0(regXD dst, immI0 zero) %{ | 11970 instruct Repl4C_immI0(regD dst, immI0 zero) %{ |
12627 predicate(UseSSE>=2); | 11971 predicate(UseSSE>=2); |
12628 match(Set dst (Replicate4C zero)); | 11972 match(Set dst (Replicate4C zero)); |
12629 format %{ "PXOR $dst,$dst\t! replicate4C" %} | 11973 format %{ "PXOR $dst,$dst\t! replicate4C" %} |
12630 ins_encode( pxor(dst, dst)); | 11974 ins_encode %{ |
11975 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); | |
11976 %} | |
12631 ins_pipe( fpu_reg_reg ); | 11977 ins_pipe( fpu_reg_reg ); |
12632 %} | 11978 %} |
12633 | 11979 |
12634 // Replicate scalar to packed integer (4 byte) values in xmm | 11980 // Replicate scalar to packed integer (4 byte) values in xmm |
12635 instruct Repl2I_reg(regXD dst, regXD src) %{ | 11981 instruct Repl2I_reg(regD dst, regD src) %{ |
12636 predicate(UseSSE>=2); | 11982 predicate(UseSSE>=2); |
12637 match(Set dst (Replicate2I src)); | 11983 match(Set dst (Replicate2I src)); |
12638 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} | 11984 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} |
12639 ins_encode( pshufd(dst, src, 0x00)); | 11985 ins_encode %{ |
11986 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); | |
11987 %} | |
12640 ins_pipe( fpu_reg_reg ); | 11988 ins_pipe( fpu_reg_reg ); |
12641 %} | 11989 %} |
12642 | 11990 |
12643 // Replicate scalar to packed integer (4 byte) values in xmm | 11991 // Replicate scalar to packed integer (4 byte) values in xmm |
12644 instruct Repl2I_eRegI(regXD dst, eRegI src) %{ | 11992 instruct Repl2I_eRegI(regD dst, eRegI src) %{ |
12645 predicate(UseSSE>=2); | 11993 predicate(UseSSE>=2); |
12646 match(Set dst (Replicate2I src)); | 11994 match(Set dst (Replicate2I src)); |
12647 format %{ "MOVD $dst,$src\n\t" | 11995 format %{ "MOVD $dst,$src\n\t" |
12648 "PSHUFD $dst,$dst,0x00\t! replicate2I" %} | 11996 "PSHUFD $dst,$dst,0x00\t! replicate2I" %} |
12649 ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); | 11997 ins_encode %{ |
11998 __ movdl($dst$$XMMRegister, $src$$Register); | |
11999 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); | |
12000 %} | |
12650 ins_pipe( fpu_reg_reg ); | 12001 ins_pipe( fpu_reg_reg ); |
12651 %} | 12002 %} |
12652 | 12003 |
12653 // Replicate scalar zero to packed integer (2 byte) values in xmm | 12004 // Replicate scalar zero to packed integer (2 byte) values in xmm |
12654 instruct Repl2I_immI0(regXD dst, immI0 zero) %{ | 12005 instruct Repl2I_immI0(regD dst, immI0 zero) %{ |
12655 predicate(UseSSE>=2); | 12006 predicate(UseSSE>=2); |
12656 match(Set dst (Replicate2I zero)); | 12007 match(Set dst (Replicate2I zero)); |
12657 format %{ "PXOR $dst,$dst\t! replicate2I" %} | 12008 format %{ "PXOR $dst,$dst\t! replicate2I" %} |
12658 ins_encode( pxor(dst, dst)); | 12009 ins_encode %{ |
12010 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); | |
12011 %} | |
12659 ins_pipe( fpu_reg_reg ); | 12012 ins_pipe( fpu_reg_reg ); |
12660 %} | 12013 %} |
12661 | 12014 |
12662 // Replicate scalar to packed single precision floating point values in xmm | 12015 // Replicate scalar to packed single precision floating point values in xmm |
12663 instruct Repl2F_reg(regXD dst, regXD src) %{ | 12016 instruct Repl2F_reg(regD dst, regD src) %{ |
12664 predicate(UseSSE>=2); | 12017 predicate(UseSSE>=2); |
12665 match(Set dst (Replicate2F src)); | 12018 match(Set dst (Replicate2F src)); |
12666 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} | 12019 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} |
12667 ins_encode( pshufd(dst, src, 0xe0)); | 12020 ins_encode %{ |
12021 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); | |
12022 %} | |
12668 ins_pipe( fpu_reg_reg ); | 12023 ins_pipe( fpu_reg_reg ); |
12669 %} | 12024 %} |
12670 | 12025 |
12671 // Replicate scalar to packed single precision floating point values in xmm | 12026 // Replicate scalar to packed single precision floating point values in xmm |
12672 instruct Repl2F_regX(regXD dst, regX src) %{ | 12027 instruct Repl2F_regF(regD dst, regF src) %{ |
12673 predicate(UseSSE>=2); | 12028 predicate(UseSSE>=2); |
12674 match(Set dst (Replicate2F src)); | 12029 match(Set dst (Replicate2F src)); |
12675 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} | 12030 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} |
12676 ins_encode( pshufd(dst, src, 0xe0)); | 12031 ins_encode %{ |
12032 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); | |
12033 %} | |
12677 ins_pipe( fpu_reg_reg ); | 12034 ins_pipe( fpu_reg_reg ); |
12678 %} | 12035 %} |
12679 | 12036 |
12680 // Replicate the zero constant to packed single precision floating point values in xmm: PXOR of $dst with itself clears the whole register, so no source register is read | 12037 // Replicate the zero constant to packed single precision floating point values in xmm: PXOR of $dst with itself clears the whole register, so no source register is read |
12681 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ | 12038 instruct Repl2F_immF0(regD dst, immF0 zero) %{ |
12682 predicate(UseSSE>=2); | 12039 predicate(UseSSE>=2); |
12683 match(Set dst (Replicate2F zero)); | 12040 match(Set dst (Replicate2F zero)); |
12684 format %{ "PXOR $dst,$dst\t! replicate2F" %} | 12041 format %{ "PXOR $dst,$dst\t! replicate2F" %} |
12685 ins_encode( pxor(dst, dst)); | 12042 ins_encode %{ |
12043 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); | |
12044 %} | |
12686 ins_pipe( fpu_reg_reg ); | 12045 ins_pipe( fpu_reg_reg ); |
12687 %} | 12046 %} |
12688 | 12047 |
12689 // ======================================================================= | 12048 // ======================================================================= |
12690 // fast clearing of an array | 12049 // fast clearing of an array |
12700 Opcode(0xF3), Opcode(0xAB) ); | 12059 Opcode(0xF3), Opcode(0xAB) ); |
12701 ins_pipe( pipe_slow ); | 12060 ins_pipe( pipe_slow ); |
12702 %} | 12061 %} |
12703 | 12062 |
12704 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, | 12063 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, |
12705 eAXRegI result, regXD tmp1, eFlagsReg cr) %{ | 12064 eAXRegI result, regD tmp1, eFlagsReg cr) %{ |
12706 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); | 12065 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); |
12707 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); | 12066 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); |
12708 | 12067 |
12709 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} | 12068 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} |
12710 ins_encode %{ | 12069 ins_encode %{ |
12715 ins_pipe( pipe_slow ); | 12074 ins_pipe( pipe_slow ); |
12716 %} | 12075 %} |
12717 | 12076 |
12718 // fast string equals | 12077 // fast string equals |
12719 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, | 12078 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, |
12720 regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ | 12079 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ |
12721 match(Set result (StrEquals (Binary str1 str2) cnt)); | 12080 match(Set result (StrEquals (Binary str1 str2) cnt)); |
12722 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); | 12081 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); |
12723 | 12082 |
12724 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} | 12083 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} |
12725 ins_encode %{ | 12084 ins_encode %{ |
12730 ins_pipe( pipe_slow ); | 12089 ins_pipe( pipe_slow ); |
12731 %} | 12090 %} |
12732 | 12091 |
12733 // fast search of substring with known size. | 12092 // fast search of substring with known size. |
12734 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, | 12093 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, |
12735 eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ | 12094 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ |
12736 predicate(UseSSE42Intrinsics); | 12095 predicate(UseSSE42Intrinsics); |
12737 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); | 12096 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); |
12738 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); | 12097 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); |
12739 | 12098 |
12740 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} | 12099 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} |
12757 %} | 12116 %} |
12758 ins_pipe( pipe_slow ); | 12117 ins_pipe( pipe_slow ); |
12759 %} | 12118 %} |
12760 | 12119 |
12761 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, | 12120 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, |
12762 eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{ | 12121 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ |
12763 predicate(UseSSE42Intrinsics); | 12122 predicate(UseSSE42Intrinsics); |
12764 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); | 12123 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); |
12765 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); | 12124 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); |
12766 | 12125 |
12767 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} | 12126 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} |
12774 ins_pipe( pipe_slow ); | 12133 ins_pipe( pipe_slow ); |
12775 %} | 12134 %} |
12776 | 12135 |
12777 // fast array equals | 12136 // fast array equals |
12778 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, | 12137 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, |
12779 regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) | 12138 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) |
12780 %{ | 12139 %{ |
12781 match(Set result (AryEq ary1 ary2)); | 12140 match(Set result (AryEq ary1 ary2)); |
12782 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); | 12141 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); |
12783 //ins_cost(300); | 12142 //ins_cost(300); |
12784 | 12143 |
13600 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); | 12959 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
13601 ins_pipe( pipe_cmov_reg ); | 12960 ins_pipe( pipe_cmov_reg ); |
13602 %} | 12961 %} |
13603 | 12962 |
13604 // Compare 2 longs and CMOVE doubles; selected only when UseSSE<=1 and the Bool test is lt or ge (the test pair is parenthesized so the UseSSE guard covers both) | 12963 // Compare 2 longs and CMOVE doubles; selected only when UseSSE<=1 and the Bool test is lt or ge (the test pair is parenthesized so the UseSSE guard covers both) |
13605 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ | 12964 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ |
13606 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); | 12965 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); |
13607 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); | 12966 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
13608 ins_cost(200); | 12967 ins_cost(200); |
13609 expand %{ | 12968 expand %{ |
13610 fcmovD_regS(cmp,flags,dst,src); | 12969 fcmovDPR_regS(cmp,flags,dst,src); |
13611 %} | 12970 %} |
13612 %} | 12971 %} |
13613 | 12972 |
13614 // Compare 2 longs and CMOVE doubles; selected only when UseSSE>=2 and the Bool test is lt or ge (the test pair is parenthesized so the UseSSE guard covers both) | 12973 // Compare 2 longs and CMOVE doubles; selected only when UseSSE>=2 and the Bool test is lt or ge (the test pair is parenthesized so the UseSSE guard covers both) |
13615 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{ | 12974 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ |
13616 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); | 12975 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); |
13617 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); | 12976 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
13618 ins_cost(200); | 12977 ins_cost(200); |
13619 expand %{ | 12978 expand %{ |
13620 fcmovXD_regS(cmp,flags,dst,src); | 12979 fcmovD_regS(cmp,flags,dst,src); |
13621 %} | 12980 %} |
13622 %} | 12981 %} |
13623 | 12982 |
13624 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ | 12983 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ |
13625 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); // parenthesized: the UseSSE guard must cover both the lt and the ge test | 12984 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); // parenthesized: the UseSSE guard must cover both the lt and the ge test |
13626 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); | 12985 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
13627 ins_cost(200); | 12986 ins_cost(200); |
13628 expand %{ | 12987 expand %{ |
13629 fcmovF_regS(cmp,flags,dst,src); | 12988 fcmovFPR_regS(cmp,flags,dst,src); |
13630 %} | 12989 %} |
13631 %} | 12990 %} |
13632 | 12991 |
13633 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{ | 12992 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ |
13634 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); // parenthesized: the UseSSE guard must cover both the lt and the ge test | 12993 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); // parenthesized: the UseSSE guard must cover both the lt and the ge test |
13635 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); | 12994 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
13636 ins_cost(200); | 12995 ins_cost(200); |
13637 expand %{ | 12996 expand %{ |
13638 fcmovX_regS(cmp,flags,dst,src); | 12997 fcmovF_regS(cmp,flags,dst,src); |
13639 %} | 12998 %} |
13640 %} | 12999 %} |
13641 | 13000 |
13642 //====== | 13001 //====== |
13643 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. | 13002 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. |
13728 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); | 13087 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
13729 ins_pipe( pipe_cmov_reg ); | 13088 ins_pipe( pipe_cmov_reg ); |
13730 %} | 13089 %} |
13731 | 13090 |
13732 // Compare 2 longs and CMOVE doubles; selected only when UseSSE<=1 and the Bool test is eq or ne (the test pair is parenthesized so the UseSSE guard covers both) | 13091 // Compare 2 longs and CMOVE doubles; selected only when UseSSE<=1 and the Bool test is eq or ne (the test pair is parenthesized so the UseSSE guard covers both) |
13733 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ | 13092 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ |
13734 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); | 13093 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); |
13735 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); | 13094 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
13736 ins_cost(200); | 13095 ins_cost(200); |
13737 expand %{ | 13096 expand %{ |
13738 fcmovD_regS(cmp,flags,dst,src); | 13097 fcmovDPR_regS(cmp,flags,dst,src); |
13739 %} | 13098 %} |
13740 %} | 13099 %} |
13741 | 13100 |
13742 // Compare 2 longs and CMOVE doubles; selected only when UseSSE>=2 and the Bool test is eq or ne (the test pair is parenthesized so the UseSSE guard covers both) | 13101 // Compare 2 longs and CMOVE doubles; selected only when UseSSE>=2 and the Bool test is eq or ne (the test pair is parenthesized so the UseSSE guard covers both) |
13743 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{ | 13102 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ |
13744 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); | 13103 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); |
13745 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); | 13104 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
13746 ins_cost(200); | 13105 ins_cost(200); |
13747 expand %{ | 13106 expand %{ |
13748 fcmovXD_regS(cmp,flags,dst,src); | 13107 fcmovD_regS(cmp,flags,dst,src); |
13749 %} | 13108 %} |
13750 %} | 13109 %} |
13751 | 13110 |
13752 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ | 13111 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ |
13753 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); // parenthesized: the UseSSE guard must cover both the eq and the ne test | 13112 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); // parenthesized: the UseSSE guard must cover both the eq and the ne test |
13754 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); | 13113 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
13755 ins_cost(200); | 13114 ins_cost(200); |
13756 expand %{ | 13115 expand %{ |
13757 fcmovF_regS(cmp,flags,dst,src); | 13116 fcmovFPR_regS(cmp,flags,dst,src); |
13758 %} | 13117 %} |
13759 %} | 13118 %} |
13760 | 13119 |
13761 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{ | 13120 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ |
13762 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); // parenthesized: the UseSSE guard must cover both the eq and the ne test | 13121 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); // parenthesized: the UseSSE guard must cover both the eq and the ne test |
13763 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); | 13122 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
13764 ins_cost(200); | 13123 ins_cost(200); |
13765 expand %{ | 13124 expand %{ |
13766 fcmovX_regS(cmp,flags,dst,src); | 13125 fcmovF_regS(cmp,flags,dst,src); |
13767 %} | 13126 %} |
13768 %} | 13127 %} |
13769 | 13128 |
13770 //====== | 13129 //====== |
13771 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. | 13130 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. |
13861 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); | 13220 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
13862 ins_pipe( pipe_cmov_reg ); | 13221 ins_pipe( pipe_cmov_reg ); |
13863 %} | 13222 %} |
13864 | 13223 |
13865 // Compare 2 longs and CMOVE doubles; selected only when UseSSE<=1 and the Bool test is le or gt (the test pair is parenthesized so the UseSSE guard covers both) | 13224 // Compare 2 longs and CMOVE doubles; selected only when UseSSE<=1 and the Bool test is le or gt (the test pair is parenthesized so the UseSSE guard covers both) |
13866 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ | 13225 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ |
13867 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); | 13226 predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); |
13868 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); | 13227 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
13869 ins_cost(200); | 13228 ins_cost(200); |
13870 expand %{ | 13229 expand %{ |
13871 fcmovD_regS(cmp,flags,dst,src); | 13230 fcmovDPR_regS(cmp,flags,dst,src); |
13872 %} | 13231 %} |
13873 %} | 13232 %} |
13874 | 13233 |
13875 // Compare 2 longs and CMOVE doubles; selected only when UseSSE>=2 and the Bool test is le or gt (the test pair is parenthesized so the UseSSE guard covers both) | 13234 // Compare 2 longs and CMOVE doubles; selected only when UseSSE>=2 and the Bool test is le or gt (the test pair is parenthesized so the UseSSE guard covers both) |
13876 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{ | 13235 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ |
13877 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); | 13236 predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); |
13878 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); | 13237 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
13879 ins_cost(200); | 13238 ins_cost(200); |
13880 expand %{ | 13239 expand %{ |
13881 fcmovXD_regS(cmp,flags,dst,src); | 13240 fcmovD_regS(cmp,flags,dst,src); |
13882 %} | 13241 %} |
13883 %} | 13242 %} |
13884 | 13243 |
13885 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ | 13244 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ |
13886 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); // parenthesized: the UseSSE guard must cover both the le and the gt test | 13245 predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); // parenthesized: the UseSSE guard must cover both the le and the gt test |
13887 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); | 13246 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
13888 ins_cost(200); | 13247 ins_cost(200); |
13889 expand %{ | 13248 expand %{ |
13890 fcmovF_regS(cmp,flags,dst,src); | 13249 fcmovFPR_regS(cmp,flags,dst,src); |
13891 %} | 13250 %} |
13892 %} | 13251 %} |
13893 | 13252 |
13894 | 13253 |
13895 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{ | 13254 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ |
13896 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); // parenthesized: the UseSSE guard must cover both the le and the gt test | 13255 predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); // parenthesized: the UseSSE guard must cover both the le and the gt test |
13897 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); | 13256 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
13898 ins_cost(200); | 13257 ins_cost(200); |
13899 expand %{ | 13258 expand %{ |
13900 fcmovX_regS(cmp,flags,dst,src); | 13259 fcmovF_regS(cmp,flags,dst,src); |
13901 %} | 13260 %} |
13902 %} | 13261 %} |
13903 | 13262 |
13904 | 13263 |
13905 // ============================================================================ | 13264 // ============================================================================ |