comparison src/cpu/x86/vm/x86_64.ad @ 6179:8c92982cbbc4

7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256 bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
author kvn
date Fri, 15 Jun 2012 01:25:19 -0700
parents 8b0a4867acf0
children 006050192a5a
6146:eba1d5bce9e8 6179:8c92982cbbc4
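
For orientation, this change raises the largest vector operand from the old 64-bit limit (see the removed Matcher::vector_width_in_bytes below) to 256-bit YMM registers. A minimal standalone sketch of the four vector widths involved (not HotSpot code; the enum is a stand-in for the Op_VecS..Op_VecY ideal types that appear throughout this diff):

#include <cstdio>

enum VecKind { VecS, VecD, VecX, VecY };  // stand-ins for Op_VecS..Op_VecY

static int vec_width_in_bytes(VecKind k) {
  switch (k) {
    case VecS: return 4;   // 32-bit
    case VecD: return 8;   // 64-bit (the old maximum)
    case VecX: return 16;  // 128-bit XMM
    case VecY: return 32;  // 256-bit YMM, new with AVX
  }
  return 0;
}

int main() {
  for (VecKind k : { VecS, VecD, VecX, VecY })
    printf("%d bytes\n", vec_width_in_bytes(k));
}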
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next()); 129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130 130
131 131
132 // Floating Point Registers 132 // Floating Point Registers
133 133
134 // XMM registers.  128-bit registers, i.e. 4 words each, labeled (a)-(d).
135 // Word (a) in each register holds a Float; words (a)-(b) hold a Double.
136 // We currently do not use the SIMD capabilities, so words (c)-(d) are
137 // unused at the moment.
138 // XMM8-XMM15 must be encoded with REX.
139 // Linux ABI: No register preserved across function calls
140 // XMM0-XMM7 might hold parameters
141 // Windows ABI: XMM6-XMM15 preserved across function calls
142 // XMM0-XMM3 might hold parameters
143
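
One plausible reading of the two save-status columns in the reg_def entries below, sketched as a struct (an illustrative model only, assuming SOC = save-on-call / caller-saved and SOE = save-on-entry / callee-saved; RegDefModel and its fields are not part of the .ad grammar):

enum SaveStatus { SOC, SOE };  // save-on-call (caller-saved) vs save-on-entry (callee-saved)

struct RegDefModel {
  const char* name;
  SaveStatus  save_status;    // first column: save status in the VM convention
  SaveStatus  c_save_status;  // second column: save status under the C convention
};

// Consistent with the ABI notes above: Windows preserves XMM6-XMM15
// across function calls, Linux preserves no XMM registers.
static const RegDefModel xmm6_windows = { "XMM6", SOC, SOE };
static const RegDefModel xmm6_linux   = { "XMM6", SOC, SOC };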
144 reg_def XMM0 (SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
145 reg_def XMM0_H (SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
146
147 reg_def XMM1 (SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
148 reg_def XMM1_H (SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
149
150 reg_def XMM2 (SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
151 reg_def XMM2_H (SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
152
153 reg_def XMM3 (SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
154 reg_def XMM3_H (SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
155
156 reg_def XMM4 (SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
157 reg_def XMM4_H (SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
158
159 reg_def XMM5 (SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
160 reg_def XMM5_H (SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
161
162 #ifdef _WIN64
163
164 reg_def XMM6 (SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
165 reg_def XMM6_H (SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
166
167 reg_def XMM7 (SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
168 reg_def XMM7_H (SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
169
170 reg_def XMM8 (SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
171 reg_def XMM8_H (SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
172
173 reg_def XMM9 (SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
174 reg_def XMM9_H (SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
175
176 reg_def XMM10 (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
177 reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
178
179 reg_def XMM11 (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
180 reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
181
182 reg_def XMM12 (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
183 reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
184
185 reg_def XMM13 (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
186 reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
187
188 reg_def XMM14 (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
189 reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
190
191 reg_def XMM15 (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
192 reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
193
194 #else
195
196 reg_def XMM6 (SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
197 reg_def XMM6_H (SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
198
199 reg_def XMM7 (SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
200 reg_def XMM7_H (SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
201
202 reg_def XMM8 (SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
203 reg_def XMM8_H (SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
204
205 reg_def XMM9 (SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
206 reg_def XMM9_H (SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
207
208 reg_def XMM10 (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
209 reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
210
211 reg_def XMM11 (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
212 reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
213
214 reg_def XMM12 (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
215 reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
216
217 reg_def XMM13 (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
218 reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
219
220 reg_def XMM14 (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
221 reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
222
223 reg_def XMM15 (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
224 reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
225
226 #endif // _WIN64
227
228 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
229
230 // Specify priority of register selection within phases of register 134 // Specify priority of register selection within phases of register
231 // allocation. Highest priority is first. A useful heuristic is to 135 // allocation. Highest priority is first. A useful heuristic is to
232 // give registers a low priority when they are required by machine 136 // give registers a low priority when they are required by machine
233 // instructions, like EAX and EDX on I486, and choose no-save registers 137 // instructions, like EAX and EDX on I486, and choose no-save registers
234 // before save-on-call, & save-on-call before save-on-entry. Registers 138 // before save-on-call, & save-on-call before save-on-entry. Registers
249 RBP, RBP_H, 153 RBP, RBP_H,
250 R13, R13_H, 154 R13, R13_H,
251 R14, R14_H, 155 R14, R14_H,
252 R15, R15_H, 156 R15, R15_H,
253 RSP, RSP_H); 157 RSP, RSP_H);
254
255 // XXX probably use 8-15 first on Linux
256 alloc_class chunk1(XMM0, XMM0_H,
257 XMM1, XMM1_H,
258 XMM2, XMM2_H,
259 XMM3, XMM3_H,
260 XMM4, XMM4_H,
261 XMM5, XMM5_H,
262 XMM6, XMM6_H,
263 XMM7, XMM7_H,
264 XMM8, XMM8_H,
265 XMM9, XMM9_H,
266 XMM10, XMM10_H,
267 XMM11, XMM11_H,
268 XMM12, XMM12_H,
269 XMM13, XMM13_H,
270 XMM14, XMM14_H,
271 XMM15, XMM15_H);
272
273 alloc_class chunk2(RFLAGS);
274 158
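
A sketch of the heuristic the comment above describes (illustrative only, not the allocator's real code): scanning a chunk in declaration order and taking the first free register hands out earlier, higher-priority registers first.

#include <vector>
#include <cstdio>

// First free register in alloc_class order = highest-priority choice.
static int pick_register(const std::vector<bool>& in_use) {
  for (size_t i = 0; i < in_use.size(); i++)
    if (!in_use[i]) return (int)i;
  return -1;  // nothing free: the allocator must spill
}

int main() {
  std::vector<bool> in_use = { true, true, false, false };
  printf("picked register %d\n", pick_register(in_use));  // 2
}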
275 159
276 //----------Architecture Description Register Classes-------------------------- 160 //----------Architecture Description Register Classes--------------------------
277 // Several register classes are automatically defined based upon information in 161 // Several register classes are automatically defined based upon information in
278 // this architecture description. 162 // this architecture description.
499 reg_class int_rdi_reg(RDI); 383 reg_class int_rdi_reg(RDI);
500 384
501 // Singleton class for instruction pointer 385 // Singleton class for instruction pointer
502 // reg_class ip_reg(RIP); 386 // reg_class ip_reg(RIP);
503 387
504 // Singleton class for condition codes 388 %}
505 reg_class int_flags(RFLAGS);
506
507 // Class for all float registers
508 reg_class float_reg(XMM0,
509 XMM1,
510 XMM2,
511 XMM3,
512 XMM4,
513 XMM5,
514 XMM6,
515 XMM7,
516 XMM8,
517 XMM9,
518 XMM10,
519 XMM11,
520 XMM12,
521 XMM13,
522 XMM14,
523 XMM15);
524
525 // Class for all double registers
526 reg_class double_reg(XMM0, XMM0_H,
527 XMM1, XMM1_H,
528 XMM2, XMM2_H,
529 XMM3, XMM3_H,
530 XMM4, XMM4_H,
531 XMM5, XMM5_H,
532 XMM6, XMM6_H,
533 XMM7, XMM7_H,
534 XMM8, XMM8_H,
535 XMM9, XMM9_H,
536 XMM10, XMM10_H,
537 XMM11, XMM11_H,
538 XMM12, XMM12_H,
539 XMM13, XMM13_H,
540 XMM14, XMM14_H,
541 XMM15, XMM15_H);
542 %}
543
544 389
545 //----------SOURCE BLOCK------------------------------------------------------- 390 //----------SOURCE BLOCK-------------------------------------------------------
546 // This is a block of C++ code which provides values, functions, and 391 // This is a block of C++ code which provides values, functions, and
547 // definitions necessary in the rest of the architecture description 392 // definitions necessary in the rest of the architecture description
548 source %{ 393 source %{
1025 870
1026 assert(r->is_XMMRegister(), "must be"); 871 assert(r->is_XMMRegister(), "must be");
1027 return rc_float; 872 return rc_float;
1028 } 873 }
1029 874
875 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
876 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
877 int src_hi, int dst_hi, uint ireg, outputStream* st);
878
879 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
880 int stack_offset, int reg, uint ireg, outputStream* st);
881
882 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
883 int dst_offset, uint ireg, outputStream* st) {
884 if (cbuf) {
885 MacroAssembler _masm(cbuf);
886 switch (ireg) {
887 case Op_VecS:
888 __ movq(Address(rsp, -8), rax);
889 __ movl(rax, Address(rsp, src_offset));
890 __ movl(Address(rsp, dst_offset), rax);
891 __ movq(rax, Address(rsp, -8));
892 break;
893 case Op_VecD:
894 __ pushq(Address(rsp, src_offset));
895 __ popq (Address(rsp, dst_offset));
896 break;
897 case Op_VecX:
898 __ pushq(Address(rsp, src_offset));
899 __ popq (Address(rsp, dst_offset));
900 __ pushq(Address(rsp, src_offset+8));
901 __ popq (Address(rsp, dst_offset+8));
902 break;
903 case Op_VecY:
904 __ vmovdqu(Address(rsp, -32), xmm0);
905 __ vmovdqu(xmm0, Address(rsp, src_offset));
906 __ vmovdqu(Address(rsp, dst_offset), xmm0);
907 __ vmovdqu(xmm0, Address(rsp, -32));
908 break;
909 default:
910 ShouldNotReachHere();
911 }
912 #ifndef PRODUCT
913 } else {
914 switch (ireg) {
915 case Op_VecS:
916 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
917 "movl rax, [rsp + #%d]\n\t"
918 "movl [rsp + #%d], rax\n\t"
919 "movq rax, [rsp - #8]",
920 src_offset, dst_offset);
921 break;
922 case Op_VecD:
923 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
924 "popq [rsp + #%d]",
925 src_offset, dst_offset);
926 break;
927 case Op_VecX:
928 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
929 "popq [rsp + #%d]\n\t"
930 "pushq [rsp + #%d]\n\t"
931 "popq [rsp + #%d]",
932 src_offset, dst_offset, src_offset+8, dst_offset+8);
933 break;
934 case Op_VecY:
935 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
936 "vmovdqu xmm0, [rsp + #%d]\n\t"
937 "vmovdqu [rsp + #%d], xmm0\n\t"
938 "vmovdqu xmm0, [rsp - #32]",
939 src_offset, dst_offset);
940 break;
941 default:
942 ShouldNotReachHere();
943 }
944 #endif
945 }
946 }
947
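
The helper above never has a free register to route a memory-to-memory copy through, so it borrows one: rax (saved in the slot just below rsp) for 32-bit copies, pushq/popq pairs for 64- and 128-bit copies, and xmm0 (saved 32 bytes below rsp) for 256-bit copies. A standalone model of that save/copy/restore idiom (illustrative, not HotSpot code; the scratch register is modelled as a local buffer):

#include <cstring>
#include <cstdio>

static unsigned char stk[64];  // toy stack frame; offsets index into it

// Copy `width` bytes between two stack slots through a borrowed scratch
// register, preserving the scratch's previous contents across the copy.
static void mem_to_mem(int src_off, int dst_off, int width) {
  unsigned char scratch[32] = {0};         // stands in for rax or xmm0
  unsigned char saved[32];
  memcpy(saved, scratch, sizeof scratch);  // movq [rsp-8], rax / vmovdqu [rsp-32], xmm0
  memcpy(scratch, stk + src_off, width);   // load scratch from the source slot
  memcpy(stk + dst_off, scratch, width);   // store scratch to the destination slot
  memcpy(scratch, saved, sizeof scratch);  // restore the borrowed register
}

int main() {
  memcpy(stk + 8, "0123456789abcdef", 16);
  mem_to_mem(8, 40, 16);
  printf("%.16s\n", stk + 40);  // prints 0123456789abcdef
}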
1030 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, 948 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
1031 PhaseRegAlloc* ra_, 949 PhaseRegAlloc* ra_,
1032 bool do_size, 950 bool do_size,
1033 outputStream* st) const 951 outputStream* st) const {
1034 { 952 assert(cbuf != NULL || st != NULL, "sanity");
1035
1036 // Get registers to move 953 // Get registers to move
1037 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 954 OptoReg::Name src_second = ra_->get_reg_second(in(1));
1038 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 955 OptoReg::Name src_first = ra_->get_reg_first(in(1));
1039 OptoReg::Name dst_second = ra_->get_reg_second(this); 956 OptoReg::Name dst_second = ra_->get_reg_second(this);
1040 OptoReg::Name dst_first = ra_->get_reg_first(this); 957 OptoReg::Name dst_first = ra_->get_reg_first(this);
1048 "must move at least 1 register" ); 965 "must move at least 1 register" );
1049 966
1050 if (src_first == dst_first && src_second == dst_second) { 967 if (src_first == dst_first && src_second == dst_second) {
1051 // Self copy, no move 968 // Self copy, no move
1052 return 0; 969 return 0;
1053 } else if (src_first_rc == rc_stack) { 970 }
971 if (bottom_type()->isa_vect() != NULL) {
972 uint ireg = ideal_reg();
973 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
974 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
975 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
976 // mem -> mem
977 int src_offset = ra_->reg2offset(src_first);
978 int dst_offset = ra_->reg2offset(dst_first);
979 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
980 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
981 vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
982 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
983 int stack_offset = ra_->reg2offset(dst_first);
984 vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
985 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
986 int stack_offset = ra_->reg2offset(src_first);
987 vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st);
988 } else {
989 ShouldNotReachHere();
990 }
991 return 0;
992 }
993 if (src_first_rc == rc_stack) {
1054 // mem -> 994 // mem ->
1055 if (dst_first_rc == rc_stack) { 995 if (dst_first_rc == rc_stack) {
1056 // mem -> mem 996 // mem -> mem
1057 assert(src_second != dst_first, "overlap"); 997 assert(src_second != dst_first, "overlap");
1058 if ((src_first & 1) == 0 && src_first + 1 == src_second && 998 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1059 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 999 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1060 // 64-bit 1000 // 64-bit
1061 int src_offset = ra_->reg2offset(src_first); 1001 int src_offset = ra_->reg2offset(src_first);
1062 int dst_offset = ra_->reg2offset(dst_first); 1002 int dst_offset = ra_->reg2offset(dst_first);
1063 if (cbuf) { 1003 if (cbuf) {
1064 emit_opcode(*cbuf, 0xFF); 1004 MacroAssembler _masm(cbuf);
1065 encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false); 1005 __ pushq(Address(rsp, src_offset));
1066 1006 __ popq (Address(rsp, dst_offset));
1067 emit_opcode(*cbuf, 0x8F);
1068 encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
1069
1070 #ifndef PRODUCT 1007 #ifndef PRODUCT
1071 } else if (!do_size) { 1008 } else {
1072 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1009 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1073 "popq [rsp + #%d]", 1010 "popq [rsp + #%d]",
1074 src_offset, 1011 src_offset, dst_offset);
1075 dst_offset);
1076 #endif 1012 #endif
1077 } 1013 }
1078 return
1079 3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
1080 3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
1081 } else { 1014 } else {
1082 // 32-bit 1015 // 32-bit
1083 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1016 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1084 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1017 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1085 // No pushl/popl, so: 1018 // No pushl/popl, so:
1086 int src_offset = ra_->reg2offset(src_first); 1019 int src_offset = ra_->reg2offset(src_first);
1087 int dst_offset = ra_->reg2offset(dst_first); 1020 int dst_offset = ra_->reg2offset(dst_first);
1088 if (cbuf) { 1021 if (cbuf) {
1089 emit_opcode(*cbuf, Assembler::REX_W); 1022 MacroAssembler _masm(cbuf);
1090 emit_opcode(*cbuf, 0x89); 1023 __ movq(Address(rsp, -8), rax);
1091 emit_opcode(*cbuf, 0x44); 1024 __ movl(rax, Address(rsp, src_offset));
1092 emit_opcode(*cbuf, 0x24); 1025 __ movl(Address(rsp, dst_offset), rax);
1093 emit_opcode(*cbuf, 0xF8); 1026 __ movq(rax, Address(rsp, -8));
1094
1095 emit_opcode(*cbuf, 0x8B);
1096 encode_RegMem(*cbuf,
1097 RAX_enc,
1098 RSP_enc, 0x4, 0, src_offset,
1099 false);
1100
1101 emit_opcode(*cbuf, 0x89);
1102 encode_RegMem(*cbuf,
1103 RAX_enc,
1104 RSP_enc, 0x4, 0, dst_offset,
1105 false);
1106
1107 emit_opcode(*cbuf, Assembler::REX_W);
1108 emit_opcode(*cbuf, 0x8B);
1109 emit_opcode(*cbuf, 0x44);
1110 emit_opcode(*cbuf, 0x24);
1111 emit_opcode(*cbuf, 0xF8);
1112
1113 #ifndef PRODUCT 1027 #ifndef PRODUCT
1114 } else if (!do_size) { 1028 } else {
1115 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" 1029 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
1116 "movl rax, [rsp + #%d]\n\t" 1030 "movl rax, [rsp + #%d]\n\t"
1117 "movl [rsp + #%d], rax\n\t" 1031 "movl [rsp + #%d], rax\n\t"
1118 "movq rax, [rsp - #8]", 1032 "movq rax, [rsp - #8]",
1119 src_offset, 1033 src_offset, dst_offset);
1120 dst_offset);
1121 #endif 1034 #endif
1122 } 1035 }
1123 return
1124 5 + // movq
1125 3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
1126 3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
1127 5; // movq
1128 } 1036 }
1037 return 0;
1129 } else if (dst_first_rc == rc_int) { 1038 } else if (dst_first_rc == rc_int) {
1130 // mem -> gpr 1039 // mem -> gpr
1131 if ((src_first & 1) == 0 && src_first + 1 == src_second && 1040 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1132 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 1041 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1133 // 64-bit 1042 // 64-bit
1134 int offset = ra_->reg2offset(src_first); 1043 int offset = ra_->reg2offset(src_first);
1135 if (cbuf) { 1044 if (cbuf) {
1136 if (Matcher::_regEncode[dst_first] < 8) { 1045 MacroAssembler _masm(cbuf);
1137 emit_opcode(*cbuf, Assembler::REX_W); 1046 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1138 } else {
1139 emit_opcode(*cbuf, Assembler::REX_WR);
1140 }
1141 emit_opcode(*cbuf, 0x8B);
1142 encode_RegMem(*cbuf,
1143 Matcher::_regEncode[dst_first],
1144 RSP_enc, 0x4, 0, offset,
1145 false);
1146 #ifndef PRODUCT 1047 #ifndef PRODUCT
1147 } else if (!do_size) { 1048 } else {
1148 st->print("movq %s, [rsp + #%d]\t# spill", 1049 st->print("movq %s, [rsp + #%d]\t# spill",
1149 Matcher::regName[dst_first], 1050 Matcher::regName[dst_first],
1150 offset); 1051 offset);
1151 #endif 1052 #endif
1152 } 1053 }
1153 return
1154 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1155 } else { 1054 } else {
1156 // 32-bit 1055 // 32-bit
1157 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1056 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1158 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1057 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1159 int offset = ra_->reg2offset(src_first); 1058 int offset = ra_->reg2offset(src_first);
1160 if (cbuf) { 1059 if (cbuf) {
1161 if (Matcher::_regEncode[dst_first] >= 8) { 1060 MacroAssembler _masm(cbuf);
1162 emit_opcode(*cbuf, Assembler::REX_R); 1061 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1163 }
1164 emit_opcode(*cbuf, 0x8B);
1165 encode_RegMem(*cbuf,
1166 Matcher::_regEncode[dst_first],
1167 RSP_enc, 0x4, 0, offset,
1168 false);
1169 #ifndef PRODUCT 1062 #ifndef PRODUCT
1170 } else if (!do_size) { 1063 } else {
1171 st->print("movl %s, [rsp + #%d]\t# spill", 1064 st->print("movl %s, [rsp + #%d]\t# spill",
1172 Matcher::regName[dst_first], 1065 Matcher::regName[dst_first],
1173 offset); 1066 offset);
1174 #endif 1067 #endif
1175 } 1068 }
1176 return
1177 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1178 ((Matcher::_regEncode[dst_first] < 8)
1179 ? 3
1180 : 4); // REX
1181 } 1069 }
1070 return 0;
1182 } else if (dst_first_rc == rc_float) { 1071 } else if (dst_first_rc == rc_float) {
1183 // mem-> xmm 1072 // mem-> xmm
1184 if ((src_first & 1) == 0 && src_first + 1 == src_second && 1073 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1185 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 1074 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1186 // 64-bit 1075 // 64-bit
1187 int offset = ra_->reg2offset(src_first); 1076 int offset = ra_->reg2offset(src_first);
1188 if (cbuf) { 1077 if (cbuf) {
1189 MacroAssembler _masm(cbuf); 1078 MacroAssembler _masm(cbuf);
1190 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1079 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1191 #ifndef PRODUCT 1080 #ifndef PRODUCT
1192 } else if (!do_size) { 1081 } else {
1193 st->print("%s %s, [rsp + #%d]\t# spill", 1082 st->print("%s %s, [rsp + #%d]\t# spill",
1194 UseXmmLoadAndClearUpper ? "movsd " : "movlpd", 1083 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
1195 Matcher::regName[dst_first], 1084 Matcher::regName[dst_first],
1196 offset); 1085 offset);
1197 #endif 1086 #endif
1198 } 1087 }
1199 return
1200 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1201 ((Matcher::_regEncode[dst_first] >= 8)
1202 ? 6
1203 : (5 + ((UseAVX>0)?1:0))); // REX
1204 } else { 1088 } else {
1205 // 32-bit 1089 // 32-bit
1206 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1090 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1207 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1091 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1208 int offset = ra_->reg2offset(src_first); 1092 int offset = ra_->reg2offset(src_first);
1209 if (cbuf) { 1093 if (cbuf) {
1210 MacroAssembler _masm(cbuf); 1094 MacroAssembler _masm(cbuf);
1211 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1095 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
1212 #ifndef PRODUCT 1096 #ifndef PRODUCT
1213 } else if (!do_size) { 1097 } else {
1214 st->print("movss %s, [rsp + #%d]\t# spill", 1098 st->print("movss %s, [rsp + #%d]\t# spill",
1215 Matcher::regName[dst_first], 1099 Matcher::regName[dst_first],
1216 offset); 1100 offset);
1217 #endif 1101 #endif
1218 } 1102 }
1219 return
1220 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1221 ((Matcher::_regEncode[dst_first] >= 8)
1222 ? 6
1223 : (5 + ((UseAVX>0)?1:0))); // REX
1224 } 1103 }
1104 return 0;
1225 } 1105 }
1226 } else if (src_first_rc == rc_int) { 1106 } else if (src_first_rc == rc_int) {
1227 // gpr -> 1107 // gpr ->
1228 if (dst_first_rc == rc_stack) { 1108 if (dst_first_rc == rc_stack) {
1229 // gpr -> mem 1109 // gpr -> mem
1230 if ((src_first & 1) == 0 && src_first + 1 == src_second && 1110 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1231 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 1111 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1232 // 64-bit 1112 // 64-bit
1233 int offset = ra_->reg2offset(dst_first); 1113 int offset = ra_->reg2offset(dst_first);
1234 if (cbuf) { 1114 if (cbuf) {
1235 if (Matcher::_regEncode[src_first] < 8) { 1115 MacroAssembler _masm(cbuf);
1236 emit_opcode(*cbuf, Assembler::REX_W); 1116 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1237 } else {
1238 emit_opcode(*cbuf, Assembler::REX_WR);
1239 }
1240 emit_opcode(*cbuf, 0x89);
1241 encode_RegMem(*cbuf,
1242 Matcher::_regEncode[src_first],
1243 RSP_enc, 0x4, 0, offset,
1244 false);
1245 #ifndef PRODUCT 1117 #ifndef PRODUCT
1246 } else if (!do_size) { 1118 } else {
1247 st->print("movq [rsp + #%d], %s\t# spill", 1119 st->print("movq [rsp + #%d], %s\t# spill",
1248 offset, 1120 offset,
1249 Matcher::regName[src_first]); 1121 Matcher::regName[src_first]);
1250 #endif 1122 #endif
1251 } 1123 }
1252 return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
1253 } else { 1124 } else {
1254 // 32-bit 1125 // 32-bit
1255 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1126 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1256 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1127 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1257 int offset = ra_->reg2offset(dst_first); 1128 int offset = ra_->reg2offset(dst_first);
1258 if (cbuf) { 1129 if (cbuf) {
1259 if (Matcher::_regEncode[src_first] >= 8) { 1130 MacroAssembler _masm(cbuf);
1260 emit_opcode(*cbuf, Assembler::REX_R); 1131 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
1261 }
1262 emit_opcode(*cbuf, 0x89);
1263 encode_RegMem(*cbuf,
1264 Matcher::_regEncode[src_first],
1265 RSP_enc, 0x4, 0, offset,
1266 false);
1267 #ifndef PRODUCT 1132 #ifndef PRODUCT
1268 } else if (!do_size) { 1133 } else {
1269 st->print("movl [rsp + #%d], %s\t# spill", 1134 st->print("movl [rsp + #%d], %s\t# spill",
1270 offset, 1135 offset,
1271 Matcher::regName[src_first]); 1136 Matcher::regName[src_first]);
1272 #endif 1137 #endif
1273 } 1138 }
1274 return
1275 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1276 ((Matcher::_regEncode[src_first] < 8)
1277 ? 3
1278 : 4); // REX
1279 } 1139 }
1140 return 0;
1280 } else if (dst_first_rc == rc_int) { 1141 } else if (dst_first_rc == rc_int) {
1281 // gpr -> gpr 1142 // gpr -> gpr
1282 if ((src_first & 1) == 0 && src_first + 1 == src_second && 1143 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1283 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 1144 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1284 // 64-bit 1145 // 64-bit
1285 if (cbuf) { 1146 if (cbuf) {
1286 if (Matcher::_regEncode[dst_first] < 8) { 1147 MacroAssembler _masm(cbuf);
1287 if (Matcher::_regEncode[src_first] < 8) { 1148 __ movq(as_Register(Matcher::_regEncode[dst_first]),
1288 emit_opcode(*cbuf, Assembler::REX_W); 1149 as_Register(Matcher::_regEncode[src_first]));
1289 } else {
1290 emit_opcode(*cbuf, Assembler::REX_WB);
1291 }
1292 } else {
1293 if (Matcher::_regEncode[src_first] < 8) {
1294 emit_opcode(*cbuf, Assembler::REX_WR);
1295 } else {
1296 emit_opcode(*cbuf, Assembler::REX_WRB);
1297 }
1298 }
1299 emit_opcode(*cbuf, 0x8B);
1300 emit_rm(*cbuf, 0x3,
1301 Matcher::_regEncode[dst_first] & 7,
1302 Matcher::_regEncode[src_first] & 7);
1303 #ifndef PRODUCT 1150 #ifndef PRODUCT
1304 } else if (!do_size) { 1151 } else {
1305 st->print("movq %s, %s\t# spill", 1152 st->print("movq %s, %s\t# spill",
1306 Matcher::regName[dst_first], 1153 Matcher::regName[dst_first],
1307 Matcher::regName[src_first]); 1154 Matcher::regName[src_first]);
1308 #endif 1155 #endif
1309 } 1156 }
1310 return 3; // REX 1157 return 0;
1311 } else { 1158 } else {
1312 // 32-bit 1159 // 32-bit
1313 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1160 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1314 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1161 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1315 if (cbuf) { 1162 if (cbuf) {
1316 if (Matcher::_regEncode[dst_first] < 8) { 1163 MacroAssembler _masm(cbuf);
1317 if (Matcher::_regEncode[src_first] >= 8) { 1164 __ movl(as_Register(Matcher::_regEncode[dst_first]),
1318 emit_opcode(*cbuf, Assembler::REX_B); 1165 as_Register(Matcher::_regEncode[src_first]));
1319 }
1320 } else {
1321 if (Matcher::_regEncode[src_first] < 8) {
1322 emit_opcode(*cbuf, Assembler::REX_R);
1323 } else {
1324 emit_opcode(*cbuf, Assembler::REX_RB);
1325 }
1326 }
1327 emit_opcode(*cbuf, 0x8B);
1328 emit_rm(*cbuf, 0x3,
1329 Matcher::_regEncode[dst_first] & 7,
1330 Matcher::_regEncode[src_first] & 7);
1331 #ifndef PRODUCT 1166 #ifndef PRODUCT
1332 } else if (!do_size) { 1167 } else {
1333 st->print("movl %s, %s\t# spill", 1168 st->print("movl %s, %s\t# spill",
1334 Matcher::regName[dst_first], 1169 Matcher::regName[dst_first],
1335 Matcher::regName[src_first]); 1170 Matcher::regName[src_first]);
1336 #endif 1171 #endif
1337 } 1172 }
1338 return 1173 return 0;
1339 (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
1340 ? 2
1341 : 3; // REX
1342 } 1174 }
1343 } else if (dst_first_rc == rc_float) { 1175 } else if (dst_first_rc == rc_float) {
1344 // gpr -> xmm 1176 // gpr -> xmm
1345 if ((src_first & 1) == 0 && src_first + 1 == src_second && 1177 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1346 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 1178 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1347 // 64-bit 1179 // 64-bit
1348 if (cbuf) { 1180 if (cbuf) {
1349 MacroAssembler _masm(cbuf); 1181 MacroAssembler _masm(cbuf);
1350 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); 1182 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1351 #ifndef PRODUCT 1183 #ifndef PRODUCT
1352 } else if (!do_size) { 1184 } else {
1353 st->print("movdq %s, %s\t# spill", 1185 st->print("movdq %s, %s\t# spill",
1354 Matcher::regName[dst_first], 1186 Matcher::regName[dst_first],
1355 Matcher::regName[src_first]); 1187 Matcher::regName[src_first]);
1356 #endif 1188 #endif
1357 } 1189 }
1358 return 5; // REX
1359 } else { 1190 } else {
1360 // 32-bit 1191 // 32-bit
1361 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1192 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1362 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1193 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1363 if (cbuf) { 1194 if (cbuf) {
1364 MacroAssembler _masm(cbuf); 1195 MacroAssembler _masm(cbuf);
1365 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); 1196 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
1366 #ifndef PRODUCT 1197 #ifndef PRODUCT
1367 } else if (!do_size) { 1198 } else {
1368 st->print("movdl %s, %s\t# spill", 1199 st->print("movdl %s, %s\t# spill",
1369 Matcher::regName[dst_first], 1200 Matcher::regName[dst_first],
1370 Matcher::regName[src_first]); 1201 Matcher::regName[src_first]);
1371 #endif 1202 #endif
1372 } 1203 }
1373 return
1374 (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
1375 ? 5
1376 : (4 + ((UseAVX>0)?1:0)); // REX
1377 } 1204 }
1205 return 0;
1378 } 1206 }
1379 } else if (src_first_rc == rc_float) { 1207 } else if (src_first_rc == rc_float) {
1380 // xmm -> 1208 // xmm ->
1381 if (dst_first_rc == rc_stack) { 1209 if (dst_first_rc == rc_stack) {
1382 // xmm -> mem 1210 // xmm -> mem
1386 int offset = ra_->reg2offset(dst_first); 1214 int offset = ra_->reg2offset(dst_first);
1387 if (cbuf) { 1215 if (cbuf) {
1388 MacroAssembler _masm(cbuf); 1216 MacroAssembler _masm(cbuf);
1389 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); 1217 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1390 #ifndef PRODUCT 1218 #ifndef PRODUCT
1391 } else if (!do_size) { 1219 } else {
1392 st->print("movsd [rsp + #%d], %s\t# spill", 1220 st->print("movsd [rsp + #%d], %s\t# spill",
1393 offset, 1221 offset,
1394 Matcher::regName[src_first]); 1222 Matcher::regName[src_first]);
1395 #endif 1223 #endif
1396 } 1224 }
1397 return
1398 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1399 ((Matcher::_regEncode[src_first] >= 8)
1400 ? 6
1401 : (5 + ((UseAVX>0)?1:0))); // REX
1402 } else { 1225 } else {
1403 // 32-bit 1226 // 32-bit
1404 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1227 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1405 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1228 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1406 int offset = ra_->reg2offset(dst_first); 1229 int offset = ra_->reg2offset(dst_first);
1407 if (cbuf) { 1230 if (cbuf) {
1408 MacroAssembler _masm(cbuf); 1231 MacroAssembler _masm(cbuf);
1409 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); 1232 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
1410 #ifndef PRODUCT 1233 #ifndef PRODUCT
1411 } else if (!do_size) { 1234 } else {
1412 st->print("movss [rsp + #%d], %s\t# spill", 1235 st->print("movss [rsp + #%d], %s\t# spill",
1413 offset, 1236 offset,
1414 Matcher::regName[src_first]); 1237 Matcher::regName[src_first]);
1415 #endif 1238 #endif
1416 } 1239 }
1417 return
1418 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
1419 ((Matcher::_regEncode[src_first] >=8)
1420 ? 6
1421 : (5 + ((UseAVX>0)?1:0))); // REX
1422 } 1240 }
1241 return 0;
1423 } else if (dst_first_rc == rc_int) { 1242 } else if (dst_first_rc == rc_int) {
1424 // xmm -> gpr 1243 // xmm -> gpr
1425 if ((src_first & 1) == 0 && src_first + 1 == src_second && 1244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1426 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 1245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1427 // 64-bit 1246 // 64-bit
1428 if (cbuf) { 1247 if (cbuf) {
1429 MacroAssembler _masm(cbuf); 1248 MacroAssembler _masm(cbuf);
1430 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); 1249 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1431 #ifndef PRODUCT 1250 #ifndef PRODUCT
1432 } else if (!do_size) { 1251 } else {
1433 st->print("movdq %s, %s\t# spill", 1252 st->print("movdq %s, %s\t# spill",
1434 Matcher::regName[dst_first], 1253 Matcher::regName[dst_first],
1435 Matcher::regName[src_first]); 1254 Matcher::regName[src_first]);
1436 #endif 1255 #endif
1437 } 1256 }
1438 return 5; // REX
1439 } else { 1257 } else {
1440 // 32-bit 1258 // 32-bit
1441 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1259 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1442 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1260 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1443 if (cbuf) { 1261 if (cbuf) {
1444 MacroAssembler _masm(cbuf); 1262 MacroAssembler _masm(cbuf);
1445 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); 1263 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1446 #ifndef PRODUCT 1264 #ifndef PRODUCT
1447 } else if (!do_size) { 1265 } else {
1448 st->print("movdl %s, %s\t# spill", 1266 st->print("movdl %s, %s\t# spill",
1449 Matcher::regName[dst_first], 1267 Matcher::regName[dst_first],
1450 Matcher::regName[src_first]); 1268 Matcher::regName[src_first]);
1451 #endif 1269 #endif
1452 } 1270 }
1453 return
1454 (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
1455 ? 5
1456 : (4 + ((UseAVX>0)?1:0)); // REX
1457 } 1271 }
1272 return 0;
1458 } else if (dst_first_rc == rc_float) { 1273 } else if (dst_first_rc == rc_float) {
1459 // xmm -> xmm 1274 // xmm -> xmm
1460 if ((src_first & 1) == 0 && src_first + 1 == src_second && 1275 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
1461 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 1276 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
1462 // 64-bit 1277 // 64-bit
1463 if (cbuf) { 1278 if (cbuf) {
1464 MacroAssembler _masm(cbuf); 1279 MacroAssembler _masm(cbuf);
1465 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); 1280 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1466 #ifndef PRODUCT 1281 #ifndef PRODUCT
1467 } else if (!do_size) { 1282 } else {
1468 st->print("%s %s, %s\t# spill", 1283 st->print("%s %s, %s\t# spill",
1469 UseXmmRegToRegMoveAll ? "movapd" : "movsd ", 1284 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
1470 Matcher::regName[dst_first], 1285 Matcher::regName[dst_first],
1471 Matcher::regName[src_first]); 1286 Matcher::regName[src_first]);
1472 #endif 1287 #endif
1473 } 1288 }
1474 return
1475 (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
1476 ? 5
1477 : (4 + ((UseAVX>0)?1:0)); // REX
1478 } else { 1289 } else {
1479 // 32-bit 1290 // 32-bit
1480 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 1291 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
1481 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 1292 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
1482 if (cbuf) { 1293 if (cbuf) {
1483 MacroAssembler _masm(cbuf); 1294 MacroAssembler _masm(cbuf);
1484 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); 1295 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
1485 #ifndef PRODUCT 1296 #ifndef PRODUCT
1486 } else if (!do_size) { 1297 } else {
1487 st->print("%s %s, %s\t# spill", 1298 st->print("%s %s, %s\t# spill",
1488 UseXmmRegToRegMoveAll ? "movaps" : "movss ", 1299 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
1489 Matcher::regName[dst_first], 1300 Matcher::regName[dst_first],
1490 Matcher::regName[src_first]); 1301 Matcher::regName[src_first]);
1491 #endif 1302 #endif
1492 } 1303 }
1493 return ((UseAVX>0) ? 5:
1494 ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
1495 ? (UseXmmRegToRegMoveAll ? 4 : 5)
1496 : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
1497 } 1304 }
1305 return 0;
1498 } 1306 }
1499 } 1307 }
1500 1308
1501 assert(0," foo "); 1309 assert(0," foo ");
1502 Unimplemented(); 1310 Unimplemented();
1503
1504 return 0; 1311 return 0;
1505 } 1312 }
1506 1313
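
After the vector cases are peeled off to the helpers, the rest of MachSpillCopyNode::implementation above is a dispatch on the (source class, destination class) pair plus operand size. A condensed sketch of that table (names mirror the code; the mnemonic strings summarize the printed forms and are illustrative, not a replacement for the emitter):

#include <cstdio>

enum RC { rc_stack, rc_int, rc_float };

static const char* copy_insn(RC src, RC dst, bool is64) {
  if (src == rc_stack && dst == rc_stack) return is64 ? "pushq/popq pair"      : "movl through saved rax";
  if (src == rc_stack && dst == rc_int)   return is64 ? "movq gpr, [rsp+off]"  : "movl gpr, [rsp+off]";
  if (src == rc_stack && dst == rc_float) return is64 ? "movsd xmm, [rsp+off]" : "movss xmm, [rsp+off]";
  if (src == rc_int   && dst == rc_stack) return is64 ? "movq [rsp+off], gpr"  : "movl [rsp+off], gpr";
  if (src == rc_int   && dst == rc_int)   return is64 ? "movq gpr, gpr"        : "movl gpr, gpr";
  if (src == rc_int   && dst == rc_float) return is64 ? "movdq xmm, gpr"       : "movdl xmm, gpr";
  if (src == rc_float && dst == rc_stack) return is64 ? "movsd [rsp+off], xmm" : "movss [rsp+off], xmm";
  if (src == rc_float && dst == rc_int)   return is64 ? "movdq gpr, xmm"       : "movdl gpr, xmm";
  return is64 ? "movapd/movsd xmm, xmm" : "movaps/movss xmm, xmm";  // float -> float
}

int main() {
  printf("%s\n", copy_insn(rc_float, rc_int, true));  // movdq gpr, xmm
}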
1507 #ifndef PRODUCT 1314 #ifndef PRODUCT
1508 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const 1315 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1509 {
1510 implementation(NULL, ra_, false, st); 1316 implementation(NULL, ra_, false, st);
1511 } 1317 }
1512 #endif 1318 #endif
1513 1319
1514 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const 1320 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1515 {
1516 implementation(&cbuf, ra_, false, NULL); 1321 implementation(&cbuf, ra_, false, NULL);
1517 } 1322 }
1518 1323
1519 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const 1324 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1520 { 1325 return MachNode::size(ra_);
1521 return implementation(NULL, ra_, true, NULL);
1522 } 1326 }
1523 1327
1524 //============================================================================= 1328 //=============================================================================
1525 #ifndef PRODUCT 1329 #ifndef PRODUCT
1526 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const 1330 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1733 // This is UltraSparc specific, true just means we have fast l2f conversion 1537 // This is UltraSparc specific, true just means we have fast l2f conversion
1734 const bool Matcher::convL2FSupported(void) { 1538 const bool Matcher::convL2FSupported(void) {
1735 return true; 1539 return true;
1736 } 1540 }
1737 1541
1738 // Vector width in bytes
1739 const uint Matcher::vector_width_in_bytes(void) {
1740 return 8;
1741 }
1742
1743 // Vector ideal reg
1744 const uint Matcher::vector_ideal_reg(void) {
1745 return Op_RegD;
1746 }
1747
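
The two hooks removed above pinned every vector at 64 bits (Op_RegD). Their replacements live with the shared vector code (presumably x86.ad, per the helper declarations earlier in this diff); a hedged sketch of the idea, with illustrative names that are not the actual functions, is that the answer now depends on CPU capability:

// Assumed shape only, not the real x86.ad code.
static int max_vector_width_in_bytes(bool has_avx, bool has_sse2) {
  if (has_avx)  return 32;  // 256-bit YMM -> Op_VecY
  if (has_sse2) return 16;  // 128-bit XMM -> Op_VecX
  return 8;                 // fall back to the old 64-bit limit
}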
1748 // Is this branch offset short enough that a short branch can be used? 1542 // Is this branch offset short enough that a short branch can be used?
1749 // 1543 //
1750 // NOTE: If the platform does not provide any short branch variants, then 1544 // NOTE: If the platform does not provide any short branch variants, then
1751 // this method should return false for offset 0. 1545 // this method should return false for offset 0.
1752 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1546 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1829 // call in the trampoline, and arguments in those registers not be 1623 // call in the trampoline, and arguments in those registers not be
1830 // available to the callee. 1624 // available to the callee.
1831 bool Matcher::can_be_java_arg(int reg) 1625 bool Matcher::can_be_java_arg(int reg)
1832 { 1626 {
1833 return 1627 return
1834 reg == RDI_num || reg == RDI_H_num || 1628 reg == RDI_num || reg == RDI_H_num ||
1835 reg == RSI_num || reg == RSI_H_num || 1629 reg == RSI_num || reg == RSI_H_num ||
1836 reg == RDX_num || reg == RDX_H_num || 1630 reg == RDX_num || reg == RDX_H_num ||
1837 reg == RCX_num || reg == RCX_H_num || 1631 reg == RCX_num || reg == RCX_H_num ||
1838 reg == R8_num || reg == R8_H_num || 1632 reg == R8_num || reg == R8_H_num ||
1839 reg == R9_num || reg == R9_H_num || 1633 reg == R9_num || reg == R9_H_num ||
1840 reg == R12_num || reg == R12_H_num || 1634 reg == R12_num || reg == R12_H_num ||
1841 reg == XMM0_num || reg == XMM0_H_num || 1635 reg == XMM0_num || reg == XMM0b_num ||
1842 reg == XMM1_num || reg == XMM1_H_num || 1636 reg == XMM1_num || reg == XMM1b_num ||
1843 reg == XMM2_num || reg == XMM2_H_num || 1637 reg == XMM2_num || reg == XMM2b_num ||
1844 reg == XMM3_num || reg == XMM3_H_num || 1638 reg == XMM3_num || reg == XMM3b_num ||
1845 reg == XMM4_num || reg == XMM4_H_num || 1639 reg == XMM4_num || reg == XMM4b_num ||
1846 reg == XMM5_num || reg == XMM5_H_num || 1640 reg == XMM5_num || reg == XMM5b_num ||
1847 reg == XMM6_num || reg == XMM6_H_num || 1641 reg == XMM6_num || reg == XMM6b_num ||
1848 reg == XMM7_num || reg == XMM7_H_num; 1642 reg == XMM7_num || reg == XMM7b_num;
1849 } 1643 }
1850 1644
1851 bool Matcher::is_spillable_arg(int reg) 1645 bool Matcher::is_spillable_arg(int reg)
1852 { 1646 {
1853 return can_be_java_arg(reg); 1647 return can_be_java_arg(reg);
3218 0, 3012 0,
3219 OptoReg::Bad, // Op_RegN 3013 OptoReg::Bad, // Op_RegN
3220 OptoReg::Bad, // Op_RegI 3014 OptoReg::Bad, // Op_RegI
3221 RAX_H_num, // Op_RegP 3015 RAX_H_num, // Op_RegP
3222 OptoReg::Bad, // Op_RegF 3016 OptoReg::Bad, // Op_RegF
3223 XMM0_H_num, // Op_RegD 3017 XMM0b_num, // Op_RegD
3224 RAX_H_num // Op_RegL 3018 RAX_H_num // Op_RegL
3225 }; 3019 };
3226 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type"); 3020 // Flags and vector registers are excluded.
3021 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
3227 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); 3022 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
3228 %} 3023 %}
3229 %} 3024 %}
3230 3025
3231 //----------ATTRIBUTES--------------------------------------------------------- 3026 //----------ATTRIBUTES---------------------------------------------------------
3982 match(RegD); 3777 match(RegD);
3983 3778
3984 format %{ %} 3779 format %{ %}
3985 interface(REG_INTER); 3780 interface(REG_INTER);
3986 %} 3781 %}
3987
3988 3782
3989 //----------Memory Operands---------------------------------------------------- 3783 //----------Memory Operands----------------------------------------------------
3990 // Direct Memory Operand 3784 // Direct Memory Operand
3991 // operand direct(immP addr) 3785 // operand direct(immP addr)
3992 // %{ 3786 // %{
5414 __ movdbl($dst$$XMMRegister, $mem$$Address); 5208 __ movdbl($dst$$XMMRegister, $mem$$Address);
5415 %} 5209 %}
5416 ins_pipe(pipe_slow); // XXX 5210 ins_pipe(pipe_slow); // XXX
5417 %} 5211 %}
5418 5212
5419 // Load Aligned Packed Byte to XMM register
5420 instruct loadA8B(regD dst, memory mem) %{
5421 match(Set dst (Load8B mem));
5422 ins_cost(125);
5423 format %{ "MOVQ $dst,$mem\t! packed8B" %}
5424 ins_encode %{
5425 __ movq($dst$$XMMRegister, $mem$$Address);
5426 %}
5427 ins_pipe( pipe_slow );
5428 %}
5429
5430 // Load Aligned Packed Short to XMM register
5431 instruct loadA4S(regD dst, memory mem) %{
5432 match(Set dst (Load4S mem));
5433 ins_cost(125);
5434 format %{ "MOVQ $dst,$mem\t! packed4S" %}
5435 ins_encode %{
5436 __ movq($dst$$XMMRegister, $mem$$Address);
5437 %}
5438 ins_pipe( pipe_slow );
5439 %}
5440
5441 // Load Aligned Packed Char to XMM register
5442 instruct loadA4C(regD dst, memory mem) %{
5443 match(Set dst (Load4C mem));
5444 ins_cost(125);
5445 format %{ "MOVQ $dst,$mem\t! packed4C" %}
5446 ins_encode %{
5447 __ movq($dst$$XMMRegister, $mem$$Address);
5448 %}
5449 ins_pipe( pipe_slow );
5450 %}
5451
5452 // Load Aligned Packed Integer to XMM register
5453 instruct load2IU(regD dst, memory mem) %{
5454 match(Set dst (Load2I mem));
5455 ins_cost(125);
5456 format %{ "MOVQ $dst,$mem\t! packed2I" %}
5457 ins_encode %{
5458 __ movq($dst$$XMMRegister, $mem$$Address);
5459 %}
5460 ins_pipe( pipe_slow );
5461 %}
5462
5463 // Load Aligned Packed Single to XMM
5464 instruct loadA2F(regD dst, memory mem) %{
5465 match(Set dst (Load2F mem));
5466 ins_cost(125);
5467 format %{ "MOVQ $dst,$mem\t! packed2F" %}
5468 ins_encode %{
5469 __ movq($dst$$XMMRegister, $mem$$Address);
5470 %}
5471 ins_pipe( pipe_slow );
5472 %}
5473
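
Every removed packed-load pattern above emits the same instruction: an 8-byte MOVQ into the low half of an XMM register, with only the matched ideal type differing. A standalone model of that load's semantics (illustrative, not HotSpot code):

#include <cstdint>
#include <cstring>
#include <cstdio>

struct XMM { uint8_t b[16]; };

// movq xmm, m64: load 64 bits into the low half, zero the high half.
static void movq_load(XMM& dst, const void* mem) {
  memcpy(dst.b, mem, 8);
  memset(dst.b + 8, 0, 8);
}

int main() {
  uint8_t mem[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  XMM x;
  movq_load(x, mem);
  printf("%d %d\n", x.b[0], x.b[8]);  // 1 0
}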
5474 // Load Effective Address 5213 // Load Effective Address
5475 instruct leaP8(rRegP dst, indOffset8 mem) 5214 instruct leaP8(rRegP dst, indOffset8 mem)
5476 %{ 5215 %{
5477 match(Set dst mem); 5216 match(Set dst mem);
5478 5217
6198 opcode(0xC6); /* C6 /0 */ 5937 opcode(0xC6); /* C6 /0 */
6199 ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src)); 5938 ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6200 ins_pipe(ialu_mem_imm); 5939 ins_pipe(ialu_mem_imm);
6201 %} 5940 %}
6202 5941
6203 // Store Aligned Packed Byte XMM register to memory
6204 instruct storeA8B(memory mem, regD src) %{
6205 match(Set mem (Store8B mem src));
6206 ins_cost(145);
6207 format %{ "MOVQ $mem,$src\t! packed8B" %}
6208 ins_encode %{
6209 __ movq($mem$$Address, $src$$XMMRegister);
6210 %}
6211 ins_pipe( pipe_slow );
6212 %}
6213
6214 // Store Aligned Packed Char/Short XMM register to memory
6215 instruct storeA4C(memory mem, regD src) %{
6216 match(Set mem (Store4C mem src));
6217 ins_cost(145);
6218 format %{ "MOVQ $mem,$src\t! packed4C" %}
6219 ins_encode %{
6220 __ movq($mem$$Address, $src$$XMMRegister);
6221 %}
6222 ins_pipe( pipe_slow );
6223 %}
6224
6225 // Store Aligned Packed Integer XMM register to memory
6226 instruct storeA2I(memory mem, regD src) %{
6227 match(Set mem (Store2I mem src));
6228 ins_cost(145);
6229 format %{ "MOVQ $mem,$src\t! packed2I" %}
6230 ins_encode %{
6231 __ movq($mem$$Address, $src$$XMMRegister);
6232 %}
6233 ins_pipe( pipe_slow );
6234 %}
6235
6236 // Store CMS card-mark Immediate 5942 // Store CMS card-mark Immediate
6237 instruct storeImmCM0_reg(memory mem, immI0 zero) 5943 instruct storeImmCM0_reg(memory mem, immI0 zero)
6238 %{ 5944 %{
6239 predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 5945 predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
6240 match(Set mem (StoreCM mem zero)); 5946 match(Set mem (StoreCM mem zero));
6254 ins_cost(150); // XXX 5960 ins_cost(150); // XXX
6255 format %{ "movb $mem, $src\t# CMS card-mark byte 0" %} 5961 format %{ "movb $mem, $src\t# CMS card-mark byte 0" %}
6256 opcode(0xC6); /* C6 /0 */ 5962 opcode(0xC6); /* C6 /0 */
6257 ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src)); 5963 ins_encode(REX_mem(mem), OpcP, RM_opc_mem(0x00, mem), Con8or32(src));
6258 ins_pipe(ialu_mem_imm); 5964 ins_pipe(ialu_mem_imm);
6259 %}
6260
6261 // Store Aligned Packed Single Float XMM register to memory
6262 instruct storeA2F(memory mem, regD src) %{
6263 match(Set mem (Store2F mem src));
6264 ins_cost(145);
6265 format %{ "MOVQ $mem,$src\t! packed2F" %}
6266 ins_encode %{
6267 __ movq($mem$$Address, $src$$XMMRegister);
6268 %}
6269 ins_pipe( pipe_slow );
6270 %} 5965 %}
6271 5966
6272 // Store Float 5967 // Store Float
6273 instruct storeF(memory mem, regF src) 5968 instruct storeF(memory mem, regF src)
6274 %{ 5969 %{
10373 format %{ "movd $dst,$src\t# MoveL2D" %} 10068 format %{ "movd $dst,$src\t# MoveL2D" %}
10374 ins_encode %{ 10069 ins_encode %{
10375 __ movdq($dst$$XMMRegister, $src$$Register); 10070 __ movdq($dst$$XMMRegister, $src$$Register);
10376 %} 10071 %}
10377 ins_pipe( pipe_slow ); 10072 ins_pipe( pipe_slow );
10378 %}
10379
10380 // Replicate scalar to packed byte (1 byte) values in xmm
10381 instruct Repl8B_reg(regD dst, regD src) %{
10382 match(Set dst (Replicate8B src));
10383 format %{ "MOVDQA $dst,$src\n\t"
10384 "PUNPCKLBW $dst,$dst\n\t"
10385 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10386 ins_encode %{
10387 if ($dst$$reg != $src$$reg) {
10388 __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
10389 }
10390 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
10391 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
10392 %}
10393 ins_pipe( pipe_slow );
10394 %}
10395
10396 // Replicate scalar to packed byte (1 byte) values in xmm
10397 instruct Repl8B_rRegI(regD dst, rRegI src) %{
10398 match(Set dst (Replicate8B src));
10399 format %{ "MOVD $dst,$src\n\t"
10400 "PUNPCKLBW $dst,$dst\n\t"
10401 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
10402 ins_encode %{
10403 __ movdl($dst$$XMMRegister, $src$$Register);
10404 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
10405 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
10406 %}
10407 ins_pipe( pipe_slow );
10408 %}
10409
10410 // Replicate scalar zero to packed byte (1 byte) values in xmm
10411 instruct Repl8B_immI0(regD dst, immI0 zero) %{
10412 match(Set dst (Replicate8B zero));
10413 format %{ "PXOR $dst,$dst\t! replicate8B" %}
10414 ins_encode %{
10415 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10416 %}
10417 ins_pipe( fpu_reg_reg );
10418 %}
10419
10420 // Replicate scalar to packed short (2 byte) values in xmm
10421 instruct Repl4S_reg(regD dst, regD src) %{
10422 match(Set dst (Replicate4S src));
10423 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
10424 ins_encode %{
10425 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
10426 %}
10427 ins_pipe( fpu_reg_reg );
10428 %}
10429
10430 // Replicate scalar to packed short (2 byte) values in xmm
10431 instruct Repl4S_rRegI(regD dst, rRegI src) %{
10432 match(Set dst (Replicate4S src));
10433 format %{ "MOVD $dst,$src\n\t"
10434 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
10435 ins_encode %{
10436 __ movdl($dst$$XMMRegister, $src$$Register);
10437 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
10438 %}
10439 ins_pipe( fpu_reg_reg );
10440 %}
10441
10442 // Replicate scalar zero to packed short (2 byte) values in xmm
10443 instruct Repl4S_immI0(regD dst, immI0 zero) %{
10444 match(Set dst (Replicate4S zero));
10445 format %{ "PXOR $dst,$dst\t! replicate4S" %}
10446 ins_encode %{
10447 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10448 %}
10449 ins_pipe( fpu_reg_reg );
10450 %}
10451
10452 // Replicate scalar to packed char (2 byte) values in xmm
10453 instruct Repl4C_reg(regD dst, regD src) %{
10454 match(Set dst (Replicate4C src));
10455 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
10456 ins_encode %{
10457 __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
10458 %}
10459 ins_pipe( fpu_reg_reg );
10460 %}
10461
10462 // Replicate scalar to packed char (2 byte) values in xmm
10463 instruct Repl4C_rRegI(regD dst, rRegI src) %{
10464 match(Set dst (Replicate4C src));
10465 format %{ "MOVD $dst,$src\n\t"
10466 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
10467 ins_encode %{
10468 __ movdl($dst$$XMMRegister, $src$$Register);
10469 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
10470 %}
10471 ins_pipe( fpu_reg_reg );
10472 %}
10473
10474 // Replicate scalar zero to packed char (2 byte) values in xmm
10475 instruct Repl4C_immI0(regD dst, immI0 zero) %{
10476 match(Set dst (Replicate4C zero));
10477 format %{ "PXOR $dst,$dst\t! replicate4C" %}
10478 ins_encode %{
10479 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10480 %}
10481 ins_pipe( fpu_reg_reg );
10482 %}
10483
10484 // Replicate scalar to packed integer (4 byte) values in xmm
10485 instruct Repl2I_reg(regD dst, regD src) %{
10486 match(Set dst (Replicate2I src));
10487 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
10488 ins_encode %{
10489 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
10490 %}
10491 ins_pipe( fpu_reg_reg );
10492 %}
10493
10494 // Replicate scalar to packed integer (4 byte) values in xmm
10495 instruct Repl2I_rRegI(regD dst, rRegI src) %{
10496 match(Set dst (Replicate2I src));
10497 format %{ "MOVD $dst,$src\n\t"
10498 "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
10499 ins_encode %{
10500 __ movdl($dst$$XMMRegister, $src$$Register);
10501 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
10502 %}
10503 ins_pipe( fpu_reg_reg );
10504 %}
10505
10506 // Replicate scalar zero to packed integer (4 byte) values in xmm
10507 instruct Repl2I_immI0(regD dst, immI0 zero) %{
10508 match(Set dst (Replicate2I zero));
10509 format %{ "PXOR $dst,$dst\t! replicate2I" %}
10510 ins_encode %{
10511 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10512 %}
10513 ins_pipe( fpu_reg_reg );
10514 %}
10515
10516 // Replicate scalar to packed single precision floating point values in xmm
10517 instruct Repl2F_reg(regD dst, regD src) %{
10518 match(Set dst (Replicate2F src));
10519 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10520 ins_encode %{
10521 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
10522 %}
10523 ins_pipe( fpu_reg_reg );
10524 %}
10525
10526 // Replicate scalar to packed single precision floating point values in xmm
10527 instruct Repl2F_regF(regD dst, regF src) %{
10528 match(Set dst (Replicate2F src));
10529 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
10530 ins_encode %{
10531 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
10532 %}
10533 ins_pipe( fpu_reg_reg );
10534 %}
10535
10536 // Replicate scalar to packed single precision floating point values in xmm
10537 instruct Repl2F_immF0(regD dst, immF0 zero) %{
10538 match(Set dst (Replicate2F zero));
10539 format %{ "PXOR $dst,$dst\t! replicate2F" %}
10540 ins_encode %{
10541 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
10542 %}
10543 ins_pipe( fpu_reg_reg );
10544 %} 10073 %}
10545 10074
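
All of the removed Repl* patterns above share one broadcast recipe: get the scalar into an XMM register (MOVD or MOVDQA), then widen and shuffle (PUNPCKLBW, PSHUFLW/PSHUFD) until every lane holds a copy, while the zero variants simply PXOR the register. A standalone model of the byte case, semantics only, not HotSpot code:

#include <cstdint>
#include <cstdio>

// Replicate8B semantics: splat one byte across all 8 lanes of the
// 64-bit vector held in the low half of an XMM register.
static void replicate8B(uint8_t src, uint8_t dst[8]) {
  for (int i = 0; i < 8; i++) dst[i] = src;
}

int main() {
  uint8_t v[8];
  replicate8B(0x2a, v);
  for (int i = 0; i < 8; i++) printf("%02x ", v[i]);
  printf("\n");  // 2a 2a 2a 2a 2a 2a 2a 2a
}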
10546 10075
10547 // ======================================================================= 10076 // =======================================================================
10548 // fast clearing of an array 10077 // fast clearing of an array