comparison src/cpu/x86/vm/x86.ad @ 6179:8c92982cbbc4

7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
author kvn
date Fri, 15 Jun 2012 01:25:19 -0700
parents 9b8ce46870df
children 6f8f439e247d
comparison
6146:eba1d5bce9e8 6179:8c92982cbbc4
22 //
23 //
24
25 // X86 Common Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
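As a reading aid for the definitions that follow, the first XMM entry,

  reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());

reads as: the low 32-bit word of xmm0 is Save-On-Call under both the Java and the
C calling conventions, is spilled and reloaded as a float (Op_RegF), has hardware
encoding 0, and is bound to the VM register returned by xmm0->as_VMReg().
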
62 // XMM registers. 256-bit registers of 8 words each, labeled (a)-(h).
63 // Word (a) in each register holds a Float, words (a)-(b) hold a Double.
64 // The whole registers are used in SSE4.2 intrinsics,
65 // array copy stubs and superword operations (see UseSSE42Intrinsics,
66 // UseXMMForArrayCopy and UseSuperWord flags).
67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
68 // Linux ABI: No register preserved across function calls
69 // XMM0-XMM7 might hold parameters
70 // Windows ABI: XMM6-XMM15 preserved across function calls
71 // XMM0-XMM3 might hold parameters
72
73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
81
82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
90
91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
99
100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
108
109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
117
118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
126
127 #ifdef _WIN64
128
129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
137
138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
146
147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
155
156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
164
165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
173
174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
182
183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
191
192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
200
201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
209
210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
218
219 #else // _WIN64
220
221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
229
230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
238
239 #ifdef _LP64
240
241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
249
250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
258
259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
267
268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
276
277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
285
286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
294
295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
303
304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
312
313 #endif // _LP64
314
315 #endif // _WIN64
316
317 #ifdef _LP64
318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
319 #else
320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
321 #endif // _LP64
322
323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
331 #ifdef _LP64
332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
340 #endif
341 );
342
343 // flags allocation class should be last.
344 alloc_class chunk2(RFLAGS);
345
346 // Singleton class for condition codes
347 reg_class int_flags(RFLAGS);
348
349 // Class for all float registers
350 reg_class float_reg(XMM0,
351 XMM1,
352 XMM2,
353 XMM3,
354 XMM4,
355 XMM5,
356 XMM6,
357 XMM7
358 #ifdef _LP64
359 ,XMM8,
360 XMM9,
361 XMM10,
362 XMM11,
363 XMM12,
364 XMM13,
365 XMM14,
366 XMM15
367 #endif
368 );
369
370 // Class for all double registers
371 reg_class double_reg(XMM0, XMM0b,
372 XMM1, XMM1b,
373 XMM2, XMM2b,
374 XMM3, XMM3b,
375 XMM4, XMM4b,
376 XMM5, XMM5b,
377 XMM6, XMM6b,
378 XMM7, XMM7b
379 #ifdef _LP64
380 ,XMM8, XMM8b,
381 XMM9, XMM9b,
382 XMM10, XMM10b,
383 XMM11, XMM11b,
384 XMM12, XMM12b,
385 XMM13, XMM13b,
386 XMM14, XMM14b,
387 XMM15, XMM15b
388 #endif
389 );
390
391 // Class for all 32bit vector registers
392 reg_class vectors_reg(XMM0,
393 XMM1,
394 XMM2,
395 XMM3,
396 XMM4,
397 XMM5,
398 XMM6,
399 XMM7
400 #ifdef _LP64
401 ,XMM8,
402 XMM9,
403 XMM10,
404 XMM11,
405 XMM12,
406 XMM13,
407 XMM14,
408 XMM15
409 #endif
410 );
411
412 // Class for all 64bit vector registers
413 reg_class vectord_reg(XMM0, XMM0b,
414 XMM1, XMM1b,
415 XMM2, XMM2b,
416 XMM3, XMM3b,
417 XMM4, XMM4b,
418 XMM5, XMM5b,
419 XMM6, XMM6b,
420 XMM7, XMM7b
421 #ifdef _LP64
422 ,XMM8, XMM8b,
423 XMM9, XMM9b,
424 XMM10, XMM10b,
425 XMM11, XMM11b,
426 XMM12, XMM12b,
427 XMM13, XMM13b,
428 XMM14, XMM14b,
429 XMM15, XMM15b
430 #endif
431 );
432
433 // Class for all 128bit vector registers
434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d,
435 XMM1, XMM1b, XMM1c, XMM1d,
436 XMM2, XMM2b, XMM2c, XMM2d,
437 XMM3, XMM3b, XMM3c, XMM3d,
438 XMM4, XMM4b, XMM4c, XMM4d,
439 XMM5, XMM5b, XMM5c, XMM5d,
440 XMM6, XMM6b, XMM6c, XMM6d,
441 XMM7, XMM7b, XMM7c, XMM7d
442 #ifdef _LP64
443 ,XMM8, XMM8b, XMM8c, XMM8d,
444 XMM9, XMM9b, XMM9c, XMM9d,
445 XMM10, XMM10b, XMM10c, XMM10d,
446 XMM11, XMM11b, XMM11c, XMM11d,
447 XMM12, XMM12b, XMM12c, XMM12d,
448 XMM13, XMM13b, XMM13c, XMM13d,
449 XMM14, XMM14b, XMM14c, XMM14d,
450 XMM15, XMM15b, XMM15c, XMM15d
451 #endif
452 );
453
454 // Class for all 256bit vector registers
455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
463 #ifdef _LP64
464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
472 #endif
473 );
474
475 %}
476
477 source %{
478 // Float masks come from different places depending on platform.
479 #ifdef _LP64
480 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
481 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
486 static address float_signflip() { return (address)float_signflip_pool; }
487 static address double_signmask() { return (address)double_signmask_pool; }
488 static address double_signflip() { return (address)double_signflip_pool; }
489 #endif
490
491 // Map Types to machine register types
492 const int Matcher::base2reg[Type::lastype] = {
493 Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
494 Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
495 Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
496 Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
497 0, 0/*abio*/,
498 Op_RegP /* Return address */, 0, /* the memories */
499 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
500 0 /*bottom*/
501 };
502
503 // Max vector size in bytes. 0 if not supported.
504 const int Matcher::vector_width_in_bytes(BasicType bt) {
505 assert(is_java_primitive(bt), "only primitive type vectors");
506 if (UseSSE < 2) return 0;
507 // SSE2 supports 128bit vectors for all types.
508 // AVX2 supports 256bit vectors for all types.
509 int size = (UseAVX > 1) ? 32 : 16;
510 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
511 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
512 size = 32;
513 // Use flag to limit vector size.
514 size = MIN2(size,(int)MaxVectorSize);
515 // Minimum 2 values in vector (or 4 for bytes).
516 switch (bt) {
517 case T_DOUBLE:
518 case T_LONG:
519 if (size < 16) return 0;
520 case T_FLOAT:
521 case T_INT:
522 if (size < 8) return 0;
523 case T_BOOLEAN:
524 case T_BYTE:
525 case T_CHAR:
526 case T_SHORT:
527 if (size < 4) return 0;
528 break;
529 default:
530 ShouldNotReachHere();
531 }
532 return size;
533 }
534
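A minimal standalone sketch (not HotSpot code; UseSSE, UseAVX and MaxVectorSize are
modeled here as plain parameters) that mirrors the width selection above, including
the intentional switch fall-through, so the resulting widths can be checked by hand:

#include <algorithm>
#include <cstdio>

enum BT { T_BYTE, T_SHORT, T_INT, T_LONG, T_FLOAT, T_DOUBLE };

// Mirrors Matcher::vector_width_in_bytes(): returns 0 when vectors are not supported.
static int width_in_bytes(BT bt, int use_sse, int use_avx, int max_vector_size) {
  if (use_sse < 2) return 0;
  int size = (use_avx > 1) ? 32 : 16;                     // AVX2: 256-bit for all types
  if (use_avx > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { // AVX1: 256-bit only for FP
    size = 32;
  }
  size = std::min(size, max_vector_size);                 // -XX:MaxVectorSize cap
  switch (bt) {                                           // need >= 2 elements (4 for bytes)
    case T_DOUBLE: case T_LONG:  if (size < 16) return 0; // fall through
    case T_FLOAT:  case T_INT:   if (size <  8) return 0; // fall through
    case T_BYTE:   case T_SHORT: if (size <  4) return 0; break;
    default:       return 0;                              // not a vectorizable element type
  }
  return size;
}

int main() {
  printf("AVX1, T_INT             -> %d\n", width_in_bytes(T_INT,    4, 1, 32)); // 16
  printf("AVX1, T_DOUBLE          -> %d\n", width_in_bytes(T_DOUBLE, 4, 1, 32)); // 32
  printf("AVX2, T_BYTE            -> %d\n", width_in_bytes(T_BYTE,   4, 2, 32)); // 32
  printf("MaxVectorSize=8, T_LONG -> %d\n", width_in_bytes(T_LONG,   4, 2, 8));  // 0
  return 0;
}
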
535 // Limits on vector size (number of elements) loaded into vector.
536 const int Matcher::max_vector_size(const BasicType bt) {
537 return vector_width_in_bytes(bt)/type2aelembytes(bt);
538 }
539 const int Matcher::min_vector_size(const BasicType bt) {
540 int max_size = max_vector_size(bt);
541 // Min size which can be loaded into vector is 4 bytes.
542 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
543 return MIN2(size,max_size);
544 }
545
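As a quick check of the arithmetic, assuming 256-bit vectors are available
(vector_width_in_bytes() == 32): max_vector_size(T_BYTE) = 32/1 = 32 with
min_vector_size(T_BYTE) = 4, while max_vector_size(T_DOUBLE) = 32/8 = 4 with
min_vector_size(T_DOUBLE) = 2. With only 128-bit SSE vectors the byte limits
become 16 and 4.
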
546 // Vector ideal reg corresponding to specified size in bytes
547 const int Matcher::vector_ideal_reg(int size) {
548 assert(MaxVectorSize >= size, "");
549 switch(size) {
550 case 4: return Op_VecS;
551 case 8: return Op_VecD;
552 case 16: return Op_VecX;
553 case 32: return Op_VecY;
554 }
555 ShouldNotReachHere();
556 return 0;
557 }
558
559 // x86 supports misaligned vector loads and stores.
560 const bool Matcher::misaligned_vectors_ok() {
561 return !AlignVector; // can be changed by flag
562 }
563
564 // Helper methods for MachSpillCopyNode::implementation().
565 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
566 int src_hi, int dst_hi, uint ireg, outputStream* st) {
567 // In the 64-bit VM size calculation is very complex, so the size is obtained
568 // by emitting the instructions into a scratch buffer instead.
569 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
570 assert(ireg == Op_VecS || // 32bit vector
571 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
572 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
573 "no non-adjacent vector moves" );
574 if (cbuf) {
575 MacroAssembler _masm(cbuf);
576 int offset = __ offset();
577 switch (ireg) {
578 case Op_VecS: // copy whole register
579 case Op_VecD:
580 case Op_VecX:
581 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
582 break;
583 case Op_VecY:
584 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
585 break;
586 default:
587 ShouldNotReachHere();
588 }
589 int size = __ offset() - offset;
590 #ifdef ASSERT
591 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
592 assert(!do_size || size == 4, "incorrect size calculation");
593 #endif
594 return size;
595 #ifndef PRODUCT
596 } else if (!do_size) {
597 switch (ireg) {
598 case Op_VecS:
599 case Op_VecD:
600 case Op_VecX:
601 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
602 break;
603 case Op_VecY:
604 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
605 break;
606 default:
607 ShouldNotReachHere();
608 }
609 #endif
610 }
611 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
612 return 4;
613 }
614
615 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
616 int stack_offset, int reg, uint ireg, outputStream* st) {
617 // In the 64-bit VM size calculation is very complex, so the size is obtained
618 // by emitting the instructions into a scratch buffer instead.
619 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
620 if (cbuf) {
621 MacroAssembler _masm(cbuf);
622 int offset = __ offset();
623 if (is_load) {
624 switch (ireg) {
625 case Op_VecS:
626 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
627 break;
628 case Op_VecD:
629 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
630 break;
631 case Op_VecX:
632 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
633 break;
634 case Op_VecY:
635 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
636 break;
637 default:
638 ShouldNotReachHere();
639 }
640 } else { // store
641 switch (ireg) {
642 case Op_VecS:
643 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
644 break;
645 case Op_VecD:
646 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
647 break;
648 case Op_VecX:
649 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
650 break;
651 case Op_VecY:
652 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
653 break;
654 default:
655 ShouldNotReachHere();
656 }
657 }
658 int size = __ offset() - offset;
659 #ifdef ASSERT
660 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
661 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
662 assert(!do_size || size == (5+offset_size), "incorrect size calculation");
663 #endif
664 return size;
665 #ifndef PRODUCT
666 } else if (!do_size) {
667 if (is_load) {
668 switch (ireg) {
669 case Op_VecS:
670 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
671 break;
672 case Op_VecD:
673 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
674 break;
675 case Op_VecX:
676 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
677 break;
678 case Op_VecY:
679 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
680 break;
681 default:
682 ShouldNotReachHere();
683 }
684 } else { // store
685 switch (ireg) {
686 case Op_VecS:
687 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
688 break;
689 case Op_VecD:
690 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
691 break;
692 case Op_VecX:
693 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
694 break;
695 case Op_VecY:
696 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
697 break;
698 default:
699 ShouldNotReachHere();
700 }
701 }
702 #endif
703 }
704 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
705 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
706 return 5+offset_size;
707 }
708
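The size bookkeeping used by the two helpers above for the 32-bit VM is assumed to
break down as a 2-byte SIMD/VEX prefix plus an opcode and a ModRM byte (4 bytes for
a register-register move), with an extra SIB byte and a disp8/disp32 for the
rsp-relative stack form (5 + offset bytes). A small standalone sketch of that
accounting, not HotSpot code:

#include <cstdio>

static int vec_move_size() { return 4; }                        // prefix(2) + opcode + ModRM
static int vec_spill_size(int disp) {                           // rsp-relative: adds SIB + disp
  int offset_size = (disp == 0) ? 0 : ((disp < 0x80) ? 1 : 4);  // disp8 vs disp32
  return 5 + offset_size;
}

int main() {
  printf("reg-reg move:    %d bytes\n", vec_move_size());       // 4
  printf("spill at +0x00:  %d bytes\n", vec_spill_size(0));     // 5
  printf("spill at +0x40:  %d bytes\n", vec_spill_size(0x40));  // 6
  printf("spill at +0x100: %d bytes\n", vec_spill_size(0x100)); // 9
  return 0;
}
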
709 static inline jfloat replicate4_imm(int con, int width) {
710 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
711 assert(width == 1 || width == 2, "only byte or short types here");
712 int bit_width = width * 8;
713 jint val = con;
714 val &= (1 << bit_width) - 1; // mask off sign bits
715 while(bit_width < 32) {
716 val |= (val << bit_width);
717 bit_width <<= 1;
718 }
719 jfloat fval = *((jfloat*) &val); // coerce to float type
720 return fval;
721 }
722
723 static inline jdouble replicate8_imm(int con, int width) {
724 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
725 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
726 int bit_width = width * 8;
727 jlong val = con;
728 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
729 while(bit_width < 64) {
730 val |= (val << bit_width);
731 bit_width <<= 1;
732 }
733 jdouble dval = *((jdouble*) &val); // coerce to double type
734 return dval;
735 }
736
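The bit pattern these helpers produce is easiest to see on a concrete value; the
standalone sketch below (not HotSpot code) repeats the same mask-and-double loop
for the 32-bit case, leaving out the final jfloat reinterpretation:

#include <cstdint>
#include <cstdio>

// Same mask-and-double loop as replicate4_imm(), minus the jfloat coercion.
static uint32_t replicate4(int con, int width_in_bytes) {  // width is 1 or 2
  int bit_width = width_in_bytes * 8;
  uint32_t val = (uint32_t)con & ((1u << bit_width) - 1);  // mask off sign bits
  while (bit_width < 32) {
    val |= (val << bit_width);                             // double the pattern
    bit_width <<= 1;
  }
  return val;
}

int main() {
  printf("0x%08x\n", replicate4(0x41, 1));  // byte  0x41 -> 0x41414141
  printf("0x%08x\n", replicate4(-2,   2));  // short   -2 -> 0xfffefffe
  return 0;
}
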
737 #ifndef PRODUCT
738 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
739 st->print("nop \t# %d bytes pad for loops and calls", _count);
740 }
741 #endif
797 }
798 %}
799
800 %}
801
802
803 //----------OPERANDS-----------------------------------------------------------
804 // Operand definitions must precede instruction definitions for correct parsing
805 // in the ADLC because operands constitute user defined types which are used in
806 // instruction definitions.
807
808 // Vectors
809 operand vecS() %{
810 constraint(ALLOC_IN_RC(vectors_reg));
811 match(VecS);
812
813 format %{ %}
814 interface(REG_INTER);
815 %}
816
817 operand vecD() %{
818 constraint(ALLOC_IN_RC(vectord_reg));
819 match(VecD);
820
821 format %{ %}
822 interface(REG_INTER);
823 %}
824
825 operand vecX() %{
826 constraint(ALLOC_IN_RC(vectorx_reg));
827 match(VecX);
828
829 format %{ %}
830 interface(REG_INTER);
831 %}
832
833 operand vecY() %{
834 constraint(ALLOC_IN_RC(vectory_reg));
835 match(VecY);
836
837 format %{ %}
838 interface(REG_INTER);
839 %}
840
841
842 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
843
844 // ============================================================================
845
846 instruct ShouldNotReachHere() %{
1586 __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
1587 %}
1588 ins_pipe(pipe_slow);
1589 %}
1590
1591
1592 // ====================VECTOR INSTRUCTIONS=====================================
1593
1594 // Load vectors (4 bytes long)
1595 instruct loadV4(vecS dst, memory mem) %{
1596 predicate(n->as_LoadVector()->memory_size() == 4);
1597 match(Set dst (LoadVector mem));
1598 ins_cost(125);
1599 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
1600 ins_encode %{
1601 __ movdl($dst$$XMMRegister, $mem$$Address);
1602 %}
1603 ins_pipe( pipe_slow );
1604 %}
1605
1606 // Load vectors (8 bytes long)
1607 instruct loadV8(vecD dst, memory mem) %{
1608 predicate(n->as_LoadVector()->memory_size() == 8);
1609 match(Set dst (LoadVector mem));
1610 ins_cost(125);
1611 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
1612 ins_encode %{
1613 __ movq($dst$$XMMRegister, $mem$$Address);
1614 %}
1615 ins_pipe( pipe_slow );
1616 %}
1617
1618 // Load vectors (16 bytes long)
1619 instruct loadV16(vecX dst, memory mem) %{
1620 predicate(n->as_LoadVector()->memory_size() == 16);
1621 match(Set dst (LoadVector mem));
1622 ins_cost(125);
1623 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
1624 ins_encode %{
1625 __ movdqu($dst$$XMMRegister, $mem$$Address);
1626 %}
1627 ins_pipe( pipe_slow );
1628 %}
1629
1630 // Load vectors (32 bytes long)
1631 instruct loadV32(vecY dst, memory mem) %{
1632 predicate(n->as_LoadVector()->memory_size() == 32);
1633 match(Set dst (LoadVector mem));
1634 ins_cost(125);
1635 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
1636 ins_encode %{
1637 __ vmovdqu($dst$$XMMRegister, $mem$$Address);
1638 %}
1639 ins_pipe( pipe_slow );
1640 %}
1641
1642 // Store vectors
1643 instruct storeV4(memory mem, vecS src) %{
1644 predicate(n->as_StoreVector()->memory_size() == 4);
1645 match(Set mem (StoreVector mem src));
1646 ins_cost(145);
1647 format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
1648 ins_encode %{
1649 __ movdl($mem$$Address, $src$$XMMRegister);
1650 %}
1651 ins_pipe( pipe_slow );
1652 %}
1653
1654 instruct storeV8(memory mem, vecD src) %{
1655 predicate(n->as_StoreVector()->memory_size() == 8);
1656 match(Set mem (StoreVector mem src));
1657 ins_cost(145);
1658 format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
1659 ins_encode %{
1660 __ movq($mem$$Address, $src$$XMMRegister);
1661 %}
1662 ins_pipe( pipe_slow );
1663 %}
1664
1665 instruct storeV16(memory mem, vecX src) %{
1666 predicate(n->as_StoreVector()->memory_size() == 16);
1667 match(Set mem (StoreVector mem src));
1668 ins_cost(145);
1669 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
1670 ins_encode %{
1671 __ movdqu($mem$$Address, $src$$XMMRegister);
1672 %}
1673 ins_pipe( pipe_slow );
1674 %}
1675
1676 instruct storeV32(memory mem, vecY src) %{
1677 predicate(n->as_StoreVector()->memory_size() == 32);
1678 match(Set mem (StoreVector mem src));
1679 ins_cost(145);
1680 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
1681 ins_encode %{
1682 __ vmovdqu($mem$$Address, $src$$XMMRegister);
1683 %}
1684 ins_pipe( pipe_slow );
1685 %}
1686
1687 // Replicate byte scalar to be vector
1688 instruct Repl4B(vecS dst, rRegI src) %{
1689 predicate(n->as_Vector()->length() == 4);
1690 match(Set dst (ReplicateB src));
1691 format %{ "movd $dst,$src\n\t"
1692 "punpcklbw $dst,$dst\n\t"
1693 "pshuflw $dst,$dst,0x00\t! replicate4B" %}
1694 ins_encode %{
1695 __ movdl($dst$$XMMRegister, $src$$Register);
1696 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1697 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1698 %}
1699 ins_pipe( pipe_slow );
1700 %}
1701
1702 instruct Repl8B(vecD dst, rRegI src) %{
1703 predicate(n->as_Vector()->length() == 8);
1704 match(Set dst (ReplicateB src));
1705 format %{ "movd $dst,$src\n\t"
1706 "punpcklbw $dst,$dst\n\t"
1707 "pshuflw $dst,$dst,0x00\t! replicate8B" %}
1708 ins_encode %{
1709 __ movdl($dst$$XMMRegister, $src$$Register);
1710 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1711 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1712 %}
1713 ins_pipe( pipe_slow );
1714 %}
1715
1716 instruct Repl16B(vecX dst, rRegI src) %{
1717 predicate(n->as_Vector()->length() == 16);
1718 match(Set dst (ReplicateB src));
1719 format %{ "movd $dst,$src\n\t"
1720 "punpcklbw $dst,$dst\n\t"
1721 "pshuflw $dst,$dst,0x00\n\t"
1722 "movlhps $dst,$dst\t! replicate16B" %}
1723 ins_encode %{
1724 __ movdl($dst$$XMMRegister, $src$$Register);
1725 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1726 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1727 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
1728 %}
1729 ins_pipe( pipe_slow );
1730 %}
1731
1732 instruct Repl32B(vecY dst, rRegI src) %{
1733 predicate(n->as_Vector()->length() == 32);
1734 match(Set dst (ReplicateB src));
1735 format %{ "movd $dst,$src\n\t"
1736 "punpcklbw $dst,$dst\n\t"
1737 "pshuflw $dst,$dst,0x00\n\t"
1738 "movlhps $dst,$dst\n\t"
1739 "vinsertf128h $dst,$dst,$dst\t! replicate32B" %}
1740 ins_encode %{
1741 __ movdl($dst$$XMMRegister, $src$$Register);
1742 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
1743 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1744 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
1745 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1746 %}
1747 ins_pipe( pipe_slow );
1748 %}
1749
1750 // Replicate byte scalar immediate to be vector by loading from const table.
1751 instruct Repl4B_imm(vecS dst, immI con) %{
1752 predicate(n->as_Vector()->length() == 4);
1753 match(Set dst (ReplicateB con));
1754 format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %}
1755 ins_encode %{
1756 __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
1757 %}
1758 ins_pipe( pipe_slow );
1759 %}
1760
1761 instruct Repl8B_imm(vecD dst, immI con) %{
1762 predicate(n->as_Vector()->length() == 8);
1763 match(Set dst (ReplicateB con));
1764 format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %}
1765 ins_encode %{
1766 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1767 %}
1768 ins_pipe( pipe_slow );
1769 %}
1770
1771 instruct Repl16B_imm(vecX dst, immI con) %{
1772 predicate(n->as_Vector()->length() == 16);
1773 match(Set dst (ReplicateB con));
1774 format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t"
1775 "movlhps $dst,$dst" %}
1776 ins_encode %{
1777 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1778 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
1779 %}
1780 ins_pipe( pipe_slow );
1781 %}
1782
1783 instruct Repl32B_imm(vecY dst, immI con) %{
1784 predicate(n->as_Vector()->length() == 32);
1785 match(Set dst (ReplicateB con));
1786 format %{ "movsd $dst,[$constantaddress]\t! lreplicate32B($con)\n\t"
1787 "movlhps $dst,$dst\n\t"
1788 "vinsertf128h $dst,$dst,$dst" %}
1789 ins_encode %{
1790 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
1791 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
1792 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1793 %}
1794 ins_pipe( pipe_slow );
1795 %}
1796
1797 // Replicate byte scalar zero to be vector
1798 instruct Repl4B_zero(vecS dst, immI0 zero) %{
1799 predicate(n->as_Vector()->length() == 4);
1800 match(Set dst (ReplicateB zero));
1801 format %{ "pxor $dst,$dst\t! replicate4B zero" %}
1802 ins_encode %{
1803 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1804 %}
1805 ins_pipe( fpu_reg_reg );
1806 %}
1807
1808 instruct Repl8B_zero(vecD dst, immI0 zero) %{
1809 predicate(n->as_Vector()->length() == 8);
1810 match(Set dst (ReplicateB zero));
1811 format %{ "pxor $dst,$dst\t! replicate8B zero" %}
1812 ins_encode %{
1813 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1814 %}
1815 ins_pipe( fpu_reg_reg );
1816 %}
1817
1818 instruct Repl16B_zero(vecX dst, immI0 zero) %{
1819 predicate(n->as_Vector()->length() == 16);
1820 match(Set dst (ReplicateB zero));
1821 format %{ "pxor $dst,$dst\t! replicate16B zero" %}
1822 ins_encode %{
1823 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1824 %}
1825 ins_pipe( fpu_reg_reg );
1826 %}
1827
1828 instruct Repl32B_zero(vecY dst, immI0 zero) %{
1829 predicate(n->as_Vector()->length() == 32);
1830 match(Set dst (ReplicateB zero));
1831 format %{ "vxorpd $dst,$dst,$dst\t! replicate32B zero" %}
1832 ins_encode %{
1833 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
1834 bool vector256 = true;
1835 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
1836 %}
1837 ins_pipe( fpu_reg_reg );
1838 %}
1839
1840 // Replicate char/short (2 byte) scalar to be vector
1841 instruct Repl2S(vecS dst, rRegI src) %{
1842 predicate(n->as_Vector()->length() == 2);
1843 match(Set dst (ReplicateS src));
1844 format %{ "movd $dst,$src\n\t"
1845 "pshuflw $dst,$dst,0x00\t! replicate2S" %}
1846 ins_encode %{
1847 __ movdl($dst$$XMMRegister, $src$$Register);
1848 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1849 %}
1850 ins_pipe( fpu_reg_reg );
1851 %}
1852
1853 instruct Repl4S(vecD dst, rRegI src) %{
1854 predicate(n->as_Vector()->length() == 4);
1855 match(Set dst (ReplicateS src));
1856 format %{ "movd $dst,$src\n\t"
1857 "pshuflw $dst,$dst,0x00\t! replicate4S" %}
1858 ins_encode %{
1859 __ movdl($dst$$XMMRegister, $src$$Register);
1860 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1861 %}
1862 ins_pipe( fpu_reg_reg );
1863 %}
1864
1865 instruct Repl8S(vecX dst, rRegI src) %{
1866 predicate(n->as_Vector()->length() == 8);
1867 match(Set dst (ReplicateS src));
1868 format %{ "movd $dst,$src\n\t"
1869 "pshuflw $dst,$dst,0x00\n\t"
1870 "movlhps $dst,$dst\t! replicate8S" %}
1871 ins_encode %{
1872 __ movdl($dst$$XMMRegister, $src$$Register);
1873 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1874 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
1875 %}
1876 ins_pipe( pipe_slow );
1877 %}
1878
1879 instruct Repl16S(vecY dst, rRegI src) %{
1880 predicate(n->as_Vector()->length() == 16);
1881 match(Set dst (ReplicateS src));
1882 format %{ "movd $dst,$src\n\t"
1883 "pshuflw $dst,$dst,0x00\n\t"
1884 "movlhps $dst,$dst\n\t"
1885 "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
1886 ins_encode %{
1887 __ movdl($dst$$XMMRegister, $src$$Register);
1888 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1889 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
1890 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1891 %}
1892 ins_pipe( pipe_slow );
1893 %}
1894
1895 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
1896 instruct Repl2S_imm(vecS dst, immI con) %{
1897 predicate(n->as_Vector()->length() == 2);
1898 match(Set dst (ReplicateS con));
1899 format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %}
1900 ins_encode %{
1901 __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
1902 %}
1903 ins_pipe( fpu_reg_reg );
1904 %}
1905
1906 instruct Repl4S_imm(vecD dst, immI con) %{
1907 predicate(n->as_Vector()->length() == 4);
1908 match(Set dst (ReplicateS con));
1909 format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %}
1910 ins_encode %{
1911 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1912 %}
1913 ins_pipe( fpu_reg_reg );
1914 %}
1915
1916 instruct Repl8S_imm(vecX dst, immI con) %{
1917 predicate(n->as_Vector()->length() == 8);
1918 match(Set dst (ReplicateS con));
1919 format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t"
1920 "movlhps $dst,$dst" %}
1921 ins_encode %{
1922 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1923 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
1924 %}
1925 ins_pipe( pipe_slow );
1926 %}
1927
1928 instruct Repl16S_imm(vecY dst, immI con) %{
1929 predicate(n->as_Vector()->length() == 16);
1930 match(Set dst (ReplicateS con));
1931 format %{ "movsd $dst,[$constantaddress]\t! replicate16S($con)\n\t"
1932 "movlhps $dst,$dst\n\t"
1933 "vinsertf128h $dst,$dst,$dst" %}
1934 ins_encode %{
1935 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
1936 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
1937 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
1938 %}
1939 ins_pipe( pipe_slow );
1940 %}
1941
1942 // Replicate char/short (2 byte) scalar zero to be vector
1943 instruct Repl2S_zero(vecS dst, immI0 zero) %{
1944 predicate(n->as_Vector()->length() == 2);
1945 match(Set dst (ReplicateS zero));
1946 format %{ "pxor $dst,$dst\t! replicate2S zero" %}
1947 ins_encode %{
1948 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1949 %}
1950 ins_pipe( fpu_reg_reg );
1951 %}
1952
1953 instruct Repl4S_zero(vecD dst, immI0 zero) %{
1954 predicate(n->as_Vector()->length() == 4);
1955 match(Set dst (ReplicateS zero));
1956 format %{ "pxor $dst,$dst\t! replicate4S zero" %}
1957 ins_encode %{
1958 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1959 %}
1960 ins_pipe( fpu_reg_reg );
1961 %}
1962
1963 instruct Repl8S_zero(vecX dst, immI0 zero) %{
1964 predicate(n->as_Vector()->length() == 8);
1965 match(Set dst (ReplicateS zero));
1966 format %{ "pxor $dst,$dst\t! replicate8S zero" %}
1967 ins_encode %{
1968 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
1969 %}
1970 ins_pipe( fpu_reg_reg );
1971 %}
1972
1973 instruct Repl16S_zero(vecY dst, immI0 zero) %{
1974 predicate(n->as_Vector()->length() == 16);
1975 match(Set dst (ReplicateS zero));
1976 format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %}
1977 ins_encode %{
1978 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
1979 bool vector256 = true;
1980 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
1981 %}
1982 ins_pipe( fpu_reg_reg );
1983 %}
1984
1985 // Replicate integer (4 byte) scalar to be vector
1986 instruct Repl2I(vecD dst, rRegI src) %{
1987 predicate(n->as_Vector()->length() == 2);
1988 match(Set dst (ReplicateI src));
1989 format %{ "movd $dst,$src\n\t"
1990 "pshufd $dst,$dst,0x00\t! replicate2I" %}
1991 ins_encode %{
1992 __ movdl($dst$$XMMRegister, $src$$Register);
1993 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
1994 %}
1995 ins_pipe( fpu_reg_reg );
1996 %}
1997
1998 instruct Repl4I(vecX dst, rRegI src) %{
1999 predicate(n->as_Vector()->length() == 4);
2000 match(Set dst (ReplicateI src));
2001 format %{ "movd $dst,$src\n\t"
2002 "pshufd $dst,$dst,0x00\t! replicate4I" %}
2003 ins_encode %{
2004 __ movdl($dst$$XMMRegister, $src$$Register);
2005 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2006 %}
2007 ins_pipe( pipe_slow );
2008 %}
2009
2010 instruct Repl8I(vecY dst, rRegI src) %{
2011 predicate(n->as_Vector()->length() == 8);
2012 match(Set dst (ReplicateI src));
2013 format %{ "movd $dst,$src\n\t"
2014 "pshufd $dst,$dst,0x00\n\t"
2015 "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
2016 ins_encode %{
2017 __ movdl($dst$$XMMRegister, $src$$Register);
2018 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2019 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2020 %}
2021 ins_pipe( pipe_slow );
2022 %}
2023
2024 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
2025 instruct Repl2I_imm(vecD dst, immI con) %{
2026 predicate(n->as_Vector()->length() == 2);
2027 match(Set dst (ReplicateI con));
2028 format %{ "movsd $dst,[$constantaddress]\t! replicate2I($con)" %}
2029 ins_encode %{
2030 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2031 %}
2032 ins_pipe( fpu_reg_reg );
2033 %}
2034
2035 instruct Repl4I_imm(vecX dst, immI con) %{
2036 predicate(n->as_Vector()->length() == 4);
2037 match(Set dst (ReplicateI con));
2038 format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t"
2039 "movlhps $dst,$dst" %}
2040 ins_encode %{
2041 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2042 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2043 %}
2044 ins_pipe( pipe_slow );
2045 %}
2046
2047 instruct Repl8I_imm(vecY dst, immI con) %{
2048 predicate(n->as_Vector()->length() == 8);
2049 match(Set dst (ReplicateI con));
2050 format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t"
2051 "movlhps $dst,$dst\n\t"
2052 "vinsertf128h $dst,$dst,$dst" %}
2053 ins_encode %{
2054 __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
2055 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2056 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2057 %}
2058 ins_pipe( pipe_slow );
2059 %}
2060
2061 // An integer can be loaded into an xmm register directly from memory.
2062 instruct Repl2I_mem(vecD dst, memory mem) %{
2063 predicate(n->as_Vector()->length() == 2);
2064 match(Set dst (ReplicateI mem));
2065 format %{ "movd $dst,$mem\n\t"
2066 "pshufd $dst,$dst,0x00\t! replicate2I" %}
2067 ins_encode %{
2068 __ movdl($dst$$XMMRegister, $mem$$Address);
2069 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2070 %}
2071 ins_pipe( fpu_reg_reg );
2072 %}
2073
2074 instruct Repl4I_mem(vecX dst, memory mem) %{
2075 predicate(n->as_Vector()->length() == 4);
2076 match(Set dst (ReplicateI mem));
2077 format %{ "movd $dst,$mem\n\t"
2078 "pshufd $dst,$dst,0x00\t! replicate4I" %}
2079 ins_encode %{
2080 __ movdl($dst$$XMMRegister, $mem$$Address);
2081 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2082 %}
2083 ins_pipe( pipe_slow );
2084 %}
2085
2086 instruct Repl8I_mem(vecY dst, memory mem) %{
2087 predicate(n->as_Vector()->length() == 8);
2088 match(Set dst (ReplicateI mem));
2089 format %{ "movd $dst,$mem\n\t"
2090 "pshufd $dst,$dst,0x00\n\t"
2091 "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
2092 ins_encode %{
2093 __ movdl($dst$$XMMRegister, $mem$$Address);
2094 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
2095 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2096 %}
2097 ins_pipe( pipe_slow );
2098 %}
2099
2100 // Replicate integer (4 byte) scalar zero to be vector
2101 instruct Repl2I_zero(vecD dst, immI0 zero) %{
2102 predicate(n->as_Vector()->length() == 2);
2103 match(Set dst (ReplicateI zero));
2104 format %{ "pxor $dst,$dst\t! replicate2I" %}
2105 ins_encode %{
2106 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2107 %}
2108 ins_pipe( fpu_reg_reg );
2109 %}
2110
2111 instruct Repl4I_zero(vecX dst, immI0 zero) %{
2112 predicate(n->as_Vector()->length() == 4);
2113 match(Set dst (ReplicateI zero));
2114 format %{ "pxor $dst,$dst\t! replicate4I zero)" %}
2115 ins_encode %{
2116 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2117 %}
2118 ins_pipe( fpu_reg_reg );
2119 %}
2120
2121 instruct Repl8I_zero(vecY dst, immI0 zero) %{
2122 predicate(n->as_Vector()->length() == 8);
2123 match(Set dst (ReplicateI zero));
2124 format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %}
2125 ins_encode %{
2126 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
2127 bool vector256 = true;
2128 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2129 %}
2130 ins_pipe( fpu_reg_reg );
2131 %}
2132
2133 // Replicate long (8 byte) scalar to be vector
2134 #ifdef _LP64
2135 instruct Repl2L(vecX dst, rRegL src) %{
2136 predicate(n->as_Vector()->length() == 2);
2137 match(Set dst (ReplicateL src));
2138 format %{ "movdq $dst,$src\n\t"
2139 "movlhps $dst,$dst\t! replicate2L" %}
2140 ins_encode %{
2141 __ movdq($dst$$XMMRegister, $src$$Register);
2142 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2143 %}
2144 ins_pipe( pipe_slow );
2145 %}
2146
2147 instruct Repl4L(vecY dst, rRegL src) %{
2148 predicate(n->as_Vector()->length() == 4);
2149 match(Set dst (ReplicateL src));
2150 format %{ "movdq $dst,$src\n\t"
2151 "movlhps $dst,$dst\n\t"
2152 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
2153 ins_encode %{
2154 __ movdq($dst$$XMMRegister, $src$$Register);
2155 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2156 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2157 %}
2158 ins_pipe( pipe_slow );
2159 %}
2160 #else // _LP64
2161 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
2162 predicate(n->as_Vector()->length() == 2);
2163 match(Set dst (ReplicateL src));
2164 effect(TEMP dst, USE src, TEMP tmp);
2165 format %{ "movdl $dst,$src.lo\n\t"
2166 "movdl $tmp,$src.hi\n\t"
2167 "punpckldq $dst,$tmp\n\t"
2168 "movlhps $dst,$dst\t! replicate2L"%}
2169 ins_encode %{
2170 __ movdl($dst$$XMMRegister, $src$$Register);
2171 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2172 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2173 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2174 %}
2175 ins_pipe( pipe_slow );
2176 %}
2177
2178 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
2179 predicate(n->as_Vector()->length() == 4);
2180 match(Set dst (ReplicateL src));
2181 effect(TEMP dst, USE src, TEMP tmp);
2182 format %{ "movdl $dst,$src.lo\n\t"
2183 "movdl $tmp,$src.hi\n\t"
2184 "punpckldq $dst,$tmp\n\t"
2185 "movlhps $dst,$dst\n\t"
2186 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
2187 ins_encode %{
2188 __ movdl($dst$$XMMRegister, $src$$Register);
2189 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
2190 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
2191 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2192 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2193 %}
2194 ins_pipe( pipe_slow );
2195 %}
2196 #endif // _LP64
2197
2198 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
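// The 64-bit constant is loaded from the constant table with movdbl (movsd) into the low
// quadword, movlhps duplicates it into the upper quadword, and the vecY form copies the
// low 128 bits into the high 128 bits with vinsertf128h.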
2199 instruct Repl2L_imm(vecX dst, immL con) %{
2200 predicate(n->as_Vector()->length() == 2);
2201 match(Set dst (ReplicateL con));
2202 format %{ "movsd $dst,[$constantaddress]\t! replicate2L($con)\n\t"
2203 "movlhps $dst,$dst" %}
2204 ins_encode %{
2205 __ movdbl($dst$$XMMRegister, $constantaddress($con));
2206 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2207 %}
2208 ins_pipe( pipe_slow );
2209 %}
2210
2211 instruct Repl4L_imm(vecY dst, immL con) %{
2212 predicate(n->as_Vector()->length() == 4);
2213 match(Set dst (ReplicateL con));
2214 format %{ "movsd $dst,[$constantaddress]\t! replicate4L($con)\n\t"
2215 "movlhps $dst,$dst\n\t"
2216 "vinsertf128h $dst,$dst,$dst" %}
2217 ins_encode %{
2218 __ movdbl($dst$$XMMRegister, $constantaddress($con));
2219 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2220 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2221 %}
2222 ins_pipe( pipe_slow );
2223 %}
2224
2225 // A long can be loaded into an xmm register directly from memory.
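// movq loads the 64-bit value into the low quadword and movlhps duplicates it into the upper
// quadword; the vecY form mirrors the low 128 bits into the high half with vinsertf128h.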
2226 instruct Repl2L_mem(vecX dst, memory mem) %{
2227 predicate(n->as_Vector()->length() == 2);
2228 match(Set dst (ReplicateL mem));
2229 format %{ "movq $dst,$mem\n\t"
2230 "movlhps $dst,$dst\t! replicate2L" %}
2231 ins_encode %{
2232 __ movq($dst$$XMMRegister, $mem$$Address);
2233 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2234 %}
2235 ins_pipe( pipe_slow );
2236 %}
2237
2238 instruct Repl4L_mem(vecY dst, memory mem) %{
2239 predicate(n->as_Vector()->length() == 4);
2240 match(Set dst (ReplicateL mem));
2241 format %{ "movq $dst,$mem\n\t"
2242 "movlhps $dst,$dst\n\t"
2243 "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
2244 ins_encode %{
2245 __ movq($dst$$XMMRegister, $mem$$Address);
2246 __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
2247 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2248 %}
2249 ins_pipe( pipe_slow );
2250 %}
2251
2252 // Replicate long (8 byte) scalar zero to be vector
2253 instruct Repl2L_zero(vecX dst, immL0 zero) %{
2254 predicate(n->as_Vector()->length() == 2);
2255 match(Set dst (ReplicateL zero));
2256 format %{ "pxor $dst,$dst\t! replicate2L zero" %}
2257 ins_encode %{
2258 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
2259 %}
2260 ins_pipe( fpu_reg_reg );
2261 %}
2262
2263 instruct Repl4L_zero(vecY dst, immL0 zero) %{
2264 predicate(n->as_Vector()->length() == 4);
2265 match(Set dst (ReplicateL zero));
2266 format %{ "vxorpd $dst,$dst,$dst\t! replicate4L zero" %}
2267 ins_encode %{
2268 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
2269 bool vector256 = true;
2270 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2271 %}
2272 ins_pipe( fpu_reg_reg );
2273 %}
2274
2275 // Replicate float (4 byte) scalar to be vector
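// pshufd with mask 0x00 broadcasts the float in lane 0 of $src to all four 32-bit lanes of
// $dst; the vecY form mirrors the low 128 bits into the high half with vinsertf128h.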
2276 instruct Repl2F(vecD dst, regF src) %{
2277 predicate(n->as_Vector()->length() == 2);
2278 match(Set dst (ReplicateF src));
2279 format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
2280 ins_encode %{
2281 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2282 %}
2283 ins_pipe( fpu_reg_reg );
2284 %}
2285
2286 instruct Repl4F(vecX dst, regF src) %{
2287 predicate(n->as_Vector()->length() == 4);
2288 match(Set dst (ReplicateF src));
2289 format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
2290 ins_encode %{
2291 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2292 %}
2293 ins_pipe( pipe_slow );
2294 %}
2295
2296 instruct Repl8F(vecY dst, regF src) %{
2297 predicate(n->as_Vector()->length() == 8);
2298 match(Set dst (ReplicateF src));
2299 format %{ "pshufd $dst,$src,0x00\n\t"
2300 "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
2301 ins_encode %{
2302 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
2303 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2304 %}
2305 ins_pipe( pipe_slow );
2306 %}
2307
2308 // Replicate float (4 byte) scalar zero to be vector
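// XORing a register with itself produces all zero bits: xorps zeroes the XMM register for the
// 64- and 128-bit vectors, and vxorps with vector256 == true zeroes the full 256-bit YMM register.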
2309 instruct Repl2F_zero(vecD dst, immF0 zero) %{
2310 predicate(n->as_Vector()->length() == 2);
2311 match(Set dst (ReplicateF zero));
2312 format %{ "xorps $dst,$dst\t! replicate2F zero" %}
2313 ins_encode %{
2314 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2315 %}
2316 ins_pipe( fpu_reg_reg );
2317 %}
2318
2319 instruct Repl4F_zero(vecX dst, immF0 zero) %{
2320 predicate(n->as_Vector()->length() == 4);
2321 match(Set dst (ReplicateF zero));
2322 format %{ "xorps $dst,$dst\t! replicate4F zero" %}
2323 ins_encode %{
2324 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
2325 %}
2326 ins_pipe( fpu_reg_reg );
2327 %}
2328
2329 instruct Repl8F_zero(vecY dst, immF0 zero) %{
2330 predicate(n->as_Vector()->length() == 8);
2331 match(Set dst (ReplicateF zero));
2332 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
2333 ins_encode %{
2334 bool vector256 = true;
2335 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2336 %}
2337 ins_pipe( fpu_reg_reg );
2338 %}
2339
2340 // Replicate double (8 byte) scalar to be vector
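// pshufd with mask 0x44 (0b01'00'01'00) copies source dwords 0 and 1 (the low double) into both
// quadwords, duplicating the double across the XMM register; the vecY form mirrors the low
// 128 bits into the high half with vinsertf128h.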
2341 instruct Repl2D(vecX dst, regD src) %{
2342 predicate(n->as_Vector()->length() == 2);
2343 match(Set dst (ReplicateD src));
2344 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
2345 ins_encode %{
2346 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2347 %}
2348 ins_pipe( pipe_slow );
2349 %}
2350
2351 instruct Repl4D(vecY dst, regD src) %{
2352 predicate(n->as_Vector()->length() == 4);
2353 match(Set dst (ReplicateD src));
2354 format %{ "pshufd $dst,$src,0x44\n\t"
2355 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
2356 ins_encode %{
2357 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
2358 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
2359 %}
2360 ins_pipe( pipe_slow );
2361 %}
2362
2363 // Replicate double (8 byte) scalar zero to be vector
2364 instruct Repl2D_zero(vecX dst, immD0 zero) %{
2365 predicate(n->as_Vector()->length() == 2);
2366 match(Set dst (ReplicateD zero));
2367 format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
2368 ins_encode %{
2369 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
2370 %}
2371 ins_pipe( fpu_reg_reg );
2372 %}
2373
2374 instruct Repl4D_zero(vecY dst, immD0 zero) %{
2375 predicate(n->as_Vector()->length() == 4);
2376 match(Set dst (ReplicateD zero));
2377 format %{ "vxorpd $dst,$dst,$dst\t! replicate4D zero" %}
2378 ins_encode %{
2379 bool vector256 = true;
2380 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
2381 %}
2382 ins_pipe( fpu_reg_reg );
2383 %}
2384