Mercurial > hg > truffle
comparison src/cpu/x86/vm/x86.ad @ 6179:8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
Summary: Increase vector size up to 256-bits for YMM AVX registers on x86.
Reviewed-by: never, twisti, roland
author | kvn |
---|---|
date | Fri, 15 Jun 2012 01:25:19 -0700 |
parents | 9b8ce46870df |
children | 6f8f439e247d |
comparison
equal
deleted
inserted
replaced
6146:eba1d5bce9e8 | 6179:8c92982cbbc4 |
---|---|
22 // | 22 // |
23 // | 23 // |
24 | 24 |
25 // X86 Common Architecture Description File | 25 // X86 Common Architecture Description File |
26 | 26 |
27 //----------REGISTER DEFINITION BLOCK------------------------------------------ | |
28 // This information is used by the matcher and the register allocator to | |
29 // describe individual registers and classes of registers within the target | |
30 // archtecture. | |
31 | |
32 register %{ | |
33 //----------Architecture Description Register Definitions---------------------- | |
34 // General Registers | |
35 // "reg_def" name ( register save type, C convention save type, | |
36 // ideal register type, encoding ); | |
37 // Register Save Types: | |
38 // | |
39 // NS = No-Save: The register allocator assumes that these registers | |
40 // can be used without saving upon entry to the method, & | |
41 // that they do not need to be saved at call sites. | |
42 // | |
43 // SOC = Save-On-Call: The register allocator assumes that these registers | |
44 // can be used without saving upon entry to the method, | |
45 // but that they must be saved at call sites. | |
46 // | |
47 // SOE = Save-On-Entry: The register allocator assumes that these registers | |
48 // must be saved before using them upon entry to the | |
49 // method, but they do not need to be saved at call | |
50 // sites. | |
51 // | |
52 // AS = Always-Save: The register allocator assumes that these registers | |
53 // must be saved before using them upon entry to the | |
54 // method, & that they must be saved at call sites. | |
55 // | |
56 // Ideal Register Type is used to determine how to save & restore a | |
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get | |
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. | |
59 // | |
60 // The encoding number is the actual bit-pattern placed into the opcodes. | |
61 | |
62 // XMM registers. 256-bit registers or 8 words each, labeled (a)-h. | |
63 // Word a in each register holds a Float, words ab hold a Double. | |
64 // The whole registers are used in SSE4.2 version intrinsics, | |
65 // array copy stubs and superword operations (see UseSSE42Intrinsics, | |
66 // UseXMMForArrayCopy and UseSuperword flags). | |
67 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX). | |
68 // Linux ABI: No register preserved across function calls | |
69 // XMM0-XMM7 might hold parameters | |
70 // Windows ABI: XMM6-XMM15 preserved across function calls | |
71 // XMM0-XMM3 might hold parameters | |
72 | |
73 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); | |
74 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()); | |
75 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()); | |
76 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()); | |
77 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()); | |
78 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()); | |
79 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
80 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
81 | |
82 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); | |
83 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()); | |
84 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()); | |
85 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()); | |
86 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()); | |
87 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()); | |
88 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
89 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
90 | |
91 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); | |
92 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()); | |
93 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()); | |
94 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()); | |
95 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()); | |
96 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()); | |
97 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
98 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
99 | |
100 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); | |
101 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()); | |
102 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()); | |
103 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()); | |
104 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()); | |
105 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()); | |
106 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
107 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
108 | |
109 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); | |
110 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()); | |
111 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()); | |
112 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()); | |
113 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()); | |
114 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()); | |
115 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
116 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
117 | |
118 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); | |
119 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()); | |
120 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()); | |
121 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()); | |
122 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()); | |
123 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()); | |
124 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
125 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
126 | |
127 #ifdef _WIN64 | |
128 | |
129 reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); | |
130 reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()); | |
131 reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()); | |
132 reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()); | |
133 reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()); | |
134 reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()); | |
135 reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
136 reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
137 | |
138 reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); | |
139 reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()); | |
140 reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()); | |
141 reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()); | |
142 reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()); | |
143 reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()); | |
144 reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
145 reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
146 | |
147 reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); | |
148 reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()); | |
149 reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()); | |
150 reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()); | |
151 reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()); | |
152 reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()); | |
153 reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
154 reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
155 | |
156 reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); | |
157 reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()); | |
158 reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()); | |
159 reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()); | |
160 reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()); | |
161 reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()); | |
162 reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
163 reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
164 | |
165 reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); | |
166 reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()); | |
167 reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()); | |
168 reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()); | |
169 reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()); | |
170 reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()); | |
171 reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
172 reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
173 | |
174 reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); | |
175 reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()); | |
176 reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()); | |
177 reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()); | |
178 reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()); | |
179 reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()); | |
180 reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
181 reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
182 | |
183 reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); | |
184 reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()); | |
185 reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()); | |
186 reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()); | |
187 reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()); | |
188 reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()); | |
189 reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
190 reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
191 | |
192 reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); | |
193 reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()); | |
194 reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()); | |
195 reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()); | |
196 reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()); | |
197 reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()); | |
198 reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
199 reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
200 | |
201 reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); | |
202 reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()); | |
203 reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()); | |
204 reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()); | |
205 reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()); | |
206 reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()); | |
207 reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
208 reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
209 | |
210 reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); | |
211 reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()); | |
212 reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()); | |
213 reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()); | |
214 reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()); | |
215 reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()); | |
216 reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
217 reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
218 | |
219 #else // _WIN64 | |
220 | |
221 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); | |
222 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()); | |
223 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()); | |
224 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()); | |
225 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()); | |
226 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()); | |
227 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
228 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
229 | |
230 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); | |
231 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()); | |
232 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()); | |
233 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()); | |
234 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()); | |
235 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()); | |
236 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
237 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
238 | |
239 #ifdef _LP64 | |
240 | |
241 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); | |
242 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()); | |
243 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()); | |
244 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()); | |
245 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()); | |
246 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()); | |
247 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
248 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
249 | |
250 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); | |
251 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()); | |
252 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()); | |
253 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()); | |
254 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()); | |
255 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()); | |
256 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
257 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
258 | |
259 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); | |
260 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()); | |
261 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()); | |
262 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()); | |
263 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()); | |
264 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()); | |
265 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
266 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
267 | |
268 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); | |
269 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()); | |
270 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()); | |
271 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()); | |
272 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()); | |
273 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()); | |
274 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
275 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
276 | |
277 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); | |
278 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()); | |
279 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()); | |
280 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()); | |
281 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()); | |
282 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()); | |
283 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
284 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
285 | |
286 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); | |
287 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()); | |
288 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()); | |
289 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()); | |
290 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()); | |
291 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()); | |
292 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
293 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
294 | |
295 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); | |
296 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()); | |
297 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()); | |
298 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()); | |
299 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()); | |
300 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()); | |
301 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
302 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
303 | |
304 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); | |
305 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()); | |
306 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()); | |
307 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()); | |
308 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()); | |
309 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()); | |
310 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()); | |
311 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); | |
312 | |
313 #endif // _LP64 | |
314 | |
315 #endif // _WIN64 | |
316 | |
317 #ifdef _LP64 | |
318 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); | |
319 #else | |
320 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); | |
321 #endif // _LP64 | |
322 | |
323 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, | |
324 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, | |
325 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, | |
326 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, | |
327 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, | |
328 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, | |
329 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, | |
330 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h | |
331 #ifdef _LP64 | |
332 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, | |
333 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, | |
334 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, | |
335 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, | |
336 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, | |
337 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, | |
338 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, | |
339 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h | |
340 #endif | |
341 ); | |
342 | |
343 // flags allocation class should be last. | |
344 alloc_class chunk2(RFLAGS); | |
345 | |
346 // Singleton class for condition codes | |
347 reg_class int_flags(RFLAGS); | |
348 | |
349 // Class for all float registers | |
350 reg_class float_reg(XMM0, | |
351 XMM1, | |
352 XMM2, | |
353 XMM3, | |
354 XMM4, | |
355 XMM5, | |
356 XMM6, | |
357 XMM7 | |
358 #ifdef _LP64 | |
359 ,XMM8, | |
360 XMM9, | |
361 XMM10, | |
362 XMM11, | |
363 XMM12, | |
364 XMM13, | |
365 XMM14, | |
366 XMM15 | |
367 #endif | |
368 ); | |
369 | |
370 // Class for all double registers | |
371 reg_class double_reg(XMM0, XMM0b, | |
372 XMM1, XMM1b, | |
373 XMM2, XMM2b, | |
374 XMM3, XMM3b, | |
375 XMM4, XMM4b, | |
376 XMM5, XMM5b, | |
377 XMM6, XMM6b, | |
378 XMM7, XMM7b | |
379 #ifdef _LP64 | |
380 ,XMM8, XMM8b, | |
381 XMM9, XMM9b, | |
382 XMM10, XMM10b, | |
383 XMM11, XMM11b, | |
384 XMM12, XMM12b, | |
385 XMM13, XMM13b, | |
386 XMM14, XMM14b, | |
387 XMM15, XMM15b | |
388 #endif | |
389 ); | |
390 | |
391 // Class for all 32bit vector registers | |
392 reg_class vectors_reg(XMM0, | |
393 XMM1, | |
394 XMM2, | |
395 XMM3, | |
396 XMM4, | |
397 XMM5, | |
398 XMM6, | |
399 XMM7 | |
400 #ifdef _LP64 | |
401 ,XMM8, | |
402 XMM9, | |
403 XMM10, | |
404 XMM11, | |
405 XMM12, | |
406 XMM13, | |
407 XMM14, | |
408 XMM15 | |
409 #endif | |
410 ); | |
411 | |
412 // Class for all 64bit vector registers | |
413 reg_class vectord_reg(XMM0, XMM0b, | |
414 XMM1, XMM1b, | |
415 XMM2, XMM2b, | |
416 XMM3, XMM3b, | |
417 XMM4, XMM4b, | |
418 XMM5, XMM5b, | |
419 XMM6, XMM6b, | |
420 XMM7, XMM7b | |
421 #ifdef _LP64 | |
422 ,XMM8, XMM8b, | |
423 XMM9, XMM9b, | |
424 XMM10, XMM10b, | |
425 XMM11, XMM11b, | |
426 XMM12, XMM12b, | |
427 XMM13, XMM13b, | |
428 XMM14, XMM14b, | |
429 XMM15, XMM15b | |
430 #endif | |
431 ); | |
432 | |
433 // Class for all 128bit vector registers | |
434 reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d, | |
435 XMM1, XMM1b, XMM1c, XMM1d, | |
436 XMM2, XMM2b, XMM2c, XMM2d, | |
437 XMM3, XMM3b, XMM3c, XMM3d, | |
438 XMM4, XMM4b, XMM4c, XMM4d, | |
439 XMM5, XMM5b, XMM5c, XMM5d, | |
440 XMM6, XMM6b, XMM6c, XMM6d, | |
441 XMM7, XMM7b, XMM7c, XMM7d | |
442 #ifdef _LP64 | |
443 ,XMM8, XMM8b, XMM8c, XMM8d, | |
444 XMM9, XMM9b, XMM9c, XMM9d, | |
445 XMM10, XMM10b, XMM10c, XMM10d, | |
446 XMM11, XMM11b, XMM11c, XMM11d, | |
447 XMM12, XMM12b, XMM12c, XMM12d, | |
448 XMM13, XMM13b, XMM13c, XMM13d, | |
449 XMM14, XMM14b, XMM14c, XMM14d, | |
450 XMM15, XMM15b, XMM15c, XMM15d | |
451 #endif | |
452 ); | |
453 | |
454 // Class for all 256bit vector registers | |
455 reg_class vectory_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, | |
456 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, | |
457 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, | |
458 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, | |
459 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, | |
460 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, | |
461 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, | |
462 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h | |
463 #ifdef _LP64 | |
464 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, | |
465 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, | |
466 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, | |
467 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, | |
468 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, | |
469 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, | |
470 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, | |
471 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h | |
472 #endif | |
473 ); | |
474 | |
475 %} | |
476 | |
27 source %{ | 477 source %{ |
28 // Float masks come from different places depending on platform. | 478 // Float masks come from different places depending on platform. |
29 #ifdef _LP64 | 479 #ifdef _LP64 |
30 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } | 480 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } |
31 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } | 481 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } |
36 static address float_signflip() { return (address)float_signflip_pool; } | 486 static address float_signflip() { return (address)float_signflip_pool; } |
37 static address double_signmask() { return (address)double_signmask_pool; } | 487 static address double_signmask() { return (address)double_signmask_pool; } |
38 static address double_signflip() { return (address)double_signflip_pool; } | 488 static address double_signflip() { return (address)double_signflip_pool; } |
39 #endif | 489 #endif |
40 | 490 |
491 // Map Types to machine register types | |
492 const int Matcher::base2reg[Type::lastype] = { | |
493 Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, | |
494 Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ | |
495 Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */ | |
496 Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ | |
497 0, 0/*abio*/, | |
498 Op_RegP /* Return address */, 0, /* the memories */ | |
499 Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, | |
500 0 /*bottom*/ | |
501 }; | |
502 | |
503 // Max vector size in bytes. 0 if not supported. | |
504 const int Matcher::vector_width_in_bytes(BasicType bt) { | |
505 assert(is_java_primitive(bt), "only primitive type vectors"); | |
506 if (UseSSE < 2) return 0; | |
507 // SSE2 supports 128bit vectors for all types. | |
508 // AVX2 supports 256bit vectors for all types. | |
509 int size = (UseAVX > 1) ? 32 : 16; | |
510 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. | |
511 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) | |
512 size = 32; | |
513 // Use flag to limit vector size. | |
514 size = MIN2(size,(int)MaxVectorSize); | |
515 // Minimum 2 values in vector (or 4 for bytes). | |
516 switch (bt) { | |
517 case T_DOUBLE: | |
518 case T_LONG: | |
519 if (size < 16) return 0; | |
520 case T_FLOAT: | |
521 case T_INT: | |
522 if (size < 8) return 0; | |
523 case T_BOOLEAN: | |
524 case T_BYTE: | |
525 case T_CHAR: | |
526 case T_SHORT: | |
527 if (size < 4) return 0; | |
528 break; | |
529 default: | |
530 ShouldNotReachHere(); | |
531 } | |
532 return size; | |
533 } | |
534 | |
535 // Limits on vector size (number of elements) loaded into vector. | |
536 const int Matcher::max_vector_size(const BasicType bt) { | |
537 return vector_width_in_bytes(bt)/type2aelembytes(bt); | |
538 } | |
539 const int Matcher::min_vector_size(const BasicType bt) { | |
540 int max_size = max_vector_size(bt); | |
541 // Min size which can be loaded into vector is 4 bytes. | |
542 int size = (type2aelembytes(bt) == 1) ? 4 : 2; | |
543 return MIN2(size,max_size); | |
544 } | |
545 | |
546 // Vector ideal reg corresponding to specidied size in bytes | |
547 const int Matcher::vector_ideal_reg(int size) { | |
548 assert(MaxVectorSize >= size, ""); | |
549 switch(size) { | |
550 case 4: return Op_VecS; | |
551 case 8: return Op_VecD; | |
552 case 16: return Op_VecX; | |
553 case 32: return Op_VecY; | |
554 } | |
555 ShouldNotReachHere(); | |
556 return 0; | |
557 } | |
558 | |
559 // x86 supports misaligned vectors store/load. | |
560 const bool Matcher::misaligned_vectors_ok() { | |
561 return !AlignVector; // can be changed by flag | |
562 } | |
563 | |
564 // Helper methods for MachSpillCopyNode::implementation(). | |
565 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, | |
566 int src_hi, int dst_hi, uint ireg, outputStream* st) { | |
567 // In 64-bit VM size calculation is very complex. Emitting instructions | |
568 // into scratch buffer is used to get size in 64-bit VM. | |
569 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) | |
570 assert(ireg == Op_VecS || // 32bit vector | |
571 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && | |
572 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, | |
573 "no non-adjacent vector moves" ); | |
574 if (cbuf) { | |
575 MacroAssembler _masm(cbuf); | |
576 int offset = __ offset(); | |
577 switch (ireg) { | |
578 case Op_VecS: // copy whole register | |
579 case Op_VecD: | |
580 case Op_VecX: | |
581 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); | |
582 break; | |
583 case Op_VecY: | |
584 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); | |
585 break; | |
586 default: | |
587 ShouldNotReachHere(); | |
588 } | |
589 int size = __ offset() - offset; | |
590 #ifdef ASSERT | |
591 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. | |
592 assert(!do_size || size == 4, "incorrect size calculattion"); | |
593 #endif | |
594 return size; | |
595 #ifndef PRODUCT | |
596 } else if (!do_size) { | |
597 switch (ireg) { | |
598 case Op_VecS: | |
599 case Op_VecD: | |
600 case Op_VecX: | |
601 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); | |
602 break; | |
603 case Op_VecY: | |
604 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); | |
605 break; | |
606 default: | |
607 ShouldNotReachHere(); | |
608 } | |
609 #endif | |
610 } | |
611 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. | |
612 return 4; | |
613 } | |
614 | |
615 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, | |
616 int stack_offset, int reg, uint ireg, outputStream* st) { | |
617 // In 64-bit VM size calculation is very complex. Emitting instructions | |
618 // into scratch buffer is used to get size in 64-bit VM. | |
619 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) | |
620 if (cbuf) { | |
621 MacroAssembler _masm(cbuf); | |
622 int offset = __ offset(); | |
623 if (is_load) { | |
624 switch (ireg) { | |
625 case Op_VecS: | |
626 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); | |
627 break; | |
628 case Op_VecD: | |
629 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); | |
630 break; | |
631 case Op_VecX: | |
632 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); | |
633 break; | |
634 case Op_VecY: | |
635 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); | |
636 break; | |
637 default: | |
638 ShouldNotReachHere(); | |
639 } | |
640 } else { // store | |
641 switch (ireg) { | |
642 case Op_VecS: | |
643 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); | |
644 break; | |
645 case Op_VecD: | |
646 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); | |
647 break; | |
648 case Op_VecX: | |
649 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); | |
650 break; | |
651 case Op_VecY: | |
652 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); | |
653 break; | |
654 default: | |
655 ShouldNotReachHere(); | |
656 } | |
657 } | |
658 int size = __ offset() - offset; | |
659 #ifdef ASSERT | |
660 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); | |
661 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. | |
662 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); | |
663 #endif | |
664 return size; | |
665 #ifndef PRODUCT | |
666 } else if (!do_size) { | |
667 if (is_load) { | |
668 switch (ireg) { | |
669 case Op_VecS: | |
670 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); | |
671 break; | |
672 case Op_VecD: | |
673 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); | |
674 break; | |
675 case Op_VecX: | |
676 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); | |
677 break; | |
678 case Op_VecY: | |
679 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); | |
680 break; | |
681 default: | |
682 ShouldNotReachHere(); | |
683 } | |
684 } else { // store | |
685 switch (ireg) { | |
686 case Op_VecS: | |
687 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); | |
688 break; | |
689 case Op_VecD: | |
690 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); | |
691 break; | |
692 case Op_VecX: | |
693 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); | |
694 break; | |
695 case Op_VecY: | |
696 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); | |
697 break; | |
698 default: | |
699 ShouldNotReachHere(); | |
700 } | |
701 } | |
702 #endif | |
703 } | |
704 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); | |
705 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. | |
706 return 5+offset_size; | |
707 } | |
708 | |
709 static inline jfloat replicate4_imm(int con, int width) { | |
710 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. | |
711 assert(width == 1 || width == 2, "only byte or short types here"); | |
712 int bit_width = width * 8; | |
713 jint val = con; | |
714 val &= (1 << bit_width) - 1; // mask off sign bits | |
715 while(bit_width < 32) { | |
716 val |= (val << bit_width); | |
717 bit_width <<= 1; | |
718 } | |
719 jfloat fval = *((jfloat*) &val); // coerce to float type | |
720 return fval; | |
721 } | |
722 | |
723 static inline jdouble replicate8_imm(int con, int width) { | |
724 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. | |
725 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); | |
726 int bit_width = width * 8; | |
727 jlong val = con; | |
728 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits | |
729 while(bit_width < 64) { | |
730 val |= (val << bit_width); | |
731 bit_width <<= 1; | |
732 } | |
733 jdouble dval = *((jdouble*) &val); // coerce to double type | |
734 return dval; | |
735 } | |
736 | |
#ifndef PRODUCT
// Debug-only disassembly text for a MachNopNode (nop padding for loops/calls).
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
101 } | 797 } |
102 %} | 798 %} |
103 | 799 |
104 %} | 800 %} |
105 | 801 |
802 | |
803 //----------OPERANDS----------------------------------------------------------- | |
804 // Operand definitions must precede instruction definitions for correct parsing | |
805 // in the ADLC because operands constitute user defined types which are used in | |
806 // instruction definitions. | |
807 | |
808 // Vectors | |
// 4-byte (32-bit) vector value; allocated in the 'vectors_reg' class.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 8-byte (64-bit) vector value; allocated in the 'vectord_reg' class.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 16-byte (128-bit, XMM) vector value; allocated in the 'vectorx_reg' class.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 32-byte (256-bit, YMM) vector value; allocated in the 'vectory_reg' class.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
840 | |
841 | |
106 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) | 842 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) |
107 | 843 |
108 // ============================================================================ | 844 // ============================================================================ |
109 | 845 |
110 instruct ShouldNotReachHere() %{ | 846 instruct ShouldNotReachHere() %{ |
850 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); | 1586 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); |
851 %} | 1587 %} |
852 ins_pipe(pipe_slow); | 1588 ins_pipe(pipe_slow); |
853 %} | 1589 %} |
854 | 1590 |
1591 | |
1592 // ====================VECTOR INSTRUCTIONS===================================== | |
1593 | |
1594 // Load vectors (4 bytes long) | |
// 4-byte vector load: movd moves 32 bits from memory into the XMM low dword.
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// 8-byte vector load: movq moves 64 bits from memory into the XMM low qword.
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vector load: movdqu allows unaligned 128-bit loads.
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte vector load: vmovdqu is the AVX form for unaligned 256-bit loads.
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
1641 | |
1642 // Store vectors | |
// 4-byte vector store (mirror of loadV4).
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8-byte vector store (mirror of loadV8).
instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte vector store; movdqu allows unaligned 128-bit stores.
instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte vector store; vmovdqu is the AVX form for unaligned 256-bit stores.
instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1686 | |
1687 // Replicate byte scalar to be vector | |
// Broadcast the low byte of a GPR into 4 byte lanes:
// movd puts the value into XMM, punpcklbw duplicates each byte into a word,
// pshuflw broadcasts the low word across the low 64 bits.
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Same sequence as Repl4B; the low 64 bits already hold 8 replicated bytes.
instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 16 bytes: replicate into the low 64 bits, then movlhps copies the low
// quadword into the high quadword.
instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32 bytes: build the 128-bit pattern, then vinsertf128h copies it into
// the high 128-bit half of the YMM register.
instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1749 | |
1750 // Replicate byte scalar immediate to be vector by loading from const table. | |
// Load a pre-replicated 4-byte pattern (built by replicate4_imm) from the
// constant table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Load a pre-replicated 8-byte pattern (built by replicate8_imm) from the
// constant table.
instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Load the 8-byte pattern, then movlhps duplicates it into the high quadword.
instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1782 | |
// Load the 8-byte pattern, widen to 128 bits with movlhps, then copy into
// the high YMM half with vinsertf128h.
// Fixed debug format text: "lreplicate32B" -> "replicate32B".
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate32B($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1796 | |
1797 // Replicate byte scalar zero to be vector | |
// Zero vectors: pxor reg,reg clears the whole XMM register regardless of
// the vector length, so the same encoding serves 4/8/16 byte variants.
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero needs the AVX xor form to clear the full YMM register.
instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
1839 | |
1840 // Replicate char/short (2 byte) scalar to be vector | |
// Broadcast the low 16 bits of a GPR: movd into XMM, then pshuflw
// broadcasts the low word across the low 64 bits.
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 8 shorts: movlhps copies the replicated low quadword into the high one.
instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts: build 128-bit pattern, then vinsertf128h fills the high YMM half.
instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1894 | |
1895 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. | |
// Load a pre-replicated short pattern from the constant table (built by
// replicate4_imm/replicate8_imm with width 2).
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load 8-byte pattern, then duplicate into the high quadword.
instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load 8-byte pattern, widen to 128 bits, then fill the high YMM half.
instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate16S($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
1941 | |
1942 // Replicate char/short (2 byte) scalar zero to be vector | |
// Zero short vectors: pxor clears the whole XMM register (see byte variants).
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// 256-bit zero needs the AVX xor form to clear the full YMM register.
instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
1984 | |
1985 // Replicate integer (4 byte) scalar to be vector | |
// Broadcast a 32-bit GPR: movd into XMM, pshufd broadcasts dword 0 to all
// four dword lanes.
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints: broadcast across the XMM register, then fill the high YMM half.
instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2023 | |
2024 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. | |
// Load a pre-replicated int pattern (built by replicate8_imm with width 4)
// from the constant table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load 8-byte pattern, then duplicate into the high quadword.
instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load 8-byte pattern, widen to 128 bits, then fill the high YMM half.
instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2060 | |
2061 // Integer could be loaded into xmm register directly from memory. | |
// Integer could be loaded into xmm register directly from memory, so these
// variants broadcast straight from a memory operand.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints from memory: broadcast in XMM, then fill the high YMM half.
instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI mem));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2099 | |
2100 // Replicate integer (4 byte) scalar zero to be vector | |
// Zero a 2-int vector: pxor clears the whole XMM register.
// Fixed debug format text to match the other *_zero instructs
// ("replicate2I" -> "replicate2I zero").
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
2110 | |
// Zero a 4-int vector: pxor clears the whole XMM register.
// Fixed debug format text: removed the stray ")" after "zero".
instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
2120 | |
// Zero an 8-int vector: AVX xor form clears the full 256-bit YMM register.
instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2132 | |
2133 // Replicate long (8 byte) scalar to be vector | |
#ifdef _LP64
// 64-bit VM: a long fits in one GPR; movdq moves it into XMM, movlhps
// duplicates the low quadword into the high one.
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs: build the 128-bit pattern, then fill the high YMM half.
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// 32-bit VM: the long lives in a GPR pair; move each half into an XMM
// register via movdl and interleave them with punpckldq.
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "movlhps $dst,$dst\t! replicate2L"%}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
2197 | |
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate2L($con)\n\t"
            "movlhps $dst,$dst" %}
  ins_encode %{
    // Load the 8-byte immediate from the constant table into the low qword.
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
    // Duplicate the low quadword into the high quadword.
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2210 | |
// Replicate a long immediate into all four lanes of a 256-bit YMM vector;
// the constant is fetched from the constant table.
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL con));
  format %{ "movsd $dst,[$constantaddress]\t! replicate4L($con)\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));   // const table -> low qword
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);       // duplicate into high qword
    // Copy the low 128 bits into the upper half of the YMM register.
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2224 | |
// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL mem));
  format %{ "movq $dst,$mem\n\t"
            "movlhps $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);          // memory qword -> low 64 bits
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);   // duplicate into high qword
  %}
  ins_pipe( pipe_slow );
%}
2237 | |
// Replicate a long loaded from memory into all four lanes of a 256-bit
// YMM vector.
instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL mem));
  format %{ "movq $dst,$mem\n\t"
            "movlhps $dst,$dst\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);          // memory qword -> low 64 bits
    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);   // duplicate into high qword
    // Copy the low 128 bits into the upper half of the YMM register.
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2251 | |
// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    // XOR with itself zeroes the whole 128-bit register.
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
2262 | |
// Zero all four long lanes of a 256-bit YMM vector.
instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vxorpd $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2274 | |
// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  // Format fixed: the encoding shuffles from $src, not $dst (see the
  // pshufd call below and the matching Repl8F format).
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    // Broadcast element 0 of $src into every dword lane of $dst.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}
2285 | |
// Replicate a float scalar into all four lanes of a 128-bit XMM vector.
instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  // Format fixed: the encoding shuffles from $src, not $dst (see the
  // pshufd call below and the matching Repl8F format).
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    // Broadcast element 0 of $src into every dword lane of $dst.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
2295 | |
// Replicate a float scalar into all eight lanes of a 256-bit YMM vector.
instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
  ins_encode %{
    // Broadcast element 0 of $src into every dword lane of the low 128 bits.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    // Copy the low 128 bits into the upper half of the YMM register.
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2307 | |
// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // XOR with itself zeroes the register.
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
2318 | |
// Zero all four float lanes of a 128-bit XMM vector.
instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // XOR with itself zeroes the whole 128-bit register.
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
2328 | |
// Zero all eight float lanes of a 256-bit YMM vector.
instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // 256-bit AVX XOR zeroes the full YMM register.
    bool vector256 = true;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2339 | |
// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    // Shuffle 0x44 selects dwords {1,0,1,0}, i.e. duplicates the low qword.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
2350 | |
// Replicate a double scalar into all four lanes of a 256-bit YMM vector.
instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
  ins_encode %{
    // Shuffle 0x44 duplicates the low qword of $src within the low 128 bits.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    // Copy the low 128 bits into the upper half of the YMM register.
    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
2362 | |
// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // XOR with itself zeroes the whole 128-bit register.
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
2373 | |
// Zero all four double lanes of a 256-bit YMM vector.
instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateD zero));
  // Format normalized to the three-operand form used by the sibling
  // zero-replicate rules (Repl4L_zero, Repl8F_zero); the stray ",vect256"
  // operand did not match any emitted operand.
  format %{ "vxorpd $dst,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // 256-bit AVX XOR zeroes the full YMM register.
    bool vector256 = true;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
2384 |