Mercurial > hg > truffle
annotate src/cpu/x86/vm/assembler_x86.hpp @ 17716:cdb71841f4bc
6498581: ThreadInterruptTest3 produces wrong output on Windows
Summary: There is race condition between os::interrupt and os::is_interrupted on Windows. In JVM_Sleep(Thread.sleep), check if thread gets interrupted, it may see interrupted but not really interrupted so cause spurious waking up (early return from sleep). Fix by checking if interrupt event really gets set thus prevent false return. For intrinsic of _isInterrupted, on Windows, go fastpath only on bit not set.
Reviewed-by: acorn, kvn
Contributed-by: david.holmes@oracle.com, yumin.qi@oracle.com
author | minqi |
---|---|
date | Wed, 26 Feb 2014 15:20:41 -0800 |
parents | 59e8ad757e19 |
children | d49f00604347 9e9af3aa4278 |
rev | line source |
---|---|
0 | 1 /* |
7951 | 2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1503
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1503
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1503
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #ifndef CPU_X86_VM_ASSEMBLER_X86_HPP |
26 #define CPU_X86_VM_ASSEMBLER_X86_HPP | |
27 | |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
7198
diff
changeset
|
28 #include "asm/register.hpp" |
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
7198
diff
changeset
|
29 |
0 | 30 class BiasedLockingCounters; |
31 | |
32 // Contains all the definitions needed for x86 assembly code generation. | |
33 | |
34 // Calling convention | |
35 class Argument VALUE_OBJ_CLASS_SPEC { | |
36 public: | |
37 enum { | |
38 #ifdef _LP64 | |
39 #ifdef _WIN64 | |
40 n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) | |
41 n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... ) | |
42 #else | |
43 n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) | |
44 n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... ) | |
45 #endif // _WIN64 | |
46 n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ... | |
47 n_float_register_parameters_j = 8 // j_farg0, j_farg1, ... | |
48 #else | |
49 n_register_parameters = 0 // 0 registers used to pass arguments | |
50 #endif // _LP64 | |
51 }; | |
52 }; | |
53 | |
54 | |
55 #ifdef _LP64 | |
56 // Symbolically name the register arguments used by the c calling convention. | |
57 // Windows is different from linux/solaris. So much for standards... | |
58 | |
59 #ifdef _WIN64 | |
60 | |
61 REGISTER_DECLARATION(Register, c_rarg0, rcx); | |
62 REGISTER_DECLARATION(Register, c_rarg1, rdx); | |
63 REGISTER_DECLARATION(Register, c_rarg2, r8); | |
64 REGISTER_DECLARATION(Register, c_rarg3, r9); | |
65 | |
304 | 66 REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0); |
67 REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1); | |
68 REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2); | |
69 REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3); | |
0 | 70 |
71 #else | |
72 | |
73 REGISTER_DECLARATION(Register, c_rarg0, rdi); | |
74 REGISTER_DECLARATION(Register, c_rarg1, rsi); | |
75 REGISTER_DECLARATION(Register, c_rarg2, rdx); | |
76 REGISTER_DECLARATION(Register, c_rarg3, rcx); | |
77 REGISTER_DECLARATION(Register, c_rarg4, r8); | |
78 REGISTER_DECLARATION(Register, c_rarg5, r9); | |
79 | |
304 | 80 REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0); |
81 REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1); | |
82 REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2); | |
83 REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3); | |
84 REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4); | |
85 REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5); | |
86 REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6); | |
87 REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7); | |
0 | 88 |
89 #endif // _WIN64 | |
90 | |
91 // Symbolically name the register arguments used by the Java calling convention. | |
92 // We have control over the convention for java so we can do what we please. | |
93 // What pleases us is to offset the java calling convention so that when | |
94 // we call a suitable jni method the arguments are lined up and we don't | |
95 // have to do much shuffling. A suitable jni method is non-static and has a | |
96 // small number of arguments (two fewer args on windows) | |
97 // | |
98 // |-------------------------------------------------------| | |
99 // | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 | | |
100 // |-------------------------------------------------------| | |
101 // | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg) | |
102 // | rdi rsi rdx rcx r8 r9 | solaris/linux | |
103 // |-------------------------------------------------------| | |
104 // | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 | | |
105 // |-------------------------------------------------------| | |
106 | |
107 REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); | |
108 REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); | |
109 REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); | |
110 // Windows runs out of register args here | |
111 #ifdef _WIN64 | |
112 REGISTER_DECLARATION(Register, j_rarg3, rdi); | |
113 REGISTER_DECLARATION(Register, j_rarg4, rsi); | |
114 #else | |
115 REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); | |
116 REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); | |
117 #endif /* _WIN64 */ | |
118 REGISTER_DECLARATION(Register, j_rarg5, c_rarg0); | |
119 | |
304 | 120 REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0); |
121 REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1); | |
122 REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2); | |
123 REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3); | |
124 REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4); | |
125 REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5); | |
126 REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6); | |
127 REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7); | |
0 | 128 |
129 REGISTER_DECLARATION(Register, rscratch1, r10); // volatile | |
130 REGISTER_DECLARATION(Register, rscratch2, r11); // volatile | |
131 | |
304 | 132 REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved |
0 | 133 REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved |
134 | |
304 | 135 #else |
136 // rscratch1 will appear in 32bit code that is dead but of course must compile | |
137 // Using noreg ensures if the dead code is incorrectly live and executed it | |
138 // will cause an assertion failure | |
139 #define rscratch1 noreg | |
2002 | 140 #define rscratch2 noreg |
304 | 141 |
0 | 142 #endif // _LP64 |
143 | |
1564 | 144 // JSR 292 fixed register usages: |
145 REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp); | |
146 | |
0 | 147 // Address is an abstraction used to represent a memory location |
148 // using any of the amd64 addressing modes with one object. | |
149 // | |
150 // Note: A register location is represented via a Register, not | |
151 // via an address for efficiency & simplicity reasons. | |
152 | |
153 class ArrayAddress; | |
154 | |
155 class Address VALUE_OBJ_CLASS_SPEC { | |
156 public: | |
157 enum ScaleFactor { | |
158 no_scale = -1, | |
159 times_1 = 0, | |
160 times_2 = 1, | |
161 times_4 = 2, | |
304 | 162 times_8 = 3, |
163 times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4) | |
0 | 164 }; |
622
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
165 static ScaleFactor times(int size) { |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
166 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
167 if (size == 8) return times_8; |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
168 if (size == 4) return times_4; |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
169 if (size == 2) return times_2; |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
170 return times_1; |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
171 } |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
172 static int scale_size(ScaleFactor scale) { |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
173 assert(scale != no_scale, ""); |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
174 assert(((1 << (int)times_1) == 1 && |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
175 (1 << (int)times_2) == 2 && |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
176 (1 << (int)times_4) == 4 && |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
177 (1 << (int)times_8) == 8), ""); |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
178 return (1 << (int)scale); |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
179 } |
0 | 180 |
181 private: | |
182 Register _base; | |
183 Register _index; | |
184 ScaleFactor _scale; | |
185 int _disp; | |
186 RelocationHolder _rspec; | |
187 | |
304 | 188 // Easily misused constructors; make them private |
189 // %%% can we make these go away? | |
190 NOT_LP64(Address(address loc, RelocationHolder spec);) | |
191 Address(int disp, address loc, relocInfo::relocType rtype); | |
192 Address(int disp, address loc, RelocationHolder spec); | |
0 | 193 |
194 public: | |
304 | 195 |
196 int disp() { return _disp; } | |
0 | 197 // creation |
198 Address() | |
199 : _base(noreg), | |
200 _index(noreg), | |
201 _scale(no_scale), | |
202 _disp(0) { | |
203 } | |
204 | |
205 // No default displacement otherwise Register can be implicitly | |
206 // converted to 0(Register) which is quite a different animal. | |
207 | |
208 Address(Register base, int disp) | |
209 : _base(base), | |
210 _index(noreg), | |
211 _scale(no_scale), | |
212 _disp(disp) { | |
213 } | |
214 | |
215 Address(Register base, Register index, ScaleFactor scale, int disp = 0) | |
216 : _base (base), | |
217 _index(index), | |
218 _scale(scale), | |
219 _disp (disp) { | |
220 assert(!index->is_valid() == (scale == Address::no_scale), | |
221 "inconsistent address"); | |
222 } | |
223 | |
665
c89f86385056
6814659: separable cleanups and subroutines for 6655638
jrose
parents:
644
diff
changeset
|
224 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0) |
622
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
225 : _base (base), |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
226 _index(index.register_or_noreg()), |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
227 _scale(scale), |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
228 _disp (disp + (index.constant_or_zero() * scale_size(scale))) { |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
229 if (!index.is_register()) scale = Address::no_scale; |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
230 assert(!_index->is_valid() == (scale == Address::no_scale), |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
231 "inconsistent address"); |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
232 } |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
233 |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
234 Address plus_disp(int disp) const { |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
235 Address a = (*this); |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
236 a._disp += disp; |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
237 return a; |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
238 } |
3363
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
239 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const { |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
240 Address a = (*this); |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
241 a._disp += disp.constant_or_zero() * scale_size(scale); |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
242 if (disp.is_register()) { |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
243 assert(!a.index()->is_valid(), "competing indexes"); |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
244 a._index = disp.as_register(); |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
245 a._scale = scale; |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
246 } |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
247 return a; |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
248 } |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
249 bool is_same_address(Address a) const { |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
250 // disregard _rspec |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
251 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale; |
167b70ff3abc
6939861: JVM should handle more conversion operations
never
parents:
3336
diff
changeset
|
252 } |
622
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
253 |
0 | 254 // The following two overloads are used in connection with the |
255 // ByteSize type (see sizes.hpp). They simplify the use of | |
256 // ByteSize'd arguments in assembly code. Note that their equivalent | |
257 // for the optimized build are the member functions with int disp | |
258 // argument since ByteSize is mapped to an int type in that case. | |
259 // | |
260 // Note: DO NOT introduce similar overloaded functions for WordSize | |
261 // arguments as in the optimized mode, both ByteSize and WordSize | |
262 // are mapped to the same type and thus the compiler cannot make a | |
263 // distinction anymore (=> compiler errors). | |
264 | |
265 #ifdef ASSERT | |
266 Address(Register base, ByteSize disp) | |
267 : _base(base), | |
268 _index(noreg), | |
269 _scale(no_scale), | |
270 _disp(in_bytes(disp)) { | |
271 } | |
272 | |
273 Address(Register base, Register index, ScaleFactor scale, ByteSize disp) | |
274 : _base(base), | |
275 _index(index), | |
276 _scale(scale), | |
277 _disp(in_bytes(disp)) { | |
278 assert(!index->is_valid() == (scale == Address::no_scale), | |
279 "inconsistent address"); | |
280 } | |
622
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
281 |
665
c89f86385056
6814659: separable cleanups and subroutines for 6655638
jrose
parents:
644
diff
changeset
|
282 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp) |
622
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
283 : _base (base), |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
284 _index(index.register_or_noreg()), |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
285 _scale(scale), |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
286 _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) { |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
287 if (!index.is_register()) scale = Address::no_scale; |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
288 assert(!_index->is_valid() == (scale == Address::no_scale), |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
289 "inconsistent address"); |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
290 } |
56aae7be60d4
6812678: macro assembler needs delayed binding of a few constants (for 6655638)
jrose
parents:
420
diff
changeset
|
291 |
0 | 292 #endif // ASSERT |
293 | |
294 // accessors | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
71
diff
changeset
|
295 bool uses(Register reg) const { return _base == reg || _index == reg; } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
71
diff
changeset
|
296 Register base() const { return _base; } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
71
diff
changeset
|
297 Register index() const { return _index; } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
71
diff
changeset
|
298 ScaleFactor scale() const { return _scale; } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
71
diff
changeset
|
299 int disp() const { return _disp; } |
0 | 300 |
301 // Convert the raw encoding form into the form expected by the constructor for | |
302 // Address. An index of 4 (rsp) corresponds to having no index, so convert | |
303 // that to noreg for the Address constructor. | |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6614
diff
changeset
|
304 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); |
0 | 305 |
306 static Address make_array(ArrayAddress); | |
307 | |
308 private: | |
309 bool base_needs_rex() const { | |
310 return _base != noreg && _base->encoding() >= 8; | |
311 } | |
312 | |
313 bool index_needs_rex() const { | |
314 return _index != noreg &&_index->encoding() >= 8; | |
315 } | |
316 | |
317 relocInfo::relocType reloc() const { return _rspec.type(); } | |
318 | |
319 friend class Assembler; | |
320 friend class MacroAssembler; | |
321 friend class LIR_Assembler; // base/index/scale/disp | |
322 }; | |
323 | |
324 // | |
325 // AddressLiteral has been split out from Address because operands of this type | |
326 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out | |
327 // the few instructions that need to deal with address literals are unique and the | |
328 // MacroAssembler does not have to implement every instruction in the Assembler | |
329 // in order to search for address literals that may need special handling depending | |
330 // on the instruction and the platform. A small step on the way to merging i486/amd64 | |
331 // directories. | |
332 // | |
333 class AddressLiteral VALUE_OBJ_CLASS_SPEC { | |
334 friend class ArrayAddress; | |
335 RelocationHolder _rspec; | |
336 // Typically when we use AddressLiterals we want to use their rval. | |
337 // However in some situations we want the lval (effective address) of the item. | |
338 // We provide a special factory for making those lvals. | |
339 bool _is_lval; | |
340 | |
341 // If the target is far we'll need to load the ea of this to | |
342 // a register to reach it. Otherwise if near we can do rip | |
343 // relative addressing. | |
344 | |
345 address _target; | |
346 | |
347 protected: | |
348 // creation | |
349 AddressLiteral() | |
350 : _is_lval(false), | |
351 _target(NULL) | |
352 {} | |
353 | |
354 public: | |
355 | |
356 | |
357 AddressLiteral(address target, relocInfo::relocType rtype); | |
358 | |
359 AddressLiteral(address target, RelocationHolder const& rspec) | |
360 : _rspec(rspec), | |
361 _is_lval(false), | |
362 _target(target) | |
363 {} | |
364 | |
365 AddressLiteral addr() { | |
366 AddressLiteral ret = *this; | |
367 ret._is_lval = true; | |
368 return ret; | |
369 } | |
370 | |
371 | |
372 private: | |
373 | |
374 address target() { return _target; } | |
375 bool is_lval() { return _is_lval; } | |
376 | |
377 relocInfo::relocType reloc() const { return _rspec.type(); } | |
378 const RelocationHolder& rspec() const { return _rspec; } | |
379 | |
380 friend class Assembler; | |
381 friend class MacroAssembler; | |
382 friend class Address; | |
383 friend class LIR_Assembler; | |
384 }; | |
385 | |
386 // Convenience classes | |
387 class RuntimeAddress: public AddressLiteral { | |
388 | |
389 public: | |
390 | |
391 RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} | |
392 | |
393 }; | |
394 | |
395 class ExternalAddress: public AddressLiteral { | |
2455
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
396 private: |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
397 static relocInfo::relocType reloc_for_target(address target) { |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
398 // Sometimes ExternalAddress is used for values which aren't |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
399 // exactly addresses, like the card table base. |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
400 // external_word_type can't be used for values in the first page |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
401 // so just skip the reloc in that case. |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
402 return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none; |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
403 } |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
404 |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
405 public: |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
406 |
479b4b4b6950
6777083: assert(target != __null,"must not be null")
never
parents:
2415
diff
changeset
|
407 ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {} |
0 | 408 |
409 }; | |
410 | |
411 class InternalAddress: public AddressLiteral { | |
412 | |
413 public: | |
414 | |
415 InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} | |
416 | |
417 }; | |
418 | |
419 // x86 can do array addressing as a single operation since disp can be an absolute | |
420 // address; amd64 can't. We create a class that expresses the concept but does extra | |
421 // magic on amd64 to get the final result. | |
422 | |
423 class ArrayAddress VALUE_OBJ_CLASS_SPEC { | |
424 private: | |
425 | |
426 AddressLiteral _base; | |
427 Address _index; | |
428 | |
429 public: | |
430 | |
431 ArrayAddress() {}; | |
432 ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; | |
433 AddressLiteral base() { return _base; } | |
434 Address index() { return _index; } | |
435 | |
436 }; | |
437 | |
304 | 438 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize); |
0 | 439 |
440 // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction | |
441 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write | |
442 // is what you get. The Assembler is generating code into a CodeBuffer. | |
443 | |
444 class Assembler : public AbstractAssembler { | |
445 friend class AbstractAssembler; // for the non-virtual hack | |
446 friend class LIR_Assembler; // as_Address() | |
304 | 447 friend class StubGenerator; |
0 | 448 |
449 public: | |
450 enum Condition { // The x86 condition codes used for conditional jumps/moves. | |
451 zero = 0x4, | |
452 notZero = 0x5, | |
453 equal = 0x4, | |
454 notEqual = 0x5, | |
455 less = 0xc, | |
456 lessEqual = 0xe, | |
457 greater = 0xf, | |
458 greaterEqual = 0xd, | |
459 below = 0x2, | |
460 belowEqual = 0x6, | |
461 above = 0x7, | |
462 aboveEqual = 0x3, | |
463 overflow = 0x0, | |
464 noOverflow = 0x1, | |
465 carrySet = 0x2, | |
466 carryClear = 0x3, | |
467 negative = 0x8, | |
468 positive = 0x9, | |
469 parity = 0xa, | |
470 noParity = 0xb | |
471 }; | |
472 | |
473 enum Prefix { | |
474 // segment overrides | |
475 CS_segment = 0x2e, | |
476 SS_segment = 0x36, | |
477 DS_segment = 0x3e, | |
478 ES_segment = 0x26, | |
479 FS_segment = 0x64, | |
480 GS_segment = 0x65, | |
481 | |
482 REX = 0x40, | |
483 | |
484 REX_B = 0x41, | |
485 REX_X = 0x42, | |
486 REX_XB = 0x43, | |
487 REX_R = 0x44, | |
488 REX_RB = 0x45, | |
489 REX_RX = 0x46, | |
490 REX_RXB = 0x47, | |
491 | |
492 REX_W = 0x48, | |
493 | |
494 REX_WB = 0x49, | |
495 REX_WX = 0x4A, | |
496 REX_WXB = 0x4B, | |
497 REX_WR = 0x4C, | |
498 REX_WRB = 0x4D, | |
499 REX_WRX = 0x4E, | |
4759 | 500 REX_WRXB = 0x4F, |
501 | |
502 VEX_3bytes = 0xC4, | |
503 VEX_2bytes = 0xC5 | |
504 }; | |
505 | |
506 enum VexPrefix { | |
507 VEX_B = 0x20, | |
508 VEX_X = 0x40, | |
509 VEX_R = 0x80, | |
510 VEX_W = 0x80 | |
511 }; | |
512 | |
513 enum VexSimdPrefix { | |
514 VEX_SIMD_NONE = 0x0, | |
515 VEX_SIMD_66 = 0x1, | |
516 VEX_SIMD_F3 = 0x2, | |
517 VEX_SIMD_F2 = 0x3 | |
518 }; | |
519 | |
520 enum VexOpcode { | |
521 VEX_OPCODE_NONE = 0x0, | |
522 VEX_OPCODE_0F = 0x1, | |
523 VEX_OPCODE_0F_38 = 0x2, | |
524 VEX_OPCODE_0F_3A = 0x3 | |
0 | 525 }; |
526 | |
527 enum WhichOperand { | |
528 // input to locate_operand, and format code for relocations | |
304 | 529 imm_operand = 0, // embedded 32-bit|64-bit immediate operand |
0 | 530 disp32_operand = 1, // embedded 32-bit displacement or address |
531 call32_operand = 2, // embedded 32-bit self-relative displacement | |
304 | 532 #ifndef _LP64 |
0 | 533 _WhichOperand_limit = 3 |
304 | 534 #else |
535 narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop | |
536 _WhichOperand_limit = 4 | |
537 #endif | |
0 | 538 }; |
539 | |
304 | 540 |
541 | |
304 | 542 // NOTE: The general philosophy of the declarations here is that 64bit versions |
543 // of instructions are freely declared without the need for wrapping them in an ifdef. | |
544 // (Some dangerous instructions are ifdef'ed out of inappropriate jvm's.) | |
545 // In the .cpp file the implementations are wrapped so that they are dropped out | |
7951 | 546 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL |
304 | 547 // to the size it was prior to merging up the 32bit and 64bit assemblers. |
548 // | |
549 // This does mean you'll get a linker/runtime error if you use a 64bit only instruction | |
550 // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down. | |
551 | |
552 private: | |
553 | |
554 | |
555 // 64bit prefixes | |
556 int prefix_and_encode(int reg_enc, bool byteinst = false); | |
557 int prefixq_and_encode(int reg_enc); | |
558 | |
559 int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false); | |
560 int prefixq_and_encode(int dst_enc, int src_enc); | |
561 | |
562 void prefix(Register reg); | |
563 void prefix(Address adr); | |
564 void prefixq(Address adr); | |
565 | |
566 void prefix(Address adr, Register reg, bool byteinst = false); | |
4759 | 567 void prefix(Address adr, XMMRegister reg); |
304 | 568 void prefixq(Address adr, Register reg); |
4759 | 569 void prefixq(Address adr, XMMRegister reg); |
304 | 570 |
571 void prefetch_prefix(Address src); | |
572 | |
4759 | 573 void rex_prefix(Address adr, XMMRegister xreg, |
574 VexSimdPrefix pre, VexOpcode opc, bool rex_w); | |
575 int rex_prefix_and_encode(int dst_enc, int src_enc, | |
576 VexSimdPrefix pre, VexOpcode opc, bool rex_w); | |
577 | |
578 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, | |
579 int nds_enc, VexSimdPrefix pre, VexOpcode opc, | |
580 bool vector256); | |
581 | |
582 void vex_prefix(Address adr, int nds_enc, int xreg_enc, | |
583 VexSimdPrefix pre, VexOpcode opc, | |
584 bool vex_w, bool vector256); | |
585 | |
4761
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
// Register-typed convenience overload of vex_prefix() for a memory source.
// Reads the encoding of dst, uses the encoding of nds when nds->is_valid()
// and 0 otherwise, then forwards to the overload declared as
// vex_prefix(Address adr, int nds_enc, int xreg_enc, pre, opc, vex_w, vector256)
// with opc fixed to VEX_OPCODE_0F and vex_w fixed to false.
// vector256 defaults to false and is passed through unchanged.
586 void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
587 VexSimdPrefix pre, bool vector256 = false) { |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
588 int dst_enc = dst->encoding(); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
589 int nds_enc = nds->is_valid() ? nds->encoding() : 0; |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
590 vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256); |
4761
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
591 } |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
592 |
4759 | 593 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, |
594 VexSimdPrefix pre, VexOpcode opc, | |
595 bool vex_w, bool vector256); | |
596 | |
4761
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
// Register-typed convenience overload of vex_prefix_and_encode() for the
// register-to-register case. Reads the encodings of src and dst, uses the
// encoding of nds when nds->is_valid() and 0 otherwise, and returns whatever
// the integer overload vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre,
// opc, vex_w, vector256) returns, with vex_w fixed to false.
// vector256 defaults to false and opc defaults to VEX_OPCODE_0F.
597 int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
598 VexSimdPrefix pre, bool vector256 = false, |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
599 VexOpcode opc = VEX_OPCODE_0F) { |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
600 int src_enc = src->encoding(); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
601 int dst_enc = dst->encoding(); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
602 int nds_enc = nds->is_valid() ? nds->encoding() : 0; |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
603 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256); |
4761
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
604 } |
4759 | 605 |
606 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, | |
607 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, | |
608 bool rex_w = false, bool vector256 = false); | |
609 | |
// Two-operand form with a memory source: no separate nds register is given,
// so xnoreg is passed in the nds position and the call forwards to
// simd_prefix(xreg, nds, adr, pre, opc, rex_w, vector256). rex_w and
// vector256 are left at that overload's declared defaults (both false).
610 void simd_prefix(XMMRegister dst, Address src, | |
611 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { | |
612 simd_prefix(dst, xnoreg, src, pre, opc); | |
613 } | |
6614
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
614 |
// Overload that takes the Address first and the XMM register second.
// It only swaps the two operands and forwards to
// simd_prefix(XMMRegister, Address, VexSimdPrefix, VexOpcode), so opc takes
// that overload's default of VEX_OPCODE_0F.
4759 | 615 void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { | |
616 simd_prefix(src, dst, pre); | |
617 } | |
// Same as simd_prefix(dst, nds, src, pre) with opcode VEX_OPCODE_0F, except
// that rex_w is forced to true; vector256 stays at its default (false).
// NOTE(review): rex_w = true presumably selects the 64-bit operand form,
// matching the _q suffix -- confirm against the implementation in the .cpp.
618 void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, | |
619 VexSimdPrefix pre) { | |
620 bool rex_w = true; | |
621 simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); | |
622 } | |
623 | |
624 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, | |
625 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, | |
626 bool rex_w = false, bool vector256 = false); | |
627 | |
628 // Move/convert 32-bit integer value. | |
// Variant whose source is a general-purpose Register. The register is
// re-wrapped as an XMMRegister carrying the same encoding, and the call is
// forwarded to the all-XMM overload with opc, rex_w and vector256 left at
// their defaults. Returns that overload's result unchanged.
629 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, | |
630 VexSimdPrefix pre) { | |
631 // It is OK to cast from Register to XMMRegister to pass the argument here | |
632 // since only the encoding is used in simd_prefix_and_encode() and the number | |
633 // of Gen and Xmm registers is the same. | |
634 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre); | |
635 } | |
// Two-operand form of the Register-source variant: passes xnoreg in the nds
// position and forwards to simd_prefix_and_encode(dst, nds, Register src, pre).
636 int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) { | |
637 return simd_prefix_and_encode(dst, xnoreg, src, pre); | |
638 } | |
// Variant whose destination is a general-purpose Register. The destination is
// re-wrapped as an XMMRegister with the same encoding, xnoreg is passed as
// nds, and the call forwards to the all-XMM overload with the given opc
// (default VEX_OPCODE_0F). rex_w and vector256 keep their defaults.
639 int simd_prefix_and_encode(Register dst, XMMRegister src, | |
640 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { | |
641 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc); | |
642 } | |
643 | |
644 // Move/convert 64-bit integer value. | |
// _q counterpart of simd_prefix_and_encode(dst, nds, Register src, pre):
// the Register source is re-wrapped as an XMMRegister with the same encoding
// and the all-XMM overload is called with opc = VEX_OPCODE_0F and
// rex_w = true. vector256 keeps its default (false).
645 int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, | |
646 VexSimdPrefix pre) { | |
647 bool rex_w = true; | |
648 return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w); | |
649 } | |
// Two-operand _q form: passes xnoreg in the nds position and forwards to
// simd_prefix_and_encode_q(dst, nds, Register src, pre).
650 int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) { | |
651 return simd_prefix_and_encode_q(dst, xnoreg, src, pre); | |
652 } | |
// _q counterpart of simd_prefix_and_encode(Register dst, XMMRegister src, ...):
// the Register destination is re-wrapped as an XMMRegister with the same
// encoding, xnoreg is passed as nds, and the all-XMM overload is called with
// the given opc (default VEX_OPCODE_0F) and rex_w = true.
653 int simd_prefix_and_encode_q(Register dst, XMMRegister src, | |
654 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { | |
655 bool rex_w = true; | |
656 return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w); | |
657 } | |
658 | |
304 | 659 // Helper functions for groups of instructions |
660 void emit_arith_b(int op1, int op2, Register dst, int imm8); | |
661 | |
662 void emit_arith(int op1, int op2, Register dst, int32_t imm32); | |
4947
fd8114661503
7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29
kvn
parents:
4761
diff
changeset
|
663 // Force generation of a 4 byte immediate value even if it fits into 8bit |
fd8114661503
7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29
kvn
parents:
4761
diff
changeset
|
664 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32); |
304 | 665 void emit_arith(int op1, int op2, Register dst, Register src); |
666 | |
6614
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
667 void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
668 void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
669 void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
670 void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
671 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
672 Address src, VexSimdPrefix pre, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
673 void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
674 XMMRegister src, VexSimdPrefix pre, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
675 |
304 | 676 void emit_operand(Register reg, |
677 Register base, Register index, Address::ScaleFactor scale, | |
678 int disp, | |
679 RelocationHolder const& rspec, | |
680 int rip_relative_correction = 0); | |
681 | |
682 void emit_operand(Register reg, Address adr, int rip_relative_correction = 0); | |
683 | |
684 // operands that only take the original 32bit registers | |
685 void emit_operand32(Register reg, Address adr); | |
686 | |
687 void emit_operand(XMMRegister reg, | |
688 Register base, Register index, Address::ScaleFactor scale, | |
689 int disp, | |
690 RelocationHolder const& rspec); | |
691 | |
692 void emit_operand(XMMRegister reg, Address adr); | |
693 | |
694 void emit_operand(MMXRegister reg, Address adr); | |
695 | |
696 // workaround gcc (3.2.1-7) bug | |
697 void emit_operand(Address adr, MMXRegister reg); | |
698 | |
699 | |
700 // Immediate-to-memory forms | |
701 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32); | |
702 | |
703 void emit_farith(int b1, int b2, int i); | |
704 | |
705 | |
706 protected: | |
707 #ifdef ASSERT | |
708 void check_relocation(RelocationHolder const& rspec, int format); | |
709 #endif | |
710 | |
711 void emit_data(jint data, relocInfo::relocType rtype, int format); | |
712 void emit_data(jint data, RelocationHolder const& rspec, int format); | |
713 void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); | |
714 void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); | |
715 | |
716 bool reachable(AddressLiteral adr) NOT_LP64({ return true;}); | |
717 | |
718 // These are all easily abused and hence protected | |
719 | |
720 // 32BIT ONLY SECTION | |
721 #ifndef _LP64 | |
722 // Make these disappear in 64bit mode since they would never be correct | |
723 void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY | |
724 void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY | |
725 | |
642
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
726 void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY |
304 | 727 void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY |
728 | |
729 void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY | |
730 #else | |
731 // 64BIT ONLY SECTION | |
732 void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY | |
642
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
733 |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
734 void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
735 void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
736 |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
737 void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec); |
660978a2a31a
6791178: Specialize for zero as the compressed oop vm heap base
kvn
parents:
624
diff
changeset
|
738 void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec); |
304 | 739 #endif // _LP64 |
740 | |
741 // These are unique in that we are ensured by the caller that the 32bit | |
742 // relative in these instructions will always be able to reach the potentially | |
743 // 64bit address described by entry. Since they can take a 64bit address they | |
744 // don't have the 32 suffix like the other instructions in this class. | |
745 | |
746 void call_literal(address entry, RelocationHolder const& rspec); | |
747 void jmp_literal(address entry, RelocationHolder const& rspec); | |
748 | |
749 // Avoid using directly section | |
750 // Instructions in this section are actually usable by anyone without danger | |
751 // of failure but have performance issues that are addressed by enhanced | |
752 // instructions which will do the proper thing based on the particular cpu. | |
753 // We protect them because we don't trust you... | |
754 | |
755 // Don't use next inc() and dec() methods directly. INC & DEC instructions | |
756 // could cause a partial flag stall since they don't set CF flag. | |
757 // Use MacroAssembler::decrement() & MacroAssembler::increment() methods | |
758 // which call inc() & dec() or add() & sub() in accordance with | |
759 // the product flag UseIncDec value. | |
760 | |
761 void decl(Register dst); | |
762 void decl(Address dst); | |
763 void decq(Register dst); | |
764 void decq(Address dst); | |
765 | |
766 void incl(Register dst); | |
767 void incl(Address dst); | |
768 void incq(Register dst); | |
769 void incq(Address dst); | |
770 | |
771 // New cpus require use of movsd and movss to avoid partial register stall | |
772 // when loading from memory. But for old Opteron use movlpd instead of movsd. | |
773 // The selection is done in MacroAssembler::movdbl() and movflt(). | |
774 | |
775 // Move Scalar Single-Precision Floating-Point Values | |
776 void movss(XMMRegister dst, Address src); | |
777 void movss(XMMRegister dst, XMMRegister src); | |
778 void movss(Address dst, XMMRegister src); | |
779 | |
780 // Move Scalar Double-Precision Floating-Point Values | |
781 void movsd(XMMRegister dst, Address src); | |
782 void movsd(XMMRegister dst, XMMRegister src); | |
783 void movsd(Address dst, XMMRegister src); | |
784 void movlpd(XMMRegister dst, Address src); | |
785 | |
786 // New cpus require use of movaps and movapd to avoid partial register stall | |
787 // when moving between registers. | |
788 void movaps(XMMRegister dst, XMMRegister src); | |
789 void movapd(XMMRegister dst, XMMRegister src); | |
790 | |
791 // End avoid using directly | |
792 | |
793 | |
794 // Instruction prefixes | |
795 void prefix(Prefix p); | |
796 | |
0 | 797 public: |
798 | |
799 // Creation | |
// Constructs an Assembler for the given CodeBuffer. The body is empty: the
// code pointer is simply handed to the AbstractAssembler base constructor.
800 Assembler(CodeBuffer* code) : AbstractAssembler(code) {} | |
801 | |
802 // Decoding | |
803 static address locate_operand(address inst, WhichOperand which); | |
804 static address locate_next_instruction(address inst); | |
805 | |
304 | 806 // Utilities |
2404
b40d4fa697bf
6964776: c2 should ensure the polling page is reachable on 64 bit
iveresov
parents:
2320
diff
changeset
|
807 static bool is_polling_page_far() NOT_LP64({ return false;}); |
b40d4fa697bf
6964776: c2 should ensure the polling page is reachable on 64 bit
iveresov
parents:
2320
diff
changeset
|
808 |
304 | 809 // Generic instructions |
810 // Does 32bit or 64bit as needed for the platform. In some sense these | |
811 // belong in macro assembler but there is no need for both varieties to exist | |
812 | |
813 void lea(Register dst, Address src); | |
814 | |
815 void mov(Register dst, Register src); | |
816 | |
817 void pusha(); | |
818 void popa(); | |
819 | |
820 void pushf(); | |
821 void popf(); | |
822 | |
823 void push(int32_t imm32); | |
824 | |
825 void push(Register src); | |
826 | |
827 void pop(Register dst); | |
828 | |
829 // These are dummies to prevent surprise implicit conversions to Register | |
830 void push(void* v); | |
831 void pop(void* v); | |
832 | |
833 // These do register sized moves/scans | |
834 void rep_mov(); | |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7204
diff
changeset
|
835 void rep_stos(); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7204
diff
changeset
|
836 void rep_stosb(); |
304 | 837 void repne_scan(); |
838 #ifdef _LP64 | |
839 void repne_scanl(); | |
840 #endif | |
841 | |
842 // Vanilla instructions in lexical order | |
843 | |
2100
b1a2afa37ec4
7003271: Hotspot should track cumulative Java heap bytes allocated on a per-thread basis
phh
parents:
2008
diff
changeset
|
844 void adcl(Address dst, int32_t imm32); |
b1a2afa37ec4
7003271: Hotspot should track cumulative Java heap bytes allocated on a per-thread basis
phh
parents:
2008
diff
changeset
|
845 void adcl(Address dst, Register src); |
304 | 846 void adcl(Register dst, int32_t imm32); |
0 | 847 void adcl(Register dst, Address src); |
848 void adcl(Register dst, Register src); | |
849 | |
304 | 850 void adcq(Register dst, int32_t imm32); |
851 void adcq(Register dst, Address src); | |
852 void adcq(Register dst, Register src); | |
853 | |
854 void addl(Address dst, int32_t imm32); | |
0 | 855 void addl(Address dst, Register src); |
304 | 856 void addl(Register dst, int32_t imm32); |
0 | 857 void addl(Register dst, Address src); |
858 void addl(Register dst, Register src); | |
859 | |
304 | 860 void addq(Address dst, int32_t imm32); |
861 void addq(Address dst, Register src); | |
862 void addq(Register dst, int32_t imm32); | |
863 void addq(Register dst, Address src); | |
864 void addq(Register dst, Register src); | |
865 | |
0 | 866 void addr_nop_4(); |
867 void addr_nop_5(); | |
868 void addr_nop_7(); | |
869 void addr_nop_8(); | |
870 | |
304 | 871 // Add Scalar Double-Precision Floating-Point Values |
872 void addsd(XMMRegister dst, Address src); | |
873 void addsd(XMMRegister dst, XMMRegister src); | |
874 | |
875 // Add Scalar Single-Precision Floating-Point Values | |
876 void addss(XMMRegister dst, Address src); | |
877 void addss(XMMRegister dst, XMMRegister src); | |
878 | |
6894 | 879 // AES instructions |
880 void aesdec(XMMRegister dst, Address src); | |
881 void aesdec(XMMRegister dst, XMMRegister src); | |
882 void aesdeclast(XMMRegister dst, Address src); | |
883 void aesdeclast(XMMRegister dst, XMMRegister src); | |
884 void aesenc(XMMRegister dst, Address src); | |
885 void aesenc(XMMRegister dst, XMMRegister src); | |
886 void aesenclast(XMMRegister dst, Address src); | |
887 void aesenclast(XMMRegister dst, XMMRegister src); | |
888 | |
889 | |
4759 | 890 void andl(Address dst, int32_t imm32); |
304 | 891 void andl(Register dst, int32_t imm32); |
892 void andl(Register dst, Address src); | |
893 void andl(Register dst, Register src); | |
894 | |
3783 | 895 void andq(Address dst, int32_t imm32); |
304 | 896 void andq(Register dst, int32_t imm32); |
897 void andq(Register dst, Address src); | |
898 void andq(Register dst, Register src); | |
899 | |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
900 void bsfl(Register dst, Register src); |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
901 void bsrl(Register dst, Register src); |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
902 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
903 #ifdef _LP64 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
904 void bsfq(Register dst, Register src); |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
905 void bsrq(Register dst, Register src); |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
906 #endif |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
907 |
304 | 908 void bswapl(Register reg); |
909 | |
910 void bswapq(Register reg); | |
911 | |
0 | 912 void call(Label& L, relocInfo::relocType rtype); |
913 void call(Register reg); // push pc; pc <- reg | |
914 void call(Address adr); // push pc; pc <- adr | |
915 | |
304 | 916 void cdql(); |
917 | |
918 void cdqq(); | |
919 | |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
7198
diff
changeset
|
920 void cld(); |
304 | 921 |
922 void clflush(Address adr); | |
923 | |
924 void cmovl(Condition cc, Register dst, Register src); | |
925 void cmovl(Condition cc, Register dst, Address src); | |
926 | |
927 void cmovq(Condition cc, Register dst, Register src); | |
928 void cmovq(Condition cc, Register dst, Address src); | |
929 | |
930 | |
931 void cmpb(Address dst, int imm8); | |
932 | |
933 void cmpl(Address dst, int32_t imm32); | |
934 | |
935 void cmpl(Register dst, int32_t imm32); | |
936 void cmpl(Register dst, Register src); | |
937 void cmpl(Register dst, Address src); | |
938 | |
939 void cmpq(Address dst, int32_t imm32); | |
940 void cmpq(Address dst, Register src); | |
941 | |
942 void cmpq(Register dst, int32_t imm32); | |
943 void cmpq(Register dst, Register src); | |
944 void cmpq(Register dst, Address src); | |
945 | |
946 // these are dummies used to catch attempting to convert NULL to Register | |
947 void cmpl(Register dst, void* junk); // dummy | |
948 void cmpq(Register dst, void* junk); // dummy | |
949 | |
950 void cmpw(Address dst, int imm16); | |
951 | |
952 void cmpxchg8 (Address adr); | |
953 | |
954 void cmpxchgl(Register reg, Address adr); | |
955 | |
956 void cmpxchgq(Register reg, Address adr); | |
957 | |
958 // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS | |
959 void comisd(XMMRegister dst, Address src); | |
4759 | 960 void comisd(XMMRegister dst, XMMRegister src); |
304 | 961 |
962 // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS | |
963 void comiss(XMMRegister dst, Address src); | |
4759 | 964 void comiss(XMMRegister dst, XMMRegister src); |
304 | 965 |
966 // Identify processor type and features | |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
7198
diff
changeset
|
967 void cpuid(); |
304 | 968 |
969 // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value | |
970 void cvtsd2ss(XMMRegister dst, XMMRegister src); | |
4759 | 971 void cvtsd2ss(XMMRegister dst, Address src); |
304 | 972 |
973 // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value | |
974 void cvtsi2sdl(XMMRegister dst, Register src); | |
4759 | 975 void cvtsi2sdl(XMMRegister dst, Address src); |
304 | 976 void cvtsi2sdq(XMMRegister dst, Register src); |
4759 | 977 void cvtsi2sdq(XMMRegister dst, Address src); |
304 | 978 |
979 // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value | |
980 void cvtsi2ssl(XMMRegister dst, Register src); | |
4759 | 981 void cvtsi2ssl(XMMRegister dst, Address src); |
304 | 982 void cvtsi2ssq(XMMRegister dst, Register src); |
4759 | 983 void cvtsi2ssq(XMMRegister dst, Address src); |
304 | 984 |
985 // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value | |
986 void cvtdq2pd(XMMRegister dst, XMMRegister src); | |
987 | |
988 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value | |
989 void cvtdq2ps(XMMRegister dst, XMMRegister src); | |
990 | |
991 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value | |
992 void cvtss2sd(XMMRegister dst, XMMRegister src); | |
4759 | 993 void cvtss2sd(XMMRegister dst, Address src); |
304 | 994 |
995 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer | |
996 void cvttsd2sil(Register dst, Address src); | |
997 void cvttsd2sil(Register dst, XMMRegister src); | |
998 void cvttsd2siq(Register dst, XMMRegister src); | |
999 | |
1000 // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer | |
1001 void cvttss2sil(Register dst, XMMRegister src); | |
1002 void cvttss2siq(Register dst, XMMRegister src); | |
1003 | |
1004 // Divide Scalar Double-Precision Floating-Point Values | |
1005 void divsd(XMMRegister dst, Address src); | |
1006 void divsd(XMMRegister dst, XMMRegister src); | |
1007 | |
1008 // Divide Scalar Single-Precision Floating-Point Values | |
1009 void divss(XMMRegister dst, Address src); | |
1010 void divss(XMMRegister dst, XMMRegister src); | |
1011 | |
1012 void emms(); | |
1013 | |
1014 void fabs(); | |
1015 | |
1016 void fadd(int i); | |
1017 | |
1018 void fadd_d(Address src); | |
1019 void fadd_s(Address src); | |
1020 | |
1021 // "Alternate" versions of x87 instructions place result down in FPU | |
1022 // stack instead of on TOS | |
1023 | |
1024 void fadda(int i); // "alternate" fadd | |
1025 void faddp(int i = 1); | |
1026 | |
1027 void fchs(); | |
1028 | |
1029 void fcom(int i); | |
1030 | |
1031 void fcomp(int i = 1); | |
1032 void fcomp_d(Address src); | |
1033 void fcomp_s(Address src); | |
1034 | |
1035 void fcompp(); | |
1036 | |
1037 void fcos(); | |
1038 | |
1039 void fdecstp(); | |
1040 | |
1041 void fdiv(int i); | |
1042 void fdiv_d(Address src); | |
1043 void fdivr_s(Address src); | |
1044 void fdiva(int i); // "alternate" fdiv | |
1045 void fdivp(int i = 1); | |
1046 | |
1047 void fdivr(int i); | |
1048 void fdivr_d(Address src); | |
1049 void fdiv_s(Address src); | |
1050 | |
1051 void fdivra(int i); // "alternate" reversed fdiv | |
1052 | |
1053 void fdivrp(int i = 1); | |
1054 | |
1055 void ffree(int i = 0); | |
1056 | |
1057 void fild_d(Address adr); | |
1058 void fild_s(Address adr); | |
1059 | |
1060 void fincstp(); | |
1061 | |
1062 void finit(); | |
1063 | |
1064 void fist_s (Address adr); | |
1065 void fistp_d(Address adr); | |
1066 void fistp_s(Address adr); | |
1067 | |
1068 void fld1(); | |
1069 | |
1070 void fld_d(Address adr); | |
1071 void fld_s(Address adr); | |
1072 void fld_s(int index); | |
1073 void fld_x(Address adr); // extended-precision (80-bit) format | |
1074 | |
1075 void fldcw(Address src); | |
1076 | |
1077 void fldenv(Address src); | |
1078 | |
1079 void fldlg2(); | |
1080 | |
1081 void fldln2(); | |
1082 | |
1083 void fldz(); | |
1084 | |
1085 void flog(); | |
1086 void flog10(); | |
1087 | |
1088 void fmul(int i); | |
1089 | |
1090 void fmul_d(Address src); | |
1091 void fmul_s(Address src); | |
1092 | |
1093 void fmula(int i); // "alternate" fmul | |
1094 | |
1095 void fmulp(int i = 1); | |
1096 | |
1097 void fnsave(Address dst); | |
1098 | |
1099 void fnstcw(Address src); | |
1100 | |
1101 void fnstsw_ax(); | |
1102 | |
1103 void fprem(); | |
1104 void fprem1(); | |
1105 | |
1106 void frstor(Address src); | |
1107 | |
1108 void fsin(); | |
1109 | |
1110 void fsqrt(); | |
1111 | |
1112 void fst_d(Address adr); | |
1113 void fst_s(Address adr); | |
1114 | |
1115 void fstp_d(Address adr); | |
1116 void fstp_d(int index); | |
1117 void fstp_s(Address adr); | |
1118 void fstp_x(Address adr); // extended-precision (80-bit) format | |
1119 | |
1120 void fsub(int i); | |
1121 void fsub_d(Address src); | |
1122 void fsub_s(Address src); | |
1123 | |
1124 void fsuba(int i); // "alternate" fsub | |
1125 | |
1126 void fsubp(int i = 1); | |
1127 | |
1128 void fsubr(int i); | |
1129 void fsubr_d(Address src); | |
1130 void fsubr_s(Address src); | |
1131 | |
1132 void fsubra(int i); // "alternate" reversed fsub | |
1133 | |
1134 void fsubrp(int i = 1); | |
1135 | |
1136 void ftan(); | |
1137 | |
1138 void ftst(); | |
1139 | |
1140 void fucomi(int i = 1); | |
1141 void fucomip(int i = 1); | |
1142 | |
1143 void fwait(); | |
1144 | |
1145 void fxch(int i = 1); | |
1146 | |
1147 void fxrstor(Address src); | |
1148 | |
1149 void fxsave(Address dst); | |
1150 | |
1151 void fyl2x(); | |
6084
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4947
diff
changeset
|
1152 void frndint(); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4947
diff
changeset
|
1153 void f2xm1(); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4947
diff
changeset
|
1154 void fldl2e(); |
304 | 1155 |
1156 void hlt(); | |
1157 | |
1158 void idivl(Register src); | |
1920 | 1159 void divl(Register src); // Unsigned division |
304 | 1160 |
1161 void idivq(Register src); | |
1162 | |
1163 void imull(Register dst, Register src); | |
1164 void imull(Register dst, Register src, int value); | |
12972
59e8ad757e19
8026844: Various Math functions needs intrinsification
rbackman
parents:
11080
diff
changeset
|
1165 void imull(Register dst, Address src); |
304 | 1166 |
1167 void imulq(Register dst, Register src); | |
1168 void imulq(Register dst, Register src, int value); | |
12972
59e8ad757e19
8026844: Various Math functions needs intrinsification
rbackman
parents:
11080
diff
changeset
|
1169 #ifdef _LP64 |
59e8ad757e19
8026844: Various Math functions needs intrinsification
rbackman
parents:
11080
diff
changeset
|
1170 void imulq(Register dst, Address src); |
59e8ad757e19
8026844: Various Math functions needs intrinsification
rbackman
parents:
11080
diff
changeset
|
1171 #endif |
304 | 1172 |
0 | 1173 |
1174 // jcc is the generic conditional branch generator to run- | |
1175 // time routines, jcc is used for branches to labels. jcc | |
1176 // takes a branch opcode (cc) and a label (L) and generates | |
1177 // either a backward branch or a forward branch and links it | |
1178 // to the label fixup chain. Usage: | |
1179 // | |
1180 // Label L; // unbound label | |
1181 // jcc(cc, L); // forward branch to unbound label | |
1182 // bind(L); // bind label to the current pc | |
1183 // jcc(cc, L); // backward branch to bound label | |
1184 // bind(L); // illegal: a label may be bound only once | |
1185 // | |
1186 // Note: The same Label can be used for forward and backward branches | |
1187 // but it may be bound only once. | |
1188 | |
3851 | 1189 void jcc(Condition cc, Label& L, bool maybe_short = true); |
0 | 1190 |
1191 // Conditional jump to an 8-bit offset to L. | |
1192 // WARNING: be very careful using this for forward jumps. If the label is | |
1193 // not bound within an 8-bit offset of this instruction, a run-time error | |
1194 // will occur. | |
1195 void jccb(Condition cc, Label& L); | |
1196 | |
304 | 1197 void jmp(Address entry); // pc <- entry |
1198 | |
1199 // Label operations & relative jumps (PPUM Appendix D) | |
3851 | 1200 void jmp(Label& L, bool maybe_short = true); // unconditional jump to L |
304 | 1201 |
1202 void jmp(Register entry); // pc <- entry | |
1203 | |
1204 // Unconditional 8-bit offset jump to L. | |
1205 // WARNING: be very careful using this for forward jumps. If the label is | |
1206 // not bound within an 8-bit offset of this instruction, a run-time error | |
1207 // will occur. | |
1208 void jmpb(Label& L); | |
1209 | |
1210 void ldmxcsr( Address src ); | |
1211 | |
1212 void leal(Register dst, Address src); | |
1213 | |
1214 void leaq(Register dst, Address src); | |
1215 | |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
7198
diff
changeset
|
1216 void lfence(); |
304 | 1217 |
1218 void lock(); | |
1219 | |
775
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
1220 void lzcntl(Register dst, Register src); |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
1221 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
1222 #ifdef _LP64 |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
1223 void lzcntq(Register dst, Register src); |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
1224 #endif |
93c14e5562c4
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
twisti
parents:
710
diff
changeset
|
1225 |
// Flags naming the memory-ordering constraints that can be requested from
// membar(). Each enumerator occupies its own bit (bit 0 through bit 3), so an
// individual constraint can be tested with a bitwise AND, as membar() does
// for StoreLoad.
304 | 1226 enum Membar_mask_bits { | |
1227 StoreStore = 1 << 3, | |
1228 LoadStore = 1 << 2, | |
1229 StoreLoad = 1 << 1, | |
1230 LoadLoad = 1 << 0 | |
1231 }; | |
1232 | |
671
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1233 // Serializes memory and blows flags. Does work only when os::is_MP() is true and order_constraint includes StoreLoad; otherwise nothing is generated. |
304 | 1234 void membar(Membar_mask_bits order_constraint) { |
671
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1235 if (os::is_MP()) { |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1236 // Only StoreLoad has to be handled; the other Membar_mask_bits values generate nothing here |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1237 if (order_constraint & StoreLoad) { |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1238 // All usable chips support "locked" instructions, which suffice |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1239 // as barriers and are much faster than the alternative of |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1240 // using the cpuid instruction. Here we use a locked add [esp],0. |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1241 // Adding zero conveniently makes this a no-op apart from blowing |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1242 // the flags. |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1243 // Any change to this code may require revisiting other places in |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1244 // the code where this idiom is used, in particular the |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1245 // orderAccess code. |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1246 lock(); |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1247 addl(Address(rsp, 0), 0);// Locked add of 0 to [rsp]: assert the lock# signal here |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1248 } |
d0994e5bebce
6822204: volatile fences should prefer lock:addl to actual mfence instructions
never
parents:
665
diff
changeset
|
1249 } |
304 | 1250 } |
1251 | |
1252 void mfence(); | |
1253 | |
1254 // Moves | |
1255 | |
1256 void mov64(Register dst, int64_t imm64); | |
1257 | |
1258 void movb(Address dst, Register src); | |
1259 void movb(Address dst, int imm8); | |
1260 void movb(Register dst, Address src); | |
1261 | |
1262 void movdl(XMMRegister dst, Register src); | |
1263 void movdl(Register dst, XMMRegister src); | |
2320
41d4973cf100
6942326: x86 code in string_indexof() could read beyond reserved heap space
kvn
parents:
2262
diff
changeset
|
1264 void movdl(XMMRegister dst, Address src); |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1265 void movdl(Address dst, XMMRegister src); |
304 | 1266 |
1267 // Move Double Quadword | |
1268 void movdq(XMMRegister dst, Register src); | |
1269 void movdq(Register dst, XMMRegister src); | |
1270 | |
1271 // Move Aligned Double Quadword | |
1272 void movdqa(XMMRegister dst, XMMRegister src); | |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1273 void movdqa(XMMRegister dst, Address src); |
304 | 1274 |
405 | 1275 // Move Unaligned Double Quadword |
1276 void movdqu(Address dst, XMMRegister src); | |
1277 void movdqu(XMMRegister dst, Address src); | |
1278 void movdqu(XMMRegister dst, XMMRegister src); | |
1279 | |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1280 // Move Unaligned 256bit Vector |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1281 void vmovdqu(Address dst, XMMRegister src); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1282 void vmovdqu(XMMRegister dst, Address src); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1283 void vmovdqu(XMMRegister dst, XMMRegister src); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1284 |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1285 // Move lower 64bit to high 64bit in 128bit register |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1286 void movlhps(XMMRegister dst, XMMRegister src); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1287 |
304 | 1288 void movl(Register dst, int32_t imm32); |
1289 void movl(Address dst, int32_t imm32); | |
1290 void movl(Register dst, Register src); | |
1291 void movl(Register dst, Address src); | |
1292 void movl(Address dst, Register src); | |
1293 | |
1294 // These dummies prevent using movl from converting a zero (like NULL) into Register | |
1295 // by giving the compiler two choices it can't resolve | |
1296 | |
1297 void movl(Address dst, void* junk); | |
1298 void movl(Register dst, void* junk); | |
1299 | |
1300 #ifdef _LP64 | |
1301 void movq(Register dst, Register src); | |
1302 void movq(Register dst, Address src); | |
2100
b1a2afa37ec4
7003271: Hotspot should track cumulative Java heap bytes allocated on a per-thread basis
phh
parents:
2008
diff
changeset
|
1303 void movq(Address dst, Register src); |
304 | 1304 #endif |
1305 | |
1306 void movq(Address dst, MMXRegister src ); | |
1307 void movq(MMXRegister dst, Address src ); | |
1308 | |
1309 #ifdef _LP64 | |
1310 // These dummies prevent using movq from converting a zero (like NULL) into Register | |
1311 // by giving the compiler two choices it can't resolve | |
1312 | |
1313 void movq(Address dst, void* dummy); | |
1314 void movq(Register dst, void* dummy); | |
1315 #endif | |
1316 | |
1317 // Move Quadword | |
1318 void movq(Address dst, XMMRegister src); | |
1319 void movq(XMMRegister dst, Address src); | |
1320 | |
1321 void movsbl(Register dst, Address src); | |
1322 void movsbl(Register dst, Register src); | |
1323 | |
1324 #ifdef _LP64 | |
624 | 1325 void movsbq(Register dst, Address src); |
1326 void movsbq(Register dst, Register src); | |
1327 | |
304 | 1328 // Move signed 32bit immediate to 64bit extending sign |
2100
b1a2afa37ec4
7003271: Hotspot should track cumulative Java heap bytes allocated on a per-thread basis
phh
parents:
2008
diff
changeset
|
1329 void movslq(Address dst, int32_t imm64); |
304 | 1330 void movslq(Register dst, int32_t imm64); |
1331 | |
1332 void movslq(Register dst, Address src); | |
1333 void movslq(Register dst, Register src); | |
1334 void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous | |
1335 #endif | |
1336 | |
1337 void movswl(Register dst, Address src); | |
1338 void movswl(Register dst, Register src); | |
1339 | |
624 | 1340 #ifdef _LP64 |
1341 void movswq(Register dst, Address src); | |
1342 void movswq(Register dst, Register src); | |
1343 #endif | |
1344 | |
304 | 1345 void movw(Address dst, int imm16); |
1346 void movw(Register dst, Address src); | |
1347 void movw(Address dst, Register src); | |
1348 | |
1349 void movzbl(Register dst, Address src); | |
1350 void movzbl(Register dst, Register src); | |
1351 | |
624 | 1352 #ifdef _LP64 |
1353 void movzbq(Register dst, Address src); | |
1354 void movzbq(Register dst, Register src); | |
1355 #endif | |
1356 | |
304 | 1357 void movzwl(Register dst, Address src); |
1358 void movzwl(Register dst, Register src); | |
1359 | |
624 | 1360 #ifdef _LP64 |
1361 void movzwq(Register dst, Address src); | |
1362 void movzwq(Register dst, Register src); | |
1363 #endif | |
1364 | |
304 | 1365 void mull(Address src); |
1366 void mull(Register src); | |
1367 | |
1368 // Multiply Scalar Double-Precision Floating-Point Values | |
1369 void mulsd(XMMRegister dst, Address src); | |
1370 void mulsd(XMMRegister dst, XMMRegister src); | |
1371 | |
1372 // Multiply Scalar Single-Precision Floating-Point Values | |
1373 void mulss(XMMRegister dst, Address src); | |
1374 void mulss(XMMRegister dst, XMMRegister src); | |
1375 | |
1376 void negl(Register dst); | |
1377 | |
1378 #ifdef _LP64 | |
1379 void negq(Register dst); | |
1380 #endif | |
1381 | |
1382 void nop(int i = 1); | |
1383 | |
1384 void notl(Register dst); | |
1385 | |
1386 #ifdef _LP64 | |
1387 void notq(Register dst); | |
1388 #endif | |
1389 | |
1390 void orl(Address dst, int32_t imm32); | |
1391 void orl(Register dst, int32_t imm32); | |
1392 void orl(Register dst, Address src); | |
1393 void orl(Register dst, Register src); | |
1394 | |
1395 void orq(Address dst, int32_t imm32); | |
1396 void orq(Register dst, int32_t imm32); | |
1397 void orq(Register dst, Address src); | |
1398 void orq(Register dst, Register src); | |
1399 | |
4759 | 1400 // Pack with unsigned saturation |
1401 void packuswb(XMMRegister dst, XMMRegister src); | |
1402 void packuswb(XMMRegister dst, Address src); | |
7637
b30b3c2a0cf2
6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86
kvn
parents:
7477
diff
changeset
|
1403 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
b30b3c2a0cf2
6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86
kvn
parents:
7477
diff
changeset
|
1404 |
b30b3c2a0cf2
6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86
kvn
parents:
7477
diff
changeset
|
1405 // Permutation of 64bit words |
b30b3c2a0cf2
6896617: Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() on x86
kvn
parents:
7477
diff
changeset
|
1406 void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256); |
4759 | 1407 |
681 | 1408 // SSE4.2 string instructions |
1409 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); | |
1410 void pcmpestri(XMMRegister xmm1, Address src, int imm8); | |
1411 | |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1412 // SSE 4.1 extract |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1413 void pextrd(Register dst, XMMRegister src, int imm8); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1414 void pextrq(Register dst, XMMRegister src, int imm8); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1415 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1416 // SSE 4.1 insert |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1417 void pinsrd(XMMRegister dst, Register src, int imm8); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1418 void pinsrq(XMMRegister dst, Register src, int imm8); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1419 |
4759 | 1420 // SSE4.1 packed move |
1421 void pmovzxbw(XMMRegister dst, XMMRegister src); | |
1422 void pmovzxbw(XMMRegister dst, Address src); | |
1423 | |
1060 | 1424 #ifndef _LP64 // no 32bit push/pop on amd64 |
304 | 1425 void popl(Address dst); |
1060 | 1426 #endif |
304 | 1427 |
1428 #ifdef _LP64 | |
1429 void popq(Address dst); | |
1430 #endif | |
1431 | |
643
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
642
diff
changeset
|
1432 void popcntl(Register dst, Address src); |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
642
diff
changeset
|
1433 void popcntl(Register dst, Register src); |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
642
diff
changeset
|
1434 |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
642
diff
changeset
|
1435 #ifdef _LP64 |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
642
diff
changeset
|
1436 void popcntq(Register dst, Address src); |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
642
diff
changeset
|
1437 void popcntq(Register dst, Register src); |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
642
diff
changeset
|
1438 #endif |
c771b7f43bbf
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
twisti
parents:
642
diff
changeset
|
1439 |
304 | 1440 // Prefetches (SSE, SSE2, 3DNOW only) |
1441 | |
1442 void prefetchnta(Address src); | |
1443 void prefetchr(Address src); | |
1444 void prefetcht0(Address src); | |
1445 void prefetcht1(Address src); | |
1446 void prefetcht2(Address src); | |
1447 void prefetchw(Address src); | |
1448 | |
6894 | 1449 // Shuffle Bytes |
1450 void pshufb(XMMRegister dst, XMMRegister src); | |
1451 void pshufb(XMMRegister dst, Address src); | |
1452 | |
304 | 1453 // Shuffle Packed Doublewords |
1454 void pshufd(XMMRegister dst, XMMRegister src, int mode); | |
1455 void pshufd(XMMRegister dst, Address src, int mode); | |
1456 | |
1457 // Shuffle Packed Low Words | |
1458 void pshuflw(XMMRegister dst, XMMRegister src, int mode); | |
1459 void pshuflw(XMMRegister dst, Address src, int mode); | |
1460 | |
2320
41d4973cf100
6942326: x86 code in string_indexof() could read beyond reserved heap space
kvn
parents:
2262
diff
changeset
|
1461 // Shift Right by bytes Logical DoubleQuadword Immediate |
41d4973cf100
6942326: x86 code in string_indexof() could read beyond reserved heap space
kvn
parents:
2262
diff
changeset
|
1462 void psrldq(XMMRegister dst, int shift); |
41d4973cf100
6942326: x86 code in string_indexof() could read beyond reserved heap space
kvn
parents:
2262
diff
changeset
|
1463 |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7475
diff
changeset
|
1464 // Logical Compare 128bit |
681 | 1465 void ptest(XMMRegister dst, XMMRegister src); |
1466 void ptest(XMMRegister dst, Address src); | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7475
diff
changeset
|
1467 // Logical Compare 256bit |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7475
diff
changeset
|
1468 void vptest(XMMRegister dst, XMMRegister src); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7475
diff
changeset
|
1469 void vptest(XMMRegister dst, Address src); |
681 | 1470 |
304 | 1471 // Interleave Low Bytes |
1472 void punpcklbw(XMMRegister dst, XMMRegister src); | |
4759 | 1473 void punpcklbw(XMMRegister dst, Address src); |
1474 | |
1475 // Interleave Low Doublewords | |
1476 void punpckldq(XMMRegister dst, XMMRegister src); | |
1477 void punpckldq(XMMRegister dst, Address src); | |
304 | 1478 |
6225 | 1479 // Interleave Low Quadwords |
1480 void punpcklqdq(XMMRegister dst, XMMRegister src); | |
1481 | |
1060 | 1482 #ifndef _LP64 // no 32bit push/pop on amd64 |
304 | 1483 void pushl(Address src); |
1060 | 1484 #endif |
304 | 1485 |
1486 void pushq(Address src); | |
1487 | |
1488 void rcll(Register dst, int imm8); | |
1489 | |
1490 void rclq(Register dst, int imm8); | |
1491 | |
1492 void ret(int imm16); | |
0 | 1493 |
1494 void sahf(); | |
1495 | |
304 | 1496 void sarl(Register dst, int imm8); |
1497 void sarl(Register dst); | |
1498 | |
1499 void sarq(Register dst, int imm8); | |
1500 void sarq(Register dst); | |
1501 | |
1502 void sbbl(Address dst, int32_t imm32); | |
1503 void sbbl(Register dst, int32_t imm32); | |
1504 void sbbl(Register dst, Address src); | |
1505 void sbbl(Register dst, Register src); | |
1506 | |
1507 void sbbq(Address dst, int32_t imm32); | |
1508 void sbbq(Register dst, int32_t imm32); | |
1509 void sbbq(Register dst, Address src); | |
1510 void sbbq(Register dst, Register src); | |
1511 | |
1512 void setb(Condition cc, Register dst); | |
1513 | |
1514 void shldl(Register dst, Register src); | |
1515 | |
1516 void shll(Register dst, int imm8); | |
1517 void shll(Register dst); | |
1518 | |
1519 void shlq(Register dst, int imm8); | |
1520 void shlq(Register dst); | |
1521 | |
1522 void shrdl(Register dst, Register src); | |
1523 | |
1524 void shrl(Register dst, int imm8); | |
1525 void shrl(Register dst); | |
1526 | |
1527 void shrq(Register dst, int imm8); | |
1528 void shrq(Register dst); | |
1529 | |
1530 void smovl(); // QQQ generic? | |
1531 | |
1532 // Compute Square Root of Scalar Double-Precision Floating-Point Value | |
1533 void sqrtsd(XMMRegister dst, Address src); | |
1534 void sqrtsd(XMMRegister dst, XMMRegister src); | |
1535 | |
2008 | 1536 // Compute Square Root of Scalar Single-Precision Floating-Point Value |
1537 void sqrtss(XMMRegister dst, Address src); | |
1538 void sqrtss(XMMRegister dst, XMMRegister src); | |
1539 | |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
7198
diff
changeset
|
1540 void std(); |
304 | 1541 |
1542 void stmxcsr( Address dst ); | |
1543 | |
1544 void subl(Address dst, int32_t imm32); | |
1545 void subl(Address dst, Register src); | |
1546 void subl(Register dst, int32_t imm32); | |
1547 void subl(Register dst, Address src); | |
1548 void subl(Register dst, Register src); | |
1549 | |
1550 void subq(Address dst, int32_t imm32); | |
1551 void subq(Address dst, Register src); | |
1552 void subq(Register dst, int32_t imm32); | |
1553 void subq(Register dst, Address src); | |
1554 void subq(Register dst, Register src); | |
1555 | |
4947
fd8114661503
7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29
kvn
parents:
4761
diff
changeset
|
1556 // Force generation of a 4 byte immediate value even if it fits into 8bit |
fd8114661503
7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29
kvn
parents:
4761
diff
changeset
|
1557 void subl_imm32(Register dst, int32_t imm32); |
fd8114661503
7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29
kvn
parents:
4761
diff
changeset
|
1558 void subq_imm32(Register dst, int32_t imm32); |
304 | 1559 |
1560 // Subtract Scalar Double-Precision Floating-Point Values | |
1561 void subsd(XMMRegister dst, Address src); | |
0 | 1562 void subsd(XMMRegister dst, XMMRegister src); |
1563 | |
304 | 1564 // Subtract Scalar Single-Precision Floating-Point Values |
1565 void subss(XMMRegister dst, Address src); | |
1566 void subss(XMMRegister dst, XMMRegister src); | |
1567 | |
1568 void testb(Register dst, int imm8); | |
1569 | |
1570 void testl(Register dst, int32_t imm32); | |
1571 void testl(Register dst, Register src); | |
1572 void testl(Register dst, Address src); | |
1573 | |
1574 void testq(Register dst, int32_t imm32); | |
1575 void testq(Register dst, Register src); | |
1576 | |
1577 | |
1578 // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS | |
1579 void ucomisd(XMMRegister dst, Address src); | |
0 | 1580 void ucomisd(XMMRegister dst, XMMRegister src); |
1581 | |
304 | 1582 // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS |
1583 void ucomiss(XMMRegister dst, Address src); | |
1584 void ucomiss(XMMRegister dst, XMMRegister src); | |
1585 | |
1586 void xaddl(Address dst, Register src); | |
1587 | |
1588 void xaddq(Address dst, Register src); | |
1589 | |
1590 void xchgl(Register reg, Address adr); | |
1591 void xchgl(Register dst, Register src); | |
1592 | |
1593 void xchgq(Register reg, Address adr); | |
1594 void xchgq(Register dst, Register src); | |
1595 | |
4759 | 1596 // Get Value of Extended Control Register |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
7198
diff
changeset
|
1597 void xgetbv(); |
4759 | 1598 |
304 | 1599 void xorl(Register dst, int32_t imm32); |
1600 void xorl(Register dst, Address src); | |
1601 void xorl(Register dst, Register src); | |
1602 | |
1603 void xorq(Register dst, Address src); | |
1604 void xorq(Register dst, Register src); | |
1605 | |
1606 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 | |
4759 | 1607 |
6225 | 1608 // AVX 3-operands scalar instructions (encoded with VEX prefix) |
6614
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1609 |
4761
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1610 void vaddsd(XMMRegister dst, XMMRegister nds, Address src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1611 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1612 void vaddss(XMMRegister dst, XMMRegister nds, Address src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1613 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1614 void vdivsd(XMMRegister dst, XMMRegister nds, Address src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1615 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1616 void vdivss(XMMRegister dst, XMMRegister nds, Address src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1617 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1618 void vmulsd(XMMRegister dst, XMMRegister nds, Address src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1619 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1620 void vmulss(XMMRegister dst, XMMRegister nds, Address src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1621 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1622 void vsubsd(XMMRegister dst, XMMRegister nds, Address src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1623 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1624 void vsubss(XMMRegister dst, XMMRegister nds, Address src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1625 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); |
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1626 |
6614
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1627 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1628 //====================VECTOR ARITHMETIC===================================== |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1629 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1630 // Add Packed Floating-Point Values |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1631 void addpd(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1632 void addps(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1633 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1634 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1635 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1636 void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1637 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1638 // Subtract Packed Floating-Point Values |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1639 void subpd(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1640 void subps(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1641 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1642 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1643 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1644 void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1645 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1646 // Multiply Packed Floating-Point Values |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1647 void mulpd(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1648 void mulps(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1649 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1650 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1651 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1652 void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1653 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1654 // Divide Packed Floating-Point Values |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1655 void divpd(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1656 void divps(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1657 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1658 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1659 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1660 void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1661 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1662 // Bitwise Logical AND of Packed Floating-Point Values |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1663 void andpd(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1664 void andps(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1665 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1666 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1667 void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1668 void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1669 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1670 // Bitwise Logical XOR of Packed Floating-Point Values |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1671 void xorpd(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1672 void xorps(XMMRegister dst, XMMRegister src); |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1673 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1674 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
6614
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1675 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1676 void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1677 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1678 // Add packed integers |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1679 void paddb(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1680 void paddw(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1681 void paddd(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1682 void paddq(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1683 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1684 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1685 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1686 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1687 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1688 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1689 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1690 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1691 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1692 // Sub packed integers |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1693 void psubb(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1694 void psubw(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1695 void psubd(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1696 void psubq(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1697 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1698 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1699 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1700 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1701 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1702 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1703 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1704 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1705 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1706 // Multiply packed integers (only shorts and ints) |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1707 void pmullw(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1708 void pmulld(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1709 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1710 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1711 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1712 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1713 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1714 // Shift left packed integers |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1715 void psllw(XMMRegister dst, int shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1716 void pslld(XMMRegister dst, int shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1717 void psllq(XMMRegister dst, int shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1718 void psllw(XMMRegister dst, XMMRegister shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1719 void pslld(XMMRegister dst, XMMRegister shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1720 void psllq(XMMRegister dst, XMMRegister shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1721 void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1722 void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1723 void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1724 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1725 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1726 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1727 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1728 // Logical shift right packed integers |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1729 void psrlw(XMMRegister dst, int shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1730 void psrld(XMMRegister dst, int shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1731 void psrlq(XMMRegister dst, int shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1732 void psrlw(XMMRegister dst, XMMRegister shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1733 void psrld(XMMRegister dst, XMMRegister shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1734 void psrlq(XMMRegister dst, XMMRegister shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1735 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1736 void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1737 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1738 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1739 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1740 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1741 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1742 // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs) |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1743 void psraw(XMMRegister dst, int shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1744 void psrad(XMMRegister dst, int shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1745 void psraw(XMMRegister dst, XMMRegister shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1746 void psrad(XMMRegister dst, XMMRegister shift); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1747 void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1748 void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1749 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1750 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1751 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1752 // And packed integers |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1753 void pand(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1754 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1755 void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1756 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1757 // Or packed integers |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1758 void por(XMMRegister dst, XMMRegister src); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1759 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1760 void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1761 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1762 // Xor packed integers |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1763 void pxor(XMMRegister dst, XMMRegister src); |
6225 | 1764 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256); |
6614
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1765 void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256); |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1766 |
006050192a5a
6340864: Implement vectorization optimizations in hotspot-server
kvn
parents:
6266
diff
changeset
|
1767 // Copy low 128bit into high 128bit of YMM registers. |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1768 void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); |
6225 | 1769 void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1770 |
6792
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6740
diff
changeset
|
1771 // Load/store the high 128 bits of a YMM register without destroying the other half. |
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6740
diff
changeset
|
1772 void vinsertf128h(XMMRegister dst, Address src); |
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6740
diff
changeset
|
1773 void vinserti128h(XMMRegister dst, Address src); |
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6740
diff
changeset
|
1774 void vextractf128h(Address dst, XMMRegister src); |
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6740
diff
changeset
|
1775 void vextracti128h(Address dst, XMMRegister src); |
137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
kvn
parents:
6740
diff
changeset
|
1776 |
7475
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7474
diff
changeset
|
1777 // duplicate 4-byte integer data from src into 8 locations in dest |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7474
diff
changeset
|
1778 void vpbroadcastd(XMMRegister dst, XMMRegister src); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7474
diff
changeset
|
1779 |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1780 // Carry-Less Multiplication Quadword |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1781 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
7955
diff
changeset
|
1782 |
6179
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1783 // AVX instruction which is used to clear upper 128 bits of YMM registers and |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1784 // to avoid transition penalty between AVX and SSE states. There is no |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1785 // penalty if legacy SSE instructions are encoded using VEX prefix because |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1786 // they always clear upper 128 bits. It should be used before calling |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1787 // runtime code and native libraries. |
8c92982cbbc4
7119644: Increase superword's vector size up to 256 bits
kvn
parents:
6141
diff
changeset
|
1788 void vzeroupper(); |
4761
65149e74c706
7121648: Use 3-operands SIMD instructions on x86 with AVX
kvn
parents:
4759
diff
changeset
|
1789 |
4759 | 1790 protected: |
1791 // The following instructions require 16-byte address alignment in SSE mode. | |
1792 // They should be called only from corresponding MacroAssembler instructions. | |
1793 void andpd(XMMRegister dst, Address src); | |
1794 void andps(XMMRegister dst, Address src); | |
1795 void xorpd(XMMRegister dst, Address src); | |
1796 void xorps(XMMRegister dst, Address src); | |
1797 | |
0 | 1798 }; |
1799 | |
1972 | 1800 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP |