annotate src/cpu/x86/vm/macroAssembler_x86.cpp @ 15388:769fc3629f59
Add phase FlowSensitiveReductionPhase.
It is possible to remove GuardingPiNodes, CheckCastNodes, and FixedGuards during
HighTier under certain control-flow-sensitive conditions.
The phase added in this commit (FlowSensitiveReductionPhase) does that and,
where possible, also replaces usages with "downcasting" PiNodes,
resulting in more precise object stamps (e.g., non-null).
Finally, usages of floating, side-effect-free expressions are simplified
according to the same control-flow-sensitive conditions.
The newly added phase runs only during HighTier and can be deactivated
with the Graal option FlowSensitiveReduction (it is active by default).
author   Miguel Garcia <miguel.m.garcia@oracle.com>
date     Fri, 25 Apr 2014 16:50:52 +0200
parents  4ca6dc0799b6
children 52b4284cb496
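As a rough illustration of the redundancy removed by FlowSensitiveReductionPhase, consider the C++ analogy below. The types and function are invented for illustration only; the phase itself operates on Graal IR nodes (FixedGuard, CheckCast, GuardingPi, Pi), not on source code.

struct Base    { virtual ~Base() {} };
struct Derived : Base { int field = 0; };

int example(Base* b) {
  if (Derived* d = dynamic_cast<Derived*>(b)) {  // dominating null-and-type check
    // Inside this branch the check is known to hold, so repeating a cast or a
    // null check on b would be redundant. The analogous IR transformation drops
    // the CheckCast/FixedGuard and routes usages through a PiNode whose stamp
    // is non-null and more precise.
    return d->field;
  }
  return -1;
}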
rev | line source
7199 | 1 /* |
11080 | 2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
7199 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 * or visit www.oracle.com if you need additional information or have any | |
21 * questions. | |
22 * | |
23 */ | |
24 | |
25 #include "precompiled.hpp" | |
26 #include "asm/assembler.hpp" | |
27 #include "asm/assembler.inline.hpp" | |
28 #include "compiler/disassembler.hpp" | |
29 #include "gc_interface/collectedHeap.inline.hpp" | |
30 #include "interpreter/interpreter.hpp" | |
31 #include "memory/cardTableModRefBS.hpp" | |
32 #include "memory/resourceArea.hpp" | |
12056 | 33 #include "memory/universe.hpp" |
7199 | 34 #include "prims/methodHandles.hpp" |
35 #include "runtime/biasedLocking.hpp" | |
36 #include "runtime/interfaceSupport.hpp" | |
37 #include "runtime/objectMonitor.hpp" | |
38 #include "runtime/os.hpp" | |
39 #include "runtime/sharedRuntime.hpp" | |
40 #include "runtime/stubRoutines.hpp" | |
8001 | 41 #include "utilities/macros.hpp" |
8001 | 42 #if INCLUDE_ALL_GCS |
7199 | 43 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" |
44 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" | |
45 #include "gc_implementation/g1/heapRegion.hpp" | |
8001 | 46 #endif // INCLUDE_ALL_GCS |
7199 | 47 |
48 #ifdef PRODUCT | |
49 #define BLOCK_COMMENT(str) /* nothing */ | |
50 #define STOP(error) stop(error) | |
51 #else | |
52 #define BLOCK_COMMENT(str) block_comment(str) | |
53 #define STOP(error) block_comment(error); stop(error) | |
54 #endif | |
55 | |
56 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") | |
57 | |
58 | |
7204 | 59 #ifdef ASSERT |
7204 | 60 bool AbstractAssembler::pd_check_instruction_mark() { return true; } |
7204 | 61 #endif |
7204 | 62 |
7199 | 63 static Assembler::Condition reverse[] = { |
64 Assembler::noOverflow /* overflow = 0x0 */ , | |
65 Assembler::overflow /* noOverflow = 0x1 */ , | |
66 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , | |
67 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , | |
68 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , | |
69 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , | |
70 Assembler::above /* belowEqual = 0x6 */ , | |
71 Assembler::belowEqual /* above = 0x7 */ , | |
72 Assembler::positive /* negative = 0x8 */ , | |
73 Assembler::negative /* positive = 0x9 */ , | |
74 Assembler::noParity /* parity = 0xa */ , | |
75 Assembler::parity /* noParity = 0xb */ , | |
76 Assembler::greaterEqual /* less = 0xc */ , | |
77 Assembler::less /* greaterEqual = 0xd */ , | |
78 Assembler::greater /* lessEqual = 0xe */ , | |
79 Assembler::lessEqual /* greater = 0xf, */ | |
80 | |
81 }; | |
82 | |
83 | |
84 // Implementation of MacroAssembler | |
85 | |
86 // First all the versions that differ between 32 and 64 bit, | |
87 // unless the difference is trivial (1 line or so). | |
88 | |
89 #ifndef _LP64 | |
90 | |
91 // 32bit versions | |
92 | |
93 Address MacroAssembler::as_Address(AddressLiteral adr) { | |
94 return Address(adr.target(), adr.rspec()); | |
95 } | |
96 | |
97 Address MacroAssembler::as_Address(ArrayAddress adr) { | |
98 return Address::make_array(adr); | |
99 } | |
100 | |
14909 | 101 int MacroAssembler::biased_locking_enter(Register lock_reg, |
102 Register obj_reg, | |
103 Register swap_reg, | |
104 Register tmp_reg, | |
105 bool swap_reg_contains_mark, | |
106 Label& done, | |
107 Label* slow_case, | |
108 BiasedLockingCounters* counters) { | |
109 assert(UseBiasedLocking, "why call this otherwise?"); | |
110 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); | |
111 assert_different_registers(lock_reg, obj_reg, swap_reg); | |
112 | |
113 if (PrintBiasedLockingStatistics && counters == NULL) | |
114 counters = BiasedLocking::counters(); | |
115 | |
116 bool need_tmp_reg = false; | |
117 if (tmp_reg == noreg) { | |
118 need_tmp_reg = true; | |
119 tmp_reg = lock_reg; | |
120 } else { | |
121 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); | |
122 } | |
123 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); | |
124 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); | |
125 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); | |
126 Address saved_mark_addr(lock_reg, 0); | |
127 | |
128 // Biased locking | |
129 // See whether the lock is currently biased toward our thread and | |
130 // whether the epoch is still valid | |
131 // Note that the runtime guarantees sufficient alignment of JavaThread | |
132 // pointers to allow age to be placed into low bits | |
133 // First check to see whether biasing is even enabled for this object | |
134 Label cas_label; | |
135 int null_check_offset = -1; | |
136 if (!swap_reg_contains_mark) { | |
137 null_check_offset = offset(); | |
138 movl(swap_reg, mark_addr); | |
139 } | |
140 if (need_tmp_reg) { | |
141 push(tmp_reg); | |
142 } | |
143 movl(tmp_reg, swap_reg); | |
144 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); | |
145 cmpl(tmp_reg, markOopDesc::biased_lock_pattern); | |
146 if (need_tmp_reg) { | |
147 pop(tmp_reg); | |
148 } | |
149 jcc(Assembler::notEqual, cas_label); | |
150 // The bias pattern is present in the object's header. Need to check | |
151 // whether the bias owner and the epoch are both still current. | |
152 // Note that because there is no current thread register on x86 we | |
153 // need to store off the mark word we read out of the object to | |
154 // avoid reloading it and needing to recheck invariants below. This | |
155 // store is unfortunate but it makes the overall code shorter and | |
156 // simpler. | |
157 movl(saved_mark_addr, swap_reg); | |
158 if (need_tmp_reg) { | |
159 push(tmp_reg); | |
160 } | |
161 get_thread(tmp_reg); | |
162 xorl(swap_reg, tmp_reg); | |
163 if (swap_reg_contains_mark) { | |
164 null_check_offset = offset(); | |
165 } | |
166 movl(tmp_reg, klass_addr); | |
167 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); | |
168 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); | |
169 if (need_tmp_reg) { | |
170 pop(tmp_reg); | |
171 } | |
172 if (counters != NULL) { | |
173 cond_inc32(Assembler::zero, | |
174 ExternalAddress((address)counters->biased_lock_entry_count_addr())); | |
175 } | |
176 jcc(Assembler::equal, done); | |
177 | |
178 Label try_revoke_bias; | |
179 Label try_rebias; | |
180 | |
181 // At this point we know that the header has the bias pattern and | |
182 // that we are not the bias owner in the current epoch. We need to | |
183 // figure out more details about the state of the header in order to | |
184 // know what operations can be legally performed on the object's | |
185 // header. | |
186 | |
187 // If the low three bits in the xor result aren't clear, that means | |
188 // the prototype header is no longer biased and we have to revoke | |
189 // the bias on this object. | |
190 testl(swap_reg, markOopDesc::biased_lock_mask_in_place); | |
191 jcc(Assembler::notZero, try_revoke_bias); | |
192 | |
193 // Biasing is still enabled for this data type. See whether the | |
194 // epoch of the current bias is still valid, meaning that the epoch | |
195 // bits of the mark word are equal to the epoch bits of the | |
196 // prototype header. (Note that the prototype header's epoch bits | |
197 // only change at a safepoint.) If not, attempt to rebias the object | |
198 // toward the current thread. Note that we must be absolutely sure | |
199 // that the current epoch is invalid in order to do this because | |
200 // otherwise the manipulations it performs on the mark word are | |
201 // illegal. | |
202 testl(swap_reg, markOopDesc::epoch_mask_in_place); | |
203 jcc(Assembler::notZero, try_rebias); | |
204 | |
205 // The epoch of the current bias is still valid but we know nothing | |
206 // about the owner; it might be set or it might be clear. Try to | |
207 // acquire the bias of the object using an atomic operation. If this | |
208 // fails we will go in to the runtime to revoke the object's bias. | |
209 // Note that we first construct the presumed unbiased header so we | |
210 // don't accidentally blow away another thread's valid bias. | |
211 movl(swap_reg, saved_mark_addr); | |
212 andl(swap_reg, | |
213 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); | |
214 if (need_tmp_reg) { | |
215 push(tmp_reg); | |
216 } | |
217 get_thread(tmp_reg); | |
218 orl(tmp_reg, swap_reg); | |
219 if (os::is_MP()) { | |
220 lock(); | |
221 } | |
222 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); | |
223 if (need_tmp_reg) { | |
224 pop(tmp_reg); | |
225 } | |
226 // If the biasing toward our thread failed, this means that | |
227 // another thread succeeded in biasing it toward itself and we | |
228 // need to revoke that bias. The revocation will occur in the | |
229 // interpreter runtime in the slow case. | |
230 if (counters != NULL) { | |
231 cond_inc32(Assembler::zero, | |
232 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); | |
233 } | |
234 if (slow_case != NULL) { | |
235 jcc(Assembler::notZero, *slow_case); | |
236 } | |
237 jmp(done); | |
238 | |
239 bind(try_rebias); | |
240 // At this point we know the epoch has expired, meaning that the | |
241 // current "bias owner", if any, is actually invalid. Under these | |
242 // circumstances _only_, we are allowed to use the current header's | |
243 // value as the comparison value when doing the cas to acquire the | |
244 // bias in the current epoch. In other words, we allow transfer of | |
245 // the bias from one thread to another directly in this situation. | |
246 // | |
247 // FIXME: due to a lack of registers we currently blow away the age | |
248 // bits in this situation. Should attempt to preserve them. | |
249 if (need_tmp_reg) { | |
250 push(tmp_reg); | |
251 } | |
252 get_thread(tmp_reg); | |
253 movl(swap_reg, klass_addr); | |
254 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); | |
255 movl(swap_reg, saved_mark_addr); | |
256 if (os::is_MP()) { | |
257 lock(); | |
258 } | |
259 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); | |
260 if (need_tmp_reg) { | |
261 pop(tmp_reg); | |
262 } | |
263 // If the biasing toward our thread failed, then another thread | |
264 // succeeded in biasing it toward itself and we need to revoke that | |
265 // bias. The revocation will occur in the runtime in the slow case. | |
266 if (counters != NULL) { | |
267 cond_inc32(Assembler::zero, | |
268 ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); | |
269 } | |
270 if (slow_case != NULL) { | |
271 jcc(Assembler::notZero, *slow_case); | |
272 } | |
273 jmp(done); | |
274 | |
275 bind(try_revoke_bias); | |
276 // The prototype mark in the klass doesn't have the bias bit set any | |
277 // more, indicating that objects of this data type are not supposed | |
278 // to be biased any more. We are going to try to reset the mark of | |
279 // this object to the prototype value and fall through to the | |
280 // CAS-based locking scheme. Note that if our CAS fails, it means | |
281 // that another thread raced us for the privilege of revoking the | |
282 // bias of this particular object, so it's okay to continue in the | |
283 // normal locking code. | |
284 // | |
285 // FIXME: due to a lack of registers we currently blow away the age | |
286 // bits in this situation. Should attempt to preserve them. | |
287 movl(swap_reg, saved_mark_addr); | |
288 if (need_tmp_reg) { | |
289 push(tmp_reg); | |
290 } | |
291 movl(tmp_reg, klass_addr); | |
292 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); | |
293 if (os::is_MP()) { | |
294 lock(); | |
295 } | |
296 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); | |
297 if (need_tmp_reg) { | |
298 pop(tmp_reg); | |
299 } | |
300 // Fall through to the normal CAS-based lock, because no matter what | |
301 // the result of the above CAS, some thread must have succeeded in | |
302 // removing the bias bit from the object's header. | |
303 if (counters != NULL) { | |
304 cond_inc32(Assembler::zero, | |
305 ExternalAddress((address)counters->revoked_lock_entry_count_addr())); | |
306 } | |
307 | |
308 bind(cas_label); | |
309 | |
310 return null_check_offset; | |
311 } | |
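// A condensed pseudocode sketch of the decision flow above (illustration only,
// assuming the usual mark-word layout of thread id | epoch | age | biased bit | lock bits):
//
//   mark = obj->mark();
//   if ((mark & biased_lock_mask) != biased_lock_pattern)  goto cas_label;      // not biasable
//   diff = (mark ^ (prototype_header(klass) | cur_thread)) & ~age_mask;
//   if (diff == 0)                goto done;             // already biased to this thread
//   if (diff & biased_lock_mask)  goto try_revoke_bias;  // class no longer allows biasing
//   if (diff & epoch_mask)        goto try_rebias;       // bias epoch has expired
//   // otherwise: owner unset or stale; CAS (unbiased mark | cur_thread) into the
//   // header and take the slow path if the CAS loses.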
7199 | 312 void MacroAssembler::call_VM_leaf_base(address entry_point, |
313 int number_of_arguments) { | |
314 call(RuntimeAddress(entry_point)); | |
315 increment(rsp, number_of_arguments * wordSize); | |
316 } | |
317 | |
318 void MacroAssembler::cmpklass(Address src1, Metadata* obj) { | |
319 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
320 } | |
321 | |
322 void MacroAssembler::cmpklass(Register src1, Metadata* obj) { | |
323 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
324 } | |
325 | |
326 void MacroAssembler::cmpoop(Address src1, jobject obj) { | |
327 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |
328 } | |
329 | |
330 void MacroAssembler::cmpoop(Register src1, jobject obj) { | |
331 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |
332 } | |
333 | |
334 void MacroAssembler::extend_sign(Register hi, Register lo) { | |
335 // According to Intel Doc. AP-526, "Integer Divide", p.18. | |
336 if (VM_Version::is_P6() && hi == rdx && lo == rax) { | |
337 cdql(); | |
338 } else { | |
339 movl(hi, lo); | |
340 sarl(hi, 31); | |
341 } | |
342 } | |
343 | |
344 void MacroAssembler::jC2(Register tmp, Label& L) { | |
345 // set parity bit if FPU flag C2 is set (via rax) | |
346 save_rax(tmp); | |
347 fwait(); fnstsw_ax(); | |
348 sahf(); | |
349 restore_rax(tmp); | |
350 // branch | |
351 jcc(Assembler::parity, L); | |
352 } | |
353 | |
354 void MacroAssembler::jnC2(Register tmp, Label& L) { | |
355 // set parity bit if FPU flag C2 is set (via rax) | |
356 save_rax(tmp); | |
357 fwait(); fnstsw_ax(); | |
358 sahf(); | |
359 restore_rax(tmp); | |
360 // branch | |
361 jcc(Assembler::noParity, L); | |
362 } | |
363 | |
364 // 32bit can do a case table jump in one instruction but we no longer allow the base | |
365 // to be installed in the Address class | |
366 void MacroAssembler::jump(ArrayAddress entry) { | |
367 jmp(as_Address(entry)); | |
368 } | |
369 | |
370 // Note: y_lo will be destroyed | |
371 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { | |
372 // Long compare for Java (semantics as described in JVM spec.) | |
373 Label high, low, done; | |
374 | |
375 cmpl(x_hi, y_hi); | |
376 jcc(Assembler::less, low); | |
377 jcc(Assembler::greater, high); | |
378 // x_hi is the return register | |
379 xorl(x_hi, x_hi); | |
380 cmpl(x_lo, y_lo); | |
381 jcc(Assembler::below, low); | |
382 jcc(Assembler::equal, done); | |
383 | |
384 bind(high); | |
385 xorl(x_hi, x_hi); | |
386 increment(x_hi); | |
387 jmp(done); | |
388 | |
389 bind(low); | |
390 xorl(x_hi, x_hi); | |
391 decrementl(x_hi); | |
392 | |
393 bind(done); | |
394 } | |
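// The routine above implements the Java lcmp semantics on a long split into
// 32-bit halves. An equivalent C++ formulation (illustration only, not part of
// this file):
#include <cstdint>
static int lcmp_from_halves(int32_t x_hi, uint32_t x_lo,
                            int32_t y_hi, uint32_t y_lo) {
  if (x_hi != y_hi) return (x_hi < y_hi) ? -1 : 1;  // signed compare on the high words
  if (x_lo != y_lo) return (x_lo < y_lo) ? -1 : 1;  // unsigned compare on the low words
  return 0;
}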
395 | |
396 void MacroAssembler::lea(Register dst, AddressLiteral src) { | |
397 mov_literal32(dst, (int32_t)src.target(), src.rspec()); | |
398 } | |
399 | |
400 void MacroAssembler::lea(Address dst, AddressLiteral adr) { | |
401 // leal(dst, as_Address(adr)); | |
402 // see note in movl as to why we must use a move | |
403 mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); | |
404 } | |
405 | |
406 void MacroAssembler::leave() { | |
407 mov(rsp, rbp); | |
408 pop(rbp); | |
409 } | |
410 | |
411 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { | |
412 // Multiplication of two Java long values stored on the stack | |
413 // as illustrated below. Result is in rdx:rax. | |
414 // | |
415 // rsp ---> [ ?? ] \ \ | |
416 // .... | y_rsp_offset | | |
417 // [ y_lo ] / (in bytes) | x_rsp_offset | |
418 // [ y_hi ] | (in bytes) | |
419 // .... | | |
420 // [ x_lo ] / | |
421 // [ x_hi ] | |
422 // .... | |
423 // | |
424 // Basic idea: lo(result) = lo(x_lo * y_lo) | |
425 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) | |
426 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); | |
427 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); | |
428 Label quick; | |
429 // load x_hi, y_hi and check if quick | |
430 // multiplication is possible | |
431 movl(rbx, x_hi); | |
432 movl(rcx, y_hi); | |
433 movl(rax, rbx); | |
434 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 | |
435 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply | |
436 // do full multiplication | |
437 // 1st step | |
438 mull(y_lo); // x_hi * y_lo | |
439 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, | |
440 // 2nd step | |
441 movl(rax, x_lo); | |
442 mull(rcx); // x_lo * y_hi | |
443 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, | |
444 // 3rd step | |
445 bind(quick); // note: rbx, = 0 if quick multiply! | |
446 movl(rax, x_lo); | |
447 mull(y_lo); // x_lo * y_lo | |
448 addl(rdx, rbx); // correct hi(x_lo * y_lo) | |
449 } | |
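// The decomposition implemented above, restated in C++ (illustration only, not
// part of this file): a 64-bit product assembled from 32-bit halves, matching
// "lo(result) = lo(x_lo * y_lo); hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)".
#include <cstdint>
static uint64_t mul64_from_halves(uint32_t x_hi, uint32_t x_lo,
                                  uint32_t y_hi, uint32_t y_lo) {
  uint64_t low  = (uint64_t)x_lo * y_lo;               // full 32x32 -> 64 product
  uint32_t high = (uint32_t)(low >> 32)                // hi(x_lo * y_lo)
                + (uint32_t)((uint64_t)x_hi * y_lo)    // lo(x_hi * y_lo)
                + (uint32_t)((uint64_t)x_lo * y_hi);   // lo(x_lo * y_hi)
  return ((uint64_t)high << 32) | (uint32_t)low;       // == (x * y) mod 2^64
}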
450 | |
451 void MacroAssembler::lneg(Register hi, Register lo) { | |
452 negl(lo); | |
453 adcl(hi, 0); | |
454 negl(hi); | |
455 } | |
456 | |
457 void MacroAssembler::lshl(Register hi, Register lo) { | |
458 // Java shift left long support (semantics as described in JVM spec., p.305) | |
459 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) | |
460 // shift value is in rcx ! | |
461 assert(hi != rcx, "must not use rcx"); | |
462 assert(lo != rcx, "must not use rcx"); | |
463 const Register s = rcx; // shift count | |
464 const int n = BitsPerWord; | |
465 Label L; | |
466 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) | |
467 cmpl(s, n); // if (s < n) | |
468 jcc(Assembler::less, L); // else (s >= n) | |
469 movl(hi, lo); // x := x << n | |
470 xorl(lo, lo); | |
471 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! | |
472 bind(L); // s (mod n) < n | |
473 shldl(hi, lo); // x := x << s | |
474 shll(lo); | |
475 } | |
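// The same shift semantics in C++ (illustration only, not part of this file):
// the count is masked to six bits, and for counts >= 32 the low word moves
// entirely into the high word.
#include <cstdint>
static void java_lshl(uint32_t& hi, uint32_t& lo, unsigned s) {
  s &= 0x3f;                                        // only the low 6 bits of the count are used
  if (s >= 32) {                                    // x << s == (x << 32) << (s - 32)
    hi = lo << (s - 32);                            // hardware shl masks the count mod 32,
    lo = 0;                                         // so the asm above can skip subl(s, n)
  } else {
    hi = (hi << s) | (s ? (lo >> (32 - s)) : 0);    // shldl(hi, lo)
    lo <<= s;                                       // shll(lo)
  }
}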
476 | |
477 | |
478 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { | |
479 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) | |
480 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) | |
481 assert(hi != rcx, "must not use rcx"); | |
482 assert(lo != rcx, "must not use rcx"); | |
483 const Register s = rcx; // shift count | |
484 const int n = BitsPerWord; | |
485 Label L; | |
486 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) | |
487 cmpl(s, n); // if (s < n) | |
488 jcc(Assembler::less, L); // else (s >= n) | |
489 movl(lo, hi); // x := x >> n | |
490 if (sign_extension) sarl(hi, 31); | |
491 else xorl(hi, hi); | |
492 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! | |
493 bind(L); // s (mod n) < n | |
494 shrdl(lo, hi); // x := x >> s | |
495 if (sign_extension) sarl(hi); | |
496 else shrl(hi); | |
497 } | |
498 | |
499 void MacroAssembler::movoop(Register dst, jobject obj) { | |
500 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |
501 } | |
502 | |
503 void MacroAssembler::movoop(Address dst, jobject obj) { | |
504 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |
505 } | |
506 | |
507 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { | |
508 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
509 } | |
510 | |
511 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { | |
512 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
513 } | |
514 | |
515 void MacroAssembler::movptr(Register dst, AddressLiteral src) { | |
516 if (src.is_lval()) { | |
517 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); | |
518 } else { | |
519 movl(dst, as_Address(src)); | |
520 } | |
521 } | |
522 | |
523 void MacroAssembler::movptr(ArrayAddress dst, Register src) { | |
524 movl(as_Address(dst), src); | |
525 } | |
526 | |
527 void MacroAssembler::movptr(Register dst, ArrayAddress src) { | |
528 movl(dst, as_Address(src)); | |
529 } | |
530 | |
531 // src should NEVER be a real pointer. Use AddressLiteral for true pointers | |
532 void MacroAssembler::movptr(Address dst, intptr_t src) { | |
533 movl(dst, src); | |
534 } | |
535 | |
536 | |
537 void MacroAssembler::pop_callee_saved_registers() { | |
538 pop(rcx); | |
539 pop(rdx); | |
540 pop(rdi); | |
541 pop(rsi); | |
542 } | |
543 | |
544 void MacroAssembler::pop_fTOS() { | |
545 fld_d(Address(rsp, 0)); | |
546 addl(rsp, 2 * wordSize); | |
547 } | |
548 | |
549 void MacroAssembler::push_callee_saved_registers() { | |
550 push(rsi); | |
551 push(rdi); | |
552 push(rdx); | |
553 push(rcx); | |
554 } | |
555 | |
556 void MacroAssembler::push_fTOS() { | |
557 subl(rsp, 2 * wordSize); | |
558 fstp_d(Address(rsp, 0)); | |
559 } | |
560 | |
561 | |
562 void MacroAssembler::pushoop(jobject obj) { | |
563 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); | |
564 } | |
565 | |
566 void MacroAssembler::pushklass(Metadata* obj) { | |
567 push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
568 } | |
569 | |
570 void MacroAssembler::pushptr(AddressLiteral src) { | |
571 if (src.is_lval()) { | |
572 push_literal32((int32_t)src.target(), src.rspec()); | |
573 } else { | |
574 pushl(as_Address(src)); | |
575 } | |
576 } | |
577 | |
578 void MacroAssembler::set_word_if_not_zero(Register dst) { | |
579 xorl(dst, dst); | |
580 set_byte_if_not_zero(dst); | |
581 } | |
582 | |
583 static void pass_arg0(MacroAssembler* masm, Register arg) { | |
584 masm->push(arg); | |
585 } | |
586 | |
587 static void pass_arg1(MacroAssembler* masm, Register arg) { | |
588 masm->push(arg); | |
589 } | |
590 | |
591 static void pass_arg2(MacroAssembler* masm, Register arg) { | |
592 masm->push(arg); | |
593 } | |
594 | |
595 static void pass_arg3(MacroAssembler* masm, Register arg) { | |
596 masm->push(arg); | |
597 } | |
598 | |
599 #ifndef PRODUCT | |
600 extern "C" void findpc(intptr_t x); | |
601 #endif | |
602 | |
603 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { | |
604 // In order to get locks to work, we need to fake an in_VM state | |
605 JavaThread* thread = JavaThread::current(); | |
606 JavaThreadState saved_state = thread->thread_state(); | |
607 thread->set_thread_state(_thread_in_vm); | |
608 if (ShowMessageBoxOnError) { | |
609 JavaThread* thread = JavaThread::current(); | |
610 JavaThreadState saved_state = thread->thread_state(); | |
611 thread->set_thread_state(_thread_in_vm); | |
612 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { | |
613 ttyLocker ttyl; | |
614 BytecodeCounter::print(); | |
615 } | |
616 // To see where a verify_oop failed, get $ebx+40/X for this frame. | |
617 // This is the value of eip which points to where verify_oop will return. | |
618 if (os::message_box(msg, "Execution stopped, print registers?")) { | |
619 print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); | |
620 BREAKPOINT; | |
621 } | |
622 } else { | |
623 ttyLocker ttyl; | |
624 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); | |
625 } | |
626 // Don't assert holding the ttyLock | |
627 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); | |
628 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); | |
629 } | |
630 | |
631 void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { | |
632 ttyLocker ttyl; | |
633 FlagSetting fs(Debugging, true); | |
634 tty->print_cr("eip = 0x%08x", eip); | |
635 #ifndef PRODUCT | |
636 if ((WizardMode || Verbose) && PrintMiscellaneous) { | |
637 tty->cr(); | |
638 findpc(eip); | |
639 tty->cr(); | |
640 } | |
641 #endif | |
642 #define PRINT_REG(rax) \ | |
643 { tty->print("%s = ", #rax); os::print_location(tty, rax); } | |
644 PRINT_REG(rax); | |
645 PRINT_REG(rbx); | |
646 PRINT_REG(rcx); | |
647 PRINT_REG(rdx); | |
648 PRINT_REG(rdi); | |
649 PRINT_REG(rsi); | |
650 PRINT_REG(rbp); | |
651 PRINT_REG(rsp); | |
652 #undef PRINT_REG | |
653 // Print some words near top of stack. | |
654 int* dump_sp = (int*) rsp; | |
655 for (int col1 = 0; col1 < 8; col1++) { | |
656 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); | |
657 os::print_location(tty, *dump_sp++); | |
658 } | |
659 for (int row = 0; row < 16; row++) { | |
660 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); | |
661 for (int col = 0; col < 8; col++) { | |
662 tty->print(" 0x%08x", *dump_sp++); | |
663 } | |
664 tty->cr(); | |
665 } | |
666 // Print some instructions around pc: | |
667 Disassembler::decode((address)eip-64, (address)eip); | |
668 tty->print_cr("--------"); | |
669 Disassembler::decode((address)eip, (address)eip+32); | |
670 } | |
671 | |
672 void MacroAssembler::stop(const char* msg) { | |
673 ExternalAddress message((address)msg); | |
674 // push address of message | |
675 pushptr(message.addr()); | |
676 { Label L; call(L, relocInfo::none); bind(L); } // push eip | |
677 pusha(); // push registers | |
678 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); | |
679 hlt(); | |
680 } | |
681 | |
682 void MacroAssembler::warn(const char* msg) { | |
683 push_CPU_state(); | |
684 | |
685 ExternalAddress message((address) msg); | |
686 // push address of message | |
687 pushptr(message.addr()); | |
688 | |
689 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); | |
690 addl(rsp, wordSize); // discard argument | |
691 pop_CPU_state(); | |
692 } | |
693 | |
694 void MacroAssembler::print_state() { | |
695 { Label L; call(L, relocInfo::none); bind(L); } // push eip | |
696 pusha(); // push registers | |
697 | |
698 push_CPU_state(); | |
699 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); | |
700 pop_CPU_state(); | |
701 | |
702 popa(); | |
703 addl(rsp, wordSize); | |
704 } | |
705 | |
706 #else // _LP64 | |
707 | |
708 // 64 bit versions | |
709 | |
710 Address MacroAssembler::as_Address(AddressLiteral adr) { | |
711 // amd64 always does this as a pc-rel | |
712 // we can be absolute or disp based on the instruction type | |
713 // jmp/call are displacements others are absolute | |
714 assert(!adr.is_lval(), "must be rval"); | |
715 assert(reachable(adr), "must be"); | |
716 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); | |
717 | |
718 } | |
719 | |
720 Address MacroAssembler::as_Address(ArrayAddress adr) { | |
721 AddressLiteral base = adr.base(); | |
722 lea(rscratch1, base); | |
723 Address index = adr.index(); | |
724 assert(index._disp == 0, "must not have disp"); // maybe it can? | |
725 Address array(rscratch1, index._index, index._scale, index._disp); | |
726 return array; | |
727 } | |
728 | |
14909 | 729 int MacroAssembler::biased_locking_enter(Register lock_reg, |
730 Register obj_reg, | |
731 Register swap_reg, | |
732 Register tmp_reg, | |
733 bool swap_reg_contains_mark, | |
734 Label& done, | |
735 Label* slow_case, | |
736 BiasedLockingCounters* counters) { | |
737 assert(UseBiasedLocking, "why call this otherwise?"); | |
738 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); | |
739 assert(tmp_reg != noreg, "tmp_reg must be supplied"); | |
740 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); | |
741 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); | |
742 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); | |
743 Address saved_mark_addr(lock_reg, 0); | |
744 | |
745 if (PrintBiasedLockingStatistics && counters == NULL) | |
746 counters = BiasedLocking::counters(); | |
747 | |
748 // Biased locking | |
749 // See whether the lock is currently biased toward our thread and | |
750 // whether the epoch is still valid | |
751 // Note that the runtime guarantees sufficient alignment of JavaThread | |
752 // pointers to allow age to be placed into low bits | |
753 // First check to see whether biasing is even enabled for this object | |
754 Label cas_label; | |
755 int null_check_offset = -1; | |
756 if (!swap_reg_contains_mark) { | |
757 null_check_offset = offset(); | |
758 movq(swap_reg, mark_addr); | |
759 } | |
760 movq(tmp_reg, swap_reg); | |
761 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); | |
762 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); | |
763 jcc(Assembler::notEqual, cas_label); | |
764 // The bias pattern is present in the object's header. Need to check | |
765 // whether the bias owner and the epoch are both still current. | |
766 load_prototype_header(tmp_reg, obj_reg); | |
767 orq(tmp_reg, r15_thread); | |
768 xorq(tmp_reg, swap_reg); | |
769 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); | |
770 if (counters != NULL) { | |
771 cond_inc32(Assembler::zero, | |
772 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); | |
773 } | |
774 jcc(Assembler::equal, done); | |
775 | |
776 Label try_revoke_bias; | |
777 Label try_rebias; | |
778 | |
779 // At this point we know that the header has the bias pattern and | |
780 // that we are not the bias owner in the current epoch. We need to | |
781 // figure out more details about the state of the header in order to | |
782 // know what operations can be legally performed on the object's | |
783 // header. | |
784 | |
785 // If the low three bits in the xor result aren't clear, that means | |
786 // the prototype header is no longer biased and we have to revoke | |
787 // the bias on this object. | |
788 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); | |
789 jcc(Assembler::notZero, try_revoke_bias); | |
790 | |
791 // Biasing is still enabled for this data type. See whether the | |
792 // epoch of the current bias is still valid, meaning that the epoch | |
793 // bits of the mark word are equal to the epoch bits of the | |
794 // prototype header. (Note that the prototype header's epoch bits | |
795 // only change at a safepoint.) If not, attempt to rebias the object | |
796 // toward the current thread. Note that we must be absolutely sure | |
797 // that the current epoch is invalid in order to do this because | |
798 // otherwise the manipulations it performs on the mark word are | |
799 // illegal. | |
800 testq(tmp_reg, markOopDesc::epoch_mask_in_place); | |
801 jcc(Assembler::notZero, try_rebias); | |
802 | |
803 // The epoch of the current bias is still valid but we know nothing | |
804 // about the owner; it might be set or it might be clear. Try to | |
805 // acquire the bias of the object using an atomic operation. If this | |
806 // fails we will go in to the runtime to revoke the object's bias. | |
807 // Note that we first construct the presumed unbiased header so we | |
808 // don't accidentally blow away another thread's valid bias. | |
809 andq(swap_reg, | |
810 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); | |
811 movq(tmp_reg, swap_reg); | |
812 orq(tmp_reg, r15_thread); | |
813 if (os::is_MP()) { | |
814 lock(); | |
815 } | |
816 cmpxchgq(tmp_reg, Address(obj_reg, 0)); | |
817 // If the biasing toward our thread failed, this means that | |
818 // another thread succeeded in biasing it toward itself and we | |
819 // need to revoke that bias. The revocation will occur in the | |
820 // interpreter runtime in the slow case. | |
821 if (counters != NULL) { | |
822 cond_inc32(Assembler::zero, | |
823 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); | |
824 } | |
825 if (slow_case != NULL) { | |
826 jcc(Assembler::notZero, *slow_case); | |
827 } | |
828 jmp(done); | |
829 | |
830 bind(try_rebias); | |
831 // At this point we know the epoch has expired, meaning that the | |
832 // current "bias owner", if any, is actually invalid. Under these | |
833 // circumstances _only_, we are allowed to use the current header's | |
834 // value as the comparison value when doing the cas to acquire the | |
835 // bias in the current epoch. In other words, we allow transfer of | |
836 // the bias from one thread to another directly in this situation. | |
837 // | |
838 // FIXME: due to a lack of registers we currently blow away the age | |
839 // bits in this situation. Should attempt to preserve them. | |
840 load_prototype_header(tmp_reg, obj_reg); | |
841 orq(tmp_reg, r15_thread); | |
842 if (os::is_MP()) { | |
843 lock(); | |
844 } | |
845 cmpxchgq(tmp_reg, Address(obj_reg, 0)); | |
846 // If the biasing toward our thread failed, then another thread | |
847 // succeeded in biasing it toward itself and we need to revoke that | |
848 // bias. The revocation will occur in the runtime in the slow case. | |
849 if (counters != NULL) { | |
850 cond_inc32(Assembler::zero, | |
851 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); | |
852 } | |
853 if (slow_case != NULL) { | |
854 jcc(Assembler::notZero, *slow_case); | |
855 } | |
856 jmp(done); | |
857 | |
858 bind(try_revoke_bias); | |
859 // The prototype mark in the klass doesn't have the bias bit set any | |
860 // more, indicating that objects of this data type are not supposed | |
861 // to be biased any more. We are going to try to reset the mark of | |
862 // this object to the prototype value and fall through to the | |
863 // CAS-based locking scheme. Note that if our CAS fails, it means | |
864 // that another thread raced us for the privilege of revoking the | |
865 // bias of this particular object, so it's okay to continue in the | |
866 // normal locking code. | |
867 // | |
868 // FIXME: due to a lack of registers we currently blow away the age | |
869 // bits in this situation. Should attempt to preserve them. | |
870 load_prototype_header(tmp_reg, obj_reg); | |
871 if (os::is_MP()) { | |
872 lock(); | |
873 } | |
874 cmpxchgq(tmp_reg, Address(obj_reg, 0)); | |
875 // Fall through to the normal CAS-based lock, because no matter what | |
876 // the result of the above CAS, some thread must have succeeded in | |
877 // removing the bias bit from the object's header. | |
878 if (counters != NULL) { | |
879 cond_inc32(Assembler::zero, | |
880 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); | |
881 } | |
882 | |
883 bind(cas_label); | |
884 | |
885 return null_check_offset; | |
886 } | |
887 | |
7199 | 888 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { |
889 Label L, E; | |
890 | |
891 #ifdef _WIN64 | |
892 // Windows always allocates space for its register args | |
893 assert(num_args <= 4, "only register arguments supported"); | |
894 subq(rsp, frame::arg_reg_save_area_bytes); | |
895 #endif | |
896 | |
897 // Align stack if necessary | |
898 testl(rsp, 15); | |
899 jcc(Assembler::zero, L); | |
900 | |
901 subq(rsp, 8); | |
902 { | |
903 call(RuntimeAddress(entry_point)); | |
904 } | |
905 addq(rsp, 8); | |
906 jmp(E); | |
907 | |
908 bind(L); | |
909 { | |
910 call(RuntimeAddress(entry_point)); | |
911 } | |
912 | |
913 bind(E); | |
914 | |
915 #ifdef _WIN64 | |
916 // restore stack pointer | |
917 addq(rsp, frame::arg_reg_save_area_bytes); | |
918 #endif | |
919 | |
920 } | |
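// Note on the alignment dance above: the 64-bit calling conventions expect rsp
// to be 16-byte aligned at the call site. Since rsp is kept word (8-byte)
// aligned here, testl(rsp, 15) distinguishes the only two possible cases; when
// the low bits are nonzero, a single subq(rsp, 8) restores 16-byte alignment
// for the call, and addq(rsp, 8) undoes it afterwards.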
921 | |
922 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { | |
923 assert(!src2.is_lval(), "should use cmpptr"); | |
924 | |
925 if (reachable(src2)) { | |
926 cmpq(src1, as_Address(src2)); | |
927 } else { | |
928 lea(rscratch1, src2); | |
929 Assembler::cmpq(src1, Address(rscratch1, 0)); | |
930 } | |
931 } | |
932 | |
933 int MacroAssembler::corrected_idivq(Register reg) { | |
934 // Full implementation of Java ldiv and lrem; checks for special | |
935 // case as described in JVM spec., p.243 & p.271. The function | |
936 // returns the (pc) offset of the idivl instruction - may be needed | |
937 // for implicit exceptions. | |
938 // | |
939 // normal case special case | |
940 // | |
941 // input : rax: dividend min_long | |
942 // reg: divisor (may not be eax/edx) -1 | |
943 // | |
944 // output: rax: quotient (= rax idiv reg) min_long | |
945 // rdx: remainder (= rax irem reg) 0 | |
946 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); | |
947 static const int64_t min_long = 0x8000000000000000; | |
948 Label normal_case, special_case; | |
949 | |
950 // check for special case | |
951 cmp64(rax, ExternalAddress((address) &min_long)); | |
952 jcc(Assembler::notEqual, normal_case); | |
953 xorl(rdx, rdx); // prepare rdx for possible special case (where | |
954 // remainder = 0) | |
955 cmpq(reg, -1); | |
956 jcc(Assembler::equal, special_case); | |
957 | |
958 // handle normal case | |
959 bind(normal_case); | |
960 cdqq(); | |
961 int idivq_offset = offset(); | |
962 idivq(reg); | |
963 | |
964 // normal and special case exit | |
965 bind(special_case); | |
966 | |
967 return idivq_offset; | |
968 } | |
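// The special case above exists because the idiv instruction raises a divide
// error for min_long / -1, while the JVM defines that result. The required
// semantics, restated in C++ (illustration only, not part of this file; the
// explicit check also avoids signed-overflow UB in C++):
#include <cstdint>
static void java_ldiv_lrem(int64_t dividend, int64_t divisor,
                           int64_t& quotient, int64_t& remainder) {
  if (dividend == INT64_MIN && divisor == -1) {
    quotient  = dividend;             // quotient = min_long
    remainder = 0;                    // remainder = 0
  } else {
    quotient  = dividend / divisor;   // divisor == 0 is left to the hardware fault,
    remainder = dividend % divisor;   // as in the assembly above
  }
}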
969 | |
970 void MacroAssembler::decrementq(Register reg, int value) { | |
971 if (value == min_jint) { subq(reg, value); return; } | |
972 if (value < 0) { incrementq(reg, -value); return; } | |
973 if (value == 0) { ; return; } | |
974 if (value == 1 && UseIncDec) { decq(reg) ; return; } | |
975 /* else */ { subq(reg, value) ; return; } | |
976 } | |
977 | |
978 void MacroAssembler::decrementq(Address dst, int value) { | |
979 if (value == min_jint) { subq(dst, value); return; } | |
980 if (value < 0) { incrementq(dst, -value); return; } | |
981 if (value == 0) { ; return; } | |
982 if (value == 1 && UseIncDec) { decq(dst) ; return; } | |
983 /* else */ { subq(dst, value) ; return; } | |
984 } | |
985 | |
986 void MacroAssembler::incrementq(Register reg, int value) { | |
987 if (value == min_jint) { addq(reg, value); return; } | |
988 if (value < 0) { decrementq(reg, -value); return; } | |
989 if (value == 0) { ; return; } | |
990 if (value == 1 && UseIncDec) { incq(reg) ; return; } | |
991 /* else */ { addq(reg, value) ; return; } | |
992 } | |
993 | |
994 void MacroAssembler::incrementq(Address dst, int value) { | |
995 if (value == min_jint) { addq(dst, value); return; } | |
996 if (value < 0) { decrementq(dst, -value); return; } | |
997 if (value == 0) { ; return; } | |
998 if (value == 1 && UseIncDec) { incq(dst) ; return; } | |
999 /* else */ { addq(dst, value) ; return; } | |
1000 } | |
1001 | |
1002 // 32bit can do a case table jump in one instruction but we no longer allow the base | |
1003 // to be installed in the Address class | |
1004 void MacroAssembler::jump(ArrayAddress entry) { | |
1005 lea(rscratch1, entry.base()); | |
1006 Address dispatch = entry.index(); | |
1007 assert(dispatch._base == noreg, "must be"); | |
1008 dispatch._base = rscratch1; | |
1009 jmp(dispatch); | |
1010 } | |
1011 | |
1012 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { | |
1013 ShouldNotReachHere(); // 64bit doesn't use two regs | |
1014 cmpq(x_lo, y_lo); | |
1015 } | |
1016 | |
1017 void MacroAssembler::lea(Register dst, AddressLiteral src) { | |
1018 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); | |
1019 } | |
1020 | |
1021 void MacroAssembler::lea(Address dst, AddressLiteral adr) { | |
1022 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); | |
1023 movptr(dst, rscratch1); | |
1024 } | |
1025 | |
1026 void MacroAssembler::leave() { | |
1027 // %%% is this really better? Why not on 32bit too? | |
7430 | 1028 emit_int8((unsigned char)0xC9); // LEAVE |
7199 | 1029 } |
1030 | |
1031 void MacroAssembler::lneg(Register hi, Register lo) { | |
1032 ShouldNotReachHere(); // 64bit doesn't use two regs | |
1033 negq(lo); | |
1034 } | |
1035 | |
1036 void MacroAssembler::movoop(Register dst, jobject obj) { | |
1037 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); | |
1038 } | |
1039 | |
1040 void MacroAssembler::movoop(Address dst, jobject obj) { | |
1041 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); | |
1042 movq(dst, rscratch1); | |
1043 } | |
1044 | |
1045 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { | |
1046 mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); | |
1047 } | |
1048 | |
1049 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { | |
1050 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); | |
1051 movq(dst, rscratch1); | |
1052 } | |
1053 | |
1054 void MacroAssembler::movptr(Register dst, AddressLiteral src) { | |
1055 if (src.is_lval()) { | |
1056 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); | |
1057 } else { | |
1058 if (reachable(src)) { | |
1059 movq(dst, as_Address(src)); | |
1060 } else { | |
1061 lea(rscratch1, src); | |
1062 movq(dst, Address(rscratch1,0)); | |
1063 } | |
1064 } | |
1065 } | |
1066 | |
1067 void MacroAssembler::movptr(ArrayAddress dst, Register src) { | |
1068 movq(as_Address(dst), src); | |
1069 } | |
1070 | |
1071 void MacroAssembler::movptr(Register dst, ArrayAddress src) { | |
1072 movq(dst, as_Address(src)); | |
1073 } | |
1074 | |
1075 // src should NEVER be a real pointer. Use AddressLiteral for true pointers | |
1076 void MacroAssembler::movptr(Address dst, intptr_t src) { | |
1077 mov64(rscratch1, src); | |
1078 movq(dst, rscratch1); | |
1079 } | |
1080 | |
1081 // These are mostly for initializing NULL | |
1082 void MacroAssembler::movptr(Address dst, int32_t src) { | |
1083 movslq(dst, src); | |
1084 } | |
1085 | |
1086 void MacroAssembler::movptr(Register dst, int32_t src) { | |
1087 mov64(dst, (intptr_t)src); | |
1088 } | |
1089 | |
1090 void MacroAssembler::pushoop(jobject obj) { | |
1091 movoop(rscratch1, obj); | |
1092 push(rscratch1); | |
1093 } | |
1094 | |
1095 void MacroAssembler::pushklass(Metadata* obj) { | |
1096 mov_metadata(rscratch1, obj); | |
1097 push(rscratch1); | |
1098 } | |
1099 | |
1100 void MacroAssembler::pushptr(AddressLiteral src) { | |
1101 lea(rscratch1, src); | |
1102 if (src.is_lval()) { | |
1103 push(rscratch1); | |
1104 } else { | |
1105 pushq(Address(rscratch1, 0)); | |
1106 } | |
1107 } | |
1108 | |
1109 void MacroAssembler::reset_last_Java_frame(bool clear_fp, | |
1110 bool clear_pc) { | |
1111 // we must set sp to zero to clear frame | |
1112 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); | |
1113 // must clear fp, so that compiled frames are not confused; it is | |
1114 // possible that we need it only for debugging | |
1115 if (clear_fp) { | |
1116 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); | |
1117 } | |
1118 | |
1119 if (clear_pc) { | |
1120 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); | |
1121 } | |
1122 } | |
1123 | |
1124 void MacroAssembler::set_last_Java_frame(Register last_java_sp, | |
1125 Register last_java_fp, | |
1126 address last_java_pc) { | |
1127 // determine last_java_sp register | |
1128 if (!last_java_sp->is_valid()) { | |
1129 last_java_sp = rsp; | |
1130 } | |
1131 | |
1132 // last_java_fp is optional | |
1133 if (last_java_fp->is_valid()) { | |
1134 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), | |
1135 last_java_fp); | |
1136 } | |
1137 | |
1138 // last_java_pc is optional | |
1139 if (last_java_pc != NULL) { | |
1140 Address java_pc(r15_thread, | |
1141 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); | |
1142 lea(rscratch1, InternalAddress(last_java_pc)); | |
1143 movptr(java_pc, rscratch1); | |
1144 } | |
1145 | |
1146 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); | |
1147 } | |
1148 | |
1149 static void pass_arg0(MacroAssembler* masm, Register arg) { | |
1150 if (c_rarg0 != arg ) { | |
1151 masm->mov(c_rarg0, arg); | |
1152 } | |
1153 } | |
1154 | |
1155 static void pass_arg1(MacroAssembler* masm, Register arg) { | |
1156 if (c_rarg1 != arg ) { | |
1157 masm->mov(c_rarg1, arg); | |
1158 } | |
1159 } | |
1160 | |
1161 static void pass_arg2(MacroAssembler* masm, Register arg) { | |
1162 if (c_rarg2 != arg ) { | |
1163 masm->mov(c_rarg2, arg); | |
1164 } | |
1165 } | |
1166 | |
1167 static void pass_arg3(MacroAssembler* masm, Register arg) { | |
1168 if (c_rarg3 != arg ) { | |
1169 masm->mov(c_rarg3, arg); | |
1170 } | |
1171 } | |
1172 | |
1173 void MacroAssembler::stop(const char* msg) { | |
1174 address rip = pc(); | |
1175 pusha(); // get regs on stack | |
1176 lea(c_rarg0, ExternalAddress((address) msg)); | |
1177 lea(c_rarg1, InternalAddress(rip)); | |
1178 movq(c_rarg2, rsp); // pass pointer to regs array | |
1179 andq(rsp, -16); // align stack as required by ABI | |
1180 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); | |
1181 hlt(); | |
1182 } | |
1183 | |
1184 void MacroAssembler::warn(const char* msg) { | |
1185 push(rbp); | |
1186 movq(rbp, rsp); | |
1187 andq(rsp, -16); // align stack as required by push_CPU_state and call | |
1188 push_CPU_state(); // keeps alignment at 16 bytes | |
1189 lea(c_rarg0, ExternalAddress((address) msg)); | |
1190 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); | |
1191 pop_CPU_state(); | |
1192 mov(rsp, rbp); | |
1193 pop(rbp); | |
1194 } | |
1195 | |
1196 void MacroAssembler::print_state() { | |
1197 address rip = pc(); | |
1198 pusha(); // get regs on stack | |
1199 push(rbp); | |
1200 movq(rbp, rsp); | |
1201 andq(rsp, -16); // align stack as required by push_CPU_state and call | |
1202 push_CPU_state(); // keeps alignment at 16 bytes | |
1203 | |
1204 lea(c_rarg0, InternalAddress(rip)); | |
1205 lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array | |
1206 call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1); | |
1207 | |
1208 pop_CPU_state(); | |
1209 mov(rsp, rbp); | |
1210 pop(rbp); | |
1211 popa(); | |
1212 } | |
1213 | |
1214 #ifndef PRODUCT | |
1215 extern "C" void findpc(intptr_t x); | |
1216 #endif | |
1217 | |
1218 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { | |
1219 // In order to get locks to work, we need to fake an in_VM state | |
1220 if (ShowMessageBoxOnError) { | |
1221 JavaThread* thread = JavaThread::current(); | |
1222 JavaThreadState saved_state = thread->thread_state(); | |
1223 thread->set_thread_state(_thread_in_vm); | |
1224 #ifndef PRODUCT | |
1225 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { | |
1226 ttyLocker ttyl; | |
1227 BytecodeCounter::print(); | |
1228 } | |
1229 #endif | |
1230 // To see where a verify_oop failed, get $ebx+40/X for this frame. | |
1231 // XXX correct this offset for amd64 | |
1232 // This is the value of eip which points to where verify_oop will return. | |
1233 if (os::message_box(msg, "Execution stopped, print registers?")) { | |
1234 print_state64(pc, regs); | |
1235 BREAKPOINT; | |
1236 assert(false, "start up GDB"); | |
1237 } | |
1238 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); | |
1239 } else { | |
1240 ttyLocker ttyl; | |
1241 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", | |
1242 msg); | |
1243 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); | |
1244 } | |
1245 } | |
1246 | |
1247 void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { | |
1248 ttyLocker ttyl; | |
1249 FlagSetting fs(Debugging, true); | |
1250 tty->print_cr("rip = 0x%016lx", pc); | |
1251 #ifndef PRODUCT | |
1252 tty->cr(); | |
1253 findpc(pc); | |
1254 tty->cr(); | |
1255 #endif | |
1256 #define PRINT_REG(rax, value) \ | |
1257 { tty->print("%s = ", #rax); os::print_location(tty, value); } | |
1258 PRINT_REG(rax, regs[15]); | |
1259 PRINT_REG(rbx, regs[12]); | |
1260 PRINT_REG(rcx, regs[14]); | |
1261 PRINT_REG(rdx, regs[13]); | |
1262 PRINT_REG(rdi, regs[8]); | |
1263 PRINT_REG(rsi, regs[9]); | |
1264 PRINT_REG(rbp, regs[10]); | |
1265 PRINT_REG(rsp, regs[11]); | |
1266 PRINT_REG(r8 , regs[7]); | |
1267 PRINT_REG(r9 , regs[6]); | |
1268 PRINT_REG(r10, regs[5]); | |
1269 PRINT_REG(r11, regs[4]); | |
1270 PRINT_REG(r12, regs[3]); | |
1271 PRINT_REG(r13, regs[2]); | |
1272 PRINT_REG(r14, regs[1]); | |
1273 PRINT_REG(r15, regs[0]); | |
1274 #undef PRINT_REG | |
1275 // Print some words near top of stack. | |
1276 int64_t* rsp = (int64_t*) regs[11]; | |
1277 int64_t* dump_sp = rsp; | |
1278 for (int col1 = 0; col1 < 8; col1++) { | |
1279 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); | |
1280 os::print_location(tty, *dump_sp++); | |
1281 } | |
1282 for (int row = 0; row < 25; row++) { | |
1283 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); | |
1284 for (int col = 0; col < 4; col++) { | |
1285 tty->print(" 0x%016lx", *dump_sp++); | |
1286 } | |
1287 tty->cr(); | |
1288 } | |
1289 // Print some instructions around pc: | |
1290 Disassembler::decode((address)pc-64, (address)pc); | |
1291 tty->print_cr("--------"); | |
1292 Disassembler::decode((address)pc, (address)pc+32); | |
1293 } | |
1294 | |
1295 #endif // _LP64 | |
1296 | |
1297 // Now versions that are common to 32/64 bit | |
1298 | |
1299 void MacroAssembler::addptr(Register dst, int32_t imm32) { | |
1300 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); | |
1301 } | |
1302 | |
1303 void MacroAssembler::addptr(Register dst, Register src) { | |
1304 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); | |
1305 } | |
1306 | |
1307 void MacroAssembler::addptr(Address dst, Register src) { | |
1308 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); | |
1309 } | |
1310 | |
1311 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { | |
1312 if (reachable(src)) { | |
1313 Assembler::addsd(dst, as_Address(src)); | |
1314 } else { | |
1315 lea(rscratch1, src); | |
1316 Assembler::addsd(dst, Address(rscratch1, 0)); | |
1317 } | |
1318 } | |
1319 | |
1320 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { | |
1321 if (reachable(src)) { | |
1322 addss(dst, as_Address(src)); | |
1323 } else { | |
1324 lea(rscratch1, src); | |
1325 addss(dst, Address(rscratch1, 0)); | |
1326 } | |
1327 } | |
1328 | |
1329 void MacroAssembler::align(int modulus) { | |
1330 if (offset() % modulus != 0) { | |
1331 nop(modulus - (offset() % modulus)); | |
1332 } | |
1333 } | |
1334 | |
1335 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { | |
1336 // Used in sign-masking with aligned address. | |
1337 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); | |
1338 if (reachable(src)) { | |
1339 Assembler::andpd(dst, as_Address(src)); | |
1340 } else { | |
1341 lea(rscratch1, src); | |
1342 Assembler::andpd(dst, Address(rscratch1, 0)); | |
1343 } | |
1344 } | |
1345 | |
1346 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { | |
1347 // Used in sign-masking with aligned address. | |
1348 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); | |
1349 if (reachable(src)) { | |
1350 Assembler::andps(dst, as_Address(src)); | |
1351 } else { | |
1352 lea(rscratch1, src); | |
1353 Assembler::andps(dst, Address(rscratch1, 0)); | |
1354 } | |
1355 } | |
1356 | |
1357 void MacroAssembler::andptr(Register dst, int32_t imm32) { | |
1358 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); | |
1359 } | |
1360 | |
1361 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { | |
1362 pushf(); | |
14909 | 1363 if (os::is_MP()) |
1364 lock(); | |
1365 incrementl(counter_addr); | |
7199 | 1366 popf(); |
1367 } | |
1368 | |
1369 // Writes to stack successive pages until offset reached to check for | |
1370 // stack overflow + shadow pages. This clobbers tmp. | |
1371 void MacroAssembler::bang_stack_size(Register size, Register tmp) { | |
1372 movptr(tmp, rsp); | |
1373 // Bang stack for total size given plus shadow page size. | |
1374 // Bang one page at a time because large size can bang beyond yellow and | |
1375 // red zones. | |
1376 Label loop; | |
1377 bind(loop); | |
1378 movl(Address(tmp, (-os::vm_page_size())), size ); | |
1379 subptr(tmp, os::vm_page_size()); | |
1380 subl(size, os::vm_page_size()); | |
1381 jcc(Assembler::greater, loop); | |
1382 | |
1383 // Bang down shadow pages too. | |
13047 | 1384 // At this point, (tmp-0) is the last address touched, so don't |
13047 | 1385 // touch it again. (It was touched as (tmp-pagesize) but then tmp |
13047 | 1386 // was post-decremented.) Skip this address by starting at i=1, and |
13047 | 1387 // touch a few more pages below. N.B. It is important to touch all |
13047 | 1388 // the way down to and including i=StackShadowPages. |
13047 | 1389 for (int i = 1; i <= StackShadowPages; i++) { |
7199 | 1390 // this could be any sized move but this can be a debugging crumb |
1391 // so the bigger the better. | |
1392 movptr(Address(tmp, (-i*os::vm_page_size())), size ); | |
1393 } | |
1394 } | |
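// Illustration (assuming 4K pages and StackShadowPages == 20): the first loop
// bangs one page at a time below tmp until 'size' bytes are covered, and the
// second loop then touches tmp-1*4K down to tmp-20*4K so that the shadow
// region below the banged area is probed as well.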
1395 | |
1396 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { | |
1397 assert(UseBiasedLocking, "why call this otherwise?"); | |
1398 | |
1399 // Check for biased locking unlock case, which is a no-op | |
1400 // Note: we do not have to check the thread ID for two reasons. | |
1401 // First, the interpreter checks for IllegalMonitorStateException at | |
1402 // a higher level. Second, if the bias was revoked while we held the | |
1403 // lock, the object could not be rebiased toward another thread, so | |
1404 // the bias bit would be clear. | |
1405 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); | |
1406 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); | |
1407 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); | |
1408 jcc(Assembler::equal, done); | |
1409 } | |
1410 | |
1411 void MacroAssembler::c2bool(Register x) { | |
1412 // implements x == 0 ? 0 : 1 | |
1413 // note: must only look at least-significant byte of x | |
1414 // since C-style booleans are stored in one byte | |
1415 // only! (was bug) | |
1416 andl(x, 0xFF); | |
1417 setb(Assembler::notZero, x); | |
1418 } | |
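// For example, a value of 0x1200 in x normalizes to 0 (its low byte is 0x00)
// while 0x0001 normalizes to 1; only the least-significant byte is consulted.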
1419 | |
1420 // Wouldn't need if AddressLiteral version had new name | |
1421 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { | |
1422 Assembler::call(L, rtype); | |
1423 } | |
1424 | |
1425 void MacroAssembler::call(Register entry) { | |
1426 Assembler::call(entry); | |
1427 } | |
1428 | |
1429 void MacroAssembler::call(AddressLiteral entry) { | |
1430 if (reachable(entry)) { | |
1431 Assembler::call_literal(entry.target(), entry.rspec()); | |
1432 } else { | |
1433 lea(rscratch1, entry); | |
1434 Assembler::call(rscratch1); | |
1435 } | |
1436 } | |
1437 | |
1438 void MacroAssembler::ic_call(address entry) { | |
1439 RelocationHolder rh = virtual_call_Relocation::spec(pc()); | |
1440 movptr(rax, (intptr_t)Universe::non_oop_word()); | |
1441 call(AddressLiteral(entry, rh)); | |
1442 } | |
1443 | |
1444 // Implementation of call_VM versions | |
1445 | |
1446 void MacroAssembler::call_VM(Register oop_result, | |
1447 address entry_point, | |
1448 bool check_exceptions) { | |
1449 Label C, E; | |
1450 call(C, relocInfo::none); | |
1451 jmp(E); | |
1452 | |
1453 bind(C); | |
1454 call_VM_helper(oop_result, entry_point, 0, check_exceptions); | |
1455 ret(0); | |
1456 | |
1457 bind(E); | |
1458 } | |
1459 | |
1460 void MacroAssembler::call_VM(Register oop_result, | |
1461 address entry_point, | |
1462 Register arg_1, | |
1463 bool check_exceptions) { | |
1464 Label C, E; | |
1465 call(C, relocInfo::none); | |
1466 jmp(E); | |
1467 | |
1468 bind(C); | |
1469 pass_arg1(this, arg_1); | |
1470 call_VM_helper(oop_result, entry_point, 1, check_exceptions); | |
1471 ret(0); | |
1472 | |
1473 bind(E); | |
1474 } | |
1475 | |
1476 void MacroAssembler::call_VM(Register oop_result, | |
1477 address entry_point, | |
1478 Register arg_1, | |
1479 Register arg_2, | |
1480 bool check_exceptions) { | |
1481 Label C, E; | |
1482 call(C, relocInfo::none); | |
1483 jmp(E); | |
1484 | |
1485 bind(C); | |
1486 | |
1487 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1488 | |
1489 pass_arg2(this, arg_2); | |
1490 pass_arg1(this, arg_1); | |
1491 call_VM_helper(oop_result, entry_point, 2, check_exceptions); | |
1492 ret(0); | |
1493 | |
1494 bind(E); | |
1495 } | |
1496 | |
1497 void MacroAssembler::call_VM(Register oop_result, | |
1498 address entry_point, | |
1499 Register arg_1, | |
1500 Register arg_2, | |
1501 Register arg_3, | |
1502 bool check_exceptions) { | |
1503 Label C, E; | |
1504 call(C, relocInfo::none); | |
1505 jmp(E); | |
1506 | |
1507 bind(C); | |
1508 | |
1509 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); | |
1510 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); | |
1511 pass_arg3(this, arg_3); | |
1512 | |
1513 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1514 pass_arg2(this, arg_2); | |
1515 | |
1516 pass_arg1(this, arg_1); | |
1517 call_VM_helper(oop_result, entry_point, 3, check_exceptions); | |
1518 ret(0); | |
1519 | |
1520 bind(E); | |
1521 } | |
1522 | |
1523 void MacroAssembler::call_VM(Register oop_result, | |
1524 Register last_java_sp, | |
1525 address entry_point, | |
1526 int number_of_arguments, | |
1527 bool check_exceptions) { | |
1528 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); | |
1529 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); | |
1530 } | |
1531 | |
1532 void MacroAssembler::call_VM(Register oop_result, | |
1533 Register last_java_sp, | |
1534 address entry_point, | |
1535 Register arg_1, | |
1536 bool check_exceptions) { | |
1537 pass_arg1(this, arg_1); | |
1538 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); | |
1539 } | |
1540 | |
1541 void MacroAssembler::call_VM(Register oop_result, | |
1542 Register last_java_sp, | |
1543 address entry_point, | |
1544 Register arg_1, | |
1545 Register arg_2, | |
1546 bool check_exceptions) { | |
1547 | |
1548 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1549 pass_arg2(this, arg_2); | |
1550 pass_arg1(this, arg_1); | |
1551 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); | |
1552 } | |
1553 | |
1554 void MacroAssembler::call_VM(Register oop_result, | |
1555 Register last_java_sp, | |
1556 address entry_point, | |
1557 Register arg_1, | |
1558 Register arg_2, | |
1559 Register arg_3, | |
1560 bool check_exceptions) { | |
1561 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); | |
1562 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); | |
1563 pass_arg3(this, arg_3); | |
1564 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1565 pass_arg2(this, arg_2); | |
1566 pass_arg1(this, arg_1); | |
1567 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); | |
1568 } | |
1569 | |
1570 void MacroAssembler::super_call_VM(Register oop_result, | |
1571 Register last_java_sp, | |
1572 address entry_point, | |
1573 int number_of_arguments, | |
1574 bool check_exceptions) { | |
1575 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); | |
1576 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); | |
1577 } | |
1578 | |
1579 void MacroAssembler::super_call_VM(Register oop_result, | |
1580 Register last_java_sp, | |
1581 address entry_point, | |
1582 Register arg_1, | |
1583 bool check_exceptions) { | |
1584 pass_arg1(this, arg_1); | |
1585 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); | |
1586 } | |
1587 | |
1588 void MacroAssembler::super_call_VM(Register oop_result, | |
1589 Register last_java_sp, | |
1590 address entry_point, | |
1591 Register arg_1, | |
1592 Register arg_2, | |
1593 bool check_exceptions) { | |
1594 | |
1595 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1596 pass_arg2(this, arg_2); | |
1597 pass_arg1(this, arg_1); | |
1598 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); | |
1599 } | |
1600 | |
1601 void MacroAssembler::super_call_VM(Register oop_result, | |
1602 Register last_java_sp, | |
1603 address entry_point, | |
1604 Register arg_1, | |
1605 Register arg_2, | |
1606 Register arg_3, | |
1607 bool check_exceptions) { | |
1608 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); | |
1609 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); | |
1610 pass_arg3(this, arg_3); | |
1611 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1612 pass_arg2(this, arg_2); | |
1613 pass_arg1(this, arg_1); | |
1614 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); | |
1615 } | |
1616 | |
1617 void MacroAssembler::call_VM_base(Register oop_result, | |
1618 Register java_thread, | |
1619 Register last_java_sp, | |
1620 address entry_point, | |
1621 int number_of_arguments, | |
1622 bool check_exceptions) { | |
1623 // determine java_thread register | |
1624 if (!java_thread->is_valid()) { | |
1625 #ifdef _LP64 | |
1626 java_thread = r15_thread; | |
1627 #else | |
1628 java_thread = rdi; | |
1629 get_thread(java_thread); | |
1630 #endif // LP64 | |
1631 } | |
1632 // determine last_java_sp register | |
1633 if (!last_java_sp->is_valid()) { | |
1634 last_java_sp = rsp; | |
1635 } | |
1636 // debugging support | |
1637 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); | |
1638 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); | |
1639 #ifdef ASSERT | |
1640 // TraceBytecodes does not use r12 but saves it over the call, so don't verify | |
1641 // r12 is the heapbase. | |
12226 | 1642 LP64_ONLY(if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");) | |
7199 | 1643 #endif // ASSERT |
1644 | |
1645 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); | |
1646 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); | |
1647 | |
1648 // push java thread (becomes first argument of C function) | |
1649 | |
1650 NOT_LP64(push(java_thread); number_of_arguments++); | |
1651 LP64_ONLY(mov(c_rarg0, r15_thread)); | |
1652 | |
1653 // set last Java frame before call | |
1654 assert(last_java_sp != rbp, "can't use ebp/rbp"); | |
1655 | |
1656 // Only interpreter should have to set fp | |
1657 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); | |
1658 | |
1659 // do the call, remove parameters | |
1660 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); | |
1661 | |
1662 // restore the thread (cannot use the pushed argument since arguments | |
1663 // may be overwritten by C code generated by an optimizing compiler); | |
1664 // however can use the register value directly if it is callee saved. | |
1665 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { | |
1666 // rdi & rsi (also r15) are callee saved -> nothing to do | |
1667 #ifdef ASSERT | |
1668 guarantee(java_thread != rax, "change this code"); | |
1669 push(rax); | |
1670 { Label L; | |
1671 get_thread(rax); | |
1672 cmpptr(java_thread, rax); | |
1673 jcc(Assembler::equal, L); | |
1674 STOP("MacroAssembler::call_VM_base: rdi not callee saved?"); | |
1675 bind(L); | |
1676 } | |
1677 pop(rax); | |
1678 #endif | |
1679 } else { | |
1680 get_thread(java_thread); | |
1681 } | |
1682 // reset last Java frame | |
1683 // Only interpreter should have to clear fp | |
1684 reset_last_Java_frame(java_thread, true, false); | |
1685 | |
1686 #ifndef CC_INTERP | |
1687 // C++ interp handles this in the interpreter | |
1688 check_and_handle_popframe(java_thread); | |
1689 check_and_handle_earlyret(java_thread); | |
1690 #endif /* CC_INTERP */ | |
1691 | |
1692 if (check_exceptions) { | |
1693 // check for pending exceptions (java_thread is set upon return) | |
1694 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); | |
1695 #ifndef _LP64 | |
1696 jump_cc(Assembler::notEqual, | |
1697 RuntimeAddress(StubRoutines::forward_exception_entry())); | |
1698 #else | |
1699 // This used to conditionally jump to forward_exception however it is | |
1700 // possible if we relocate that the branch will not reach. So we must jump | |
1701 // around so we can always reach | |
1702 | |
1703 Label ok; | |
1704 jcc(Assembler::equal, ok); | |
1705 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); | |
1706 bind(ok); | |
1707 #endif // LP64 | |
1708 } | |
1709 | |
1710 // get oop result if there is one and reset the value in the thread | |
1711 if (oop_result->is_valid()) { | |
1712 get_vm_result(oop_result, java_thread); | |
1713 } | |
1714 } | |
1715 | |
1716 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { | |
1717 | |
1718 // Calculate the value for last_Java_sp | |
1719 // somewhat subtle. call_VM does an intermediate call | |
1720 // which places a return address on the stack just under the | |
1721 // stack pointer as the user finished with it. This allows
1722 // us to retrieve last_Java_pc from last_Java_sp[-1]. | |
1723 // On 32bit we then have to push additional args on the stack to accomplish | |
1724 // the actual requested call. On 64bit call_VM can only use register args | |
1725 // so the only extra space is the return address that call_VM created. | |
1726 // This hopefully explains the calculations here. | |
1727 | |
1728 #ifdef _LP64 | |
1729 // We've pushed one address, correct last_Java_sp | |
1730 lea(rax, Address(rsp, wordSize)); | |
1731 #else | |
1732 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); | |
1733 #endif // LP64 | |
1734 | |
1735 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); | |
1736 | |
1737 } | |
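// Sketch of the 64-bit layout this relies on: the intermediate call made by
// call_VM pushes exactly one return address, so on entry here rsp points at
// that slot and last_Java_sp is rsp + wordSize, i.e. the caller's stack
// pointer before the intermediate call; the saved pc is then reachable at
// last_Java_sp[-1] as described above.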
1738 | |
1739 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { | |
1740 call_VM_leaf_base(entry_point, number_of_arguments); | |
1741 } | |
1742 | |
1743 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { | |
1744 pass_arg0(this, arg_0); | |
1745 call_VM_leaf(entry_point, 1); | |
1746 } | |
1747 | |
1748 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { | |
1749 | |
1750 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); | |
1751 pass_arg1(this, arg_1); | |
1752 pass_arg0(this, arg_0); | |
1753 call_VM_leaf(entry_point, 2); | |
1754 } | |
1755 | |
1756 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { | |
1757 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); | |
1758 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1759 pass_arg2(this, arg_2); | |
1760 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); | |
1761 pass_arg1(this, arg_1); | |
1762 pass_arg0(this, arg_0); | |
1763 call_VM_leaf(entry_point, 3); | |
1764 } | |
1765 | |
1766 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { | |
1767 pass_arg0(this, arg_0); | |
1768 MacroAssembler::call_VM_leaf_base(entry_point, 1); | |
1769 } | |
1770 | |
1771 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { | |
1772 | |
1773 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); | |
1774 pass_arg1(this, arg_1); | |
1775 pass_arg0(this, arg_0); | |
1776 MacroAssembler::call_VM_leaf_base(entry_point, 2); | |
1777 } | |
1778 | |
1779 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { | |
1780 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); | |
1781 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1782 pass_arg2(this, arg_2); | |
1783 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); | |
1784 pass_arg1(this, arg_1); | |
1785 pass_arg0(this, arg_0); | |
1786 MacroAssembler::call_VM_leaf_base(entry_point, 3); | |
1787 } | |
1788 | |
1789 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { | |
1790 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); | |
1791 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); | |
1792 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); | |
1793 pass_arg3(this, arg_3); | |
1794 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); | |
1795 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); | |
1796 pass_arg2(this, arg_2); | |
1797 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); | |
1798 pass_arg1(this, arg_1); | |
1799 pass_arg0(this, arg_0); | |
1800 MacroAssembler::call_VM_leaf_base(entry_point, 4); | |
1801 } | |
1802 | |
1803 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { | |
1804 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); | |
1805 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); | |
1806 verify_oop(oop_result, "broken oop in call_VM_base"); | |
1807 } | |
1808 | |
1809 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { | |
1810 movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); | |
1811 movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD); | |
1812 } | |
1813 | |
1814 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { | |
1815 } | |
1816 | |
1817 void MacroAssembler::check_and_handle_popframe(Register java_thread) { | |
1818 } | |
1819 | |
1820 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { | |
1821 if (reachable(src1)) { | |
1822 cmpl(as_Address(src1), imm); | |
1823 } else { | |
1824 lea(rscratch1, src1); | |
1825 cmpl(Address(rscratch1, 0), imm); | |
1826 } | |
1827 } | |
1828 | |
1829 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { | |
1830 assert(!src2.is_lval(), "use cmpptr"); | |
1831 if (reachable(src2)) { | |
1832 cmpl(src1, as_Address(src2)); | |
1833 } else { | |
1834 lea(rscratch1, src2); | |
1835 cmpl(src1, Address(rscratch1, 0)); | |
1836 } | |
1837 } | |
1838 | |
1839 void MacroAssembler::cmp32(Register src1, int32_t imm) { | |
1840 Assembler::cmpl(src1, imm); | |
1841 } | |
1842 | |
1843 void MacroAssembler::cmp32(Register src1, Address src2) { | |
1844 Assembler::cmpl(src1, src2); | |
1845 } | |
1846 | |
1847 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { | |
1848 ucomisd(opr1, opr2); | |
1849 | |
1850 Label L; | |
1851 if (unordered_is_less) { | |
1852 movl(dst, -1); | |
1853 jcc(Assembler::parity, L); | |
1854 jcc(Assembler::below , L); | |
1855 movl(dst, 0); | |
1856 jcc(Assembler::equal , L); | |
1857 increment(dst); | |
1858 } else { // unordered is greater | |
1859 movl(dst, 1); | |
1860 jcc(Assembler::parity, L); | |
1861 jcc(Assembler::above , L); | |
1862 movl(dst, 0); | |
1863 jcc(Assembler::equal , L); | |
1864 decrementl(dst); | |
1865 } | |
1866 bind(L); | |
1867 } | |
1868 | |
1869 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { | |
1870 ucomiss(opr1, opr2); | |
1871 | |
1872 Label L; | |
1873 if (unordered_is_less) { | |
1874 movl(dst, -1); | |
1875 jcc(Assembler::parity, L); | |
1876 jcc(Assembler::below , L); | |
1877 movl(dst, 0); | |
1878 jcc(Assembler::equal , L); | |
1879 increment(dst); | |
1880 } else { // unordered is greater | |
1881 movl(dst, 1); | |
1882 jcc(Assembler::parity, L); | |
1883 jcc(Assembler::above , L); | |
1884 movl(dst, 0); | |
1885 jcc(Assembler::equal , L); | |
1886 decrementl(dst); | |
1887 } | |
1888 bind(L); | |
1889 } | |
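// Both helpers above yield a Java-style three-way result in dst: -1 when
// opr1 < opr2, 0 when equal, +1 when opr1 > opr2. An unordered compare (NaN
// operand, signalled by the parity flag) collapses to -1 when
// unordered_is_less and to +1 otherwise, matching the dcmpl/dcmpg and
// fcmpl/fcmpg bytecode semantics.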
1890 | |
1891 | |
1892 void MacroAssembler::cmp8(AddressLiteral src1, int imm) { | |
1893 if (reachable(src1)) { | |
1894 cmpb(as_Address(src1), imm); | |
1895 } else { | |
1896 lea(rscratch1, src1); | |
1897 cmpb(Address(rscratch1, 0), imm); | |
1898 } | |
1899 } | |
1900 | |
1901 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { | |
1902 #ifdef _LP64 | |
1903 if (src2.is_lval()) { | |
1904 movptr(rscratch1, src2); | |
1905 Assembler::cmpq(src1, rscratch1); | |
1906 } else if (reachable(src2)) { | |
1907 cmpq(src1, as_Address(src2)); | |
1908 } else { | |
1909 lea(rscratch1, src2); | |
1910 Assembler::cmpq(src1, Address(rscratch1, 0)); | |
1911 } | |
1912 #else | |
1913 if (src2.is_lval()) { | |
1914 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); | |
1915 } else { | |
1916 cmpl(src1, as_Address(src2)); | |
1917 } | |
1918 #endif // _LP64 | |
1919 } | |
1920 | |
1921 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { | |
1922 assert(src2.is_lval(), "not a mem-mem compare"); | |
1923 #ifdef _LP64 | |
1924 // moves src2's literal address | |
1925 movptr(rscratch1, src2); | |
1926 Assembler::cmpq(src1, rscratch1); | |
1927 #else | |
1928 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); | |
1929 #endif // _LP64 | |
1930 } | |
1931 | |
1932 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { | |
1933 if (reachable(adr)) { | |
1934 if (os::is_MP()) | |
1935 lock(); | |
1936 cmpxchgptr(reg, as_Address(adr)); | |
1937 } else { | |
1938 lea(rscratch1, adr); | |
1939 if (os::is_MP()) | |
1940 lock(); | |
1941 cmpxchgptr(reg, Address(rscratch1, 0)); | |
1942 } | |
1943 } | |
1944 | |
1945 void MacroAssembler::cmpxchgptr(Register reg, Address adr) { | |
1946 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); | |
1947 } | |
1948 | |
1949 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { | |
1950 if (reachable(src)) { | |
1951 Assembler::comisd(dst, as_Address(src)); | |
1952 } else { | |
1953 lea(rscratch1, src); | |
1954 Assembler::comisd(dst, Address(rscratch1, 0)); | |
1955 } | |
1956 } | |
1957 | |
1958 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { | |
1959 if (reachable(src)) { | |
1960 Assembler::comiss(dst, as_Address(src)); | |
1961 } else { | |
1962 lea(rscratch1, src); | |
1963 Assembler::comiss(dst, Address(rscratch1, 0)); | |
1964 } | |
1965 } | |
1966 | |
1967 | |
1968 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { | |
1969 Condition negated_cond = negate_condition(cond); | |
1970 Label L; | |
1971 jcc(negated_cond, L); | |
1972 atomic_incl(counter_addr); | |
1973 bind(L); | |
1974 } | |
1975 | |
1976 int MacroAssembler::corrected_idivl(Register reg) { | |
1977 // Full implementation of Java idiv and irem; checks for | |
1978 // special case as described in JVM spec., p.243 & p.271. | |
1979 // The function returns the (pc) offset of the idivl | |
1980 // instruction - may be needed for implicit exceptions. | |
1981 // | |
1982 // normal case special case | |
1983 // | |
1984 // input : rax,: dividend min_int | |
1985 // reg: divisor (may not be rax,/rdx) -1 | |
1986 // | |
1987 // output: rax,: quotient (= rax, idiv reg) min_int | |
1988 // rdx: remainder (= rax, irem reg) 0 | |
1989 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); | |
1990 const int min_int = 0x80000000; | |
1991 Label normal_case, special_case; | |
1992 | |
1993 // check for special case | |
1994 cmpl(rax, min_int); | |
1995 jcc(Assembler::notEqual, normal_case); | |
1996 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) | |
1997 cmpl(reg, -1); | |
1998 jcc(Assembler::equal, special_case); | |
1999 | |
2000 // handle normal case | |
2001 bind(normal_case); | |
2002 cdql(); | |
2003 int idivl_offset = offset(); | |
2004 idivl(reg); | |
2005 | |
2006 // normal and special case exit | |
2007 bind(special_case); | |
2008 | |
2009 return idivl_offset; | |
2010 } | |
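// Worked example of the special case: dividend == min_int (0x80000000) with
// divisor == -1 would overflow idivl, since +2^31 is not representable, so
// the code above skips the divide and leaves quotient == min_int in rax and
// remainder == 0 in rdx, as the JVM spec requires.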
2011 | |
2012 | |
2013 | |
2014 void MacroAssembler::decrementl(Register reg, int value) { | |
2015 if (value == min_jint) {subl(reg, value) ; return; } | |
2016 if (value < 0) { incrementl(reg, -value); return; } | |
2017 if (value == 0) { ; return; } | |
2018 if (value == 1 && UseIncDec) { decl(reg) ; return; } | |
2019 /* else */ { subl(reg, value) ; return; } | |
2020 } | |
2021 | |
2022 void MacroAssembler::decrementl(Address dst, int value) { | |
2023 if (value == min_jint) {subl(dst, value) ; return; } | |
2024 if (value < 0) { incrementl(dst, -value); return; } | |
2025 if (value == 0) { ; return; } | |
2026 if (value == 1 && UseIncDec) { decl(dst) ; return; } | |
2027 /* else */ { subl(dst, value) ; return; } | |
2028 } | |
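// The min_jint checks in both overloads above exist because negating
// min_jint wraps back to min_jint in 32-bit two's complement, so the usual
// "negate and forward to incrementl" path cannot be taken; subtracting the
// value directly still produces the intended result.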
2029 | |
2030 void MacroAssembler::division_with_shift (Register reg, int shift_value) { | |
2031 assert (shift_value > 0, "illegal shift value"); | |
2032 Label _is_positive; | |
2033 testl (reg, reg); | |
2034 jcc (Assembler::positive, _is_positive); | |
2035 int offset = (1 << shift_value) - 1 ; | |
2036 | |
2037 if (offset == 1) { | |
2038 incrementl(reg); | |
2039 } else { | |
2040 addl(reg, offset); | |
2041 } | |
2042 | |
2043 bind (_is_positive); | |
2044 sarl(reg, shift_value); | |
2045 } | |
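// Worked example: dividing reg == -7 by 4 (shift_value == 2): the bias
// (1 << 2) - 1 == 3 is added only for negative inputs, giving -4, and the
// arithmetic shift right by 2 then yields -1, i.e. truncation toward zero
// as Java requires; shifting -7 directly would have produced -2.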
2046 | |
2047 void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { | |
2048 if (reachable(src)) { | |
2049 Assembler::divsd(dst, as_Address(src)); | |
2050 } else { | |
2051 lea(rscratch1, src); | |
2052 Assembler::divsd(dst, Address(rscratch1, 0)); | |
2053 } | |
2054 } | |
2055 | |
2056 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { | |
2057 if (reachable(src)) { | |
2058 Assembler::divss(dst, as_Address(src)); | |
2059 } else { | |
2060 lea(rscratch1, src); | |
2061 Assembler::divss(dst, Address(rscratch1, 0)); | |
2062 } | |
2063 } | |
2064 | |
2065 // !defined(COMPILER2) is because of stupid core builds | |
8265 | 2066 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) || defined(GRAAL) | |
7199 | 2067 void MacroAssembler::empty_FPU_stack() { |
2068 if (VM_Version::supports_mmx()) { | |
2069 emms(); | |
2070 } else { | |
2071 for (int i = 8; i-- > 0; ) ffree(i); | |
2072 } | |
2073 } | |
2074 #endif // !LP64 || C1 || !C2 | |
2075 | |
2076 | |
2077 // Defines obj, preserves var_size_in_bytes | |
2078 void MacroAssembler::eden_allocate(Register obj, | |
2079 Register var_size_in_bytes, | |
2080 int con_size_in_bytes, | |
2081 Register t1, | |
2082 Label& slow_case) { | |
2083 assert(obj == rax, "obj must be in rax, for cmpxchg"); | |
2084 assert_different_registers(obj, var_size_in_bytes, t1); | |
2085 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { | |
2086 jmp(slow_case); | |
2087 } else { | |
2088 Register end = t1; | |
2089 Label retry; | |
2090 bind(retry); | |
2091 ExternalAddress heap_top((address) Universe::heap()->top_addr()); | |
2092 movptr(obj, heap_top); | |
2093 if (var_size_in_bytes == noreg) { | |
2094 lea(end, Address(obj, con_size_in_bytes)); | |
2095 } else { | |
2096 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); | |
2097 } | |
2098 // if end < obj then we wrapped around => object too long => slow case | |
2099 cmpptr(end, obj); | |
2100 jcc(Assembler::below, slow_case); | |
2101 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); | |
2102 jcc(Assembler::above, slow_case); | |
2103 // Compare obj with the top addr, and if still equal, store the new top addr in | |
2104 // end at the address of the top addr pointer. Sets ZF if was equal, and clears | |
2105 // it otherwise. Use lock prefix for atomicity on MPs. | |
2106 locked_cmpxchgptr(end, heap_top); | |
2107 jcc(Assembler::notEqual, retry); | |
2108 } | |
2109 } | |
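// The fast path above is a bump-the-pointer CAS loop: read the current eden
// top into obj, compute end = obj + size, bail out to the slow path on
// wrap-around or when end passes the eden limit, then try to install end as
// the new top with a locked cmpxchg; if another thread allocated first the
// compare fails and the loop retries from a fresh read of top.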
2110 | |
2111 void MacroAssembler::enter() { | |
2112 push(rbp); | |
2113 mov(rbp, rsp); | |
2114 } | |
2115 | |
2116 // A 5 byte nop that is safe for patching (see patch_verified_entry) | |
2117 void MacroAssembler::fat_nop() { | |
2118 if (UseAddressNop) { | |
2119 addr_nop_5(); | |
2120 } else { | |
7430 | 2121 emit_int8(0x26); // es: | |
2122 emit_int8(0x2e); // cs: | |
2123 emit_int8(0x64); // fs: | |
2124 emit_int8(0x65); // gs: | |
2125 emit_int8((unsigned char)0x90); | |
7199 | 2126 } |
2127 } | |
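// With UseAddressNop off, the fallback above emits 0x26 0x2e 0x64 0x65 0x90:
// four segment-override prefixes in front of a one-byte nop. The sequence
// decodes as a single 5-byte instruction, which (per the comment above) is
// what keeps it safe for patch_verified_entry to overwrite later.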
2128 | |
2129 void MacroAssembler::fcmp(Register tmp) { | |
2130 fcmp(tmp, 1, true, true); | |
2131 } | |
2132 | |
2133 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { | |
2134 assert(!pop_right || pop_left, "usage error"); | |
2135 if (VM_Version::supports_cmov()) { | |
2136 assert(tmp == noreg, "unneeded temp"); | |
2137 if (pop_left) { | |
2138 fucomip(index); | |
2139 } else { | |
2140 fucomi(index); | |
2141 } | |
2142 if (pop_right) { | |
2143 fpop(); | |
2144 } | |
2145 } else { | |
2146 assert(tmp != noreg, "need temp"); | |
2147 if (pop_left) { | |
2148 if (pop_right) { | |
2149 fcompp(); | |
2150 } else { | |
2151 fcomp(index); | |
2152 } | |
2153 } else { | |
2154 fcom(index); | |
2155 } | |
2156 // convert FPU condition into eflags condition via rax, | |
2157 save_rax(tmp); | |
2158 fwait(); fnstsw_ax(); | |
2159 sahf(); | |
2160 restore_rax(tmp); | |
2161 } | |
2162 // condition codes set as follows: | |
2163 // | |
2164 // CF (corresponds to C0) if x < y | |
2165 // PF (corresponds to C2) if unordered | |
2166 // ZF (corresponds to C3) if x = y | |
2167 } | |
2168 | |
2169 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { | |
2170 fcmp2int(dst, unordered_is_less, 1, true, true); | |
2171 } | |
2172 | |
2173 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { | |
2174 fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right); | |
2175 Label L; | |
2176 if (unordered_is_less) { | |
2177 movl(dst, -1); | |
2178 jcc(Assembler::parity, L); | |
2179 jcc(Assembler::below , L); | |
2180 movl(dst, 0); | |
2181 jcc(Assembler::equal , L); | |
2182 increment(dst); | |
2183 } else { // unordered is greater | |
2184 movl(dst, 1); | |
2185 jcc(Assembler::parity, L); | |
2186 jcc(Assembler::above , L); | |
2187 movl(dst, 0); | |
2188 jcc(Assembler::equal , L); | |
2189 decrementl(dst); | |
2190 } | |
2191 bind(L); | |
2192 } | |
2193 | |
2194 void MacroAssembler::fld_d(AddressLiteral src) { | |
2195 fld_d(as_Address(src)); | |
2196 } | |
2197 | |
2198 void MacroAssembler::fld_s(AddressLiteral src) { | |
2199 fld_s(as_Address(src)); | |
2200 } | |
2201 | |
2202 void MacroAssembler::fld_x(AddressLiteral src) { | |
2203 Assembler::fld_x(as_Address(src)); | |
2204 } | |
2205 | |
2206 void MacroAssembler::fldcw(AddressLiteral src) { | |
2207 Assembler::fldcw(as_Address(src)); | |
2208 } | |
2209 | |
2210 void MacroAssembler::pow_exp_core_encoding() { | |
2211 // kills rax, rcx, rdx | |
2212 subptr(rsp,sizeof(jdouble)); | |
2213 // computes 2^X. Stack: X ... | |
2214 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and | |
2215 // keep it on the thread's stack to compute 2^int(X) later | |
2216 // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1) | |
2217 // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) | |
2218 fld_s(0); // Stack: X X ... | |
2219 frndint(); // Stack: int(X) X ... | |
2220 fsuba(1); // Stack: int(X) X-int(X) ... | |
2221 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... | |
2222 f2xm1(); // Stack: 2^(X-int(X))-1 ... | |
2223 fld1(); // Stack: 1 2^(X-int(X))-1 ... | |
2224 faddp(1); // Stack: 2^(X-int(X)) | |
2225 // computes 2^(int(X)): add exponent bias (1023) to int(X), then | |
2226 // shift int(X)+1023 to exponent position. | |
2227 // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11 | |
2228 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent | |
2229 // values so detect them and set result to NaN. | |
2230 movl(rax,Address(rsp,0)); | |
2231 movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding | |
2232 addl(rax, 1023); | |
2233 movl(rdx,rax); | |
2234 shll(rax,20); | |
2235 // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. | |
2236 addl(rdx,1); | |
2237 // Check that 1 < int(X)+1023+1 < 2048 | |
2238 // in 3 steps: | |
2239 // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 | |
2240 // 2- (int(X)+1023+1)&-2048 != 0 | |
2241 // 3- (int(X)+1023+1)&-2048 != 1 | |
2242 // Do 2- first because addl just updated the flags. | |
2243 cmov32(Assembler::equal,rax,rcx); | |
2244 cmpl(rdx,1); | |
2245 cmov32(Assembler::equal,rax,rcx); | |
2246 testl(rdx,rcx); | |
2247 cmov32(Assembler::notEqual,rax,rcx); | |
2248 movl(Address(rsp,4),rax); | |
2249 movl(Address(rsp,0),0); | |
2250 fmul_d(Address(rsp,0)); // Stack: 2^X ... | |
2251 addptr(rsp,sizeof(jdouble)); | |
2252 } | |
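// Worked example of the exponent encoding above: for int(X) == 3 the code
// forms 3 + 1023 == 1026 == 0x402, shifts it left by 20 into the high dword
// (0x40200000), and with a zero low dword and zero mantissa that bit pattern
// is the double 8.0 == 2^3, which fmul_d then uses to scale the
// 2^(X-int(X)) value left on the FPU stack.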
2253 | |
2254 void MacroAssembler::increase_precision() { | |
2255 subptr(rsp, BytesPerWord); | |
2256 fnstcw(Address(rsp, 0)); | |
2257 movl(rax, Address(rsp, 0)); | |
2258 orl(rax, 0x300); | |
2259 push(rax); | |
2260 fldcw(Address(rsp, 0)); | |
2261 pop(rax); | |
2262 } | |
2263 | |
2264 void MacroAssembler::restore_precision() { | |
2265 fldcw(Address(rsp, 0)); | |
2266 addptr(rsp, BytesPerWord); | |
2267 } | |
2268 | |
2269 void MacroAssembler::fast_pow() { | |
2270 // computes X^Y = 2^(Y * log2(X)) | |
2271 // if fast computation is not possible, result is NaN. Requires | |
2272 // fallback from user of this macro. | |
2273 // increase precision for intermediate steps of the computation | |
2274 increase_precision(); | |
2275 fyl2x(); // Stack: (Y*log2(X)) ... | |
2276 pow_exp_core_encoding(); // Stack: exp(X) ... | |
2277 restore_precision(); | |
2278 } | |
2279 | |
2280 void MacroAssembler::fast_exp() { | |
2281 // computes exp(X) = 2^(X * log2(e)) | |
2282 // if fast computation is not possible, result is NaN. Requires | |
2283 // fallback from user of this macro. | |
2284 // increase precision for intermediate steps of the computation | |
2285 increase_precision(); | |
2286 fldl2e(); // Stack: log2(e) X ... | |
2287 fmulp(1); // Stack: (X*log2(e)) ... | |
2288 pow_exp_core_encoding(); // Stack: exp(X) ... | |
2289 restore_precision(); | |
2290 } | |
2291 | |
2292 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { | |
2293 // kills rax, rcx, rdx | |
2294 // pow and exp needs 2 extra registers on the fpu stack. | |
2295 Label slow_case, done; | |
2296 Register tmp = noreg; | |
2297 if (!VM_Version::supports_cmov()) { | |
2298 // fcmp needs a temporary so preserve rdx, | |
2299 tmp = rdx; | |
2300 } | |
2301 Register tmp2 = rax; | |
2302 Register tmp3 = rcx; | |
2303 | |
2304 if (is_exp) { | |
2305 // Stack: X | |
2306 fld_s(0); // duplicate argument for runtime call. Stack: X X | |
2307 fast_exp(); // Stack: exp(X) X | |
2308 fcmp(tmp, 0, false, false); // Stack: exp(X) X | |
2309 // exp(X) not equal to itself: exp(X) is NaN go to slow case. | |
2310 jcc(Assembler::parity, slow_case); | |
2311 // get rid of duplicate argument. Stack: exp(X) | |
2312 if (num_fpu_regs_in_use > 0) { | |
2313 fxch(); | |
2314 fpop(); | |
2315 } else { | |
2316 ffree(1); | |
2317 } | |
2318 jmp(done); | |
2319 } else { | |
2320 // Stack: X Y | |
2321 Label x_negative, y_odd; | |
2322 | |
2323 fldz(); // Stack: 0 X Y | |
2324 fcmp(tmp, 1, true, false); // Stack: X Y | |
2325 jcc(Assembler::above, x_negative); | |
2326 | |
2327 // X >= 0 | |
2328 | |
2329 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y | |
2330 fld_s(1); // Stack: X Y X Y | |
2331 fast_pow(); // Stack: X^Y X Y | |
2332 fcmp(tmp, 0, false, false); // Stack: X^Y X Y | |
2333 // X^Y not equal to itself: X^Y is NaN go to slow case. | |
2334 jcc(Assembler::parity, slow_case); | |
2335 // get rid of duplicate arguments. Stack: X^Y | |
2336 if (num_fpu_regs_in_use > 0) { | |
2337 fxch(); fpop(); | |
2338 fxch(); fpop(); | |
2339 } else { | |
2340 ffree(2); | |
2341 ffree(1); | |
2342 } | |
2343 jmp(done); | |
2344 | |
2345 // X <= 0 | |
2346 bind(x_negative); | |
2347 | |
2348 fld_s(1); // Stack: Y X Y | |
2349 frndint(); // Stack: int(Y) X Y | |
2350 fcmp(tmp, 2, false, false); // Stack: int(Y) X Y | |
2351 jcc(Assembler::notEqual, slow_case); | |
2352 | |
2353 subptr(rsp, 8); | |
2354 | |
2355 // For X^Y, when X < 0, Y has to be an integer and the final | |
2356 // result depends on whether it's odd or even. We just checked | |
2357 // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit | |
2358 // integer to test its parity. If int(Y) is huge and doesn't fit | |
2359 // in the 64 bit integer range, the integer indefinite value will | |
2360 // end up in the gp registers. Huge numbers are all even, the | |
2361 // integer indefinite number is even so it's fine. | |
2362 | |
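// For example (values chosen for illustration): (-2.0)^3.0 is computed as
// abs(-2.0)^3.0 == 8.0 and then negated to -8.0 because int(Y) is odd, while
// (-2.0)^2.0 stays at abs(-2.0)^2.0 == 4.0 because int(Y) is even; the
// parity is tested on the low bit of int(Y) further down.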
2363 #ifdef ASSERT | |
2364 // Let's check we don't end up with an integer indefinite number | |
2365 // when not expected. First test for huge numbers: check whether | |
2366 // int(Y)+1 == int(Y) which is true for very large numbers and | |
2367 // those are all even. A 64 bit integer is guaranteed to not | |
2368 // overflow for numbers where y+1 != y (when precision is set to | |
2369 // double precision). | |
2370 Label y_not_huge; | |
2371 | |
2372 fld1(); // Stack: 1 int(Y) X Y | |
2373 fadd(1); // Stack: 1+int(Y) int(Y) X Y | |
2374 | |
2375 #ifdef _LP64 | |
2376 // trip to memory to force the precision down from double extended | |
2377 // precision | |
2378 fstp_d(Address(rsp, 0)); | |
2379 fld_d(Address(rsp, 0)); | |
2380 #endif | |
2381 | |
2382 fcmp(tmp, 1, true, false); // Stack: int(Y) X Y | |
2383 #endif | |
2384 | |
2385 // move int(Y) as 64 bit integer to thread's stack | |
2386 fistp_d(Address(rsp,0)); // Stack: X Y | |
2387 | |
2388 #ifdef ASSERT | |
2389 jcc(Assembler::notEqual, y_not_huge); | |
2390 | |
2391 // Y is huge so we know it's even. It may not fit in a 64 bit | |
2392 // integer and we don't want the debug code below to see the | |
2393 // integer indefinite value so overwrite int(Y) on the thread's | |
2394 // stack with 0. | |
2395 movl(Address(rsp, 0), 0); | |
2396 movl(Address(rsp, 4), 0); | |
2397 | |
2398 bind(y_not_huge); | |
2399 #endif | |
2400 | |
2401 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y | |
2402 fld_s(1); // Stack: X Y X Y | |
2403 fabs(); // Stack: abs(X) Y X Y | |
2404 fast_pow(); // Stack: abs(X)^Y X Y | |
2405 fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y | |
2406 // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case. | |
2407 | |
2408 pop(tmp2); | |
2409 NOT_LP64(pop(tmp3)); | |
2410 jcc(Assembler::parity, slow_case); | |
2411 | |
2412 #ifdef ASSERT | |
2413 // Check that int(Y) is not integer indefinite value (int | |
2414 // overflow). Shouldn't happen because for values that would | |
2415 // overflow, 1+int(Y)==Y which was tested earlier. | |
2416 #ifndef _LP64 | |
2417 { | |
2418 Label integer; | |
2419 testl(tmp2, tmp2); | |
2420 jcc(Assembler::notZero, integer); | |
2421 cmpl(tmp3, 0x80000000); | |
2422 jcc(Assembler::notZero, integer); | |
2423 STOP("integer indefinite value shouldn't be seen here"); | |
2424 bind(integer); | |
2425 } | |
2426 #else | |
2427 { | |
2428 Label integer; | |
2429 mov(tmp3, tmp2); // preserve tmp2 for parity check below | |
2430 shlq(tmp3, 1); | |
2431 jcc(Assembler::carryClear, integer); | |
2432 jcc(Assembler::notZero, integer); | |
2433 STOP("integer indefinite value shouldn't be seen here"); | |
2434 bind(integer); | |
2435 } | |
2436 #endif | |
2437 #endif | |
2438 | |
2439 // get rid of duplicate arguments. Stack: X^Y | |
2440 if (num_fpu_regs_in_use > 0) { | |
2441 fxch(); fpop(); | |
2442 fxch(); fpop(); | |
2443 } else { | |
2444 ffree(2); | |
2445 ffree(1); | |
2446 } | |
2447 | |
2448 testl(tmp2, 1); | |
2449 jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y | |
2450 // X <= 0, Y odd: X^Y = -abs(X)^Y | |
2451 | |
2452 fchs(); // Stack: -abs(X)^Y Y | |
2453 jmp(done); | |
2454 } | |
2455 | |
2456 // slow case: runtime call | |
2457 bind(slow_case); | |
2458 | |
2459 fpop(); // pop incorrect result or int(Y) | |
2460 | |
2461 fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), | |
2462 is_exp ? 1 : 2, num_fpu_regs_in_use); | |
2463 | |
2464 // Come here with result in F-TOS | |
2465 bind(done); | |
2466 } | |
2467 | |
2468 void MacroAssembler::fpop() { | |
2469 ffree(); | |
2470 fincstp(); | |
2471 } | |
2472 | |
2473 void MacroAssembler::fremr(Register tmp) { | |
2474 save_rax(tmp); | |
2475 { Label L; | |
2476 bind(L); | |
2477 fprem(); | |
2478 fwait(); fnstsw_ax(); | |
2479 #ifdef _LP64 | |
2480 testl(rax, 0x400); | |
2481 jcc(Assembler::notEqual, L); | |
2482 #else | |
2483 sahf(); | |
2484 jcc(Assembler::parity, L); | |
2485 #endif // _LP64 | |
2486 } | |
2487 restore_rax(tmp); | |
2488 // Result is in ST0. | |
2489 // Note: fxch & fpop to get rid of ST1 | |
2490 // (otherwise FPU stack could overflow eventually) | |
2491 fxch(1); | |
2492 fpop(); | |
2493 } | |
2494 | |
2495 | |
2496 void MacroAssembler::incrementl(AddressLiteral dst) { | |
2497 if (reachable(dst)) { | |
2498 incrementl(as_Address(dst)); | |
2499 } else { | |
2500 lea(rscratch1, dst); | |
2501 incrementl(Address(rscratch1, 0)); | |
2502 } | |
2503 } | |
2504 | |
2505 void MacroAssembler::incrementl(ArrayAddress dst) { | |
2506 incrementl(as_Address(dst)); | |
2507 } | |
2508 | |
2509 void MacroAssembler::incrementl(Register reg, int value) { | |
2510 if (value == min_jint) {addl(reg, value) ; return; } | |
2511 if (value < 0) { decrementl(reg, -value); return; } | |
2512 if (value == 0) { ; return; } | |
2513 if (value == 1 && UseIncDec) { incl(reg) ; return; } | |
2514 /* else */ { addl(reg, value) ; return; } | |
2515 } | |
2516 | |
2517 void MacroAssembler::incrementl(Address dst, int value) { | |
2518 if (value == min_jint) {addl(dst, value) ; return; } | |
2519 if (value < 0) { decrementl(dst, -value); return; } | |
2520 if (value == 0) { ; return; } | |
2521 if (value == 1 && UseIncDec) { incl(dst) ; return; } | |
2522 /* else */ { addl(dst, value) ; return; } | |
2523 } | |
2524 | |
2525 void MacroAssembler::jump(AddressLiteral dst) { | |
2526 if (reachable(dst)) { | |
2527 jmp_literal(dst.target(), dst.rspec()); | |
2528 } else { | |
2529 lea(rscratch1, dst); | |
2530 jmp(rscratch1); | |
2531 } | |
2532 } | |
2533 | |
2534 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { | |
2535 if (reachable(dst)) { | |
2536 InstructionMark im(this); | |
2537 relocate(dst.reloc()); | |
2538 const int short_size = 2; | |
2539 const int long_size = 6; | |
2540 int offs = (intptr_t)dst.target() - ((intptr_t)pc()); | |
2541 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { | |
2542 // 0111 tttn #8-bit disp | |
7430 | 2543 emit_int8(0x70 | cc); | |
2544 emit_int8((offs - short_size) & 0xFF); | |
7199 | 2545 } else { |
2546 // 0000 1111 1000 tttn #32-bit disp | |
7430 | 2547 emit_int8(0x0F); | |
2548 emit_int8((unsigned char)(0x80 | cc)); | |
7476 | 2549 emit_int32(offs - long_size); | |
7199 | 2550 } |
2551 } else { | |
2552 #ifdef ASSERT | |
2553 warning("reversing conditional branch"); | |
2554 #endif /* ASSERT */ | |
2555 Label skip; | |
2556 jccb(reverse[cc], skip); | |
2557 lea(rscratch1, dst); | |
2558 Assembler::jmp(rscratch1); | |
2559 bind(skip); | |
2560 } | |
2561 } | |
2562 | |
2563 void MacroAssembler::ldmxcsr(AddressLiteral src) { | |
2564 if (reachable(src)) { | |
2565 Assembler::ldmxcsr(as_Address(src)); | |
2566 } else { | |
2567 lea(rscratch1, src); | |
2568 Assembler::ldmxcsr(Address(rscratch1, 0)); | |
2569 } | |
2570 } | |
2571 | |
2572 int MacroAssembler::load_signed_byte(Register dst, Address src) { | |
2573 int off; | |
2574 if (LP64_ONLY(true ||) VM_Version::is_P6()) { | |
2575 off = offset(); | |
2576 movsbl(dst, src); // movsxb | |
2577 } else { | |
2578 off = load_unsigned_byte(dst, src); | |
2579 shll(dst, 24); | |
2580 sarl(dst, 24); | |
2581 } | |
2582 return off; | |
2583 } | |
2584 | |
2585 // Note: load_signed_short used to be called load_signed_word. | |
2586 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler | |
2587 // manual, which means 16 bits, that usage is found nowhere in HotSpot code. | |
2588 // The term "word" in HotSpot means a 32- or 64-bit machine word. | |
2589 int MacroAssembler::load_signed_short(Register dst, Address src) { | |
2590 int off; | |
2591 if (LP64_ONLY(true ||) VM_Version::is_P6()) { | |
2592 // This is dubious to me since it seems safe to do a signed 16 => 64 bit | |
2593 // version but this is what 64bit has always done. This seems to imply | |
2594 // that users are only using 32bits worth. | |
2595 off = offset(); | |
2596 movswl(dst, src); // movsxw | |
2597 } else { | |
2598 off = load_unsigned_short(dst, src); | |
2599 shll(dst, 16); | |
2600 sarl(dst, 16); | |
2601 } | |
2602 return off; | |
2603 } | |
2604 | |
2605 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { | |
2606 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, | |
2607 // and "3.9 Partial Register Penalties", p. 22). | |
2608 int off; | |
2609 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { | |
2610 off = offset(); | |
2611 movzbl(dst, src); // movzxb | |
2612 } else { | |
2613 xorl(dst, dst); | |
2614 off = offset(); | |
2615 movb(dst, src); | |
2616 } | |
2617 return off; | |
2618 } | |
2619 | |
2620 // Note: load_unsigned_short used to be called load_unsigned_word. | |
2621 int MacroAssembler::load_unsigned_short(Register dst, Address src) { | |
2622 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, | |
2623 // and "3.9 Partial Register Penalties", p. 22). | |
2624 int off; | |
2625 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { | |
2626 off = offset(); | |
2627 movzwl(dst, src); // movzxw | |
2628 } else { | |
2629 xorl(dst, dst); | |
2630 off = offset(); | |
2631 movw(dst, src); | |
2632 } | |
2633 return off; | |
2634 } | |
2635 | |
2636 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { | |
2637 switch (size_in_bytes) { | |
2638 #ifndef _LP64 | |
2639 case 8: | |
2640 assert(dst2 != noreg, "second dest register required"); | |
2641 movl(dst, src); | |
2642 movl(dst2, src.plus_disp(BytesPerInt)); | |
2643 break; | |
2644 #else | |
2645 case 8: movq(dst, src); break; | |
2646 #endif | |
2647 case 4: movl(dst, src); break; | |
2648 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; | |
2649 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; | |
2650 default: ShouldNotReachHere(); | |
2651 } | |
2652 } | |
2653 | |
2654 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { | |
2655 switch (size_in_bytes) { | |
2656 #ifndef _LP64 | |
2657 case 8: | |
2658 assert(src2 != noreg, "second source register required"); | |
2659 movl(dst, src); | |
2660 movl(dst.plus_disp(BytesPerInt), src2); | |
2661 break; | |
2662 #else | |
2663 case 8: movq(dst, src); break; | |
2664 #endif | |
2665 case 4: movl(dst, src); break; | |
2666 case 2: movw(dst, src); break; | |
2667 case 1: movb(dst, src); break; | |
2668 default: ShouldNotReachHere(); | |
2669 } | |
2670 } | |
2671 | |
2672 void MacroAssembler::mov32(AddressLiteral dst, Register src) { | |
2673 if (reachable(dst)) { | |
2674 movl(as_Address(dst), src); | |
2675 } else { | |
2676 lea(rscratch1, dst); | |
2677 movl(Address(rscratch1, 0), src); | |
2678 } | |
2679 } | |
2680 | |
2681 void MacroAssembler::mov32(Register dst, AddressLiteral src) { | |
2682 if (reachable(src)) { | |
2683 movl(dst, as_Address(src)); | |
2684 } else { | |
2685 lea(rscratch1, src); | |
2686 movl(dst, Address(rscratch1, 0)); | |
2687 } | |
2688 } | |
2689 | |
2690 // C++ bool manipulation | |
2691 | |
2692 void MacroAssembler::movbool(Register dst, Address src) { | |
2693 if(sizeof(bool) == 1) | |
2694 movb(dst, src); | |
2695 else if(sizeof(bool) == 2) | |
2696 movw(dst, src); | |
2697 else if(sizeof(bool) == 4) | |
2698 movl(dst, src); | |
2699 else | |
2700 // unsupported | |
2701 ShouldNotReachHere(); | |
2702 } | |
2703 | |
2704 void MacroAssembler::movbool(Address dst, bool boolconst) { | |
2705 if(sizeof(bool) == 1) | |
2706 movb(dst, (int) boolconst); | |
2707 else if(sizeof(bool) == 2) | |
2708 movw(dst, (int) boolconst); | |
2709 else if(sizeof(bool) == 4) | |
2710 movl(dst, (int) boolconst); | |
2711 else | |
2712 // unsupported | |
2713 ShouldNotReachHere(); | |
2714 } | |
2715 | |
2716 void MacroAssembler::movbool(Address dst, Register src) { | |
2717 if(sizeof(bool) == 1) | |
2718 movb(dst, src); | |
2719 else if(sizeof(bool) == 2) | |
2720 movw(dst, src); | |
2721 else if(sizeof(bool) == 4) | |
2722 movl(dst, src); | |
2723 else | |
2724 // unsupported | |
2725 ShouldNotReachHere(); | |
2726 } | |
2727 | |
2728 void MacroAssembler::movbyte(ArrayAddress dst, int src) { | |
2729 movb(as_Address(dst), src); | |
2730 } | |
2731 | |
2732 void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { | |
2733 if (reachable(src)) { | |
2734 movdl(dst, as_Address(src)); | |
2735 } else { | |
2736 lea(rscratch1, src); | |
2737 movdl(dst, Address(rscratch1, 0)); | |
2738 } | |
2739 } | |
2740 | |
2741 void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { | |
2742 if (reachable(src)) { | |
2743 movq(dst, as_Address(src)); | |
2744 } else { | |
2745 lea(rscratch1, src); | |
2746 movq(dst, Address(rscratch1, 0)); | |
2747 } | |
2748 } | |
2749 | |
2750 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { | |
2751 if (reachable(src)) { | |
2752 if (UseXmmLoadAndClearUpper) { | |
2753 movsd (dst, as_Address(src)); | |
2754 } else { | |
2755 movlpd(dst, as_Address(src)); | |
2756 } | |
2757 } else { | |
2758 lea(rscratch1, src); | |
2759 if (UseXmmLoadAndClearUpper) { | |
2760 movsd (dst, Address(rscratch1, 0)); | |
2761 } else { | |
2762 movlpd(dst, Address(rscratch1, 0)); | |
2763 } | |
2764 } | |
2765 } | |
2766 | |
2767 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { | |
2768 if (reachable(src)) { | |
2769 movss(dst, as_Address(src)); | |
2770 } else { | |
2771 lea(rscratch1, src); | |
2772 movss(dst, Address(rscratch1, 0)); | |
2773 } | |
2774 } | |
2775 | |
2776 void MacroAssembler::movptr(Register dst, Register src) { | |
2777 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); | |
2778 } | |
2779 | |
2780 void MacroAssembler::movptr(Register dst, Address src) { | |
2781 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); | |
2782 } | |
2783 | |
2784 // src should NEVER be a real pointer. Use AddressLiteral for true pointers | |
2785 void MacroAssembler::movptr(Register dst, intptr_t src) { | |
2786 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); | |
2787 } | |
2788 | |
2789 void MacroAssembler::movptr(Address dst, Register src) { | |
2790 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); | |
2791 } | |
2792 | |
2793 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { | |
2794 if (reachable(src)) { | |
2795 Assembler::movdqu(dst, as_Address(src)); | |
2796 } else { | |
2797 lea(rscratch1, src); | |
2798 Assembler::movdqu(dst, Address(rscratch1, 0)); | |
2799 } | |
2800 } | |
2801 | |
11080 | 2802 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) { | |
2803 if (reachable(src)) { | |
2804 Assembler::movdqa(dst, as_Address(src)); | |
2805 } else { | |
2806 lea(rscratch1, src); | |
2807 Assembler::movdqa(dst, Address(rscratch1, 0)); | |
2808 } | |
2809 } | |
2810 | |
7199 | 2811 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { |
2812 if (reachable(src)) { | |
2813 Assembler::movsd(dst, as_Address(src)); | |
2814 } else { | |
2815 lea(rscratch1, src); | |
2816 Assembler::movsd(dst, Address(rscratch1, 0)); | |
2817 } | |
2818 } | |
2819 | |
2820 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { | |
2821 if (reachable(src)) { | |
2822 Assembler::movss(dst, as_Address(src)); | |
2823 } else { | |
2824 lea(rscratch1, src); | |
2825 Assembler::movss(dst, Address(rscratch1, 0)); | |
2826 } | |
2827 } | |
2828 | |
2829 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { | |
2830 if (reachable(src)) { | |
2831 Assembler::mulsd(dst, as_Address(src)); | |
2832 } else { | |
2833 lea(rscratch1, src); | |
2834 Assembler::mulsd(dst, Address(rscratch1, 0)); | |
2835 } | |
2836 } | |
2837 | |
2838 void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { | |
2839 if (reachable(src)) { | |
2840 Assembler::mulss(dst, as_Address(src)); | |
2841 } else { | |
2842 lea(rscratch1, src); | |
2843 Assembler::mulss(dst, Address(rscratch1, 0)); | |
2844 } | |
2845 } | |
2846 | |
2847 void MacroAssembler::null_check(Register reg, int offset) { | |
2848 if (needs_explicit_null_check(offset)) { | |
2849 // provoke OS NULL exception if reg = NULL by | |
2850 // accessing M[reg] w/o changing any (non-CC) registers | |
2851 // NOTE: cmpl is plenty here to provoke a segv | |
2852 cmpptr(rax, Address(reg, 0)); | |
2853 // Note: should probably use testl(rax, Address(reg, 0)); | |
2854 // may be shorter code (however, this version of | |
2855 // testl needs to be implemented first) | |
2856 } else { | |
2857 // nothing to do, (later) access of M[reg + offset] | |
2858 // will provoke OS NULL exception if reg = NULL | |
2859 } | |
2860 } | |
2861 | |
2862 void MacroAssembler::os_breakpoint() { | |
2863 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability | |
2864 // (e.g., MSVC can't call ps() otherwise) | |
2865 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); | |
2866 } | |
2867 | |
2868 void MacroAssembler::pop_CPU_state() { | |
2869 pop_FPU_state(); | |
2870 pop_IU_state(); | |
2871 } | |
2872 | |
2873 void MacroAssembler::pop_FPU_state() { | |
2874 NOT_LP64(frstor(Address(rsp, 0));) | |
2875 LP64_ONLY(fxrstor(Address(rsp, 0));) | |
2876 addptr(rsp, FPUStateSizeInWords * wordSize); | |
2877 } | |
2878 | |
2879 void MacroAssembler::pop_IU_state() { | |
2880 popa(); | |
2881 LP64_ONLY(addq(rsp, 8)); | |
2882 popf(); | |
2883 } | |
2884 | |
2885 // Save Integer and Float state | |
2886 // Warning: Stack must be 16 byte aligned (64bit) | |
2887 void MacroAssembler::push_CPU_state() { | |
2888 push_IU_state(); | |
2889 push_FPU_state(); | |
2890 } | |
2891 | |
2892 void MacroAssembler::push_FPU_state() { | |
2893 subptr(rsp, FPUStateSizeInWords * wordSize); | |
2894 #ifndef _LP64 | |
2895 fnsave(Address(rsp, 0)); | |
2896 fwait(); | |
2897 #else | |
2898 fxsave(Address(rsp, 0)); | |
2899 #endif // LP64 | |
2900 } | |
2901 | |
2902 void MacroAssembler::push_IU_state() { | |
2903 // Push flags first because pusha kills them | |
2904 pushf(); | |
2905 // Make sure rsp stays 16-byte aligned | |
2906 LP64_ONLY(subq(rsp, 8)); | |
2907 pusha(); | |
2908 } | |
2909 | |
2910 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { | |
2911 // determine java_thread register | |
2912 if (!java_thread->is_valid()) { | |
2913 java_thread = rdi; | |
2914 get_thread(java_thread); | |
2915 } | |
2916 // we must set sp to zero to clear frame | |
2917 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); | |
2918 if (clear_fp) { | |
2919 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); | |
2920 } | |
2921 | |
2922 if (clear_pc) | |
2923 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); | |
2924 | |
2925 } | |
2926 | |
2927 void MacroAssembler::restore_rax(Register tmp) { | |
2928 if (tmp == noreg) pop(rax); | |
2929 else if (tmp != rax) mov(rax, tmp); | |
2930 } | |
2931 | |
2932 void MacroAssembler::round_to(Register reg, int modulus) { | |
2933 addptr(reg, modulus - 1); | |
2934 andptr(reg, -modulus); | |
2935 } | |
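// Illustrative sketch (not from the original source): round_to() rounds a
// register up to the next multiple of `modulus`, which must be a power of two,
// via the usual add-then-mask idiom used above. In C-like form:
//
//   intptr_t round_to(intptr_t x, intptr_t modulus) {
//     return (x + modulus - 1) & -modulus;   // e.g. modulus 8: 13 -> 16, 16 stays 16
//   }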
2936 | |
2937 void MacroAssembler::save_rax(Register tmp) { | |
2938 if (tmp == noreg) push(rax); | |
2939 else if (tmp != rax) mov(tmp, rax); | |
2940 } | |
2941 | |
2942 // Write the serialization page so the VM thread can do a pseudo remote membar. |
2943 // We use the current thread pointer to calculate a thread-specific |
2944 // offset to write to within the page. This minimizes bus traffic |
2945 // due to cache line collision. | |
2946 void MacroAssembler::serialize_memory(Register thread, Register tmp) { | |
2947 movl(tmp, thread); | |
2948 shrl(tmp, os::get_serialize_page_shift_count()); | |
2949 andl(tmp, (os::vm_page_size() - sizeof(int))); | |
2950 | |
2951 Address index(noreg, tmp, Address::times_1); | |
2952 ExternalAddress page(os::get_memory_serialize_page()); | |
2953 | |
2954 // Size of store must match masking code above | |
2955 movl(as_Address(ArrayAddress(page, index)), tmp); | |
2956 } | |
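// Illustrative sketch (not from the original source): the store emitted above
// is roughly equivalent to the following, with the masked offset itself used
// as the stored value so the store size matches the masking:
//
//   uintptr_t off = ((uintptr_t)thread >> os::get_serialize_page_shift_count())
//                   & (os::vm_page_size() - sizeof(int));
//   *(volatile int*)(os::get_memory_serialize_page() + off) = (int)off;
//
// Each thread thus hashes to its own int-sized slot in the serialization page,
// keeping concurrent writers on different cache lines where possible.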
2957 | |
2958 // Calls to C land | |
2959 // | |
2960 // When entering C land, the rbp and rsp of the last Java frame have to be recorded |
2961 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp | |
2962 // has to be reset to 0. This is required to allow proper stack traversal. | |
2963 void MacroAssembler::set_last_Java_frame(Register java_thread, | |
2964 Register last_java_sp, | |
2965 Register last_java_fp, | |
2966 address last_java_pc) { | |
2967 // determine java_thread register | |
2968 if (!java_thread->is_valid()) { | |
2969 java_thread = rdi; | |
2970 get_thread(java_thread); | |
2971 } | |
2972 // determine last_java_sp register | |
2973 if (!last_java_sp->is_valid()) { | |
2974 last_java_sp = rsp; | |
2975 } | |
2976 | |
2977 // last_java_fp is optional | |
2978 | |
2979 if (last_java_fp->is_valid()) { | |
2980 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); | |
2981 } | |
2982 | |
2983 // last_java_pc is optional | |
2984 | |
2985 if (last_java_pc != NULL) { | |
2986 lea(Address(java_thread, | |
2987 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), | |
2988 InternalAddress(last_java_pc)); | |
2989 | |
2990 } | |
2991 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); | |
2992 } | |
2993 | |
2994 void MacroAssembler::shlptr(Register dst, int imm8) { | |
2995 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); | |
2996 } | |
2997 | |
2998 void MacroAssembler::shrptr(Register dst, int imm8) { | |
2999 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); | |
3000 } | |
3001 | |
3002 void MacroAssembler::sign_extend_byte(Register reg) { | |
3003 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { | |
3004 movsbl(reg, reg); // movsxb | |
3005 } else { | |
3006 shll(reg, 24); | |
3007 sarl(reg, 24); | |
3008 } | |
3009 } | |
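// Illustrative sketch (not from the original source): the shll/sarl fallback
// above sign-extends the low byte without needing movsbl. Shifting left by 24
// puts the byte's sign bit into bit 31, and the arithmetic right shift then
// replicates it, e.g. 0x000000FF -> 0xFF000000 -> 0xFFFFFFFF (-1):
//
//   int32_t sign_extend_byte(int32_t x) { return (x << 24) >> 24; }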
3010 | |
3011 void MacroAssembler::sign_extend_short(Register reg) { | |
3012 if (LP64_ONLY(true ||) VM_Version::is_P6()) { | |
3013 movswl(reg, reg); // movsxw | |
3014 } else { | |
3015 shll(reg, 16); | |
3016 sarl(reg, 16); | |
3017 } | |
3018 } | |
3019 | |
3020 void MacroAssembler::testl(Register dst, AddressLiteral src) { | |
3021 assert(reachable(src), "Address should be reachable"); | |
3022 testl(dst, as_Address(src)); | |
3023 } | |
3024 | |
3025 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { | |
3026 if (reachable(src)) { | |
3027 Assembler::sqrtsd(dst, as_Address(src)); | |
3028 } else { | |
3029 lea(rscratch1, src); | |
3030 Assembler::sqrtsd(dst, Address(rscratch1, 0)); | |
3031 } | |
3032 } | |
3033 | |
3034 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { | |
3035 if (reachable(src)) { | |
3036 Assembler::sqrtss(dst, as_Address(src)); | |
3037 } else { | |
3038 lea(rscratch1, src); | |
3039 Assembler::sqrtss(dst, Address(rscratch1, 0)); | |
3040 } | |
3041 } | |
3042 | |
3043 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { | |
3044 if (reachable(src)) { | |
3045 Assembler::subsd(dst, as_Address(src)); | |
3046 } else { | |
3047 lea(rscratch1, src); | |
3048 Assembler::subsd(dst, Address(rscratch1, 0)); | |
3049 } | |
3050 } | |
3051 | |
3052 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { | |
3053 if (reachable(src)) { | |
3054 Assembler::subss(dst, as_Address(src)); | |
3055 } else { | |
3056 lea(rscratch1, src); | |
3057 Assembler::subss(dst, Address(rscratch1, 0)); | |
3058 } | |
3059 } | |
3060 | |
3061 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { | |
3062 if (reachable(src)) { | |
3063 Assembler::ucomisd(dst, as_Address(src)); | |
3064 } else { | |
3065 lea(rscratch1, src); | |
3066 Assembler::ucomisd(dst, Address(rscratch1, 0)); | |
3067 } | |
3068 } | |
3069 | |
3070 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { | |
3071 if (reachable(src)) { | |
3072 Assembler::ucomiss(dst, as_Address(src)); | |
3073 } else { | |
3074 lea(rscratch1, src); | |
3075 Assembler::ucomiss(dst, Address(rscratch1, 0)); | |
3076 } | |
3077 } | |
3078 | |
3079 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { | |
3080 // Used in sign-bit flipping with aligned address. | |
3081 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); | |
3082 if (reachable(src)) { | |
3083 Assembler::xorpd(dst, as_Address(src)); | |
3084 } else { | |
3085 lea(rscratch1, src); | |
3086 Assembler::xorpd(dst, Address(rscratch1, 0)); | |
3087 } | |
3088 } | |
3089 | |
3090 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { | |
3091 // Used in sign-bit flipping with aligned address. | |
3092 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); | |
3093 if (reachable(src)) { | |
3094 Assembler::xorps(dst, as_Address(src)); | |
3095 } else { | |
3096 lea(rscratch1, src); | |
3097 Assembler::xorps(dst, Address(rscratch1, 0)); | |
3098 } | |
3099 } | |
3100 | |
3101 void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { | |
3102 // Used in sign-bit flipping with aligned address. | |
7427 | 3103 bool aligned_adr = (((intptr_t)src.target() & 15) == 0); |
3104 assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes"); | |
7199 | 3105 if (reachable(src)) { |
3106 Assembler::pshufb(dst, as_Address(src)); | |
3107 } else { | |
3108 lea(rscratch1, src); | |
3109 Assembler::pshufb(dst, Address(rscratch1, 0)); | |
3110 } | |
3111 } | |
3112 | |
3113 // AVX 3-operands instructions | |
3114 | |
3115 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
3116 if (reachable(src)) { | |
3117 vaddsd(dst, nds, as_Address(src)); | |
3118 } else { | |
3119 lea(rscratch1, src); | |
3120 vaddsd(dst, nds, Address(rscratch1, 0)); | |
3121 } | |
3122 } | |
3123 | |
3124 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
3125 if (reachable(src)) { | |
3126 vaddss(dst, nds, as_Address(src)); | |
3127 } else { | |
3128 lea(rscratch1, src); | |
3129 vaddss(dst, nds, Address(rscratch1, 0)); | |
3130 } | |
3131 } | |
3132 | |
3133 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { | |
3134 if (reachable(src)) { | |
3135 vandpd(dst, nds, as_Address(src), vector256); | |
3136 } else { | |
3137 lea(rscratch1, src); | |
3138 vandpd(dst, nds, Address(rscratch1, 0), vector256); | |
3139 } | |
3140 } | |
3141 | |
3142 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { | |
3143 if (reachable(src)) { | |
3144 vandps(dst, nds, as_Address(src), vector256); | |
3145 } else { | |
3146 lea(rscratch1, src); | |
3147 vandps(dst, nds, Address(rscratch1, 0), vector256); | |
3148 } | |
3149 } | |
3150 | |
3151 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
3152 if (reachable(src)) { | |
3153 vdivsd(dst, nds, as_Address(src)); | |
3154 } else { | |
3155 lea(rscratch1, src); | |
3156 vdivsd(dst, nds, Address(rscratch1, 0)); | |
3157 } | |
3158 } | |
3159 | |
3160 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
3161 if (reachable(src)) { | |
3162 vdivss(dst, nds, as_Address(src)); | |
3163 } else { | |
3164 lea(rscratch1, src); | |
3165 vdivss(dst, nds, Address(rscratch1, 0)); | |
3166 } | |
3167 } | |
3168 | |
3169 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
3170 if (reachable(src)) { | |
3171 vmulsd(dst, nds, as_Address(src)); | |
3172 } else { | |
3173 lea(rscratch1, src); | |
3174 vmulsd(dst, nds, Address(rscratch1, 0)); | |
3175 } | |
3176 } | |
3177 | |
3178 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
3179 if (reachable(src)) { | |
3180 vmulss(dst, nds, as_Address(src)); | |
3181 } else { | |
3182 lea(rscratch1, src); | |
3183 vmulss(dst, nds, Address(rscratch1, 0)); | |
3184 } | |
3185 } | |
3186 | |
3187 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
3188 if (reachable(src)) { | |
3189 vsubsd(dst, nds, as_Address(src)); | |
3190 } else { | |
3191 lea(rscratch1, src); | |
3192 vsubsd(dst, nds, Address(rscratch1, 0)); | |
3193 } | |
3194 } | |
3195 | |
3196 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
3197 if (reachable(src)) { | |
3198 vsubss(dst, nds, as_Address(src)); | |
3199 } else { | |
3200 lea(rscratch1, src); | |
3201 vsubss(dst, nds, Address(rscratch1, 0)); | |
3202 } | |
3203 } | |
3204 | |
3205 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { | |
3206 if (reachable(src)) { | |
3207 vxorpd(dst, nds, as_Address(src), vector256); | |
3208 } else { | |
3209 lea(rscratch1, src); | |
3210 vxorpd(dst, nds, Address(rscratch1, 0), vector256); | |
3211 } | |
3212 } | |
3213 | |
3214 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { | |
3215 if (reachable(src)) { | |
3216 vxorps(dst, nds, as_Address(src), vector256); | |
3217 } else { | |
3218 lea(rscratch1, src); | |
3219 vxorps(dst, nds, Address(rscratch1, 0), vector256); | |
3220 } | |
3221 } | |
3222 | |
3223 | |
3224 ////////////////////////////////////////////////////////////////////////////////// | |
8001 | 3225 #if INCLUDE_ALL_GCS |
7199 | 3226 |
3227 void MacroAssembler::g1_write_barrier_pre(Register obj, | |
3228 Register pre_val, | |
3229 Register thread, | |
3230 Register tmp, | |
3231 bool tosca_live, | |
3232 bool expand_call) { | |
3233 | |
3234 // If expand_call is true then we expand the call_VM_leaf macro | |
3235 // directly to skip generating the check by | |
3236 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. | |
3237 | |
3238 #ifdef _LP64 | |
3239 assert(thread == r15_thread, "must be"); | |
3240 #endif // _LP64 | |
3241 | |
3242 Label done; | |
3243 Label runtime; | |
3244 | |
3245 assert(pre_val != noreg, "check this code"); | |
3246 | |
3247 if (obj != noreg) { | |
3248 assert_different_registers(obj, pre_val, tmp); | |
3249 assert(pre_val != rax, "check this code"); | |
3250 } | |
3251 | |
3252 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + | |
3253 PtrQueue::byte_offset_of_active())); | |
3254 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + | |
3255 PtrQueue::byte_offset_of_index())); | |
3256 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + | |
3257 PtrQueue::byte_offset_of_buf())); | |
3258 | |
3259 | |
3260 // Is marking active? | |
3261 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { | |
3262 cmpl(in_progress, 0); | |
3263 } else { | |
3264 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); | |
3265 cmpb(in_progress, 0); | |
3266 } | |
3267 jcc(Assembler::equal, done); | |
3268 | |
3269 // Do we need to load the previous value? | |
3270 if (obj != noreg) { | |
3271 load_heap_oop(pre_val, Address(obj, 0)); | |
3272 } | |
3273 | |
3274 // Is the previous value null? | |
3275 cmpptr(pre_val, (int32_t) NULL_WORD); | |
3276 jcc(Assembler::equal, done); | |
3277 | |
3278 // Can we store original value in the thread's buffer? | |
3279 // Is index == 0? | |
3280 // (The index field is typed as size_t.) | |
3281 | |
3282 movptr(tmp, index); // tmp := *index_adr | |
3283 cmpptr(tmp, 0); // tmp == 0? | |
3284 jcc(Assembler::equal, runtime); // If yes, goto runtime | |
3285 | |
3286 subptr(tmp, wordSize); // tmp := tmp - wordSize | |
3287 movptr(index, tmp); // *index_adr := tmp | |
3288 addptr(tmp, buffer); // tmp := tmp + *buffer_adr | |
3289 | |
3290 // Record the previous value | |
3291 movptr(Address(tmp, 0), pre_val); | |
3292 jmp(done); | |
3293 | |
3294 bind(runtime); | |
3295 // save the live input values | |
3296 if(tosca_live) push(rax); | |
3297 | |
3298 if (obj != noreg && obj != rax) | |
3299 push(obj); | |
3300 | |
3301 if (pre_val != rax) | |
3302 push(pre_val); | |
3303 | |
3304 // Calling the runtime using the regular call_VM_leaf mechanism generates |
3305 // code (generated by InterpreterMacroAssembler::call_VM_leaf_base) |
3306 // that checks that *(ebp+frame::interpreter_frame_last_sp) == NULL. |
3307 // |
3308 // If we are generating the pre-barrier without a frame (e.g. in the |
3309 // intrinsified Reference.get() routine) then ebp might be pointing to |
3310 // the caller frame and so this check will most likely fail at runtime. |
3311 // |
3312 // Expanding the call directly bypasses the generation of the check. |
3313 // So when we do not have a full interpreter frame on the stack, |
3314 // expand_call should be passed true. |
3315 | |
3316 NOT_LP64( push(thread); ) | |
3317 | |
3318 if (expand_call) { | |
3319 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) | |
3320 pass_arg1(this, thread); | |
3321 pass_arg0(this, pre_val); | |
3322 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); | |
3323 } else { | |
3324 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); | |
3325 } | |
3326 | |
3327 NOT_LP64( pop(thread); ) | |
3328 | |
3329 // save the live input values | |
3330 if (pre_val != rax) | |
3331 pop(pre_val); | |
3332 | |
3333 if (obj != noreg && obj != rax) | |
3334 pop(obj); | |
3335 | |
3336 if(tosca_live) pop(rax); | |
3337 | |
3338 bind(done); | |
3339 } | |
3340 | |
3341 void MacroAssembler::g1_write_barrier_post(Register store_addr, | |
3342 Register new_val, | |
3343 Register thread, | |
3344 Register tmp, | |
3345 Register tmp2) { | |
3346 #ifdef _LP64 | |
3347 assert(thread == r15_thread, "must be"); | |
3348 #endif // _LP64 | |
3349 | |
3350 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + | |
3351 PtrQueue::byte_offset_of_index())); | |
3352 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + | |
3353 PtrQueue::byte_offset_of_buf())); | |
3354 | |
3355 BarrierSet* bs = Universe::heap()->barrier_set(); | |
3356 CardTableModRefBS* ct = (CardTableModRefBS*)bs; | |
13424 | 3357 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); |
3358 |
7199 | 3359 Label done; |
3360 Label runtime; | |
3361 | |
3362 // Does store cross heap regions? | |
3363 | |
3364 movptr(tmp, store_addr); | |
3365 xorptr(tmp, new_val); | |
3366 shrptr(tmp, HeapRegion::LogOfHRGrainBytes); | |
3367 jcc(Assembler::equal, done); | |
3368 | |
3369 // crosses regions, storing NULL? | |
3370 | |
3371 cmpptr(new_val, (int32_t) NULL_WORD); | |
3372 jcc(Assembler::equal, done); | |
3373 | |
3374 // storing region crossing non-NULL, is card already dirty? | |
3375 | |
3376 const Register card_addr = tmp; | |
13424 | 3377 const Register cardtable = tmp2; |
3378 |
3379 movptr(card_addr, store_addr); |
3380 shrptr(card_addr, CardTableModRefBS::card_shift); |
3381 // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT |
3382 // a valid address and therefore is not properly handled by the relocation code. |
3383 movptr(cardtable, (intptr_t)ct->byte_map_base); |
3384 addptr(card_addr, cardtable); |
3385 |
12835 | 3386 cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val()); |
7199 | 3387 jcc(Assembler::equal, done); |
3388 | |
12835 | 3389 membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); |
3390 cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val()); |
3391 jcc(Assembler::equal, done); |
3392 |
3393 |
7199 | 3394 // storing a region crossing, non-NULL oop, card is clean. |
3395 // dirty card and log. | |
3396 | |
12835 | 3397 movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val()); |
7199 | 3398 |
3399 cmpl(queue_index, 0); | |
3400 jcc(Assembler::equal, runtime); | |
3401 subl(queue_index, wordSize); | |
3402 movptr(tmp2, buffer); | |
3403 #ifdef _LP64 | |
3404 movslq(rscratch1, queue_index); | |
3405 addq(tmp2, rscratch1); | |
3406 movq(Address(tmp2, 0), card_addr); | |
3407 #else | |
3408 addl(tmp2, queue_index); | |
13424 | 3409 movl(Address(tmp2, 0), card_addr); |
7199 | 3410 #endif |
3411 jmp(done); | |
3412 | |
3413 bind(runtime); | |
3414 // save the live input values | |
3415 push(store_addr); | |
3416 push(new_val); | |
3417 #ifdef _LP64 | |
3418 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); | |
3419 #else | |
3420 push(thread); | |
3421 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); | |
3422 pop(thread); | |
3423 #endif | |
3424 pop(new_val); | |
3425 pop(store_addr); | |
3426 | |
3427 bind(done); | |
3428 } | |
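// Illustrative sketch (not from the original source): the fast-path filtering
// in g1_write_barrier_post above corresponds roughly to this C-like logic;
// only the "enqueue the card" case falls through to the runtime call, and
// StoreLoad_membar() is just a placeholder for the membar emitted above:
//
//   if ((((uintptr_t)store_addr ^ (uintptr_t)new_val) >> HeapRegion::LogOfHRGrainBytes) == 0)
//     return;                                  // store does not cross regions
//   if (new_val == NULL) return;               // storing NULL needs no card entry
//   jbyte* card = ct->byte_map_base + ((uintptr_t)store_addr >> CardTableModRefBS::card_shift);
//   if (*card == G1SATBCardTableModRefBS::g1_young_card_val()) return;  // young region
//   StoreLoad_membar();
//   if (*card == CardTableModRefBS::dirty_card_val()) return;           // already dirtied and logged
//   *card = CardTableModRefBS::dirty_card_val();                        // dirty it, then try to log it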
3429 | |
8001 | 3430 #endif // INCLUDE_ALL_GCS |
7199 | 3431 ////////////////////////////////////////////////////////////////////////////////// |
3432 | |
3433 | |
3434 void MacroAssembler::store_check(Register obj) { | |
3435 // Does a store check for the oop in register obj. The content of | |
3436 // register obj is destroyed afterwards. | |
3437 store_check_part_1(obj); | |
3438 store_check_part_2(obj); | |
3439 } | |
3440 | |
3441 void MacroAssembler::store_check(Register obj, Address dst) { | |
3442 store_check(obj); | |
3443 } | |
3444 | |
3445 | |
3446 // Split the store check operation so that other instructions can be scheduled in between. |
3447 void MacroAssembler::store_check_part_1(Register obj) { | |
3448 BarrierSet* bs = Universe::heap()->barrier_set(); | |
3449 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); | |
3450 shrptr(obj, CardTableModRefBS::card_shift); | |
3451 } | |
3452 | |
3453 void MacroAssembler::store_check_part_2(Register obj) { | |
3454 BarrierSet* bs = Universe::heap()->barrier_set(); | |
3455 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); | |
3456 CardTableModRefBS* ct = (CardTableModRefBS*)bs; | |
3457 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); | |
3458 | |
3459 // The calculation for byte_map_base is as follows: | |
3460 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); | |
13424 | 3461 // So this essentially converts an address to a displacement and it will |
3462 // never need to be relocated. On 64bit however the value may be too |
3463 // large for a 32bit displacement. |
7199 | 3464 intptr_t disp = (intptr_t) ct->byte_map_base; |
3465 if (is_simm32(disp)) { | |
3466 Address cardtable(noreg, obj, Address::times_1, disp); | |
3467 movb(cardtable, 0); | |
3468 } else { | |
13424 | 3469 // By doing it as an ExternalAddress 'disp' could be converted to a rip-relative |
3470 // displacement and done in a single instruction given favorable mapping and a |
3471 // smarter version of as_Address. However, 'ExternalAddress' generates a relocation |
3472 // entry and that entry is not properly handled by the relocation code. |
3473 AddressLiteral cardtable((address)ct->byte_map_base, relocInfo::none); |
7199 | 3474 Address index(noreg, obj, Address::times_1); |
3475 movb(as_Address(ArrayAddress(cardtable, index)), 0); | |
3476 } | |
3477 } | |
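// Illustrative sketch (not from the original source): taken together,
// store_check_part_1/part_2 implement the plain card-table post-barrier,
// roughly:
//
//   ct->byte_map_base[(uintptr_t)obj >> CardTableModRefBS::card_shift] = 0;  // 0 is the dirty value here
//
// part_1 does the shift, part_2 adds byte_map_base (as a 32-bit displacement
// when it fits) and stores the zero byte.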
3478 | |
3479 void MacroAssembler::subptr(Register dst, int32_t imm32) { | |
3480 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); | |
3481 } | |
3482 | |
3483 // Force generation of a 4 byte immediate value even if it fits into 8bit | |
3484 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { | |
3485 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); | |
3486 } | |
3487 | |
3488 void MacroAssembler::subptr(Register dst, Register src) { | |
3489 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); | |
3490 } | |
3491 | |
3492 // C++ bool manipulation | |
3493 void MacroAssembler::testbool(Register dst) { | |
3494 if(sizeof(bool) == 1) | |
3495 testb(dst, 0xff); | |
3496 else if(sizeof(bool) == 2) { | |
3497 // testw implementation needed for two byte bools | |
3498 ShouldNotReachHere(); | |
3499 } else if(sizeof(bool) == 4) | |
3500 testl(dst, dst); | |
3501 else | |
3502 // unsupported | |
3503 ShouldNotReachHere(); | |
3504 } | |
3505 | |
3506 void MacroAssembler::testptr(Register dst, Register src) { | |
3507 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); | |
3508 } | |
3509 | |
3510 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. | |
3511 void MacroAssembler::tlab_allocate(Register obj, | |
3512 Register var_size_in_bytes, | |
3513 int con_size_in_bytes, | |
3514 Register t1, | |
3515 Register t2, | |
3516 Label& slow_case) { | |
3517 assert_different_registers(obj, t1, t2); | |
3518 assert_different_registers(obj, var_size_in_bytes, t1); | |
3519 Register end = t2; | |
3520 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); | |
3521 | |
3522 verify_tlab(); | |
3523 | |
3524 NOT_LP64(get_thread(thread)); | |
3525 | |
3526 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); | |
3527 if (var_size_in_bytes == noreg) { | |
3528 lea(end, Address(obj, con_size_in_bytes)); | |
3529 } else { | |
3530 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); | |
3531 } | |
3532 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); | |
3533 jcc(Assembler::above, slow_case); | |
3534 | |
3535 // update the tlab top pointer | |
3536 movptr(Address(thread, JavaThread::tlab_top_offset()), end); | |
3537 | |
3538 // recover var_size_in_bytes if necessary | |
3539 if (var_size_in_bytes == end) { | |
3540 subptr(var_size_in_bytes, obj); | |
3541 } | |
3542 verify_tlab(); | |
3543 } | |
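// Illustrative sketch (not from the original source): tlab_allocate() is the
// standard TLAB bump-pointer fast path; the accessor names below are
// descriptive only (the real code goes through JavaThread::tlab_*_offset()):
//
//   HeapWord* obj = thread->tlab_top();
//   HeapWord* end = obj + size_in_bytes;          // formed with lea above
//   if (end > thread->tlab_end()) goto slow_case; // does not fit, take the slow path
//   thread->set_tlab_top(end);                    // publish the new top
//   // obj now points at the freshly allocated space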
3544 | |
3545 // Preserves rbx and rdx. |
3546 Register MacroAssembler::tlab_refill(Label& retry, | |
3547 Label& try_eden, | |
3548 Label& slow_case) { | |
3549 Register top = rax; | |
3550 Register t1 = rcx; | |
3551 Register t2 = rsi; | |
3552 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); | |
3553 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); | |
3554 Label do_refill, discard_tlab; | |
3555 | |
3556 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { | |
3557 // No allocation in the shared eden. | |
3558 jmp(slow_case); | |
3559 } | |
3560 | |
3561 NOT_LP64(get_thread(thread_reg)); | |
3562 | |
3563 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); | |
3564 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); | |
3565 | |
3566 // calculate amount of free space | |
3567 subptr(t1, top); | |
3568 shrptr(t1, LogHeapWordSize); | |
3569 | |
3570 // Retain tlab and allocate object in shared space if | |
3571 // the amount free in the tlab is too large to discard. | |
3572 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); | |
3573 jcc(Assembler::lessEqual, discard_tlab); | |
3574 | |
3575 // Retain | |
3576 // %%% yuck as movptr... | |
3577 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); | |
3578 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); | |
3579 if (TLABStats) { | |
3580 // increment number of slow_allocations | |
3581 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); | |
3582 } | |
3583 jmp(try_eden); | |
3584 | |
3585 bind(discard_tlab); | |
3586 if (TLABStats) { | |
3587 // increment number of refills | |
3588 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); | |
3589 // accumulate wastage -- t1 is amount free in tlab | |
3590 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); | |
3591 } | |
3592 | |
3593 // if tlab is currently allocated (top or end != null) then | |
3594 // fill [top, end + alignment_reserve) with array object | |
3595 testptr(top, top); | |
3596 jcc(Assembler::zero, do_refill); | |
3597 | |
3598 // set up the mark word | |
3599 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); | |
3600 // set the length to the remaining space | |
3601 subptr(t1, typeArrayOopDesc::header_size(T_INT)); | |
3602 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); | |
3603 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); | |
3604 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); | |
3605 // set klass to intArrayKlass | |
3606 // dubious reloc why not an oop reloc? | |
3607 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); | |
3608 // store klass last. Concurrent GCs assume the length is valid if the |
3609 // klass field is not null. |
3610 store_klass(top, t1); | |
3611 | |
3612 movptr(t1, top); | |
3613 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); | |
3614 incr_allocated_bytes(thread_reg, t1, 0); | |
3615 | |
3616 // refill the tlab with an eden allocation | |
3617 bind(do_refill); | |
3618 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); | |
3619 shlptr(t1, LogHeapWordSize); | |
3620 // allocate new tlab, address returned in top | |
3621 eden_allocate(top, t1, 0, t2, slow_case); | |
3622 | |
3623 // Check that t1 was preserved in eden_allocate. | |
3624 #ifdef ASSERT | |
3625 if (UseTLAB) { | |
3626 Label ok; | |
3627 Register tsize = rsi; | |
3628 assert_different_registers(tsize, thread_reg, t1); | |
3629 push(tsize); | |
3630 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); | |
3631 shlptr(tsize, LogHeapWordSize); | |
3632 cmpptr(t1, tsize); | |
3633 jcc(Assembler::equal, ok); | |
3634 STOP("assert(t1 != tlab size)"); | |
3635 should_not_reach_here(); | |
3636 | |
3637 bind(ok); | |
3638 pop(tsize); | |
3639 } | |
3640 #endif | |
3641 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); | |
3642 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); | |
3643 addptr(top, t1); | |
3644 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); | |
3645 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); | |
3646 verify_tlab(); | |
3647 jmp(retry); | |
3648 | |
3649 return thread_reg; // for use by caller | |
3650 } | |
3651 | |
3652 void MacroAssembler::incr_allocated_bytes(Register thread, | |
3653 Register var_size_in_bytes, | |
3654 int con_size_in_bytes, | |
3655 Register t1) { | |
3656 if (!thread->is_valid()) { | |
3657 #ifdef _LP64 | |
3658 thread = r15_thread; | |
3659 #else | |
3660 assert(t1->is_valid(), "need temp reg"); | |
3661 thread = t1; | |
3662 get_thread(thread); | |
3663 #endif | |
3664 } | |
3665 | |
3666 #ifdef _LP64 | |
3667 if (var_size_in_bytes->is_valid()) { | |
3668 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); | |
3669 } else { | |
3670 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); | |
3671 } | |
3672 #else | |
3673 if (var_size_in_bytes->is_valid()) { | |
3674 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); | |
3675 } else { | |
3676 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); | |
3677 } | |
3678 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); | |
3679 #endif | |
3680 } | |
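// Illustrative sketch (not from the original source): on 32-bit the
// allocated-bytes counter is a 64-bit field, so the addl/adcl pair above is
// the split form of a single 64-bit add:
//
//   uint64_t allocated_bytes;          // low word updated by addl, carry folded in by adcl
//   allocated_bytes += size_in_bytes;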
3681 | |
3682 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { | |
3683 pusha(); | |
3684 | |
3685 // if we are coming from c1, xmm registers may be live | |
3686 int off = 0; | |
3687 if (UseSSE == 1) { | |
3688 subptr(rsp, sizeof(jdouble)*8); | |
3689 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); | |
3690 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); | |
3691 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); | |
3692 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); | |
3693 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); | |
3694 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); | |
3695 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); | |
3696 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); | |
3697 } else if (UseSSE >= 2) { | |
3698 #ifdef COMPILER2 | |
3699 if (MaxVectorSize > 16) { | |
3700 assert(UseAVX > 0, "256bit vectors are supported only with AVX"); | |
3701 // Save upper half of YMM registers |
3702 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); | |
3703 vextractf128h(Address(rsp, 0),xmm0); | |
3704 vextractf128h(Address(rsp, 16),xmm1); | |
3705 vextractf128h(Address(rsp, 32),xmm2); | |
3706 vextractf128h(Address(rsp, 48),xmm3); | |
3707 vextractf128h(Address(rsp, 64),xmm4); | |
3708 vextractf128h(Address(rsp, 80),xmm5); | |
3709 vextractf128h(Address(rsp, 96),xmm6); | |
3710 vextractf128h(Address(rsp,112),xmm7); | |
3711 #ifdef _LP64 | |
3712 vextractf128h(Address(rsp,128),xmm8); | |
3713 vextractf128h(Address(rsp,144),xmm9); | |
3714 vextractf128h(Address(rsp,160),xmm10); | |
3715 vextractf128h(Address(rsp,176),xmm11); | |
3716 vextractf128h(Address(rsp,192),xmm12); | |
3717 vextractf128h(Address(rsp,208),xmm13); | |
3718 vextractf128h(Address(rsp,224),xmm14); | |
3719 vextractf128h(Address(rsp,240),xmm15); | |
3720 #endif | |
3721 } | |
3722 #endif | |
3723 // Save whole 128-bit (16 bytes) XMM registers |
3724 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); | |
3725 movdqu(Address(rsp,off++*16),xmm0); | |
3726 movdqu(Address(rsp,off++*16),xmm1); | |
3727 movdqu(Address(rsp,off++*16),xmm2); | |
3728 movdqu(Address(rsp,off++*16),xmm3); | |
3729 movdqu(Address(rsp,off++*16),xmm4); | |
3730 movdqu(Address(rsp,off++*16),xmm5); | |
3731 movdqu(Address(rsp,off++*16),xmm6); | |
3732 movdqu(Address(rsp,off++*16),xmm7); | |
3733 #ifdef _LP64 | |
3734 movdqu(Address(rsp,off++*16),xmm8); | |
3735 movdqu(Address(rsp,off++*16),xmm9); | |
3736 movdqu(Address(rsp,off++*16),xmm10); | |
3737 movdqu(Address(rsp,off++*16),xmm11); | |
3738 movdqu(Address(rsp,off++*16),xmm12); | |
3739 movdqu(Address(rsp,off++*16),xmm13); | |
3740 movdqu(Address(rsp,off++*16),xmm14); | |
3741 movdqu(Address(rsp,off++*16),xmm15); | |
3742 #endif | |
3743 } | |
3744 | |
3745 // Preserve registers across runtime call | |
3746 int incoming_argument_and_return_value_offset = -1; | |
3747 if (num_fpu_regs_in_use > 1) { | |
3748 // Must preserve all other FPU regs (could alternatively convert | |
3749 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash | |
3750 // FPU state, but cannot trust the C compiler) |
3751 NEEDS_CLEANUP; | |
3752 // NOTE that in this case we also push the incoming argument(s) to | |
3753 // the stack and restore it later; we also use this stack slot to | |
3754 // hold the return value from dsin, dcos etc. | |
3755 for (int i = 0; i < num_fpu_regs_in_use; i++) { | |
3756 subptr(rsp, sizeof(jdouble)); | |
3757 fstp_d(Address(rsp, 0)); | |
3758 } | |
3759 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); | |
3760 for (int i = nb_args-1; i >= 0; i--) { | |
3761 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); | |
3762 } | |
3763 } | |
3764 | |
3765 subptr(rsp, nb_args*sizeof(jdouble)); | |
3766 for (int i = 0; i < nb_args; i++) { | |
3767 fstp_d(Address(rsp, i*sizeof(jdouble))); | |
3768 } | |
3769 | |
3770 #ifdef _LP64 | |
3771 if (nb_args > 0) { | |
3772 movdbl(xmm0, Address(rsp, 0)); | |
3773 } | |
3774 if (nb_args > 1) { | |
3775 movdbl(xmm1, Address(rsp, sizeof(jdouble))); | |
3776 } | |
3777 assert(nb_args <= 2, "unsupported number of args"); | |
3778 #endif // _LP64 | |
3779 | |
3780 // NOTE: we must not use call_VM_leaf here because that requires a | |
3781 // complete interpreter frame in debug mode -- same bug as 4387334 | |
3782 // MacroAssembler::call_VM_leaf_base is perfectly safe and will | |
3783 // follow the proper 64-bit ABI |
3784 | |
3785 NEEDS_CLEANUP; | |
3786 // Need to add stack banging before this runtime call if it needs to | |
3787 // be taken; however, there is no generic stack banging routine at | |
3788 // the MacroAssembler level | |
3789 | |
3790 MacroAssembler::call_VM_leaf_base(runtime_entry, 0); | |
3791 | |
3792 #ifdef _LP64 | |
3793 movsd(Address(rsp, 0), xmm0); | |
3794 fld_d(Address(rsp, 0)); | |
3795 #endif // _LP64 | |
3796 addptr(rsp, sizeof(jdouble) * nb_args); | |
3797 if (num_fpu_regs_in_use > 1) { | |
3798 // Must save return value to stack and then restore entire FPU | |
3799 // stack except incoming arguments | |
3800 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); | |
3801 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { | |
3802 fld_d(Address(rsp, 0)); | |
3803 addptr(rsp, sizeof(jdouble)); | |
3804 } | |
3805 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); | |
3806 addptr(rsp, sizeof(jdouble) * nb_args); | |
3807 } | |
3808 | |
3809 off = 0; | |
3810 if (UseSSE == 1) { | |
3811 movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); | |
3812 movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); | |
3813 movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); | |
3814 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); | |
3815 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); | |
3816 movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); | |
3817 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); | |
3818 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); | |
3819 addptr(rsp, sizeof(jdouble)*8); | |
3820 } else if (UseSSE >= 2) { | |
3821 // Restore whole 128-bit (16 bytes) XMM registers |
3822 movdqu(xmm0, Address(rsp,off++*16)); | |
3823 movdqu(xmm1, Address(rsp,off++*16)); | |
3824 movdqu(xmm2, Address(rsp,off++*16)); | |
3825 movdqu(xmm3, Address(rsp,off++*16)); | |
3826 movdqu(xmm4, Address(rsp,off++*16)); | |
3827 movdqu(xmm5, Address(rsp,off++*16)); | |
3828 movdqu(xmm6, Address(rsp,off++*16)); | |
3829 movdqu(xmm7, Address(rsp,off++*16)); | |
3830 #ifdef _LP64 | |
3831 movdqu(xmm8, Address(rsp,off++*16)); | |
3832 movdqu(xmm9, Address(rsp,off++*16)); | |
3833 movdqu(xmm10, Address(rsp,off++*16)); | |
3834 movdqu(xmm11, Address(rsp,off++*16)); | |
3835 movdqu(xmm12, Address(rsp,off++*16)); | |
3836 movdqu(xmm13, Address(rsp,off++*16)); | |
3837 movdqu(xmm14, Address(rsp,off++*16)); | |
3838 movdqu(xmm15, Address(rsp,off++*16)); | |
3839 #endif | |
3840 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); | |
3841 #ifdef COMPILER2 | |
3842 if (MaxVectorSize > 16) { | |
3843 // Restore upper half of YMM registers. |
3844 vinsertf128h(xmm0, Address(rsp, 0)); | |
3845 vinsertf128h(xmm1, Address(rsp, 16)); | |
3846 vinsertf128h(xmm2, Address(rsp, 32)); | |
3847 vinsertf128h(xmm3, Address(rsp, 48)); | |
3848 vinsertf128h(xmm4, Address(rsp, 64)); | |
3849 vinsertf128h(xmm5, Address(rsp, 80)); | |
3850 vinsertf128h(xmm6, Address(rsp, 96)); | |
3851 vinsertf128h(xmm7, Address(rsp,112)); | |
3852 #ifdef _LP64 | |
3853 vinsertf128h(xmm8, Address(rsp,128)); | |
3854 vinsertf128h(xmm9, Address(rsp,144)); | |
3855 vinsertf128h(xmm10, Address(rsp,160)); | |
3856 vinsertf128h(xmm11, Address(rsp,176)); | |
3857 vinsertf128h(xmm12, Address(rsp,192)); | |
3858 vinsertf128h(xmm13, Address(rsp,208)); | |
3859 vinsertf128h(xmm14, Address(rsp,224)); | |
3860 vinsertf128h(xmm15, Address(rsp,240)); | |
3861 #endif | |
3862 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); | |
3863 } | |
3864 #endif | |
3865 } | |
3866 popa(); | |
3867 } | |
3868 | |
3869 static const double pi_4 = 0.7853981633974483; | |
3870 | |
3871 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { | |
3872 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) | |
3873 // was attempted in this code; unfortunately it appears that the | |
3874 // switch to 80-bit precision and back causes this to be | |
3875 // unprofitable compared with simply performing a runtime call if | |
3876 // the argument is out of the (-pi/4, pi/4) range. | |
3877 | |
3878 Register tmp = noreg; | |
3879 if (!VM_Version::supports_cmov()) { | |
3880 // fcmp needs a temporary, so preserve rbx |
3881 tmp = rbx; | |
3882 push(tmp); | |
3883 } | |
3884 | |
3885 Label slow_case, done; | |
3886 | |
3887 ExternalAddress pi4_adr = (address)&pi_4; | |
3888 if (reachable(pi4_adr)) { | |
3889 // x ?<= pi/4 | |
3890 fld_d(pi4_adr); | |
3891 fld_s(1); // Stack: X PI/4 X | |
3892 fabs(); // Stack: |X| PI/4 X | |
3893 fcmp(tmp); | |
3894 jcc(Assembler::above, slow_case); | |
3895 | |
3896 // fastest case: -pi/4 <= x <= pi/4 | |
3897 switch(trig) { | |
3898 case 's': | |
3899 fsin(); | |
3900 break; | |
3901 case 'c': | |
3902 fcos(); | |
3903 break; | |
3904 case 't': | |
3905 ftan(); | |
3906 break; | |
3907 default: | |
3908 assert(false, "bad intrinsic"); | |
3909 break; | |
3910 } | |
3911 jmp(done); | |
3912 } | |
3913 | |
3914 // slow case: runtime call | |
3915 bind(slow_case); | |
3916 | |
3917 switch(trig) { | |
3918 case 's': | |
3919 { | |
3920 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); | |
3921 } | |
3922 break; | |
3923 case 'c': | |
3924 { | |
3925 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); | |
3926 } | |
3927 break; | |
3928 case 't': | |
3929 { | |
3930 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); | |
3931 } | |
3932 break; | |
3933 default: | |
3934 assert(false, "bad intrinsic"); | |
3935 break; | |
3936 } | |
3937 | |
3938 // Come here with result in F-TOS | |
3939 bind(done); | |
3940 | |
3941 if (tmp != noreg) { | |
3942 pop(tmp); | |
3943 } | |
3944 } | |
3945 | |
3946 | |
3947 // Look up the method for a megamorphic invokeinterface call. | |
3948 // The target method is determined by <intf_klass, itable_index>. | |
3949 // The receiver klass is in recv_klass. | |
3950 // On success, the result will be in method_result, and execution falls through. | |
3951 // On failure, execution transfers to the given label. | |
3952 void MacroAssembler::lookup_interface_method(Register recv_klass, | |
3953 Register intf_klass, | |
3954 RegisterOrConstant itable_index, | |
3955 Register method_result, | |
3956 Register scan_temp, | |
3957 Label& L_no_such_interface) { | |
3958 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); | |
3959 assert(itable_index.is_constant() || itable_index.as_register() == method_result, | |
3960 "caller must use same register for non-constant itable index as for method"); | |
3961 | |
3962 // Compute start of first itableOffsetEntry (which is at the end of the vtable) | |
3963 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; | |
3964 int itentry_off = itableMethodEntry::method_offset_in_bytes(); | |
3965 int scan_step = itableOffsetEntry::size() * wordSize; | |
3966 int vte_size = vtableEntry::size() * wordSize; | |
3967 Address::ScaleFactor times_vte_scale = Address::times_ptr; | |
3968 assert(vte_size == wordSize, "else adjust times_vte_scale"); | |
3969 | |
3970 movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); | |
3971 | |
3972 // %%% Could store the aligned, prescaled offset in the klassoop. | |
3973 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); | |
3974 if (HeapWordsPerLong > 1) { | |
3975 // Round up to align_object_offset boundary | |
3976 // see code for InstanceKlass::start_of_itable! | |
3977 round_to(scan_temp, BytesPerLong); | |
3978 } | |
3979 | |
3980 // Adjust recv_klass by scaled itable_index, so we can free itable_index. | |
3981 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); | |
3982 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); | |
3983 | |
3984 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { | |
3985 // if (scan->interface() == intf) { | |
3986 // result = (klass + scan->offset() + itable_index); | |
3987 // } | |
3988 // } | |
3989 Label search, found_method; | |
3990 | |
3991 for (int peel = 1; peel >= 0; peel--) { | |
3992 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); | |
3993 cmpptr(intf_klass, method_result); | |
3994 | |
3995 if (peel) { | |
3996 jccb(Assembler::equal, found_method); | |
3997 } else { | |
3998 jccb(Assembler::notEqual, search); | |
3999 // (invert the test to fall through to found_method...) | |
4000 } | |
4001 | |
4002 if (!peel) break; | |
4003 | |
4004 bind(search); | |
4005 | |
4006 // Check that the previous entry is non-null. A null entry means that | |
4007 // the receiver class doesn't implement the interface, and wasn't the | |
4008 // same as when the caller was compiled. | |
4009 testptr(method_result, method_result); | |
4010 jcc(Assembler::zero, L_no_such_interface); | |
4011 addptr(scan_temp, scan_step); | |
4012 } | |
4013 | |
4014 bind(found_method); | |
4015 | |
4016 // Got a hit. | |
4017 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); | |
4018 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); | |
4019 } | |
4020 | |
4021 | |
4022 // virtual method calling | |
4023 void MacroAssembler::lookup_virtual_method(Register recv_klass, | |
4024 RegisterOrConstant vtable_index, | |
4025 Register method_result) { | |
4026 const int base = InstanceKlass::vtable_start_offset() * wordSize; | |
4027 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); | |
4028 Address vtable_entry_addr(recv_klass, | |
4029 vtable_index, Address::times_ptr, | |
4030 base + vtableEntry::method_offset_in_bytes()); | |
4031 movptr(method_result, vtable_entry_addr); | |
4032 } | |
4033 | |
4034 | |
4035 void MacroAssembler::check_klass_subtype(Register sub_klass, | |
4036 Register super_klass, | |
4037 Register temp_reg, | |
4038 Label& L_success) { | |
4039 Label L_failure; | |
4040 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); | |
4041 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); | |
4042 bind(L_failure); | |
4043 } | |
4044 | |
4045 | |
4046 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, | |
4047 Register super_klass, | |
4048 Register temp_reg, | |
4049 Label* L_success, | |
4050 Label* L_failure, | |
4051 Label* L_slow_path, | |
4052 RegisterOrConstant super_check_offset) { | |
4053 assert_different_registers(sub_klass, super_klass, temp_reg); | |
4054 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); | |
4055 if (super_check_offset.is_register()) { | |
4056 assert_different_registers(sub_klass, super_klass, | |
4057 super_check_offset.as_register()); | |
4058 } else if (must_load_sco) { | |
4059 assert(temp_reg != noreg, "supply either a temp or a register offset"); | |
4060 } | |
4061 | |
4062 Label L_fallthrough; | |
4063 int label_nulls = 0; | |
4064 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } | |
4065 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } | |
4066 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } | |
4067 assert(label_nulls <= 1, "at most one NULL in the batch"); | |
4068 | |
4069 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); | |
4070 int sco_offset = in_bytes(Klass::super_check_offset_offset()); | |
4071 Address super_check_offset_addr(super_klass, sco_offset); | |
4072 | |
4073 // Hacked jcc, which "knows" that L_fallthrough, at least, is in | |
4074 // range of a jccb. If this routine grows larger, reconsider at | |
4075 // least some of these. | |
4076 #define local_jcc(assembler_cond, label) \ | |
4077 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ | |
4078 else jcc( assembler_cond, label) /*omit semi*/ | |
4079 | |
4080 // Hacked jmp, which may only be used just before L_fallthrough. | |
4081 #define final_jmp(label) \ | |
4082 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ | |
4083 else jmp(label) /*omit semi*/ | |
4084 | |
4085 // If the pointers are equal, we are done (e.g., String[] elements). | |
4086 // This self-check enables sharing of secondary supertype arrays among | |
4087 // non-primary types such as array-of-interface. Otherwise, each such | |
4088 // type would need its own customized SSA. | |
4089 // We move this check to the front of the fast path because many | |
4090 // type checks are in fact trivially successful in this manner, | |
4091 // so we get a nicely predicted branch right at the start of the check. | |
4092 cmpptr(sub_klass, super_klass); | |
4093 local_jcc(Assembler::equal, *L_success); | |
4094 | |
4095 // Check the supertype display: | |
4096 if (must_load_sco) { | |
4097 // Positive movl does right thing on LP64. | |
4098 movl(temp_reg, super_check_offset_addr); | |
4099 super_check_offset = RegisterOrConstant(temp_reg); | |
4100 } | |
4101 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); | |
4102 cmpptr(super_klass, super_check_addr); // load displayed supertype | |
4103 | |
4104 // This check has worked decisively for primary supers. | |
4105 // Secondary supers are sought in the super_cache ('super_cache_addr'). | |
4106 // (Secondary supers are interfaces and very deeply nested subtypes.) | |
4107 // This works in the same check above because of a tricky aliasing | |
4108 // between the super_cache and the primary super display elements. | |
4109 // (The 'super_check_addr' can address either, as the case requires.) | |
4110 // Note that the cache is updated below if it does not help us find | |
4111 // what we need immediately. | |
4112 // So if it was a primary super, we can just fail immediately. | |
4113 // Otherwise, it's the slow path for us (no success at this point). | |
4114 | |
4115 if (super_check_offset.is_register()) { | |
4116 local_jcc(Assembler::equal, *L_success); | |
4117 cmpl(super_check_offset.as_register(), sc_offset); | |
4118 if (L_failure == &L_fallthrough) { | |
4119 local_jcc(Assembler::equal, *L_slow_path); | |
4120 } else { | |
4121 local_jcc(Assembler::notEqual, *L_failure); | |
4122 final_jmp(*L_slow_path); | |
4123 } | |
4124 } else if (super_check_offset.as_constant() == sc_offset) { | |
4125 // Need a slow path; fast failure is impossible. | |
4126 if (L_slow_path == &L_fallthrough) { | |
4127 local_jcc(Assembler::equal, *L_success); | |
4128 } else { | |
4129 local_jcc(Assembler::notEqual, *L_slow_path); | |
4130 final_jmp(*L_success); | |
4131 } | |
4132 } else { | |
4133 // No slow path; it's a fast decision. | |
4134 if (L_failure == &L_fallthrough) { | |
4135 local_jcc(Assembler::equal, *L_success); | |
4136 } else { | |
4137 local_jcc(Assembler::notEqual, *L_failure); | |
4138 final_jmp(*L_success); | |
4139 } | |
4140 } | |
4141 | |
4142 bind(L_fallthrough); | |
4143 | |
4144 #undef local_jcc | |
4145 #undef final_jmp | |
4146 } | |
4147 | |
4148 | |
4149 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, | |
4150 Register super_klass, | |
4151 Register temp_reg, | |
4152 Register temp2_reg, | |
4153 Label* L_success, | |
4154 Label* L_failure, | |
4155 bool set_cond_codes) { | |
4156 assert_different_registers(sub_klass, super_klass, temp_reg); | |
4157 if (temp2_reg != noreg) | |
4158 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); | |
4159 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) | |
4160 | |
4161 Label L_fallthrough; | |
4162 int label_nulls = 0; | |
4163 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } | |
4164 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } | |
4165 assert(label_nulls <= 1, "at most one NULL in the batch"); | |
4166 | |
4167 // a couple of useful fields in sub_klass: | |
4168 int ss_offset = in_bytes(Klass::secondary_supers_offset()); | |
4169 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); | |
4170 Address secondary_supers_addr(sub_klass, ss_offset); | |
4171 Address super_cache_addr( sub_klass, sc_offset); | |
4172 | |
4173 // Do a linear scan of the secondary super-klass chain. | |
4174 // This code is rarely used, so simplicity is a virtue here. | |
4175 // The repne_scan instruction uses fixed registers, which we must spill. | |
4176 // Don't worry too much about pre-existing connections with the input regs. | |
4177 | |
4178 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) | |
4179 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) | |
4180 | |
4181 // Get super_klass value into rax (even if it was in rdi or rcx). | |
4182 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; | |
4183 if (super_klass != rax || UseCompressedOops) { | |
4184 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } | |
4185 mov(rax, super_klass); | |
4186 } | |
4187 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } | |
4188 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } | |
4189 | |
4190 #ifndef PRODUCT | |
4191 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; | |
4192 ExternalAddress pst_counter_addr((address) pst_counter); | |
4193 NOT_LP64( incrementl(pst_counter_addr) ); | |
4194 LP64_ONLY( lea(rcx, pst_counter_addr) ); | |
4195 LP64_ONLY( incrementl(Address(rcx, 0)) ); | |
4196 #endif //PRODUCT | |
4197 | |
4198 // We will consult the secondary-super array. | |
4199 movptr(rdi, secondary_supers_addr); | |
4200 // Load the array length. (Positive movl does right thing on LP64.) | |
4201 movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes())); | |
4202 // Skip to start of data. | |
4203 addptr(rdi, Array<Klass*>::base_offset_in_bytes()); | |
4204 | |
4205 // Scan RCX words at [RDI] for an occurrence of RAX. | |
4206 // Set NZ/Z based on last compare. | |
4207 // The Z flag will not be set by 'repne' if RCX == 0, since 'repne' itself does |
4208 // not change flags (only the repeated scas instruction sets flags). |
4209 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. | |
4210 | |
4211 testptr(rax,rax); // Set Z = 0 | |
4212 repne_scan(); | |
4213 | |
4214 // Unspill the temp. registers: | |
4215 if (pushed_rdi) pop(rdi); | |
4216 if (pushed_rcx) pop(rcx); | |
4217 if (pushed_rax) pop(rax); | |
4218 | |
4219 if (set_cond_codes) { | |
4220 // Special hack for the AD files: rdi is guaranteed non-zero. | |
4221 assert(!pushed_rdi, "rdi must be left non-NULL"); | |
4222 // Also, the condition codes are properly set Z/NZ on succeed/failure. | |
4223 } | |
4224 | |
4225 if (L_failure == &L_fallthrough) | |
4226 jccb(Assembler::notEqual, *L_failure); | |
4227 else jcc(Assembler::notEqual, *L_failure); | |
4228 | |
4229 // Success. Cache the super we found and proceed in triumph. | |
4230 movptr(super_cache_addr, super_klass); | |
4231 | |
4232 if (L_success != &L_fallthrough) { | |
4233 jmp(*L_success); | |
4234 } | |
4235 | |
4236 #undef IS_A_TEMP | |
4237 | |
4238 bind(L_fallthrough); | |
4239 } | |
4240 | |
4241 | |
4242 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { | |
4243 if (VM_Version::supports_cmov()) { | |
4244 cmovl(cc, dst, src); | |
4245 } else { | |
4246 Label L; | |
4247 jccb(negate_condition(cc), L); | |
4248 movl(dst, src); | |
4249 bind(L); | |
4250 } | |
4251 } | |
4252 | |
4253 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { | |
4254 if (VM_Version::supports_cmov()) { | |
4255 cmovl(cc, dst, src); | |
4256 } else { | |
4257 Label L; | |
4258 jccb(negate_condition(cc), L); | |
4259 movl(dst, src); | |
4260 bind(L); | |
4261 } | |
4262 } | |
4263 | |
4264 void MacroAssembler::verify_oop(Register reg, const char* s) { | |
4265 if (!VerifyOops) return; | |
4266 | |
4267 // Pass register number to verify_oop_subroutine | |
8767 | 4268 const char* b = NULL; |
4269 { |
4270 ResourceMark rm; |
4271 stringStream ss; |
4272 ss.print("verify_oop: %s: %s", reg->name(), s); |
4273 b = code_string(ss.as_string()); |
4274 } |
7199 | 4275 BLOCK_COMMENT("verify_oop {"); |
4276 #ifdef _LP64 | |
4277 push(rscratch1); // save r10, trashed by movptr() | |
4278 #endif | |
4279 push(rax); // save rax, | |
4280 push(reg); // pass register argument | |
4281 ExternalAddress buffer((address) b); | |
4282 // avoid using pushptr, as it modifies scratch registers | |
4283 // and our contract is not to modify anything | |
4284 movptr(rax, buffer.addr()); | |
4285 push(rax); | |
4286 // call indirectly to solve generation ordering problem | |
4287 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); | |
4288 call(rax); | |
4289 // Caller pops the arguments (oop, message) and restores rax, r10 | |
4290 BLOCK_COMMENT("} verify_oop"); | |
4291 } | |
4292 | |
4293 | |
4294 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, | |
4295 Register tmp, | |
4296 int offset) { | |
4297 intptr_t value = *delayed_value_addr; | |
4298 if (value != 0) | |
4299 return RegisterOrConstant(value + offset); | |
4300 | |
4301 // load indirectly to solve generation ordering problem | |
4302 movptr(tmp, ExternalAddress((address) delayed_value_addr)); | |
4303 | |
4304 #ifdef ASSERT | |
4305 { Label L; | |
4306 testptr(tmp, tmp); | |
4307 if (WizardMode) { | |
8767 | 4308 const char* buf = NULL; |
4309 { |
4310 ResourceMark rm; |
4311 stringStream ss; |
4312 ss.print("DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); |
4313 buf = code_string(ss.as_string()); |
4314 } |
7199 | 4315 jcc(Assembler::notZero, L); |
4316 STOP(buf); | |
4317 } else { | |
4318 jccb(Assembler::notZero, L); | |
4319 hlt(); | |
4320 } | |
4321 bind(L); | |
4322 } | |
4323 #endif | |
4324 | |
4325 if (offset != 0) | |
4326 addptr(tmp, offset); | |
4327 | |
4328 return RegisterOrConstant(tmp); | |
4329 } | |
4330 | |
4331 | |
4332 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, | |
4333 int extra_slot_offset) { | |
4334 // cf. TemplateTable::prepare_invoke(), if (load_receiver). | |
4335 int stackElementSize = Interpreter::stackElementSize; | |
4336 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); | |
4337 #ifdef ASSERT | |
4338 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); | |
4339 assert(offset1 - offset == stackElementSize, "correct arithmetic"); | |
4340 #endif | |
4341 Register scale_reg = noreg; | |
4342 Address::ScaleFactor scale_factor = Address::no_scale; | |
4343 if (arg_slot.is_constant()) { | |
4344 offset += arg_slot.as_constant() * stackElementSize; | |
4345 } else { | |
4346 scale_reg = arg_slot.as_register(); | |
4347 scale_factor = Address::times(stackElementSize); | |
4348 } | |
4349 offset += wordSize; // return PC is on stack | |
4350 return Address(rsp, scale_reg, scale_factor, offset); | |
4351 } | |
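// In summary, the operand computed above is
//   rsp + expr_offset_in_bytes(extra_slot_offset) + wordSize   // the wordSize term skips the return PC
// with a constant arg_slot folded into the displacement and a register arg_slot
// applied as an index scaled by stackElementSize.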
4352 | |
4353 | |
4354 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { | |
4355 if (!VerifyOops) return; | |
4356 | |
4357 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); | |
4358 // Pass register number to verify_oop_subroutine | |
8767 | 4359 const char* b = NULL; |
4360 { |
4361 ResourceMark rm; |
4362 stringStream ss; |
4363 ss.print("verify_oop_addr: %s", s); |
4364 b = code_string(ss.as_string()); |
4365 } |
7199 | 4366 #ifdef _LP64 |
4367 push(rscratch1); // save r10, trashed by movptr() | |
4368 #endif | |
4369 push(rax); // save rax, | |
4370 // addr may contain rsp so we will have to adjust it based on the push | |
4371 // we just did (and on 64 bit we do two pushes) | |
4372 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which | |
4373 // stores rax into addr which is backwards of what was intended. | |
4374 if (addr.uses(rsp)) { | |
4375 lea(rax, addr); | |
4376 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); | |
4377 } else { | |
4378 pushptr(addr); | |
4379 } | |
4380 | |
4381 ExternalAddress buffer((address) b); | |
4382 // pass msg argument | |
4383 // avoid using pushptr, as it modifies scratch registers | |
4384 // and our contract is not to modify anything | |
4385 movptr(rax, buffer.addr()); | |
4386 push(rax); | |
4387 | |
4388 // call indirectly to solve generation ordering problem | |
4389 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); | |
4390 call(rax); | |
4391 // Caller pops the arguments (addr, message) and restores rax, r10. | |
4392 } | |
4393 | |
4394 void MacroAssembler::verify_tlab() { | |
4395 #ifdef ASSERT | |
4396 if (UseTLAB && VerifyOops) { | |
4397 Label next, ok; | |
4398 Register t1 = rsi; | |
4399 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); | |
4400 | |
4401 push(t1); | |
4402 NOT_LP64(push(thread_reg)); | |
4403 NOT_LP64(get_thread(thread_reg)); | |
4404 | |
4405 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); | |
4406 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); | |
4407 jcc(Assembler::aboveEqual, next); | |
4408 STOP("assert(top >= start)"); | |
4409 should_not_reach_here(); | |
4410 | |
4411 bind(next); | |
4412 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); | |
4413 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); | |
4414 jcc(Assembler::aboveEqual, ok); | |
4415 STOP("assert(top <= end)"); | |
4416 should_not_reach_here(); | |
4417 | |
4418 bind(ok); | |
4419 NOT_LP64(pop(thread_reg)); | |
4420 pop(t1); | |
4421 } | |
4422 #endif | |
4423 } | |
4424 | |
4425 class ControlWord { | |
4426 public: | |
4427 int32_t _value; | |
4428 | |
4429 int rounding_control() const { return (_value >> 10) & 3 ; } | |
4430 int precision_control() const { return (_value >> 8) & 3 ; } | |
4431 bool precision() const { return ((_value >> 5) & 1) != 0; } | |
4432 bool underflow() const { return ((_value >> 4) & 1) != 0; } | |
4433 bool overflow() const { return ((_value >> 3) & 1) != 0; } | |
4434 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } | |
4435 bool denormalized() const { return ((_value >> 1) & 1) != 0; } | |
4436 bool invalid() const { return ((_value >> 0) & 1) != 0; } | |
4437 | |
4438 void print() const { | |
4439 // rounding control | |
4440 const char* rc; | |
4441 switch (rounding_control()) { | |
4442 case 0: rc = "round near"; break; | |
4443 case 1: rc = "round down"; break; | |
4444 case 2: rc = "round up "; break; | |
4445 case 3: rc = "chop "; break; | |
4446 }; | |
4447 // precision control | |
4448 const char* pc; | |
4449 switch (precision_control()) { | |
4450 case 0: pc = "24 bits "; break; | |
4451 case 1: pc = "reserved"; break; | |
4452 case 2: pc = "53 bits "; break; | |
4453 case 3: pc = "64 bits "; break; | |
4454 }; | |
4455 // flags | |
4456 char f[9]; | |
4457 f[0] = ' '; | |
4458 f[1] = ' '; | |
4459 f[2] = (precision ()) ? 'P' : 'p'; | |
4460 f[3] = (underflow ()) ? 'U' : 'u'; | |
4461 f[4] = (overflow ()) ? 'O' : 'o'; | |
4462 f[5] = (zero_divide ()) ? 'Z' : 'z'; | |
4463 f[6] = (denormalized()) ? 'D' : 'd'; | |
4464 f[7] = (invalid ()) ? 'I' : 'i'; | |
4465 f[8] = '\x0'; | |
4466 // output | |
4467 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); | |
4468 } | |
4469 | |
4470 }; | |
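// Example decode using the accessors above (illustrative value): a control word of 0x027F --
// a common round-to-nearest, 53-bit-precision setting -- has rounding_control() == 0 ("round near"),
// precision_control() == 2 ("53 bits"), and all six mask bits (bits 0..5) set, so print() shows "PUOZDI".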
4471 | |
4472 class StatusWord { | |
4473 public: | |
4474 int32_t _value; | |
4475 | |
4476 bool busy() const { return ((_value >> 15) & 1) != 0; } | |
4477 bool C3() const { return ((_value >> 14) & 1) != 0; } | |
4478 bool C2() const { return ((_value >> 10) & 1) != 0; } | |
4479 bool C1() const { return ((_value >> 9) & 1) != 0; } | |
4480 bool C0() const { return ((_value >> 8) & 1) != 0; } | |
4481 int top() const { return (_value >> 11) & 7 ; } | |
4482 bool error_status() const { return ((_value >> 7) & 1) != 0; } | |
4483 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } | |
4484 bool precision() const { return ((_value >> 5) & 1) != 0; } | |
4485 bool underflow() const { return ((_value >> 4) & 1) != 0; } | |
4486 bool overflow() const { return ((_value >> 3) & 1) != 0; } | |
4487 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } | |
4488 bool denormalized() const { return ((_value >> 1) & 1) != 0; } | |
4489 bool invalid() const { return ((_value >> 0) & 1) != 0; } | |
4490 | |
4491 void print() const { | |
4492 // condition codes | |
4493 char c[5]; | |
4494 c[0] = (C3()) ? '3' : '-'; | |
4495 c[1] = (C2()) ? '2' : '-'; | |
4496 c[2] = (C1()) ? '1' : '-'; | |
4497 c[3] = (C0()) ? '0' : '-'; | |
4498 c[4] = '\x0'; | |
4499 // flags | |
4500 char f[9]; | |
4501 f[0] = (error_status()) ? 'E' : '-'; | |
4502 f[1] = (stack_fault ()) ? 'S' : '-'; | |
4503 f[2] = (precision ()) ? 'P' : '-'; | |
4504 f[3] = (underflow ()) ? 'U' : '-'; | |
4505 f[4] = (overflow ()) ? 'O' : '-'; | |
4506 f[5] = (zero_divide ()) ? 'Z' : '-'; | |
4507 f[6] = (denormalized()) ? 'D' : '-'; | |
4508 f[7] = (invalid ()) ? 'I' : '-'; | |
4509 f[8] = '\x0'; | |
4510 // output | |
4511 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); | |
4512 } | |
4513 | |
4514 }; | |
4515 | |
4516 class TagWord { | |
4517 public: | |
4518 int32_t _value; | |
4519 | |
4520 int tag_at(int i) const { return (_value >> (i*2)) & 3; } | |
4521 | |
4522 void print() const { | |
4523 printf("%04x", _value & 0xFFFF); | |
4524 } | |
4525 | |
4526 }; | |
4527 | |
4528 class FPU_Register { | |
4529 public: | |
4530 int32_t _m0; | |
4531 int32_t _m1; | |
4532 int16_t _ex; | |
4533 | |
4534 bool is_indefinite() const { | |
4535 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; | |
4536 } | |
4537 | |
4538 void print() const { | |
4539 char sign = (_ex < 0) ? '-' : '+'; | |
4540 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; | |
4541 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); | |
4542 }; | |
4543 | |
4544 }; | |
4545 | |
4546 class FPU_State { | |
4547 public: | |
4548 enum { | |
4549 register_size = 10, | |
4550 number_of_registers = 8, | |
4551 register_mask = 7 | |
4552 }; | |
4553 | |
4554 ControlWord _control_word; | |
4555 StatusWord _status_word; | |
4556 TagWord _tag_word; | |
4557 int32_t _error_offset; | |
4558 int32_t _error_selector; | |
4559 int32_t _data_offset; | |
4560 int32_t _data_selector; | |
4561 int8_t _register[register_size * number_of_registers]; | |
4562 | |
4563 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } | |
4564 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } | |
4565 | |
4566 const char* tag_as_string(int tag) const { | |
4567 switch (tag) { | |
4568 case 0: return "valid"; | |
4569 case 1: return "zero"; | |
4570 case 2: return "special"; | |
4571 case 3: return "empty"; | |
4572 } | |
4573 ShouldNotReachHere(); | |
4574 return NULL; | |
4575 } | |
4576 | |
4577 void print() const { | |
4578 // print computation registers | |
4579 { int t = _status_word.top(); | |
4580 for (int i = 0; i < number_of_registers; i++) { | |
4581 int j = (i - t) & register_mask; | |
4582 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); | |
4583 st(j)->print(); | |
4584 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); | |
4585 } | |
4586 } | |
4587 printf("\n"); | |
4588 // print control registers | |
4589 printf("ctrl = "); _control_word.print(); printf("\n"); | |
4590 printf("stat = "); _status_word .print(); printf("\n"); | |
4591 printf("tags = "); _tag_word .print(); printf("\n"); | |
4592 } | |
4593 | |
4594 }; | |
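// Example of the circular mapping in tag_for_st() above: if _status_word.top() == 5,
// then tag_for_st(2) reads _tag_word.tag_at((5 + 2) & 7), i.e. the tag of physical register 7.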
4595 | |
4596 class Flag_Register { | |
4597 public: | |
4598 int32_t _value; | |
4599 | |
4600 bool overflow() const { return ((_value >> 11) & 1) != 0; } | |
4601 bool direction() const { return ((_value >> 10) & 1) != 0; } | |
4602 bool sign() const { return ((_value >> 7) & 1) != 0; } | |
4603 bool zero() const { return ((_value >> 6) & 1) != 0; } | |
4604 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } | |
4605 bool parity() const { return ((_value >> 2) & 1) != 0; } | |
4606 bool carry() const { return ((_value >> 0) & 1) != 0; } | |
4607 | |
4608 void print() const { | |
4609 // flags | |
4610 char f[8]; | |
4611 f[0] = (overflow ()) ? 'O' : '-'; | |
4612 f[1] = (direction ()) ? 'D' : '-'; | |
4613 f[2] = (sign ()) ? 'S' : '-'; | |
4614 f[3] = (zero ()) ? 'Z' : '-'; | |
4615 f[4] = (auxiliary_carry()) ? 'A' : '-'; | |
4616 f[5] = (parity ()) ? 'P' : '-'; | |
4617 f[6] = (carry ()) ? 'C' : '-'; | |
4618 f[7] = '\x0'; | |
4619 // output | |
4620 printf("%08x flags = %s", _value, f); | |
4621 } | |
4622 | |
4623 }; | |
4624 | |
4625 class IU_Register { | |
4626 public: | |
4627 int32_t _value; | |
4628 | |
4629 void print() const { | |
4630 printf("%08x %11d", _value, _value); | |
4631 } | |
4632 | |
4633 }; | |
4634 | |
4635 class IU_State { | |
4636 public: | |
4637 Flag_Register _eflags; | |
4638 IU_Register _rdi; | |
4639 IU_Register _rsi; | |
4640 IU_Register _rbp; | |
4641 IU_Register _rsp; | |
4642 IU_Register _rbx; | |
4643 IU_Register _rdx; | |
4644 IU_Register _rcx; | |
4645 IU_Register _rax; | |
4646 | |
4647 void print() const { | |
4648 // computation registers | |
4649 printf("rax, = "); _rax.print(); printf("\n"); | |
4650 printf("rbx, = "); _rbx.print(); printf("\n"); | |
4651 printf("rcx = "); _rcx.print(); printf("\n"); | |
4652 printf("rdx = "); _rdx.print(); printf("\n"); | |
4653 printf("rdi = "); _rdi.print(); printf("\n"); | |
4654 printf("rsi = "); _rsi.print(); printf("\n"); | |
4655 printf("rbp, = "); _rbp.print(); printf("\n"); | |
4656 printf("rsp = "); _rsp.print(); printf("\n"); | |
4657 printf("\n"); | |
4658 // control registers | |
4659 printf("flgs = "); _eflags.print(); printf("\n"); | |
4660 } | |
4661 }; | |
4662 | |
4663 | |
4664 class CPU_State { | |
4665 public: | |
4666 FPU_State _fpu_state; | |
4667 IU_State _iu_state; | |
4668 | |
4669 void print() const { | |
4670 printf("--------------------------------------------------\n"); | |
4671 _iu_state .print(); | |
4672 printf("\n"); | |
4673 _fpu_state.print(); | |
4674 printf("--------------------------------------------------\n"); | |
4675 } | |
4676 | |
4677 }; | |
4678 | |
4679 | |
4680 static void _print_CPU_state(CPU_State* state) { | |
4681 state->print(); | |
4682 }; | |
4683 | |
4684 | |
4685 void MacroAssembler::print_CPU_state() { | |
4686 push_CPU_state(); | |
4687 push(rsp); // pass CPU state | |
4688 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); | |
4689 addptr(rsp, wordSize); // discard argument | |
4690 pop_CPU_state(); | |
4691 } | |
4692 | |
4693 | |
4694 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { | |
4695 static int counter = 0; | |
4696 FPU_State* fs = &state->_fpu_state; | |
4697 counter++; | |
4698 // For leaf calls, only verify that the top few elements remain empty. | |
4699 // We only need 1 empty at the top for C2 code. | |
4700 if( stack_depth < 0 ) { | |
4701 if( fs->tag_for_st(7) != 3 ) { | |
4702 printf("FPR7 not empty\n"); | |
4703 state->print(); | |
4704 assert(false, "error"); | |
4705 return false; | |
4706 } | |
4707 return true; // All other stack states do not matter | |
4708 } | |
4709 | |
4710 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, | |
4711 "bad FPU control word"); | |
4712 | |
4713 // compute stack depth | |
4714 int i = 0; | |
4715 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; | |
4716 int d = i; | |
4717 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; | |
4718 // verify findings | |
4719 if (i != FPU_State::number_of_registers) { | |
4720 // stack not contiguous | |
4721 printf("%s: stack not contiguous at ST%d\n", s, i); | |
4722 state->print(); | |
4723 assert(false, "error"); | |
4724 return false; | |
4725 } | |
4726 // check if computed stack depth corresponds to expected stack depth | |
4727 if (stack_depth < 0) { | |
4728 // expected stack depth is -stack_depth or less | |
4729 if (d > -stack_depth) { | |
4730 // too many elements on the stack | |
4731 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); | |
4732 state->print(); | |
4733 assert(false, "error"); | |
4734 return false; | |
4735 } | |
4736 } else { | |
4737 // expected stack depth is stack_depth | |
4738 if (d != stack_depth) { | |
4739 // wrong stack depth | |
4740 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); | |
4741 state->print(); | |
4742 assert(false, "error"); | |
4743 return false; | |
4744 } | |
4745 } | |
4746 // everything is cool | |
4747 return true; | |
4748 } | |
4749 | |
4750 | |
4751 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { | |
4752 if (!VerifyFPU) return; | |
4753 push_CPU_state(); | |
4754 push(rsp); // pass CPU state | |
4755 ExternalAddress msg((address) s); | |
4756 // pass message string s | |
4757 pushptr(msg.addr()); | |
4758 push(stack_depth); // pass stack depth | |
4759 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); | |
4760 addptr(rsp, 3 * wordSize); // discard arguments | |
4761 // check for error | |
4762 { Label L; | |
4763 testl(rax, rax); | |
4764 jcc(Assembler::notZero, L); | |
4765 int3(); // break if error condition | |
4766 bind(L); | |
4767 } | |
4768 pop_CPU_state(); | |
4769 } | |
4770 | |
8873 | 4771 void MacroAssembler::restore_cpu_control_state_after_jni() { |
4772 // Either restore the MXCSR register after returning from the JNI Call |
4773 // or verify that it wasn't changed (with -Xcheck:jni flag). |
4774 if (VM_Version::supports_sse()) { |
4775 if (RestoreMXCSROnJNICalls) { |
4776 ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std())); |
4777 } else if (CheckJNICalls) { |
4778 call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry())); |
4779 } |
4780 } |
4781 if (VM_Version::supports_avx()) { |
4782 // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty. |
4783 vzeroupper(); |
4784 } |
4785 |
4786 #ifndef _LP64 |
4787 // Either restore the x87 floating pointer control word after returning |
4788 // from the JNI call or verify that it wasn't changed. |
4789 if (CheckJNICalls) { |
4790 call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry())); |
4791 } |
4792 #endif // _LP64 |
4793 } |
4794 |
4795 |
7199 | 4796 void MacroAssembler::load_klass(Register dst, Register src) { |
4797 #ifdef _LP64 | |
12226 | 4798 if (UseCompressedClassPointers) { |
7199 | 4799 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); |
4800 decode_klass_not_null(dst); | |
4801 } else | |
4802 #endif | |
4803 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); | |
4804 } | |
4805 | |
4806 void MacroAssembler::load_prototype_header(Register dst, Register src) { | |
12056 | 4807 load_klass(dst, src); |
4808 movptr(dst, Address(dst, Klass::prototype_header_offset())); |
7199 | 4809 } |
4810 | |
4811 void MacroAssembler::store_klass(Register dst, Register src) { | |
4812 #ifdef _LP64 | |
12226 | 4813 if (UseCompressedClassPointers) { |
7199 | 4814 encode_klass_not_null(src); |
4815 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); | |
4816 } else | |
4817 #endif | |
4818 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); | |
4819 } | |
4820 | |
4821 void MacroAssembler::load_heap_oop(Register dst, Address src) { | |
4822 #ifdef _LP64 | |
4823 // FIXME: Must change all places where we try to load the klass. | |
4824 if (UseCompressedOops) { | |
4825 movl(dst, src); | |
4826 decode_heap_oop(dst); | |
4827 } else | |
4828 #endif | |
4829 movptr(dst, src); | |
4830 } | |
4831 | |
4832 // Doesn't do verification, generates fixed size code |
4833 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { | |
4834 #ifdef _LP64 | |
4835 if (UseCompressedOops) { | |
4836 movl(dst, src); | |
4837 decode_heap_oop_not_null(dst); | |
4838 } else | |
4839 #endif | |
4840 movptr(dst, src); | |
4841 } | |
4842 | |
4843 void MacroAssembler::store_heap_oop(Address dst, Register src) { | |
4844 #ifdef _LP64 | |
4845 if (UseCompressedOops) { | |
4846 assert(!dst.uses(src), "not enough registers"); | |
4847 encode_heap_oop(src); | |
4848 movl(dst, src); | |
4849 } else | |
4850 #endif | |
4851 movptr(dst, src); | |
4852 } | |
4853 | |
4854 void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) { | |
4855 assert_different_registers(src1, tmp); | |
4856 #ifdef _LP64 | |
4857 if (UseCompressedOops) { | |
4858 bool did_push = false; | |
4859 if (tmp == noreg) { | |
4860 tmp = rax; | |
4861 push(tmp); | |
4862 did_push = true; | |
4863 assert(!src2.uses(rsp), "can't push"); | |
4864 } | |
4865 load_heap_oop(tmp, src2); | |
4866 cmpptr(src1, tmp); | |
4867 if (did_push) pop(tmp); | |
4868 } else | |
4869 #endif | |
4870 cmpptr(src1, src2); | |
4871 } | |
4872 | |
4873 // Used for storing NULLs. | |
4874 void MacroAssembler::store_heap_oop_null(Address dst) { | |
4875 #ifdef _LP64 | |
4876 if (UseCompressedOops) { | |
4877 movl(dst, (int32_t)NULL_WORD); | |
4878 } else { | |
4879 movslq(dst, (int32_t)NULL_WORD); | |
4880 } | |
4881 #else | |
4882 movl(dst, (int32_t)NULL_WORD); | |
4883 #endif | |
4884 } | |
4885 | |
4886 #ifdef _LP64 | |
4887 void MacroAssembler::store_klass_gap(Register dst, Register src) { | |
12226 | 4888 if (UseCompressedClassPointers) { |
7199 | 4889 // Store to klass gap in destination |
4890 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); | |
4891 } | |
4892 } | |
4893 | |
4894 #ifdef ASSERT | |
4895 void MacroAssembler::verify_heapbase(const char* msg) { | |
12056 | 4896 assert (UseCompressedOops, "should be compressed"); |
7199 | 4897 assert (Universe::heap() != NULL, "java heap should be initialized"); |
4898 if (CheckCompressedOops) { | |
4899 Label ok; | |
4900 push(rscratch1); // cmpptr trashes rscratch1 | |
4901 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); | |
4902 jcc(Assembler::equal, ok); | |
4903 STOP(msg); | |
4904 bind(ok); | |
4905 pop(rscratch1); | |
4906 } | |
4907 } | |
4908 #endif | |
4909 | |
4910 // Algorithm must match oop.inline.hpp encode_heap_oop. | |
4911 void MacroAssembler::encode_heap_oop(Register r) { | |
4912 #ifdef ASSERT | |
4913 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); | |
4914 #endif | |
4915 verify_oop(r, "broken oop in encode_heap_oop"); | |
4916 if (Universe::narrow_oop_base() == NULL) { | |
4917 if (Universe::narrow_oop_shift() != 0) { | |
4918 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); | |
4919 shrq(r, LogMinObjAlignmentInBytes); | |
4920 } | |
4921 return; | |
4922 } | |
4923 testq(r, r); | |
4924 cmovq(Assembler::equal, r, r12_heapbase); | |
4925 subq(r, r12_heapbase); | |
4926 shrq(r, LogMinObjAlignmentInBytes); | |
4927 } | |
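// Worked example of the encoding above (hypothetical values): with a narrow-oop base of
// 0x0000000700000000 in r12 and a shift of 3, an oop at 0x0000000700000040 encodes to
// (0x0000000700000040 - 0x0000000700000000) >> 3 = 0x8, while NULL is first replaced by the
// base via the cmovq and therefore encodes to (base - base) >> 3 = 0.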
4928 | |
4929 void MacroAssembler::encode_heap_oop_not_null(Register r) { | |
4930 #ifdef ASSERT | |
4931 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); | |
4932 if (CheckCompressedOops) { | |
4933 Label ok; | |
4934 testq(r, r); | |
4935 jcc(Assembler::notEqual, ok); | |
4936 STOP("null oop passed to encode_heap_oop_not_null"); | |
4937 bind(ok); | |
4938 } | |
4939 #endif | |
4940 verify_oop(r, "broken oop in encode_heap_oop_not_null"); | |
4941 if (Universe::narrow_oop_base() != NULL) { | |
4942 subq(r, r12_heapbase); | |
4943 } | |
4944 if (Universe::narrow_oop_shift() != 0) { | |
4945 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); | |
4946 shrq(r, LogMinObjAlignmentInBytes); | |
4947 } | |
4948 } | |
4949 | |
4950 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { | |
4951 #ifdef ASSERT | |
4952 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); | |
4953 if (CheckCompressedOops) { | |
4954 Label ok; | |
4955 testq(src, src); | |
4956 jcc(Assembler::notEqual, ok); | |
4957 STOP("null oop passed to encode_heap_oop_not_null2"); | |
4958 bind(ok); | |
4959 } | |
4960 #endif | |
4961 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); | |
4962 if (dst != src) { | |
4963 movq(dst, src); | |
4964 } | |
4965 if (Universe::narrow_oop_base() != NULL) { | |
4966 subq(dst, r12_heapbase); | |
4967 } | |
4968 if (Universe::narrow_oop_shift() != 0) { | |
4969 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); | |
4970 shrq(dst, LogMinObjAlignmentInBytes); | |
4971 } | |
4972 } | |
4973 | |
4974 void MacroAssembler::decode_heap_oop(Register r) { | |
4975 #ifdef ASSERT | |
4976 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); | |
4977 #endif | |
4978 if (Universe::narrow_oop_base() == NULL) { | |
4979 if (Universe::narrow_oop_shift() != 0) { | |
4980 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); | |
4981 shlq(r, LogMinObjAlignmentInBytes); | |
4982 } | |
4983 } else { | |
4984 Label done; | |
4985 shlq(r, LogMinObjAlignmentInBytes); | |
4986 jccb(Assembler::equal, done); | |
4987 addq(r, r12_heapbase); | |
4988 bind(done); | |
4989 } | |
4990 verify_oop(r, "broken oop in decode_heap_oop"); | |
4991 } | |
4992 | |
4993 void MacroAssembler::decode_heap_oop_not_null(Register r) { | |
4994 // Note: it will change flags | |
4995 assert (UseCompressedOops, "should only be used for compressed headers"); | |
4996 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
4997 // Cannot assert, unverified entry point counts instructions (see .ad file) | |
4998 // vtableStubs also counts instructions in pd_code_size_limit. | |
4999 // Also do not verify_oop as this is called by verify_oop. | |
5000 if (Universe::narrow_oop_shift() != 0) { | |
5001 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); | |
5002 shlq(r, LogMinObjAlignmentInBytes); | |
5003 if (Universe::narrow_oop_base() != NULL) { | |
5004 addq(r, r12_heapbase); | |
5005 } | |
5006 } else { | |
5007 assert (Universe::narrow_oop_base() == NULL, "sanity"); | |
5008 } | |
5009 } | |
5010 | |
5011 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { | |
5012 // Note: it will change flags | |
5013 assert (UseCompressedOops, "should only be used for compressed headers"); | |
5014 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
5015 // Cannot assert, unverified entry point counts instructions (see .ad file) | |
5016 // vtableStubs also counts instructions in pd_code_size_limit. | |
5017 // Also do not verify_oop as this is called by verify_oop. | |
5018 if (Universe::narrow_oop_shift() != 0) { | |
5019 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); | |
5020 if (LogMinObjAlignmentInBytes == Address::times_8) { | |
5021 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); | |
5022 } else { | |
5023 if (dst != src) { | |
5024 movq(dst, src); | |
5025 } | |
5026 shlq(dst, LogMinObjAlignmentInBytes); | |
5027 if (Universe::narrow_oop_base() != NULL) { | |
5028 addq(dst, r12_heapbase); | |
5029 } | |
5030 } | |
5031 } else { | |
5032 assert (Universe::narrow_oop_base() == NULL, "sanity"); | |
5033 if (dst != src) { | |
5034 movq(dst, src); | |
5035 } | |
5036 } | |
5037 } | |
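// Note on the shift == 3 fast path above: the single leaq computes
// dst = r12_heapbase + src * 8, i.e. the shift-left-by-3 plus base add in one instruction.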
5038 | |
5039 void MacroAssembler::encode_klass_not_null(Register r) { | |
13000 | 5040 if (Universe::narrow_klass_base() != NULL) { |
5041 // Use r12 as a scratch register in which to temporarily load the narrow_klass_base. |
5042 assert(r != r12_heapbase, "Encoding a klass in r12"); |
5043 mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base()); |
5044 subq(r, r12_heapbase); |
5045 } |
7199 | 5046 if (Universe::narrow_klass_shift() != 0) { |
5047 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); | |
5048 shrq(r, LogKlassAlignmentInBytes); | |
5049 } | |
13000 | 5050 if (Universe::narrow_klass_base() != NULL) { |
5051 reinit_heapbase(); |
5052 } |
7199 | 5053 } |
5054 | |
5055 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { | |
12056 | 5056 if (dst == src) { |
5057 encode_klass_not_null(src); |
5058 } else { |
13000 | 5059 if (Universe::narrow_klass_base() != NULL) { |
5060 mov64(dst, (int64_t)Universe::narrow_klass_base()); |
5061 negq(dst); |
5062 addq(dst, src); |
5063 } else { |
5064 movptr(dst, src); |
5065 } |
12056 | 5066 if (Universe::narrow_klass_shift() != 0) { |
5067 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
5068 shrq(dst, LogKlassAlignmentInBytes); |
5069 } |
5070 } |
5071 } |
5072 |
5073 // Function instr_size_for_decode_klass_not_null() counts the instructions |
5074 // generated by decode_klass_not_null(register r) and reinit_heapbase(), |
5075 // when (Universe::heap() != NULL). Hence, if the instructions they |
5076 // generate change, then this method needs to be updated. |
5077 int MacroAssembler::instr_size_for_decode_klass_not_null() { |
12226 | 5078 assert (UseCompressedClassPointers, "only for compressed klass ptrs"); |
13000 | 5079 if (Universe::narrow_klass_base() != NULL) { |
5080 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). |
5081 return (Universe::narrow_klass_shift() == 0 ? 20 : 24); |
5082 } else { |
5083 // longest load decode klass function, mov64, leaq |
5084 return 16; |
5085 } |
12056 | 5086 } |
5087 |
5088 // !!! If the instructions that get generated here change then function |
5089 // instr_size_for_decode_klass_not_null() needs to get updated. |
7199 | 5090 void MacroAssembler::decode_klass_not_null(Register r) { |
5091 // Note: it will change flags | |
12226 | 5092 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
12056 | 5093 assert(r != r12_heapbase, "Decoding a klass in r12"); |
7199 | 5094 // Cannot assert, unverified entry point counts instructions (see .ad file) |
5095 // vtableStubs also counts instructions in pd_code_size_limit. | |
5096 // Also do not verify_oop as this is called by verify_oop. | |
5097 if (Universe::narrow_klass_shift() != 0) { | |
5098 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); | |
5099 shlq(r, LogKlassAlignmentInBytes); | |
12056 | 5100 } |
5101 // Use r12 as a scratch register in which to temporarily load the narrow_klass_base. |
13000 | 5102 if (Universe::narrow_klass_base() != NULL) { |
5103 mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base()); |
5104 addq(r, r12_heapbase); |
5105 reinit_heapbase(); |
5106 } |
7199 | 5107 } |
5108 | |
5109 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { | |
5110 // Note: it will change flags | |
12226 | 5111 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
12056 | 5112 if (dst == src) { |
5113 decode_klass_not_null(dst); |
7199 | 5114 } else { |
12056 | 5115 // Cannot assert, unverified entry point counts instructions (see .ad file) |
5116 // vtableStubs also counts instructions in pd_code_size_limit. |
5117 // Also do not verify_oop as this is called by verify_oop. |
5118 mov64(dst, (int64_t)Universe::narrow_klass_base()); |
5119 if (Universe::narrow_klass_shift() != 0) { |
5120 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
5121 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); |
5122 leaq(dst, Address(dst, src, Address::times_8, 0)); |
5123 } else { |
5124 addq(dst, src); |
7199 | 5125 } |
5126 } | |
5127 } | |
5128 | |
5129 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { | |
5130 assert (UseCompressedOops, "should only be used for compressed headers"); | |
5131 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
5132 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); | |
5133 int oop_index = oop_recorder()->find_index(obj); | |
5134 RelocationHolder rspec = oop_Relocation::spec(oop_index); | |
5135 mov_narrow_oop(dst, oop_index, rspec); | |
5136 } | |
5137 | |
5138 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { | |
5139 assert (UseCompressedOops, "should only be used for compressed headers"); | |
5140 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
5141 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); | |
5142 int oop_index = oop_recorder()->find_index(obj); | |
5143 RelocationHolder rspec = oop_Relocation::spec(oop_index); | |
5144 mov_narrow_oop(dst, oop_index, rspec); | |
5145 } | |
5146 | |
5147 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { | |
12226 | 5148 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
7199 | 5149 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
5150 int klass_index = oop_recorder()->find_index(k); | |
5151 RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |
12056 | 5152 mov_narrow_oop(dst, Klass::encode_klass(k), rspec); |
7199 | 5153 } |
5154 | |
5155 void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { | |
12226 | 5156 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
7199 | 5157 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
5158 int klass_index = oop_recorder()->find_index(k); | |
5159 RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |
12056 | 5160 mov_narrow_oop(dst, Klass::encode_klass(k), rspec); |
7199 | 5161 } |
5162 | |
5163 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { | |
5164 assert (UseCompressedOops, "should only be used for compressed headers"); | |
5165 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
5166 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); | |
5167 int oop_index = oop_recorder()->find_index(obj); | |
5168 RelocationHolder rspec = oop_Relocation::spec(oop_index); | |
5169 Assembler::cmp_narrow_oop(dst, oop_index, rspec); | |
5170 } | |
5171 | |
5172 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { | |
5173 assert (UseCompressedOops, "should only be used for compressed headers"); | |
5174 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
5175 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); | |
5176 int oop_index = oop_recorder()->find_index(obj); | |
5177 RelocationHolder rspec = oop_Relocation::spec(oop_index); | |
5178 Assembler::cmp_narrow_oop(dst, oop_index, rspec); | |
5179 } | |
5180 | |
5181 void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { | |
12226 | 5182 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
7199 | 5183 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
5184 int klass_index = oop_recorder()->find_index(k); | |
5185 RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |
12056 | 5186 Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec); |
7199 | 5187 } |
5188 | |
5189 void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { | |
12226 | 5190 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
7199 | 5191 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
5192 int klass_index = oop_recorder()->find_index(k); | |
5193 RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |
12056 | 5194 Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec); |
7199 | 5195 } |
5196 | |
5197 void MacroAssembler::reinit_heapbase() { | |
12226 | 5198 if (UseCompressedOops || UseCompressedClassPointers) { |
12056 | 5199 if (Universe::heap() != NULL) { |
5200 if (Universe::narrow_oop_base() == NULL) { |
5201 MacroAssembler::xorptr(r12_heapbase, r12_heapbase); |
5202 } else { |
5203 mov64(r12_heapbase, (int64_t)Universe::narrow_ptrs_base()); |
5204 } |
5205 } else { |
5206 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); |
5207 } |
5208 } |
5209 } |
5210 |
7199 | 5211 #endif // _LP64 |
5212 | |
5213 | |
5214 // C2 compiled method's prolog code. | |
5215 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { | |
5216 | |
5217 // WARNING: Initial instruction MUST be 5 bytes or longer so that | |
5218 // NativeJump::patch_verified_entry will be able to patch out the entry | |
5219 // code safely. The push to verify stack depth is ok at 5 bytes, | |
5220 // the frame allocation can be either 3 or 6 bytes. So if we don't do | |
5221 // stack bang then we must use the 6 byte frame allocation even if | |
5222 // we have no frame. :-( | |
5223 | |
5224 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); | |
5225 // Remove word for return addr | |
5226 framesize -= wordSize; | |
5227 | |
5228 // Calls to C2R adapters often do not accept exceptional returns. | |
5229 // We require that their callers must bang for them. But be careful, because | |
5230 // some VM calls (such as call site linkage) can use several kilobytes of | |
5231 // stack. But the stack safety zone should account for that. | |
5232 // See bugs 4446381, 4468289, 4497237. | |
5233 if (stack_bang) { | |
5234 generate_stack_overflow_check(framesize); | |
5235 | |
5236 // We always push rbp, so that on return to interpreter rbp, will be | |
5237 // restored correctly and we can correct the stack. | |
5238 push(rbp); | |
5239 // Remove word for ebp | |
5240 framesize -= wordSize; | |
5241 | |
5242 // Create frame | |
5243 if (framesize) { | |
5244 subptr(rsp, framesize); | |
5245 } | |
5246 } else { | |
5247 // Create frame (force generation of a 4 byte immediate value) | |
5248 subptr_imm32(rsp, framesize); | |
5249 | |
5250 // Save RBP register now. | |
5251 framesize -= wordSize; | |
5252 movptr(Address(rsp, framesize), rbp); | |
5253 } | |
5254 | |
5255 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth | |
5256 framesize -= wordSize; | |
5257 movptr(Address(rsp, framesize), (int32_t)0xbadb100d); | |
5258 } | |
5259 | |
5260 #ifndef _LP64 | |
5261 // If method sets FPU control word do it now | |
5262 if (fp_mode_24b) { | |
5263 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); | |
5264 } | |
5265 if (UseSSE >= 2 && VerifyFPU) { | |
5266 verify_FPU(0, "FPU stack must be clean on entry"); | |
5267 } | |
5268 #endif | |
5269 | |
5270 #ifdef ASSERT | |
5271 if (VerifyStackAtCalls) { | |
5272 Label L; | |
5273 push(rax); | |
5274 mov(rax, rsp); | |
5275 andptr(rax, StackAlignmentInBytes-1); | |
5276 cmpptr(rax, StackAlignmentInBytes-wordSize); | |
5277 pop(rax); | |
5278 jcc(Assembler::equal, L); | |
5279 STOP("Stack is not properly aligned!"); | |
5280 bind(L); | |
5281 } | |
5282 #endif | |
5283 | |
5284 } | |
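// Roughly, after this prolog the frame built above is, from higher to lower addresses:
// [return address] [saved rbp] [framesize bytes of C2 spills/locals], with the 0xbadb100d
// cookie stored just below the saved rbp when VerifyStackAtCalls is set.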
5285 | |
7474 | 5286 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) { |
5287 // cnt - number of qwords (8-byte words). |
5288 // base - start address, qword aligned. |
5289 assert(base==rdi, "base register must be edi for rep stos"); |
5290 assert(tmp==rax, "tmp register must be eax for rep stos"); |
5291 assert(cnt==rcx, "cnt register must be ecx for rep stos"); |
5292 |
5293 xorptr(tmp, tmp); |
5294 if (UseFastStosb) { |
5295 shlptr(cnt,3); // convert to number of bytes |
5296 rep_stosb(); |
5297 } else { |
5298 NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM |
5299 rep_stos(); |
5300 } |
5301 } |
5301 } |
7199 | 5302 |
5303 // IndexOf for constant substrings with size >= 8 chars | |
5304 // which don't need to be loaded through stack. | |
5305 void MacroAssembler::string_indexofC8(Register str1, Register str2, | |
5306 Register cnt1, Register cnt2, | |
5307 int int_cnt2, Register result, | |
5308 XMMRegister vec, Register tmp) { | |
5309 ShortBranchVerifier sbv(this); | |
5310 assert(UseSSE42Intrinsics, "SSE4.2 is required"); | |
5311 | |
5312 // This method uses the pcmpestri instruction with bound registers |
5313 // inputs: | |
5314 // xmm - substring | |
5315 // rax - substring length (elements count) | |
5316 // mem - scanned string | |
5317 // rdx - string length (elements count) | |
5318 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) | |
5319 // outputs: | |
5320 // rcx - matched index in string | |
5321 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); | |
5322 | |
5323 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, | |
5324 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, | |
5325 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; | |
5326 | |
5327 // Note, inline_string_indexOf() generates checks: | |
5328 // if (substr.count > string.count) return -1; | |
5329 // if (substr.count == 0) return 0; | |
5330 assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars"); |
5331 | |
5332 // Load substring. | |
5333 movdqu(vec, Address(str2, 0)); | |
5334 movl(cnt2, int_cnt2); | |
5335 movptr(result, str1); // string addr | |
5336 | |
5337 if (int_cnt2 > 8) { | |
5338 jmpb(SCAN_TO_SUBSTR); | |
5339 | |
5340 // Reload substr for rescan, this code | |
5341 // is executed only for large substrings (> 8 chars) | |
5342 bind(RELOAD_SUBSTR); | |
5343 movdqu(vec, Address(str2, 0)); | |
5344 negptr(cnt2); // Jumped here with negative cnt2, convert to positive | |
5345 | |
5346 bind(RELOAD_STR); | |
5347 // We came here after the beginning of the substring was | |
5348 // matched but the rest of it was not so we need to search | |
5349 // again. Start from the next element after the previous match. | |
5350 | |
5351 // cnt2 is number of substring remaining elements and |
5352 // cnt1 is number of string remaining elements when cmp failed. |
5353 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 | |
5354 subl(cnt1, cnt2); | |
5355 addl(cnt1, int_cnt2); | |
5356 movl(cnt2, int_cnt2); // Now restore cnt2 | |
5357 | |
5358 decrementl(cnt1); // Shift to next element | |
5359 cmpl(cnt1, cnt2); | |
5360 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring |
5361 | |
5362 addptr(result, 2); | |
5363 | |
5364 } // (int_cnt2 > 8) | |
5365 | |
5366 // Scan string for start of substr in 16-byte vectors | |
5367 bind(SCAN_TO_SUBSTR); | |
5368 pcmpestri(vec, Address(result, 0), 0x0d); | |
5369 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 | |
5370 subl(cnt1, 8); | |
5371 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string | |
5372 cmpl(cnt1, cnt2); | |
5373 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring |
5374 addptr(result, 16); | |
5375 jmpb(SCAN_TO_SUBSTR); | |
5376 | |
5377 // Found a potential substr | |
5378 bind(FOUND_CANDIDATE); | |
5379 // Matched whole vector if first element matched (tmp(rcx) == 0). | |
5380 if (int_cnt2 == 8) { | |
5381 jccb(Assembler::overflow, RET_FOUND); // OF == 1 | |
5382 } else { // int_cnt2 > 8 | |
5383 jccb(Assembler::overflow, FOUND_SUBSTR); | |
5384 } | |
5385 // After pcmpestri tmp(rcx) contains matched element index | |
5386 // Compute start addr of substr | |
5387 lea(result, Address(result, tmp, Address::times_2)); | |
5388 | |
5389 // Make sure string is still long enough | |
5390 subl(cnt1, tmp); | |
5391 cmpl(cnt1, cnt2); | |
5392 if (int_cnt2 == 8) { | |
5393 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); | |
5394 } else { // int_cnt2 > 8 | |
5395 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); | |
5396 } | |
5397 // Left less than substring. |
5398 | |
5399 bind(RET_NOT_FOUND); | |
5400 movl(result, -1); | |
5401 jmpb(EXIT); | |
5402 | |
5403 if (int_cnt2 > 8) { | |
5404 // This code is optimized for the case when whole substring | |
5405 // is matched if its head is matched. | |
5406 bind(MATCH_SUBSTR_HEAD); | |
5407 pcmpestri(vec, Address(result, 0), 0x0d); | |
5408 // Reload only the string if it does not match |
5409 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 | |
5410 | |
5411 Label CONT_SCAN_SUBSTR; | |
5412 // Compare the rest of substring (> 8 chars). | |
5413 bind(FOUND_SUBSTR); | |
5414 // First 8 chars are already matched. | |
5415 negptr(cnt2); | |
5416 addptr(cnt2, 8); | |
5417 | |
5418 bind(SCAN_SUBSTR); | |
5419 subl(cnt1, 8); | |
5420 cmpl(cnt2, -8); // Do not read beyond substring | |
5421 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); | |
5422 // Back-up strings to avoid reading beyond substring: | |
5423 // cnt1 = cnt1 - cnt2 + 8 | |
5424 addl(cnt1, cnt2); // cnt2 is negative | |
5425 addl(cnt1, 8); | |
5426 movl(cnt2, 8); negptr(cnt2); | |
5427 bind(CONT_SCAN_SUBSTR); | |
5428 if (int_cnt2 < (int)G) { | |
5429 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); | |
5430 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); | |
5431 } else { | |
5432 // calculate index in register to avoid integer overflow (int_cnt2*2) | |
5433 movl(tmp, int_cnt2); | |
5434 addptr(tmp, cnt2); | |
5435 movdqu(vec, Address(str2, tmp, Address::times_2, 0)); | |
5436 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); | |
5437 } | |
5438 // Need to reload strings pointers if not matched whole vector | |
5439 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 | |
5440 addptr(cnt2, 8); | |
5441 jcc(Assembler::negative, SCAN_SUBSTR); | |
5442 // Fall through if found full substring | |
5443 | |
5444 } // (int_cnt2 > 8) | |
5445 | |
5446 bind(RET_FOUND); | |
5447 // Found result if we matched full small substring. | |
5448 // Compute substr offset | |
5449 subptr(result, str1); | |
5450 shrl(result, 1); // index | |
5451 bind(EXIT); | |
5452 | |
5453 } // string_indexofC8 | |
5454 | |
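For readers unfamiliar with the pcmpestri mode byte, the imm8 value 0x0d used above corresponds to _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ORDERED. A minimal intrinsics sketch of one scan step (illustrative only; it assumes SSE4.2 and 16 readable bytes behind both pointers, which is exactly the constraint the stack-copy code below works around):

#include <immintrin.h>

// Returns the element index in 'hay' where an ordered match of 'needle'
// begins, or 8 if no match starts within this 8-char block (the value the
// emitted pcmpestri with imm8 0x0d leaves in rcx).
static int substr_scan_block(const unsigned short* needle, int needle_len,
                             const unsigned short* hay, int hay_len) {
  __m128i n = _mm_loadu_si128(reinterpret_cast<const __m128i*>(needle));
  __m128i h = _mm_loadu_si128(reinterpret_cast<const __m128i*>(hay));
  return _mm_cmpestri(n, needle_len, h, hay_len,
                      _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ORDERED);   // == 0x0d
}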
5455 // Small strings are loaded through stack if they cross page boundary. | |
5456 void MacroAssembler::string_indexof(Register str1, Register str2, | |
5457 Register cnt1, Register cnt2, | |
5458 int int_cnt2, Register result, | |
5459 XMMRegister vec, Register tmp) { | |
5460 ShortBranchVerifier sbv(this); | |
5461 assert(UseSSE42Intrinsics, "SSE4.2 is required"); | |
5462 // | |
5463 // int_cnt2 is length of small (< 8 chars) constant substring | |
5464 // or (-1) for non constant substring in which case its length | |
5465 // is in cnt2 register. | |
5466 // | |
5467 // Note, inline_string_indexOf() generates checks: | |
5468 // if (substr.count > string.count) return -1; | |
5469 // if (substr.count == 0) return 0; | |
5470 // | |
5471 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); | |
5472 | |
5473 // This method uses pcmpestri instruction with bound registers |
5474 // inputs: | |
5475 // xmm - substring | |
5476 // rax - substring length (elements count) | |
5477 // mem - scanned string | |
5478 // rdx - string length (elements count) | |
5479 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) | |
5480 // outputs: | |
5481 // rcx - matched index in string | |
5482 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); | |
5483 | |
5484 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, | |
5485 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, | |
5486 FOUND_CANDIDATE; | |
5487 | |
5488 { //======================================================== | |
5489 // We don't know where these strings are located | |
5490 // and we can't read beyond them. Load them through stack. | |
5491 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; | |
5492 | |
5493 movptr(tmp, rsp); // save old SP | |
5494 | |
5495 if (int_cnt2 > 0) { // small (< 8 chars) constant substring | |
5496 if (int_cnt2 == 1) { // One char | |
5497 load_unsigned_short(result, Address(str2, 0)); | |
5498 movdl(vec, result); // move 32 bits | |
5499 } else if (int_cnt2 == 2) { // Two chars | |
5500 movdl(vec, Address(str2, 0)); // move 32 bits | |
5501 } else if (int_cnt2 == 4) { // Four chars | |
5502 movq(vec, Address(str2, 0)); // move 64 bits | |
5503 } else { // cnt2 = { 3, 5, 6, 7 } | |
5504 // Array header size is 12 bytes in 32-bit VM | |
5505 // + 6 bytes for 3 chars == 18 bytes, | |
5506 // enough space to load vec and shift. | |
5507 assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity"); | |
5508 movdqu(vec, Address(str2, (int_cnt2*2)-16)); | |
5509 psrldq(vec, 16-(int_cnt2*2)); | |
5510 } | |
5511 } else { // not constant substring | |
5512 cmpl(cnt2, 8); | |
5513 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough | |
5514 | |
5515 // We can read beyond string if str+16 does not cross page boundary |
5516 // since heaps are aligned and mapped by pages. | |
5517 assert(os::vm_page_size() < (int)G, "default page should be small"); | |
5518 movl(result, str2); // We need only low 32 bits | |
5519 andl(result, (os::vm_page_size()-1)); | |
5520 cmpl(result, (os::vm_page_size()-16)); | |
5521 jccb(Assembler::belowEqual, CHECK_STR); | |
5522 | |
5523 // Move small strings to stack to allow load 16 bytes into vec. | |
5524 subptr(rsp, 16); | |
5525 int stk_offset = wordSize-2; | |
5526 push(cnt2); | |
5527 | |
5528 bind(COPY_SUBSTR); | |
5529 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); | |
5530 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); | |
5531 decrement(cnt2); | |
5532 jccb(Assembler::notZero, COPY_SUBSTR); | |
5533 | |
5534 pop(cnt2); | |
5535 movptr(str2, rsp); // New substring address | |
5536 } // non constant | |
5537 | |
5538 bind(CHECK_STR); | |
5539 cmpl(cnt1, 8); | |
5540 jccb(Assembler::aboveEqual, BIG_STRINGS); | |
5541 | |
5542 // Check cross page boundary. | |
5543 movl(result, str1); // We need only low 32 bits | |
5544 andl(result, (os::vm_page_size()-1)); | |
5545 cmpl(result, (os::vm_page_size()-16)); | |
5546 jccb(Assembler::belowEqual, BIG_STRINGS); | |
5547 | |
5548 subptr(rsp, 16); | |
5549 int stk_offset = -2; | |
5550 if (int_cnt2 < 0) { // not constant | |
5551 push(cnt2); | |
5552 stk_offset += wordSize; | |
5553 } | |
5554 movl(cnt2, cnt1); | |
5555 | |
5556 bind(COPY_STR); | |
5557 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); | |
5558 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); | |
5559 decrement(cnt2); | |
5560 jccb(Assembler::notZero, COPY_STR); | |
5561 | |
5562 if (int_cnt2 < 0) { // not constant | |
5563 pop(cnt2); | |
5564 } | |
5565 movptr(str1, rsp); // New string address | |
5566 | |
5567 bind(BIG_STRINGS); | |
5568 // Load substring. | |
5569 if (int_cnt2 < 0) { // -1 | |
5570 movdqu(vec, Address(str2, 0)); | |
5571 push(cnt2); // substr count | |
5572 push(str2); // substr addr | |
5573 push(str1); // string addr | |
5574 } else { | |
5575 // Small (< 8 chars) constant substrings are loaded already. | |
5576 movl(cnt2, int_cnt2); | |
5577 } | |
5578 push(tmp); // original SP | |
5579 | |
5580 } // Finished loading | |
5581 | |
5582 //======================================================== | |
5583 // Start search | |
5584 // | |
5585 | |
5586 movptr(result, str1); // string addr | |
5587 | |
5588 if (int_cnt2 < 0) { // Only for non constant substring | |
5589 jmpb(SCAN_TO_SUBSTR); | |
5590 | |
5591 // SP saved at sp+0 | |
5592 // String saved at sp+1*wordSize | |
5593 // Substr saved at sp+2*wordSize | |
5594 // Substr count saved at sp+3*wordSize | |
5595 | |
5596 // Reload substr for rescan, this code | |
5597 // is executed only for large substrings (> 8 chars) | |
5598 bind(RELOAD_SUBSTR); | |
5599 movptr(str2, Address(rsp, 2*wordSize)); | |
5600 movl(cnt2, Address(rsp, 3*wordSize)); | |
5601 movdqu(vec, Address(str2, 0)); | |
5602 // We came here after the beginning of the substring was | |
5603 // matched but the rest of it was not so we need to search | |
5604 // again. Start from the next element after the previous match. | |
5605 subptr(str1, result); // Restore counter | |
5606 shrl(str1, 1); | |
5607 addl(cnt1, str1); | |
5608 decrementl(cnt1); // Shift to next element | |
5609 cmpl(cnt1, cnt2); | |
5610 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring |
5611 | |
5612 addptr(result, 2); | |
5613 } // non constant | |
5614 | |
5615 // Scan string for start of substr in 16-byte vectors | |
5616 bind(SCAN_TO_SUBSTR); | |
5617 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); | |
5618 pcmpestri(vec, Address(result, 0), 0x0d); | |
5619 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 | |
5620 subl(cnt1, 8); | |
5621 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string | |
5622 cmpl(cnt1, cnt2); | |
5623 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring |
5624 addptr(result, 16); | |
5625 | |
5626 bind(ADJUST_STR); | |
5627 cmpl(cnt1, 8); // Do not read beyond string | |
5628 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); | |
5629 // Back-up string to avoid reading beyond string. | |
5630 lea(result, Address(result, cnt1, Address::times_2, -16)); | |
5631 movl(cnt1, 8); | |
5632 jmpb(SCAN_TO_SUBSTR); | |
5633 | |
5634 // Found a potential substr | |
5635 bind(FOUND_CANDIDATE); | |
5636 // After pcmpestri tmp(rcx) contains matched element index | |
5637 | |
5638 // Make sure string is still long enough | |
5639 subl(cnt1, tmp); | |
5640 cmpl(cnt1, cnt2); | |
5641 jccb(Assembler::greaterEqual, FOUND_SUBSTR); | |
5642 // Left less than substring. |
5643 | |
5644 bind(RET_NOT_FOUND); | |
5645 movl(result, -1); | |
5646 jmpb(CLEANUP); | |
5647 | |
5648 bind(FOUND_SUBSTR); | |
5649 // Compute start addr of substr | |
5650 lea(result, Address(result, tmp, Address::times_2)); | |
5651 | |
5652 if (int_cnt2 > 0) { // Constant substring | |
5653 // Repeat search for small substring (< 8 chars) | |
5654 // from new point without reloading substring. | |
5655 // Have to check that we don't read beyond string. | |
5656 cmpl(tmp, 8-int_cnt2); | |
5657 jccb(Assembler::greater, ADJUST_STR); | |
5658 // Fall through if matched whole substring. | |
5659 } else { // non constant | |
5660 assert(int_cnt2 == -1, "should be != 0"); | |
5661 | |
5662 addl(tmp, cnt2); | |
5663 // Found result if we matched whole substring. | |
5664 cmpl(tmp, 8); | |
5665 jccb(Assembler::lessEqual, RET_FOUND); | |
5666 | |
5667 // Repeat search for small substring (<= 8 chars) | |
5668 // from new point 'str1' without reloading substring. | |
5669 cmpl(cnt2, 8); | |
5670 // Have to check that we don't read beyond string. | |
5671 jccb(Assembler::lessEqual, ADJUST_STR); | |
5672 | |
5673 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; | |
5674 // Compare the rest of substring (> 8 chars). | |
5675 movptr(str1, result); | |
5676 | |
5677 cmpl(tmp, cnt2); | |
5678 // First 8 chars are already matched. | |
5679 jccb(Assembler::equal, CHECK_NEXT); | |
5680 | |
5681 bind(SCAN_SUBSTR); | |
5682 pcmpestri(vec, Address(str1, 0), 0x0d); | |
5683 // Need to reload strings pointers if not matched whole vector | |
5684 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 | |
5685 | |
5686 bind(CHECK_NEXT); | |
5687 subl(cnt2, 8); | |
5688 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring | |
5689 addptr(str1, 16); | |
5690 addptr(str2, 16); | |
5691 subl(cnt1, 8); | |
5692 cmpl(cnt2, 8); // Do not read beyond substring | |
5693 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); | |
5694 // Back-up strings to avoid reading beyond substring. | |
5695 lea(str2, Address(str2, cnt2, Address::times_2, -16)); | |
5696 lea(str1, Address(str1, cnt2, Address::times_2, -16)); | |
5697 subl(cnt1, cnt2); | |
5698 movl(cnt2, 8); | |
5699 addl(cnt1, 8); | |
5700 bind(CONT_SCAN_SUBSTR); | |
5701 movdqu(vec, Address(str2, 0)); | |
5702 jmpb(SCAN_SUBSTR); | |
5703 | |
5704 bind(RET_FOUND_LONG); | |
5705 movptr(str1, Address(rsp, wordSize)); | |
5706 } // non constant | |
5707 | |
5708 bind(RET_FOUND); | |
5709 // Compute substr offset | |
5710 subptr(result, str1); | |
5711 shrl(result, 1); // index | |
5712 | |
5713 bind(CLEANUP); | |
5714 pop(rsp); // restore SP | |
5715 | |
5716 } // string_indexof | |
5717 | |
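The page-boundary test used above (andl/cmpl against os::vm_page_size()) relies on heap pages being mapped as a whole, so a 16-byte vector load that stays inside one page cannot fault even if it reads past the end of a short string. A hedged scalar sketch of that check, assuming a power-of-two page size:

#include <cstdint>

// True when a 16-byte load starting at addr stays within its (power-of-two
// sized) page, i.e. the offset within the page is at most page_size - 16.
static bool can_read_16_bytes(uintptr_t addr, uintptr_t page_size) {
  return (addr & (page_size - 1)) <= page_size - 16;
}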
5718 // Compare strings. | |
5719 void MacroAssembler::string_compare(Register str1, Register str2, | |
5720 Register cnt1, Register cnt2, Register result, | |
5721 XMMRegister vec1) { | |
5722 ShortBranchVerifier sbv(this); | |
5723 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; | |
5724 | |
5725 // Compute the minimum of the string lengths and the | |
5726 // difference of the string lengths (stack). | |
5727 // Do the conditional move stuff | |
5728 movl(result, cnt1); | |
5729 subl(cnt1, cnt2); | |
5730 push(cnt1); | |
5731 cmov32(Assembler::lessEqual, cnt2, result); | |
5732 | |
5733 // Is the minimum length zero? | |
5734 testl(cnt2, cnt2); | |
5735 jcc(Assembler::zero, LENGTH_DIFF_LABEL); | |
5736 | |
7477 | 5737 // Compare first characters |
7199 | 5738 load_unsigned_short(result, Address(str1, 0)); |
5739 load_unsigned_short(cnt1, Address(str2, 0)); |
5740 subl(result, cnt1); |
5741 jcc(Assembler::notZero, POP_LABEL); |
7477 | 5742 cmpl(cnt2, 1); |
5743 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
5744 |
5745 // Check if the strings start at the same location. |
5746 cmpptr(str1, str2); |
5747 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
7199 | 5748 |
5749 Address::ScaleFactor scale = Address::times_2; | |
5750 int stride = 8; | |
5751 | |
8042 | 5752 if (UseAVX >= 2 && UseSSE42Intrinsics) { |
7477 | 5753 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; |
5754 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; |
5755 Label COMPARE_TAIL_LONG; |
5756 int pcmpmask = 0x19; |
5757 |
5758 // Setup to compare 16-chars (32-bytes) vectors, |
5759 // start from first character again because it has aligned address. |
5760 int stride2 = 16; |
5761 int adr_stride = stride << scale; |
5762 int adr_stride2 = stride2 << scale; |
5763 |
5764 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); |
5765 // rax and rdx are used by pcmpestri as elements counters |
5766 movl(result, cnt2); |
5767 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count |
5768 jcc(Assembler::zero, COMPARE_TAIL_LONG); |
5769 |
5770 // fast path : compare first 2 8-char vectors. |
5771 bind(COMPARE_16_CHARS); |
5772 movdqu(vec1, Address(str1, 0)); |
5773 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
5774 jccb(Assembler::below, COMPARE_INDEX_CHAR); |
5775 |
5776 movdqu(vec1, Address(str1, adr_stride)); |
5777 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask); |
5778 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS); |
5779 addl(cnt1, stride); |
5780 |
5781 // Compare the characters at index in cnt1 |
5782 bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character |
5783 load_unsigned_short(result, Address(str1, cnt1, scale)); |
5784 load_unsigned_short(cnt2, Address(str2, cnt1, scale)); |
5785 subl(result, cnt2); |
5786 jmp(POP_LABEL); |
5787 |
5788 // Setup the registers to start vector comparison loop |
5789 bind(COMPARE_WIDE_VECTORS); |
5790 lea(str1, Address(str1, result, scale)); |
5791 lea(str2, Address(str2, result, scale)); |
5792 subl(result, stride2); |
5793 subl(cnt2, stride2); |
5794 jccb(Assembler::zero, COMPARE_WIDE_TAIL); |
5795 negptr(result); |
5796 |
5797 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest) |
5798 bind(COMPARE_WIDE_VECTORS_LOOP); |
5799 vmovdqu(vec1, Address(str1, result, scale)); |
5800 vpxor(vec1, Address(str2, result, scale)); |
5801 vptest(vec1, vec1); |
5802 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); |
5803 addptr(result, stride2); |
5804 subl(cnt2, stride2); |
5805 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
8873 | 5806 // clean upper bits of YMM registers |
5807 vzeroupper(); |
7477 | 5808 |
5809 // compare wide vectors tail |
5810 bind(COMPARE_WIDE_TAIL); |
5811 testptr(result, result); |
5812 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
5813 |
5814 movl(result, stride2); |
5815 movl(cnt2, result); |
5816 negptr(result); |
5817 jmpb(COMPARE_WIDE_VECTORS_LOOP); |
5818 |
5819 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. |
5820 bind(VECTOR_NOT_EQUAL); |
8873 | 5821 // clean upper bits of YMM registers |
5822 vzeroupper(); |
7477 | 5823 lea(str1, Address(str1, result, scale)); |
5824 lea(str2, Address(str2, result, scale)); |
5825 jmp(COMPARE_16_CHARS); |
5826 |
5827 // Compare tail chars, length between 1 to 15 chars |
5828 bind(COMPARE_TAIL_LONG); |
5829 movl(cnt2, result); |
5830 cmpl(cnt2, stride); |
5831 jccb(Assembler::less, COMPARE_SMALL_STR); |
5832 |
5833 movdqu(vec1, Address(str1, 0)); |
5834 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
5835 jcc(Assembler::below, COMPARE_INDEX_CHAR); |
5836 subptr(cnt2, stride); |
5837 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
5838 lea(str1, Address(str1, result, scale)); |
5839 lea(str2, Address(str2, result, scale)); |
5840 negptr(cnt2); |
5841 jmpb(WHILE_HEAD_LABEL); |
5842 |
5843 bind(COMPARE_SMALL_STR); |
5844 } else if (UseSSE42Intrinsics) { |
7199 | 5845 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; |
5846 int pcmpmask = 0x19; | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
5847 // Setup to compare 8-char (16-byte) vectors, |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
5848 // start from first character again because it has aligned address. |
7199 | 5849 movl(result, cnt2); |
5850 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count | |
5851 jccb(Assembler::zero, COMPARE_TAIL); | |
5852 | |
5853 lea(str1, Address(str1, result, scale)); | |
5854 lea(str2, Address(str2, result, scale)); | |
5855 negptr(result); | |
5856 | |
5857 // pcmpestri | |
5858 // inputs: | |
5859 // vec1- substring | |
5860 // rax - negative string length (elements count) | |
5861 // mem - scanned string |
5862 // rdx - string length (elements count) | |
5863 // pcmpmask - cmp mode: 11000 (string compare with negated result) | |
5864 // + 00 (unsigned bytes) or + 01 (unsigned shorts) | |
5865 // outputs: | |
5866 // rcx - first mismatched element index | |
5867 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); | |
5868 | |
5869 bind(COMPARE_WIDE_VECTORS); | |
5870 movdqu(vec1, Address(str1, result, scale)); | |
5871 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); | |
5872 // After pcmpestri cnt1(rcx) contains mismatched element index | |
5873 | |
5874 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 | |
5875 addptr(result, stride); | |
5876 subptr(cnt2, stride); | |
5877 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); | |
5878 | |
5879 // compare wide vectors tail | |
5880 testptr(result, result); |
7199 | 5881 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
5882 | |
5883 movl(cnt2, stride); | |
5884 movl(result, stride); | |
5885 negptr(result); | |
5886 movdqu(vec1, Address(str1, result, scale)); | |
5887 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); | |
5888 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); | |
5889 | |
5890 // Mismatched characters in the vectors | |
5891 bind(VECTOR_NOT_EQUAL); | |
7477 | 5892 addptr(cnt1, result); |
5893 load_unsigned_short(result, Address(str1, cnt1, scale)); |
5894 load_unsigned_short(cnt2, Address(str2, cnt1, scale)); |
5895 subl(result, cnt2); |
7199 | 5896 jmpb(POP_LABEL); |
5897 | |
5898 bind(COMPARE_TAIL); // limit is zero | |
5899 movl(cnt2, result); | |
5900 // Fallthru to tail compare | |
5901 } | |
5902 // Shift str2 and str1 to the end of the arrays, negate min | |
7477 | 5903 lea(str1, Address(str1, cnt2, scale)); |
5904 lea(str2, Address(str2, cnt2, scale)); |
5905 decrementl(cnt2); // first character was compared already |
7199 | 5906 negptr(cnt2); |
5907 | |
5908 // Compare the rest of the elements | |
5909 bind(WHILE_HEAD_LABEL); | |
5910 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); | |
5911 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); | |
5912 subl(result, cnt1); | |
5913 jccb(Assembler::notZero, POP_LABEL); | |
5914 increment(cnt2); | |
5915 jccb(Assembler::notZero, WHILE_HEAD_LABEL); | |
5916 | |
5917 // Strings are equal up to min length. Return the length difference. | |
5918 bind(LENGTH_DIFF_LABEL); | |
5919 pop(result); | |
5920 jmpb(DONE_LABEL); | |
5921 | |
5922 // Discard the stored length difference | |
5923 bind(POP_LABEL); | |
5924 pop(cnt1); | |
5925 | |
5926 // That's it | |
5927 bind(DONE_LABEL); | |
5928 } | |
5929 | |
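The pcmpmask value 0x19 used throughout string_compare corresponds to _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY: element-wise equality over unsigned 16-bit chars with the result negated, so the reported index is the first mismatch. A small intrinsics sketch of one such step (illustrative only; it assumes SSE4.2, 16 readable bytes behind both pointers, and lengths of at least 8):

#include <immintrin.h>

// Index of the first mismatching char in two 8-char blocks, or 8 if the
// blocks are equal (the value pcmpestri leaves in rcx for imm8 0x19).
static int first_mismatch_block(const unsigned short* s1, int len1,
                                const unsigned short* s2, int len2) {
  __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
  __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
  return _mm_cmpestri(a, len1, b, len2,
                      _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH |
                      _SIDD_NEGATIVE_POLARITY);                    // == 0x19
}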
5930 // Compare char[] arrays aligned to 4 bytes or substrings. | |
5931 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, | |
5932 Register limit, Register result, Register chr, | |
5933 XMMRegister vec1, XMMRegister vec2) { | |
5934 ShortBranchVerifier sbv(this); | |
5935 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; | |
5936 | |
5937 int length_offset = arrayOopDesc::length_offset_in_bytes(); | |
5938 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); | |
5939 | |
5940 // Check the input args | |
5941 cmpptr(ary1, ary2); | |
5942 jcc(Assembler::equal, TRUE_LABEL); | |
5943 | |
5944 if (is_array_equ) { | |
5945 // Need additional checks for arrays_equals. | |
5946 testptr(ary1, ary1); | |
5947 jcc(Assembler::zero, FALSE_LABEL); | |
5948 testptr(ary2, ary2); | |
5949 jcc(Assembler::zero, FALSE_LABEL); | |
5950 | |
5951 // Check the lengths | |
5952 movl(limit, Address(ary1, length_offset)); | |
5953 cmpl(limit, Address(ary2, length_offset)); | |
5954 jcc(Assembler::notEqual, FALSE_LABEL); | |
5955 } | |
5956 | |
5957 // count == 0 | |
5958 testl(limit, limit); | |
5959 jcc(Assembler::zero, TRUE_LABEL); | |
5960 | |
5961 if (is_array_equ) { | |
5962 // Load array address | |
5963 lea(ary1, Address(ary1, base_offset)); | |
5964 lea(ary2, Address(ary2, base_offset)); | |
5965 } | |
5966 | |
5967 shll(limit, 1); // byte count != 0 | |
5968 movl(result, limit); // copy | |
5969 | |
7477 | 5970 if (UseAVX >= 2) { |
5971 // With AVX2, use 32-byte vector compare |
5972 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
5973 |
5974 // Compare 32-byte vectors |
5975 andl(result, 0x0000001e); // tail count (in bytes) |
5976 andl(limit, 0xffffffe0); // vector count (in bytes) |
5977 jccb(Assembler::zero, COMPARE_TAIL); |
5978 |
5979 lea(ary1, Address(ary1, limit, Address::times_1)); |
5980 lea(ary2, Address(ary2, limit, Address::times_1)); |
5981 negptr(limit); |
5982 |
5983 bind(COMPARE_WIDE_VECTORS); |
5984 vmovdqu(vec1, Address(ary1, limit, Address::times_1)); |
5985 vmovdqu(vec2, Address(ary2, limit, Address::times_1)); |
5986 vpxor(vec1, vec2); |
5987 |
5988 vptest(vec1, vec1); |
5989 jccb(Assembler::notZero, FALSE_LABEL); |
5990 addptr(limit, 32); |
5991 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |
5992 |
5993 testl(result, result); |
5994 jccb(Assembler::zero, TRUE_LABEL); |
5995 |
5996 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); |
5997 vmovdqu(vec2, Address(ary2, result, Address::times_1, -32)); |
5998 vpxor(vec1, vec2); |
5999 |
6000 vptest(vec1, vec1); |
6001 jccb(Assembler::notZero, FALSE_LABEL); |
6002 jmpb(TRUE_LABEL); |
6003 |
6004 bind(COMPARE_TAIL); // limit is zero |
6005 movl(limit, result); |
6006 // Fallthru to tail compare |
6007 } else if (UseSSE42Intrinsics) { |
7199 | 6008 // With SSE4.2, use double quad vector compare |
6009 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; | |
6010 | |
6011 // Compare 16-byte vectors | |
6012 andl(result, 0x0000000e); // tail count (in bytes) | |
6013 andl(limit, 0xfffffff0); // vector count (in bytes) | |
6014 jccb(Assembler::zero, COMPARE_TAIL); | |
6015 | |
6016 lea(ary1, Address(ary1, limit, Address::times_1)); | |
6017 lea(ary2, Address(ary2, limit, Address::times_1)); | |
6018 negptr(limit); | |
6019 | |
6020 bind(COMPARE_WIDE_VECTORS); | |
6021 movdqu(vec1, Address(ary1, limit, Address::times_1)); | |
6022 movdqu(vec2, Address(ary2, limit, Address::times_1)); | |
6023 pxor(vec1, vec2); | |
6024 | |
6025 ptest(vec1, vec1); | |
6026 jccb(Assembler::notZero, FALSE_LABEL); | |
6027 addptr(limit, 16); | |
6028 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); | |
6029 | |
6030 testl(result, result); | |
6031 jccb(Assembler::zero, TRUE_LABEL); | |
6032 | |
6033 movdqu(vec1, Address(ary1, result, Address::times_1, -16)); | |
6034 movdqu(vec2, Address(ary2, result, Address::times_1, -16)); | |
6035 pxor(vec1, vec2); | |
6036 | |
6037 ptest(vec1, vec1); | |
6038 jccb(Assembler::notZero, FALSE_LABEL); | |
6039 jmpb(TRUE_LABEL); | |
6040 | |
6041 bind(COMPARE_TAIL); // limit is zero | |
6042 movl(limit, result); | |
6043 // Fallthru to tail compare | |
6044 } | |
6045 | |
6046 // Compare 4-byte vectors | |
6047 andl(limit, 0xfffffffc); // vector count (in bytes) | |
6048 jccb(Assembler::zero, COMPARE_CHAR); | |
6049 | |
6050 lea(ary1, Address(ary1, limit, Address::times_1)); | |
6051 lea(ary2, Address(ary2, limit, Address::times_1)); | |
6052 negptr(limit); | |
6053 | |
6054 bind(COMPARE_VECTORS); | |
6055 movl(chr, Address(ary1, limit, Address::times_1)); | |
6056 cmpl(chr, Address(ary2, limit, Address::times_1)); | |
6057 jccb(Assembler::notEqual, FALSE_LABEL); | |
6058 addptr(limit, 4); | |
6059 jcc(Assembler::notZero, COMPARE_VECTORS); | |
6060 | |
6061 // Compare trailing char (final 2 bytes), if any | |
6062 bind(COMPARE_CHAR); | |
6063 testl(result, 0x2); // tail char | |
6064 jccb(Assembler::zero, TRUE_LABEL); | |
6065 load_unsigned_short(chr, Address(ary1, 0)); | |
6066 load_unsigned_short(limit, Address(ary2, 0)); | |
6067 cmpl(chr, limit); | |
6068 jccb(Assembler::notEqual, FALSE_LABEL); | |
6069 | |
6070 bind(TRUE_LABEL); | |
6071 movl(result, 1); // return true | |
6072 jmpb(DONE); | |
6073 | |
6074 bind(FALSE_LABEL); | |
6075 xorl(result, result); // return false | |
6076 | |
6077 // That's it | |
6078 bind(DONE); | |
8873 | 6079 if (UseAVX >= 2) { |
6080 // clean upper bits of YMM registers |
6081 vzeroupper(); |
6082 } |
7199 | 6083 } |
6084 | |
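The AVX2 branch of char_arrays_equals compares 32 bytes per iteration by XOR-ing the two vectors and testing the result for all-zero. A hedged intrinsics sketch of that single step (assumes AVX2 and 32 readable bytes behind both pointers):

#include <immintrin.h>

// True when two 32-byte blocks are identical (the vpxor + vptest pattern).
static bool blocks_equal_32(const void* p1, const void* p2) {
  __m256i a = _mm256_loadu_si256(static_cast<const __m256i*>(p1));
  __m256i b = _mm256_loadu_si256(static_cast<const __m256i*>(p2));
  __m256i x = _mm256_xor_si256(a, b);      // all zero iff the blocks match
  return _mm256_testz_si256(x, x) != 0;    // vptest sets ZF when x is zero
}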
6085 void MacroAssembler::generate_fill(BasicType t, bool aligned, | |
6086 Register to, Register value, Register count, | |
6087 Register rtmp, XMMRegister xtmp) { | |
6088 ShortBranchVerifier sbv(this); | |
6089 assert_different_registers(to, value, count, rtmp); | |
6090 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; | |
6091 Label L_fill_2_bytes, L_fill_4_bytes; | |
6092 | |
6093 int shift = -1; | |
6094 switch (t) { | |
6095 case T_BYTE: | |
6096 shift = 2; | |
6097 break; | |
6098 case T_SHORT: | |
6099 shift = 1; | |
6100 break; | |
6101 case T_INT: | |
6102 shift = 0; | |
6103 break; | |
6104 default: ShouldNotReachHere(); | |
6105 } | |
6106 | |
6107 if (t == T_BYTE) { | |
6108 andl(value, 0xff); | |
6109 movl(rtmp, value); | |
6110 shll(rtmp, 8); | |
6111 orl(value, rtmp); | |
6112 } | |
6113 if (t == T_SHORT) { | |
6114 andl(value, 0xffff); | |
6115 } | |
6116 if (t == T_BYTE || t == T_SHORT) { | |
6117 movl(rtmp, value); | |
6118 shll(rtmp, 16); | |
6119 orl(value, rtmp); | |
6120 } | |
6121 | |
6122 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element | |
6123 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp | |
6124 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { | |
6125 // align source address at 4 bytes address boundary | |
6126 if (t == T_BYTE) { | |
6127 // One byte misalignment happens only for byte arrays | |
6128 testptr(to, 1); | |
6129 jccb(Assembler::zero, L_skip_align1); | |
6130 movb(Address(to, 0), value); | |
6131 increment(to); | |
6132 decrement(count); | |
6133 BIND(L_skip_align1); | |
6134 } | |
6135 // Two bytes misalignment happens only for byte and short (char) arrays | |
6136 testptr(to, 2); | |
6137 jccb(Assembler::zero, L_skip_align2); | |
6138 movw(Address(to, 0), value); | |
6139 addptr(to, 2); | |
6140 subl(count, 1<<(shift-1)); | |
6141 BIND(L_skip_align2); | |
6142 } | |
6143 if (UseSSE < 2) { | |
6144 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; | |
6145 // Fill 32-byte chunks | |
6146 subl(count, 8 << shift); | |
6147 jcc(Assembler::less, L_check_fill_8_bytes); | |
6148 align(16); | |
6149 | |
6150 BIND(L_fill_32_bytes_loop); | |
6151 | |
6152 for (int i = 0; i < 32; i += 4) { | |
6153 movl(Address(to, i), value); | |
6154 } | |
6155 | |
6156 addptr(to, 32); | |
6157 subl(count, 8 << shift); | |
6158 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); | |
6159 BIND(L_check_fill_8_bytes); | |
6160 addl(count, 8 << shift); | |
6161 jccb(Assembler::zero, L_exit); | |
6162 jmpb(L_fill_8_bytes); | |
6163 | |
6164 // | |
6165 // length is too short, just fill qwords | |
6166 // | |
6167 BIND(L_fill_8_bytes_loop); | |
6168 movl(Address(to, 0), value); | |
6169 movl(Address(to, 4), value); | |
6170 addptr(to, 8); | |
6171 BIND(L_fill_8_bytes); | |
6172 subl(count, 1 << (shift + 1)); | |
6173 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); | |
6174 // fall through to fill 4 bytes | |
6175 } else { | |
6176 Label L_fill_32_bytes; | |
6177 if (!UseUnalignedLoadStores) { | |
6178 // align to 8 bytes, we know we are 4 byte aligned to start | |
6179 testptr(to, 4); | |
6180 jccb(Assembler::zero, L_fill_32_bytes); | |
6181 movl(Address(to, 0), value); | |
6182 addptr(to, 4); | |
6183 subl(count, 1<<shift); | |
6184 } | |
6185 BIND(L_fill_32_bytes); | |
6186 { | |
6187 assert( UseSSE >= 2, "supported cpu only" ); | |
6188 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; | |
6189 movdl(xtmp, value); | |
7475 | 6190 if (UseAVX >= 2 && UseUnalignedLoadStores) { |
6191 // Fill 64-byte chunks |
6192 Label L_fill_64_bytes_loop, L_check_fill_32_bytes; |
6193 vpbroadcastd(xtmp, xtmp); |
6194 |
6195 subl(count, 16 << shift); |
6196 jcc(Assembler::less, L_check_fill_32_bytes); |
6197 align(16); |
6198 |
6199 BIND(L_fill_64_bytes_loop); |
6200 vmovdqu(Address(to, 0), xtmp); |
6201 vmovdqu(Address(to, 32), xtmp); |
6202 addptr(to, 64); |
6203 subl(count, 16 << shift); |
6204 jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); |
6205 |
6206 BIND(L_check_fill_32_bytes); |
6207 addl(count, 8 << shift); |
6208 jccb(Assembler::less, L_check_fill_8_bytes); |
6209 vmovdqu(Address(to, 0), xtmp); |
6210 addptr(to, 32); |
6211 subl(count, 8 << shift); |
8873 | 6212 |
6213 BIND(L_check_fill_8_bytes); |
6214 // clean upper bits of YMM registers |
6215 vzeroupper(); |
7199 | 6216 } else { |
7475 | 6217 // Fill 32-byte chunks |
6218 pshufd(xtmp, xtmp, 0); |
6219 |
6220 subl(count, 8 << shift); |
6221 jcc(Assembler::less, L_check_fill_8_bytes); |
6222 align(16); |
6223 |
6224 BIND(L_fill_32_bytes_loop); |
6225 |
6226 if (UseUnalignedLoadStores) { |
6227 movdqu(Address(to, 0), xtmp); |
6228 movdqu(Address(to, 16), xtmp); |
6229 } else { |
6230 movq(Address(to, 0), xtmp); |
6231 movq(Address(to, 8), xtmp); |
6232 movq(Address(to, 16), xtmp); |
6233 movq(Address(to, 24), xtmp); |
6234 } |
6235 |
6236 addptr(to, 32); |
6237 subl(count, 8 << shift); |
6238 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); |
8873 | 6239 |
6240 BIND(L_check_fill_8_bytes); |
7199 | 6241 } |
6242 addl(count, 8 << shift); | |
6243 jccb(Assembler::zero, L_exit); | |
6244 jmpb(L_fill_8_bytes); | |
6245 | |
6246 // | |
6247 // length is too short, just fill qwords | |
6248 // | |
6249 BIND(L_fill_8_bytes_loop); | |
6250 movq(Address(to, 0), xtmp); | |
6251 addptr(to, 8); | |
6252 BIND(L_fill_8_bytes); | |
6253 subl(count, 1 << (shift + 1)); | |
6254 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); | |
6255 } | |
6256 } | |
6257 // fill trailing 4 bytes | |
6258 BIND(L_fill_4_bytes); | |
6259 testl(count, 1<<shift); | |
6260 jccb(Assembler::zero, L_fill_2_bytes); | |
6261 movl(Address(to, 0), value); | |
6262 if (t == T_BYTE || t == T_SHORT) { | |
6263 addptr(to, 4); | |
6264 BIND(L_fill_2_bytes); | |
6265 // fill trailing 2 bytes | |
6266 testl(count, 1<<(shift-1)); | |
6267 jccb(Assembler::zero, L_fill_byte); | |
6268 movw(Address(to, 0), value); | |
6269 if (t == T_BYTE) { | |
6270 addptr(to, 2); | |
6271 BIND(L_fill_byte); | |
6272 // fill trailing byte | |
6273 testl(count, 1); | |
6274 jccb(Assembler::zero, L_exit); | |
6275 movb(Address(to, 0), value); | |
6276 } else { | |
6277 BIND(L_fill_byte); | |
6278 } | |
6279 } else { | |
6280 BIND(L_fill_2_bytes); | |
6281 } | |
6282 BIND(L_exit); | |
6283 } | |

// encode char[] to byte[] in ISO_8859_1
void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
                                      XMMRegister tmp1Reg, XMMRegister tmp2Reg,
                                      XMMRegister tmp3Reg, XMMRegister tmp4Reg,
                                      Register tmp5, Register result) {
  // rsi: src
  // rdi: dst
  // rdx: len
  // rcx: tmp5
  // rax: result
  ShortBranchVerifier sbv(this);
  assert_different_registers(src, dst, len, tmp5, result);
  Label L_done, L_copy_1_char, L_copy_1_char_exit;

  // set result
  xorl(result, result);
  // check for zero length
  testl(len, len);
  jcc(Assembler::zero, L_done);
  movl(result, len);

  // Setup pointers
  lea(src, Address(src, len, Address::times_2)); // char[]
  lea(dst, Address(dst, len, Address::times_1)); // byte[]
  negptr(len);
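  // (Added note: src and dst now point one-past-the-end of their arrays and len holds
  // the negative element count, so every copy loop below addresses elements at a
  // negative offset and counts len up towards zero. If a char that does not fit in
  // ISO-8859-1 is found, the loops stop with len still negative, and the final
  // addptr(result, len) turns result into the number of chars actually encoded.)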

  if (UseSSE42Intrinsics || UseAVX >= 2) {
    Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
    Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;

    if (UseAVX >= 2) {
      Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      vpbroadcastd(tmp1Reg, tmp1Reg);
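      // (Added note: tmp1Reg now holds 0xff00 replicated across every 16-bit lane. A
      // char can be encoded in ISO-8859-1 only if its value is <= 0xFF, so the
      // vptest/ptest checks against this mask set ZF exactly when a whole vector of
      // chars is encodable.)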
      jmpb(L_chars_32_check);

      bind(L_copy_32_chars);
      vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
      vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_32_chars_exit);
      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
      vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);

      bind(L_chars_32_check);
      addptr(len, 32);
      jccb(Assembler::lessEqual, L_copy_32_chars);

      bind(L_copy_32_chars_exit);
      subptr(len, 16);
      jccb(Assembler::greater, L_copy_16_chars_exit);

    } else if (UseSSE42Intrinsics) {
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      pshufd(tmp1Reg, tmp1Reg, 0);
      jmpb(L_chars_16_check);
    }

    bind(L_copy_16_chars);
    if (UseAVX >= 2) {
      vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
      vptest(tmp2Reg, tmp1Reg);
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
    } else {
      if (UseAVX > 0) {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
      } else {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        por(tmp2Reg, tmp3Reg);
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        por(tmp2Reg, tmp4Reg);
      }
      ptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      packuswb(tmp3Reg, tmp4Reg);
    }
    movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);

    bind(L_chars_16_check);
    addptr(len, 16);
    jccb(Assembler::lessEqual, L_copy_16_chars);

    bind(L_copy_16_chars_exit);
    if (UseAVX >= 2) {
      // clean upper bits of YMM registers
      vzeroupper();
    }
    subptr(len, 8);
    jccb(Assembler::greater, L_copy_8_chars_exit);

    bind(L_copy_8_chars);
    movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
    ptest(tmp3Reg, tmp1Reg);
    jccb(Assembler::notZero, L_copy_8_chars_exit);
    packuswb(tmp3Reg, tmp1Reg);
    movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
    addptr(len, 8);
    jccb(Assembler::lessEqual, L_copy_8_chars);

    bind(L_copy_8_chars_exit);
    subptr(len, 8);
    jccb(Assembler::zero, L_done);
  }

  bind(L_copy_1_char);
  load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
  testl(tmp5, 0xff00);      // check if Unicode char
  jccb(Assembler::notZero, L_copy_1_char_exit);
  movb(Address(dst, len, Address::times_1, 0), tmp5);
  addptr(len, 1);
  jccb(Assembler::less, L_copy_1_char);

  bind(L_copy_1_char_exit);
  addptr(result, len); // len is negative count of not processed elements
  bind(L_done);
}

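// (Added sketch: the scalar contract that encode_iso_array above implements; the
// function and parameter names below are illustrative only, not HotSpot symbols:
//
//   #include <stdint.h>
//
//   int encode_iso_prefix(const uint16_t* chars, int8_t* bytes, int len) {
//     int i = 0;
//     for (; i < len; i++) {
//       if (chars[i] > 0xFF) break;       // first char outside ISO-8859-1 stops encoding
//       bytes[i] = (int8_t) chars[i];
//     }
//     return i;                           // == 'result': number of chars encoded
//   }
// )
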
/**
 * Emits code to update CRC-32 with a byte value according to constants in table
 *
 * @param [in,out]crc   Register containing the crc.
 * @param [in]val       Register containing the byte to fold into the CRC.
 * @param [in]table     Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 *
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  xorl(val, crc);
  andl(val, 0xFF);
  shrl(crc, 8); // unsigned shift
  xorl(crc, Address(table, val, Address::times_4, 0));
}
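// (Added sketch: the byte-at-a-time loop that the alignment and tail paths of
// kernel_crc32 below amount to -- one update_byte_crc32 per input byte. 'crc_table' is
// assumed to be a standard 256-entry table for the reflected CRC-32 polynomial, which
// is how the 'table' register is indexed above; the function name is illustrative.
//
//   #include <stddef.h>
//   #include <stdint.h>
//
//   uint32_t crc32_bytewise(uint32_t crc, const uint8_t* buf, size_t len,
//                           const uint32_t crc_table[256]) {
//     crc = ~crc;                                           // kernel_crc32: notl(crc) on entry
//     for (size_t i = 0; i < len; i++) {
//       crc = crc_table[(crc ^ buf[i]) & 0xFF] ^ (crc >> 8);
//     }
//     return ~crc;                                          // kernel_crc32: notl(crc) on exit
//   }
// )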

/**
 * Fold 128-bit data chunk
 */
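// (Added note: xK holds two precomputed 64-bit folding constants, powers of x reduced
// modulo the CRC polynomial; kernel_crc32 below loads them from the crc_by128_masks
// table. vpclmulhdq/vpclmulldq carry-less-multiply the high and low halves of the
// running 128-bit remainder by those constants, and both products are XORed with the
// next 16 bytes of input, folding 128 bits of data into the remainder per call.)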
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
  vpclmulldq(xcrc, xK, xcrc); // [63:0]
  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
  pxor(xcrc, xtmp);
}

void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
  vpclmulhdq(xtmp, xK, xcrc);
  vpclmulldq(xcrc, xK, xcrc);
  pxor(xcrc, xbuf);
  pxor(xcrc, xtmp);
}

/**
 * 8-bit folds to compute 32-bit CRC
 *
 * uint64_t xcrc;
 * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
 */
void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
  movdl(tmp, xcrc);
  andl(tmp, 0xFF);
  movdl(xtmp, Address(table, tmp, Address::times_4, 0));
  psrldq(xcrc, 1); // unsigned shift one byte
  pxor(xcrc, xtmp);
}
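// (Added note: this is the same table-driven byte fold as update_byte_crc32, applied to
// a remainder that is still wider than 32 bits and held in an XMM register: the low
// byte indexes the table, psrldq shifts the whole remainder right by one byte, and the
// table entry is XORed back in. kernel_crc32 runs it eight times in total -- four on
// the XMM remainder and four on the 32-bit tail -- to reach the final CRC.)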

/**
 * uint32_t crc;
 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 */
void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
  movl(tmp, crc);
  andl(tmp, 0xFF);
  shrl(crc, 8);
  xorl(crc, Address(table, tmp, Address::times_4, 0));
}

/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)
 * @param len   register containing number of bytes
 * @param table register that will contain address of CRC table
 * @param tmp   scratch register
 */
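// (Added overview: the CRC is kept bit-inverted while the kernel runs -- notl(crc) on
// entry and on exit. The buffer is first brought to 16-byte alignment with byte-wise
// table updates, the bulk of the data is then folded 64 bytes per iteration as four
// independent 128-bit streams using carry-less multiplication, the four streams are
// folded back into one 128-bit remainder, that remainder is reduced with one more
// carry-less multiply step plus eight 8-bit table folds down to 32 bits, and any
// remaining tail bytes are again handled byte-wise.)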
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
  assert_different_registers(crc, buf, len, table, tmp, rax);

  Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
  Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;

  lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
  notl(crc); // ~crc
  cmpl(len, 16);
  jcc(Assembler::less, L_tail);

  // Align buffer to 16 bytes
  movl(tmp, buf);
  andl(tmp, 0xF);
  jccb(Assembler::zero, L_aligned);
  subl(tmp, 16);
  addl(len, tmp);

  align(4);
  BIND(L_align_loop);
  movsbl(rax, Address(buf, 0)); // load byte with sign extension
  update_byte_crc32(crc, rax, table);
  increment(buf);
  incrementl(tmp);
  jccb(Assembler::less, L_align_loop);

  BIND(L_aligned);
  movl(tmp, len); // save
  shrl(len, 4);
  jcc(Assembler::zero, L_tail_restore);

  // Fold crc into first bytes of vector
  movdqa(xmm1, Address(buf, 0));
  movdl(rax, xmm1);
  xorl(crc, rax);
  pinsrd(xmm1, crc, 0);
  addptr(buf, 16);
  subl(len, 4); // len > 0
  jcc(Assembler::less, L_fold_tail);

  movdqa(xmm2, Address(buf,  0));
  movdqa(xmm3, Address(buf, 16));
  movdqa(xmm4, Address(buf, 32));
  addptr(buf, 48);
  subl(len, 3);
  jcc(Assembler::lessEqual, L_fold_512b);

  // Fold total 512 bits of polynomial on each iteration,
  // 128 bits per each of 4 parallel streams.
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));

  align(32);
  BIND(L_fold_512b_loop);
  fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
  fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
  fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
  fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
  addptr(buf, 64);
  subl(len, 4);
  jcc(Assembler::greater, L_fold_512b_loop);

  // Fold 512 bits to 128 bits.
  BIND(L_fold_512b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);

  // Fold the rest of 128 bits data chunks
  BIND(L_fold_tail);
  addl(len, 3);
  jccb(Assembler::lessEqual, L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));

  BIND(L_fold_tail_loop);
  fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
  addptr(buf, 16);
  decrementl(len);
  jccb(Assembler::greater, L_fold_tail_loop);

  // Fold 128 bits in xmm1 down into 32 bits in crc register.
  BIND(L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
  psrldq(xmm1, 8);
  psrldq(xmm2, 4);
  pxor(xmm0, xmm1);
  pxor(xmm0, xmm2);

  // 8 8-bit folds to compute 32-bit CRC.
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(xmm0, table, xmm1, rax);
  }
  movdl(crc, xmm0); // mov 32 bits to general register
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(crc, table, rax);
  }

  BIND(L_tail_restore);
  movl(len, tmp); // restore
  BIND(L_tail);
  andl(len, 0xf);
  jccb(Assembler::zero, L_exit);

  // Fold the rest of bytes
  align(4);
  BIND(L_tail_loop);
  movsbl(rax, Address(buf, 0)); // load byte with sign extension
  update_byte_crc32(crc, rax, table);
  increment(buf);
  decrementl(len);
  jccb(Assembler::greater, L_tail_loop);

  BIND(L_exit);
  notl(crc); // ~c
}

#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero: return Assembler::notZero;
    case Assembler::notZero: return Assembler::zero;
    case Assembler::less: return Assembler::greaterEqual;
    case Assembler::lessEqual: return Assembler::greater;
    case Assembler::greater: return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below: return Assembler::aboveEqual;
    case Assembler::belowEqual: return Assembler::above;
    case Assembler::above: return Assembler::belowEqual;
    case Assembler::aboveEqual: return Assembler::below;
    case Assembler::overflow: return Assembler::noOverflow;
    case Assembler::noOverflow: return Assembler::overflow;
    case Assembler::negative: return Assembler::positive;
    case Assembler::positive: return Assembler::negative;
    case Assembler::parity: return Assembler::noParity;
    case Assembler::noParity: return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}

SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
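
// (Added note: SkipIfEqual is an RAII guard for code emission -- the constructor emits
// a cmp8/jcc over whatever is emitted while the guard is live, and the destructor binds
// the branch target, so that code is skipped at runtime whenever *flag_addr == value.
// A minimal usage sketch; 'SomeFlag' and 'some_entry' are illustrative names, not
// symbols defined in this file:
//
//   {
//     SkipIfEqual skip(masm, &SomeFlag, false);
//     masm->call(RuntimeAddress(some_entry));  // emitted, but jumped over when SomeFlag is false
//   }                                          // ~SkipIfEqual binds the label here
// )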