Mercurial > hg > truffle
annotate src/cpu/x86/vm/macroAssembler_x86.cpp @ 20304:a22acf6d7598
8048112: G1 Full GC needs to support the case when the very first region is not available
Summary: Refactor preparation for compaction during Full GC so that it lazily initializes the first compaction point. This also avoids problems later when the first region may not be committed. Also reviewed by K. Barrett.
Reviewed-by: brutisso
author | tschatzl |
---|---|
date | Mon, 21 Jul 2014 10:00:31 +0200 |
parents | 0bf37f737702 |
children | 52b4284cb496 b1bc1af04c6e |
rev | line source |
---|---|
7199 | 1 /* |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17922
diff
changeset
|
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. |
7199 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 * or visit www.oracle.com if you need additional information or have any | |
21 * questions. | |
22 * | |
23 */ | |
24 | |
25 #include "precompiled.hpp" | |
26 #include "asm/assembler.hpp" | |
27 #include "asm/assembler.inline.hpp" | |
28 #include "compiler/disassembler.hpp" | |
29 #include "gc_interface/collectedHeap.inline.hpp" | |
30 #include "interpreter/interpreter.hpp" | |
31 #include "memory/cardTableModRefBS.hpp" | |
32 #include "memory/resourceArea.hpp" | |
12056
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
33 #include "memory/universe.hpp" |
7199 | 34 #include "prims/methodHandles.hpp" |
35 #include "runtime/biasedLocking.hpp" | |
36 #include "runtime/interfaceSupport.hpp" | |
37 #include "runtime/objectMonitor.hpp" | |
38 #include "runtime/os.hpp" | |
39 #include "runtime/sharedRuntime.hpp" | |
40 #include "runtime/stubRoutines.hpp" | |
8001
db9981fd3124
8005915: Unify SERIALGC and INCLUDE_ALTERNATE_GCS
jprovino
parents:
7477
diff
changeset
|
41 #include "utilities/macros.hpp" |
db9981fd3124
8005915: Unify SERIALGC and INCLUDE_ALTERNATE_GCS
jprovino
parents:
7477
diff
changeset
|
42 #if INCLUDE_ALL_GCS |
7199 | 43 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" |
44 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" | |
45 #include "gc_implementation/g1/heapRegion.hpp" | |
8001
db9981fd3124
8005915: Unify SERIALGC and INCLUDE_ALTERNATE_GCS
jprovino
parents:
7477
diff
changeset
|
46 #endif // INCLUDE_ALL_GCS |
7199 | 47 |
48 #ifdef PRODUCT | |
49 #define BLOCK_COMMENT(str) /* nothing */ | |
50 #define STOP(error) stop(error) | |
51 #else | |
52 #define BLOCK_COMMENT(str) block_comment(str) | |
53 #define STOP(error) block_comment(error); stop(error) | |
54 #endif | |
55 | |
56 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") | |
57 | |
17937
78bbf4d43a14
8037816: Fix for 8036122 breaks build with Xcode5/clang
drchase
parents:
17922
diff
changeset
|
58 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC |
7199 | 59 |
#ifdef ASSERT
// x86 places no platform-specific restrictions on instruction marks,
// so the assertion hook always succeeds.
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif
7199 | 64 static Assembler::Condition reverse[] = { |
65 Assembler::noOverflow /* overflow = 0x0 */ , | |
66 Assembler::overflow /* noOverflow = 0x1 */ , | |
67 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , | |
68 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , | |
69 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , | |
70 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , | |
71 Assembler::above /* belowEqual = 0x6 */ , | |
72 Assembler::belowEqual /* above = 0x7 */ , | |
73 Assembler::positive /* negative = 0x8 */ , | |
74 Assembler::negative /* positive = 0x9 */ , | |
75 Assembler::noParity /* parity = 0xa */ , | |
76 Assembler::parity /* noParity = 0xb */ , | |
77 Assembler::greaterEqual /* less = 0xc */ , | |
78 Assembler::less /* greaterEqual = 0xd */ , | |
79 Assembler::greater /* lessEqual = 0xe */ , | |
80 Assembler::lessEqual /* greater = 0xf, */ | |
81 | |
82 }; | |
83 | |
84 | |
85 // Implementation of MacroAssembler | |
86 | |
87 // First all the versions that have distinct versions depending on 32/64 bit | |
88 // Unless the difference is trivial (1 line or so). | |
89 | |
90 #ifndef _LP64 | |
91 | |
92 // 32bit versions | |
93 | |
94 Address MacroAssembler::as_Address(AddressLiteral adr) { | |
95 return Address(adr.target(), adr.rspec()); | |
96 } | |
97 | |
98 Address MacroAssembler::as_Address(ArrayAddress adr) { | |
99 return Address::make_array(adr); | |
100 } | |
101 | |
102 void MacroAssembler::call_VM_leaf_base(address entry_point, | |
103 int number_of_arguments) { | |
104 call(RuntimeAddress(entry_point)); | |
105 increment(rsp, number_of_arguments * wordSize); | |
106 } | |
107 | |
108 void MacroAssembler::cmpklass(Address src1, Metadata* obj) { | |
109 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
110 } | |
111 | |
112 void MacroAssembler::cmpklass(Register src1, Metadata* obj) { | |
113 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
114 } | |
115 | |
116 void MacroAssembler::cmpoop(Address src1, jobject obj) { | |
117 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |
118 } | |
119 | |
120 void MacroAssembler::cmpoop(Register src1, jobject obj) { | |
121 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |
122 } | |
123 | |
124 void MacroAssembler::extend_sign(Register hi, Register lo) { | |
125 // According to Intel Doc. AP-526, "Integer Divide", p.18. | |
126 if (VM_Version::is_P6() && hi == rdx && lo == rax) { | |
127 cdql(); | |
128 } else { | |
129 movl(hi, lo); | |
130 sarl(hi, 31); | |
131 } | |
132 } | |
133 | |
134 void MacroAssembler::jC2(Register tmp, Label& L) { | |
135 // set parity bit if FPU flag C2 is set (via rax) | |
136 save_rax(tmp); | |
137 fwait(); fnstsw_ax(); | |
138 sahf(); | |
139 restore_rax(tmp); | |
140 // branch | |
141 jcc(Assembler::parity, L); | |
142 } | |
143 | |
144 void MacroAssembler::jnC2(Register tmp, Label& L) { | |
145 // set parity bit if FPU flag C2 is set (via rax) | |
146 save_rax(tmp); | |
147 fwait(); fnstsw_ax(); | |
148 sahf(); | |
149 restore_rax(tmp); | |
150 // branch | |
151 jcc(Assembler::noParity, L); | |
152 } | |
153 | |
154 // 32bit can do a case table jump in one instruction but we no longer allow the base | |
155 // to be installed in the Address class | |
156 void MacroAssembler::jump(ArrayAddress entry) { | |
157 jmp(as_Address(entry)); | |
158 } | |
159 | |
160 // Note: y_lo will be destroyed | |
161 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { | |
162 // Long compare for Java (semantics as described in JVM spec.) | |
163 Label high, low, done; | |
164 | |
165 cmpl(x_hi, y_hi); | |
166 jcc(Assembler::less, low); | |
167 jcc(Assembler::greater, high); | |
168 // x_hi is the return register | |
169 xorl(x_hi, x_hi); | |
170 cmpl(x_lo, y_lo); | |
171 jcc(Assembler::below, low); | |
172 jcc(Assembler::equal, done); | |
173 | |
174 bind(high); | |
175 xorl(x_hi, x_hi); | |
176 increment(x_hi); | |
177 jmp(done); | |
178 | |
179 bind(low); | |
180 xorl(x_hi, x_hi); | |
181 decrementl(x_hi); | |
182 | |
183 bind(done); | |
184 } | |
185 | |
186 void MacroAssembler::lea(Register dst, AddressLiteral src) { | |
187 mov_literal32(dst, (int32_t)src.target(), src.rspec()); | |
188 } | |
189 | |
190 void MacroAssembler::lea(Address dst, AddressLiteral adr) { | |
191 // leal(dst, as_Address(adr)); | |
192 // see note in movl as to why we must use a move | |
193 mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); | |
194 } | |
195 | |
196 void MacroAssembler::leave() { | |
197 mov(rsp, rbp); | |
198 pop(rbp); | |
199 } | |
200 | |
201 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { | |
202 // Multiplication of two Java long values stored on the stack | |
203 // as illustrated below. Result is in rdx:rax. | |
204 // | |
205 // rsp ---> [ ?? ] \ \ | |
206 // .... | y_rsp_offset | | |
207 // [ y_lo ] / (in bytes) | x_rsp_offset | |
208 // [ y_hi ] | (in bytes) | |
209 // .... | | |
210 // [ x_lo ] / | |
211 // [ x_hi ] | |
212 // .... | |
213 // | |
214 // Basic idea: lo(result) = lo(x_lo * y_lo) | |
215 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) | |
216 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); | |
217 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); | |
218 Label quick; | |
219 // load x_hi, y_hi and check if quick | |
220 // multiplication is possible | |
221 movl(rbx, x_hi); | |
222 movl(rcx, y_hi); | |
223 movl(rax, rbx); | |
224 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 | |
225 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply | |
226 // do full multiplication | |
227 // 1st step | |
228 mull(y_lo); // x_hi * y_lo | |
229 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, | |
230 // 2nd step | |
231 movl(rax, x_lo); | |
232 mull(rcx); // x_lo * y_hi | |
233 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, | |
234 // 3rd step | |
235 bind(quick); // note: rbx, = 0 if quick multiply! | |
236 movl(rax, x_lo); | |
237 mull(y_lo); // x_lo * y_lo | |
238 addl(rdx, rbx); // correct hi(x_lo * y_lo) | |
239 } | |
240 | |
241 void MacroAssembler::lneg(Register hi, Register lo) { | |
242 negl(lo); | |
243 adcl(hi, 0); | |
244 negl(hi); | |
245 } | |
246 | |
247 void MacroAssembler::lshl(Register hi, Register lo) { | |
248 // Java shift left long support (semantics as described in JVM spec., p.305) | |
249 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) | |
250 // shift value is in rcx ! | |
251 assert(hi != rcx, "must not use rcx"); | |
252 assert(lo != rcx, "must not use rcx"); | |
253 const Register s = rcx; // shift count | |
254 const int n = BitsPerWord; | |
255 Label L; | |
256 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) | |
257 cmpl(s, n); // if (s < n) | |
258 jcc(Assembler::less, L); // else (s >= n) | |
259 movl(hi, lo); // x := x << n | |
260 xorl(lo, lo); | |
261 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! | |
262 bind(L); // s (mod n) < n | |
263 shldl(hi, lo); // x := x << s | |
264 shll(lo); | |
265 } | |
266 | |
267 | |
268 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { | |
269 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) | |
270 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) | |
271 assert(hi != rcx, "must not use rcx"); | |
272 assert(lo != rcx, "must not use rcx"); | |
273 const Register s = rcx; // shift count | |
274 const int n = BitsPerWord; | |
275 Label L; | |
276 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) | |
277 cmpl(s, n); // if (s < n) | |
278 jcc(Assembler::less, L); // else (s >= n) | |
279 movl(lo, hi); // x := x >> n | |
280 if (sign_extension) sarl(hi, 31); | |
281 else xorl(hi, hi); | |
282 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! | |
283 bind(L); // s (mod n) < n | |
284 shrdl(lo, hi); // x := x >> s | |
285 if (sign_extension) sarl(hi); | |
286 else shrl(hi); | |
287 } | |
288 | |
289 void MacroAssembler::movoop(Register dst, jobject obj) { | |
290 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |
291 } | |
292 | |
293 void MacroAssembler::movoop(Address dst, jobject obj) { | |
294 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |
295 } | |
296 | |
297 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { | |
298 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
299 } | |
300 | |
301 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { | |
302 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
303 } | |
304 | |
17780 | 305 void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { |
306 // scratch register is not used, | |
307 // it is defined to match parameters of 64-bit version of this method. | |
7199 | 308 if (src.is_lval()) { |
309 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); | |
310 } else { | |
311 movl(dst, as_Address(src)); | |
312 } | |
313 } | |
314 | |
315 void MacroAssembler::movptr(ArrayAddress dst, Register src) { | |
316 movl(as_Address(dst), src); | |
317 } | |
318 | |
319 void MacroAssembler::movptr(Register dst, ArrayAddress src) { | |
320 movl(dst, as_Address(src)); | |
321 } | |
322 | |
323 // src should NEVER be a real pointer. Use AddressLiteral for true pointers | |
324 void MacroAssembler::movptr(Address dst, intptr_t src) { | |
325 movl(dst, src); | |
326 } | |
327 | |
328 | |
329 void MacroAssembler::pop_callee_saved_registers() { | |
330 pop(rcx); | |
331 pop(rdx); | |
332 pop(rdi); | |
333 pop(rsi); | |
334 } | |
335 | |
336 void MacroAssembler::pop_fTOS() { | |
337 fld_d(Address(rsp, 0)); | |
338 addl(rsp, 2 * wordSize); | |
339 } | |
340 | |
341 void MacroAssembler::push_callee_saved_registers() { | |
342 push(rsi); | |
343 push(rdi); | |
344 push(rdx); | |
345 push(rcx); | |
346 } | |
347 | |
348 void MacroAssembler::push_fTOS() { | |
349 subl(rsp, 2 * wordSize); | |
350 fstp_d(Address(rsp, 0)); | |
351 } | |
352 | |
353 | |
354 void MacroAssembler::pushoop(jobject obj) { | |
355 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); | |
356 } | |
357 | |
358 void MacroAssembler::pushklass(Metadata* obj) { | |
359 push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); | |
360 } | |
361 | |
362 void MacroAssembler::pushptr(AddressLiteral src) { | |
363 if (src.is_lval()) { | |
364 push_literal32((int32_t)src.target(), src.rspec()); | |
365 } else { | |
366 pushl(as_Address(src)); | |
367 } | |
368 } | |
369 | |
370 void MacroAssembler::set_word_if_not_zero(Register dst) { | |
371 xorl(dst, dst); | |
372 set_byte_if_not_zero(dst); | |
373 } | |
374 | |
375 static void pass_arg0(MacroAssembler* masm, Register arg) { | |
376 masm->push(arg); | |
377 } | |
378 | |
379 static void pass_arg1(MacroAssembler* masm, Register arg) { | |
380 masm->push(arg); | |
381 } | |
382 | |
383 static void pass_arg2(MacroAssembler* masm, Register arg) { | |
384 masm->push(arg); | |
385 } | |
386 | |
387 static void pass_arg3(MacroAssembler* masm, Register arg) { | |
388 masm->push(arg); | |
389 } | |
390 | |
391 #ifndef PRODUCT | |
392 extern "C" void findpc(intptr_t x); | |
393 #endif | |
394 | |
395 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { | |
396 // In order to get locks to work, we need to fake a in_VM state | |
397 JavaThread* thread = JavaThread::current(); | |
398 JavaThreadState saved_state = thread->thread_state(); | |
399 thread->set_thread_state(_thread_in_vm); | |
400 if (ShowMessageBoxOnError) { | |
401 JavaThread* thread = JavaThread::current(); | |
402 JavaThreadState saved_state = thread->thread_state(); | |
403 thread->set_thread_state(_thread_in_vm); | |
404 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { | |
405 ttyLocker ttyl; | |
406 BytecodeCounter::print(); | |
407 } | |
408 // To see where a verify_oop failed, get $ebx+40/X for this frame. | |
409 // This is the value of eip which points to where verify_oop will return. | |
410 if (os::message_box(msg, "Execution stopped, print registers?")) { | |
411 print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); | |
412 BREAKPOINT; | |
413 } | |
414 } else { | |
415 ttyLocker ttyl; | |
416 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); | |
417 } | |
418 // Don't assert holding the ttyLock | |
419 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); | |
420 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); | |
421 } | |
422 | |
423 void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { | |
424 ttyLocker ttyl; | |
425 FlagSetting fs(Debugging, true); | |
426 tty->print_cr("eip = 0x%08x", eip); | |
427 #ifndef PRODUCT | |
428 if ((WizardMode || Verbose) && PrintMiscellaneous) { | |
429 tty->cr(); | |
430 findpc(eip); | |
431 tty->cr(); | |
432 } | |
433 #endif | |
434 #define PRINT_REG(rax) \ | |
435 { tty->print("%s = ", #rax); os::print_location(tty, rax); } | |
436 PRINT_REG(rax); | |
437 PRINT_REG(rbx); | |
438 PRINT_REG(rcx); | |
439 PRINT_REG(rdx); | |
440 PRINT_REG(rdi); | |
441 PRINT_REG(rsi); | |
442 PRINT_REG(rbp); | |
443 PRINT_REG(rsp); | |
444 #undef PRINT_REG | |
445 // Print some words near top of staack. | |
446 int* dump_sp = (int*) rsp; | |
447 for (int col1 = 0; col1 < 8; col1++) { | |
448 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); | |
449 os::print_location(tty, *dump_sp++); | |
450 } | |
451 for (int row = 0; row < 16; row++) { | |
452 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); | |
453 for (int col = 0; col < 8; col++) { | |
454 tty->print(" 0x%08x", *dump_sp++); | |
455 } | |
456 tty->cr(); | |
457 } | |
458 // Print some instructions around pc: | |
459 Disassembler::decode((address)eip-64, (address)eip); | |
460 tty->print_cr("--------"); | |
461 Disassembler::decode((address)eip, (address)eip+32); | |
462 } | |
463 | |
464 void MacroAssembler::stop(const char* msg) { | |
465 ExternalAddress message((address)msg); | |
466 // push address of message | |
467 pushptr(message.addr()); | |
468 { Label L; call(L, relocInfo::none); bind(L); } // push eip | |
469 pusha(); // push registers | |
470 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); | |
471 hlt(); | |
472 } | |
473 | |
474 void MacroAssembler::warn(const char* msg) { | |
475 push_CPU_state(); | |
476 | |
477 ExternalAddress message((address) msg); | |
478 // push address of message | |
479 pushptr(message.addr()); | |
480 | |
481 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); | |
482 addl(rsp, wordSize); // discard argument | |
483 pop_CPU_state(); | |
484 } | |
485 | |
486 void MacroAssembler::print_state() { | |
487 { Label L; call(L, relocInfo::none); bind(L); } // push eip | |
488 pusha(); // push registers | |
489 | |
490 push_CPU_state(); | |
491 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); | |
492 pop_CPU_state(); | |
493 | |
494 popa(); | |
495 addl(rsp, wordSize); | |
496 } | |
497 | |
498 #else // _LP64 | |
499 | |
500 // 64 bit versions | |
501 | |
502 Address MacroAssembler::as_Address(AddressLiteral adr) { | |
503 // amd64 always does this as a pc-rel | |
504 // we can be absolute or disp based on the instruction type | |
505 // jmp/call are displacements others are absolute | |
506 assert(!adr.is_lval(), "must be rval"); | |
507 assert(reachable(adr), "must be"); | |
508 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); | |
509 | |
510 } | |
511 | |
512 Address MacroAssembler::as_Address(ArrayAddress adr) { | |
513 AddressLiteral base = adr.base(); | |
514 lea(rscratch1, base); | |
515 Address index = adr.index(); | |
516 assert(index._disp == 0, "must not have disp"); // maybe it can? | |
517 Address array(rscratch1, index._index, index._scale, index._disp); | |
518 return array; | |
519 } | |
520 | |
521 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { | |
522 Label L, E; | |
523 | |
524 #ifdef _WIN64 | |
525 // Windows always allocates space for it's register args | |
526 assert(num_args <= 4, "only register arguments supported"); | |
527 subq(rsp, frame::arg_reg_save_area_bytes); | |
528 #endif | |
529 | |
530 // Align stack if necessary | |
531 testl(rsp, 15); | |
532 jcc(Assembler::zero, L); | |
533 | |
534 subq(rsp, 8); | |
535 { | |
536 call(RuntimeAddress(entry_point)); | |
537 } | |
538 addq(rsp, 8); | |
539 jmp(E); | |
540 | |
541 bind(L); | |
542 { | |
543 call(RuntimeAddress(entry_point)); | |
544 } | |
545 | |
546 bind(E); | |
547 | |
548 #ifdef _WIN64 | |
549 // restore stack pointer | |
550 addq(rsp, frame::arg_reg_save_area_bytes); | |
551 #endif | |
552 | |
553 } | |
554 | |
555 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { | |
556 assert(!src2.is_lval(), "should use cmpptr"); | |
557 | |
558 if (reachable(src2)) { | |
559 cmpq(src1, as_Address(src2)); | |
560 } else { | |
561 lea(rscratch1, src2); | |
562 Assembler::cmpq(src1, Address(rscratch1, 0)); | |
563 } | |
564 } | |
565 | |
566 int MacroAssembler::corrected_idivq(Register reg) { | |
567 // Full implementation of Java ldiv and lrem; checks for special | |
568 // case as described in JVM spec., p.243 & p.271. The function | |
569 // returns the (pc) offset of the idivl instruction - may be needed | |
570 // for implicit exceptions. | |
571 // | |
572 // normal case special case | |
573 // | |
574 // input : rax: dividend min_long | |
575 // reg: divisor (may not be eax/edx) -1 | |
576 // | |
577 // output: rax: quotient (= rax idiv reg) min_long | |
578 // rdx: remainder (= rax irem reg) 0 | |
579 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); | |
580 static const int64_t min_long = 0x8000000000000000; | |
581 Label normal_case, special_case; | |
582 | |
583 // check for special case | |
584 cmp64(rax, ExternalAddress((address) &min_long)); | |
585 jcc(Assembler::notEqual, normal_case); | |
586 xorl(rdx, rdx); // prepare rdx for possible special case (where | |
587 // remainder = 0) | |
588 cmpq(reg, -1); | |
589 jcc(Assembler::equal, special_case); | |
590 | |
591 // handle normal case | |
592 bind(normal_case); | |
593 cdqq(); | |
594 int idivq_offset = offset(); | |
595 idivq(reg); | |
596 | |
597 // normal and special case exit | |
598 bind(special_case); | |
599 | |
600 return idivq_offset; | |
601 } | |
602 | |
603 void MacroAssembler::decrementq(Register reg, int value) { | |
604 if (value == min_jint) { subq(reg, value); return; } | |
605 if (value < 0) { incrementq(reg, -value); return; } | |
606 if (value == 0) { ; return; } | |
607 if (value == 1 && UseIncDec) { decq(reg) ; return; } | |
608 /* else */ { subq(reg, value) ; return; } | |
609 } | |
610 | |
611 void MacroAssembler::decrementq(Address dst, int value) { | |
612 if (value == min_jint) { subq(dst, value); return; } | |
613 if (value < 0) { incrementq(dst, -value); return; } | |
614 if (value == 0) { ; return; } | |
615 if (value == 1 && UseIncDec) { decq(dst) ; return; } | |
616 /* else */ { subq(dst, value) ; return; } | |
617 } | |
618 | |
17780 | 619 void MacroAssembler::incrementq(AddressLiteral dst) { |
620 if (reachable(dst)) { | |
621 incrementq(as_Address(dst)); | |
622 } else { | |
623 lea(rscratch1, dst); | |
624 incrementq(Address(rscratch1, 0)); | |
625 } | |
626 } | |
627 | |
7199 | 628 void MacroAssembler::incrementq(Register reg, int value) { |
629 if (value == min_jint) { addq(reg, value); return; } | |
630 if (value < 0) { decrementq(reg, -value); return; } | |
631 if (value == 0) { ; return; } | |
632 if (value == 1 && UseIncDec) { incq(reg) ; return; } | |
633 /* else */ { addq(reg, value) ; return; } | |
634 } | |
635 | |
636 void MacroAssembler::incrementq(Address dst, int value) { | |
637 if (value == min_jint) { addq(dst, value); return; } | |
638 if (value < 0) { decrementq(dst, -value); return; } | |
639 if (value == 0) { ; return; } | |
640 if (value == 1 && UseIncDec) { incq(dst) ; return; } | |
641 /* else */ { addq(dst, value) ; return; } | |
642 } | |
643 | |
644 // 32bit can do a case table jump in one instruction but we no longer allow the base | |
645 // to be installed in the Address class | |
646 void MacroAssembler::jump(ArrayAddress entry) { | |
647 lea(rscratch1, entry.base()); | |
648 Address dispatch = entry.index(); | |
649 assert(dispatch._base == noreg, "must be"); | |
650 dispatch._base = rscratch1; | |
651 jmp(dispatch); | |
652 } | |
653 | |
654 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { | |
655 ShouldNotReachHere(); // 64bit doesn't use two regs | |
656 cmpq(x_lo, y_lo); | |
657 } | |
658 | |
659 void MacroAssembler::lea(Register dst, AddressLiteral src) { | |
660 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); | |
661 } | |
662 | |
663 void MacroAssembler::lea(Address dst, AddressLiteral adr) { | |
664 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); | |
665 movptr(dst, rscratch1); | |
666 } | |
667 | |
668 void MacroAssembler::leave() { | |
669 // %%% is this really better? Why not on 32bit too? | |
7430
d02120b7a34f
8004250: replace AbstractAssembler a_byte/a_long with emit_int8/emit_int32
twisti
parents:
7427
diff
changeset
|
670 emit_int8((unsigned char)0xC9); // LEAVE |
7199 | 671 } |
672 | |
673 void MacroAssembler::lneg(Register hi, Register lo) { | |
674 ShouldNotReachHere(); // 64bit doesn't use two regs | |
675 negq(lo); | |
676 } | |
677 | |
678 void MacroAssembler::movoop(Register dst, jobject obj) { | |
679 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); | |
680 } | |
681 | |
682 void MacroAssembler::movoop(Address dst, jobject obj) { | |
683 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); | |
684 movq(dst, rscratch1); | |
685 } | |
686 | |
687 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { | |
688 mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); | |
689 } | |
690 | |
691 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { | |
692 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); | |
693 movq(dst, rscratch1); | |
694 } | |
695 | |
17780 | 696 void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { |
7199 | 697 if (src.is_lval()) { |
698 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); | |
699 } else { | |
700 if (reachable(src)) { | |
701 movq(dst, as_Address(src)); | |
702 } else { | |
17780 | 703 lea(scratch, src); |
704 movq(dst, Address(scratch, 0)); | |
7199 | 705 } |
706 } | |
707 } | |
708 | |
709 void MacroAssembler::movptr(ArrayAddress dst, Register src) { | |
710 movq(as_Address(dst), src); | |
711 } | |
712 | |
713 void MacroAssembler::movptr(Register dst, ArrayAddress src) { | |
714 movq(dst, as_Address(src)); | |
715 } | |
716 | |
717 // src should NEVER be a real pointer. Use AddressLiteral for true pointers | |
718 void MacroAssembler::movptr(Address dst, intptr_t src) { | |
719 mov64(rscratch1, src); | |
720 movq(dst, rscratch1); | |
721 } | |
722 | |
723 // These are mostly for initializing NULL | |
724 void MacroAssembler::movptr(Address dst, int32_t src) { | |
725 movslq(dst, src); | |
726 } | |
727 | |
728 void MacroAssembler::movptr(Register dst, int32_t src) { | |
729 mov64(dst, (intptr_t)src); | |
730 } | |
731 | |
732 void MacroAssembler::pushoop(jobject obj) { | |
733 movoop(rscratch1, obj); | |
734 push(rscratch1); | |
735 } | |
736 | |
737 void MacroAssembler::pushklass(Metadata* obj) { | |
738 mov_metadata(rscratch1, obj); | |
739 push(rscratch1); | |
740 } | |
741 | |
742 void MacroAssembler::pushptr(AddressLiteral src) { | |
743 lea(rscratch1, src); | |
744 if (src.is_lval()) { | |
745 push(rscratch1); | |
746 } else { | |
747 pushq(Address(rscratch1, 0)); | |
748 } | |
749 } | |
750 | |
751 void MacroAssembler::reset_last_Java_frame(bool clear_fp, | |
752 bool clear_pc) { | |
753 // we must set sp to zero to clear frame | |
754 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); | |
755 // must clear fp, so that compiled frames are not confused; it is | |
756 // possible that we need it only for debugging | |
757 if (clear_fp) { | |
758 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); | |
759 } | |
760 | |
761 if (clear_pc) { | |
762 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); | |
763 } | |
764 } | |
765 | |
766 void MacroAssembler::set_last_Java_frame(Register last_java_sp, | |
767 Register last_java_fp, | |
768 address last_java_pc) { | |
769 // determine last_java_sp register | |
770 if (!last_java_sp->is_valid()) { | |
771 last_java_sp = rsp; | |
772 } | |
773 | |
774 // last_java_fp is optional | |
775 if (last_java_fp->is_valid()) { | |
776 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), | |
777 last_java_fp); | |
778 } | |
779 | |
780 // last_java_pc is optional | |
781 if (last_java_pc != NULL) { | |
782 Address java_pc(r15_thread, | |
783 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); | |
784 lea(rscratch1, InternalAddress(last_java_pc)); | |
785 movptr(java_pc, rscratch1); | |
786 } | |
787 | |
788 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); | |
789 } | |
790 | |
791 static void pass_arg0(MacroAssembler* masm, Register arg) { | |
792 if (c_rarg0 != arg ) { | |
793 masm->mov(c_rarg0, arg); | |
794 } | |
795 } | |
796 | |
797 static void pass_arg1(MacroAssembler* masm, Register arg) { | |
798 if (c_rarg1 != arg ) { | |
799 masm->mov(c_rarg1, arg); | |
800 } | |
801 } | |
802 | |
803 static void pass_arg2(MacroAssembler* masm, Register arg) { | |
804 if (c_rarg2 != arg ) { | |
805 masm->mov(c_rarg2, arg); | |
806 } | |
807 } | |
808 | |
809 static void pass_arg3(MacroAssembler* masm, Register arg) { | |
810 if (c_rarg3 != arg ) { | |
811 masm->mov(c_rarg3, arg); | |
812 } | |
813 } | |
814 | |
815 void MacroAssembler::stop(const char* msg) { | |
816 address rip = pc(); | |
817 pusha(); // get regs on stack | |
818 lea(c_rarg0, ExternalAddress((address) msg)); | |
819 lea(c_rarg1, InternalAddress(rip)); | |
820 movq(c_rarg2, rsp); // pass pointer to regs array | |
821 andq(rsp, -16); // align stack as required by ABI | |
822 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); | |
823 hlt(); | |
824 } | |
825 | |
826 void MacroAssembler::warn(const char* msg) { | |
827 push(rbp); | |
828 movq(rbp, rsp); | |
829 andq(rsp, -16); // align stack as required by push_CPU_state and call | |
830 push_CPU_state(); // keeps alignment at 16 bytes | |
831 lea(c_rarg0, ExternalAddress((address) msg)); | |
832 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); | |
833 pop_CPU_state(); | |
834 mov(rsp, rbp); | |
835 pop(rbp); | |
836 } | |
837 | |
// Emit code that dumps the current register state via print_state64()
// and then resumes execution. pusha saves the registers; rbp (saved on
// top of them) is used both to restore rsp and to compute the address
// of the saved-register array (rbp + wordSize skips the saved rbp).
void MacroAssembler::print_state() {
  address rip = pc();  // code address of this site, passed as the "rip" to print
  pusha(); // get regs on stack
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16); // align stack as required by push_CPU_state and call
  push_CPU_state(); // keeps alignment at 16 bytes

  lea(c_rarg0, InternalAddress(rip));
  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);

  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
  popa();  // restore all registers so execution can continue unperturbed
}
855 | |
856 #ifndef PRODUCT | |
857 extern "C" void findpc(intptr_t x); | |
858 #endif | |
859 | |
// Runtime target of stop(): called with the stop message, the pc of the
// stop site, and a pointer to the pusha-saved register array. Either
// pops up a message box offering a register dump (ShowMessageBoxOnError)
// or just prints the message and asserts.
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state64(pc, regs);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
    // Restore the thread state we faked above before (not) returning.
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}
888 | |
// Print a human-readable dump of the saved register state, nearby stack
// words, and a disassembly window around pc. regs[] is the pusha-saved
// array; the index mapping below (regs[15] == rax ... regs[0] == r15) is
// assumed to match the order MacroAssembler::pusha stores the registers
// for the callers stop()/print_state() -- TODO confirm against pusha().
void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);  // scoped: restores Debugging on exit
  tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
  tty->cr();
  findpc(pc);  // debug helper: identify what code/blob pc falls in
  tty->cr();
#endif
#define PRINT_REG(rax, value) \
  { tty->print("%s = ", #rax); os::print_location(tty, value); }
  PRINT_REG(rax, regs[15]);
  PRINT_REG(rbx, regs[12]);
  PRINT_REG(rcx, regs[14]);
  PRINT_REG(rdx, regs[13]);
  PRINT_REG(rdi, regs[8]);
  PRINT_REG(rsi, regs[9]);
  PRINT_REG(rbp, regs[10]);
  PRINT_REG(rsp, regs[11]);
  PRINT_REG(r8 , regs[7]);
  PRINT_REG(r9 , regs[6]);
  PRINT_REG(r10, regs[5]);
  PRINT_REG(r11, regs[4]);
  PRINT_REG(r12, regs[3]);
  PRINT_REG(r13, regs[2]);
  PRINT_REG(r14, regs[1]);
  PRINT_REG(r15, regs[0]);
#undef PRINT_REG
  // Print some words near top of stack.
  int64_t* rsp = (int64_t*) regs[11];  // the interrupted thread's rsp, as saved by pusha
  int64_t* dump_sp = rsp;
  // First 8 words: one per line, decoded via print_location (oop/symbol aware).
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  // Next 100 words: raw hex, 4 per row.
  for (int row = 0; row < 25; row++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    for (int col = 0; col < 4; col++) {
      tty->print(" 0x%016lx", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)pc-64, (address)pc);
  tty->print_cr("--------");
  Disassembler::decode((address)pc, (address)pc+32);
}
936 | |
937 #endif // _LP64 | |
938 | |
939 // Now versions that are common to 32/64 bit | |
940 | |
941 void MacroAssembler::addptr(Register dst, int32_t imm32) { | |
942 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); | |
943 } | |
944 | |
945 void MacroAssembler::addptr(Register dst, Register src) { | |
946 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); | |
947 } | |
948 | |
949 void MacroAssembler::addptr(Address dst, Register src) { | |
950 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); | |
951 } | |
952 | |
953 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { | |
954 if (reachable(src)) { | |
955 Assembler::addsd(dst, as_Address(src)); | |
956 } else { | |
957 lea(rscratch1, src); | |
958 Assembler::addsd(dst, Address(rscratch1, 0)); | |
959 } | |
960 } | |
961 | |
962 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { | |
963 if (reachable(src)) { | |
964 addss(dst, as_Address(src)); | |
965 } else { | |
966 lea(rscratch1, src); | |
967 addss(dst, Address(rscratch1, 0)); | |
968 } | |
969 } | |
970 | |
971 void MacroAssembler::align(int modulus) { | |
972 if (offset() % modulus != 0) { | |
973 nop(modulus - (offset() % modulus)); | |
974 } | |
975 } | |
976 | |
977 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { | |
978 // Used in sign-masking with aligned address. | |
979 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); | |
980 if (reachable(src)) { | |
981 Assembler::andpd(dst, as_Address(src)); | |
982 } else { | |
983 lea(rscratch1, src); | |
984 Assembler::andpd(dst, Address(rscratch1, 0)); | |
985 } | |
986 } | |
987 | |
// Packed-single bitwise AND against an in-memory constant.
void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  // SSE (non-AVX) forms require the memory operand to be 16-byte aligned.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    // Not RIP-reachable: materialize the address in rscratch1 first.
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}
998 | |
999 void MacroAssembler::andptr(Register dst, int32_t imm32) { | |
1000 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); | |
1001 } | |
1002 | |
17780 | 1003 void MacroAssembler::atomic_incl(Address counter_addr) { |
1004 if (os::is_MP()) | |
1005 lock(); | |
1006 incrementl(counter_addr); | |
1007 } | |
1008 | |
1009 void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) { | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1010 if (reachable(counter_addr)) { |
17780 | 1011 atomic_incl(as_Address(counter_addr)); |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1012 } else { |
17780 | 1013 lea(scr, counter_addr); |
1014 atomic_incl(Address(scr, 0)); | |
1015 } | |
1016 } | |
1017 | |
#ifdef _LP64
// Atomically increment the 64-bit counter at counter_addr. On MP systems
// the increment needs a lock prefix to be atomic. 64-bit counterpart of
// atomic_incl, so only available on LP64.
void MacroAssembler::atomic_incq(Address counter_addr) {
  if (os::is_MP())
    lock();
  incrementq(counter_addr);
}

// Atomically increment a 64-bit counter at a literal address. If the
// address is not reachable from the code buffer, compute it into the
// scratch register scr first.
void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
  if (reachable(counter_addr)) {
    atomic_incq(as_Address(counter_addr));
  } else {
    lea(scr, counter_addr);
    atomic_incq(Address(scr, 0));
  }
}
#endif
7199 | 1034 |
1035 // Writes to stack successive pages until offset reached to check for | |
1036 // stack overflow + shadow pages. This clobbers tmp. | |
1037 void MacroAssembler::bang_stack_size(Register size, Register tmp) { | |
1038 movptr(tmp, rsp); | |
1039 // Bang stack for total size given plus shadow page size. | |
1040 // Bang one page at a time because large size can bang beyond yellow and | |
1041 // red zones. | |
1042 Label loop; | |
1043 bind(loop); | |
1044 movl(Address(tmp, (-os::vm_page_size())), size ); | |
1045 subptr(tmp, os::vm_page_size()); | |
1046 subl(size, os::vm_page_size()); | |
1047 jcc(Assembler::greater, loop); | |
1048 | |
1049 // Bang down shadow pages too. | |
13047
be525e91f65b
8026775: nsk/jvmti/RedefineClasses/StressRedefine crashes due to EXCEPTION_ACCESS_VIOLATION
mikael
parents:
13000
diff
changeset
|
1050 // At this point, (tmp-0) is the last address touched, so don't |
be525e91f65b
8026775: nsk/jvmti/RedefineClasses/StressRedefine crashes due to EXCEPTION_ACCESS_VIOLATION
mikael
parents:
13000
diff
changeset
|
1051 // touch it again. (It was touched as (tmp-pagesize) but then tmp |
be525e91f65b
8026775: nsk/jvmti/RedefineClasses/StressRedefine crashes due to EXCEPTION_ACCESS_VIOLATION
mikael
parents:
13000
diff
changeset
|
1052 // was post-decremented.) Skip this address by starting at i=1, and |
be525e91f65b
8026775: nsk/jvmti/RedefineClasses/StressRedefine crashes due to EXCEPTION_ACCESS_VIOLATION
mikael
parents:
13000
diff
changeset
|
1053 // touch a few more pages below. N.B. It is important to touch all |
be525e91f65b
8026775: nsk/jvmti/RedefineClasses/StressRedefine crashes due to EXCEPTION_ACCESS_VIOLATION
mikael
parents:
13000
diff
changeset
|
1054 // the way down to and including i=StackShadowPages. |
17980
0bf37f737702
8032410: compiler/uncommontrap/TestStackBangRbp.java times out on Solaris-Sparc V9
roland
parents:
17937
diff
changeset
|
1055 for (int i = 1; i < StackShadowPages; i++) { |
7199 | 1056 // this could be any sized move but this is can be a debugging crumb |
1057 // so the bigger the better. | |
1058 movptr(Address(tmp, (-i*os::vm_page_size())), size ); | |
1059 } | |
1060 } | |
1061 | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1062 int MacroAssembler::biased_locking_enter(Register lock_reg, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1063 Register obj_reg, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1064 Register swap_reg, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1065 Register tmp_reg, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1066 bool swap_reg_contains_mark, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1067 Label& done, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1068 Label* slow_case, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1069 BiasedLockingCounters* counters) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1070 assert(UseBiasedLocking, "why call this otherwise?"); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1071 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1072 LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); ) |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1073 bool need_tmp_reg = false; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1074 if (tmp_reg == noreg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1075 need_tmp_reg = true; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1076 tmp_reg = lock_reg; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1077 assert_different_registers(lock_reg, obj_reg, swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1078 } else { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1079 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1080 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1081 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1082 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1083 Address saved_mark_addr(lock_reg, 0); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1084 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1085 if (PrintBiasedLockingStatistics && counters == NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1086 counters = BiasedLocking::counters(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1087 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1088 // Biased locking |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1089 // See whether the lock is currently biased toward our thread and |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1090 // whether the epoch is still valid |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1091 // Note that the runtime guarantees sufficient alignment of JavaThread |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1092 // pointers to allow age to be placed into low bits |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1093 // First check to see whether biasing is even enabled for this object |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1094 Label cas_label; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1095 int null_check_offset = -1; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1096 if (!swap_reg_contains_mark) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1097 null_check_offset = offset(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1098 movptr(swap_reg, mark_addr); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1099 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1100 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1101 push(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1102 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1103 movptr(tmp_reg, swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1104 andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1105 cmpptr(tmp_reg, markOopDesc::biased_lock_pattern); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1106 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1107 pop(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1108 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1109 jcc(Assembler::notEqual, cas_label); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1110 // The bias pattern is present in the object's header. Need to check |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1111 // whether the bias owner and the epoch are both still current. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1112 #ifndef _LP64 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1113 // Note that because there is no current thread register on x86_32 we |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1114 // need to store off the mark word we read out of the object to |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1115 // avoid reloading it and needing to recheck invariants below. This |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1116 // store is unfortunate but it makes the overall code shorter and |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1117 // simpler. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1118 movptr(saved_mark_addr, swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1119 #endif |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1120 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1121 push(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1122 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1123 if (swap_reg_contains_mark) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1124 null_check_offset = offset(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1125 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1126 load_prototype_header(tmp_reg, obj_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1127 #ifdef _LP64 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1128 orptr(tmp_reg, r15_thread); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1129 xorptr(tmp_reg, swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1130 Register header_reg = tmp_reg; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1131 #else |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1132 xorptr(tmp_reg, swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1133 get_thread(swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1134 xorptr(swap_reg, tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1135 Register header_reg = swap_reg; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1136 #endif |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1137 andptr(header_reg, ~((int) markOopDesc::age_mask_in_place)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1138 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1139 pop(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1140 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1141 if (counters != NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1142 cond_inc32(Assembler::zero, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1143 ExternalAddress((address) counters->biased_lock_entry_count_addr())); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1144 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1145 jcc(Assembler::equal, done); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1146 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1147 Label try_revoke_bias; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1148 Label try_rebias; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1149 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1150 // At this point we know that the header has the bias pattern and |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1151 // that we are not the bias owner in the current epoch. We need to |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1152 // figure out more details about the state of the header in order to |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1153 // know what operations can be legally performed on the object's |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1154 // header. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1155 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1156 // If the low three bits in the xor result aren't clear, that means |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1157 // the prototype header is no longer biased and we have to revoke |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1158 // the bias on this object. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1159 testptr(header_reg, markOopDesc::biased_lock_mask_in_place); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1160 jccb(Assembler::notZero, try_revoke_bias); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1161 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1162 // Biasing is still enabled for this data type. See whether the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1163 // epoch of the current bias is still valid, meaning that the epoch |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1164 // bits of the mark word are equal to the epoch bits of the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1165 // prototype header. (Note that the prototype header's epoch bits |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1166 // only change at a safepoint.) If not, attempt to rebias the object |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1167 // toward the current thread. Note that we must be absolutely sure |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1168 // that the current epoch is invalid in order to do this because |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1169 // otherwise the manipulations it performs on the mark word are |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1170 // illegal. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1171 testptr(header_reg, markOopDesc::epoch_mask_in_place); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1172 jccb(Assembler::notZero, try_rebias); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1173 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1174 // The epoch of the current bias is still valid but we know nothing |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1175 // about the owner; it might be set or it might be clear. Try to |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1176 // acquire the bias of the object using an atomic operation. If this |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1177 // fails we will go in to the runtime to revoke the object's bias. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1178 // Note that we first construct the presumed unbiased header so we |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1179 // don't accidentally blow away another thread's valid bias. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1180 NOT_LP64( movptr(swap_reg, saved_mark_addr); ) |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1181 andptr(swap_reg, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1182 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1183 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1184 push(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1185 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1186 #ifdef _LP64 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1187 movptr(tmp_reg, swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1188 orptr(tmp_reg, r15_thread); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1189 #else |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1190 get_thread(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1191 orptr(tmp_reg, swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1192 #endif |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1193 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1194 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1195 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1196 cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1197 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1198 pop(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1199 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1200 // If the biasing toward our thread failed, this means that |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1201 // another thread succeeded in biasing it toward itself and we |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1202 // need to revoke that bias. The revocation will occur in the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1203 // interpreter runtime in the slow case. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1204 if (counters != NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1205 cond_inc32(Assembler::zero, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1206 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1207 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1208 if (slow_case != NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1209 jcc(Assembler::notZero, *slow_case); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1210 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1211 jmp(done); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1212 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1213 bind(try_rebias); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1214 // At this point we know the epoch has expired, meaning that the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1215 // current "bias owner", if any, is actually invalid. Under these |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1216 // circumstances _only_, we are allowed to use the current header's |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1217 // value as the comparison value when doing the cas to acquire the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1218 // bias in the current epoch. In other words, we allow transfer of |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1219 // the bias from one thread to another directly in this situation. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1220 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1221 // FIXME: due to a lack of registers we currently blow away the age |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1222 // bits in this situation. Should attempt to preserve them. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1223 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1224 push(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1225 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1226 load_prototype_header(tmp_reg, obj_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1227 #ifdef _LP64 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1228 orptr(tmp_reg, r15_thread); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1229 #else |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1230 get_thread(swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1231 orptr(tmp_reg, swap_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1232 movptr(swap_reg, saved_mark_addr); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1233 #endif |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1234 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1235 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1236 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1237 cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1238 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1239 pop(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1240 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1241 // If the biasing toward our thread failed, then another thread |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1242 // succeeded in biasing it toward itself and we need to revoke that |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1243 // bias. The revocation will occur in the runtime in the slow case. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1244 if (counters != NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1245 cond_inc32(Assembler::zero, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1246 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1247 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1248 if (slow_case != NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1249 jcc(Assembler::notZero, *slow_case); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1250 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1251 jmp(done); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1252 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1253 bind(try_revoke_bias); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1254 // The prototype mark in the klass doesn't have the bias bit set any |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1255 // more, indicating that objects of this data type are not supposed |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1256 // to be biased any more. We are going to try to reset the mark of |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1257 // this object to the prototype value and fall through to the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1258 // CAS-based locking scheme. Note that if our CAS fails, it means |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1259 // that another thread raced us for the privilege of revoking the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1260 // bias of this particular object, so it's okay to continue in the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1261 // normal locking code. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1262 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1263 // FIXME: due to a lack of registers we currently blow away the age |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1264 // bits in this situation. Should attempt to preserve them. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1265 NOT_LP64( movptr(swap_reg, saved_mark_addr); ) |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1266 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1267 push(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1268 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1269 load_prototype_header(tmp_reg, obj_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1270 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1271 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1272 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1273 cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1274 if (need_tmp_reg) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1275 pop(tmp_reg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1276 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1277 // Fall through to the normal CAS-based lock, because no matter what |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1278 // the result of the above CAS, some thread must have succeeded in |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1279 // removing the bias bit from the object's header. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1280 if (counters != NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1281 cond_inc32(Assembler::zero, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1282 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1283 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1284 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1285 bind(cas_label); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1286 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1287 return null_check_offset; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1288 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1289 |
7199 | 1290 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { |
1291 assert(UseBiasedLocking, "why call this otherwise?"); | |
1292 | |
1293 // Check for biased locking unlock case, which is a no-op | |
1294 // Note: we do not have to check the thread ID for two reasons. | |
1295 // First, the interpreter checks for IllegalMonitorStateException at | |
1296 // a higher level. Second, if the bias was revoked while we held the | |
1297 // lock, the object could not be rebiased toward another thread, so | |
1298 // the bias bit would be clear. | |
1299 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); | |
1300 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); | |
1301 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); | |
1302 jcc(Assembler::equal, done); | |
1303 } | |
1304 | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1305 #ifdef COMPILER2 |
17780 | 1306 |
1307 #if INCLUDE_RTM_OPT | |
1308 | |
1309 // Update rtm_counters based on abort status | |
1310 // input: abort_status | |
1311 // rtm_counters (RTMLockingCounters*) | |
1312 // flags are killed | |
1313 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) { | |
1314 | |
1315 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset())); | |
1316 if (PrintPreciseRTMLockingStatistics) { | |
1317 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { | |
1318 Label check_abort; | |
1319 testl(abort_status, (1<<i)); | |
1320 jccb(Assembler::equal, check_abort); | |
1321 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx)))); | |
1322 bind(check_abort); | |
1323 } | |
1324 } | |
1325 } | |
1326 | |
1327 // Branch if (random & (count-1) != 0), count is 2^n | |
1328 // tmp, scr and flags are killed | |
1329 void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) { | |
1330 assert(tmp == rax, ""); | |
1331 assert(scr == rdx, ""); | |
1332 rdtsc(); // modifies EDX:EAX | |
1333 andptr(tmp, count-1); | |
1334 jccb(Assembler::notZero, brLabel); | |
1335 } | |
1336 | |
1337 // Perform abort ratio calculation, set no_rtm bit if high ratio | |
1338 // input: rtm_counters_Reg (RTMLockingCounters* address) | |
1339 // tmpReg, rtm_counters_Reg and flags are killed | |
1340 void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg, | |
1341 Register rtm_counters_Reg, | |
1342 RTMLockingCounters* rtm_counters, | |
1343 Metadata* method_data) { | |
1344 Label L_done, L_check_always_rtm1, L_check_always_rtm2; | |
1345 | |
1346 if (RTMLockingCalculationDelay > 0) { | |
1347 // Delay calculation | |
1348 movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg); | |
1349 testptr(tmpReg, tmpReg); | |
1350 jccb(Assembler::equal, L_done); | |
1351 } | |
1352 // Abort ratio calculation only if abort_count > RTMAbortThreshold | |
1353 // Aborted transactions = abort_count * 100 | |
1354 // All transactions = total_count * RTMTotalCountIncrRate | |
1355 // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio) | |
1356 | |
1357 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset())); | |
1358 cmpptr(tmpReg, RTMAbortThreshold); | |
1359 jccb(Assembler::below, L_check_always_rtm2); | |
1360 imulptr(tmpReg, tmpReg, 100); | |
1361 | |
1362 Register scrReg = rtm_counters_Reg; | |
1363 movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset())); | |
1364 imulptr(scrReg, scrReg, RTMTotalCountIncrRate); | |
1365 imulptr(scrReg, scrReg, RTMAbortRatio); | |
1366 cmpptr(tmpReg, scrReg); | |
1367 jccb(Assembler::below, L_check_always_rtm1); | |
1368 if (method_data != NULL) { | |
1369 // set rtm_state to "no rtm" in MDO | |
1370 mov_metadata(tmpReg, method_data); | |
1371 if (os::is_MP()) { | |
1372 lock(); | |
1373 } | |
1374 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM); | |
1375 } | |
1376 jmpb(L_done); | |
1377 bind(L_check_always_rtm1); | |
1378 // Reload RTMLockingCounters* address | |
1379 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters)); | |
1380 bind(L_check_always_rtm2); | |
1381 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset())); | |
1382 cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate); | |
1383 jccb(Assembler::below, L_done); | |
1384 if (method_data != NULL) { | |
1385 // set rtm_state to "always rtm" in MDO | |
1386 mov_metadata(tmpReg, method_data); | |
1387 if (os::is_MP()) { | |
1388 lock(); | |
1389 } | |
1390 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM); | |
1391 } | |
1392 bind(L_done); | |
1393 } | |
1394 | |
1395 // Update counters and perform abort ratio calculation | |
1396 // input: abort_status_Reg | |
1397 // rtm_counters_Reg, flags are killed | |
1398 void MacroAssembler::rtm_profiling(Register abort_status_Reg, | |
1399 Register rtm_counters_Reg, | |
1400 RTMLockingCounters* rtm_counters, | |
1401 Metadata* method_data, | |
1402 bool profile_rtm) { | |
1403 | |
1404 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); | |
1405 // update rtm counters based on rax value at abort | |
1406 // reads abort_status_Reg, updates flags | |
1407 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters)); | |
1408 rtm_counters_update(abort_status_Reg, rtm_counters_Reg); | |
1409 if (profile_rtm) { | |
1410 // Save abort status because abort_status_Reg is used by following code. | |
1411 if (RTMRetryCount > 0) { | |
1412 push(abort_status_Reg); | |
1413 } | |
1414 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); | |
1415 rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data); | |
1416 // restore abort status | |
1417 if (RTMRetryCount > 0) { | |
1418 pop(abort_status_Reg); | |
1419 } | |
1420 } | |
1421 } | |
1422 | |
1423 // Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4) | |
1424 // inputs: retry_count_Reg | |
1425 // : abort_status_Reg | |
1426 // output: retry_count_Reg decremented by 1 | |
1427 // flags are killed | |
1428 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) { | |
1429 Label doneRetry; | |
1430 assert(abort_status_Reg == rax, ""); | |
1431 // The abort reason bits are in eax (see all states in rtmLocking.hpp) | |
1432 // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4) | |
1433 // if reason is in 0x6 and retry count != 0 then retry | |
1434 andptr(abort_status_Reg, 0x6); | |
1435 jccb(Assembler::zero, doneRetry); | |
1436 testl(retry_count_Reg, retry_count_Reg); | |
1437 jccb(Assembler::zero, doneRetry); | |
1438 pause(); | |
1439 decrementl(retry_count_Reg); | |
1440 jmp(retryLabel); | |
1441 bind(doneRetry); | |
1442 } | |
1443 | |
1444 // Spin and retry if lock is busy, | |
1445 // inputs: box_Reg (monitor address) | |
1446 // : retry_count_Reg | |
1447 // output: retry_count_Reg decremented by 1 | |
1448 // : clear z flag if retry count exceeded | |
1449 // tmp_Reg, scr_Reg, flags are killed | |
1450 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg, | |
1451 Register tmp_Reg, Register scr_Reg, Label& retryLabel) { | |
1452 Label SpinLoop, SpinExit, doneRetry; | |
1453 // Clean monitor_value bit to get valid pointer | |
1454 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; | |
1455 | |
1456 testl(retry_count_Reg, retry_count_Reg); | |
1457 jccb(Assembler::zero, doneRetry); | |
1458 decrementl(retry_count_Reg); | |
1459 movptr(scr_Reg, RTMSpinLoopCount); | |
1460 | |
1461 bind(SpinLoop); | |
1462 pause(); | |
1463 decrementl(scr_Reg); | |
1464 jccb(Assembler::lessEqual, SpinExit); | |
1465 movptr(tmp_Reg, Address(box_Reg, owner_offset)); | |
1466 testptr(tmp_Reg, tmp_Reg); | |
1467 jccb(Assembler::notZero, SpinLoop); | |
1468 | |
1469 bind(SpinExit); | |
1470 jmp(retryLabel); | |
1471 bind(doneRetry); | |
1472 incrementl(retry_count_Reg); // clear z flag | |
1473 } | |
1474 | |
1475 // Use RTM for normal stack locks | |
1476 // Input: objReg (object to lock) | |
1477 void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg, | |
1478 Register retry_on_abort_count_Reg, | |
1479 RTMLockingCounters* stack_rtm_counters, | |
1480 Metadata* method_data, bool profile_rtm, | |
1481 Label& DONE_LABEL, Label& IsInflated) { | |
1482 assert(UseRTMForStackLocks, "why call this otherwise?"); | |
1483 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); | |
1484 assert(tmpReg == rax, ""); | |
1485 assert(scrReg == rdx, ""); | |
1486 Label L_rtm_retry, L_decrement_retry, L_on_abort; | |
1487 | |
1488 if (RTMRetryCount > 0) { | |
1489 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort | |
1490 bind(L_rtm_retry); | |
1491 } | |
17849
526acaf3626f
8038939: Some options related to RTM locking optimization works inconsistently
kvn
parents:
17780
diff
changeset
|
1492 movptr(tmpReg, Address(objReg, 0)); |
526acaf3626f
8038939: Some options related to RTM locking optimization works inconsistently
kvn
parents:
17780
diff
changeset
|
1493 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased |
526acaf3626f
8038939: Some options related to RTM locking optimization works inconsistently
kvn
parents:
17780
diff
changeset
|
1494 jcc(Assembler::notZero, IsInflated); |
526acaf3626f
8038939: Some options related to RTM locking optimization works inconsistently
kvn
parents:
17780
diff
changeset
|
1495 |
17780 | 1496 if (PrintPreciseRTMLockingStatistics || profile_rtm) { |
1497 Label L_noincrement; | |
1498 if (RTMTotalCountIncrRate > 1) { | |
1499 // tmpReg, scrReg and flags are killed | |
1500 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement); | |
1501 } | |
1502 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM"); | |
1503 atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg); | |
1504 bind(L_noincrement); | |
1505 } | |
1506 xbegin(L_on_abort); | |
1507 movptr(tmpReg, Address(objReg, 0)); // fetch markword | |
1508 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits | |
1509 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked | |
1510 jcc(Assembler::equal, DONE_LABEL); // all done if unlocked | |
1511 | |
1512 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX | |
1513 if (UseRTMXendForLockBusy) { | |
1514 xend(); | |
17849
526acaf3626f
8038939: Some options related to RTM locking optimization works inconsistently
kvn
parents:
17780
diff
changeset
|
1515 movptr(abort_status_Reg, 0x2); // Set the abort status to 2 (so we can retry) |
17780 | 1516 jmp(L_decrement_retry); |
1517 } | |
1518 else { | |
1519 xabort(0); | |
1520 } | |
1521 bind(L_on_abort); | |
1522 if (PrintPreciseRTMLockingStatistics || profile_rtm) { | |
1523 rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm); | |
1524 } | |
1525 bind(L_decrement_retry); | |
1526 if (RTMRetryCount > 0) { | |
1527 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4) | |
1528 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); | |
1529 } | |
1530 } | |
1531 | |
1532 // Use RTM for inflating locks | |
1533 // inputs: objReg (object to lock) | |
1534 // boxReg (on-stack box address (displaced header location) - KILLED) | |
1535 // tmpReg (ObjectMonitor address + 2(monitor_value)) | |
1536 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg, | |
1537 Register scrReg, Register retry_on_busy_count_Reg, | |
1538 Register retry_on_abort_count_Reg, | |
1539 RTMLockingCounters* rtm_counters, | |
1540 Metadata* method_data, bool profile_rtm, | |
1541 Label& DONE_LABEL) { | |
1542 assert(UseRTMLocking, "why call this otherwise?"); | |
1543 assert(tmpReg == rax, ""); | |
1544 assert(scrReg == rdx, ""); | |
1545 Label L_rtm_retry, L_decrement_retry, L_on_abort; | |
1546 // Clean monitor_value bit to get valid pointer | |
1547 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; | |
1548 | |
1549 // Without cast to int32_t a movptr will destroy r10 which is typically obj | |
1550 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); | |
1551 movptr(boxReg, tmpReg); // Save ObjectMonitor address | |
1552 | |
1553 if (RTMRetryCount > 0) { | |
1554 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy | |
1555 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort | |
1556 bind(L_rtm_retry); | |
1557 } | |
1558 if (PrintPreciseRTMLockingStatistics || profile_rtm) { | |
1559 Label L_noincrement; | |
1560 if (RTMTotalCountIncrRate > 1) { | |
1561 // tmpReg, scrReg and flags are killed | |
1562 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement); | |
1563 } | |
1564 assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); | |
1565 atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg); | |
1566 bind(L_noincrement); | |
1567 } | |
1568 xbegin(L_on_abort); | |
1569 movptr(tmpReg, Address(objReg, 0)); | |
1570 movptr(tmpReg, Address(tmpReg, owner_offset)); | |
1571 testptr(tmpReg, tmpReg); | |
1572 jcc(Assembler::zero, DONE_LABEL); | |
1573 if (UseRTMXendForLockBusy) { | |
1574 xend(); | |
1575 jmp(L_decrement_retry); | |
1576 } | |
1577 else { | |
1578 xabort(0); | |
1579 } | |
1580 bind(L_on_abort); | |
1581 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX | |
1582 if (PrintPreciseRTMLockingStatistics || profile_rtm) { | |
1583 rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm); | |
1584 } | |
1585 if (RTMRetryCount > 0) { | |
1586 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4) | |
1587 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); | |
1588 } | |
1589 | |
1590 movptr(tmpReg, Address(boxReg, owner_offset)) ; | |
1591 testptr(tmpReg, tmpReg) ; | |
1592 jccb(Assembler::notZero, L_decrement_retry) ; | |
1593 | |
1594 // Appears unlocked - try to swing _owner from null to non-null. | |
1595 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. | |
1596 #ifdef _LP64 | |
1597 Register threadReg = r15_thread; | |
1598 #else | |
1599 get_thread(scrReg); | |
1600 Register threadReg = scrReg; | |
1601 #endif | |
1602 if (os::is_MP()) { | |
1603 lock(); | |
1604 } | |
1605 cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg | |
1606 | |
1607 if (RTMRetryCount > 0) { | |
1608 // success done else retry | |
1609 jccb(Assembler::equal, DONE_LABEL) ; | |
1610 bind(L_decrement_retry); | |
1611 // Spin and retry if lock is busy. | |
1612 rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry); | |
1613 } | |
1614 else { | |
1615 bind(L_decrement_retry); | |
1616 } | |
1617 } | |
1618 | |
1619 #endif // INCLUDE_RTM_OPT | |
1620 | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1621 // Fast_Lock and Fast_Unlock used by C2 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1622 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1623 // Because the transitions from emitted code to the runtime |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1624 // monitorenter/exit helper stubs are so slow it's critical that |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1625 // we inline both the stack-locking fast-path and the inflated fast path. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1626 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1627 // See also: cmpFastLock and cmpFastUnlock. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1628 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1629 // What follows is a specialized inline transliteration of the code |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1630 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1631 // another option would be to emit TrySlowEnter and TrySlowExit methods |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1632 // at startup-time. These methods would accept arguments as |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1633 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1634 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1635 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1636 // In practice, however, the # of lock sites is bounded and is usually small. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1637 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1638 // if the processor uses simple bimodal branch predictors keyed by EIP |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1639 // Since the helper routines would be called from multiple synchronization |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1640 // sites. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1641 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1642 // An even better approach would be write "MonitorEnter()" and "MonitorExit()" |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1643 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1644 // to those specialized methods. That'd give us a mostly platform-independent |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1645 // implementation that the JITs could optimize and inline at their pleasure. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
 1646 // Done correctly, the only time we'd need to cross to native code would be |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1647 // to park() or unpark() threads. We'd also need a few more unsafe operators |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1648 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1649 // (b) explicit barriers or fence operations. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1650 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1651 // TODO: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1652 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1653 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1654 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1655 // Given TLAB allocation, Self is usually manifested in a register, so passing it into |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1656 // the lock operators would typically be faster than reifying Self. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1657 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1658 // * Ideally I'd define the primitives as: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1659 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1660 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1661 // Unfortunately ADLC bugs prevent us from expressing the ideal form. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
 1662 // Instead, we're stuck with rather awkward and brittle register assignments below. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1663 // Furthermore the register assignments are overconstrained, possibly resulting in |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1664 // sub-optimal code near the synchronization site. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1665 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1666 // * Eliminate the sp-proximity tests and just use "== Self" tests instead. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1667 // Alternately, use a better sp-proximity test. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1668 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1669 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1670 // Either one is sufficient to uniquely identify a thread. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1671 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1672 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1673 // * Intrinsify notify() and notifyAll() for the common cases where the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1674 // object is locked by the calling thread but the waitlist is empty. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1675 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1676 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1677 // * use jccb and jmpb instead of jcc and jmp to improve code density. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1678 // But beware of excessive branch density on AMD Opterons. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1679 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1680 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1681 // or failure of the fast-path. If the fast-path fails then we pass |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1682 // control to the slow-path, typically in C. In Fast_Lock and |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1683 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1684 // will emit a conditional branch immediately after the node. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1685 // So we have branches to branches and lots of ICC.ZF games. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1686 // Instead, it might be better to have C2 pass a "FailureLabel" |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1687 // into Fast_Lock and Fast_Unlock. In the case of success, control |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1688 // will drop through the node. ICC.ZF is undefined at exit. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1689 // In the case of failure, the node will branch directly to the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1690 // FailureLabel |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1691 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1692 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1693 // obj: object to lock |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1694 // box: on-stack box address (displaced header location) - KILLED |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1695 // rax,: tmp -- KILLED |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1696 // scr: tmp -- KILLED |
17780 | 1697 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, |
1698 Register scrReg, Register cx1Reg, Register cx2Reg, | |
1699 BiasedLockingCounters* counters, | |
1700 RTMLockingCounters* rtm_counters, | |
1701 RTMLockingCounters* stack_rtm_counters, | |
1702 Metadata* method_data, | |
1703 bool use_rtm, bool profile_rtm) { | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
 1704 // Ensure the register assignments are disjoint |
17780 | 1705 assert(tmpReg == rax, ""); |
1706 | |
1707 if (use_rtm) { | |
1708 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg); | |
1709 } else { | |
1710 assert(cx1Reg == noreg, ""); | |
1711 assert(cx2Reg == noreg, ""); | |
1712 assert_different_registers(objReg, boxReg, tmpReg, scrReg); | |
1713 } | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1714 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1715 if (counters != NULL) { |
17780 | 1716 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg); |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1717 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1718 if (EmitSync & 1) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1719 // set box->dhw = unused_mark (3) |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1720 // Force all sync thru slow-path: slow_enter() and slow_exit() |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1721 movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1722 cmpptr (rsp, (int32_t)NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1723 } else |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1724 if (EmitSync & 2) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1725 Label DONE_LABEL ; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1726 if (UseBiasedLocking) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1727 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1728 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1729 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1730 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1731 movptr(tmpReg, Address(objReg, 0)); // fetch markword |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1732 orptr (tmpReg, 0x1); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1733 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1734 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1735 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1736 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1737 cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1738 jccb(Assembler::equal, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1739 // Recursive locking |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1740 subptr(tmpReg, rsp); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1741 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1742 movptr(Address(boxReg, 0), tmpReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1743 bind(DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1744 } else { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1745 // Possible cases that we'll encounter in fast_lock |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1746 // ------------------------------------------------ |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1747 // * Inflated |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1748 // -- unlocked |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1749 // -- Locked |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1750 // = by self |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1751 // = by other |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1752 // * biased |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1753 // -- by Self |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1754 // -- by other |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1755 // * neutral |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1756 // * stack-locked |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1757 // -- by self |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1758 // = sp-proximity test hits |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1759 // = sp-proximity test generates false-negative |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1760 // -- by other |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1761 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1762 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1763 Label IsInflated, DONE_LABEL; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1764 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1765 // it's stack-locked, biased or neutral |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1766 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1767 // order to reduce the number of conditional branches in the most common cases. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1768 // Beware -- there's a subtle invariant that fetch of the markword |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1769 // at [FETCH], below, will never observe a biased encoding (*101b). |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1770 // If this invariant is not held we risk exclusion (safety) failure. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1771 if (UseBiasedLocking && !UseOptoBiasInlining) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1772 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1773 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1774 |
17780 | 1775 #if INCLUDE_RTM_OPT |
1776 if (UseRTMForStackLocks && use_rtm) { | |
1777 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg, | |
1778 stack_rtm_counters, method_data, profile_rtm, | |
1779 DONE_LABEL, IsInflated); | |
1780 } | |
1781 #endif // INCLUDE_RTM_OPT | |
1782 | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1783 movptr(tmpReg, Address(objReg, 0)); // [FETCH] |
17780 | 1784 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased |
1785 jccb(Assembler::notZero, IsInflated); | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1786 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1787 // Attempt stack-locking ... |
17780 | 1788 orptr (tmpReg, markOopDesc::unlocked_value); |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1789 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1790 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1791 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1792 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1793 cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1794 if (counters != NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1795 cond_inc32(Assembler::equal, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1796 ExternalAddress((address)counters->fast_path_entry_count_addr())); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1797 } |
17780 | 1798 jcc(Assembler::equal, DONE_LABEL); // Success |
1799 | |
1800 // Recursive locking. | |
1801 // The object is stack-locked: markword contains stack pointer to BasicLock. | |
1802 // Locked by current thread if difference with current SP is less than one page. | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1803 subptr(tmpReg, rsp); |
 17780 | 1804 // Next instruction sets ZFlag == 1 (Success) if difference is less than one page. |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1805 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1806 movptr(Address(boxReg, 0), tmpReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1807 if (counters != NULL) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1808 cond_inc32(Assembler::equal, |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1809 ExternalAddress((address)counters->fast_path_entry_count_addr())); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1810 } |
17780 | 1811 jmp(DONE_LABEL); |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1812 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1813 bind(IsInflated); |
17780 | 1814 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value) |
1815 | |
1816 #if INCLUDE_RTM_OPT | |
1817 // Use the same RTM locking code in 32- and 64-bit VM. | |
1818 if (use_rtm) { | |
1819 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg, | |
1820 rtm_counters, method_data, profile_rtm, DONE_LABEL); | |
1821 } else { | |
1822 #endif // INCLUDE_RTM_OPT | |
1823 | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1824 #ifndef _LP64 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1825 // The object is inflated. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1826 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1827 // TODO-FIXME: eliminate the ugly use of manifest constants: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1828 // Use markOopDesc::monitor_value instead of "2". |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1829 // use markOop::unused_mark() instead of "3". |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1830 // The tmpReg value is an objectMonitor reference ORed with |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1831 // markOopDesc::monitor_value (2). We can either convert tmpReg to an |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1832 // objectmonitor pointer by masking off the "2" bit or we can just |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1833 // use tmpReg as an objectmonitor pointer but bias the objectmonitor |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1834 // field offsets with "-2" to compensate for and annul the low-order tag bit. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1835 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1836 // I use the latter as it avoids AGI stalls. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1837 // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]" |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1838 // instead of "mov r, [tmpReg+OFFSETOF(Owner)]". |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1839 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1840 #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2) |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1841 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1842 // boxReg refers to the on-stack BasicLock in the current frame. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1843 // We'd like to write: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1844 // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1845 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1846 // additional latency as we have another ST in the store buffer that must drain. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1847 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1848 if (EmitSync & 8192) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1849 movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1850 get_thread (scrReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1851 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1852 movptr(tmpReg, NULL_WORD); // consider: xor vs mov |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1853 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1854 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1855 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1856 cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1857 } else |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1858 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1859 movptr(scrReg, boxReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1860 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1861 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1862 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1863 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1864 // prefetchw [eax + Offset(_owner)-2] |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1865 prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1866 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1867 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1868 if ((EmitSync & 64) == 0) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1869 // Optimistic form: consider XORL tmpReg,tmpReg |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1870 movptr(tmpReg, NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1871 } else { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1872 // Can suffer RTS->RTO upgrades on shared or cold $ lines |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1873 // Test-And-CAS instead of CAS |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1874 movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1875 testptr(tmpReg, tmpReg); // Locked ? |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1876 jccb (Assembler::notZero, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1877 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1878 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1879 // Appears unlocked - try to swing _owner from null to non-null. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1880 // Ideally, I'd manifest "Self" with get_thread and then attempt |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1881 // to CAS the register containing Self into m->Owner. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1882 // But we don't have enough registers, so instead we can either try to CAS |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1883 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1884 // we later store "Self" into m->Owner. Transiently storing a stack address |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1885 // (rsp or the address of the box) into m->owner is harmless. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1886 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1887 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1888 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1889 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1890 cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1891 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1892 jccb (Assembler::notZero, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1893 get_thread (scrReg); // beware: clobbers ICCs |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1894 movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1895 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1896 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1897 // If the CAS fails we can either retry or pass control to the slow-path. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1898 // We use the latter tactic. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1899 // Pass the CAS result in the icc.ZFlag into DONE_LABEL |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1900 // If the CAS was successful ... |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1901 // Self has acquired the lock |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1902 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1903 // Intentional fall-through into DONE_LABEL ... |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1904 } else { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1905 movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1906 movptr(boxReg, tmpReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1907 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1908 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1909 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1910 // prefetchw [eax + Offset(_owner)-2] |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1911 prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1912 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1913 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1914 if ((EmitSync & 64) == 0) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1915 // Optimistic form |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1916 xorptr (tmpReg, tmpReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1917 } else { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1918 // Can suffer RTS->RTO upgrades on shared or cold $ lines |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1919 movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); // rax, = m->_owner |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1920 testptr(tmpReg, tmpReg); // Locked ? |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1921 jccb (Assembler::notZero, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1922 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1923 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1924 // Appears unlocked - try to swing _owner from null to non-null. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1925 // Use either "Self" (in scr) or rsp as thread identity in _owner. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1926 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1927 get_thread (scrReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1928 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1929 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1930 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1931 cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1932 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1933 // If the CAS fails we can either retry or pass control to the slow-path. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1934 // We use the latter tactic. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1935 // Pass the CAS result in the icc.ZFlag into DONE_LABEL |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1936 // If the CAS was successful ... |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1937 // Self has acquired the lock |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1938 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1939 // Intentional fall-through into DONE_LABEL ... |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1940 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1941 #else // _LP64 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1942 // It's inflated |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1943 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1944 // TODO: someday avoid the ST-before-CAS penalty by |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1945 // relocating (deferring) the following ST. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1946 // We should also think about trying a CAS without having |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1947 // fetched _owner. If the CAS is successful we may |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1948 // avoid an RTO->RTS upgrade on the $line. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1949 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1950 // Without cast to int32_t a movptr will destroy r10 which is typically obj |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1951 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark())); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1952 |
17780 | 1953 movptr (boxReg, tmpReg); |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1954 movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1955 testptr(tmpReg, tmpReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1956 jccb (Assembler::notZero, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1957 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1958 // It's inflated and appears unlocked |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1959 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1960 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1961 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1962 cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1963 // Intentional fall-through into DONE_LABEL ... |
17780 | 1964 #endif // _LP64 |
1965 | |
1966 #if INCLUDE_RTM_OPT | |
1967 } // use_rtm() | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1968 #endif |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1969 // DONE_LABEL is a hot target - we'd really like to place it at the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1970 // start of cache line by padding with NOPs. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1971 // See the AMD and Intel software optimization manuals for the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1972 // most efficient "long" NOP encodings. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1973 // Unfortunately none of our alignment mechanisms suffice. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1974 bind(DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1975 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1976 // At DONE_LABEL the icc ZFlag is set as follows ... |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1977 // Fast_Unlock uses the same protocol. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1978 // ZFlag == 1 -> Success |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1979 // ZFlag == 0 -> Failure - force control through the slow-path |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1980 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1981 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1982 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1983 // obj: object to unlock |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1984 // box: box address (displaced header location), killed. Must be EAX. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1985 // tmp: killed, cannot be obj nor box. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1986 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1987 // Some commentary on balanced locking: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1988 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1989 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1990 // Methods that don't have provably balanced locking are forced to run in the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1991 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1992 // The interpreter provides two properties: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1993 // I1: At return-time the interpreter automatically and quietly unlocks any |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1994 // objects acquired the current activation (frame). Recall that the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1995 // interpreter maintains an on-stack list of locks currently held by |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1996 // a frame. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1997 // I2: If a method attempts to unlock an object that is not held by the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1998 // the frame the interpreter throws IMSX. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
1999 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2000 // Lets say A(), which has provably balanced locking, acquires O and then calls B(). |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2001 // B() doesn't have provably balanced locking so it runs in the interpreter. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2002 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2003 // is still locked by A(). |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2004 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2005 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2006 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2007 // should not be unlocked by "normal" java-level locking and vice-versa. The specification |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2008 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2009 |
17780 | 2010 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { |
2011 assert(boxReg == rax, ""); | |
2012 assert_different_registers(objReg, boxReg, tmpReg); | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2013 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2014 if (EmitSync & 4) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2015 // Disable - inhibit all inlining. Force control through the slow-path |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2016 cmpptr (rsp, 0); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2017 } else |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2018 if (EmitSync & 8) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2019 Label DONE_LABEL; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2020 if (UseBiasedLocking) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2021 biased_locking_exit(objReg, tmpReg, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2022 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2023 // Classic stack-locking code ... |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2024 // Check whether the displaced header is 0 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2025 //(=> recursive unlock) |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2026 movptr(tmpReg, Address(boxReg, 0)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2027 testptr(tmpReg, tmpReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2028 jccb(Assembler::zero, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2029 // If not recursive lock, reset the header to displaced header |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2030 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2031 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2032 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2033 cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2034 bind(DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2035 } else { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2036 Label DONE_LABEL, Stacked, CheckSucc; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2037 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2038 // Critically, the biased locking test must have precedence over |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2039 // and appear before the (box->dhw == 0) recursive stack-lock test. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2040 if (UseBiasedLocking && !UseOptoBiasInlining) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2041 biased_locking_exit(objReg, tmpReg, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2042 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2043 |
17780 | 2044 #if INCLUDE_RTM_OPT |
2045 if (UseRTMForStackLocks && use_rtm) { | |
2046 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); | |
2047 Label L_regular_unlock; | |
2048 movptr(tmpReg, Address(objReg, 0)); // fetch markword | |
2049 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits | |
2050 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked | |
2051 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock | |
2052 xend(); // otherwise end... | |
2053 jmp(DONE_LABEL); // ... and we're done | |
2054 bind(L_regular_unlock); | |
2055 } | |
2056 #endif | |
2057 | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2058 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header |
17780 | 2059 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2060 movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword |
17780 | 2061 testptr(tmpReg, markOopDesc::monitor_value); // Inflated? |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2062 jccb (Assembler::zero, Stacked); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2063 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2064 // It's inflated. |
17780 | 2065 #if INCLUDE_RTM_OPT |
2066 if (use_rtm) { | |
2067 Label L_regular_inflated_unlock; | |
2068 // Clean monitor_value bit to get valid pointer | |
2069 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; | |
2070 movptr(boxReg, Address(tmpReg, owner_offset)); | |
2071 testptr(boxReg, boxReg); | |
2072 jccb(Assembler::notZero, L_regular_inflated_unlock); | |
2073 xend(); | |
2074 jmpb(DONE_LABEL); | |
2075 bind(L_regular_inflated_unlock); | |
2076 } | |
2077 #endif | |
2078 | |
17714
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2079 // Despite our balanced locking property we still check that m->_owner == Self |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2080 // as java routines or native JNI code called by this thread might |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2081 // have released the lock. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2082 // Refer to the comments in synchronizer.cpp for how we might encode extra |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2083 // state in _succ so we can avoid fetching EntryList|cxq. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2084 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2085 // I'd like to add more cases in fast_lock() and fast_unlock() -- |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2086 // such as recursive enter and exit -- but we have to be wary of |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2087 // I$ bloat, T$ effects and BP$ effects. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2088 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2089 // If there's no contention try a 1-0 exit. That is, exit without |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2090 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2091 // we detect and recover from the race that the 1-0 exit admits. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2092 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2093 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2094 // before it STs null into _owner, releasing the lock. Updates |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2095 // to data protected by the critical section must be visible before |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2096 // we drop the lock (and thus before any other thread could acquire |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2097 // the lock and observe the fields protected by the lock). |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2098 // IA32's memory-model is SPO, so STs are ordered with respect to |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2099 // each other and there's no need for an explicit barrier (fence). |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2100 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2101 #ifndef _LP64 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2102 get_thread (boxReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2103 if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2104 // prefetchw [ebx + Offset(_owner)-2] |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2105 prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2106 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2107 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2108 // Note that we could employ various encoding schemes to reduce |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2109 // the number of loads below (currently 4) to just 2 or 3. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2110 // Refer to the comments in synchronizer.cpp. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2111 // In practice the chain of fetches doesn't seem to impact performance, however. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2112 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2113 // Attempt to reduce branch density - AMD's branch predictor. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2114 xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2115 orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2116 orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2117 orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2118 jccb (Assembler::notZero, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2119 movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2120 jmpb (DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2121 } else { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2122 xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2123 orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2124 jccb (Assembler::notZero, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2125 movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2126 orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2127 jccb (Assembler::notZero, CheckSucc); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2128 movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2129 jmpb (DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2130 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2131 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2132 // The Following code fragment (EmitSync & 65536) improves the performance of |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2133 // contended applications and contended synchronization microbenchmarks. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2134 // Unfortunately the emission of the code - even though not executed - causes regressions |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2135 // in scimark and jetstream, evidently because of $ effects. Replacing the code |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2136 // with an equal number of never-executed NOPs results in the same regression. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2137 // We leave it off by default. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2138 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2139 if ((EmitSync & 65536) != 0) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2140 Label LSuccess, LGoSlowPath ; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2141 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2142 bind (CheckSucc); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2143 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2144 // Optional pre-test ... it's safe to elide this |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2145 if ((EmitSync & 16) == 0) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2146 cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2147 jccb (Assembler::zero, LGoSlowPath); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2148 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2149 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2150 // We have a classic Dekker-style idiom: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2151 // ST m->_owner = 0 ; MEMBAR; LD m->_succ |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2152 // There are a number of ways to implement the barrier: |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2153 // (1) lock:andl &m->_owner, 0 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2154 // is fast, but mask doesn't currently support the "ANDL M,IMM32" form. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2155 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2156 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2157 // (2) If supported, an explicit MFENCE is appealing. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2158 // In older IA32 processors MFENCE is slower than lock:add or xchg |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2159 // particularly if the write-buffer is full as might be the case if |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2160 // if stores closely precede the fence or fence-equivalent instruction. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2161 // In more modern implementations MFENCE appears faster, however. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2162 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2163 // The $lines underlying the top-of-stack should be in M-state. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2164 // The locked add instruction is serializing, of course. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2165 // (4) Use xchg, which is serializing |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2166 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2167 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2168 // The integer condition codes will tell us if succ was 0. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2169 // Since _succ and _owner should reside in the same $line and |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2170 // we just stored into _owner, it's likely that the $line |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2171 // remains in M-state for the lock:orl. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2172 // |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2173 // We currently use (3), although it's likely that switching to (2) |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2174 // is correct for the future. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2175 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2176 movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2177 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2178 if (VM_Version::supports_sse2() && 1 == FenceInstruction) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2179 mfence(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2180 } else { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2181 lock (); addptr(Address(rsp, 0), 0); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2182 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2183 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2184 // Ratify _succ remains non-null |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2185 cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2186 jccb (Assembler::notZero, LSuccess); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2187 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2188 xorptr(boxReg, boxReg); // box is really EAX |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2189 if (os::is_MP()) { lock(); } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2190 cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2191 jccb (Assembler::notEqual, LSuccess); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2192 // Since we're low on registers we installed rsp as a placeholding in _owner. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2193 // Now install Self over rsp. This is safe as we're transitioning from |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2194 // non-null to non=null |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2195 get_thread (boxReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2196 movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2197 // Intentional fall-through into LGoSlowPath ... |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2198 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2199 bind (LGoSlowPath); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2200 orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2201 jmpb (DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2202 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2203 bind (LSuccess); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2204 xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2205 jmpb (DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2206 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2207 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2208 bind (Stacked); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2209 // It's not inflated and it's not recursively stack-locked and it's not biased. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2210 // It must be stack-locked. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2211 // Try to reset the header to displaced header. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2212 // The "box" value on the stack is stable, so we can reload |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2213 // and be assured we observe the same value as above. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2214 movptr(tmpReg, Address(boxReg, 0)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2215 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2216 lock(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2217 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2218 cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2219 // Intention fall-thru into DONE_LABEL |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2220 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2221 // DONE_LABEL is a hot target - we'd really like to place it at the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2222 // start of cache line by padding with NOPs. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2223 // See the AMD and Intel software optimization manuals for the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2224 // most efficient "long" NOP encodings. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2225 // Unfortunately none of our alignment mechanisms suffice. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2226 if ((EmitSync & 65536) == 0) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2227 bind (CheckSucc); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2228 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2229 #else // _LP64 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2230 // It's inflated |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2231 movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2232 xorptr(boxReg, r15_thread); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2233 orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2234 jccb (Assembler::notZero, DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2235 movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2236 orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2237 jccb (Assembler::notZero, CheckSucc); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2238 movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2239 jmpb (DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2240 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2241 if ((EmitSync & 65536) == 0) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2242 Label LSuccess, LGoSlowPath ; |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2243 bind (CheckSucc); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2244 cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2245 jccb (Assembler::zero, LGoSlowPath); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2246 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2247 // I'd much rather use lock:andl m->_owner, 0 as it's faster than the |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2248 // the explicit ST;MEMBAR combination, but masm doesn't currently support |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2249 // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2250 // are all faster when the write buffer is populated. |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2251 movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2252 if (os::is_MP()) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2253 lock (); addl (Address(rsp, 0), 0); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2254 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2255 cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2256 jccb (Assembler::notZero, LSuccess); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2257 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2258 movptr (boxReg, (int32_t)NULL_WORD); // box is really EAX |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2259 if (os::is_MP()) { lock(); } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2260 cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2261 jccb (Assembler::notEqual, LSuccess); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2262 // Intentional fall-through into slow-path |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2263 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2264 bind (LGoSlowPath); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2265 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2266 jmpb (DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2267 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2268 bind (LSuccess); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2269 testl (boxReg, 0); // set ICC.ZF=1 to indicate success |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2270 jmpb (DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2271 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2272 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2273 bind (Stacked); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2274 movptr(tmpReg, Address (boxReg, 0)); // re-fetch |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2275 if (os::is_MP()) { lock(); } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2276 cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2277 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2278 if (EmitSync & 65536) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2279 bind (CheckSucc); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2280 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2281 #endif |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2282 bind(DONE_LABEL); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2283 // Avoid branch to branch on AMD processors |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2284 if (EmitSync & 32768) { |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2285 nop(); |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2286 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2287 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2288 } |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2289 #endif // COMPILER2 |
4d4ea046d32a
8033805: Move Fast_Lock/Fast_Unlock code from .ad files to macroassembler
kvn
parents:
13424
diff
changeset
|
2290 |
// Canonicalize a C-style boolean held in register x into exactly 0 or 1.
// Only the low byte is significant; higher bytes may contain garbage.
void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  // since C-style booleans are stored in one byte
  // only! (was bug)
  andl(x, 0xFF);                     // keep only the low byte, sets ZF accordingly
  setb(Assembler::notZero, x);       // x = (low byte != 0) ? 1 : 0
}
2299 | |
// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call through an AddressLiteral. If the target is reachable by a direct
// call (rip-relative/32-bit displacement), emit a literal call carrying the
// relocation; otherwise materialize the address in rscratch1 and call
// indirectly.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}
2317 | |
// Emit an inline-cache call: load the IC sentinel (Universe::non_oop_word())
// into rax, then call 'entry' with a virtual_call relocation recorded at the
// current pc so the IC machinery can later patch this site.
void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  movptr(rax, (intptr_t)Universe::non_oop_word());
  call(AddressLiteral(entry, rh));
}
2323 | |
// Implementation of call_VM versions

// Each of these uses a small trampoline: call(C) pushes a return address on
// the stack (later retrieved as last_Java_pc — see call_VM_helper) and
// control jumps over the out-of-line helper sequence to E when it returns.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

// One-argument variant; arg_1 is moved into the first C argument register
// (or pushed on 32-bit) before the helper call.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

// Two-argument variant. Args are passed last-to-first; the LP64 asserts
// guard against an earlier pass clobbering a register still holding a
// later argument.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

// Three-argument variant; same last-to-first passing discipline as above.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}
2402 | |
// call_VM variants that take an explicit last_java_sp (the sp value recorded
// in the frame anchor). The thread register is r15 on LP64; on 32-bit it is
// passed as noreg and computed inside call_VM_base.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

// Args are passed last-to-first; LP64 asserts catch a pass that would
// clobber a register still holding a later argument.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
2449 | |
// super_call_VM: like call_VM, but invokes MacroAssembler::call_VM_base with
// explicit qualification, so a subclass's redefinition of call_VM_base is not
// used (plain C++ qualified-call semantics).
void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

// Args passed last-to-first; LP64 asserts guard against smashing a register
// that still holds a later argument.
void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
2496 | |
// Common implementation behind every call_VM / super_call_VM variant.
//   oop_result          - if valid, receives the oop from the thread's
//                         vm_result slot after the call (slot is cleared)
//   java_thread         - thread register; noreg means "compute it here"
//   last_java_sp        - sp recorded in the frame anchor; noreg means rsp
//   entry_point         - VM entry to call
//   number_of_arguments - C arguments already set up (thread arg excluded;
//                         it is added here)
//   check_exceptions    - if true, test pending_exception on return and
//                         forward to StubRoutines::forward_exception_entry()
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
  // r12 is the heapbase.
  LP64_ONLY(if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // Debug-only cross-check that the callee really preserved the thread reg.
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}
2595 | |
// Compute last_Java_sp for the call_VM trampoline and delegate to
// call_VM_base.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finsihed with it. This allows
  // use to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  // Skip the return address plus the arguments already pushed on the stack.
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}
2618 | |
// Leaf calls: call a C function without building a Java frame anchor or
// checking for exceptions. Args are passed last-to-first; LP64 asserts
// guard against clobbering a register still holding a later argument.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}
2645 | |
// super_call_VM_leaf: like call_VM_leaf, but invokes
// MacroAssembler::call_VM_leaf_base with explicit qualification so a
// subclass redefinition is bypassed. Args passed last-to-first as usual.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}
2682 | |
// Fetch the oop result the VM left in the thread-local vm_result slot,
// clear the slot, and verify the oop in debug builds.
void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
  verify_oop(oop_result, "broken oop in call_VM_base");
}

// Same for the secondary (metadata, not oop) result slot; no oop verify.
void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
}
2693 | |
// Intentionally empty here: hooks called from call_VM_base; subclasses
// (e.g. the interpreter's macro assembler) supply the real behavior.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}
2699 | |
2700 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { | |
2701 if (reachable(src1)) { | |
2702 cmpl(as_Address(src1), imm); | |
2703 } else { | |
2704 lea(rscratch1, src1); | |
2705 cmpl(Address(rscratch1, 0), imm); | |
2706 } | |
2707 } | |
2708 | |
2709 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { | |
2710 assert(!src2.is_lval(), "use cmpptr"); | |
2711 if (reachable(src2)) { | |
2712 cmpl(src1, as_Address(src2)); | |
2713 } else { | |
2714 lea(rscratch1, src2); | |
2715 cmpl(src1, Address(rscratch1, 0)); | |
2716 } | |
2717 } | |
2718 | |
2719 void MacroAssembler::cmp32(Register src1, int32_t imm) { | |
2720 Assembler::cmpl(src1, imm); | |
2721 } | |
2722 | |
2723 void MacroAssembler::cmp32(Register src1, Address src2) { | |
2724 Assembler::cmpl(src1, src2); | |
2725 } | |
2726 | |
// Compare two doubles and leave -1/0/+1 in dst. ucomisd sets PF when the
// operands are unordered (a NaN is present), CF when opr1 < opr2, ZF when
// equal. 'unordered_is_less' selects whether an unordered result maps to
// -1 or +1.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);                 // assume less (also the unordered answer)
    jcc(Assembler::parity, L);     // unordered -> -1
    jcc(Assembler::below , L);     // opr1 < opr2 -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    increment(dst);                // greater -> 1
  } else { // unordered is greater
    movl(dst, 1);                  // assume greater (also the unordered answer)
    jcc(Assembler::parity, L);     // unordered -> 1
    jcc(Assembler::above , L);     // opr1 > opr2 -> 1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    decrementl(dst);               // less -> -1
  }
  bind(L);
}

// Single-precision version of cmpsd2int; identical flag logic via ucomiss.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}
2770 | |
2771 | |
2772 void MacroAssembler::cmp8(AddressLiteral src1, int imm) { | |
2773 if (reachable(src1)) { | |
2774 cmpb(as_Address(src1), imm); | |
2775 } else { | |
2776 lea(rscratch1, src1); | |
2777 cmpb(Address(rscratch1, 0), imm); | |
2778 } | |
2779 } | |
2780 | |
// Pointer-width compare of a register against an AddressLiteral.
// If src2 is an lval, the literal's address itself is the comparand;
// otherwise the memory it names is read.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    // Compare against the 64-bit literal address (needs a scratch register).
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    // 32-bit: the address fits in an immediate; keep the relocation.
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

// Memory-vs-literal-address compare; src2 must be an lval since x86 has no
// mem-mem compare.
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}
2811 | |
// Atomic compare-and-exchange of pointer width against memory named by an
// AddressLiteral. The lock prefix is emitted only on MP systems.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

// Pointer-width cmpxchg: cmpxchgq on 64-bit, cmpxchgl on 32-bit.
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}
2828 | |
2829 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { | |
2830 if (reachable(src)) { | |
2831 Assembler::comisd(dst, as_Address(src)); | |
2832 } else { | |
2833 lea(rscratch1, src); | |
2834 Assembler::comisd(dst, Address(rscratch1, 0)); | |
2835 } | |
2836 } | |
2837 | |
2838 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { | |
2839 if (reachable(src)) { | |
2840 Assembler::comiss(dst, as_Address(src)); | |
2841 } else { | |
2842 lea(rscratch1, src); | |
2843 Assembler::comiss(dst, Address(rscratch1, 0)); | |
2844 } | |
2845 } | |
2846 | |
2847 | |
// Conditionally increment a 32-bit counter in memory: if 'cond' holds,
// atomically bump *counter_addr; flags are preserved across the increment
// via pushf/popf so the caller's condition codes survive.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);          // skip the increment when cond is false
  pushf(); // Preserve flags
  atomic_incl(counter_addr);
  popf();
  bind(L);
}
2857 | |
int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)  -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case: min_int / -1 would overflow (hardware raises #DE),
  // so it is answered without executing idivl.
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();                      // sign-extend rax into rdx:rax for idivl
  int idivl_offset = offset(); // record pc offset of the faulting instruction
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}
2893 | |
2894 | |
2895 | |
2896 void MacroAssembler::decrementl(Register reg, int value) { | |
2897 if (value == min_jint) {subl(reg, value) ; return; } | |
2898 if (value < 0) { incrementl(reg, -value); return; } | |
2899 if (value == 0) { ; return; } | |
2900 if (value == 1 && UseIncDec) { decl(reg) ; return; } | |
2901 /* else */ { subl(reg, value) ; return; } | |
2902 } | |
2903 | |
2904 void MacroAssembler::decrementl(Address dst, int value) { | |
2905 if (value == min_jint) {subl(dst, value) ; return; } | |
2906 if (value < 0) { incrementl(dst, -value); return; } | |
2907 if (value == 0) { ; return; } | |
2908 if (value == 1 && UseIncDec) { decl(dst) ; return; } | |
2909 /* else */ { subl(dst, value) ; return; } | |
2910 } | |
2911 | |
// Signed division by 2^shift_value via arithmetic shift. A plain sar rounds
// toward negative infinity, so negative dividends are first biased by
// (2^shift - 1) to get round-toward-zero semantics.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;  // bias for negative dividends

  if (offset == 1) {
    incrementl(reg);          // shift by 1: bias is just +1
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}
2928 | |
2929 void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { | |
2930 if (reachable(src)) { | |
2931 Assembler::divsd(dst, as_Address(src)); | |
2932 } else { | |
2933 lea(rscratch1, src); | |
2934 Assembler::divsd(dst, Address(rscratch1, 0)); | |
2935 } | |
2936 } | |
2937 | |
2938 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { | |
2939 if (reachable(src)) { | |
2940 Assembler::divss(dst, as_Address(src)); | |
2941 } else { | |
2942 lea(rscratch1, src); | |
2943 Assembler::divss(dst, Address(rscratch1, 0)); | |
2944 } | |
2945 } | |
2946 | |
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Clear the x87 FPU register stack: a single emms when MMX is available,
// otherwise free all eight stack slots individually.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2
2957 | |
2958 | |
// Defines obj, preserves var_size_in_bytes
// Inline eden allocation via a CAS retry loop on the heap top pointer.
// Size is either constant (con_size_in_bytes, when var_size_in_bytes is
// noreg) or variable. Jumps to slow_case when inline allocation is not
// supported or the request does not fit.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);               // obj = current top
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);     // lost the race: reload top and retry
  }
}
2992 | |
// Standard frame prologue: save the caller's frame pointer and set up ours.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}
2997 | |
// A 5 byte nop that is safe for patching (see patch_verified_entry)
void MacroAssembler::fat_nop() {
  if (UseAddressNop) {
    addr_nop_5();
  } else {
    // Four segment-override prefixes plus a one-byte nop = 5 bytes total.
    emit_int8(0x26); // es:
    emit_int8(0x2e); // cs:
    emit_int8(0x64); // fs:
    emit_int8(0x65); // gs:
    emit_int8((unsigned char)0x90);
  }
}
3010 | |
// Compare ST(0) with ST(1), popping both operands (see 4-arg fcmp).
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}
3014 | |
// Compare ST(0) with ST(index) and set the integer condition codes.
// On CPUs with cmov support, fucomi/fucomip write eflags directly and no
// temp register is needed; otherwise the FPU status word is moved to eflags
// via rax (fnstsw_ax + sahf), so tmp is required to preserve rax.
// pop_left pops ST(0); pop_right additionally pops the other operand
// (pop_right without pop_left is not supported).
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}
3050 | |
// Compare ST(0) with ST(1) (popping both) and materialize -1/0/+1 in dst.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}
3054 | |
// FPU compare producing an integer result in dst: -1 if less, 0 if equal,
// +1 if greater. unordered_is_less selects whether an unordered (NaN)
// comparison maps to -1 or +1 (matching Java's fcmpl/fcmpg semantics).
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);                 // assume less (also the unordered answer)
    jcc(Assembler::parity, L);     // unordered -> -1
    jcc(Assembler::below , L);     // less -> -1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    increment(dst);                // otherwise greater -> +1
  } else { // unordered is greater
    movl(dst, 1);                  // assume greater (also the unordered answer)
    jcc(Assembler::parity, L);     // unordered -> +1
    jcc(Assembler::above , L);     // greater -> +1
    movl(dst, 0);
    jcc(Assembler::equal , L);     // equal -> 0
    decrementl(dst);               // otherwise less -> -1
  }
  bind(L);
}
3075 | |
// Push the double at the given address literal onto the x87 stack.
void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}
3079 | |
// Push the float at the given address literal onto the x87 stack.
void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}
3083 | |
// Push the extended-precision (80-bit) value at the address literal onto the x87 stack.
void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}
3087 | |
// Load the x87 control word from the given address literal.
void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}
3091 | |
// Compute 2^X where X is in ST(0), leaving the result in ST(0).
// Splits X into int(X) and the fraction, uses f2xm1 for the fractional
// part, and builds 2^int(X) by encoding int(X)+1023 directly into the
// exponent field of an IEEE double on the stack. Out-of-range exponents
// produce NaN so the caller can detect failure and fall back.
void MacroAssembler::pow_exp_core_encoding() {
  // kills rax, rcx, rdx
  subptr(rsp,sizeof(jdouble));
  // computes 2^X. Stack: X ...
  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
  // keep it on the thread's stack to compute 2^int(X) later
  // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1)
  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
  fld_s(0);                 // Stack: X X ...
  frndint();                // Stack: int(X) X ...
  fsuba(1);                 // Stack: int(X) X-int(X) ...
  fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
  faddp(1);                 // Stack: 2^(X-int(X))
  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
  // shift int(X)+1023 to exponent position.
  // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
  // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
  // values so detect them and set result to NaN.
  movl(rax,Address(rsp,0));
  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
  addl(rax, 1023);
  movl(rdx,rax);
  shll(rax,20);
  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
  addl(rdx,1);
  // Check that 1 < int(X)+1023+1 < 2048
  // in 3 steps:
  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
  // 2- (int(X)+1023+1)&-2048 != 0
  // 3- (int(X)+1023+1)&-2048 != 1
  // Do 2- first because addl just updated the flags.
  cmov32(Assembler::equal,rax,rcx);
  cmpl(rdx,1);
  cmov32(Assembler::equal,rax,rcx);
  testl(rdx,rcx);
  cmov32(Assembler::notEqual,rax,rcx);
  movl(Address(rsp,4),rax);  // high word: sign/exponent of 2^int(X)
  movl(Address(rsp,0),0);    // low word: zero mantissa bits
  fmul_d(Address(rsp,0));    // Stack: 2^X ...
  addptr(rsp,sizeof(jdouble));
}
3135 | |
// Set the x87 precision-control bits (0x300) in the control word to
// extended precision for intermediate computations. The original control
// word is left on the stack for restore_precision(). Clobbers rax.
void MacroAssembler::increase_precision() {
  subptr(rsp, BytesPerWord);
  fnstcw(Address(rsp, 0));       // save current control word on the stack
  movl(rax, Address(rsp, 0));
  orl(rax, 0x300);               // set precision control to extended
  push(rax);
  fldcw(Address(rsp, 0));        // activate the modified control word
  pop(rax);
}
3145 | |
// Reload the x87 control word saved on the stack by increase_precision().
void MacroAssembler::restore_precision() {
  fldcw(Address(rsp, 0));
  addptr(rsp, BytesPerWord);
}
3150 | |
// Compute X^Y with X in ST(1) and Y in ST(0), leaving the result in ST(0).
void MacroAssembler::fast_pow() {
  // computes X^Y = 2^(Y * log2(X))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  BLOCK_COMMENT("fast_pow {");
  increase_precision();
  fyl2x();                 // Stack: (Y*log2(X)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
  BLOCK_COMMENT("} fast_pow");
}
3163 | |
// Compute exp(X) with X in ST(0), leaving the result in ST(0).
void MacroAssembler::fast_exp() {
  // computes exp(X) = 2^(X * log2(e))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fldl2e();                // Stack: log2(e) X ...
  fmulp(1);                // Stack: (X*log2(e)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
}
3175 | |
// Intrinsic implementation of Math.pow / Math.exp on the x87 stack.
// For exp: X is in ST(0). For pow: Y in ST(0), X in ST(1). The result is
// left in ST(0). Falls back to the SharedRuntime C implementation (via
// fp_runtime_fallback) whenever the fast path produces NaN or X < 0 with
// a non-integer Y. num_fpu_regs_in_use tells how many FPU registers the
// caller has live, which controls how duplicated arguments are discarded.
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
  // kills rax, rcx, rdx
  // pow and exp needs 2 extra registers on the fpu stack.
  Label slow_case, done;
  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rdx,
    tmp = rdx;
  }
  Register tmp2 = rax;
  Register tmp3 = rcx;

  if (is_exp) {
    // Stack: X
    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
    fast_exp();                 // Stack: exp(X) X
    fcmp(tmp, 0, false, false); // Stack: exp(X) X
    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate argument. Stack: exp(X)
    if (num_fpu_regs_in_use > 0) {
      fxch();
      fpop();
    } else {
      ffree(1);
    }
    jmp(done);
  } else {
    // Stack: X Y
    Label x_negative, y_odd;

    fldz();                     // Stack: 0 X Y
    fcmp(tmp, 1, true, false);  // Stack: X Y
    jcc(Assembler::above, x_negative);

    // X >= 0

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fast_pow();                 // Stack: X^Y X Y
    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
    // X^Y not equal to itself: X^Y is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }
    jmp(done);

    // X <= 0
    bind(x_negative);

    fld_s(1);                   // Stack: Y X Y
    frndint();                  // Stack: int(Y) X Y
    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
    jcc(Assembler::notEqual, slow_case);  // Y is not an integer -> runtime

    subptr(rsp, 8);

    // For X^Y, when X < 0, Y has to be an integer and the final
    // result depends on whether it's odd or even. We just checked
    // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
    // integer to test its parity. If int(Y) is huge and doesn't fit
    // in the 64 bit integer range, the integer indefinite value will
    // end up in the gp registers. Huge numbers are all even, the
    // integer indefinite number is even so it's fine.

#ifdef ASSERT
    // Let's check we don't end up with an integer indefinite number
    // when not expected. First test for huge numbers: check whether
    // int(Y)+1 == int(Y) which is true for very large numbers and
    // those are all even. A 64 bit integer is guaranteed to not
    // overflow for numbers where y+1 != y (when precision is set to
    // double precision).
    Label y_not_huge;

    fld1();                     // Stack: 1 int(Y) X Y
    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y

#ifdef _LP64
    // trip to memory to force the precision down from double extended
    // precision
    fstp_d(Address(rsp, 0));
    fld_d(Address(rsp, 0));
#endif

    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
#endif

    // move int(Y) as 64 bit integer to thread's stack
    fistp_d(Address(rsp,0));    // Stack: X Y

#ifdef ASSERT
    jcc(Assembler::notEqual, y_not_huge);

    // Y is huge so we know it's even. It may not fit in a 64 bit
    // integer and we don't want the debug code below to see the
    // integer indefinite value so overwrite int(Y) on the thread's
    // stack with 0.
    movl(Address(rsp, 0), 0);
    movl(Address(rsp, 4), 0);

    bind(y_not_huge);
#endif

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fabs();                     // Stack: abs(X) Y X Y
    fast_pow();                 // Stack: abs(X)^Y X Y
    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.

    pop(tmp2);                  // low (LP64: all) bits of int(Y)
    NOT_LP64(pop(tmp3));        // 32-bit: high bits of int(Y)
    jcc(Assembler::parity, slow_case);

#ifdef ASSERT
    // Check that int(Y) is not integer indefinite value (int
    // overflow). Shouldn't happen because for values that would
    // overflow, 1+int(Y)==Y which was tested earlier.
#ifndef _LP64
    {
      Label integer;
      testl(tmp2, tmp2);
      jcc(Assembler::notZero, integer);
      cmpl(tmp3, 0x80000000);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#else
    {
      Label integer;
      mov(tmp3, tmp2); // preserve tmp2 for parity check below
      shlq(tmp3, 1);
      jcc(Assembler::carryClear, integer);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#endif
#endif

    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }

    testl(tmp2, 1);
    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
    // X <= 0, Y odd: X^Y = -abs(X)^Y

    fchs();                     // Stack: -abs(X)^Y Y
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  fpop();                      // pop incorrect result or int(Y)

  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
                      is_exp ? 1 : 2, num_fpu_regs_in_use);

  // Come here with result in F-TOS
  bind(done);
}
3351 | |
// Pop ST(0) off the x87 stack: free the register, then advance the stack top.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}
3356 | |
// IEEE-style remainder of ST(0)/ST(1) using fprem, looping until the
// partial-remainder "incomplete" flag (C2 in the status word) clears.
// tmp preserves rax while the status word is read through it.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);               // test C2 directly
    jcc(Assembler::notEqual, L);
#else
    sahf();                          // C2 lands in PF after sahf
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}
3378 | |
3379 | |
// Increment the 32-bit value at an address literal; goes through
// rscratch1 when the literal is not RIP-reachable.
void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}
3388 | |
// Increment the 32-bit value at an array address.
void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}
3392 | |
// Add a 32-bit constant to a register, choosing the shortest encoding:
// inc for +1 (when UseIncDec), decrementl for negative values, addl
// otherwise. min_jint is handled first since -min_jint overflows.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}
3400 | |
// Add a 32-bit constant to a memory operand; same encoding choices as the
// register form above (min_jint first because -min_jint overflows).
void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}
3408 | |
// Unconditional jump to an address literal; indirect through rscratch1
// when the target is out of direct-branch range.
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}
3417 | |
// Conditional jump to an address literal. When the target is reachable the
// jcc is emitted directly (short 2-byte form when the displacement fits in
// 8 bits and no relocation is needed, else the long 6-byte form). When it
// is not reachable, the condition is reversed around an indirect jmp
// through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)pc());
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_int8(0x70 | cc);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_int8(0x0F);
      emit_int8((unsigned char)(0x80 | cc));
      emit_int32(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);   // skip the far jump when the condition fails
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}
3446 | |
// Load the MXCSR register from an address literal, via rscratch1 when unreachable.
void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}
3455 | |
// Load a sign-extended byte into dst. On 64-bit (or P6+) a single movsbl
// is used; older 32-bit CPUs emulate it with a zero-extending load plus
// shift left/arithmetic shift right. Returns the code offset of the load
// instruction (used for implicit null-check bookkeeping by callers).
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}
3468 | |
// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
// Load a sign-extended 16-bit value into dst; returns the code offset of
// the load instruction.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}
3488 | |
// Load a zero-extended byte into dst; returns the code offset of the load.
// The xorl+movb fallback (pre-P6, dst not used in the address) avoids a
// partial-register stall; see the Intel reference cited below.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}
3503 | |
// Note: load_unsigned_short used to be called load_unsigned_word.
// Load a zero-extended 16-bit value into dst; returns the code offset of
// the load. Same partial-register-stall reasoning as load_unsigned_byte.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}
3519 | |
// Load a value of the given byte size from src, sign- or zero-extending
// 1- and 2-byte values. On 32-bit, an 8-byte load is split across two
// registers (dst = low word, dst2 = high word).
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}
3537 | |
// Store a value of the given byte size to dst. On 32-bit, an 8-byte store
// is split across two registers (src = low word, src2 = high word).
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}
3555 | |
// Store the 32-bit register src at the address literal, via rscratch1 when unreachable.
void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}
3564 | |
// Load a 32-bit value from the address literal into dst, via rscratch1 when unreachable.
void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}
3573 | |
// C++ bool manipulation
// These pick the move width matching sizeof(bool) for the host compiler,
// so bool fields written by C++ code are read/written consistently.

void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}
3587 | |
// Store a bool constant with the move width matching sizeof(bool).
void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}
3599 | |
// Store a register as a bool with the move width matching sizeof(bool).
void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}
3611 | |
// Store a byte constant at an array address.
void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}
3615 | |
// movd from an address literal into an XMM register, via rscratch1 when unreachable.
void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movdl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movdl(dst, Address(rscratch1, 0));
  }
}
3624 | |
// movq from an address literal into an XMM register, via rscratch1 when unreachable.
void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movq(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movq(dst, Address(rscratch1, 0));
  }
}
3633 | |
// Load a double from an address literal into an XMM register.
// UseXmmLoadAndClearUpper selects movsd (clears the upper half) over
// movlpd (preserves it); falls back through rscratch1 when unreachable.
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}
3650 | |
// Load a float from an address literal into an XMM register, via rscratch1 when unreachable.
void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}
3659 | |
// Pointer-sized register-to-register move (movq on 64-bit, movl on 32-bit).
void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
3663 | |
// Pointer-sized load from memory into a register.
void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
3667 | |
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// (an AddressLiteral carries relocation info; a raw intptr_t does not).
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}
3672 | |
// Pointer-sized store of a register into memory.
void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
3676 | |
// Unaligned 128-bit load from an address literal, via rscratch1 when unreachable.
void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movdqu(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movdqu(dst, Address(rscratch1, 0));
  }
}
3685 | |
// Aligned 128-bit load from an address literal, via rscratch1 when unreachable.
void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movdqa(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movdqa(dst, Address(rscratch1, 0));
  }
}
// Scalar-double load from an address literal, via rscratch1 when unreachable.
void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movsd(dst, Address(rscratch1, 0));
  }
}
3703 | |
// Scalar-single load from an address literal, via rscratch1 when unreachable.
void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movss(dst, Address(rscratch1, 0));
  }
}
3712 | |
// Scalar-double multiply with a memory operand at an address literal.
void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulsd(dst, Address(rscratch1, 0));
  }
}
3721 | |
// Scalar-single multiply with a memory operand at an address literal.
void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulss(dst, Address(rscratch1, 0));
  }
}
3730 | |
// Explicit null check: when the later access at the given offset would not
// fault inside the guarded page range, touch M[reg] now so a null reg
// raises the OS exception here instead.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}
3745 | |
// Emit a call to os::breakpoint rather than a raw int3.
void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
3751 | |
// Restore FPU then integer state pushed by push_CPU_state (reverse order).
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}
3756 | |
// Restore FPU/SSE state saved by push_FPU_state and release the stack area
// (frstor pairs with fnsave on 32-bit, fxrstor with fxsave on 64-bit).
void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}
3762 | |
// Restore integer registers and flags; the 8-byte pop on 64-bit undoes the
// alignment padding added by push_IU_state.
void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));
  popf();
}
3768 | |
// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}
3775 | |
// Save the FPU/SSE state into a freshly reserved stack area
// (fnsave+fwait on 32-bit, fxsave on 64-bit).
void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}
3785 | |
// Save flags and all general-purpose registers.
void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}
3793 | |
// Clear the last-Java-frame anchor in the JavaThread: sp always, fp and pc
// only when requested. When java_thread is invalid, the current thread is
// loaded into rdi first.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}
3810 | |
// Undo save_rax: pop rax from the stack (tmp == noreg) or copy it back from tmp.
void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}
3815 | |
// Round reg up to the nearest multiple of modulus (modulus must be a power of two
// for the mask in andptr to be correct).
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}
3820 | |
// Preserve rax either on the stack (tmp == noreg) or in tmp.
void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}
3825 | |
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // derive a per-thread, int-aligned offset within the serialize page
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}
3841 | |
// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
// Invalid java_thread -> load current thread into rdi; invalid last_java_sp
// -> use rsp. fp and pc are optional; sp is stored last.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}
3877 | |
// Pointer-width shift left: shlq on 64-bit, shll on 32-bit.
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}
3881 | |
// Pointer-width logical shift right: shrq on 64-bit, shrl on 32-bit.
void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}
3885 | |
3886 void MacroAssembler::sign_extend_byte(Register reg) { | |
3887 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { | |
3888 movsbl(reg, reg); // movsxb | |
3889 } else { | |
3890 shll(reg, 24); | |
3891 sarl(reg, 24); | |
3892 } | |
3893 } | |
3894 | |
3895 void MacroAssembler::sign_extend_short(Register reg) { | |
3896 if (LP64_ONLY(true ||) VM_Version::is_P6()) { | |
3897 movswl(reg, reg); // movsxw | |
3898 } else { | |
3899 shll(reg, 16); | |
3900 sarl(reg, 16); | |
3901 } | |
3902 } | |
3903 | |
// testl against a literal address. Note: unlike the AddressLiteral helpers
// below there is no lea/scratch fallback here — the target must be reachable.
void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}
3908 | |
3909 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { | |
3910 if (reachable(src)) { | |
3911 Assembler::sqrtsd(dst, as_Address(src)); | |
3912 } else { | |
3913 lea(rscratch1, src); | |
3914 Assembler::sqrtsd(dst, Address(rscratch1, 0)); | |
3915 } | |
3916 } | |
3917 | |
3918 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { | |
3919 if (reachable(src)) { | |
3920 Assembler::sqrtss(dst, as_Address(src)); | |
3921 } else { | |
3922 lea(rscratch1, src); | |
3923 Assembler::sqrtss(dst, Address(rscratch1, 0)); | |
3924 } | |
3925 } | |
3926 | |
3927 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { | |
3928 if (reachable(src)) { | |
3929 Assembler::subsd(dst, as_Address(src)); | |
3930 } else { | |
3931 lea(rscratch1, src); | |
3932 Assembler::subsd(dst, Address(rscratch1, 0)); | |
3933 } | |
3934 } | |
3935 | |
3936 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { | |
3937 if (reachable(src)) { | |
3938 Assembler::subss(dst, as_Address(src)); | |
3939 } else { | |
3940 lea(rscratch1, src); | |
3941 Assembler::subss(dst, Address(rscratch1, 0)); | |
3942 } | |
3943 } | |
3944 | |
3945 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { | |
3946 if (reachable(src)) { | |
3947 Assembler::ucomisd(dst, as_Address(src)); | |
3948 } else { | |
3949 lea(rscratch1, src); | |
3950 Assembler::ucomisd(dst, Address(rscratch1, 0)); | |
3951 } | |
3952 } | |
3953 | |
3954 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { | |
3955 if (reachable(src)) { | |
3956 Assembler::ucomiss(dst, as_Address(src)); | |
3957 } else { | |
3958 lea(rscratch1, src); | |
3959 Assembler::ucomiss(dst, Address(rscratch1, 0)); | |
3960 } | |
3961 } | |
3962 | |
3963 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { | |
3964 // Used in sign-bit flipping with aligned address. | |
3965 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); | |
3966 if (reachable(src)) { | |
3967 Assembler::xorpd(dst, as_Address(src)); | |
3968 } else { | |
3969 lea(rscratch1, src); | |
3970 Assembler::xorpd(dst, Address(rscratch1, 0)); | |
3971 } | |
3972 } | |
3973 | |
3974 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { | |
3975 // Used in sign-bit flipping with aligned address. | |
3976 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); | |
3977 if (reachable(src)) { | |
3978 Assembler::xorps(dst, as_Address(src)); | |
3979 } else { | |
3980 lea(rscratch1, src); | |
3981 Assembler::xorps(dst, Address(rscratch1, 0)); | |
3982 } | |
3983 } | |
3984 | |
3985 void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { | |
3986 // Used in sign-bit flipping with aligned address. | |
7427 | 3987 bool aligned_adr = (((intptr_t)src.target() & 15) == 0); |
3988 assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes"); | |
7199 | 3989 if (reachable(src)) { |
3990 Assembler::pshufb(dst, as_Address(src)); | |
3991 } else { | |
3992 lea(rscratch1, src); | |
3993 Assembler::pshufb(dst, Address(rscratch1, 0)); | |
3994 } | |
3995 } | |
3996 | |
3997 // AVX 3-operands instructions | |
3998 | |
3999 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
4000 if (reachable(src)) { | |
4001 vaddsd(dst, nds, as_Address(src)); | |
4002 } else { | |
4003 lea(rscratch1, src); | |
4004 vaddsd(dst, nds, Address(rscratch1, 0)); | |
4005 } | |
4006 } | |
4007 | |
4008 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
4009 if (reachable(src)) { | |
4010 vaddss(dst, nds, as_Address(src)); | |
4011 } else { | |
4012 lea(rscratch1, src); | |
4013 vaddss(dst, nds, Address(rscratch1, 0)); | |
4014 } | |
4015 } | |
4016 | |
4017 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { | |
4018 if (reachable(src)) { | |
4019 vandpd(dst, nds, as_Address(src), vector256); | |
4020 } else { | |
4021 lea(rscratch1, src); | |
4022 vandpd(dst, nds, Address(rscratch1, 0), vector256); | |
4023 } | |
4024 } | |
4025 | |
4026 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { | |
4027 if (reachable(src)) { | |
4028 vandps(dst, nds, as_Address(src), vector256); | |
4029 } else { | |
4030 lea(rscratch1, src); | |
4031 vandps(dst, nds, Address(rscratch1, 0), vector256); | |
4032 } | |
4033 } | |
4034 | |
4035 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
4036 if (reachable(src)) { | |
4037 vdivsd(dst, nds, as_Address(src)); | |
4038 } else { | |
4039 lea(rscratch1, src); | |
4040 vdivsd(dst, nds, Address(rscratch1, 0)); | |
4041 } | |
4042 } | |
4043 | |
4044 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
4045 if (reachable(src)) { | |
4046 vdivss(dst, nds, as_Address(src)); | |
4047 } else { | |
4048 lea(rscratch1, src); | |
4049 vdivss(dst, nds, Address(rscratch1, 0)); | |
4050 } | |
4051 } | |
4052 | |
4053 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
4054 if (reachable(src)) { | |
4055 vmulsd(dst, nds, as_Address(src)); | |
4056 } else { | |
4057 lea(rscratch1, src); | |
4058 vmulsd(dst, nds, Address(rscratch1, 0)); | |
4059 } | |
4060 } | |
4061 | |
4062 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
4063 if (reachable(src)) { | |
4064 vmulss(dst, nds, as_Address(src)); | |
4065 } else { | |
4066 lea(rscratch1, src); | |
4067 vmulss(dst, nds, Address(rscratch1, 0)); | |
4068 } | |
4069 } | |
4070 | |
4071 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
4072 if (reachable(src)) { | |
4073 vsubsd(dst, nds, as_Address(src)); | |
4074 } else { | |
4075 lea(rscratch1, src); | |
4076 vsubsd(dst, nds, Address(rscratch1, 0)); | |
4077 } | |
4078 } | |
4079 | |
4080 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |
4081 if (reachable(src)) { | |
4082 vsubss(dst, nds, as_Address(src)); | |
4083 } else { | |
4084 lea(rscratch1, src); | |
4085 vsubss(dst, nds, Address(rscratch1, 0)); | |
4086 } | |
4087 } | |
4088 | |
4089 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { | |
4090 if (reachable(src)) { | |
4091 vxorpd(dst, nds, as_Address(src), vector256); | |
4092 } else { | |
4093 lea(rscratch1, src); | |
4094 vxorpd(dst, nds, Address(rscratch1, 0), vector256); | |
4095 } | |
4096 } | |
4097 | |
4098 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { | |
4099 if (reachable(src)) { | |
4100 vxorps(dst, nds, as_Address(src), vector256); | |
4101 } else { | |
4102 lea(rscratch1, src); | |
4103 vxorps(dst, nds, Address(rscratch1, 0), vector256); | |
4104 } | |
4105 } | |
4106 | |
4107 | |
4108 ////////////////////////////////////////////////////////////////////////////////// | |
8001
db9981fd3124
8005915: Unify SERIALGC and INCLUDE_ALTERNATE_GCS
jprovino
parents:
7477
diff
changeset
|
4109 #if INCLUDE_ALL_GCS |
7199 | 4110 |
// G1 SATB pre-barrier: if concurrent marking is active, record the value
// being overwritten (pre_val) in the thread-local SATB buffer, calling into
// the runtime when the buffer is full.
//   obj         - address being stored to; if valid, pre_val is loaded from it
//   pre_val     - register holding (or receiving) the previous value
//   thread      - JavaThread register (must be r15_thread on 64-bit)
//   tmp         - scratch register
//   tosca_live  - rax holds a live value and must be preserved across runtime call
//   expand_call - expand call_VM_leaf inline (needed when no interpreter frame)
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  // Thread-local SATB queue fields.
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  // The buffer fills from high index toward zero.
  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values (popped in reverse push order)
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}
4224 | |
// G1 post-barrier: after storing new_val at store_addr, dirty the card for
// store_addr if the store crosses heap regions and the card is not already
// dirty/young, enqueueing the card address in the thread's dirty-card queue
// (runtime call when the queue is full).
//   store_addr - address stored to
//   new_val    - value that was stored
//   thread     - JavaThread register (must be r15_thread on 64-bit)
//   tmp, tmp2  - scratch registers (tmp ends up holding the card address)
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  // Thread-local dirty card queue fields.
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  Label done;
  Label runtime;

  // Does store cross heap regions?
  // XOR of the two addresses has bits >= region-size set iff they are in
  // different regions.
  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?
  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?
  const Register card_addr = tmp;
  const Register cardtable = tmp2;

  movptr(card_addr, store_addr);
  shrptr(card_addr, CardTableModRefBS::card_shift);
  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
  // a valid address and therefore is not properly handled by the relocation code.
  movptr(cardtable, (intptr_t)ct->byte_map_base);
  addptr(card_addr, cardtable);

  // Young cards never need refinement; skip without a fence.
  cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val());
  jcc(Assembler::equal, done);

  // StoreLoad fence before re-reading the card, then skip if already dirty.
  membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
  jcc(Assembler::equal, done);


  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());

  // Enqueue card_addr in the thread-local buffer; index counts down to 0.
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_addr);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}
4313 | |
8001
db9981fd3124
8005915: Unify SERIALGC and INCLUDE_ALTERNATE_GCS
jprovino
parents:
7477
diff
changeset
|
4314 #endif // INCLUDE_ALL_GCS |
7199 | 4315 ////////////////////////////////////////////////////////////////////////////////// |
4316 | |
4317 | |
void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}
4324 | |
// Overload taking the store destination; dst is unused here — the card is
// derived from obj alone.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}
4328 | |
4329 | |
// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: turn the oop address in obj into its card-table index (destroys obj).
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}
4336 | |
// Part 2: obj already holds the card index (from part 1); mark that card 0
// (dirty) in the card table.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and it will
  // never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement.
  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    // Fits in a 32-bit displacement: single movb with base-less addressing.
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress 'disp' could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and a
    // smarter version of as_Address. However, 'ExternalAddress' generates a relocation
    // entry and that entry is not properly handled by the relocation code.
    AddressLiteral cardtable((address)ct->byte_map_base, relocInfo::none);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}
4362 | |
// Pointer-width subtract immediate: subq on 64-bit, subl on 32-bit.
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}
4366 | |
// Force generation of a 4 byte immediate value even if it fits into 8bit
void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
}
4371 | |
// Pointer-width register subtract: subq on 64-bit, subl on 32-bit.
void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}
4375 | |
// C++ bool manipulation
// Set the flags from a C++ 'bool' held in dst, using a test width that
// matches this platform's sizeof(bool).
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}
4389 | |
// Pointer-width test: testq on 64-bit, testl on 32-bit.
void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}
4393 | |
// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation from the current thread's TLAB.
//   obj               - receives the address of the newly allocated object
//   var_size_in_bytes - runtime size register, or noreg to use the constant
//   con_size_in_bytes - compile-time size (used when var_size_in_bytes == noreg)
//   t1, t2            - scratch (t1 holds the thread on 32-bit)
//   slow_case         - jumped to when the TLAB has insufficient space
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  // end := obj + size (either the constant or the register form)
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}
4428 | |
// Preserves rbx, and rdx.
// Refill the thread's TLAB from eden: either retain the current TLAB (if the
// free space left is worth keeping) and allocate the object directly in eden,
// or discard it — filling the leftover with a dummy int[] so the heap stays
// parsable — and allocate a fresh TLAB.
// Jumps to 'retry' after a successful refill, 'try_eden' to allocate in the
// shared eden, or 'slow_case'. Returns the register holding the thread.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space (in heap words)
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // store klass last. concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // Account the discarded TLAB's bytes as allocated by this thread.
  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    STOP("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new TLAB, reserving alignment_reserve bytes at the end.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}
4535 | |
// Add an allocation size to the thread's cumulative allocated-bytes counter.
//   thread            - JavaThread register; if invalid, r15_thread (64-bit)
//                       or t1 via get_thread (32-bit) is used
//   var_size_in_bytes - runtime size register, or invalid to use the constant
//   con_size_in_bytes - compile-time size
//   t1                - scratch for the 32-bit thread lookup
void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
#endif
  }

#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  // 32-bit: the counter is 64 bits wide, so propagate the carry into the
  // high word with adcl.
  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}
4565 | |
// Generic fallback path for x87 trig/transcendental intrinsics: saves all
// live register state, marshals nb_args x87 double arguments into the
// C calling convention, calls runtime_entry, and returns with the result
// on the x87 stack (F-TOS).
//   runtime_entry        - address of the C runtime routine (e.g. SharedRuntime::dsin)
//   nb_args              - number of double arguments currently on the x87 stack
//   num_fpu_regs_in_use  - number of live x87 stack slots (including the
//                          arguments) that must be preserved across the call
void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();  // save all general-purpose registers

  // if we are coming from c1, xmm registers may be live
  int off = 0;
  if (UseSSE == 1)  {
    // SSE1: only the low float of each xmm0-7 is architecturally live; spill
    // those into jdouble-sized slots.
    subptr(rsp, sizeof(jdouble)*8);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
  } else if (UseSSE >= 2)  {
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
      // Save upper half of YMM registers (the low 128 bits are saved below)
      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
      vextractf128h(Address(rsp, 0),xmm0);
      vextractf128h(Address(rsp, 16),xmm1);
      vextractf128h(Address(rsp, 32),xmm2);
      vextractf128h(Address(rsp, 48),xmm3);
      vextractf128h(Address(rsp, 64),xmm4);
      vextractf128h(Address(rsp, 80),xmm5);
      vextractf128h(Address(rsp, 96),xmm6);
      vextractf128h(Address(rsp,112),xmm7);
#ifdef _LP64
      vextractf128h(Address(rsp,128),xmm8);
      vextractf128h(Address(rsp,144),xmm9);
      vextractf128h(Address(rsp,160),xmm10);
      vextractf128h(Address(rsp,176),xmm11);
      vextractf128h(Address(rsp,192),xmm12);
      vextractf128h(Address(rsp,208),xmm13);
      vextractf128h(Address(rsp,224),xmm14);
      vextractf128h(Address(rsp,240),xmm15);
#endif
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    movdqu(Address(rsp,off++*16),xmm0);
    movdqu(Address(rsp,off++*16),xmm1);
    movdqu(Address(rsp,off++*16),xmm2);
    movdqu(Address(rsp,off++*16),xmm3);
    movdqu(Address(rsp,off++*16),xmm4);
    movdqu(Address(rsp,off++*16),xmm5);
    movdqu(Address(rsp,off++*16),xmm6);
    movdqu(Address(rsp,off++*16),xmm7);
#ifdef _LP64
    movdqu(Address(rsp,off++*16),xmm8);
    movdqu(Address(rsp,off++*16),xmm9);
    movdqu(Address(rsp,off++*16),xmm10);
    movdqu(Address(rsp,off++*16),xmm11);
    movdqu(Address(rsp,off++*16),xmm12);
    movdqu(Address(rsp,off++*16),xmm13);
    movdqu(Address(rsp,off++*16),xmm14);
    movdqu(Address(rsp,off++*16),xmm15);
#endif
  }

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    // Reload the nb_args argument values (just spilled with the rest of the
    // x87 stack) back onto the x87 stack, in reverse so TOS is the first arg.
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }

  // Store the arguments into the outgoing C argument area.
  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  // 64-bit ABI passes floating point args in xmm0/xmm1, not on the stack.
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  // 64-bit result comes back in xmm0; push it onto the x87 stack so both
  // word sizes leave the result in F-TOS.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble) * nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    // Finally reload the saved return value so it ends up in F-TOS.
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble) * nb_args);
  }

  // Restore XMM state in the reverse order of the save above.
  off = 0;
  if (UseSSE == 1)  {
    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2)  {
    // Restore whole 128bit (16 bytes) XMM registers
    movdqu(xmm0, Address(rsp,off++*16));
    movdqu(xmm1, Address(rsp,off++*16));
    movdqu(xmm2, Address(rsp,off++*16));
    movdqu(xmm3, Address(rsp,off++*16));
    movdqu(xmm4, Address(rsp,off++*16));
    movdqu(xmm5, Address(rsp,off++*16));
    movdqu(xmm6, Address(rsp,off++*16));
    movdqu(xmm7, Address(rsp,off++*16));
#ifdef _LP64
    movdqu(xmm8, Address(rsp,off++*16));
    movdqu(xmm9, Address(rsp,off++*16));
    movdqu(xmm10, Address(rsp,off++*16));
    movdqu(xmm11, Address(rsp,off++*16));
    movdqu(xmm12, Address(rsp,off++*16));
    movdqu(xmm13, Address(rsp,off++*16));
    movdqu(xmm14, Address(rsp,off++*16));
    movdqu(xmm15, Address(rsp,off++*16));
#endif
    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      vinsertf128h(xmm0, Address(rsp, 0));
      vinsertf128h(xmm1, Address(rsp, 16));
      vinsertf128h(xmm2, Address(rsp, 32));
      vinsertf128h(xmm3, Address(rsp, 48));
      vinsertf128h(xmm4, Address(rsp, 64));
      vinsertf128h(xmm5, Address(rsp, 80));
      vinsertf128h(xmm6, Address(rsp, 96));
      vinsertf128h(xmm7, Address(rsp,112));
#ifdef _LP64
      vinsertf128h(xmm8, Address(rsp,128));
      vinsertf128h(xmm9, Address(rsp,144));
      vinsertf128h(xmm10, Address(rsp,160));
      vinsertf128h(xmm11, Address(rsp,176));
      vinsertf128h(xmm12, Address(rsp,192));
      vinsertf128h(xmm13, Address(rsp,208));
      vinsertf128h(xmm14, Address(rsp,224));
      vinsertf128h(xmm15, Address(rsp,240));
#endif
      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    }
#endif
  }
  popa();  // restore all general-purpose registers
}
4752 | |
static const double pi_4 = 0.7853981633974483;  // pi/4: accuracy bound for x87 fsin/fcos/ftan

// Emits code for a trigonometric intrinsic selected by 'trig':
// 's' = sin, 'c' = cos, 't' = tan. Argument and result are on the x87
// stack (F-TOS). The hardware instructions are only used when
// |x| <= pi/4; larger arguments take the runtime-call slow path, which
// preserves num_fpu_regs_in_use live x87 stack slots.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X|  PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  switch(trig) {
  case 's':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
    }
    break;
  case 'c':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
    }
    break;
  case 't':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);  // restore rbx saved for the cmov-less fcmp above
  }
}
4829 | |
4830 | |
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
//
//   recv_klass    - receiver's klass (clobbered: advanced to the itable entry)
//   intf_klass    - the interface klass being invoked (preserved)
//   itable_index  - itable slot, constant or in method_result
//   method_result - out: the resolved Method*
//   scan_temp     - temp used to walk the itableOffset entries (clobbered)
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for InstanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // The scan loop below, with the first iteration peeled so the common
  // hit-on-first-entry case is a single compare-and-fall-through:
  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}
4904 | |
4905 | |
4906 // virtual method calling | |
4907 void MacroAssembler::lookup_virtual_method(Register recv_klass, | |
4908 RegisterOrConstant vtable_index, | |
4909 Register method_result) { | |
4910 const int base = InstanceKlass::vtable_start_offset() * wordSize; | |
4911 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); | |
4912 Address vtable_entry_addr(recv_klass, | |
4913 vtable_index, Address::times_ptr, | |
4914 base + vtableEntry::method_offset_in_bytes()); | |
4915 movptr(method_result, vtable_entry_addr); | |
4916 } | |
4917 | |
4918 | |
4919 void MacroAssembler::check_klass_subtype(Register sub_klass, | |
4920 Register super_klass, | |
4921 Register temp_reg, | |
4922 Label& L_success) { | |
4923 Label L_failure; | |
4924 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); | |
4925 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); | |
4926 bind(L_failure); | |
4927 } | |
4928 | |
4929 | |
// Fast-path subtype check: identity test plus the "supertype display"
// (primary supers / super cache) probe. Exactly one of the three labels may
// be NULL, in which case that outcome falls through.
//   sub_klass          - klass to test (preserved)
//   super_klass        - candidate supertype (preserved)
//   temp_reg           - temp; required if super_check_offset must be loaded
//   L_success          - taken when sub_klass IS a subtype of super_klass
//   L_failure          - taken when it provably is NOT
//   L_slow_path        - taken when only the slow path can decide
//   super_check_offset - optional precomputed Klass::super_check_offset value
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    // Offset only known at runtime: distinguish display hit, display miss,
    // and cache probe (sc_offset) dynamically.
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}
5031 | |
5032 | |
// Slow-path subtype check: linear scan of sub_klass's secondary-supers
// array via repne_scan; on a hit the result is cached in the
// secondary_super_cache so the fast path succeeds next time.
// One of L_success / L_failure may be NULL (meaning fall through).
// With set_cond_codes, Z/NZ is left set on success/failure and rdi is
// guaranteed non-zero (contract with the AD files).
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Bump the partial-subtype counter (debug/statistics builds only).
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  testptr(rax,rax); // Set Z = 0
  repne_scan();

  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}
5124 | |
5125 | |
5126 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { | |
5127 if (VM_Version::supports_cmov()) { | |
5128 cmovl(cc, dst, src); | |
5129 } else { | |
5130 Label L; | |
5131 jccb(negate_condition(cc), L); | |
5132 movl(dst, src); | |
5133 bind(L); | |
5134 } | |
5135 } | |
5136 | |
5137 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { | |
5138 if (VM_Version::supports_cmov()) { | |
5139 cmovl(cc, dst, src); | |
5140 } else { | |
5141 Label L; | |
5142 jccb(negate_condition(cc), L); | |
5143 movl(dst, src); | |
5144 bind(L); | |
5145 } | |
5146 } | |
5147 | |
// Emits a debug check that 'reg' holds a valid oop; no-op unless
// -XX:+VerifyOops. The message string 's' is combined with the register
// name and attached to the code blob (code_string) so it outlives this
// assembler's ResourceMark. Contract: the called stub pops the two pushed
// arguments and restores rax (and r10 on 64-bit), so no register is
// modified overall.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  const char* b = NULL;
  {
    // Build the message in a scoped ResourceMark, then copy it into
    // code-blob-lifetime storage (fix for 8008555: memory leak otherwise).
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop: %s: %s", reg->name(), s);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
  BLOCK_COMMENT("} verify_oop");
}
5176 | |
5177 | |
// Returns a RegisterOrConstant for a value that may not be known until
// after this code is generated ("delayed value"). If the value is already
// available at *delayed_value_addr, return it (plus offset) as a constant;
// otherwise emit a load of the cell into tmp, so the value is picked up at
// run time, and return tmp. In debug builds, emits a check that the loaded
// value is non-zero (i.e. the delayed value was actually filled in).
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      // Build a diagnostic message in code-blob-lifetime storage
      // (fix for 8008555: memory leak otherwise).
      // NOTE(review): reads delayed_value_addr[1] — presumably a
      // descriptor slot following the value cell; confirm with callers.
      const char* buf = NULL;
      {
        ResourceMark rm;
        stringStream ss;
        ss.print("DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
        buf = code_string(ss.as_string());
      }
      jcc(Assembler::notZero, L);
      STOP(buf);
    } else {
      // Non-wizard builds: just halt on a zero (unfilled) delayed value.
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}
5214 | |
5215 | |
5216 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, | |
5217 int extra_slot_offset) { | |
5218 // cf. TemplateTable::prepare_invoke(), if (load_receiver). | |
5219 int stackElementSize = Interpreter::stackElementSize; | |
5220 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); | |
5221 #ifdef ASSERT | |
5222 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); | |
5223 assert(offset1 - offset == stackElementSize, "correct arithmetic"); | |
5224 #endif | |
5225 Register scale_reg = noreg; | |
5226 Address::ScaleFactor scale_factor = Address::no_scale; | |
5227 if (arg_slot.is_constant()) { | |
5228 offset += arg_slot.as_constant() * stackElementSize; | |
5229 } else { | |
5230 scale_reg = arg_slot.as_register(); | |
5231 scale_factor = Address::times(stackElementSize); | |
5232 } | |
5233 offset += wordSize; // return PC is on stack | |
5234 return Address(rsp, scale_reg, scale_factor, offset); | |
5235 } | |
5236 | |
5237 | |
// Emits a debug check that the oop stored at 'addr' is valid; no-op unless
// -XX:+VerifyOops. Like verify_oop(), but the oop is loaded from memory;
// if addr is rsp-relative, the displacement is adjusted for the pushes
// this routine itself performs. The called stub pops both arguments and
// restores rax/r10, so no register is modified overall.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  const char* b = NULL;
  {
    // Build the message in a scoped ResourceMark, then copy it into
    // code-blob-lifetime storage (fix for 8008555: memory leak otherwise).
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop_addr: %s", s);
    b = code_string(ss.as_string());
  }
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did (and on 64 bit we do two pushes)
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}
5277 | |
// Debug-only sanity check of the current thread's TLAB: asserts that
// tlab_start <= tlab_top <= tlab_end. Emits nothing unless both UseTLAB
// and VerifyOops are on (and only in ASSERT builds). All scratch
// registers are saved and restored, so the check is side-effect free.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    // 64-bit has the thread in r15; 32-bit must fetch it into rbx.
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    // Check tlab_top >= tlab_start.
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    STOP("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    // Check tlab_end >= tlab_top.
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    STOP("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}
5308 | |
// Decoded view of the x87 FPU control word (kept in the low 16 bits of
// _value). Exception mask bits live in bits 0-5, precision control in
// bits 8-9, rounding control in bits 10-11.
class ControlWord {
 public:
  int32_t _value;

  // two-bit control fields
  int  rounding_control()  const { return (_value >> 10) & 3; }
  int  precision_control() const { return (_value >>  8) & 3; }
  // exception mask bits (true = masked)
  bool precision()         const { return (_value & (1 << 5)) != 0; }
  bool underflow()         const { return (_value & (1 << 4)) != 0; }
  bool overflow()          const { return (_value & (1 << 3)) != 0; }
  bool zero_divide()       const { return (_value & (1 << 2)) != 0; }
  bool denormalized()      const { return (_value & (1 << 1)) != 0; }
  bool invalid()           const { return (_value & (1 << 0)) != 0; }

  // Prints the control word as "<hex> masks = <flags>, <rounding>, <precision>".
  void print() const {
    // rounding control
    const char* rc = NULL;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up "; break;
      case 3: rc = "chop "; break;
    }
    // precision control
    const char* pc = NULL;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    }
    // flags: uppercase letter when the corresponding mask bit is set
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = precision()    ? 'P' : 'p';
    f[3] = underflow()    ? 'U' : 'u';
    f[4] = overflow()     ? 'O' : 'o';
    f[5] = zero_divide()  ? 'Z' : 'z';
    f[6] = denormalized() ? 'D' : 'd';
    f[7] = invalid()      ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
5355 | |
// Decodes and prints the x87 FPU status word (only the low 16 bits of
// _value are meaningful).
class StatusWord {
 public:
  int32_t _value;

  bool busy()         const { return ((_value >> 15) & 1) != 0; }
  bool C3()           const { return ((_value >> 14) & 1) != 0; }
  bool C2()           const { return ((_value >> 10) & 1) != 0; }
  bool C1()           const { return ((_value >>  9) & 1) != 0; }
  bool C0()           const { return ((_value >>  8) & 1) != 0; }
  // Index of the top-of-stack register (bits 11..13).
  int  top()          const { return (_value >> 11) & 7; }
  bool error_status() const { return ((_value >> 7) & 1) != 0; }
  bool stack_fault()  const { return ((_value >> 6) & 1) != 0; }
  bool precision()    const { return ((_value >> 5) & 1) != 0; }
  bool underflow()    const { return ((_value >> 4) & 1) != 0; }
  bool overflow()     const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide()  const { return ((_value >> 2) & 1) != 0; }
  bool denormalized() const { return ((_value >> 1) & 1) != 0; }
  bool invalid()      const { return ((_value >> 0) & 1) != 0; }

  void print() const {
    // condition codes: digit when set, '-' when clear
    const bool cc_bits[4]  = { C3(), C2(), C1(), C0() };
    const char cc_names[4] = { '3', '2', '1', '0' };
    char cc[5];
    for (int i = 0; i < 4; i++) {
      cc[i] = cc_bits[i] ? cc_names[i] : '-';
    }
    cc[4] = '\0';
    // exception/status flags: letter when set, '-' when clear
    const bool fl_bits[8]  = { error_status(), stack_fault(), precision(),
                               underflow(), overflow(), zero_divide(),
                               denormalized(), invalid() };
    const char fl_names[8] = { 'E', 'S', 'P', 'U', 'O', 'Z', 'D', 'I' };
    char fl[9];
    for (int i = 0; i < 8; i++) {
      fl[i] = fl_bits[i] ? fl_names[i] : '-';
    }
    fl[8] = '\0';
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, fl, cc, top());
  }

};
5399 | |
// The x87 tag word: a 2-bit tag per physical FPU register.  See
// FPU_State::tag_as_string() for the tag encoding
// (0 = valid, 1 = zero, 2 = special, 3 = empty).
class TagWord {
 public:
  int32_t _value;

  // 2-bit tag for physical register i (i in [0, 7]).
  int tag_at(int i) const   { return (_value >> (i*2)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};
5411 | |
// Raw image of one 80-bit x87 register: 64-bit significand (_m1:_m0)
// followed by the 16-bit sign+exponent word.  Field order matters — this
// struct is overlaid onto the save area by FPU_State::st().
class FPU_Register {
 public:
  int32_t _m0;  // low 32 bits of the significand
  int32_t _m1;  // high 32 bits of the significand
  int16_t _ex;  // sign bit + 15-bit exponent

  // True for the x87 "indefinite" value: _ex all ones, significand 0xC0000000_00000000.
  bool is_indefinite() const {
    if (_ex != -1) return false;
    return _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    const char sign_ch = (_ex < 0) ? '-' : '+';
    // An exponent of 0x7FFF (with either sign bit) is labelled "NaN".
    const bool special = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
    const char* kind = special ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign_ch, _ex, _m1, _m0, kind);
  }

};
5429 | |
// Snapshot of the full x87 FPU environment plus the eight register images.
// NOTE(review): the field layout presumably matches the image written by
// push_CPU_state() (fsave-style environment) — confirm against that code.
class FPU_State {
 public:
  enum {
    register_size       = 10,  // bytes per register image (80-bit extended format)
    number_of_registers =  8,
    register_mask       =  7
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  // Tag of logical stack slot ST(i): physical register = (top + i) mod 8.
  int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  // Register image at slot i of the save area (assumed to be in ST order —
  // see how print() pairs st(j) with physical register i below).
  FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }

  // Human-readable name for a 2-bit tag-word value.
  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  void print() const {
    // print computation registers: physical register i corresponds to
    // logical stack slot j = (i - top) mod 8; '*' marks the top of stack.
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};
5479 | |
// Decodes and prints the saved EFLAGS value (stored as IU_State::_eflags).
class Flag_Register {
 public:
  int32_t _value;

  // Individual flag bits at their EFLAGS positions.
  bool overflow()        const { return ((_value >> 11) & 1) != 0; }
  bool direction()       const { return ((_value >> 10) & 1) != 0; }
  bool sign()            const { return ((_value >>  7) & 1) != 0; }
  bool zero()            const { return ((_value >>  6) & 1) != 0; }
  bool auxiliary_carry() const { return ((_value >>  4) & 1) != 0; }
  bool parity()          const { return ((_value >>  2) & 1) != 0; }
  bool carry()           const { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // one letter per flag when set, '-' when clear
    const bool bits[7]  = { overflow(), direction(), sign(), zero(),
                            auxiliary_carry(), parity(), carry() };
    const char names[7] = { 'O', 'D', 'S', 'Z', 'A', 'P', 'C' };
    char buf[8];
    for (int i = 0; i < 7; i++) {
      buf[i] = bits[i] ? names[i] : '-';
    }
    buf[7] = '\0';
    // output
    printf("%08x flags = %s", _value, buf);
  }

};
5508 | |
// A single saved general-purpose register, printed as hex and as a
// signed decimal value.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x %11d", _value, _value);
  }

};
5518 | |
// The saved integer-unit register file: EFLAGS plus the eight general-purpose
// registers.  NOTE(review): field order presumably matches the memory image
// written by push_CPU_state() (pusha-style, rdi at the lowest address) —
// confirm against that code.  The trailing commas in some printed labels
// ("rax," etc.) are preserved from the original output format.
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register   _rdi;
  IU_Register   _rsi;
  IU_Register   _rbp;
  IU_Register   _rsp;
  IU_Register   _rbx;
  IU_Register   _rdx;
  IU_Register   _rcx;
  IU_Register   _rax;

  void print() const {
    // computation registers
    printf("rax, = "); _rax.print(); printf("\n");
    printf("rbx, = "); _rbx.print(); printf("\n");
    printf("rcx = "); _rcx.print(); printf("\n");
    printf("rdx = "); _rdx.print(); printf("\n");
    printf("rdi = "); _rdi.print(); printf("\n");
    printf("rsi = "); _rsi.print(); printf("\n");
    printf("rbp, = "); _rbp.print(); printf("\n");
    printf("rsp = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};
5546 | |
5547 | |
// Complete CPU snapshot as saved by push_CPU_state(): the x87 FPU state
// followed by the integer registers.  print() dumps both between rules.
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};
5562 | |
5563 | |
// C thunk called from generated code (see MacroAssembler::print_CPU_state());
// receives a pointer to the CPU state saved on the stack and prints it.
static void _print_CPU_state(CPU_State* state) {
  state->print();
};
5567 | |
5568 | |
// Emits code that dumps the complete CPU state at runtime.  All registers are
// preserved: the state is saved with push_CPU_state(), its stack address is
// passed to the C function _print_CPU_state, then the state is restored.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state (rsp points at the saved image)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}
5576 | |
5577 | |
5578 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { | |
5579 static int counter = 0; | |
5580 FPU_State* fs = &state->_fpu_state; | |
5581 counter++; | |
5582 // For leaf calls, only verify that the top few elements remain empty. | |
5583 // We only need 1 empty at the top for C2 code. | |
5584 if( stack_depth < 0 ) { | |
5585 if( fs->tag_for_st(7) != 3 ) { | |
5586 printf("FPR7 not empty\n"); | |
5587 state->print(); | |
5588 assert(false, "error"); | |
5589 return false; | |
5590 } | |
5591 return true; // All other stack states do not matter | |
5592 } | |
5593 | |
5594 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, | |
5595 "bad FPU control word"); | |
5596 | |
5597 // compute stack depth | |
5598 int i = 0; | |
5599 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; | |
5600 int d = i; | |
5601 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; | |
5602 // verify findings | |
5603 if (i != FPU_State::number_of_registers) { | |
5604 // stack not contiguous | |
5605 printf("%s: stack not contiguous at ST%d\n", s, i); | |
5606 state->print(); | |
5607 assert(false, "error"); | |
5608 return false; | |
5609 } | |
5610 // check if computed stack depth corresponds to expected stack depth | |
5611 if (stack_depth < 0) { | |
5612 // expected stack depth is -stack_depth or less | |
5613 if (d > -stack_depth) { | |
5614 // too many elements on the stack | |
5615 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); | |
5616 state->print(); | |
5617 assert(false, "error"); | |
5618 return false; | |
5619 } | |
5620 } else { | |
5621 // expected stack depth is stack_depth | |
5622 if (d != stack_depth) { | |
5623 // wrong stack depth | |
5624 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); | |
5625 state->print(); | |
5626 assert(false, "error"); | |
5627 return false; | |
5628 } | |
5629 } | |
5630 // everything is cool | |
5631 return true; | |
5632 } | |
5633 | |
5634 | |
// Emits a runtime check of the x87 FPU stack (guarded by -XX:+VerifyFPU).
// Saves all CPU state and calls _verify_FPU(stack_depth, s, state); breaks
// into the debugger (int3) when the check reports failure.
// NOTE(review): arguments are pushed last-first for a stack-based (cdecl)
// calling convention — presumably only used on 32-bit x86; confirm.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error: _verify_FPU returns its bool result in rax
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}
5654 | |
8873
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
// Re-establishes the FP/SIMD control state the VM expects after returning
// from native (JNI) code, which may have modified it.
void MacroAssembler::restore_cpu_control_state_after_jni() {
  // Either restore the MXCSR register after returning from the JNI Call
  // or verify that it wasn't changed (with -Xcheck:jni flag).
  if (VM_Version::supports_sse()) {
    if (RestoreMXCSROnJNICalls) {
      ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std()));
    } else if (CheckJNICalls) {
      call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
    }
  }
  if (VM_Version::supports_avx()) {
    // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
    vzeroupper();
  }

#ifndef _LP64
  // Either restore the x87 floating pointer control word after returning
  // from the JNI call or verify that it wasn't changed.
  if (CheckJNICalls) {
    call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry()));
  }
#endif // _LP64
}
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
5678 |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
5679 |
// Loads the klass pointer of the object in src into dst.  With compressed
// class pointers (64-bit only) a 32-bit narrow klass is loaded and decoded;
// otherwise a full pointer load is performed.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedClassPointers) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}
5689 | |
// Loads the prototype mark-word header from the klass of the object in src
// into dst (clobbers dst as a klass scratch register on the way).
void MacroAssembler::load_prototype_header(Register dst, Register src) {
  load_klass(dst, src);
  movptr(dst, Address(dst, Klass::prototype_header_offset()));
}
5694 | |
// Stores the klass pointer in src into the object headed at dst.  With
// compressed class pointers the klass is first encoded in place —
// NOTE: src is clobbered (left in narrow form) in that case.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedClassPointers) {
    encode_klass_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}
5704 | |
// Loads a (possibly NULL) heap oop from src into dst, decoding it when
// compressed oops are in use.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  // FIXME: Must change all places where we try to load the klass.
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}
5715 | |
// Doesn't do verification, generates fixed size code.
// Like load_heap_oop() but the oop is known non-NULL, so the cheaper
// fixed-size not-null decode can be used.
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}
5726 | |
// Stores the heap oop in src to dst, encoding it first when compressed oops
// are in use.  NOTE: src is clobbered (left in encoded form) in that case,
// and dst must not use src as a base/index register.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}
5737 | |
// Compares the oop in src1 against the heap oop stored at src2, setting
// flags for a subsequent jcc.  With compressed oops the in-memory value is
// loaded and decoded into tmp first; when tmp is noreg, rax is pushed and
// borrowed as the temporary (so src2 must not be rsp-relative then).
void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
  assert_different_registers(src1, tmp);
#ifdef _LP64
  if (UseCompressedOops) {
    bool did_push = false;
    if (tmp == noreg) {
      tmp = rax;
      push(tmp);
      did_push = true;
      assert(!src2.uses(rsp), "can't push");
    }
    load_heap_oop(tmp, src2);
    cmpptr(src1, tmp);
    if (did_push) pop(tmp);
  } else
#endif
    cmpptr(src1, src2);
}
5756 | |
// Used for storing NULLs.  A narrow oop field takes a 32-bit store; a full
// oop field on 64-bit takes a sign-extended 64-bit store (movslq).
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}
5769 | |
5770 #ifdef _LP64 | |
// Fills the 32-bit klass gap of the object headed at dst with src.  The gap
// only exists when compressed class pointers are in use; otherwise this is
// a no-op.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedClassPointers) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}
5777 | |
5778 #ifdef ASSERT | |
// Debug-only: emits a runtime check (under -XX:+CheckCompressedOops) that
// r12 still holds the expected compressed-oops base, stopping the VM with
// msg if not.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
    jcc(Assembler::equal, ok);
    STOP(msg);
    bind(ok);
    pop(rscratch1);
  }
}
5792 #endif | |
5793 | |
// Algorithm must match oop.inline.hpp encode_heap_oop.
// Compresses the (possibly NULL) oop in r in place.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based mode: encoding is just the (optional) shift.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  // Heap-based mode: substitute the heap base for NULL before subtracting it
  // so that NULL encodes to narrow-oop 0.
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}
5812 | |
// Compresses the oop in r in place; r must be known non-NULL (checked in
// debug builds under -XX:+CheckCompressedOops), so no NULL special-casing
// is emitted.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    STOP("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  // Subtract the base (if any), then shift.
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}
5833 | |
// Two-register variant: compresses the non-NULL oop in src into dst,
// leaving src unchanged (unless dst == src).
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    STOP("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}
5857 | |
// Decompresses the (possibly 0) narrow oop in r in place; narrow-oop 0 must
// decode back to NULL.
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    // The jccb relies on shlq setting ZF, so the base add is skipped when
    // the narrow oop was 0 (keeping NULL == NULL).
    // NOTE(review): shl with a zero count leaves flags unchanged — this
    // presumably assumes a non-zero shift whenever a base is set; confirm.
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}
5876 | |
// Decompresses the narrow oop in r in place; r must be known non-zero, so no
// NULL special-casing is emitted and the code size is fixed.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}
5894 | |
// Two-register variant: decompresses the non-zero narrow oop in src into
// dst, leaving src unchanged (unless dst == src).
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      // Single instruction: dst = r12_heapbase + (src << 3).
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}
5922 | |
// Compresses the klass pointer in r in place: subtract narrow_klass_base
// (if any), then shift.  r12 (the heap-base register) is borrowed as a
// scratch register for the base and restored afterwards via reinit_heapbase().
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    // Use r12 as a scratch register in which to temporarily load the narrow_klass_base.
    assert(r != r12_heapbase, "Encoding a klass in r12");
    mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base());
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shrq(r, LogKlassAlignmentInBytes);
  }
  if (Universe::narrow_klass_base() != NULL) {
    // r12 normally caches the heap base; restore it.
    reinit_heapbase();
  }
}
5938 | |
// Two-register variant: compresses the klass pointer in src into dst without
// clobbering src or r12.  dst == src falls back to the in-place variant.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
  } else {
    if (Universe::narrow_klass_base() != NULL) {
      // dst = src - narrow_klass_base, computed as (-base) + src.
      mov64(dst, (int64_t)Universe::narrow_klass_base());
      negq(dst);
      addq(dst, src);
    } else {
      movptr(dst, src);
    }
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      shrq(dst, LogKlassAlignmentInBytes);
    }
  }
}
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
5956 |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
// Function instr_size_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL).  Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_size_for_decode_klass_not_null() {
  assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  if (Universe::narrow_klass_base() != NULL) {
    // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
    return (Universe::narrow_klass_shift() == 0 ? 20 : 24);
  } else {
    // longest load decode klass function, mov64, leaq
    return 16;
  }
}
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
5971 |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
// !!! If the instructions that get generated here change then function
// instr_size_for_decode_klass_not_null() needs to get updated.
// Decompresses the narrow klass in r in place: shift, then add
// narrow_klass_base (if any).  r12 is borrowed as a scratch register for the
// base and restored afterwards via reinit_heapbase().
void MacroAssembler::decode_klass_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedClassPointers, "should only be used for compressed headers");
  assert(r != r12_heapbase, "Decoding a klass in r12");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shlq(r, LogKlassAlignmentInBytes);
  }
  // Use r12 as a scratch register in which to temporarily load the narrow_klass_base.
  if (Universe::narrow_klass_base() != NULL) {
    mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base());
    addq(r, r12_heapbase);
    reinit_heapbase();   // r12 normally caches the heap base; restore it
  }
}
5992 | |
5993 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { | |
5994 // Note: it will change flags | |
12226
7944aba7ba41
8015107: NPG: Use consistent naming for metaspace concepts
ehelin
parents:
12056
diff
changeset
|
5995 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
12056
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
5996 if (dst == src) { |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
5997 decode_klass_not_null(dst); |
7199 | 5998 } else { |
12056
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
5999 // Cannot assert, unverified entry point counts instructions (see .ad file) |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6000 // vtableStubs also counts instructions in pd_code_size_limit. |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6001 // Also do not verify_oop as this is called by verify_oop. |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6002 mov64(dst, (int64_t)Universe::narrow_klass_base()); |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6003 if (Universe::narrow_klass_shift() != 0) { |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6004 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6005 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6006 leaq(dst, Address(dst, src, Address::times_8, 0)); |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6007 } else { |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6008 addq(dst, src); |
7199 | 6009 } |
6010 } | |
6011 } | |
6012 | |
6013 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { | |
6014 assert (UseCompressedOops, "should only be used for compressed headers"); | |
6015 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
6016 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); | |
6017 int oop_index = oop_recorder()->find_index(obj); | |
6018 RelocationHolder rspec = oop_Relocation::spec(oop_index); | |
6019 mov_narrow_oop(dst, oop_index, rspec); | |
6020 } | |
6021 | |
6022 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { | |
6023 assert (UseCompressedOops, "should only be used for compressed headers"); | |
6024 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
6025 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); | |
6026 int oop_index = oop_recorder()->find_index(obj); | |
6027 RelocationHolder rspec = oop_Relocation::spec(oop_index); | |
6028 mov_narrow_oop(dst, oop_index, rspec); | |
6029 } | |
6030 | |
6031 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { | |
12226
7944aba7ba41
8015107: NPG: Use consistent naming for metaspace concepts
ehelin
parents:
12056
diff
changeset
|
6032 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
7199 | 6033 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
6034 int klass_index = oop_recorder()->find_index(k); | |
6035 RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |
12056
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6036 mov_narrow_oop(dst, Klass::encode_klass(k), rspec); |
7199 | 6037 } |
6038 | |
6039 void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { | |
12226
7944aba7ba41
8015107: NPG: Use consistent naming for metaspace concepts
ehelin
parents:
12056
diff
changeset
|
6040 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
7199 | 6041 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
6042 int klass_index = oop_recorder()->find_index(k); | |
6043 RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |
12056
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6044 mov_narrow_oop(dst, Klass::encode_klass(k), rspec); |
7199 | 6045 } |
6046 | |
6047 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { | |
6048 assert (UseCompressedOops, "should only be used for compressed headers"); | |
6049 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
6050 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); | |
6051 int oop_index = oop_recorder()->find_index(obj); | |
6052 RelocationHolder rspec = oop_Relocation::spec(oop_index); | |
6053 Assembler::cmp_narrow_oop(dst, oop_index, rspec); | |
6054 } | |
6055 | |
6056 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { | |
6057 assert (UseCompressedOops, "should only be used for compressed headers"); | |
6058 assert (Universe::heap() != NULL, "java heap should be initialized"); | |
6059 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); | |
6060 int oop_index = oop_recorder()->find_index(obj); | |
6061 RelocationHolder rspec = oop_Relocation::spec(oop_index); | |
6062 Assembler::cmp_narrow_oop(dst, oop_index, rspec); | |
6063 } | |
6064 | |
6065 void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { | |
12226
7944aba7ba41
8015107: NPG: Use consistent naming for metaspace concepts
ehelin
parents:
12056
diff
changeset
|
6066 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
7199 | 6067 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
6068 int klass_index = oop_recorder()->find_index(k); | |
6069 RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |
12056
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6070 Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec); |
7199 | 6071 } |
6072 | |
6073 void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { | |
12226
7944aba7ba41
8015107: NPG: Use consistent naming for metaspace concepts
ehelin
parents:
12056
diff
changeset
|
6074 assert (UseCompressedClassPointers, "should only be used for compressed headers"); |
7199 | 6075 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); |
6076 int klass_index = oop_recorder()->find_index(k); | |
6077 RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |
12056
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6078 Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec); |
7199 | 6079 } |
6080 | |
6081 void MacroAssembler::reinit_heapbase() { | |
12226
7944aba7ba41
8015107: NPG: Use consistent naming for metaspace concepts
ehelin
parents:
12056
diff
changeset
|
6082 if (UseCompressedOops || UseCompressedClassPointers) { |
12056
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6083 if (Universe::heap() != NULL) { |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6084 if (Universe::narrow_oop_base() == NULL) { |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6085 MacroAssembler::xorptr(r12_heapbase, r12_heapbase); |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6086 } else { |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6087 mov64(r12_heapbase, (int64_t)Universe::narrow_ptrs_base()); |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6088 } |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6089 } else { |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6090 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6091 } |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6092 } |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6093 } |
740e263c80c6
8003424: Enable Class Data Sharing for CompressedOops
hseigel
parents:
11080
diff
changeset
|
6094 |
7199 | 6095 #endif // _LP64 |
6096 | |
6097 | |
6098 // C2 compiled method's prolog code. | |
17980
0bf37f737702
8032410: compiler/uncommontrap/TestStackBangRbp.java times out on Solaris-Sparc V9
roland
parents:
17937
diff
changeset
|
6099 void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b) { |
7199 | 6100 |
6101 // WARNING: Initial instruction MUST be 5 bytes or longer so that | |
6102 // NativeJump::patch_verified_entry will be able to patch out the entry | |
6103 // code safely. The push to verify stack depth is ok at 5 bytes, | |
6104 // the frame allocation can be either 3 or 6 bytes. So if we don't do | |
6105 // stack bang then we must use the 6 byte frame allocation even if | |
6106 // we have no frame. :-( | |
17980
0bf37f737702
8032410: compiler/uncommontrap/TestStackBangRbp.java times out on Solaris-Sparc V9
roland
parents:
17937
diff
changeset
|
6107 assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect"); |
7199 | 6108 |
6109 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); | |
6110 // Remove word for return addr | |
6111 framesize -= wordSize; | |
17980
0bf37f737702
8032410: compiler/uncommontrap/TestStackBangRbp.java times out on Solaris-Sparc V9
roland
parents:
17937
diff
changeset
|
6112 stack_bang_size -= wordSize; |
7199 | 6113 |
6114 // Calls to C2R adapters often do not accept exceptional returns. | |
6115 // We require that their callers must bang for them. But be careful, because | |
6116 // some VM calls (such as call site linkage) can use several kilobytes of | |
6117 // stack. But the stack safety zone should account for that. | |
6118 // See bugs 4446381, 4468289, 4497237. | |
17980
0bf37f737702
8032410: compiler/uncommontrap/TestStackBangRbp.java times out on Solaris-Sparc V9
roland
parents:
17937
diff
changeset
|
6119 if (stack_bang_size > 0) { |
0bf37f737702
8032410: compiler/uncommontrap/TestStackBangRbp.java times out on Solaris-Sparc V9
roland
parents:
17937
diff
changeset
|
6120 generate_stack_overflow_check(stack_bang_size); |
7199 | 6121 |
6122 // We always push rbp, so that on return to interpreter rbp, will be | |
6123 // restored correctly and we can correct the stack. | |
6124 push(rbp); | |
6125 // Remove word for ebp | |
6126 framesize -= wordSize; | |
6127 | |
6128 // Create frame | |
6129 if (framesize) { | |
6130 subptr(rsp, framesize); | |
6131 } | |
6132 } else { | |
6133 // Create frame (force generation of a 4 byte immediate value) | |
6134 subptr_imm32(rsp, framesize); | |
6135 | |
6136 // Save RBP register now. | |
6137 framesize -= wordSize; | |
6138 movptr(Address(rsp, framesize), rbp); | |
6139 } | |
6140 | |
6141 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth | |
6142 framesize -= wordSize; | |
6143 movptr(Address(rsp, framesize), (int32_t)0xbadb100d); | |
6144 } | |
6145 | |
6146 #ifndef _LP64 | |
6147 // If method sets FPU control word do it now | |
6148 if (fp_mode_24b) { | |
6149 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); | |
6150 } | |
6151 if (UseSSE >= 2 && VerifyFPU) { | |
6152 verify_FPU(0, "FPU stack must be clean on entry"); | |
6153 } | |
6154 #endif | |
6155 | |
6156 #ifdef ASSERT | |
6157 if (VerifyStackAtCalls) { | |
6158 Label L; | |
6159 push(rax); | |
6160 mov(rax, rsp); | |
6161 andptr(rax, StackAlignmentInBytes-1); | |
6162 cmpptr(rax, StackAlignmentInBytes-wordSize); | |
6163 pop(rax); | |
6164 jcc(Assembler::equal, L); | |
6165 STOP("Stack is not properly aligned!"); | |
6166 bind(L); | |
6167 } | |
6168 #endif | |
6169 | |
6170 } | |
6171 | |
7474
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6172 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6173 // cnt - number of qwords (8-byte words). |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6174 // base - start address, qword aligned. |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6175 assert(base==rdi, "base register must be edi for rep stos"); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6176 assert(tmp==rax, "tmp register must be eax for rep stos"); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6177 assert(cnt==rcx, "cnt register must be ecx for rep stos"); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6178 |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6179 xorptr(tmp, tmp); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6180 if (UseFastStosb) { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6181 shlptr(cnt,3); // convert to number of bytes |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6182 rep_stosb(); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6183 } else { |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6184 NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6185 rep_stos(); |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6186 } |
00af3a3a8df4
8005522: use fast-string instructions on x86 for zeroing
kvn
parents:
7430
diff
changeset
|
6187 } |
7199 | 6188 |
6189 // IndexOf for constant substrings with size >= 8 chars | |
6190 // which don't need to be loaded through stack. | |
6191 void MacroAssembler::string_indexofC8(Register str1, Register str2, | |
6192 Register cnt1, Register cnt2, | |
6193 int int_cnt2, Register result, | |
6194 XMMRegister vec, Register tmp) { | |
6195 ShortBranchVerifier sbv(this); | |
6196 assert(UseSSE42Intrinsics, "SSE4.2 is required"); | |
6197 | |
6198 // This method uses pcmpestri inxtruction with bound registers | |
6199 // inputs: | |
6200 // xmm - substring | |
6201 // rax - substring length (elements count) | |
6202 // mem - scanned string | |
6203 // rdx - string length (elements count) | |
6204 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) | |
6205 // outputs: | |
6206 // rcx - matched index in string | |
6207 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); | |
6208 | |
6209 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, | |
6210 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, | |
6211 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; | |
6212 | |
6213 // Note, inline_string_indexOf() generates checks: | |
6214 // if (substr.count > string.count) return -1; | |
6215 // if (substr.count == 0) return 0; | |
6216 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); | |
6217 | |
6218 // Load substring. | |
6219 movdqu(vec, Address(str2, 0)); | |
6220 movl(cnt2, int_cnt2); | |
6221 movptr(result, str1); // string addr | |
6222 | |
6223 if (int_cnt2 > 8) { | |
6224 jmpb(SCAN_TO_SUBSTR); | |
6225 | |
6226 // Reload substr for rescan, this code | |
6227 // is executed only for large substrings (> 8 chars) | |
6228 bind(RELOAD_SUBSTR); | |
6229 movdqu(vec, Address(str2, 0)); | |
6230 negptr(cnt2); // Jumped here with negative cnt2, convert to positive | |
6231 | |
6232 bind(RELOAD_STR); | |
6233 // We came here after the beginning of the substring was | |
6234 // matched but the rest of it was not so we need to search | |
6235 // again. Start from the next element after the previous match. | |
6236 | |
6237 // cnt2 is number of substring reminding elements and | |
6238 // cnt1 is number of string reminding elements when cmp failed. | |
6239 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 | |
6240 subl(cnt1, cnt2); | |
6241 addl(cnt1, int_cnt2); | |
6242 movl(cnt2, int_cnt2); // Now restore cnt2 | |
6243 | |
6244 decrementl(cnt1); // Shift to next element | |
6245 cmpl(cnt1, cnt2); | |
6246 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring | |
6247 | |
6248 addptr(result, 2); | |
6249 | |
6250 } // (int_cnt2 > 8) | |
6251 | |
6252 // Scan string for start of substr in 16-byte vectors | |
6253 bind(SCAN_TO_SUBSTR); | |
6254 pcmpestri(vec, Address(result, 0), 0x0d); | |
6255 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 | |
6256 subl(cnt1, 8); | |
6257 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string | |
6258 cmpl(cnt1, cnt2); | |
6259 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring | |
6260 addptr(result, 16); | |
6261 jmpb(SCAN_TO_SUBSTR); | |
6262 | |
6263 // Found a potential substr | |
6264 bind(FOUND_CANDIDATE); | |
6265 // Matched whole vector if first element matched (tmp(rcx) == 0). | |
6266 if (int_cnt2 == 8) { | |
6267 jccb(Assembler::overflow, RET_FOUND); // OF == 1 | |
6268 } else { // int_cnt2 > 8 | |
6269 jccb(Assembler::overflow, FOUND_SUBSTR); | |
6270 } | |
6271 // After pcmpestri tmp(rcx) contains matched element index | |
6272 // Compute start addr of substr | |
6273 lea(result, Address(result, tmp, Address::times_2)); | |
6274 | |
6275 // Make sure string is still long enough | |
6276 subl(cnt1, tmp); | |
6277 cmpl(cnt1, cnt2); | |
6278 if (int_cnt2 == 8) { | |
6279 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); | |
6280 } else { // int_cnt2 > 8 | |
6281 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); | |
6282 } | |
6283 // Left less then substring. | |
6284 | |
6285 bind(RET_NOT_FOUND); | |
6286 movl(result, -1); | |
6287 jmpb(EXIT); | |
6288 | |
6289 if (int_cnt2 > 8) { | |
6290 // This code is optimized for the case when whole substring | |
6291 // is matched if its head is matched. | |
6292 bind(MATCH_SUBSTR_HEAD); | |
6293 pcmpestri(vec, Address(result, 0), 0x0d); | |
6294 // Reload only string if does not match | |
6295 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 | |
6296 | |
6297 Label CONT_SCAN_SUBSTR; | |
6298 // Compare the rest of substring (> 8 chars). | |
6299 bind(FOUND_SUBSTR); | |
6300 // First 8 chars are already matched. | |
6301 negptr(cnt2); | |
6302 addptr(cnt2, 8); | |
6303 | |
6304 bind(SCAN_SUBSTR); | |
6305 subl(cnt1, 8); | |
6306 cmpl(cnt2, -8); // Do not read beyond substring | |
6307 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); | |
6308 // Back-up strings to avoid reading beyond substring: | |
6309 // cnt1 = cnt1 - cnt2 + 8 | |
6310 addl(cnt1, cnt2); // cnt2 is negative | |
6311 addl(cnt1, 8); | |
6312 movl(cnt2, 8); negptr(cnt2); | |
6313 bind(CONT_SCAN_SUBSTR); | |
6314 if (int_cnt2 < (int)G) { | |
6315 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); | |
6316 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); | |
6317 } else { | |
6318 // calculate index in register to avoid integer overflow (int_cnt2*2) | |
6319 movl(tmp, int_cnt2); | |
6320 addptr(tmp, cnt2); | |
6321 movdqu(vec, Address(str2, tmp, Address::times_2, 0)); | |
6322 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); | |
6323 } | |
6324 // Need to reload strings pointers if not matched whole vector | |
6325 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 | |
6326 addptr(cnt2, 8); | |
6327 jcc(Assembler::negative, SCAN_SUBSTR); | |
6328 // Fall through if found full substring | |
6329 | |
6330 } // (int_cnt2 > 8) | |
6331 | |
6332 bind(RET_FOUND); | |
6333 // Found result if we matched full small substring. | |
6334 // Compute substr offset | |
6335 subptr(result, str1); | |
6336 shrl(result, 1); // index | |
6337 bind(EXIT); | |
6338 | |
6339 } // string_indexofC8 | |
6340 | |
6341 // Small strings are loaded through stack if they cross page boundary. | |
6342 void MacroAssembler::string_indexof(Register str1, Register str2, | |
6343 Register cnt1, Register cnt2, | |
6344 int int_cnt2, Register result, | |
6345 XMMRegister vec, Register tmp) { | |
6346 ShortBranchVerifier sbv(this); | |
6347 assert(UseSSE42Intrinsics, "SSE4.2 is required"); | |
6348 // | |
6349 // int_cnt2 is length of small (< 8 chars) constant substring | |
6350 // or (-1) for non constant substring in which case its length | |
6351 // is in cnt2 register. | |
6352 // | |
6353 // Note, inline_string_indexOf() generates checks: | |
6354 // if (substr.count > string.count) return -1; | |
6355 // if (substr.count == 0) return 0; | |
6356 // | |
6357 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); | |
6358 | |
6359 // This method uses pcmpestri inxtruction with bound registers | |
6360 // inputs: | |
6361 // xmm - substring | |
6362 // rax - substring length (elements count) | |
6363 // mem - scanned string | |
6364 // rdx - string length (elements count) | |
6365 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) | |
6366 // outputs: | |
6367 // rcx - matched index in string | |
6368 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); | |
6369 | |
6370 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, | |
6371 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, | |
6372 FOUND_CANDIDATE; | |
6373 | |
6374 { //======================================================== | |
6375 // We don't know where these strings are located | |
6376 // and we can't read beyond them. Load them through stack. | |
6377 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; | |
6378 | |
6379 movptr(tmp, rsp); // save old SP | |
6380 | |
6381 if (int_cnt2 > 0) { // small (< 8 chars) constant substring | |
6382 if (int_cnt2 == 1) { // One char | |
6383 load_unsigned_short(result, Address(str2, 0)); | |
6384 movdl(vec, result); // move 32 bits | |
6385 } else if (int_cnt2 == 2) { // Two chars | |
6386 movdl(vec, Address(str2, 0)); // move 32 bits | |
6387 } else if (int_cnt2 == 4) { // Four chars | |
6388 movq(vec, Address(str2, 0)); // move 64 bits | |
6389 } else { // cnt2 = { 3, 5, 6, 7 } | |
6390 // Array header size is 12 bytes in 32-bit VM | |
6391 // + 6 bytes for 3 chars == 18 bytes, | |
6392 // enough space to load vec and shift. | |
6393 assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity"); | |
6394 movdqu(vec, Address(str2, (int_cnt2*2)-16)); | |
6395 psrldq(vec, 16-(int_cnt2*2)); | |
6396 } | |
6397 } else { // not constant substring | |
6398 cmpl(cnt2, 8); | |
6399 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough | |
6400 | |
6401 // We can read beyond string if srt+16 does not cross page boundary | |
6402 // since heaps are aligned and mapped by pages. | |
6403 assert(os::vm_page_size() < (int)G, "default page should be small"); | |
6404 movl(result, str2); // We need only low 32 bits | |
6405 andl(result, (os::vm_page_size()-1)); | |
6406 cmpl(result, (os::vm_page_size()-16)); | |
6407 jccb(Assembler::belowEqual, CHECK_STR); | |
6408 | |
6409 // Move small strings to stack to allow load 16 bytes into vec. | |
6410 subptr(rsp, 16); | |
6411 int stk_offset = wordSize-2; | |
6412 push(cnt2); | |
6413 | |
6414 bind(COPY_SUBSTR); | |
6415 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); | |
6416 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); | |
6417 decrement(cnt2); | |
6418 jccb(Assembler::notZero, COPY_SUBSTR); | |
6419 | |
6420 pop(cnt2); | |
6421 movptr(str2, rsp); // New substring address | |
6422 } // non constant | |
6423 | |
6424 bind(CHECK_STR); | |
6425 cmpl(cnt1, 8); | |
6426 jccb(Assembler::aboveEqual, BIG_STRINGS); | |
6427 | |
6428 // Check cross page boundary. | |
6429 movl(result, str1); // We need only low 32 bits | |
6430 andl(result, (os::vm_page_size()-1)); | |
6431 cmpl(result, (os::vm_page_size()-16)); | |
6432 jccb(Assembler::belowEqual, BIG_STRINGS); | |
6433 | |
6434 subptr(rsp, 16); | |
6435 int stk_offset = -2; | |
6436 if (int_cnt2 < 0) { // not constant | |
6437 push(cnt2); | |
6438 stk_offset += wordSize; | |
6439 } | |
6440 movl(cnt2, cnt1); | |
6441 | |
6442 bind(COPY_STR); | |
6443 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); | |
6444 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); | |
6445 decrement(cnt2); | |
6446 jccb(Assembler::notZero, COPY_STR); | |
6447 | |
6448 if (int_cnt2 < 0) { // not constant | |
6449 pop(cnt2); | |
6450 } | |
6451 movptr(str1, rsp); // New string address | |
6452 | |
6453 bind(BIG_STRINGS); | |
6454 // Load substring. | |
6455 if (int_cnt2 < 0) { // -1 | |
6456 movdqu(vec, Address(str2, 0)); | |
6457 push(cnt2); // substr count | |
6458 push(str2); // substr addr | |
6459 push(str1); // string addr | |
6460 } else { | |
6461 // Small (< 8 chars) constant substrings are loaded already. | |
6462 movl(cnt2, int_cnt2); | |
6463 } | |
6464 push(tmp); // original SP | |
6465 | |
6466 } // Finished loading | |
6467 | |
6468 //======================================================== | |
6469 // Start search | |
6470 // | |
6471 | |
6472 movptr(result, str1); // string addr | |
6473 | |
6474 if (int_cnt2 < 0) { // Only for non constant substring | |
6475 jmpb(SCAN_TO_SUBSTR); | |
6476 | |
6477 // SP saved at sp+0 | |
6478 // String saved at sp+1*wordSize | |
6479 // Substr saved at sp+2*wordSize | |
6480 // Substr count saved at sp+3*wordSize | |
6481 | |
6482 // Reload substr for rescan, this code | |
6483 // is executed only for large substrings (> 8 chars) | |
6484 bind(RELOAD_SUBSTR); | |
6485 movptr(str2, Address(rsp, 2*wordSize)); | |
6486 movl(cnt2, Address(rsp, 3*wordSize)); | |
6487 movdqu(vec, Address(str2, 0)); | |
6488 // We came here after the beginning of the substring was | |
6489 // matched but the rest of it was not so we need to search | |
6490 // again. Start from the next element after the previous match. | |
6491 subptr(str1, result); // Restore counter | |
6492 shrl(str1, 1); | |
6493 addl(cnt1, str1); | |
6494 decrementl(cnt1); // Shift to next element | |
6495 cmpl(cnt1, cnt2); | |
6496 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring | |
6497 | |
6498 addptr(result, 2); | |
6499 } // non constant | |
6500 | |
6501 // Scan string for start of substr in 16-byte vectors | |
6502 bind(SCAN_TO_SUBSTR); | |
6503 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); | |
6504 pcmpestri(vec, Address(result, 0), 0x0d); | |
6505 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 | |
6506 subl(cnt1, 8); | |
6507 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string | |
6508 cmpl(cnt1, cnt2); | |
6509 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring | |
6510 addptr(result, 16); | |
6511 | |
6512 bind(ADJUST_STR); | |
6513 cmpl(cnt1, 8); // Do not read beyond string | |
6514 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); | |
6515 // Back-up string to avoid reading beyond string. | |
6516 lea(result, Address(result, cnt1, Address::times_2, -16)); | |
6517 movl(cnt1, 8); | |
6518 jmpb(SCAN_TO_SUBSTR); | |
6519 | |
6520 // Found a potential substr | |
6521 bind(FOUND_CANDIDATE); | |
6522 // After pcmpestri tmp(rcx) contains matched element index | |
6523 | |
6524 // Make sure string is still long enough | |
6525 subl(cnt1, tmp); | |
6526 cmpl(cnt1, cnt2); | |
6527 jccb(Assembler::greaterEqual, FOUND_SUBSTR); | |
6528 // Left less then substring. | |
6529 | |
6530 bind(RET_NOT_FOUND); | |
6531 movl(result, -1); | |
6532 jmpb(CLEANUP); | |
6533 | |
6534 bind(FOUND_SUBSTR); | |
6535 // Compute start addr of substr | |
6536 lea(result, Address(result, tmp, Address::times_2)); | |
6537 | |
6538 if (int_cnt2 > 0) { // Constant substring | |
6539 // Repeat search for small substring (< 8 chars) | |
6540 // from new point without reloading substring. | |
6541 // Have to check that we don't read beyond string. | |
6542 cmpl(tmp, 8-int_cnt2); | |
6543 jccb(Assembler::greater, ADJUST_STR); | |
6544 // Fall through if matched whole substring. | |
6545 } else { // non constant | |
6546 assert(int_cnt2 == -1, "should be != 0"); | |
6547 | |
6548 addl(tmp, cnt2); | |
6549 // Found result if we matched whole substring. | |
6550 cmpl(tmp, 8); | |
6551 jccb(Assembler::lessEqual, RET_FOUND); | |
6552 | |
6553 // Repeat search for small substring (<= 8 chars) | |
6554 // from new point 'str1' without reloading substring. | |
6555 cmpl(cnt2, 8); | |
6556 // Have to check that we don't read beyond string. | |
6557 jccb(Assembler::lessEqual, ADJUST_STR); | |
6558 | |
6559 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; | |
6560 // Compare the rest of substring (> 8 chars). | |
6561 movptr(str1, result); | |
6562 | |
6563 cmpl(tmp, cnt2); | |
6564 // First 8 chars are already matched. | |
6565 jccb(Assembler::equal, CHECK_NEXT); | |
6566 | |
6567 bind(SCAN_SUBSTR); | |
6568 pcmpestri(vec, Address(str1, 0), 0x0d); | |
6569 // Need to reload strings pointers if not matched whole vector | |
6570 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 | |
6571 | |
6572 bind(CHECK_NEXT); | |
6573 subl(cnt2, 8); | |
6574 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring | |
6575 addptr(str1, 16); | |
6576 addptr(str2, 16); | |
6577 subl(cnt1, 8); | |
6578 cmpl(cnt2, 8); // Do not read beyond substring | |
6579 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); | |
6580 // Back-up strings to avoid reading beyond substring. | |
6581 lea(str2, Address(str2, cnt2, Address::times_2, -16)); | |
6582 lea(str1, Address(str1, cnt2, Address::times_2, -16)); | |
6583 subl(cnt1, cnt2); | |
6584 movl(cnt2, 8); | |
6585 addl(cnt1, 8); | |
6586 bind(CONT_SCAN_SUBSTR); | |
6587 movdqu(vec, Address(str2, 0)); | |
6588 jmpb(SCAN_SUBSTR); | |
6589 | |
6590 bind(RET_FOUND_LONG); | |
6591 movptr(str1, Address(rsp, wordSize)); | |
6592 } // non constant | |
6593 | |
6594 bind(RET_FOUND); | |
6595 // Compute substr offset | |
6596 subptr(result, str1); | |
6597 shrl(result, 1); // index | |
6598 | |
6599 bind(CLEANUP); | |
6600 pop(rsp); // restore SP | |
6601 | |
6602 } // string_indexof | |
6603 | |
6604 // Compare strings. | |
6605 void MacroAssembler::string_compare(Register str1, Register str2, | |
6606 Register cnt1, Register cnt2, Register result, | |
6607 XMMRegister vec1) { | |
6608 ShortBranchVerifier sbv(this); | |
6609 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; | |
6610 | |
6611 // Compute the minimum of the string lengths and the | |
6612 // difference of the string lengths (stack). | |
6613 // Do the conditional move stuff | |
6614 movl(result, cnt1); | |
6615 subl(cnt1, cnt2); | |
6616 push(cnt1); | |
6617 cmov32(Assembler::lessEqual, cnt2, result); | |
6618 | |
6619 // Is the minimum length zero? | |
6620 testl(cnt2, cnt2); | |
6621 jcc(Assembler::zero, LENGTH_DIFF_LABEL); | |
6622 | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6623 // Compare first characters |
7199 | 6624 load_unsigned_short(result, Address(str1, 0)); |
6625 load_unsigned_short(cnt1, Address(str2, 0)); | |
6626 subl(result, cnt1); | |
6627 jcc(Assembler::notZero, POP_LABEL); | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6628 cmpl(cnt2, 1); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6629 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6630 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6631 // Check if the strings start at the same location. |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6632 cmpptr(str1, str2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6633 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
7199 | 6634 |
6635 Address::ScaleFactor scale = Address::times_2; | |
6636 int stride = 8; | |
6637 | |
8042
91a23b11d8dc
8007708: compiler/6855215 assert(VM_Version::supports_sse4_2())
kvn
parents:
8002
diff
changeset
|
6638 if (UseAVX >= 2 && UseSSE42Intrinsics) { |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6639 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6640 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6641 Label COMPARE_TAIL_LONG; |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6642 int pcmpmask = 0x19; |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6643 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6644 // Setup to compare 16-chars (32-bytes) vectors, |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6645 // start from first character again because it has aligned address. |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6646 int stride2 = 16; |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6647 int adr_stride = stride << scale; |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6648 int adr_stride2 = stride2 << scale; |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6649 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6650 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6651 // rax and rdx are used by pcmpestri as elements counters |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6652 movl(result, cnt2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6653 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6654 jcc(Assembler::zero, COMPARE_TAIL_LONG); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6655 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6656 // fast path : compare first 2 8-char vectors. |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6657 bind(COMPARE_16_CHARS); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6658 movdqu(vec1, Address(str1, 0)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6659 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6660 jccb(Assembler::below, COMPARE_INDEX_CHAR); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6661 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6662 movdqu(vec1, Address(str1, adr_stride)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6663 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6664 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6665 addl(cnt1, stride); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6666 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6667 // Compare the characters at index in cnt1 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6668 bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6669 load_unsigned_short(result, Address(str1, cnt1, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6670 load_unsigned_short(cnt2, Address(str2, cnt1, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6671 subl(result, cnt2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6672 jmp(POP_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6673 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6674 // Setup the registers to start vector comparison loop |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6675 bind(COMPARE_WIDE_VECTORS); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6676 lea(str1, Address(str1, result, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6677 lea(str2, Address(str2, result, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6678 subl(result, stride2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6679 subl(cnt2, stride2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6680 jccb(Assembler::zero, COMPARE_WIDE_TAIL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6681 negptr(result); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6682 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6683 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest) |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6684 bind(COMPARE_WIDE_VECTORS_LOOP); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6685 vmovdqu(vec1, Address(str1, result, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6686 vpxor(vec1, Address(str2, result, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6687 vptest(vec1, vec1); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6688 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6689 addptr(result, stride2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6690 subl(cnt2, stride2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6691 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
8873
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
6692 // clean upper bits of YMM registers |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
6693 vzeroupper(); |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6694 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6695 // compare wide vectors tail |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6696 bind(COMPARE_WIDE_TAIL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6697 testptr(result, result); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6698 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6699 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6700 movl(result, stride2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6701 movl(cnt2, result); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6702 negptr(result); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6703 jmpb(COMPARE_WIDE_VECTORS_LOOP); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6704 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6705 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6706 bind(VECTOR_NOT_EQUAL); |
8873
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
6707 // clean upper bits of YMM registers |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
6708 vzeroupper(); |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6709 lea(str1, Address(str1, result, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6710 lea(str2, Address(str2, result, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6711 jmp(COMPARE_16_CHARS); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6712 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6713 // Compare tail chars, length between 1 to 15 chars |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6714 bind(COMPARE_TAIL_LONG); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6715 movl(cnt2, result); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6716 cmpl(cnt2, stride); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6717 jccb(Assembler::less, COMPARE_SMALL_STR); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6718 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6719 movdqu(vec1, Address(str1, 0)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6720 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6721 jcc(Assembler::below, COMPARE_INDEX_CHAR); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6722 subptr(cnt2, stride); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6723 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6724 lea(str1, Address(str1, result, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6725 lea(str2, Address(str2, result, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6726 negptr(cnt2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6727 jmpb(WHILE_HEAD_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6728 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6729 bind(COMPARE_SMALL_STR); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6730 } else if (UseSSE42Intrinsics) { |
7199 | 6731 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; |
6732 int pcmpmask = 0x19; | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6733 // Setup to compare 8-char (16-byte) vectors, |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6734 // start from first character again because it has aligned address. |
7199 | 6735 movl(result, cnt2); |
6736 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count | |
6737 jccb(Assembler::zero, COMPARE_TAIL); | |
6738 | |
6739 lea(str1, Address(str1, result, scale)); | |
6740 lea(str2, Address(str2, result, scale)); | |
6741 negptr(result); | |
6742 | |
6743 // pcmpestri | |
6744 // inputs: | |
6745 // vec1- substring | |
6746 // rax - negative string length (elements count) | |
6747 // mem - scaned string | |
6748 // rdx - string length (elements count) | |
6749 // pcmpmask - cmp mode: 11000 (string compare with negated result) | |
6750 // + 00 (unsigned bytes) or + 01 (unsigned shorts) | |
6751 // outputs: | |
6752 // rcx - first mismatched element index | |
6753 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); | |
6754 | |
6755 bind(COMPARE_WIDE_VECTORS); | |
6756 movdqu(vec1, Address(str1, result, scale)); | |
6757 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); | |
6758 // After pcmpestri cnt1(rcx) contains mismatched element index | |
6759 | |
6760 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 | |
6761 addptr(result, stride); | |
6762 subptr(cnt2, stride); | |
6763 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); | |
6764 | |
6765 // compare wide vectors tail | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6766 testptr(result, result); |
7199 | 6767 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
6768 | |
6769 movl(cnt2, stride); | |
6770 movl(result, stride); | |
6771 negptr(result); | |
6772 movdqu(vec1, Address(str1, result, scale)); | |
6773 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); | |
6774 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); | |
6775 | |
6776 // Mismatched characters in the vectors | |
6777 bind(VECTOR_NOT_EQUAL); | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6778 addptr(cnt1, result); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6779 load_unsigned_short(result, Address(str1, cnt1, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6780 load_unsigned_short(cnt2, Address(str2, cnt1, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6781 subl(result, cnt2); |
7199 | 6782 jmpb(POP_LABEL); |
6783 | |
6784 bind(COMPARE_TAIL); // limit is zero | |
6785 movl(cnt2, result); | |
6786 // Fallthru to tail compare | |
6787 } | |
6788 // Shift str2 and str1 to the end of the arrays, negate min | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6789 lea(str1, Address(str1, cnt2, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6790 lea(str2, Address(str2, cnt2, scale)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6791 decrementl(cnt2); // first character was compared already |
7199 | 6792 negptr(cnt2); |
6793 | |
6794 // Compare the rest of the elements | |
6795 bind(WHILE_HEAD_LABEL); | |
6796 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); | |
6797 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); | |
6798 subl(result, cnt1); | |
6799 jccb(Assembler::notZero, POP_LABEL); | |
6800 increment(cnt2); | |
6801 jccb(Assembler::notZero, WHILE_HEAD_LABEL); | |
6802 | |
6803 // Strings are equal up to min length. Return the length difference. | |
6804 bind(LENGTH_DIFF_LABEL); | |
6805 pop(result); | |
6806 jmpb(DONE_LABEL); | |
6807 | |
6808 // Discard the stored length difference | |
6809 bind(POP_LABEL); | |
6810 pop(cnt1); | |
6811 | |
6812 // That's it | |
6813 bind(DONE_LABEL); | |
6814 } | |
6815 | |
6816 // Compare char[] arrays aligned to 4 bytes or substrings. | |
6817 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, | |
6818 Register limit, Register result, Register chr, | |
6819 XMMRegister vec1, XMMRegister vec2) { | |
6820 ShortBranchVerifier sbv(this); | |
6821 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; | |
6822 | |
6823 int length_offset = arrayOopDesc::length_offset_in_bytes(); | |
6824 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); | |
6825 | |
6826 // Check the input args | |
6827 cmpptr(ary1, ary2); | |
6828 jcc(Assembler::equal, TRUE_LABEL); | |
6829 | |
6830 if (is_array_equ) { | |
6831 // Need additional checks for arrays_equals. | |
6832 testptr(ary1, ary1); | |
6833 jcc(Assembler::zero, FALSE_LABEL); | |
6834 testptr(ary2, ary2); | |
6835 jcc(Assembler::zero, FALSE_LABEL); | |
6836 | |
6837 // Check the lengths | |
6838 movl(limit, Address(ary1, length_offset)); | |
6839 cmpl(limit, Address(ary2, length_offset)); | |
6840 jcc(Assembler::notEqual, FALSE_LABEL); | |
6841 } | |
6842 | |
6843 // count == 0 | |
6844 testl(limit, limit); | |
6845 jcc(Assembler::zero, TRUE_LABEL); | |
6846 | |
6847 if (is_array_equ) { | |
6848 // Load array address | |
6849 lea(ary1, Address(ary1, base_offset)); | |
6850 lea(ary2, Address(ary2, base_offset)); | |
6851 } | |
6852 | |
6853 shll(limit, 1); // byte count != 0 | |
6854 movl(result, limit); // copy | |
6855 | |
7477
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6856 if (UseAVX >= 2) { |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6857 // With AVX2, use 32-byte vector compare |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6858 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6859 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6860 // Compare 32-byte vectors |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6861 andl(result, 0x0000001e); // tail count (in bytes) |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6862 andl(limit, 0xffffffe0); // vector count (in bytes) |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6863 jccb(Assembler::zero, COMPARE_TAIL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6864 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6865 lea(ary1, Address(ary1, limit, Address::times_1)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6866 lea(ary2, Address(ary2, limit, Address::times_1)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6867 negptr(limit); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6868 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6869 bind(COMPARE_WIDE_VECTORS); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6870 vmovdqu(vec1, Address(ary1, limit, Address::times_1)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6871 vmovdqu(vec2, Address(ary2, limit, Address::times_1)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6872 vpxor(vec1, vec2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6873 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6874 vptest(vec1, vec1); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6875 jccb(Assembler::notZero, FALSE_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6876 addptr(limit, 32); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6877 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6878 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6879 testl(result, result); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6880 jccb(Assembler::zero, TRUE_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6881 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6882 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6883 vmovdqu(vec2, Address(ary2, result, Address::times_1, -32)); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6884 vpxor(vec1, vec2); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6885 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6886 vptest(vec1, vec1); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6887 jccb(Assembler::notZero, FALSE_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6888 jmpb(TRUE_LABEL); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6889 |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6890 bind(COMPARE_TAIL); // limit is zero |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6891 movl(limit, result); |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6892 // Fallthru to tail compare |
038dd2875b94
8005419: Improve intrinsics code performance on x86 by using AVX2
kvn
parents:
7476
diff
changeset
|
6893 } else if (UseSSE42Intrinsics) { |
7199 | 6894 // With SSE4.2, use double quad vector compare |
6895 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; | |
6896 | |
6897 // Compare 16-byte vectors | |
6898 andl(result, 0x0000000e); // tail count (in bytes) | |
6899 andl(limit, 0xfffffff0); // vector count (in bytes) | |
6900 jccb(Assembler::zero, COMPARE_TAIL); | |
6901 | |
6902 lea(ary1, Address(ary1, limit, Address::times_1)); | |
6903 lea(ary2, Address(ary2, limit, Address::times_1)); | |
6904 negptr(limit); | |
6905 | |
6906 bind(COMPARE_WIDE_VECTORS); | |
6907 movdqu(vec1, Address(ary1, limit, Address::times_1)); | |
6908 movdqu(vec2, Address(ary2, limit, Address::times_1)); | |
6909 pxor(vec1, vec2); | |
6910 | |
6911 ptest(vec1, vec1); | |
6912 jccb(Assembler::notZero, FALSE_LABEL); | |
6913 addptr(limit, 16); | |
6914 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); | |
6915 | |
6916 testl(result, result); | |
6917 jccb(Assembler::zero, TRUE_LABEL); | |
6918 | |
6919 movdqu(vec1, Address(ary1, result, Address::times_1, -16)); | |
6920 movdqu(vec2, Address(ary2, result, Address::times_1, -16)); | |
6921 pxor(vec1, vec2); | |
6922 | |
6923 ptest(vec1, vec1); | |
6924 jccb(Assembler::notZero, FALSE_LABEL); | |
6925 jmpb(TRUE_LABEL); | |
6926 | |
6927 bind(COMPARE_TAIL); // limit is zero | |
6928 movl(limit, result); | |
6929 // Fallthru to tail compare | |
6930 } | |
6931 | |
6932 // Compare 4-byte vectors | |
6933 andl(limit, 0xfffffffc); // vector count (in bytes) | |
6934 jccb(Assembler::zero, COMPARE_CHAR); | |
6935 | |
6936 lea(ary1, Address(ary1, limit, Address::times_1)); | |
6937 lea(ary2, Address(ary2, limit, Address::times_1)); | |
6938 negptr(limit); | |
6939 | |
6940 bind(COMPARE_VECTORS); | |
6941 movl(chr, Address(ary1, limit, Address::times_1)); | |
6942 cmpl(chr, Address(ary2, limit, Address::times_1)); | |
6943 jccb(Assembler::notEqual, FALSE_LABEL); | |
6944 addptr(limit, 4); | |
6945 jcc(Assembler::notZero, COMPARE_VECTORS); | |
6946 | |
6947 // Compare trailing char (final 2 bytes), if any | |
6948 bind(COMPARE_CHAR); | |
6949 testl(result, 0x2); // tail char | |
6950 jccb(Assembler::zero, TRUE_LABEL); | |
6951 load_unsigned_short(chr, Address(ary1, 0)); | |
6952 load_unsigned_short(limit, Address(ary2, 0)); | |
6953 cmpl(chr, limit); | |
6954 jccb(Assembler::notEqual, FALSE_LABEL); | |
6955 | |
6956 bind(TRUE_LABEL); | |
6957 movl(result, 1); // return true | |
6958 jmpb(DONE); | |
6959 | |
6960 bind(FALSE_LABEL); | |
6961 xorl(result, result); // return false | |
6962 | |
6963 // That's it | |
6964 bind(DONE); | |
8873
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
6965 if (UseAVX >= 2) { |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
6966 // clean upper bits of YMM registers |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
6967 vzeroupper(); |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
8767
diff
changeset
|
6968 } |
7199 | 6969 } |
6970 | |
// Emit a fill loop: store 'count' elements of basic type 't' (T_BYTE, T_SHORT
// or T_INT), each equal to 'value', starting at address 'to'.
//
// Register contract:
//   to    [in,out] destination pointer, advanced as bytes are written
//   value [in,out] fill value; for T_BYTE/T_SHORT it is replicated to 32 bits
//   count [in,out] element count, consumed as the fill progresses
//   rtmp           scratch GPR used while replicating 'value'
//   xtmp           scratch XMM register for the SSE2/AVX2 vector paths
//
// 'shift' is chosen so that (1 << shift) elements == 4 bytes, i.e.
// (n << shift) elements == 4*n bytes; all count arithmetic below is phrased
// in those units.
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate the fill value across all 32 bits so every dword store writes
  // the pattern: a byte becomes b|b|b|b, a short becomes s|s.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1)); // one element for shorts, two for bytes
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // No SSE2: fill with plain 32-bit integer stores.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    // Eight dword stores per iteration == 32 bytes.
    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift); // undo the pre-decrement; count = remaining elements
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1)); // 8 bytes' worth of elements
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Broadcast the 32-bit pattern into xtmp for vector stores.
      movdl(xtmp, value);
      if (UseAVX >= 2 && UseUnalignedLoadStores) {
        // Fill 64-byte chunks
        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
        vpbroadcastd(xtmp, xtmp); // replicate dword across the full YMM register

        subl(count, 16 << shift); // 64 bytes per iteration
        jcc(Assembler::less, L_check_fill_32_bytes);
        align(16);

        BIND(L_fill_64_bytes_loop);
        vmovdqu(Address(to, 0), xtmp);
        vmovdqu(Address(to, 32), xtmp);
        addptr(to, 64);
        subl(count, 16 << shift);
        jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);

        BIND(L_check_fill_32_bytes);
        addl(count, 8 << shift); // see if one more 32-byte store fits
        jccb(Assembler::less, L_check_fill_8_bytes);
        vmovdqu(Address(to, 0), xtmp);
        addptr(to, 32);
        subl(count, 8 << shift);

        BIND(L_check_fill_8_bytes);
        // clean upper bits of YMM registers
        vzeroupper();
      } else {
        // Fill 32-byte chunks
        pshufd(xtmp, xtmp, 0); // replicate dword across the XMM register

        subl(count, 8 << shift);
        jcc(Assembler::less, L_check_fill_8_bytes);
        align(16);

        BIND(L_fill_32_bytes_loop);

        if (UseUnalignedLoadStores) {
          movdqu(Address(to, 0), xtmp);
          movdqu(Address(to, 16), xtmp);
        } else {
          // Only 8-byte-aligned here, so use qword stores.
          movq(Address(to, 0), xtmp);
          movq(Address(to, 8), xtmp);
          movq(Address(to, 16), xtmp);
          movq(Address(to, 24), xtmp);
        }

        addptr(to, 32);
        subl(count, 8 << shift);
        jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);

        BIND(L_check_fill_8_bytes);
      }
      addl(count, 8 << shift); // undo the pre-decrement; count = remaining elements
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1)); // 8 bytes' worth of elements
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift); // is a 4-byte remainder present?
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    // T_INT: nothing smaller than a dword can remain.
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
// encode char[] to byte[] in ISO_8859_1
//
// Compresses 'len' UTF-16 chars at 'src' into bytes at 'dst', stopping at the
// first char with any bit set in 0xff00 (i.e. a char that does not fit in
// ISO-8859-1). 'result' receives the number of chars actually encoded
// (== len on full success). The vector paths test a whole vector against the
// 0xff00ff00 mask and bail out to narrower paths when a non-Latin-1 char is
// present anywhere in it; 'len' is kept as a negative index that counts up to
// zero, so loads/stores use (base + len*scale - chunk) addressing.
//
// Expected register assignment (see callers):
//   rsi: src, rdi: dst, rdx: len, rcx: tmp5, rax: result
void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
                                      XMMRegister tmp1Reg, XMMRegister tmp2Reg,
                                      XMMRegister tmp3Reg, XMMRegister tmp4Reg,
                                      Register tmp5, Register result) {
  // rsi: src
  // rdi: dst
  // rdx: len
  // rcx: tmp5
  // rax: result
  ShortBranchVerifier sbv(this);
  assert_different_registers(src, dst, len, tmp5, result);
  Label L_done, L_copy_1_char, L_copy_1_char_exit;

  // set result
  xorl(result, result);
  // check for zero length
  testl(len, len);
  jcc(Assembler::zero, L_done);
  movl(result, len);

  // Setup pointers: point past the end, then run 'len' from -len up to 0.
  lea(src, Address(src, len, Address::times_2)); // char[]
  lea(dst, Address(dst, len, Address::times_1)); // byte[]
  negptr(len);

  if (UseSSE42Intrinsics || UseAVX >= 2) {
    Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
    Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;

    if (UseAVX >= 2) {
      // 32-chars-at-a-time loop using two YMM loads per iteration.
      Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      vpbroadcastd(tmp1Reg, tmp1Reg);
      jmpb(L_chars_32_check);

      bind(L_copy_32_chars);
      vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
      vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_32_chars_exit);
      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      // vpackuswb interleaves 128-bit lanes; vpermq 0xD8 restores byte order.
      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
      vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);

      bind(L_chars_32_check);
      addptr(len, 32);
      jccb(Assembler::lessEqual, L_copy_32_chars);

      bind(L_copy_32_chars_exit);
      subptr(len, 16); // retarget index for the 16-char loop below
      jccb(Assembler::greater, L_copy_16_chars_exit);

    } else if (UseSSE42Intrinsics) {
      movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      pshufd(tmp1Reg, tmp1Reg, 0);
      jmpb(L_chars_16_check);
    }

    // 16 chars per iteration: one YMM load (AVX2) or two XMM loads (SSE).
    bind(L_copy_16_chars);
    if (UseAVX >= 2) {
      vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
      vptest(tmp2Reg, tmp1Reg);
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
    } else {
      if (UseAVX > 0) {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
      } else {
        // Plain SSE: OR both halves into tmp2Reg with two-operand por.
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        por(tmp2Reg, tmp3Reg);
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        por(tmp2Reg, tmp4Reg);
      }
      ptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      packuswb(tmp3Reg, tmp4Reg);
    }
    movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);

    bind(L_chars_16_check);
    addptr(len, 16);
    jccb(Assembler::lessEqual, L_copy_16_chars);

    bind(L_copy_16_chars_exit);
    if (UseAVX >= 2) {
      // clean upper bits of YMM registers
      vzeroupper();
    }
    subptr(len, 8); // retarget index for the 8-char loop
    jccb(Assembler::greater, L_copy_8_chars_exit);

    bind(L_copy_8_chars);
    movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
    ptest(tmp3Reg, tmp1Reg);
    jccb(Assembler::notZero, L_copy_8_chars_exit);
    packuswb(tmp3Reg, tmp1Reg);
    movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
    addptr(len, 8);
    jccb(Assembler::lessEqual, L_copy_8_chars);

    bind(L_copy_8_chars_exit);
    subptr(len, 8); // retarget index for the scalar loop
    jccb(Assembler::zero, L_done);
  }

  // Scalar tail: copy one char at a time until done or a Unicode char stops us.
  bind(L_copy_1_char);
  load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
  testl(tmp5, 0xff00);    // check if Unicode char
  jccb(Assembler::notZero, L_copy_1_char_exit);
  movb(Address(dst, len, Address::times_1, 0), tmp5);
  addptr(len, 1);
  jccb(Assembler::less, L_copy_1_char);

  bind(L_copy_1_char_exit);
  addptr(result, len); // len is negative count of not processed elements
  bind(L_done);
}
/**
 * Emits code to update CRC-32 with a byte value according to constants in table
 *
 * @param [in,out]crc   Register containing the crc.
 * @param [in]val       Register containing the byte to fold into the CRC.
 * @param [in]table     Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 *
 * Note: 'val' is clobbered (it becomes the table index).
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  xorl(val, crc);     // val ^= crc
  andl(val, 0xFF);    // keep only the low byte as table index
  shrl(crc, 8); // unsigned shift
  xorl(crc, Address(table, val, Address::times_4, 0)); // crc = table[val] ^ (crc >> 8)
}
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
7314 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
/**
 * Fold 128-bit data chunk: combines the running 128-bit CRC remainder in
 * xcrc with the next 16 bytes of input at buf+offset, using carry-less
 * multiplication (vpclmul*) by the folding constants in xK.
 *
 * @param xcrc   [in,out] 128-bit running CRC remainder
 * @param xK     folding constants (from the CRC-by-128 mask table)
 * @param xtmp   scratch XMM register (clobbered)
 * @param buf    base register pointing to the input buffer
 * @param offset byte offset of the 16-byte chunk to fold in
 */
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
  vpclmulhdq(xtmp, xK, xcrc); // high carry-less product: [123:64]
  vpclmulldq(xcrc, xK, xcrc); // low carry-less product:  [63:0]
  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */); // xor in next 16 input bytes
  pxor(xcrc, xtmp);           // combine the two partial products
}
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
7324 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
// Fold a 128-bit data chunk into the running CRC remainder xcrc via
// carry-less multiplication by the constants in xK. Unlike the memory
// variant, the data chunk to combine is already in an XMM register (xbuf).
// xtmp is a scratch register and is clobbered.
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
  vpclmulhdq(xtmp, xK, xcrc); // high carry-less product
  vpclmulldq(xcrc, xK, xcrc); // low carry-less product
  pxor(xcrc, xbuf);           // xor in the data chunk
  pxor(xcrc, xtmp);           // combine the two partial products
}
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
7331 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
/**
 * 8-bit folds to compute 32-bit CRC
 *
 * One fold step on the low byte of an XMM-resident value:
 *   uint64_t xcrc;
 *   timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
 *
 * @param xcrc  [in,out] XMM register holding the value being folded
 * @param table register holding the address of the crc constant table
 * @param xtmp  scratch XMM register (clobbered)
 * @param tmp   scratch general-purpose register (clobbered)
 */
void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
  movdl(tmp, xcrc);                                      // low 32 bits of xcrc into tmp
  andl(tmp, 0xFF);                                       // table index = low byte
  movdl(xtmp, Address(table, tmp, Address::times_4, 0)); // xtmp = table[index]
  psrldq(xcrc, 1); // unsigned shift one byte
  pxor(xcrc, xtmp);                                      // fold table entry back in
}
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
7345 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
/**
 * One 8-bit fold step on a general-purpose register:
 *   uint32_t crc;
 *   timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 *
 * @param crc   [in,out] register holding the crc being folded
 * @param table register holding the address of the crc constant table
 * @param tmp   scratch register (clobbered; holds the table index)
 */
void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
  movl(tmp, crc);                                      // tmp = crc
  andl(tmp, 0xFF);                                     // table index = low byte of crc
  shrl(crc, 8);                                        // unsigned shift: crc >> 8
  xorl(crc, Address(table, tmp, Address::times_4, 0)); // crc = table[index] ^ (crc >> 8)
}
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
7356 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
/**
 * Computes CRC-32 over a byte buffer, using PCLMULQDQ-based folding over
 * 16-byte chunks for the bulk of the data and a table-driven byte loop for
 * alignment and tail bytes.
 *
 * @param crc   register containing existing CRC (32-bit); updated in place
 * @param buf   register pointing to input byte buffer (byte*); clobbered
 * @param len   register containing number of bytes; clobbered
 * @param table register that will contain address of CRC table
 * @param tmp   scratch register
 *
 * Also clobbers rax and xmm0-xmm5 (see assert below and xmm uses in body).
 */
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
  assert_different_registers(crc, buf, len, table, tmp, rax);

  Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
  Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;

  lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
  notl(crc); // ~crc  (CRC-32 pre-inversion; undone by the notl at L_exit)
  cmpl(len, 16);
  jcc(Assembler::less, L_tail); // short input: pure byte loop

  // Align buffer to 16 bytes
  movl(tmp, buf);  // only the low 4 bits of buf are used below
  andl(tmp, 0xF);
  jccb(Assembler::zero, L_aligned);
  subl(tmp,  16);  // tmp = -(bytes needed to reach alignment)
  addl(len, tmp);  // those bytes are consumed by the align loop

  align(4);
  BIND(L_align_loop);
  movsbl(rax, Address(buf, 0)); // load byte with sign extension
  update_byte_crc32(crc, rax, table);
  increment(buf);
  incrementl(tmp);              // counts up from negative toward zero
  jccb(Assembler::less, L_align_loop);

  BIND(L_aligned);
  movl(tmp, len); // save (byte count restored at L_tail_restore)
  shrl(len, 4);   // len = number of 16-byte chunks
  jcc(Assembler::zero, L_tail_restore);

  // Fold crc into first bytes of vector
  movdqa(xmm1, Address(buf, 0));
  movdl(rax, xmm1);
  xorl(crc, rax);
  pinsrd(xmm1, crc, 0); // xmm1 = first chunk with crc xor-ed into its low dword
  addptr(buf, 16);
  subl(len, 4); // len > 0
  jcc(Assembler::less, L_fold_tail); // fewer than 4 chunks: single-stream fold

  // Prime the remaining 3 of the 4 parallel 128-bit streams.
  movdqa(xmm2, Address(buf,  0));
  movdqa(xmm3, Address(buf, 16));
  movdqa(xmm4, Address(buf, 32));
  addptr(buf, 48);
  subl(len, 3);
  jcc(Assembler::lessEqual, L_fold_512b);

  // Fold total 512 bits of polynomial on each iteration,
  // 128 bits per each of 4 parallel streams.
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));

  align(32);
  BIND(L_fold_512b_loop);
  fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
  fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
  fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
  fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
  addptr(buf, 64);
  subl(len, 4);
  jcc(Assembler::greater, L_fold_512b_loop);

  // Fold 512 bits to 128 bits.
  BIND(L_fold_512b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);

  // Fold the rest of 128 bits data chunks
  BIND(L_fold_tail);
  addl(len, 3);
  jccb(Assembler::lessEqual, L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));

  BIND(L_fold_tail_loop);
  fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
  addptr(buf, 16);
  decrementl(len);
  jccb(Assembler::greater, L_fold_tail_loop);

  // Fold 128 bits in xmm1 down into 32 bits in crc register.
  BIND(L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
  psrldq(xmm1, 8);
  psrldq(xmm2, 4);
  pxor(xmm0, xmm1);
  pxor(xmm0, xmm2);

  // 8 8-bit folds to compute 32-bit CRC.
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(xmm0, table, xmm1, rax); // four folds in the XMM domain
  }
  movdl(crc, xmm0); // mov 32 bits to general register
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(crc, table, rax);        // four more in the GPR domain
  }

  BIND(L_tail_restore);
  movl(len, tmp); // restore
  BIND(L_tail);
  andl(len, 0xf); // remaining bytes after the 16-byte chunks
  jccb(Assembler::zero, L_exit);

  // Fold the rest of bytes
  align(4);
  BIND(L_tail_loop);
  movsbl(rax, Address(buf, 0)); // load byte with sign extension
  update_byte_crc32(crc, rax, table);
  increment(buf);
  decrementl(len);
  jccb(Assembler::greater, L_tail_loop);

  BIND(L_exit);
  notl(crc); // ~c  (final CRC-32 inversion)
}
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
8873
diff
changeset
|
7482 |
7199 | 7483 #undef BIND |
7484 #undef BLOCK_COMMENT | |
7485 | |
7486 | |
7487 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { | |
7488 switch (cond) { | |
7489 // Note some conditions are synonyms for others | |
7490 case Assembler::zero: return Assembler::notZero; | |
7491 case Assembler::notZero: return Assembler::zero; | |
7492 case Assembler::less: return Assembler::greaterEqual; | |
7493 case Assembler::lessEqual: return Assembler::greater; | |
7494 case Assembler::greater: return Assembler::lessEqual; | |
7495 case Assembler::greaterEqual: return Assembler::less; | |
7496 case Assembler::below: return Assembler::aboveEqual; | |
7497 case Assembler::belowEqual: return Assembler::above; | |
7498 case Assembler::above: return Assembler::belowEqual; | |
7499 case Assembler::aboveEqual: return Assembler::below; | |
7500 case Assembler::overflow: return Assembler::noOverflow; | |
7501 case Assembler::noOverflow: return Assembler::overflow; | |
7502 case Assembler::negative: return Assembler::positive; | |
7503 case Assembler::positive: return Assembler::negative; | |
7504 case Assembler::parity: return Assembler::noParity; | |
7505 case Assembler::noParity: return Assembler::parity; | |
7506 } | |
7507 ShouldNotReachHere(); return Assembler::overflow; | |
7508 } | |
7509 | |
7510 SkipIfEqual::SkipIfEqual( | |
7511 MacroAssembler* masm, const bool* flag_addr, bool value) { | |
7512 _masm = masm; | |
7513 _masm->cmp8(ExternalAddress((address)flag_addr), value); | |
7514 _masm->jcc(Assembler::equal, _label); | |
7515 } | |
7516 | |
// Binds the label targeted by the constructor's conditional jump,
// ending the guarded code region (RAII scope guard pattern).
SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}