src/cpu/ppc/vm/macroAssembler_ppc.cpp @ 14408:ec28f9c041ff
comparison: 14407:94c202aa2646 vs 14408:ec28f9c041ff

8019972: PPC64 (part 9): platform files for interpreter only VM.
Summary: With this change the HotSpot core build works on Linux/PPC64. The VM successfully executes simple test programs.
Reviewed-by: kvn

author:   goetz
date:     Fri, 02 Aug 2013 16:46:45 +0200
children: 7687c56b6693
/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012, 2013 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

#ifdef ASSERT
// On RISC, there's no benefit to verifying instruction boundaries.
bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
  assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
  if (Assembler::is_simm(si31, 16)) {
    ld(d, si31, a);
    if (emit_filler_nop) nop();
  } else {
    const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);
    const int lo = MacroAssembler::largeoffset_si16_si16_lo(si31);
    addis(d, a, hi);
    ld(d, lo, d);
  }
}
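
// Note on the hi/lo split used above (explanatory, added here): the d-form ld
// only takes a signed 16-bit displacement, so a 31-bit offset is split into an
// addis (high half) plus the load's displacement (low half). Since the low
// half is sign-extended, the high half must absorb a carry when bit 15 of the
// offset is set. A sketch of the intended arithmetic, assuming
// largeoffset_si16_si16_hi/_lo implement exactly this split:
//
//   int lo = (int)(short)(si31 & 0xffff); // sign-extended low half
//   int hi = (si31 - lo) >> 16;           // high half, carry-compensated
//   // addis d, a, hi; ld d, lo, d  computes a + (hi << 16) + lo == a + si31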

void MacroAssembler::ld_largeoffset(Register d, int si31, Register a, int emit_filler_nop) {
  assert_different_registers(d, a);
  ld_largeoffset_unchecked(d, si31, a, emit_filler_nop);
}

void MacroAssembler::load_sized_value(Register dst, RegisterOrConstant offs, Register base,
                                      size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
    case 8: ld(dst, offs, base); break;
    case 4: is_signed ? lwa(dst, offs, base) : lwz(dst, offs, base); break;
    case 2: is_signed ? lha(dst, offs, base) : lhz(dst, offs, base); break;
    case 1: lbz(dst, offs, base); if (is_signed) extsb(dst, dst); break; // lba doesn't exist :(
    default: ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Register base,
                                       size_t size_in_bytes) {
  switch (size_in_bytes) {
    case 8: std(dst, offs, base); break;
    case 4: stw(dst, offs, base); break;
    case 2: sth(dst, offs, base); break;
    case 1: stb(dst, offs, base); break;
    default: ShouldNotReachHere();
  }
}

void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) nop();
}

// Issue instructions that calculate given TOC from global TOC.
void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16,
                                                       bool add_relocation, bool emit_dummy_addr) {
  int offset = -1;
  if (emit_dummy_addr) {
    offset = -128; // dummy address
  } else if (addr != (address)(intptr_t)-1) {
    offset = MacroAssembler::offset_to_global_toc(addr);
  }

  if (hi16) {
    addis(dst, R29, MacroAssembler::largeoffset_si16_si16_hi(offset));
  }
  if (lo16) {
    if (add_relocation) {
      // Relocate at the addi to avoid confusion with a load from the method's TOC.
      relocate(internal_word_Relocation::spec(addr));
    }
    addi(dst, dst, MacroAssembler::largeoffset_si16_si16_lo(offset));
  }
}

int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
  const int offset = MacroAssembler::offset_to_global_toc(addr);

  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The relocation points to the second instruction, the addi,
  // and the addi reads and writes the same register dst.
  const int dst = inv_rt_field(inst2);
  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");

  // Now, find the preceding addis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  while (inst1_addr >= bound) {
    inst1 = *(int *) inst1_addr;
    if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
      // Stop, found the addis which writes dst.
      break;
    }
    inst1_addr -= BytesPerInstWord;
  }

  assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
  set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
  set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
  return (int)((intptr_t)addr - (intptr_t)inst1_addr);
}

address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The relocation points to the second instruction, the addi,
  // and the addi reads and writes the same register dst.
  const int dst = inv_rt_field(inst2);
  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");

  // Now, find the preceding addis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  while (inst1_addr >= bound) {
    inst1 = *(int *) inst1_addr;
    if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
      // Stop, found the addis which writes dst.
      break;
    }
    inst1_addr -= BytesPerInstWord;
  }

  assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");

  int offset = (get_imm(inst1_addr, 0) << 16) + get_imm(inst2_addr, 0);
  // -1 is a special case
  if (offset == -1) {
    return (address)(intptr_t)-1;
  } else {
    return global_toc() + offset;
  }
}

#ifdef _LP64
// Patch compressed oops or klass constants.
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
  assert(UseCompressedOops, "Should only patch compressed oops");

  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The relocation points to the second instruction, the addi,
  // and the addi reads and writes the same register dst.
  const int dst = inv_rt_field(inst2);
  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
  // Now, find the preceding lis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  bool inst1_found = false;
  while (inst1_addr >= bound) {
    inst1 = *(int *)inst1_addr;
    if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break; }
    inst1_addr -= BytesPerInstWord;
  }
  assert(inst1_found, "inst is not lis");

  int xc = (data >> 16) & 0xffff;
  int xd = (data >>  0) & 0xffff;

  set_imm((int *)inst1_addr, (short)(xc + ((xd & 0x8000) != 0 ? 1 : 0))); // see enc_load_con_narrow1/2
  set_imm((int *)inst2_addr, (short)(xd));
  return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
}
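
// Worked example of the carry compensation above (illustrative only): the low
// half is materialized by an addi, whose immediate is sign-extended. For
// data = 0x00018000 we get xc = 0x0001 and xd = 0x8000; since bit 15 of xd is
// set, the patched pair behaves like
//   lis  dst, 0x0002        // 0x0001 plus the carry
//   addi dst, dst, -0x8000  // 0x8000 sign-extended
// which yields 0x00020000 - 0x8000 = 0x00018000, the requested value.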

// Get compressed oop or klass constant.
narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
  assert(UseCompressedOops, "Should only patch compressed oops");

  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The relocation points to the second instruction, the addi,
  // and the addi reads and writes the same register dst.
  const int dst = inv_rt_field(inst2);
  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
  // Now, find the preceding lis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  bool inst1_found = false;

  while (inst1_addr >= bound) {
    inst1 = *(int *) inst1_addr;
    if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break; }
    inst1_addr -= BytesPerInstWord;
  }
  assert(inst1_found, "inst is not lis");

  uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
  uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr, 0)) & 0xffff) << 16);
  return (int) (xl | xh);
}
#endif // _LP64

void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) {
  int toc_offset = 0;
  // Use RelocationHolder::none for the constant pool entry, otherwise
  // we will end up with a failing NativeCall::verify(x) where x is
  // the address of the constant pool entry.
  // FIXME: We should insert relocation information for oops at the constant
  // pool entries instead of inserting it at the loads; patching of a constant
  // pool entry should be less expensive.
  Unimplemented();
  if (false) {
    address oop_address = address_constant((address)a.value(), RelocationHolder::none);
    // Relocate at the pc of the load.
    relocate(a.rspec());
    toc_offset = (int)(oop_address - code()->consts()->start());
  }
  ld_largeoffset_unchecked(dst, toc_offset, toc, true);
}

bool MacroAssembler::is_load_const_from_method_toc_at(address a) {
  const address inst1_addr = a;
  const int inst1 = *(int *)inst1_addr;

  // The relocation points to the ld or the addis.
  return (is_ld(inst1)) ||
         (is_addis(inst1) && inv_ra_field(inst1) != 0);
}

int MacroAssembler::get_offset_of_load_const_from_method_toc_at(address a) {
  assert(is_load_const_from_method_toc_at(a), "must be load_const_from_method_toc");

  const address inst1_addr = a;
  const int inst1 = *(int *)inst1_addr;

  if (is_ld(inst1)) {
    return inv_d1_field(inst1);
  } else if (is_addis(inst1)) {
    const int dst = inv_rt_field(inst1);

    // Now, find the succeeding ld which reads and writes to dst.
    address inst2_addr = inst1_addr + BytesPerInstWord;
    int inst2 = 0;
    while (true) {
      inst2 = *(int *) inst2_addr;
      if (is_ld(inst2) && inv_ra_field(inst2) == dst && inv_rt_field(inst2) == dst) {
        // Stop, found the ld which reads and writes dst.
        break;
      }
      inst2_addr += BytesPerInstWord;
    }
    return (inv_d1_field(inst1) << 16) + inv_d1_field(inst2);
  }
  ShouldNotReachHere();
  return 0;
}

// Get the constant from a `load_const' sequence.
long MacroAssembler::get_const(address a) {
  assert(is_load_const_at(a), "not a load of a constant");
  const int *p = (const int*) a;
  unsigned long x = (((unsigned long) (get_imm(a, 0) & 0xffff)) << 48);
  if (is_ori(*(p + 1))) {
    x |= (((unsigned long) (get_imm(a, 1) & 0xffff)) << 32);
    x |= (((unsigned long) (get_imm(a, 3) & 0xffff)) << 16);
    x |= (((unsigned long) (get_imm(a, 4) & 0xffff)));
  } else if (is_lis(*(p + 1))) {
    x |= (((unsigned long) (get_imm(a, 2) & 0xffff)) << 32);
    x |= (((unsigned long) (get_imm(a, 1) & 0xffff)) << 16);
    x |= (((unsigned long) (get_imm(a, 3) & 0xffff)));
  } else {
    ShouldNotReachHere();
    return (long) 0;
  }
  return (long) x;
}

// Patch the 64 bit constant of a `load_const' sequence. This is a low-level
// procedure; it neither flushes the instruction cache nor is it MT-safe.
void MacroAssembler::patch_const(address a, long x) {
  assert(is_load_const_at(a), "not a load of a constant");
  int *p = (int*) a;
  if (is_ori(*(p + 1))) {
    set_imm(0 + p, (x >> 48) & 0xffff);
    set_imm(1 + p, (x >> 32) & 0xffff);
    set_imm(3 + p, (x >> 16) & 0xffff);
    set_imm(4 + p, x & 0xffff);
  } else if (is_lis(*(p + 1))) {
    set_imm(0 + p, (x >> 48) & 0xffff);
    set_imm(2 + p, (x >> 32) & 0xffff);
    set_imm(1 + p, (x >> 16) & 0xffff);
    set_imm(3 + p, x & 0xffff);
  } else {
    ShouldNotReachHere();
  }
}
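
// Note on the two encodings handled above (inferred from the immediate slots
// used; not verified against load_const itself): the ori-variant appears to
// place the four halfword immediates in instruction slots 0, 1, 3 and 4, with
// slot 2 presumably holding a shift, while the lis-variant places them in
// slots 0, 2, 1 and 3. get_const and patch_const must use matching slot orders
// for the same encoding, which is why both dispatch on the second instruction.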

AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
  int index = oop_recorder()->allocate_metadata_index(obj);
  RelocationHolder rspec = metadata_Relocation::spec(index);
  return AddressLiteral((address)obj, rspec);
}

AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
  int index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = metadata_Relocation::spec(index);
  return AddressLiteral((address)obj, rspec);
}

AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->allocate_oop_index(obj);
  return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
}

AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
}

RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp, int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0) {
    return RegisterOrConstant(value + offset);
  }

  // Load indirectly to solve generation ordering problem.
  // static address, no relocation
  int simm16_offset = load_const_optimized(tmp, delayed_value_addr, noreg, true);
  ld(tmp, simm16_offset, tmp); // must be aligned ((xa & 3) == 0)

  if (offset != 0) {
    addi(tmp, tmp, offset);
  }

  return RegisterOrConstant(tmp);
}

#ifndef PRODUCT
void MacroAssembler::pd_print_patched_instruction(address branch) {
  Unimplemented(); // TODO: PPC port
}
#endif // ndef PRODUCT

// Conditional far branch for destinations encodable in 24+2 bits.
void MacroAssembler::bc_far(int boint, int biint, Label& dest, int optimize) {

  // If requested by flag optimize, relocate the bc_far as a
  // runtime_call and prepare for optimizing it when the code gets
  // relocated.
  if (optimize == bc_far_optimize_on_relocate) {
    relocate(relocInfo::runtime_call_type);
  }

  // variant 2:
  //
  //    b!cxx SKIP
  //    bxx   DEST
  //  SKIP:
  //

  const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
                                                opposite_bcond(inv_boint_bcond(boint)));

  // We emit two branches.
  // First, a conditional branch which jumps around the far branch.
  const address not_taken_pc = pc() + 2 * BytesPerInstWord;
  const address bc_pc        = pc();
  bc(opposite_boint, biint, not_taken_pc);

  const int bc_instr = *(int*)bc_pc;
  assert(not_taken_pc == (address)inv_bd_field(bc_instr, (intptr_t)bc_pc), "postcondition");
  assert(opposite_boint == inv_bo_field(bc_instr), "postcondition");
  assert(boint == add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(bc_instr))),
                                     opposite_bcond(inv_boint_bcond(inv_bo_field(bc_instr)))),
         "postcondition");
  assert(biint == inv_bi_field(bc_instr), "postcondition");

  // Second, an unconditional far branch which jumps to dest.
  // Note: target(dest) remembers the current pc (see CodeSection::target)
  // and returns the current pc if the label is not bound yet; when
  // the label gets bound, the unconditional far branch will be patched.
  const address target_pc = target(dest);
  const address b_pc = pc();
  b(target_pc);

  assert(not_taken_pc == pc(), "postcondition");
  assert(dest.is_bound() || target_pc == b_pc, "postcondition");
}

bool MacroAssembler::is_bc_far_at(address instruction_addr) {
  return is_bc_far_variant1_at(instruction_addr) ||
         is_bc_far_variant2_at(instruction_addr) ||
         is_bc_far_variant3_at(instruction_addr);
}

address MacroAssembler::get_dest_of_bc_far_at(address instruction_addr) {
  if (is_bc_far_variant1_at(instruction_addr)) {
    const address instruction_1_addr = instruction_addr;
    const int instruction_1 = *(int*)instruction_1_addr;
    return (address)inv_bd_field(instruction_1, (intptr_t)instruction_1_addr);
  } else if (is_bc_far_variant2_at(instruction_addr)) {
    const address instruction_2_addr = instruction_addr + 4;
    return bxx_destination(instruction_2_addr);
  } else if (is_bc_far_variant3_at(instruction_addr)) {
    return instruction_addr + 8;
  }
  // variant 4 ???
  ShouldNotReachHere();
  return NULL;
}
void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address dest) {

  if (is_bc_far_variant3_at(instruction_addr)) {
    // variant 3, far cond branch to the next instruction, already patched to nops:
    //
    //    nop
    //    endgroup
    //  SKIP/DEST:
    //
    return;
  }

  // first, extract boint and biint from the current branch
  int boint = 0;
  int biint = 0;

  ResourceMark rm;
  const int code_size = 2 * BytesPerInstWord;
  CodeBuffer buf(instruction_addr, code_size);
  MacroAssembler masm(&buf);
  if (is_bc_far_variant2_at(instruction_addr) && dest == instruction_addr + 8) {
    // Far branch to next instruction: Optimize it by patching nops (produce variant 3).
    masm.nop();
    masm.endgroup();
  } else {
    if (is_bc_far_variant1_at(instruction_addr)) {
      // variant 1, the 1st instruction contains the destination address:
      //
      //    bcxx  DEST
      //    endgroup
      //
      const int instruction_1 = *(int*)(instruction_addr);
      boint = inv_bo_field(instruction_1);
      biint = inv_bi_field(instruction_1);
    } else if (is_bc_far_variant2_at(instruction_addr)) {
      // variant 2, the 2nd instruction contains the destination address:
      //
      //    b!cxx SKIP
      //    bxx   DEST
      //  SKIP:
      //
      const int instruction_1 = *(int*)(instruction_addr);
      boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(instruction_1))),
                                 opposite_bcond(inv_boint_bcond(inv_bo_field(instruction_1))));
      biint = inv_bi_field(instruction_1);
    } else {
      // variant 4???
      ShouldNotReachHere();
    }

    // second, set the new branch destination and optimize the code
    if (dest != instruction_addr + 4 && // the bc_far is still unbound!
        masm.is_within_range_of_bcxx(dest, instruction_addr)) {
      // variant 1:
      //
      //    bcxx  DEST
      //    endgroup
      //
      masm.bc(boint, biint, dest);
      masm.endgroup();
    } else {
      // variant 2:
      //
      //    b!cxx SKIP
      //    bxx   DEST
      //  SKIP:
      //
      const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
                                                    opposite_bcond(inv_boint_bcond(boint)));
      const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord;
      masm.bc(opposite_boint, biint, not_taken_pc);
      masm.b(dest);
    }
  }
  ICache::invalidate_range(instruction_addr, code_size);
}
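
// Summary of the bc_far variants handled above (each occupies two instruction
// words, so they can be patched into one another in place):
//   variant 1:  bcxx DEST ; endgroup           -- DEST within bcxx range
//   variant 2:  b!cxx SKIP ; bxx DEST ; SKIP:  -- far DEST
//   variant 3:  nop ; endgroup                 -- branch to the next instruction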

// Emit a NOT MT-safe patchable 64 bit absolute call/jump.
void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) {
  // get current pc
  uint64_t start_pc = (uint64_t) pc();

  const address pc_of_bl = (address) (start_pc + (6*BytesPerInstWord)); // bl is last
  const address pc_of_b  = (address) (start_pc + (0*BytesPerInstWord)); // b is first

  // relocate here
  if (rt != relocInfo::none) {
    relocate(rt);
  }

  if ( ReoptimizeCallSequences &&
       (( link && is_within_range_of_b(dest, pc_of_bl)) ||
        (!link && is_within_range_of_b(dest, pc_of_b)))) {
    // variant 2:
    // Emit an optimized, pc-relative call/jump.

    if (link) {
      // some padding
      nop();
      nop();
      nop();
      nop();
      nop();
      nop();

      // do the call
      assert(pc() == pc_of_bl, "just checking");
      bl(dest, relocInfo::none);
    } else {
      // do the jump
      assert(pc() == pc_of_b, "just checking");
      b(dest, relocInfo::none);

      // some padding
      nop();
      nop();
      nop();
      nop();
      nop();
      nop();
    }

    // Assert that we can identify the emitted call/jump.
    assert(is_bxx64_patchable_variant2_at((address)start_pc, link),
           "can't identify emitted call");
  } else {
    // variant 1:

    mr(R0, R11); // spill R11 -> R0.

    // Load the destination address into CTR,
    // calculate destination relative to global toc.
    calculate_address_from_global_toc(R11, dest, true, true, false);

    mtctr(R11);
    mr(R11, R0); // spill R11 <- R0.
    nop();

    // do the call/jump
    if (link) {
      bctrl();
    } else {
      bctr();
    }
    // Assert that we can identify the emitted call/jump.
    assert(is_bxx64_patchable_variant1b_at((address)start_pc, link),
           "can't identify emitted call");
  }

  // Assert that we can identify the emitted call/jump.
  assert(is_bxx64_patchable_at((address)start_pc, link),
         "can't identify emitted call");
  assert(get_dest_of_bxx64_patchable_at((address)start_pc, link) == dest,
         "wrong encoding of dest address");
}
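
// Note (added explanation): both variants above occupy the same fixed number
// of instruction words (seven, padded with nops where needed). Keeping the
// size constant is what makes the site patchable:
// set_dest_of_bxx64_patchable_at() below can re-emit either variant in place
// without moving any surrounding code.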

// Identify a bxx64_patchable instruction.
bool MacroAssembler::is_bxx64_patchable_at(address instruction_addr, bool link) {
  return is_bxx64_patchable_variant1b_at(instruction_addr, link)
    //|| is_bxx64_patchable_variant1_at(instruction_addr, link)
      || is_bxx64_patchable_variant2_at(instruction_addr, link);
}

// Does the call64_patchable instruction use a pc-relative encoding of
// the call destination?
bool MacroAssembler::is_bxx64_patchable_pcrelative_at(address instruction_addr, bool link) {
  // variant 2 is pc-relative
  return is_bxx64_patchable_variant2_at(instruction_addr, link);
}

// Identify variant 1.
bool MacroAssembler::is_bxx64_patchable_variant1_at(address instruction_addr, bool link) {
  unsigned int* instr = (unsigned int*) instruction_addr;
  return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l]
      && is_mtctr(instr[5]) // mtctr
      && is_load_const_at(instruction_addr);
}

// Identify variant 1b: load destination relative to global toc.
bool MacroAssembler::is_bxx64_patchable_variant1b_at(address instruction_addr, bool link) {
  unsigned int* instr = (unsigned int*) instruction_addr;
  return (link ? is_bctrl(instr[6]) : is_bctr(instr[6])) // bctr[l]
      && is_mtctr(instr[3]) // mtctr
      && is_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord, instruction_addr);
}

// Identify variant 2.
bool MacroAssembler::is_bxx64_patchable_variant2_at(address instruction_addr, bool link) {
  unsigned int* instr = (unsigned int*) instruction_addr;
  if (link) {
    return is_bl (instr[6])  // bl dest is last
        && is_nop(instr[0])  // nop
        && is_nop(instr[1])  // nop
        && is_nop(instr[2])  // nop
        && is_nop(instr[3])  // nop
        && is_nop(instr[4])  // nop
        && is_nop(instr[5]); // nop
  } else {
    return is_b  (instr[0])  // b dest is first
        && is_nop(instr[1])  // nop
        && is_nop(instr[2])  // nop
        && is_nop(instr[3])  // nop
        && is_nop(instr[4])  // nop
        && is_nop(instr[5])  // nop
        && is_nop(instr[6]); // nop
  }
}

// Set dest address of a bxx64_patchable instruction.
void MacroAssembler::set_dest_of_bxx64_patchable_at(address instruction_addr, address dest, bool link) {
  ResourceMark rm;
  int code_size = MacroAssembler::bxx64_patchable_size;
  CodeBuffer buf(instruction_addr, code_size);
  MacroAssembler masm(&buf);
  masm.bxx64_patchable(dest, relocInfo::none, link);
  ICache::invalidate_range(instruction_addr, code_size);
}

// Get dest address of a bxx64_patchable instruction.
address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) {
  if (is_bxx64_patchable_variant1_at(instruction_addr, link)) {
    return (address) (unsigned long) get_const(instruction_addr);
  } else if (is_bxx64_patchable_variant2_at(instruction_addr, link)) {
    unsigned int* instr = (unsigned int*) instruction_addr;
    if (link) {
      const int instr_idx = 6; // bl is last
      int branchoffset = branch_destination(instr[instr_idx], 0);
      return instruction_addr + branchoffset + instr_idx*BytesPerInstWord;
    } else {
      const int instr_idx = 0; // b is first
      int branchoffset = branch_destination(instr[instr_idx], 0);
      return instruction_addr + branchoffset + instr_idx*BytesPerInstWord;
    }
  // Load dest relative to global toc.
  } else if (is_bxx64_patchable_variant1b_at(instruction_addr, link)) {
    return get_address_of_calculate_address_from_global_toc_at(instruction_addr + 2*BytesPerInstWord,
                                                               instruction_addr);
  } else {
    ShouldNotReachHere();
    return NULL;
  }
}
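
// Typical patching flow for the routines above: a call/jump site is emitted
// once via bxx64_patchable(); to redirect it later,
// set_dest_of_bxx64_patchable_at() simply re-assembles the fixed-size sequence
// in place and invalidates the instruction cache, while
// get_dest_of_bxx64_patchable_at() decodes whichever variant is present.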

// Uses ordering which corresponds to ABI:
//    _savegpr0_14:  std  r14,-144(r1)
//    _savegpr0_15:  std  r15,-136(r1)
//    _savegpr0_16:  std  r16,-128(r1)
void MacroAssembler::save_nonvolatile_gprs(Register dst, int offset) {
  std(R14, offset, dst);  offset += 8;
  std(R15, offset, dst);  offset += 8;
  std(R16, offset, dst);  offset += 8;
  std(R17, offset, dst);  offset += 8;
  std(R18, offset, dst);  offset += 8;
  std(R19, offset, dst);  offset += 8;
  std(R20, offset, dst);  offset += 8;
  std(R21, offset, dst);  offset += 8;
  std(R22, offset, dst);  offset += 8;
  std(R23, offset, dst);  offset += 8;
  std(R24, offset, dst);  offset += 8;
  std(R25, offset, dst);  offset += 8;
  std(R26, offset, dst);  offset += 8;
  std(R27, offset, dst);  offset += 8;
  std(R28, offset, dst);  offset += 8;
  std(R29, offset, dst);  offset += 8;
  std(R30, offset, dst);  offset += 8;
  std(R31, offset, dst);  offset += 8;

  stfd(F14, offset, dst); offset += 8;
  stfd(F15, offset, dst); offset += 8;
  stfd(F16, offset, dst); offset += 8;
  stfd(F17, offset, dst); offset += 8;
  stfd(F18, offset, dst); offset += 8;
  stfd(F19, offset, dst); offset += 8;
  stfd(F20, offset, dst); offset += 8;
  stfd(F21, offset, dst); offset += 8;
  stfd(F22, offset, dst); offset += 8;
  stfd(F23, offset, dst); offset += 8;
  stfd(F24, offset, dst); offset += 8;
  stfd(F25, offset, dst); offset += 8;
  stfd(F26, offset, dst); offset += 8;
  stfd(F27, offset, dst); offset += 8;
  stfd(F28, offset, dst); offset += 8;
  stfd(F29, offset, dst); offset += 8;
  stfd(F30, offset, dst); offset += 8;
  stfd(F31, offset, dst);
}

// Uses ordering which corresponds to ABI:
//    _restgpr0_14:  ld   r14,-144(r1)
//    _restgpr0_15:  ld   r15,-136(r1)
//    _restgpr0_16:  ld   r16,-128(r1)
void MacroAssembler::restore_nonvolatile_gprs(Register src, int offset) {
  ld(R14, offset, src);  offset += 8;
  ld(R15, offset, src);  offset += 8;
  ld(R16, offset, src);  offset += 8;
  ld(R17, offset, src);  offset += 8;
  ld(R18, offset, src);  offset += 8;
  ld(R19, offset, src);  offset += 8;
  ld(R20, offset, src);  offset += 8;
  ld(R21, offset, src);  offset += 8;
  ld(R22, offset, src);  offset += 8;
  ld(R23, offset, src);  offset += 8;
  ld(R24, offset, src);  offset += 8;
  ld(R25, offset, src);  offset += 8;
  ld(R26, offset, src);  offset += 8;
  ld(R27, offset, src);  offset += 8;
  ld(R28, offset, src);  offset += 8;
  ld(R29, offset, src);  offset += 8;
  ld(R30, offset, src);  offset += 8;
  ld(R31, offset, src);  offset += 8;

  // FP registers
  lfd(F14, offset, src); offset += 8;
  lfd(F15, offset, src); offset += 8;
  lfd(F16, offset, src); offset += 8;
  lfd(F17, offset, src); offset += 8;
  lfd(F18, offset, src); offset += 8;
  lfd(F19, offset, src); offset += 8;
  lfd(F20, offset, src); offset += 8;
  lfd(F21, offset, src); offset += 8;
  lfd(F22, offset, src); offset += 8;
  lfd(F23, offset, src); offset += 8;
  lfd(F24, offset, src); offset += 8;
  lfd(F25, offset, src); offset += 8;
  lfd(F26, offset, src); offset += 8;
  lfd(F27, offset, src); offset += 8;
  lfd(F28, offset, src); offset += 8;
  lfd(F29, offset, src); offset += 8;
  lfd(F30, offset, src); offset += 8;
  lfd(F31, offset, src);
}

// For verify_oops.
void MacroAssembler::save_volatile_gprs(Register dst, int offset) {
  std(R3,  offset, dst); offset += 8;
  std(R4,  offset, dst); offset += 8;
  std(R5,  offset, dst); offset += 8;
  std(R6,  offset, dst); offset += 8;
  std(R7,  offset, dst); offset += 8;
  std(R8,  offset, dst); offset += 8;
  std(R9,  offset, dst); offset += 8;
  std(R10, offset, dst); offset += 8;
  std(R11, offset, dst); offset += 8;
  std(R12, offset, dst);
}

// For verify_oops.
void MacroAssembler::restore_volatile_gprs(Register src, int offset) {
  ld(R3,  offset, src); offset += 8;
  ld(R4,  offset, src); offset += 8;
  ld(R5,  offset, src); offset += 8;
  ld(R6,  offset, src); offset += 8;
  ld(R7,  offset, src); offset += 8;
  ld(R8,  offset, src); offset += 8;
  ld(R9,  offset, src); offset += 8;
  ld(R10, offset, src); offset += 8;
  ld(R11, offset, src); offset += 8;
  ld(R12, offset, src);
}

void MacroAssembler::save_LR_CR(Register tmp) {
  mfcr(tmp);
  std(tmp, _abi(cr), R1_SP);
  mflr(tmp);
  std(tmp, _abi(lr), R1_SP);
  // Tmp must contain lr on exit! (see return_addr and prolog in ppc64.ad)
}

void MacroAssembler::restore_LR_CR(Register tmp) {
  assert(tmp != R1_SP, "must be distinct");
  ld(tmp, _abi(lr), R1_SP);
  mtlr(tmp);
  ld(tmp, _abi(cr), R1_SP);
  mtcr(tmp);
}

address MacroAssembler::get_PC_trash_LR(Register result) {
  Label L;
  bl(L);
  bind(L);
  address lr_pc = pc();
  mflr(result);
  return lr_pc;
}

void MacroAssembler::resize_frame(Register offset, Register tmp) {
#ifdef ASSERT
  assert_different_registers(offset, tmp, R1_SP);
  andi_(tmp, offset, frame::alignment_in_bytes-1);
  asm_assert_eq("resize_frame: unaligned", 0x204);
#endif

  // tmp <- *(SP)
  ld(tmp, _abi(callers_sp), R1_SP);
  // addr <- SP + offset;
  // *(addr) <- tmp;
  // SP <- addr
  stdux(tmp, R1_SP, offset);
}

void MacroAssembler::resize_frame(int offset, Register tmp) {
  assert(is_simm(offset, 16), "too big an offset");
  assert_different_registers(tmp, R1_SP);
  assert((offset & (frame::alignment_in_bytes-1)) == 0, "resize_frame: unaligned");
  // tmp <- *(SP)
  ld(tmp, _abi(callers_sp), R1_SP);
  // addr <- SP + offset;
  // *(addr) <- tmp;
  // SP <- addr
  stdu(tmp, offset, R1_SP);
}

void MacroAssembler::resize_frame_absolute(Register addr, Register tmp1, Register tmp2) {
  // (addr == tmp1) || (addr == tmp2) is allowed here!
  assert(tmp1 != tmp2, "must be distinct");

  // compute offset w.r.t. current stack pointer
  // tmp_1 <- addr - SP (!)
  subf(tmp1, R1_SP, addr);

  // atomically update SP keeping back link.
  resize_frame(tmp1 /* offset */, tmp2 /* tmp */);
}

void MacroAssembler::push_frame(Register bytes, Register tmp) {
#ifdef ASSERT
  assert(bytes != R0, "r0 not allowed here");
  andi_(R0, bytes, frame::alignment_in_bytes-1);
  asm_assert_eq("push_frame(Reg, Reg): unaligned", 0x203);
#endif
  neg(tmp, bytes);
  stdux(R1_SP, R1_SP, tmp);
}

// Push a frame of size `bytes'.
void MacroAssembler::push_frame(unsigned int bytes, Register tmp) {
  long offset = align_addr(bytes, frame::alignment_in_bytes);
  if (is_simm(-offset, 16)) {
    stdu(R1_SP, -offset, R1_SP);
  } else {
    load_const(tmp, -offset);
    stdux(R1_SP, R1_SP, tmp);
  }
}
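
// Note (added explanation): both push_frame variants above write the old SP to
// the new stack top with a single stdu/stdux. This allocates the frame and
// maintains the back link (caller's SP) atomically, so the stack remains
// walkable at every instruction boundary.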

// Push a frame of size `bytes' plus abi112 on top.
void MacroAssembler::push_frame_abi112(unsigned int bytes, Register tmp) {
  push_frame(bytes + frame::abi_112_size, tmp);
}

// Set up a new C frame with a spill area for non-volatile GPRs and
// additional space for local variables.
void MacroAssembler::push_frame_abi112_nonvolatiles(unsigned int bytes,
                                                    Register tmp) {
  push_frame(bytes + frame::abi_112_size + frame::spill_nonvolatiles_size, tmp);
}

// Pop current C frame.
void MacroAssembler::pop_frame() {
  ld(R1_SP, _abi(callers_sp), R1_SP);
}

// Generic version of a call to C function via a function descriptor
// with variable support for C calling conventions (TOC, ENV, etc.).
// Updates and returns _last_calls_return_pc.
address MacroAssembler::branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
                                  bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee) {
  // we emit standard ptrgl glue code here
  assert((function_descriptor != R0), "function_descriptor cannot be R0");

  // retrieve necessary entries from the function descriptor
  ld(R0, in_bytes(FunctionDescriptor::entry_offset()), function_descriptor);
  mtctr(R0);

  if (load_toc_of_callee) {
    ld(R2_TOC, in_bytes(FunctionDescriptor::toc_offset()), function_descriptor);
  }
  if (load_env_of_callee) {
    ld(R11, in_bytes(FunctionDescriptor::env_offset()), function_descriptor);
  } else if (load_toc_of_callee) {
    li(R11, 0);
  }

  // do a call or a branch
  if (and_link) {
    bctrl();
  } else {
    bctr();
  }
  _last_calls_return_pc = pc();

  return _last_calls_return_pc;
}
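
// Note (added explanation): under the 64-bit ELF ABI used here, a C function
// pointer refers to a function descriptor rather than to code. The
// FunctionDescriptor accessors used in branch_to() reflect its three fields,
// roughly (a sketch, not the actual class definition):
//
//   struct FunctionDescriptor {
//     address entry; // address of the actual code
//     address toc;   // TOC base (R2) the callee expects
//     address env;   // environment pointer (R11), used by some languages
//   };
//
// branch_to() loads the entry point into CTR and optionally sets up R2/R11
// before branching, which is the standard ptrgl glue mentioned in its comment.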

// Call a C function via a function descriptor and use full C calling
// conventions.
// We don't use the TOC in generated code, so there is no need to save
// and restore its value.
address MacroAssembler::call_c(Register fd) {
  return branch_to(fd, /*and_link=*/true,
                       /*save toc=*/false,
                       /*restore toc=*/false,
                       /*load toc=*/true,
                       /*load env=*/true);
}

address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
  if (rt != relocInfo::none) {
    // this call needs to be relocatable
    if (!ReoptimizeCallSequences
        || (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
        || fd == NULL   // support code-size estimation
        || !fd->is_friend_function()
        || fd->entry() == NULL) {
      // it's not a friend function as defined by class FunctionDescriptor,
      // so do a full call-c here.
      load_const(R11, (address)fd, R0);

      bool has_env = (fd != NULL && fd->env() != NULL);
      return branch_to(R11, /*and_link=*/true,
                            /*save toc=*/false,
                            /*restore toc=*/false,
                            /*load toc=*/true,
                            /*load env=*/has_env);
    } else {
      // It's a friend function. Load the entry point and don't care about
      // toc and env. Use an optimizable call instruction, but ensure the
      // same code-size as in the case of a non-friend function.
      nop();
      nop();
      nop();
      bl64_patchable(fd->entry(), rt);
      _last_calls_return_pc = pc();
      return _last_calls_return_pc;
    }
  } else {
    // This call does not need to be relocatable, do more aggressive
    // optimizations.
    if (!ReoptimizeCallSequences
        || !fd->is_friend_function()) {
      // It's not a friend function as defined by class FunctionDescriptor,
      // so do a full call-c here.
      load_const(R11, (address)fd, R0);
      return branch_to(R11, /*and_link=*/true,
                            /*save toc=*/false,
                            /*restore toc=*/false,
                            /*load toc=*/true,
                            /*load env=*/true);
    } else {
      // it's a friend function, load the entry point and don't care about
      // toc and env.
      address dest = fd->entry();
      if (is_within_range_of_b(dest, pc())) {
        bl(dest);
      } else {
        bl64_patchable(dest, rt);
      }
      _last_calls_return_pc = pc();
      return _last_calls_return_pc;
    }
  }
}

// Call a C function. All constants needed reside in TOC.
//
// Read the address to call from the TOC.
// Read env from TOC, if fd specifies an env.
// Read new TOC from TOC.
address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd,
                                         relocInfo::relocType rt, Register toc) {
  if (!ReoptimizeCallSequences
      || (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
      || !fd->is_friend_function()) {
    // It's not a friend function as defined by class FunctionDescriptor,
    // so do a full call-c here.
    assert(fd->entry() != NULL, "function must be linked");

    AddressLiteral fd_entry(fd->entry());
    load_const_from_method_toc(R11, fd_entry, toc);
    mtctr(R11);
    if (fd->env() == NULL) {
      li(R11, 0);
      nop();
    } else {
      AddressLiteral fd_env(fd->env());
      load_const_from_method_toc(R11, fd_env, toc);
    }
    AddressLiteral fd_toc(fd->toc());
    load_toc_from_toc(R2_TOC, fd_toc, toc);
    // R2_TOC is killed.
    bctrl();
    _last_calls_return_pc = pc();
  } else {
    // It's a friend function, load the entry point and don't care about
    // toc and env. Use an optimizable call instruction, but ensure the
    // same code-size as in the case of a non-friend function.
    nop();
    bl64_patchable(fd->entry(), rt);
    _last_calls_return_pc = pc();
  }
  return _last_calls_return_pc;
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register last_java_sp,
                                  address entry_point,
                                  bool check_exceptions) {
  BLOCK_COMMENT("call_VM {");
  // Determine last_java_sp register.
  if (!last_java_sp->is_valid()) {
    last_java_sp = R1_SP;
  }
  set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, R11_scratch1);

  // ARG1 must hold thread address.
  mr(R3_ARG1, R16_thread);

  address return_pc = call_c((FunctionDescriptor*)entry_point, relocInfo::none);

  reset_last_Java_frame();

  // Check for pending exceptions.
  if (check_exceptions) {
    // We don't check for exceptions here.
    ShouldNotReachHere();
  }

  // Get oop result if there is one and reset the value in the thread.
  if (oop_result->is_valid()) {
    get_vm_result(oop_result);
  }

  _last_calls_return_pc = return_pc;
  BLOCK_COMMENT("} call_VM");
}

void MacroAssembler::call_VM_leaf_base(address entry_point) {
  BLOCK_COMMENT("call_VM_leaf {");
  call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::none);
  BLOCK_COMMENT("} call_VM_leaf");
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_base(oop_result, noreg, entry_point, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1,
                             bool check_exceptions) {
  // R3_ARG1 is reserved for the thread.
  mr_if_needed(R4_ARG2, arg_1);
  call_VM(oop_result, entry_point, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2,
                             bool check_exceptions) {
  // R3_ARG1 is reserved for the thread
  mr_if_needed(R4_ARG2, arg_1);
  assert(arg_2 != R4_ARG2, "smashed argument");
  mr_if_needed(R5_ARG3, arg_2);
  call_VM(oop_result, entry_point, check_exceptions);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_base(entry_point);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  mr_if_needed(R3_ARG1, arg_1);
  call_VM_leaf(entry_point);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  mr_if_needed(R3_ARG1, arg_1);
  assert(arg_2 != R3_ARG1, "smashed argument");
  mr_if_needed(R4_ARG2, arg_2);
  call_VM_leaf(entry_point);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  mr_if_needed(R3_ARG1, arg_1);
  assert(arg_2 != R3_ARG1, "smashed argument");
  mr_if_needed(R4_ARG2, arg_2);
  assert(arg_3 != R3_ARG1 && arg_3 != R4_ARG2, "smashed argument");
  mr_if_needed(R5_ARG3, arg_3);
  call_VM_leaf(entry_point);
}

// Check whether instruction is a read access to the polling page
// which was emitted by load_from_polling_page(..).
bool MacroAssembler::is_load_from_polling_page(int instruction, void* ucontext,
                                               address* polling_address_ptr) {
  if (!is_ld(instruction))
    return false; // It's not a ld. Fail.

  int rt = inv_rt_field(instruction);
  int ra = inv_ra_field(instruction);
  int ds = inv_ds_field(instruction);
  if (!(ds == 0 && ra != 0 && rt == 0)) {
    return false; // It's not a ld(r0, X, ra). Fail.
  }

  if (!ucontext) {
    // Set polling address.
    if (polling_address_ptr != NULL) {
      *polling_address_ptr = NULL;
    }
    return true; // No ucontext given. Can't check value of ra. Assume true.
  }

#ifdef LINUX
  // Ucontext given. Check that register ra contains the address of
  // the safepoint polling page.
  ucontext_t* uc = (ucontext_t*) ucontext;
  // Set polling address.
  address addr = (address)uc->uc_mcontext.regs->gpr[ra] + (ssize_t)ds;
  if (polling_address_ptr != NULL) {
    *polling_address_ptr = addr;
  }
  return os::is_poll_address(addr);
#else
  // Not on Linux, ucontext must be NULL.
  ShouldNotReachHere();
  return false;
#endif
}

bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) {
#ifdef LINUX
  ucontext_t* uc = (ucontext_t*) ucontext;

  if (is_stwx(instruction) || is_stwux(instruction)) {
    int ra = inv_ra_field(instruction);
    int rb = inv_rb_field(instruction);

    // look up content of ra and rb in ucontext
    address ra_val = (address)uc->uc_mcontext.regs->gpr[ra];
    long    rb_val = (long)uc->uc_mcontext.regs->gpr[rb];
    return os::is_memory_serialize_page(thread, ra_val + rb_val);
  } else if (is_stw(instruction) || is_stwu(instruction)) {
    int ra = inv_ra_field(instruction);
    int d1 = inv_d1_field(instruction);

    // look up content of ra in ucontext
    address ra_val = (address)uc->uc_mcontext.regs->gpr[ra];
    return os::is_memory_serialize_page(thread, ra_val + d1);
  } else {
    return false;
  }
#else
  // workaround not needed on !LINUX :-)
  ShouldNotCallThis();
  return false;
#endif
}

void MacroAssembler::bang_stack_with_offset(int offset) {
  // When increasing the stack, the old stack pointer will be written
  // to the new top of stack according to the PPC64 abi.
  // Therefore, stack banging is not necessary when increasing
  // the stack by <= os::vm_page_size() bytes.
  // When increasing the stack by a larger amount, this method is
  // called repeatedly to bang the intermediate pages.

  // Stack grows down, caller passes positive offset.
  assert(offset > 0, "must bang with positive offset");

  long stdoffset = -offset;

  if (is_simm(stdoffset, 16)) {
    // Signed 16 bit offset, a simple std is ok.
    if (UseLoadInstructionsForStackBangingPPC64) {
      ld(R0, (int)(signed short)stdoffset, R1_SP);
    } else {
      std(R0, (int)(signed short)stdoffset, R1_SP);
    }
  } else if (is_simm(stdoffset, 31)) {
    const int hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset);
    const int lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);

    Register tmp = R11;
    addis(tmp, R1_SP, hi);
    if (UseLoadInstructionsForStackBangingPPC64) {
      ld(R0, lo, tmp);
    } else {
      std(R0, lo, tmp);
    }
  } else {
    ShouldNotReachHere();
  }
}

// If instruction is a stack bang of the form
//    std    R0,    x(Ry),       (see bang_stack_with_offset())
//    stdu   R1_SP, x(R1_SP),    (see push_frame(), resize_frame())
// or stdux  R1_SP, Rx, R1_SP    (see push_frame(), resize_frame())
// return the banged address. Otherwise, return 0.
address MacroAssembler::get_stack_bang_address(int instruction, void *ucontext) {
#ifdef LINUX
  ucontext_t* uc = (ucontext_t*) ucontext;
  int rs = inv_rs_field(instruction);
  int ra = inv_ra_field(instruction);
  if (   (is_ld(instruction)   && rs == 0 &&  UseLoadInstructionsForStackBangingPPC64)
      || (is_std(instruction)  && rs == 0 && !UseLoadInstructionsForStackBangingPPC64)
      || (is_stdu(instruction) && rs == 1)) {
    int ds = inv_ds_field(instruction);
    // return banged address
    return ds + (address)uc->uc_mcontext.regs->gpr[ra];
  } else if (is_stdux(instruction) && rs == 1) {
    int rb = inv_rb_field(instruction);
    address sp = (address)uc->uc_mcontext.regs->gpr[1];
    long rb_val = (long)uc->uc_mcontext.regs->gpr[rb];
    return ra != 1 || rb_val >= 0 ? NULL         // not a stack bang
                                  : sp + rb_val; // banged address
  }
  return NULL; // not a stack bang
#else
  // workaround not needed on !LINUX :-)
  ShouldNotCallThis();
  return NULL;
#endif
}

// CmpxchgX sets condition register to cmpX(current, compare).
void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value,
                              Register compare_value, Register exchange_value,
                              Register addr_base, int semantics, bool cmpxchgx_hint,
                              Register int_flag_success, bool contention_hint) {
  Label retry;
  Label failed;
  Label done;

  // Save one branch if result is returned via register and
  // result register is different from the other ones.
  bool use_result_reg    = (int_flag_success != noreg);
  bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
                            int_flag_success != exchange_value && int_flag_success != addr_base);

  // release/fence semantics
  if (semantics & MemBarRel) {
    release();
  }

  if (use_result_reg && preset_result_reg) {
    li(int_flag_success, 0); // preset (assume cas failed)
  }

  // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
  if (contention_hint) { // Don't try to reserve if cmp fails.
    lwz(dest_current_value, 0, addr_base);
    cmpw(flag, dest_current_value, compare_value);
    bne(flag, failed);
  }

  // atomic emulation loop
  bind(retry);

  lwarx(dest_current_value, addr_base, cmpxchgx_hint);
  cmpw(flag, dest_current_value, compare_value);
  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
    bne_predict_not_taken(flag, failed);
  } else {
    bne(                  flag, failed);
  }
  // branch to done  => (flag == ne), (dest_current_value != compare_value)
  // fall through    => (flag == eq), (dest_current_value == compare_value)

  stwcx_(exchange_value, addr_base);
  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
  } else {
    bne(                  CCR0, retry); // StXcx_ sets CCR0.
  }
  // fall through    => (flag == eq), (dest_current_value == compare_value), (swapped)

  // Result in register (must do this at the end because int_flag_success can be the
  // same register as one above).
  if (use_result_reg) {
    li(int_flag_success, 1);
  }

  if (semantics & MemBarFenceAfter) {
    fence();
  } else if (semantics & MemBarAcq) {
    isync();
  }

  if (use_result_reg && !preset_result_reg) {
    b(done);
  }

  bind(failed);
  if (use_result_reg && !preset_result_reg) {
    li(int_flag_success, 0);
  }

  bind(done);
  // (flag == ne) => (dest_current_value != compare_value), (!swapped)
  // (flag == eq) => (dest_current_value == compare_value), ( swapped)
}
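
// Note (added explanation): the loop above emulates compare-and-swap with
// load-reserve/store-conditional. In C-like pseudocode (illustrative only):
//
//   retry:
//     current = lwarx(addr_base);                         // load and reserve
//     if (current != compare_value) goto failed;          // value mismatch
//     if (!stwcx_(exchange_value, addr_base)) goto retry; // lost reservation
//     // success: flag == eq, value swapped
//
// The optional contention hint performs a plain load and compare before taking
// the reservation, and the semantics bits wrap the loop in release/acquire
// barriers as requested.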
1360 | |
1361 // Preforms atomic compare exchange: | |
1362 // if (compare_value == *addr_base) | |
1363 // *addr_base = exchange_value | |
1364 // int_flag_success = 1; | |
1365 // else | |
1366 // int_flag_success = 0; | |
1367 // | |
1368 // ConditionRegister flag = cmp(compare_value, *addr_base) | |
1369 // Register dest_current_value = *addr_base | |
1370 // Register compare_value Used to compare with value in memory | |
1371 // Register exchange_value Written to memory if compare_value == *addr_base | |
1372 // Register addr_base The memory location to compareXChange | |
1373 // Register int_flag_success Set to 1 if exchange_value was written to *addr_base | |
1374 // | |
1375 // To avoid the costly compare exchange the value is tested beforehand. | |
1376 // Several special cases exist to avoid that unnecessary information is generated. | |
1377 // | |
1378 void MacroAssembler::cmpxchgd(ConditionRegister flag, | |
1379 Register dest_current_value, Register compare_value, Register exchange_value, | |
1380 Register addr_base, int semantics, bool cmpxchgx_hint, | |
1381 Register int_flag_success, Label* failed_ext, bool contention_hint) { | |
1382 Label retry; | |
1383 Label failed_int; | |
1384 Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int; | |
1385 Label done; | |
1386 | |
1387 // Save one branch if result is returned via register and result register is different from the other ones. | |
1388 bool use_result_reg = (int_flag_success!=noreg); | |
1389 bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value && | |
1390 int_flag_success!=exchange_value && int_flag_success!=addr_base); | |
1391 assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both"); | |
1392 | |
1393 // release/fence semantics | |
1394 if (semantics & MemBarRel) { | |
1395 release(); | |
1396 } | |
1397 | |
1398 if (use_result_reg && preset_result_reg) { | |
1399 li(int_flag_success, 0); // preset (assume cas failed) | |
1400 } | |
1401 | |
1402 // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). | |
1403 if (contention_hint) { // Don't try to reserve if cmp fails. | |
1404 ld(dest_current_value, 0, addr_base); | |
1405 cmpd(flag, dest_current_value, compare_value); | |
1406 bne(flag, failed); | |
1407 } | |
1408 | |
1409 // atomic emulation loop | |
1410 bind(retry); | |
1411 | |
1412 ldarx(dest_current_value, addr_base, cmpxchgx_hint); | |
1413 cmpd(flag, dest_current_value, compare_value); | |
1414 if (UseStaticBranchPredictionInCompareAndSwapPPC64) { | |
1415 bne_predict_not_taken(flag, failed); | |
1416 } else { | |
1417 bne( flag, failed); | |
1418 } | |
1419 | |
1420 stdcx_(exchange_value, addr_base); | |
1421 if (UseStaticBranchPredictionInCompareAndSwapPPC64) { | |
1422 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0 | |
1423 } else { | |
1424 bne( CCR0, retry); // stXcx_ sets CCR0 | |
1425 } | |
1426 | |
1427 // result in register (must do this at the end because int_flag_success can be the same register as one above) | |
1428 if (use_result_reg) { | |
1429 li(int_flag_success, 1); | |
1430 } | |
1431 | |
1432 // POWER6 doesn't need an isync in CAS, but we | |
1433 // always emit one to be on the safe side. | |
1434 if (semantics & MemBarFenceAfter) { | |
1435 fence(); | |
1436 } else if (semantics & MemBarAcq) { | |
1437 isync(); | |
1438 } | |
1439 | |
1440 if (use_result_reg && !preset_result_reg) { | |
1441 b(done); | |
1442 } | |
1443 | |
1444 bind(failed_int); | |
1445 if (use_result_reg && !preset_result_reg) { | |
1446 li(int_flag_success, 0); | |
1447 } | |
1448 | |
1449 bind(done); | |
1450 // (flag == ne) => (dest_current_value != compare_value), (!swapped) | |
1451 // (flag == eq) => (dest_current_value == compare_value), ( swapped) | |
1452 } | |
1453 | |
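// Usage sketch (illustrative only, with arbitrary register choices): a caller
// wanting full-fence semantics on a 64-bit field could emit
//
//   cmpxchgd(CCR0, /*current=*/R4, /*compare=*/R5, /*exchange=*/R6,
//            /*addr=*/R3, MemBarRel | MemBarFenceAfter,
//            cmpxchgx_hint_acquire_lock());
//
// MemBarRel emits a release barrier before the loop; MemBarFenceAfter and
// MemBarAcq emit fence() resp. isync() after the exchange.
//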
1454 // Look up the method for a megamorphic invokeinterface call. | |
1455 // The target method is determined by <intf_klass, itable_index>. | |
1456 // The receiver klass is in recv_klass. | |
1457 // On success, the result will be in method_result, and execution falls through. | |
1458 // On failure, execution transfers to the given label. | |
1459 void MacroAssembler::lookup_interface_method(Register recv_klass, | |
1460 Register intf_klass, | |
1461 RegisterOrConstant itable_index, | |
1462 Register method_result, | |
1463 Register scan_temp, | |
1464 Register sethi_temp, | |
1465 Label& L_no_such_interface) { | |
1466 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); | |
1467 assert(itable_index.is_constant() || itable_index.as_register() == method_result, | |
1468 "caller must use same register for non-constant itable index as for method"); | |
1469 | |
1470 // Compute start of first itableOffsetEntry (which is at the end of the vtable). | |
1471 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; | |
1472 int itentry_off = itableMethodEntry::method_offset_in_bytes(); | |
1473 int logMEsize = exact_log2(itableMethodEntry::size() * wordSize); | |
1474 int scan_step = itableOffsetEntry::size() * wordSize; | |
1475 int log_vte_size= exact_log2(vtableEntry::size() * wordSize); | |
1476 | |
1477 lwz(scan_temp, InstanceKlass::vtable_length_offset() * wordSize, recv_klass); | |
1478 // %%% We should store the aligned, prescaled offset in the klassoop. | |
1479 // Then the next several instructions would fold away. | |
1480 | |
1481 sldi(scan_temp, scan_temp, log_vte_size); | |
1482 addi(scan_temp, scan_temp, vtable_base); | |
1483 add(scan_temp, recv_klass, scan_temp); | |
1484 | |
1485 // Adjust recv_klass by scaled itable_index, so we can free itable_index. | |
1486 if (itable_index.is_register()) { | |
1487 Register itable_offset = itable_index.as_register(); | |
1488 sldi(itable_offset, itable_offset, logMEsize); | |
1489 if (itentry_off) addi(itable_offset, itable_offset, itentry_off); | |
1490 add(recv_klass, itable_offset, recv_klass); | |
1491 } else { | |
1492 long itable_offset = (long)itable_index.as_constant(); | |
1493 load_const_optimized(sethi_temp, (itable_offset<<logMEsize)+itentry_off); // static address, no relocation | |
1494 add(recv_klass, sethi_temp, recv_klass); | |
1495 } | |
1496 | |
1497 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { | |
1498 // if (scan->interface() == intf) { | |
1499 // result = (klass + scan->offset() + itable_index); | |
1500 // } | |
1501 // } | |
1502 Label search, found_method; | |
1503 | |
1504 for (int peel = 1; peel >= 0; peel--) { | |
1505 // %%%% Could load both offset and interface in one ldx, if they were | |
1506 // in the opposite order. This would save a load. | |
1507 ld(method_result, itableOffsetEntry::interface_offset_in_bytes(), scan_temp); | |
1508 | |
1509 // Check that this entry is non-null. A null entry means that | |
1510 // the receiver class doesn't implement the interface, and wasn't the | |
1511 // same as when the caller was compiled. | |
1512 cmpd(CCR0, method_result, intf_klass); | |
1513 | |
1514 if (peel) { | |
1515 beq(CCR0, found_method); | |
1516 } else { | |
1517 bne(CCR0, search); | |
1518 // (invert the test to fall through to found_method...) | |
1519 } | |
1520 | |
1521 if (!peel) break; | |
1522 | |
1523 bind(search); | |
1524 | |
1525 cmpdi(CCR0, method_result, 0); | |
1526 beq(CCR0, L_no_such_interface); | |
1527 addi(scan_temp, scan_temp, scan_step); | |
1528 } | |
1529 | |
1530 bind(found_method); | |
1531 | |
1532 // Got a hit. | |
1533 int ito_offset = itableOffsetEntry::offset_offset_in_bytes(); | |
1534 lwz(scan_temp, ito_offset, scan_temp); | |
1535 ldx(method_result, scan_temp, recv_klass); | |
1536 } | |
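// Illustrative memory layout assumed by the scan above (descriptive sketch;
// see the klass/itable headers for the authoritative definitions):
//
//   InstanceKlass
//     ...
//     vtableEntry[vtable_length]   // starts at vtable_start_offset
//     itableOffsetEntry[]          // { Klass* interface; int offset; },
//                                  // scanned in scan_step increments,
//                                  // terminated by a NULL interface
//     itableMethodEntry[]          // { Method* method; }, reached via
//                                  // offset() plus the scaled itable_index
//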
1537 | |
1538 // virtual method calling | |
1539 void MacroAssembler::lookup_virtual_method(Register recv_klass, | |
1540 RegisterOrConstant vtable_index, | |
1541 Register method_result) { | |
1542 | |
1543 assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg()); | |
1544 | |
1545 const int base = InstanceKlass::vtable_start_offset() * wordSize; | |
1546 assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); | |
1547 | |
1548 if (vtable_index.is_register()) { | |
1549 sldi(vtable_index.as_register(), vtable_index.as_register(), LogBytesPerWord); | |
1550 add(recv_klass, vtable_index.as_register(), recv_klass); | |
1551 } else { | |
1552 addi(recv_klass, recv_klass, vtable_index.as_constant() << LogBytesPerWord); | |
1553 } | |
1554 ld(R19_method, base + vtableEntry::method_offset_in_bytes(), recv_klass); | |
1555 } | |
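// Illustrative only: since a vtableEntry is one word holding a Method*, the
// sequence above amounts to
//
//   R19_method = *(Method**)((address)recv_klass
//                            + InstanceKlass::vtable_start_offset() * wordSize
//                            + vtable_index * wordSize);
//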
1556 | |
1557 /////////////////////////////////////////// subtype checking //////////////////////////////////////////// | |
1558 | |
1559 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, | |
1560 Register super_klass, | |
1561 Register temp1_reg, | |
1562 Register temp2_reg, | |
1563 Label& L_success, | |
1564 Label& L_failure) { | |
1565 | |
1566 const Register check_cache_offset = temp1_reg; | |
1567 const Register cached_super = temp2_reg; | |
1568 | |
1569 assert_different_registers(sub_klass, super_klass, check_cache_offset, cached_super); | |
1570 | |
1571 int sco_offset = in_bytes(Klass::super_check_offset_offset()); | |
1572 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); | |
1573 | |
1574 // If the pointers are equal, we are done (e.g., String[] elements). | |
1575 // This self-check enables sharing of secondary supertype arrays among | |
1576 // non-primary types such as array-of-interface. Otherwise, each such | |
1577 // type would need its own customized SSA. | |
1578 // We move this check to the front of the fast path because many | |
1579 // type checks are in fact trivially successful in this manner, | |
1580 // so we get a nicely predicted branch right at the start of the check. | |
1581 cmpd(CCR0, sub_klass, super_klass); | |
1582 beq(CCR0, L_success); | |
1583 | |
1584 // Check the supertype display: | |
1585 lwz(check_cache_offset, sco_offset, super_klass); | |
1586 // The loaded value is the offset from KlassOopDesc. | |
1587 | |
1588 ldx(cached_super, check_cache_offset, sub_klass); | |
1589 cmpd(CCR0, cached_super, super_klass); | |
1590 beq(CCR0, L_success); | |
1591 | |
1592 // This check has worked decisively for primary supers. | |
1593 // Secondary supers are sought in the super_cache ('super_cache_addr'). | |
1594 // (Secondary supers are interfaces and very deeply nested subtypes.) | |
1595 // This works in the same check above because of a tricky aliasing | |
1596 // between the super_cache and the primary super display elements. | |
1597 // (The 'super_check_addr' can address either, as the case requires.) | |
1598 // Note that the cache is updated below if it does not help us find | |
1599 // what we need immediately. | |
1600 // So if it was a primary super, we can just fail immediately. | |
1601 // Otherwise, it's the slow path for us (no success at this point). | |
1602 | |
1603 cmpwi(CCR0, check_cache_offset, sc_offset); | |
1604 bne(CCR0, L_failure); | |
1605 // bind(slow_path); // fallthru | |
1606 } | |
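// Illustrative only, C-style summary of the fast path above:
//
//   if (sub_klass == super_klass) goto L_success;              // trivial hit
//   int sco = super_klass->super_check_offset();
//   if (*(Klass**)((address)sub_klass + sco) == super_klass) goto L_success;
//   if (sco != secondary_super_cache_offset) goto L_failure;   // primary miss is final
//   // otherwise fall through to the slow path
//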
1607 | |
1608 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, | |
1609 Register super_klass, | |
1610 Register temp1_reg, | |
1611 Register temp2_reg, | |
1612 Label* L_success, | |
1613 Register result_reg) { | |
1614 const Register array_ptr = temp1_reg; // current value from cache array | |
1615 const Register temp = temp2_reg; | |
1616 | |
1617 assert_different_registers(sub_klass, super_klass, array_ptr, temp); | |
1618 | |
1619 int source_offset = in_bytes(Klass::secondary_supers_offset()); | |
1620 int target_offset = in_bytes(Klass::secondary_super_cache_offset()); | |
1621 | |
1622 int length_offset = Array<Klass*>::length_offset_in_bytes(); | |
1623 int base_offset = Array<Klass*>::base_offset_in_bytes(); | |
1624 | |
1625 Label hit, loop, failure, fallthru; | |
1626 | |
1627 ld(array_ptr, source_offset, sub_klass); | |
1628 | |
1629 //assert(4 == arrayOopDesc::length_length_in_bytes(), "precondition violated."); | |
1630 lwz(temp, length_offset, array_ptr); | |
1631 cmpwi(CCR0, temp, 0); | |
1632 beq(CCR0, result_reg!=noreg ? failure : fallthru); // length 0 | |
1633 | |
1634 mtctr(temp); // load ctr | |
1635 | |
1636 bind(loop); | |
1637 // Oops in the table are no longer compressed. | |
1638 ld(temp, base_offset, array_ptr); | |
1639 cmpd(CCR0, temp, super_klass); | |
1640 beq(CCR0, hit); | |
1641 addi(array_ptr, array_ptr, BytesPerWord); | |
1642 bdnz(loop); | |
1643 | |
1644 bind(failure); | |
1645 if (result_reg!=noreg) li(result_reg, 1); // load non-zero result (indicates a miss) | |
1646 b(fallthru); | |
1647 | |
1648 bind(hit); | |
1649 std(super_klass, target_offset, sub_klass); // save result to cache | |
1650 if (result_reg != noreg) li(result_reg, 0); // load zero result (indicates a hit) | |
1651 if (L_success != NULL) b(*L_success); | |
1652 | |
1653 bind(fallthru); | |
1654 } | |
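// Illustrative only: the loop above is a linear scan of the secondary supers
// array; a hit also primes the one-element cache checked by the fast path.
// Roughly (names are descriptive, not VM API):
//
//   Array<Klass*>* ss = sub_klass->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super_klass) {
//       sub_klass_secondary_super_cache = super_klass;  // std to cache
//       result = 0; goto hit;
//     }
//   }
//   result = 1;  // miss
//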
1655 | |
1656 // Try fast path, then go to slow one if not successful | |
1657 void MacroAssembler::check_klass_subtype(Register sub_klass, | |
1658 Register super_klass, | |
1659 Register temp1_reg, | |
1660 Register temp2_reg, | |
1661 Label& L_success) { | |
1662 Label L_failure; | |
1663 check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, L_success, L_failure); | |
1664 check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success); | |
1665 bind(L_failure); // Fallthru if not successful. | |
1666 } | |
1667 | |
1668 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg, | |
1669 Register temp_reg, | |
1670 Label& wrong_method_type) { | |
1671 assert_different_registers(mtype_reg, mh_reg, temp_reg); | |
1672 // Compare method type against that of the receiver. | |
1673 load_heap_oop_not_null(temp_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg), mh_reg); | |
1674 cmpd(CCR0, temp_reg, mtype_reg); | |
1675 bne(CCR0, wrong_method_type); | |
1676 } | |
1677 | |
1678 RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot, | |
1679 Register temp_reg, | |
1680 int extra_slot_offset) { | |
1681 // cf. TemplateTable::prepare_invoke(), if (load_receiver). | |
1682 int stackElementSize = Interpreter::stackElementSize; | |
1683 int offset = extra_slot_offset * stackElementSize; | |
1684 if (arg_slot.is_constant()) { | |
1685 offset += arg_slot.as_constant() * stackElementSize; | |
1686 return offset; | |
1687 } else { | |
1688 assert(temp_reg != noreg, "must specify"); | |
1689 sldi(temp_reg, arg_slot.as_register(), exact_log2(stackElementSize)); | |
1690 if (offset != 0) | |
1691 addi(temp_reg, temp_reg, offset); | |
1692 return temp_reg; | |
1693 } | |
1694 } | |
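// Worked example (illustrative values): with stackElementSize == 8, arg_slot
// a constant 2 and extra_slot_offset == 1, the function returns the constant
// (1 + 2) * 8 = 24; with arg_slot in a register it instead emits sldi/addi
// and returns temp_reg.
//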
1695 | |
1696 void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, | |
1697 Register mark_reg, Register temp_reg, | |
1698 Register temp2_reg, Label& done, Label* slow_case) { | |
1699 assert(UseBiasedLocking, "why call this otherwise?"); | |
1700 | |
1701 #ifdef ASSERT | |
1702 assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg); | |
1703 #endif | |
1704 | |
1705 Label cas_label; | |
1706 | |
1707 // Branch to done if fast path fails and no slow_case provided. | |
1708 Label *slow_case_int = (slow_case != NULL) ? slow_case : &done; | |
1709 | |
1710 // Biased locking | |
1711 // See whether the lock is currently biased toward our thread and | |
1712 // whether the epoch is still valid | |
1713 // Note that the runtime guarantees sufficient alignment of JavaThread | |
1714 // pointers to allow age to be placed into low bits | |
1715 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, | |
1716 "biased locking makes assumptions about bit layout"); | |
1717 | |
1718 if (PrintBiasedLockingStatistics) { | |
1719 load_const(temp_reg, (address) BiasedLocking::total_entry_count_addr(), temp2_reg); | |
1720 lwz(temp2_reg, 0, temp_reg); | |
1721 addi(temp2_reg, temp2_reg, 1); | |
1722 stw(temp2_reg, 0, temp_reg); | |
1723 } | |
1724 | |
1725 andi(temp_reg, mark_reg, markOopDesc::biased_lock_mask_in_place); | |
1726 cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern); | |
1727 bne(cr_reg, cas_label); | |
1728 | |
1729 load_klass_with_trap_null_check(temp_reg, obj_reg); | |
1730 | |
1731 load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place)); | |
1732 ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); | |
1733 orr(temp_reg, R16_thread, temp_reg); | |
1734 xorr(temp_reg, mark_reg, temp_reg); | |
1735 andr(temp_reg, temp_reg, temp2_reg); | |
1736 cmpdi(cr_reg, temp_reg, 0); | |
1737 if (PrintBiasedLockingStatistics) { | |
1738 Label l; | |
1739 bne(cr_reg, l); | |
1740 load_const(mark_reg, (address) BiasedLocking::biased_lock_entry_count_addr()); | |
1741 lwz(temp2_reg, 0, mark_reg); | |
1742 addi(temp2_reg, temp2_reg, 1); | |
1743 stw(temp2_reg, 0, mark_reg); | |
1744 // restore mark_reg | |
1745 ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); | |
1746 bind(l); | |
1747 } | |
1748 beq(cr_reg, done); | |
1749 | |
1750 Label try_revoke_bias; | |
1751 Label try_rebias; | |
1752 | |
1753 // At this point we know that the header has the bias pattern and | |
1754 // that we are not the bias owner in the current epoch. We need to | |
1755 // figure out more details about the state of the header in order to | |
1756 // know what operations can be legally performed on the object's | |
1757 // header. | |
1758 | |
1759 // If the low three bits in the xor result aren't clear, that means | |
1760 // the prototype header is no longer biased and we have to revoke | |
1761 // the bias on this object. | |
1762 andi(temp2_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); | |
1763 cmpwi(cr_reg, temp2_reg, 0); | |
1764 bne(cr_reg, try_revoke_bias); | |
1765 | |
1766 // Biasing is still enabled for this data type. See whether the | |
1767 // epoch of the current bias is still valid, meaning that the epoch | |
1768 // bits of the mark word are equal to the epoch bits of the | |
1769 // prototype header. (Note that the prototype header's epoch bits | |
1770 // only change at a safepoint.) If not, attempt to rebias the object | |
1771 // toward the current thread. Note that we must be absolutely sure | |
1772 // that the current epoch is invalid in order to do this because | |
1773 // otherwise the manipulations it performs on the mark word are | |
1774 // illegal. | |
1775 | |
1776 int shift_amount = 64 - markOopDesc::epoch_shift; | |
1777 // rotate epoch bits to right (little) end and set other bits to 0 | |
1778 // [ big part | epoch | little part ] -> [ 0..0 | epoch ] | |
1779 rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markOopDesc::epoch_bits); | |
1780 // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented | |
1781 bne(CCR0, try_rebias); | |
1782 | |
1783 // The epoch of the current bias is still valid but we know nothing | |
1784 // about the owner; it might be set or it might be clear. Try to | |
1785 // acquire the bias of the object using an atomic operation. If this | |
1786 // fails we will go into the runtime to revoke the object's bias. | |
1787 // Note that we first construct the presumed unbiased header so we | |
1788 // don't accidentally blow away another thread's valid bias. | |
1789 andi(mark_reg, mark_reg, (markOopDesc::biased_lock_mask_in_place | | |
1790 markOopDesc::age_mask_in_place | | |
1791 markOopDesc::epoch_mask_in_place)); | |
1792 orr(temp_reg, R16_thread, mark_reg); | |
1793 | |
1794 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); | |
1795 | |
1796 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). | |
1797 fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? | |
1798 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, | |
1799 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, | |
1800 /*where=*/obj_reg, | |
1801 MacroAssembler::MemBarAcq, | |
1802 MacroAssembler::cmpxchgx_hint_acquire_lock(), | |
1803 noreg, slow_case_int); // bail out if failed | |
1804 | |
1805 // If the biasing toward our thread failed, this means that | |
1806 // another thread succeeded in biasing it toward itself and we | |
1807 // need to revoke that bias. The revocation will occur in the | |
1808 // interpreter runtime in the slow case. | |
1809 if (PrintBiasedLockingStatistics) { | |
1810 load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg); | |
1811 lwz(temp2_reg, 0, temp_reg); | |
1812 addi(temp2_reg, temp2_reg, 1); | |
1813 stw(temp2_reg, 0, temp_reg); | |
1814 } | |
1815 b(done); | |
1816 | |
1817 bind(try_rebias); | |
1818 // At this point we know the epoch has expired, meaning that the | |
1819 // current "bias owner", if any, is actually invalid. Under these | |
1820 // circumstances _only_, we are allowed to use the current header's | |
1821 // value as the comparison value when doing the cas to acquire the | |
1822 // bias in the current epoch. In other words, we allow transfer of | |
1823 // the bias from one thread to another directly in this situation. | |
1824 andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place); | |
1825 orr(temp_reg, R16_thread, temp_reg); | |
1826 load_klass_with_trap_null_check(temp2_reg, obj_reg); | |
1827 ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg); | |
1828 orr(temp_reg, temp_reg, temp2_reg); | |
1829 | |
1830 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); | |
1831 | |
1832 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). | |
1833 fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? | |
1834 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, | |
1835 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, | |
1836 /*where=*/obj_reg, | |
1837 MacroAssembler::MemBarAcq, | |
1838 MacroAssembler::cmpxchgx_hint_acquire_lock(), | |
1839 noreg, slow_case_int); // bail out if failed | |
1840 | |
1841 // If the biasing toward our thread failed, this means that | |
1842 // another thread succeeded in biasing it toward itself and we | |
1843 // need to revoke that bias. The revocation will occur in the | |
1844 // interpreter runtime in the slow case. | |
1845 if (PrintBiasedLockingStatistics) { | |
1846 load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg); | |
1847 lwz(temp2_reg, 0, temp_reg); | |
1848 addi(temp2_reg, temp2_reg, 1); | |
1849 stw(temp2_reg, 0, temp_reg); | |
1850 } | |
1851 b(done); | |
1852 | |
1853 bind(try_revoke_bias); | |
1854 // The prototype mark in the klass doesn't have the bias bit set any | |
1855 // more, indicating that objects of this data type are not supposed | |
1856 // to be biased any more. We are going to try to reset the mark of | |
1857 // this object to the prototype value and fall through to the | |
1858 // CAS-based locking scheme. Note that if our CAS fails, it means | |
1859 // that another thread raced us for the privilege of revoking the | |
1860 // bias of this particular object, so it's okay to continue in the | |
1861 // normal locking code. | |
1862 load_klass_with_trap_null_check(temp_reg, obj_reg); | |
1863 ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg); | |
1864 andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place); | |
1865 orr(temp_reg, temp_reg, temp2_reg); | |
1866 | |
1867 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); | |
1868 | |
1869 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). | |
1870 fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? | |
1871 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, | |
1872 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, | |
1873 /*where=*/obj_reg, | |
1874 MacroAssembler::MemBarAcq, | |
1875 MacroAssembler::cmpxchgx_hint_acquire_lock()); | |
1876 | |
1877 // reload markOop in mark_reg before continuing with lightweight locking | |
1878 ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); | |
1879 | |
1880 // Fall through to the normal CAS-based lock, because no matter what | |
1881 // the result of the above CAS, some thread must have succeeded in | |
1882 // removing the bias bit from the object's header. | |
1883 if (PrintBiasedLockingStatistics) { | |
1884 Label l; | |
1885 bne(cr_reg, l); | |
1886 load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg); | |
1887 lwz(temp2_reg, 0, temp_reg); | |
1888 addi(temp2_reg, temp2_reg, 1); | |
1889 stw(temp2_reg, 0, temp_reg); | |
1890 bind(l); | |
1891 } | |
1892 | |
1893 bind(cas_label); | |
1894 } | |
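// Illustrative only: the 64-bit biased mark word layout assumed above is
// roughly (see markOop.hpp for the authoritative bit counts)
//
//   [ JavaThread* | epoch(2) | age(4) | biased_lock(1) | lock(2) ]
//
// XORing the mark with (prototype_header | thread) and masking out the age
// bits leaves 0 exactly when the object is biased to the current thread in
// the current epoch.
//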
1895 | |
1896 void MacroAssembler::biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) { | |
1897 // Check for biased locking unlock case, which is a no-op | |
1898 // Note: we do not have to check the thread ID for two reasons. | |
1899 // First, the interpreter checks for IllegalMonitorStateException at | |
1900 // a higher level. Second, if the bias was revoked while we held the | |
1901 // lock, the object could not be rebiased toward another thread, so | |
1902 // the bias bit would be clear. | |
1903 | |
1904 ld(temp_reg, 0, mark_addr); | |
1905 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); | |
1906 | |
1907 cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern); | |
1908 beq(cr_reg, done); | |
1909 } | |
1910 | |
1911 // "The box" is the space on the stack where we copy the object mark. | |
1912 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, | |
1913 Register temp, Register displaced_header, Register current_header) { | |
1914 assert_different_registers(oop, box, temp, displaced_header, current_header); | |
1915 assert(flag != CCR0, "bad condition register"); | |
1916 Label cont; | |
1917 Label object_has_monitor; | |
1918 Label cas_failed; | |
1919 | |
1920 // Load markOop from object into displaced_header. | |
1921 ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop); | |
1922 | |
1923 | |
1924 // Always do locking in runtime. | |
1925 if (EmitSync & 0x01) { | |
1926 cmpdi(flag, oop, 0); // Oop can't be 0 here => always false. | |
1927 return; | |
1928 } | |
1929 | |
1930 if (UseBiasedLocking) { | |
1931 biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont); | |
1932 } | |
1933 | |
1934 // Handle existing monitor. | |
1935 if ((EmitSync & 0x02) == 0) { | |
1936 // The object has an existing monitor iff (mark & monitor_value) != 0. | |
1937 andi_(temp, displaced_header, markOopDesc::monitor_value); | |
1938 bne(CCR0, object_has_monitor); | |
1939 } | |
1940 | |
1941 // Set displaced_header to be (markOop of object | UNLOCK_VALUE). | |
1942 ori(displaced_header, displaced_header, markOopDesc::unlocked_value); | |
1943 | |
1944 // Load Compare Value application register. | |
1945 | |
1946 // Initialize the box. (Must happen before we update the object mark!) | |
1947 std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box); | |
1948 | |
1949 // Must fence, otherwise, preceding store(s) may float below cmpxchg. | |
1950 // Compare object markOop with mark and if equal exchange scratch1 with object markOop. | |
1951 // CmpxchgX sets cr_reg to cmpX(current, displaced). | |
1952 cmpxchgd(/*flag=*/flag, | |
1953 /*current_value=*/current_header, | |
1954 /*compare_value=*/displaced_header, | |
1955 /*exchange_value=*/box, | |
1956 /*where=*/oop, | |
1957 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, | |
1958 MacroAssembler::cmpxchgx_hint_acquire_lock(), | |
1959 noreg, | |
1960 &cas_failed); | |
1961 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); | |
1962 | |
1963 // If the compare-and-exchange succeeded, then we found an unlocked | |
1964 // object and we have now locked it. | |
1965 b(cont); | |
1966 | |
1967 bind(cas_failed); | |
1968 // We did not see an unlocked object so try the fast recursive case. | |
1969 | |
1970 // Check if the owner is self by comparing the value in the markOop of object | |
1971 // (current_header) with the stack pointer. | |
1972 sub(current_header, current_header, R1_SP); | |
1973 load_const_optimized(temp, (address) (~(os::vm_page_size()-1) | | |
1974 markOopDesc::lock_mask_in_place)); | |
1975 | |
1976 and_(R0/*==0?*/, current_header, temp); | |
1977 // If the condition is true we are done and hence we can store 0 as the | |
1978 // displaced header in the box, which indicates that it is a recursive lock. | |
1979 mcrf(flag, CCR0); | |
1980 std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box); | |
1981 | |
1982 // Handle existing monitor. | |
1983 if ((EmitSync & 0x02) == 0) { | |
1984 b(cont); | |
1985 | |
1986 bind(object_has_monitor); | |
1987 // The object's monitor m is unlocked iff m->owner == NULL, | |
1988 // otherwise m->owner may contain a thread or a stack address. | |
1989 // | |
1990 // Try to CAS m->owner from NULL to current thread. | |
1991 addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value); | |
1992 li(displaced_header, 0); | |
1993 // CmpxchgX sets flag to cmpX(current, displaced). | |
1994 cmpxchgd(/*flag=*/flag, | |
1995 /*current_value=*/current_header, | |
1996 /*compare_value=*/displaced_header, | |
1997 /*exchange_value=*/R16_thread, | |
1998 /*where=*/temp, | |
1999 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, | |
2000 MacroAssembler::cmpxchgx_hint_acquire_lock()); | |
2001 | |
2002 // Store a non-null value into the box. | |
2003 std(box, BasicLock::displaced_header_offset_in_bytes(), box); | |
2004 | |
2005 # ifdef ASSERT | |
2006 bne(flag, cont); | |
2007 // We have acquired the monitor, check some invariants. | |
2008 addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes()); | |
2009 // Invariant 1: _recursions should be 0. | |
2010 //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size"); | |
2011 asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp, | |
2012 "monitor->_recursions should be 0", -1); | |
2013 // Invariant 2: OwnerIsThread shouldn't be 0. | |
2014 //assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size"); | |
2015 //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp, | |
2016 // "monitor->OwnerIsThread shouldn't be 0", -1); | |
2017 # endif | |
2018 } | |
2019 | |
2020 bind(cont); | |
2021 // flag == EQ indicates success | |
2022 // flag == NE indicates failure | |
2023 } | |
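// Illustrative only, C-style summary of the stack-locking fast path above:
//
//   markOop mark = obj->mark() | unlocked_value;
//   box->displaced_header = mark;
//   if (cas(&obj->mark(), mark, box)) goto locked;  // installed a thin lock
//   if ((((intptr_t)obj->mark() - SP) & (~(page_size - 1) | lock_mask)) == 0) {
//     box->displaced_header = 0;                    // recursive enter
//     goto locked;
//   }
//   // else: inflated or contended; flag stays NE
//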
2024 | |
2025 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, | |
2026 Register temp, Register displaced_header, Register current_header) { | |
2027 assert_different_registers(oop, box, temp, displaced_header, current_header); | |
2028 assert(flag != CCR0, "bad condition register"); | |
2029 Label cont; | |
2030 Label object_has_monitor; | |
2031 | |
2032 // Always do locking in runtime. | |
2033 if (EmitSync & 0x01) { | |
2034 cmpdi(flag, oop, 0); // Oop can't be 0 here => always false. | |
2035 return; | |
2036 } | |
2037 | |
2038 if (UseBiasedLocking) { | |
2039 biased_locking_exit(flag, oop, current_header, cont); | |
2040 } | |
2041 | |
2042 // Find the lock address and load the displaced header from the stack. | |
2043 ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box); | |
2044 | |
2045 // If the displaced header is 0, we have a recursive unlock. | |
2046 cmpdi(flag, displaced_header, 0); | |
2047 beq(flag, cont); | |
2048 | |
2049 // Handle existing monitor. | |
2050 if ((EmitSync & 0x02) == 0) { | |
2051 // The object has an existing monitor iff (mark & monitor_value) != 0. | |
2052 ld(current_header, oopDesc::mark_offset_in_bytes(), oop); | |
2053 andi(temp, current_header, markOopDesc::monitor_value); | |
2054 cmpdi(flag, temp, 0); | |
2055 bne(flag, object_has_monitor); | |
2056 } | |
2057 | |
2058 | |
2059 // Check if it is still a lightweight lock; this is true if we see | |
2060 // the stack address of the basicLock in the markOop of the object. | |
2061 // Cmpxchg sets flag to cmpd(current_header, box). | |
2062 cmpxchgd(/*flag=*/flag, | |
2063 /*current_value=*/current_header, | |
2064 /*compare_value=*/box, | |
2065 /*exchange_value=*/displaced_header, | |
2066 /*where=*/oop, | |
2067 MacroAssembler::MemBarRel, | |
2068 MacroAssembler::cmpxchgx_hint_release_lock(), | |
2069 noreg, | |
2070 &cont); | |
2071 | |
2072 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); | |
2073 | |
2074 // Handle existing monitor. | |
2075 if ((EmitSync & 0x02) == 0) { | |
2076 b(cont); | |
2077 | |
2078 bind(object_has_monitor); | |
2079 addi(current_header, current_header, -markOopDesc::monitor_value); // monitor | |
2080 ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header); | |
2081 ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header); | |
2082 xorr(temp, R16_thread, temp); // Will be 0 if we are the owner. | |
2083 orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions. | |
2084 cmpdi(flag, temp, 0); | |
2085 bne(flag, cont); | |
2086 | |
2087 ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header); | |
2088 ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header); | |
2089 orr(temp, temp, displaced_header); // Will be 0 if both are 0. | |
2090 cmpdi(flag, temp, 0); | |
2091 bne(flag, cont); | |
2092 release(); | |
2093 std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header); | |
2094 } | |
2095 | |
2096 bind(cont); | |
2097 // flag == EQ indicates success | |
2098 // flag == NE indicates failure | |
2099 } | |
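// Illustrative only: the inflated-monitor exit above succeeds in compiled
// code only in the simple case, roughly
//
//   if (m->owner != self || m->recursions != 0) goto slow;
//   if (m->EntryList != NULL || m->cxq != NULL) goto slow;
//   release();        // lwsync
//   m->owner = NULL;  // hand the monitor back
//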
2100 | |
2101 // Write serialization page so VM thread can do a pseudo remote membar. | |
2102 // We use the current thread pointer to calculate a thread specific | |
2103 // offset to write to within the page. This minimizes bus traffic | |
2104 // due to cache line collision. | |
2105 void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { | |
2106 srdi(tmp2, thread, os::get_serialize_page_shift_count()); | |
2107 | |
2108 int mask = os::vm_page_size() - sizeof(int); | |
2109 if (Assembler::is_simm(mask, 16)) { | |
2110 andi(tmp2, tmp2, mask); | |
2111 } else { | |
2112 lis(tmp1, (int)((signed short) (mask >> 16))); | |
2113 ori(tmp1, tmp1, mask & 0x0000ffff); | |
2114 andr(tmp2, tmp2, tmp1); | |
2115 } | |
2116 | |
2117 load_const(tmp1, (long) os::get_memory_serialize_page()); | |
2118 release(); | |
2119 stwx(R0, tmp1, tmp2); | |
2120 } | |
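// Worked example (illustrative values): with a 4K page and a page shift
// count of 3, a thread pointer ending in 0x1280 stores to page offset
// (0x1280 >> 3) & (4096 - 4) = 0x250, so distinct threads tend to touch
// distinct words of the serialization page.
//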
2121 | |
2122 | |
2123 // GC barrier helper macros | |
2124 | |
2125 // Write the card table byte if needed. | |
2126 void MacroAssembler::card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp) { | |
2127 CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); | |
2128 assert(bs->kind() == BarrierSet::CardTableModRef || | |
2129 bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); | |
2130 #ifdef ASSERT | |
2131 cmpdi(CCR0, Rnew_val, 0); | |
2132 asm_assert_ne("null oop not allowed", 0x321); | |
2133 #endif | |
2134 card_table_write(bs->byte_map_base, Rtmp, Rstore_addr); | |
2135 } | |
2136 | |
2137 // Write the card table byte. | |
2138 void MacroAssembler::card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj) { | |
2139 assert_different_registers(Robj, Rtmp, R0); | |
2140 load_const_optimized(Rtmp, (address)byte_map_base, R0); | |
2141 srdi(Robj, Robj, CardTableModRefBS::card_shift); | |
2142 li(R0, 0); // dirty | |
2143 if (UseConcMarkSweepGC) release(); | |
2144 stbx(R0, Rtmp, Robj); | |
2145 } | |
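// Illustrative only: the two routines above implement the classic card-table
// post-barrier
//
//   byte_map_base[(uintptr_t)store_addr >> card_shift] = 0;  // 0 == dirty
//
// with an extra lwsync under CMS so the card store becomes visible only
// after the preceding oop store.
//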
2146 | |
2147 #ifndef SERIALGC | |
2148 | |
2149 // General G1 pre-barrier generator. | |
2150 // Goal: record the previous value if it is not null. | |
2151 void MacroAssembler::g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val, | |
2152 Register Rtmp1, Register Rtmp2, bool needs_frame) { | |
2153 Label runtime, filtered; | |
2154 | |
2155 // Is marking active? | |
2156 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { | |
2157 lwz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread); | |
2158 } else { | |
2159 guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); | |
2160 lbz(Rtmp1, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active()), R16_thread); | |
2161 } | |
2162 cmpdi(CCR0, Rtmp1, 0); | |
2163 beq(CCR0, filtered); | |
2164 | |
2165 // Do we need to load the previous value? | |
2166 if (Robj != noreg) { | |
2167 // Load the previous value... | |
2168 if (UseCompressedOops) { | |
2169 lwz(Rpre_val, offset, Robj); | |
2170 } else { | |
2171 ld(Rpre_val, offset, Robj); | |
2172 } | |
2173 // Previous value has been loaded into Rpre_val. | |
2174 } | |
2175 assert(Rpre_val != noreg, "must have a real register"); | |
2176 | |
2177 // Is the previous value null? | |
2178 cmpdi(CCR0, Rpre_val, 0); | |
2179 beq(CCR0, filtered); | |
2180 | |
2181 if (Robj != noreg && UseCompressedOops) { | |
2182 decode_heap_oop_not_null(Rpre_val); | |
2183 } | |
2184 | |
2185 // OK, it's not filtered, so we'll need to call enqueue. In the normal | |
2186 // case, pre_val is a scratch register, but it may also be a volatile | |
2187 // register; in that case it is saved across the runtime call (see the | |
2188 // R31 save/restore below). | |
2189 | |
2190 // Can we store original value in the thread's buffer? | |
2191 // Is index == 0? | |
2192 // (The index field is typed as size_t.) | |
2193 const Register Rbuffer = Rtmp1, Rindex = Rtmp2; | |
2194 | |
2195 ld(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); | |
2196 cmpdi(CCR0, Rindex, 0); | |
2197 beq(CCR0, runtime); // If index == 0, goto runtime. | |
2198 ld(Rbuffer, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_buf()), R16_thread); | |
2199 | |
2200 addi(Rindex, Rindex, -wordSize); // Decrement index. | |
2201 std(Rindex, in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); | |
2202 | |
2203 // Record the previous value. | |
2204 stdx(Rpre_val, Rbuffer, Rindex); | |
2205 b(filtered); | |
2206 | |
2207 bind(runtime); | |
2208 | |
2209 // The VM call needs a frame: save LR/CR and push an ABI frame first. | |
2210 if (needs_frame) { | |
2211 save_LR_CR(Rtmp1); | |
2212 push_frame_abi112(0, Rtmp2); | |
2213 } | |
2214 | |
2215 if (Rpre_val->is_volatile() && Robj == noreg) mr(R31, Rpre_val); // Save pre_val across C call if it was preloaded. | |
2216 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, R16_thread); | |
2217 if (Rpre_val->is_volatile() && Robj == noreg) mr(Rpre_val, R31); // restore | |
2218 | |
2219 if (needs_frame) { | |
2220 pop_frame(); | |
2221 restore_LR_CR(Rtmp1); | |
2222 } | |
2223 | |
2224 bind(filtered); | |
2225 } | |
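// Illustrative only, C-style summary of the SATB pre-barrier above (names
// are descriptive, not VM API; the queue index is really a byte offset
// decremented by wordSize):
//
//   if (!thread->satb_queue_active) return;            // marking inactive
//   oop pre_val = (Robj != noreg) ? *field : Rpre_val;
//   if (pre_val == NULL) return;                       // nothing to record
//   if (thread->satb_index == 0) {
//     g1_wb_pre(pre_val, thread);                      // buffer full: runtime
//   } else {
//     thread->satb_buf[--thread->satb_index] = pre_val;
//   }
//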
2226 | |
2227 // General G1 post-barrier generator | |
2228 // Store cross-region card. | |
2229 void MacroAssembler::g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1, Register Rtmp2, Register Rtmp3, Label *filtered_ext) { | |
2230 Label runtime, filtered_int; | |
2231 Label& filtered = (filtered_ext != NULL) ? *filtered_ext : filtered_int; | |
2232 assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); | |
2233 | |
2234 G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); | |
2235 assert(bs->kind() == BarrierSet::G1SATBCT || | |
2236 bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); | |
2237 | |
2238 // Does store cross heap regions? | |
2239 if (G1RSBarrierRegionFilter) { | |
2240 xorr(Rtmp1, Rstore_addr, Rnew_val); | |
2241 srdi_(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes); | |
2242 beq(CCR0, filtered); | |
2243 } | |
2244 | |
2245 // Crosses regions, storing NULL? | |
2246 #ifdef ASSERT | |
2247 cmpdi(CCR0, Rnew_val, 0); | |
2248 asm_assert_ne("null oop not allowed (G1)", 0x322); // Checked by caller on PPC64, so following branch is obsolete: | |
2249 //beq(CCR0, filtered); | |
2250 #endif | |
2251 | |
2252 // Storing region crossing non-NULL, is card already dirty? | |
2253 assert(sizeof(*bs->byte_map_base) == sizeof(jbyte), "adjust this code"); | |
2254 const Register Rcard_addr = Rtmp1; | |
2255 Register Rbase = Rtmp2; | |
2256 load_const_optimized(Rbase, (address)bs->byte_map_base, /*temp*/ Rtmp3); | |
2257 | |
2258 srdi(Rcard_addr, Rstore_addr, CardTableModRefBS::card_shift); | |
2259 | |
2260 // Get the address of the card. | |
2261 lbzx(/*card value*/ Rtmp3, Rbase, Rcard_addr); | |
2262 | |
2263 assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); | |
2264 cmpwi(CCR0, Rtmp3 /* card value */, 0); | |
2265 beq(CCR0, filtered); | |
2266 | |
2267 // Storing a region crossing, non-NULL oop, card is clean. | |
2268 // Dirty card and log. | |
2269 li(Rtmp3, 0); // dirty | |
2270 //release(); // G1: oops are allowed to get visible after dirty marking. | |
2271 stbx(Rtmp3, Rbase, Rcard_addr); | |
2272 | |
2273 add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued. | |
2274 Rbase = noreg; // end of lifetime | |
2275 | |
2276 const Register Rqueue_index = Rtmp2, | |
2277 Rqueue_buf = Rtmp3; | |
2278 ld(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); | |
2279 cmpdi(CCR0, Rqueue_index, 0); | |
2280 beq(CCR0, runtime); // index == 0 then jump to runtime | |
2281 ld(Rqueue_buf, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_buf()), R16_thread); | |
2282 | |
2283 addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index | |
2284 std(Rqueue_index, in_bytes(JavaThread::dirty_card_queue_offset() + PtrQueue::byte_offset_of_index()), R16_thread); | |
2285 | |
2286 stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card | |
2287 b(filtered); | |
2288 | |
2289 bind(runtime); | |
2290 | |
2291 // Save the live input values. | |
2292 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr, R16_thread); | |
2293 | |
2294 bind(filtered_int); | |
2295 } | |
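// Illustrative only, C-style summary of the G1 post-barrier above:
//
//   if (((uintptr_t)(store_addr ^ new_val) >> LogOfHRGrainBytes) == 0)
//     return;                                          // same region: filtered
//   jbyte* card = byte_map_base + ((uintptr_t)store_addr >> card_shift);
//   if (*card == dirty_card_val) return;               // already dirty (== 0)
//   *card = dirty_card_val;
//   enqueue_on_dirty_card_queue_or_call_runtime(card); // g1_wb_post
//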
2296 #endif // SERIALGC | |
2297 | |
2298 // Values for last_Java_pc and last_Java_sp must comply with the rules | |
2299 // in frame_ppc64.hpp. | |
2300 void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) { | |
2301 // Always set last_Java_pc and flags first because once last_Java_sp | |
2302 // is visible, has_last_Java_frame is true and users will look at the | |
2303 // rest of the fields. (Note: flags should always be zero before we | |
2304 // get here, so they don't need to be set.) | |
2305 | |
2306 // Verify that last_Java_pc was zeroed on return to Java | |
2307 asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()), R16_thread, | |
2308 "last_Java_pc not zeroed before leaving Java", 0x200); | |
2309 | |
2310 // When returning from calling out from Java mode the frame anchor's | |
2311 // last_Java_pc will always be set to NULL. It is set here so that | |
2312 // if we are doing a call to native (not VM) that we capture the | |
2313 // known pc and don't have to rely on the native call having a | |
2314 // standard frame linkage where we can find the pc. | |
2315 if (last_Java_pc != noreg) | |
2316 std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); | |
2317 | |
2318 // set last_Java_sp last | |
2319 std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); | |
2320 } | |
2321 | |
2322 void MacroAssembler::reset_last_Java_frame(void) { | |
2323 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), | |
2324 R16_thread, "SP was not set, still zero", 0x202); | |
2325 | |
2326 BLOCK_COMMENT("reset_last_Java_frame {"); | |
2327 li(R0, 0); | |
2328 | |
2329 // _last_Java_sp = 0 | |
2330 std(R0, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); | |
2331 | |
2332 // _last_Java_pc = 0 | |
2333 std(R0, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); | |
2334 BLOCK_COMMENT("} reset_last_Java_frame"); | |
2335 } | |
2336 | |
2337 void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1) { | |
2338 assert_different_registers(sp, tmp1); | |
2339 | |
2340 // sp points to a TOP_IJAVA_FRAME, retrieve frame's PC via | |
2341 // TOP_IJAVA_FRAME_ABI. | |
2342 // FIXME: assert that we really have a TOP_IJAVA_FRAME here! | |
2343 #ifdef CC_INTERP | |
2344 ld(tmp1/*pc*/, _top_ijava_frame_abi(frame_manager_lr), sp); | |
2345 #else | |
2346 Unimplemented(); | |
2347 #endif | |
2348 | |
2349 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1); | |
2350 } | |
2351 | |
2352 void MacroAssembler::get_vm_result(Register oop_result) { | |
2353 // Read: | |
2354 // R16_thread | |
2355 // R16_thread->in_bytes(JavaThread::vm_result_offset()) | |
2356 // | |
2357 // Updated: | |
2358 // oop_result | |
2359 // R16_thread->in_bytes(JavaThread::vm_result_offset()) | |
2360 | |
2361 ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread); | |
2362 li(R0, 0); | |
2363 std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread); | |
2364 | |
2365 verify_oop(oop_result); | |
2366 } | |
2367 | |
2368 void MacroAssembler::get_vm_result_2(Register metadata_result) { | |
2369 // Read: | |
2370 // R16_thread | |
2371 // R16_thread->in_bytes(JavaThread::vm_result_2_offset()) | |
2372 // | |
2373 // Updated: | |
2374 // metadata_result | |
2375 // R16_thread->in_bytes(JavaThread::vm_result_2_offset()) | |
2376 | |
2377 ld(metadata_result, in_bytes(JavaThread::vm_result_2_offset()), R16_thread); | |
2378 li(R0, 0); | |
2379 std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread); | |
2380 } | |
2381 | |
2382 | |
2383 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { | |
2384 if (src == noreg) src = dst; | |
2385 if (Universe::narrow_klass_base() != NULL) { | |
2386 // heapbased | |
2387 assert(Universe::narrow_klass_shift() != 0, "sanity"); | |
2388 sub(dst, src, R30); | |
2389 srdi(dst, dst, Universe::narrow_klass_shift()); | |
2390 } else if (Universe::narrow_klass_shift() != 0) { | |
2391 // zerobased | |
2392 srdi(dst, src, Universe::narrow_klass_shift()); | |
2393 } else if (src != dst) { | |
2394 // unscaled | |
2395 mr(dst, src); | |
2396 } | |
2397 } | |
2398 | |
2399 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) { | |
2400 if (UseCompressedKlassPointers) { | |
2401 encode_klass_not_null(ck, klass); | |
2402 stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop); | |
2403 } else { | |
2404 std(klass, oopDesc::klass_offset_in_bytes(), dst_oop); | |
2405 } | |
2406 } | |
2407 | |
2408 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { | |
2409 if (src == noreg) src = dst; | |
2410 if (Universe::narrow_klass_base() != NULL) { | |
2411 // heapbased | |
2412 assert(Universe::narrow_klass_shift() != 0, "sanity"); | |
2413 sldi(dst, src, Universe::narrow_klass_shift()); | |
2414 add(dst, dst, R30); | |
2415 } else if (Universe::narrow_klass_shift() != 0) { | |
2416 // zerobased | |
2417 sldi(dst, src, Universe::narrow_klass_shift()); | |
2418 } else if (src != dst) { | |
2419 // unscaled | |
2420 mr(dst, src); | |
2421 } | |
2422 } | |
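// Illustrative only: encode/decode above implement
//
//   narrow = (klass - narrow_klass_base) >> narrow_klass_shift;  // encode
//   klass  = narrow_klass_base + (narrow << narrow_klass_shift); // decode
//
// where the base is cached in R30 (see reinit_heapbase below) and base or
// shift may be zero in the zero-based and unscaled modes.
//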
2423 | |
2424 void MacroAssembler::load_klass(Register dst, Register src) { | |
2425 if (UseCompressedKlassPointers) { | |
2426 lwz(dst, oopDesc::klass_offset_in_bytes(), src); | |
2427 // Attention: no null check here! | |
2428 decode_klass_not_null(dst, dst); | |
2429 } else { | |
2430 ld(dst, oopDesc::klass_offset_in_bytes(), src); | |
2431 } | |
2432 } | |
2433 | |
2434 void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) { | |
2435 if (false NOT_LINUX(|| true) /*!os::zero_page_read_protected()*/) { | |
2436 if (TrapBasedNullChecks) { | |
2437 trap_null_check(src); | |
2438 } | |
2439 } | |
2440 load_klass(dst, src); | |
2441 } | |
2442 | |
2443 void MacroAssembler::reinit_heapbase(Register d, Register tmp) { | |
2444 if (UseCompressedOops || UseCompressedKlassPointers) { | |
2445 load_const(R30, Universe::narrow_ptrs_base_addr(), tmp); | |
2446 ld(R30, 0, R30); | |
2447 } | |
2448 } | |
2449 | |
2450 /////////////////////////////////////////// String intrinsics //////////////////////////////////////////// | |
2451 | |
2452 // Search for a single jchar in a jchar[]. | |
2453 // | |
2454 // Assumes that result differs from all other registers. | |
2455 // | |
2456 // Haystack, needle are the addresses of jchar-arrays. | |
2457 // NeedleChar is needle[0] if it is known at compile time. | |
2458 // Haycnt is the length of the haystack. We assume haycnt >=1. | |
2459 // | |
2460 // Preserves haystack, haycnt, kills all other registers. | |
2461 // | |
2462 // If needle == R0, we search for the constant needleChar. | |
2463 void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt, | |
2464 Register needle, jchar needleChar, | |
2465 Register tmp1, Register tmp2) { | |
2466 | |
2467 assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2); | |
2468 | |
2469 Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End; | |
2470 Register needle0 = needle, // Contains needle[0]. | |
2471 addr = tmp1, | |
2472 ch1 = tmp2, | |
2473 ch2 = R0; | |
2474 | |
2475 //2 (variable) or 3 (const): | |
2476 if (needle != R0) lhz(needle0, 0, needle); // Preload needle character, needle has len==1. | |
2477 dcbtct(haystack, 0x00); // Indicate R/O access to haystack. | |
2478 | |
2479 srwi_(tmp2, haycnt, 1); // Shift right by exact_log2(UNROLL_FACTOR). | |
2480 mr(addr, haystack); | |
2481 beq(CCR0, L_FinalCheck); | |
2482 mtctr(tmp2); // Move to count register. | |
2483 //8: | |
2484 bind(L_InnerLoop); // Main work horse (2x unrolled search loop). | |
2485 lhz(ch1, 0, addr); // Load characters from haystack. | |
2486 lhz(ch2, 2, addr); | |
2487 (needle != R0) ? cmpw(CCR0, ch1, needle0) : cmplwi(CCR0, ch1, needleChar); | |
2488 (needle != R0) ? cmpw(CCR1, ch2, needle0) : cmplwi(CCR1, ch2, needleChar); | |
2489 beq(CCR0, L_Found1); // Did we find the needle? | |
2490 beq(CCR1, L_Found2); | |
2491 addi(addr, addr, 4); | |
2492 bdnz(L_InnerLoop); | |
2493 //16: | |
2494 bind(L_FinalCheck); | |
2495 andi_(R0, haycnt, 1); | |
2496 beq(CCR0, L_NotFound); | |
2497 lhz(ch1, 0, addr); // One position left at which we have to compare. | |
2498 (needle != R0) ? cmpw(CCR1, ch1, needle0) : cmplwi(CCR1, ch1, needleChar); | |
2499 beq(CCR1, L_Found3); | |
2500 //21: | |
2501 bind(L_NotFound); | |
2502 li(result, -1); // Not found. | |
2503 b(L_End); | |
2504 | |
2505 bind(L_Found2); | |
2506 addi(addr, addr, 2); | |
2507 //24: | |
2508 bind(L_Found1); | |
2509 bind(L_Found3); // Return index ... | |
2510 subf(addr, haystack, addr); // relative to haystack, | |
2511 srdi(result, addr, 1); // in characters. | |
2512 bind(L_End); | |
2513 } | |
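// Illustrative only: the 2x unrolled loop above computes, in effect,
//
//   for (int i = 0; i < haycnt; i++)
//     if (haystack[i] == needle_char) return i;
//   return -1;
//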
2514 | |
2515 | |
2516 // Implementation of IndexOf for jchar arrays. | |
2517 // | |
2518 // The lengths of haystack and needle are not constant, i.e. they are passed in registers. | |
2519 // | |
2520 // Preserves registers haystack, needle. | |
2521 // Kills registers haycnt, needlecnt. | |
2522 // Assumes that result differs from all other registers. | |
2523 // Haystack, needle are the addresses of jchar-arrays. | |
2524 // Haycnt, needlecnt are the lengths of them, respectively. | |
2525 // | |
2526 // Needlecntval must be zero or a 15-bit unsigned immediate > 1. | |
2527 void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt, | |
2528 Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval, | |
2529 Register tmp1, Register tmp2, Register tmp3, Register tmp4) { | |
2530 | |
2531 // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite! | |
2532 Label L_TooShort, L_Found, L_NotFound, L_End; | |
2533 Register last_addr = haycnt, // Kill haycnt at the beginning. | |
2534 addr = tmp1, | |
2535 n_start = tmp2, | |
2536 ch1 = tmp3, | |
2537 ch2 = R0; | |
2538 | |
2539 // ************************************************************************************************** | |
2540 // Prepare for main loop: optimized for needle count >=2, bail out otherwise. | |
2541 // ************************************************************************************************** | |
2542 | |
2543 //1 (variable) or 3 (const): | |
2544 dcbtct(needle, 0x00); // Indicate R/O access to needle. | |
2545 dcbtct(haystack, 0x00); // Indicate R/O access to haystack. | |
2546 | |
2547 // Compute last haystack addr to use if no match gets found. | |
2548 if (needlecntval == 0) { // variable needlecnt | |
2549 //3: | |
2550 subf(ch1, needlecnt, haycnt); // Last character index to compare is haycnt-needlecnt. | |
2551 addi(addr, haystack, -2); // Accesses use pre-increment. | |
2552 cmpwi(CCR6, needlecnt, 2); | |
2553 blt(CCR6, L_TooShort); // Variable needlecnt: handle short needle separately. | |
2554 slwi(ch1, ch1, 1); // Scale to number of bytes. | |
2555 lwz(n_start, 0, needle); // Load first 2 characters of needle. | |
2556 add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). | |
2557 addi(needlecnt, needlecnt, -2); // Rest of needle. | |
2558 } else { // constant needlecnt | |
2559 guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately"); | |
2560 assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate"); | |
2561 //5: | |
2562 addi(ch1, haycnt, -needlecntval); // Last character index to compare is haycnt-needlecnt. | |
2563 lwz(n_start, 0, needle); // Load first 2 characters of needle. | |
2564 addi(addr, haystack, -2); // Accesses use pre-increment. | |
2565 slwi(ch1, ch1, 1); // Scale to number of bytes. | |
2566 add(last_addr, haystack, ch1); // Point to last address to compare (haystack+2*(haycnt-needlecnt)). | |
2567 li(needlecnt, needlecntval-2); // Rest of needle. | |
2568 } | |
2569 | |
2570 // Main Loop (now we have at least 3 characters). | |
2571 //11: | |
2572 Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3; | |
2573 bind(L_OuterLoop); // Search for 1st 2 characters. | |
2574 Register addr_diff = tmp4; | |
2575 subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check. | |
2576 addi(addr, addr, 2); // This is the new address we want to use for comparing. | |
2577 srdi_(ch2, addr_diff, 2); | |
2578 beq(CCR0, L_FinalCheck); // 2 characters left? | |
2579 mtctr(ch2); // addr_diff/4 | |
2580 //16: | |
2581 bind(L_InnerLoop); // Main work horse (2x unrolled search loop) | |
2582 lwz(ch1, 0, addr); // Load 2 characters of haystack (ignore alignment). | |
2583 lwz(ch2, 2, addr); | |
2584 cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop). | |
2585 cmpw(CCR1, ch2, n_start); | |
2586 beq(CCR0, L_Comp1); // Did we find the needle start? | |
2587 beq(CCR1, L_Comp2); | |
2588 addi(addr, addr, 4); | |
2589 bdnz(L_InnerLoop); | |
2590 //24: | |
2591 bind(L_FinalCheck); | |
2592 rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff>>1)&1. | |
2593 beq(CCR0, L_NotFound); | |
2594 lwz(ch1, 0, addr); // One position left at which we have to compare. | |
2595 cmpw(CCR1, ch1, n_start); | |
2596 beq(CCR1, L_Comp3); | |
2597 //29: | |
2598 bind(L_NotFound); | |
2599 li(result, -1); // not found | |
2600 b(L_End); | |
2601 | |
2602 | |
2603 // ************************************************************************************************** | |
2604 // Special Case: unfortunately, the variable needle case can be called with needlecnt<2 | |
2605 // ************************************************************************************************** | |
2606 //31: | |
2607 if ((needlecntval >> 1) != 1) { // Const needlecnt is 2 or 3? Reduce code size. | |
2608 int nopcnt = 5; | |
2609 if (needlecntval != 0) ++nopcnt; // Balance alignment (other case: see below). | |
2610 if (needlecntval == 0) { // We have to handle these cases separately. | |
2611 Label L_OneCharLoop; | |
2612 bind(L_TooShort); | |
2613 mtctr(haycnt); | |
2614 lhz(n_start, 0, needle); // First character of needle | |
2615 bind(L_OneCharLoop); | |
2616 lhzu(ch1, 2, addr); | |
2617 cmpw(CCR1, ch1, n_start); | |
2618 beq(CCR1, L_Found); // Did we find the one character needle? | |
2619 bdnz(L_OneCharLoop); | |
2620 li(result, -1); // Not found. | |
2621 b(L_End); | |
2622 } // 8 instructions, so no impact on alignment. | |
2623 for (int x = 0; x < nopcnt; ++x) nop(); | |
2624 } | |
2625 | |
2626 // ************************************************************************************************** | |
2627 // Regular Case Part II: compare rest of needle (first 2 characters have been compared already) | |
2628 // ************************************************************************************************** | |
2629 | |
2630 // Compare the rest | |
2631 //36 if needlecntval==0, else 37: | |
2632 bind(L_Comp2); | |
2633 addi(addr, addr, 2); // First comparison has failed, 2nd one hit. | |
2634 bind(L_Comp1); // Addr points to possible needle start. | |
2635 bind(L_Comp3); // Could have created a copy and use a different return address but saving code size here. | |
2636 if (needlecntval != 2) { // Const needlecnt==2? | |
2637 if (needlecntval != 3) { | |
2638 if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt==2? | |
2639 Register ind_reg = tmp4; | |
2640 li(ind_reg, 2*2); // First 2 characters are already compared, use index 2. | |
2641 mtctr(needlecnt); // Decremented by 2, still > 0. | |
2642 //40: | |
2643 Label L_CompLoop; | |
2644 bind(L_CompLoop); | |
2645 lhzx(ch2, needle, ind_reg); | |
2646 lhzx(ch1, addr, ind_reg); | |
2647 cmpw(CCR1, ch1, ch2); | |
2648 bne(CCR1, L_OuterLoop); | |
2649 addi(ind_reg, ind_reg, 2); | |
2650 bdnz(L_CompLoop); | |
2651 } else { // No loop required if there's only one needle character left. | |
2652 lhz(ch2, 2*2, needle); | |
2653 lhz(ch1, 2*2, addr); | |
2654 cmpw(CCR1, ch1, ch2); | |
2655 bne(CCR1, L_OuterLoop); | |
2656 } | |
2657 } | |
2658 // Return index ... | |
2659 //46: | |
2660 bind(L_Found); | |
2661 subf(addr, haystack, addr); // relative to haystack, ... | |
2662 srdi(result, addr, 1); // in characters. | |
2663 //48: | |
2664 bind(L_End); | |
2665 } | |

// Implementation of Compare for jchar arrays.
//
// Kills the registers str1, str2, cnt1, cnt2.
// Kills cr0, ctr.
// Assumes that result differs from the input registers.
void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
                                    Register result_reg, Register tmp_reg) {
  assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg);

  Label Ldone, Lslow_case, Lslow_loop, Lfast_loop;
  Register cnt_diff  = R0,
           limit_reg = cnt1_reg,
           chr1_reg  = result_reg,
           chr2_reg  = cnt2_reg,
           addr_diff = str2_reg;

  // Offset 0 should be 32-byte aligned.
  //-4:
  dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
  dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
  //-2:
  // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters).
  subf(result_reg, cnt2_reg, cnt1_reg);   // difference between cnt1 and cnt2
  subf_(addr_diff, str1_reg, str2_reg);   // alias?
  beq(CCR0, Ldone);                       // return cnt difference if both strings are at the same address
  srawi(limit_reg, result_reg, 31);       // generate sign mask (cnt1/2 must be non-negative so cnt_diff can't overflow)
  mr(cnt_diff, result_reg);
  andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1 < cnt2 ? cnt1 - cnt2 : 0
  add_(limit_reg, cnt2_reg, limit_reg);   // min(cnt1, cnt2) == 0?
  beq(CCR0, Ldone);                       // return cnt difference if one string has length 0

  lhz(chr1_reg, 0, str1_reg);             // optional: early out if first characters mismatch
  lhzx(chr2_reg, str1_reg, addr_diff);    // optional: early out if first characters mismatch
  addi(tmp_reg, limit_reg, -1);           // min(cnt1, cnt2) - 1
  subf_(result_reg, chr2_reg, chr1_reg);  // optional: early out if first characters mismatch
  bne(CCR0, Ldone);                       // optional: early out if first characters mismatch

  // Set loop counter by scaling down tmp_reg.
  srawi_(chr2_reg, tmp_reg, exact_log2(4)); // (min(cnt1, cnt2) - 1) / 4
  ble(CCR0, Lslow_case);                    // need > 4 characters for fast loop
  andi(limit_reg, tmp_reg, 4-1);            // remaining characters

  // Adapt str1_reg/str2_reg for the first loop iteration.
  mtctr(chr2_reg);                          // (min(cnt1, cnt2) - 1) / 4
  addi(limit_reg, limit_reg, 4+1);          // compare last 5-8 characters in slow_case if mismatch found in fast_loop
  //16:
  // Compare the rest of the characters.
  bind(Lfast_loop);
  ld(chr1_reg, 0, str1_reg);
  ldx(chr2_reg, str1_reg, addr_diff);
  cmpd(CCR0, chr2_reg, chr1_reg);
  bne(CCR0, Lslow_case);                    // return chr1_reg
  addi(str1_reg, str1_reg, 4*2);
  bdnz(Lfast_loop);
  addi(limit_reg, limit_reg, -4);           // no mismatch found in fast_loop, only 1-4 characters missing
  //23:
  bind(Lslow_case);
  mtctr(limit_reg);
  //24:
  bind(Lslow_loop);
  lhz(chr1_reg, 0, str1_reg);
  lhzx(chr2_reg, str1_reg, addr_diff);
  subf_(result_reg, chr2_reg, chr1_reg);
  bne(CCR0, Ldone);                         // return chr1_reg
  addi(str1_reg, str1_reg, 1*2);
  bdnz(Lslow_loop);
  //30:
  // If strings are equal up to min length, return the length difference.
  mr(result_reg, cnt_diff);
  nop();                                    // alignment
  //32:
  // Otherwise, return the difference between the first mismatched chars.
  bind(Ldone);
}
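
// Illustrative sketch, not emitted code: the comparison contract generated
// above, as hypothetical plain C++. A mismatch within the first
// min(cnt1, cnt2) characters yields the difference of the mismatching chars;
// otherwise the result is the length difference cnt1 - cnt2.
#if 0
static int string_compare_ref(const jchar* str1, int cnt1,
                              const jchar* str2, int cnt2) {
  int limit = (cnt1 < cnt2) ? cnt1 : cnt2; // min(cnt1, cnt2)
  for (int i = 0; i < limit; i++) {
    if (str1[i] != str2[i]) return (int)str1[i] - (int)str2[i];
  }
  return cnt1 - cnt2; // Equal up to min length.
}
#endif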


// Compare char[] arrays.
//
// str1_reg   USE only
// str2_reg   USE only
// cnt_reg    USE_DEF, due to tmp reg shortage
// result_reg DEF only, might compromise USE only registers
void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
                                        Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
                                        Register tmp5_reg) {

  // str1 may be the same register as str2, which can occur, e.g., after scalar replacement.
  assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
  assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);

  // Offset 0 should be 32-byte aligned.
  Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false;
  Register index_reg = tmp5_reg;
  Register cbc_iter  = tmp4_reg;

  //-1:
  dcbtct(str1_reg, 0x00);                  // Indicate R/O access to str1.
  dcbtct(str2_reg, 0x00);                  // Indicate R/O access to str2.
  //1:
  andi(cbc_iter, cnt_reg, 4-1);            // Remaining iterations after 4 java characters per iteration loop.
  li(index_reg, 0);                        // init
  li(result_reg, 0);                       // assume false
  srwi_(tmp2_reg, cnt_reg, exact_log2(4)); // Div: 4 java characters per iteration (main loop).

  cmpwi(CCR1, cbc_iter, 0);                // CCR1 = (cbc_iter == 0)
  beq(CCR0, Linit_cbc);                    // too short
  mtctr(tmp2_reg);
  //8:
  bind(Lloop);
  ldx(tmp1_reg, str1_reg, index_reg);
  ldx(tmp2_reg, str2_reg, index_reg);
  cmpd(CCR0, tmp1_reg, tmp2_reg);
  bne(CCR0, Ldone_false);                  // Unequal char pair found -> done.
  addi(index_reg, index_reg, 4*sizeof(jchar));
  bdnz(Lloop);
  //14:
  bind(Linit_cbc);
  beq(CCR1, Ldone_true);
  mtctr(cbc_iter);
  //16:
  bind(Lcbc);
  lhzx(tmp1_reg, str1_reg, index_reg);
  lhzx(tmp2_reg, str2_reg, index_reg);
  cmpw(CCR0, tmp1_reg, tmp2_reg);
  bne(CCR0, Ldone_false);                  // Unequal char pair found -> done.
  addi(index_reg, index_reg, 1*sizeof(jchar));
  bdnz(Lcbc);
  nop();
  bind(Ldone_true);
  li(result_reg, 1);
  //24:
  bind(Ldone_false);
}
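
// Illustrative sketch, not emitted code: the generator above compares 4 java
// characters (one 64-bit word) per main-loop iteration via ldx/cmpd and the
// remaining 0-3 characters via lhzx/cmpw. A hypothetical C++ equivalent of
// the overall contract:
#if 0
static bool char_arrays_equals_ref(const jchar* str1, const jchar* str2, int cnt) {
  for (int i = 0; i < cnt; i++) {
    if (str1[i] != str2[i]) return false; // Unequal char pair found.
  }
  return true;
}
#endif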


void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
                                           Register tmp1_reg, Register tmp2_reg) {
  // str1 may be the same register as str2, which can occur, e.g., after scalar replacement.
  assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg);
  assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg);
  assert(sizeof(jchar) == 2, "must be");
  assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate");

  Label Ldone_false;

  if (cntval < 16) { // short case
    if (cntval != 0) li(result_reg, 0); // assume false

    const int num_bytes = cntval*sizeof(jchar);
    int index = 0;
    for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) {
      ld(tmp1_reg, index, str1_reg);
      ld(tmp2_reg, index, str2_reg);
      cmpd(CCR0, tmp1_reg, tmp2_reg);
      bne(CCR0, Ldone_false);
    }
    if (cntval & 2) {
      lwz(tmp1_reg, index, str1_reg);
      lwz(tmp2_reg, index, str2_reg);
      cmpw(CCR0, tmp1_reg, tmp2_reg);
      bne(CCR0, Ldone_false);
      index += 4;
    }
    if (cntval & 1) {
      lhz(tmp1_reg, index, str1_reg);
      lhz(tmp2_reg, index, str2_reg);
      cmpw(CCR0, tmp1_reg, tmp2_reg);
      bne(CCR0, Ldone_false);
    }
    // fallthrough: true
  } else {
    Label Lloop;
    Register index_reg = tmp1_reg;
    const int loopcnt = cntval/4;
    assert(loopcnt > 0, "must be");
    // Offset 0 should be 32-byte aligned.
    //2:
    dcbtct(str1_reg, 0x00); // Indicate R/O access to str1.
    dcbtct(str2_reg, 0x00); // Indicate R/O access to str2.
    li(tmp2_reg, loopcnt);
    li(index_reg, 0);       // init
    li(result_reg, 0);      // assume false
    mtctr(tmp2_reg);
    //8:
    bind(Lloop);
    ldx(R0, str1_reg, index_reg);
    ldx(tmp2_reg, str2_reg, index_reg);
    cmpd(CCR0, R0, tmp2_reg);
    bne(CCR0, Ldone_false); // Unequal char pair found -> done.
    addi(index_reg, index_reg, 4*sizeof(jchar));
    bdnz(Lloop);
    //14:
    if (cntval & 2) {
      lwzx(R0, str1_reg, index_reg);
      lwzx(tmp2_reg, str2_reg, index_reg);
      cmpw(CCR0, R0, tmp2_reg);
      bne(CCR0, Ldone_false);
      if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar)); // Advance past the pair only if a trailing char follows.
    }
    if (cntval & 1) {
      lhzx(R0, str1_reg, index_reg);
      lhzx(tmp2_reg, str2_reg, index_reg);
      cmpw(CCR0, R0, tmp2_reg);
      bne(CCR0, Ldone_false);
    }
    // fallthrough: true
  }
  li(result_reg, 1);
  bind(Ldone_false);
}
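
// Worked example (documentation only): for cntval == 7 in the short case
// (14 bytes), one 8-byte ld/cmpd pair covers characters 0-3, the cntval & 2
// tail adds a 4-byte lwz/cmpw pair for characters 4-5, and the cntval & 1
// tail adds a final 2-byte lhz/cmpw pair for character 6.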


void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
#ifdef ASSERT
  Label ok;
  if (check_equal) {
    beq(CCR0, ok);
  } else {
    bne(CCR0, ok);
  }
  stop(msg, id);
  bind(ok);
#endif
}
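
// Hypothetical usage sketch (register, msg and id values invented for
// illustration): a compare must set CCR0 first, then asm_assert traps unless
// the condition holds, e.g.
//   cmpdi(CCR0, Rvalue, 0);
//   asm_assert(true /*check_equal*/, "Rvalue must be zero", 0x123);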

void MacroAssembler::asm_assert_mems_zero(bool check_equal, int size, int mem_offset,
                                          Register mem_base, const char* msg, int id) {
#ifdef ASSERT
  switch (size) {
    case 4:
      lwz(R0, mem_offset, mem_base);
      cmpwi(CCR0, R0, 0);
      break;
    case 8:
      ld(R0, mem_offset, mem_base);
      cmpdi(CCR0, R0, 0);
      break;
    default:
      ShouldNotReachHere();
  }
  asm_assert(check_equal, msg, id);
#endif // ASSERT
}

void MacroAssembler::verify_thread() {
  if (VerifyThread) {
    unimplemented("'VerifyThread' currently not implemented on PPC");
  }
}

// READ: oop. KILL: R0. Volatile floats may be killed.
void MacroAssembler::verify_oop(Register oop, const char* msg) {
  if (!VerifyOops) {
    return;
  }
  Register tmp = R11; // Will be preserved (shuffled through R0 below).
  assert(oop != tmp, "precondition");
  unsigned int nbytes_save = 10*8; // 10 volatile gprs
  address/* FunctionDescriptor** */fd =
    StubRoutines::verify_oop_subroutine_entry_address();
  // Save tmp.
  mr(R0, tmp);
  // Kill tmp.
  save_LR_CR(tmp);
  push_frame_abi112(nbytes_save, tmp);
  // Restore tmp.
  mr(tmp, R0);
  save_volatile_gprs(R1_SP, 112); // except R0
  // Load FunctionDescriptor**.
  load_const(tmp, fd);
  // Load FunctionDescriptor*.
  ld(tmp, 0, tmp);
  mr(R4_ARG2, oop);
  load_const(R3_ARG1, (address)msg);
  // Call destination for its side effect.
  call_c(tmp);
  restore_volatile_gprs(R1_SP, 112); // except R0
  pop_frame();
  // Save tmp.
  mr(R0, tmp);
  // Kill tmp.
  restore_LR_CR(tmp);
  // Restore tmp.
  mr(tmp, R0);
}
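
// Note on the sequence above: tmp (R11) is shuffled through R0 because
// save_LR_CR, push_frame_abi112 and restore_LR_CR each need a scratch
// register, while the other volatile GPRs are only safe to use once
// save_volatile_gprs has spilled them into the new ABI frame at offset 112.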

const char* stop_types[] = {
  "stop",
  "untested",
  "unimplemented",
  "shouldnotreachhere"
};

static void stop_on_request(int tp, const char* msg) {
  tty->print("PPC assembly code requires stop: (%s) %s\n", stop_types[tp%/*stop_end*/4], msg);
  guarantee(false, err_msg("PPC assembly code requires stop: %s", msg));
}

// Call a C function that prints output.
void MacroAssembler::stop(int type, const char* msg, int id) {
#ifndef PRODUCT
  block_comment(err_msg("stop: %s %s {", stop_types[type%stop_end], msg));
#else
  block_comment("stop {");
#endif

  // Set up arguments.
  load_const_optimized(R3_ARG1, type);
  load_const_optimized(R4_ARG2, (void *)msg, /*tmp=*/R0);
  call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), R3_ARG1, R4_ARG2);
  illtrap();
  emit_int32(id);
  block_comment("} stop;");
}

#ifndef PRODUCT
// Write pattern 0x0101010101010101 in memory region [low-before, high+after].
// val and addr are temp registers.
// If low == addr, addr is killed.
// high is preserved.
void MacroAssembler::zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) {
  if (!ZapMemory) return;

  assert_different_registers(low, val);

  BLOCK_COMMENT("zap memory region {");
  load_const_optimized(val, 0x0101010101010101);
  int size = before + after;
  if (low == high && size < 5 && size > 0) {
    int offset = -before*BytesPerWord;
    for (int i = 0; i < size; ++i) {
      std(val, offset, low);
      offset += (1*BytesPerWord);
    }
  } else {
    addi(addr, low, -before*BytesPerWord);
    assert_different_registers(high, val);
    if (after) addi(high, high, after * BytesPerWord);
    Label loop;
    bind(loop);
    std(val, 0, addr);
    addi(addr, addr, 8);
    cmpd(CCR6, addr, high);
    ble(CCR6, loop);
    if (after) addi(high, high, -after * BytesPerWord); // Correct back to old value.
  }
  BLOCK_COMMENT("} zap memory region");
}
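
// Worked example (documentation only): with low != high, before == 1 and
// after == 2, the loop stores 0x01 bytes over
// [low - 1*BytesPerWord, high + 2*BytesPerWord], temporarily bumping high
// and restoring it afterwards.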

#endif // !PRODUCT