Mercurial > hg > truffle
comparison src/cpu/ppc/vm/macroAssembler_ppc.cpp @ 14445:67fa91961822
8029940: PPC64 (part 122): C2 compiler port
Reviewed-by: kvn
author | goetz |
---|---|
date | Wed, 11 Dec 2013 00:06:11 +0100 |
parents | b0133e4187d3 |
children | b858620b0081 |
comparison
equal
deleted
inserted
replaced
14444:492e67693373 | 14445:67fa91961822 |
---|---|
95 case 1: stb(dst, offs, base); break; | 95 case 1: stb(dst, offs, base); break; |
96 default: ShouldNotReachHere(); | 96 default: ShouldNotReachHere(); |
97 } | 97 } |
98 } | 98 } |
99 | 99 |
100 void MacroAssembler::align(int modulus) { | 100 void MacroAssembler::align(int modulus, int max, int rem) { |
101 while (offset() % modulus != 0) nop(); | 101 int padding = (rem + modulus - (offset() % modulus)) % modulus; |
102 if (padding > max) return; | |
103 for (int c = (padding >> 2); c > 0; --c) { nop(); } | |
102 } | 104 } |
103 | 105 |
104 // Issue instructions that calculate given TOC from global TOC. | 106 // Issue instructions that calculate given TOC from global TOC. |
105 void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16, | 107 void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16, |
106 bool add_relocation, bool emit_dummy_addr) { | 108 bool add_relocation, bool emit_dummy_addr) { |
184 } | 186 } |
185 } | 187 } |
186 | 188 |
187 #ifdef _LP64 | 189 #ifdef _LP64 |
188 // Patch compressed oops or klass constants. | 190 // Patch compressed oops or klass constants. |
191 // Assembler sequence is | |
192 // 1) compressed oops: | |
193 // lis rx = const.hi | |
194 // ori rx = rx | const.lo | |
195 // 2) compressed klass: | |
196 // lis rx = const.hi | |
197 // clrldi rx = rx & 0xFFFFffff // clearMS32b, optional | |
198 // ori rx = rx | const.lo | |
199 // The clrldi, when present, is skipped over. | |
189 int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) { | 200 int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) { |
190 assert(UseCompressedOops, "Should only patch compressed oops"); | 201 assert(UseCompressedOops, "Should only patch compressed oops"); |
191 | 202 |
192 const address inst2_addr = a; | 203 const address inst2_addr = a; |
193 const int inst2 = *(int *)inst2_addr; | 204 const int inst2 = *(int *)inst2_addr; |
194 | 205 |
195 // The relocation points to the second instruction, the addi, | 206 // The relocation points to the second instruction, the ori, |
196 // and the addi reads and writes the same register dst. | 207 // and the ori reads and writes the same register dst. |
197 const int dst = inv_rt_field(inst2); | 208 const int dst = inv_rta_field(inst2); |
198 assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); | 209 assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst"); |
199 // Now, find the preceding lis which writes to dst. | 210 // Now, find the preceding lis which writes to dst. |
200 int inst1 = 0; | 211 int inst1 = 0; |
201 address inst1_addr = inst2_addr - BytesPerInstWord; | 212 address inst1_addr = inst2_addr - BytesPerInstWord; |
202 bool inst1_found = false; | 213 bool inst1_found = false; |
203 while (inst1_addr >= bound) { | 214 while (inst1_addr >= bound) { |
208 assert(inst1_found, "inst is not lis"); | 219 assert(inst1_found, "inst is not lis"); |
209 | 220 |
210 int xc = (data >> 16) & 0xffff; | 221 int xc = (data >> 16) & 0xffff; |
211 int xd = (data >> 0) & 0xffff; | 222 int xd = (data >> 0) & 0xffff; |
212 | 223 |
213 set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2 | 224 set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo |
214 set_imm((int *)inst2_addr, (short)(xd)); | 225 set_imm((int *)inst2_addr, (short)(xd)); |
226 | |
215 return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr); | 227 return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr); |
216 } | 228 } |
217 | 229 |
218 // Get compressed oop or klass constant. | 230 // Get compressed oop or klass constant. |
219 narrowOop MacroAssembler::get_narrow_oop(address a, address bound) { | 231 narrowOop MacroAssembler::get_narrow_oop(address a, address bound) { |
220 assert(UseCompressedOops, "Should only patch compressed oops"); | 232 assert(UseCompressedOops, "Should only patch compressed oops"); |
221 | 233 |
222 const address inst2_addr = a; | 234 const address inst2_addr = a; |
223 const int inst2 = *(int *)inst2_addr; | 235 const int inst2 = *(int *)inst2_addr; |
224 | 236 |
225 // The relocation points to the second instruction, the addi, | 237 // The relocation points to the second instruction, the ori, |
226 // and the addi reads and writes the same register dst. | 238 // and the ori reads and writes the same register dst. |
227 const int dst = inv_rt_field(inst2); | 239 const int dst = inv_rta_field(inst2); |
228 assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); | 240 assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst"); |
229 // Now, find the preceding lis which writes to dst. | 241 // Now, find the preceding lis which writes to dst. |
230 int inst1 = 0; | 242 int inst1 = 0; |
231 address inst1_addr = inst2_addr - BytesPerInstWord; | 243 address inst1_addr = inst2_addr - BytesPerInstWord; |
232 bool inst1_found = false; | 244 bool inst1_found = false; |
233 | 245 |
236 if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;} | 248 if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;} |
237 inst1_addr -= BytesPerInstWord; | 249 inst1_addr -= BytesPerInstWord; |
238 } | 250 } |
239 assert(inst1_found, "inst is not lis"); | 251 assert(inst1_found, "inst is not lis"); |
240 | 252 |
241 uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff)); | 253 uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff)); |
242 uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16); | 254 uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16); |
255 | |
243 return (int) (xl | xh); | 256 return (int) (xl | xh); |
244 } | 257 } |
245 #endif // _LP64 | 258 #endif // _LP64 |
246 | 259 |
247 void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) { | 260 void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) { |
250 // we will end up with a failing NativeCall::verify(x) where x is | 263 // we will end up with a failing NativeCall::verify(x) where x is |
251 // the address of the constant pool entry. | 264 // the address of the constant pool entry. |
252 // FIXME: We should insert relocation information for oops at the constant | 265 // FIXME: We should insert relocation information for oops at the constant |
253 // pool entries instead of inserting it at the loads; patching of a constant | 266 // pool entries instead of inserting it at the loads; patching of a constant |
254 // pool entry should be less expensive. | 267 // pool entry should be less expensive. |
255 Unimplemented(); | 268 address oop_address = address_constant((address)a.value(), RelocationHolder::none); |
256 if (false) { | 269 // Relocate at the pc of the load. |
257 address oop_address = address_constant((address)a.value(), RelocationHolder::none); | 270 relocate(a.rspec()); |
258 // Relocate at the pc of the load. | 271 toc_offset = (int)(oop_address - code()->consts()->start()); |
259 relocate(a.rspec()); | |
260 toc_offset = (int)(oop_address - code()->consts()->start()); | |
261 } | |
262 ld_largeoffset_unchecked(dst, toc_offset, toc, true); | 272 ld_largeoffset_unchecked(dst, toc_offset, toc, true); |
263 } | 273 } |
264 | 274 |
265 bool MacroAssembler::is_load_const_from_method_toc_at(address a) { | 275 bool MacroAssembler::is_load_const_from_method_toc_at(address a) { |
266 const address inst1_addr = a; | 276 const address inst1_addr = a; |
530 const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord; | 540 const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord; |
531 masm.bc(opposite_boint, biint, not_taken_pc); | 541 masm.bc(opposite_boint, biint, not_taken_pc); |
532 masm.b(dest); | 542 masm.b(dest); |
533 } | 543 } |
534 } | 544 } |
535 ICache::invalidate_range(instruction_addr, code_size); | 545 ICache::ppc64_flush_icache_bytes(instruction_addr, code_size); |
536 } | 546 } |
537 | 547 |
538 // Emit a NOT mt-safe patchable 64 bit absolute call/jump. | 548 // Emit a NOT mt-safe patchable 64 bit absolute call/jump. |
539 void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) { | 549 void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) { |
540 // get current pc | 550 // get current pc |
671 ResourceMark rm; | 681 ResourceMark rm; |
672 int code_size = MacroAssembler::bxx64_patchable_size; | 682 int code_size = MacroAssembler::bxx64_patchable_size; |
673 CodeBuffer buf(instruction_addr, code_size); | 683 CodeBuffer buf(instruction_addr, code_size); |
674 MacroAssembler masm(&buf); | 684 MacroAssembler masm(&buf); |
675 masm.bxx64_patchable(dest, relocInfo::none, link); | 685 masm.bxx64_patchable(dest, relocInfo::none, link); |
676 ICache::invalidate_range(instruction_addr, code_size); | 686 ICache::ppc64_flush_icache_bytes(instruction_addr, code_size); |
677 } | 687 } |
678 | 688 |
679 // Get dest address of a bxx64_patchable instruction. | 689 // Get dest address of a bxx64_patchable instruction. |
680 address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) { | 690 address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) { |
681 if (is_bxx64_patchable_variant1_at(instruction_addr, link)) { | 691 if (is_bxx64_patchable_variant1_at(instruction_addr, link)) { |
956 // conventions. | 966 // conventions. |
957 // We don't use the TOC in generated code, so there is no need to save | 967 // We don't use the TOC in generated code, so there is no need to save |
958 // and restore its value. | 968 // and restore its value. |
959 address MacroAssembler::call_c(Register fd) { | 969 address MacroAssembler::call_c(Register fd) { |
960 return branch_to(fd, /*and_link=*/true, | 970 return branch_to(fd, /*and_link=*/true, |
971 /*save toc=*/false, | |
972 /*restore toc=*/false, | |
973 /*load toc=*/true, | |
974 /*load env=*/true); | |
975 } | |
976 | |
977 address MacroAssembler::call_c_and_return_to_caller(Register fd) { | |
978 return branch_to(fd, /*and_link=*/false, | |
961 /*save toc=*/false, | 979 /*save toc=*/false, |
962 /*restore toc=*/false, | 980 /*restore toc=*/false, |
963 /*load toc=*/true, | 981 /*load toc=*/true, |
964 /*load env=*/true); | 982 /*load env=*/true); |
965 } | 983 } |
2313 // known pc and don't have to rely on the native call having a | 2331 // known pc and don't have to rely on the native call having a |
2314 // standard frame linkage where we can find the pc. | 2332 // standard frame linkage where we can find the pc. |
2315 if (last_Java_pc != noreg) | 2333 if (last_Java_pc != noreg) |
2316 std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); | 2334 std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); |
2317 | 2335 |
2318 // set last_Java_sp last | 2336 // Set last_Java_sp last. |
2319 std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); | 2337 std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); |
2320 } | 2338 } |
2321 | 2339 |
2322 void MacroAssembler::reset_last_Java_frame(void) { | 2340 void MacroAssembler::reset_last_Java_frame(void) { |
2323 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), | 2341 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), |
2450 } | 2468 } |
2451 } else { | 2469 } else { |
2452 load_const(R30, Universe::narrow_ptrs_base_addr(), tmp); | 2470 load_const(R30, Universe::narrow_ptrs_base_addr(), tmp); |
2453 ld(R30, 0, R30); | 2471 ld(R30, 0, R30); |
2454 } | 2472 } |
2473 } | |
2474 | |
2475 // Clear Array | |
2476 // Kills both input registers. tmp == R0 is allowed. | |
2477 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) { | |
2478 // Procedure for large arrays (uses data cache block zero instruction). | |
2479 Label startloop, fast, fastloop, small_rest, restloop, done; | |
2480 const int cl_size = VM_Version::get_cache_line_size(), | |
2481 cl_dwords = cl_size>>3, | |
2482 cl_dw_addr_bits = exact_log2(cl_dwords), | |
2483 dcbz_min = 1; // Min count of dcbz executions, needs to be >0. | |
2484 | |
2485 //2: | |
2486 cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included). | |
2487 blt(CCR1, small_rest); // Too small. | |
2488 rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line. | |
2489 beq(CCR0, fast); // Already 128byte aligned. | |
2490 | |
2491 subfic(tmp, tmp, cl_dwords); | |
2492 mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords). | |
2493 subf(cnt_dwords, tmp, cnt_dwords); // rest. | |
2494 li(tmp, 0); | |
2495 //10: | |
2496 bind(startloop); // Clear at the beginning to reach 128byte boundary. | |
2497 std(tmp, 0, base_ptr); // Clear 8byte aligned block. | |
2498 addi(base_ptr, base_ptr, 8); | |
2499 bdnz(startloop); | |
2500 //13: | |
2501 bind(fast); // Clear 128byte blocks. | |
2502 srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0). | |
2503 andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords. | |
2504 mtctr(tmp); // Load counter. | |
2505 //16: | |
2506 bind(fastloop); | |
2507 dcbz(base_ptr); // Clear 128byte aligned block. | |
2508 addi(base_ptr, base_ptr, cl_size); | |
2509 bdnz(fastloop); | |
2510 if (InsertEndGroupPPC64) { endgroup(); } else { nop(); } | |
2511 //20: | |
2512 bind(small_rest); | |
2513 cmpdi(CCR0, cnt_dwords, 0); // size 0? | |
2514 beq(CCR0, done); // rest == 0 | |
2515 li(tmp, 0); | |
2516 mtctr(cnt_dwords); // Load counter. | |
2517 //24: | |
2518 bind(restloop); // Clear rest. | |
2519 std(tmp, 0, base_ptr); // Clear 8byte aligned block. | |
2520 addi(base_ptr, base_ptr, 8); | |
2521 bdnz(restloop); | |
2522 //27: | |
2523 bind(done); | |
2455 } | 2524 } |
2456 | 2525 |
2457 /////////////////////////////////////////// String intrinsics //////////////////////////////////////////// | 2526 /////////////////////////////////////////// String intrinsics //////////////////////////////////////////// |
2458 | 2527 |
2459 // Search for a single jchar in a jchar[]. | 2528 // Search for a single jchar in a jchar[]. |
2924 // READ: oop. KILL: R0. Volatile floats perhaps. | 2993 // READ: oop. KILL: R0. Volatile floats perhaps. |
2925 void MacroAssembler::verify_oop(Register oop, const char* msg) { | 2994 void MacroAssembler::verify_oop(Register oop, const char* msg) { |
2926 if (!VerifyOops) { | 2995 if (!VerifyOops) { |
2927 return; | 2996 return; |
2928 } | 2997 } |
2929 // will be preserved. | 2998 // Will be preserved. |
2930 Register tmp = R11; | 2999 Register tmp = R11; |
2931 assert(oop != tmp, "precondition"); | 3000 assert(oop != tmp, "precondition"); |
2932 unsigned int nbytes_save = 10*8; // 10 volatile gprs | 3001 unsigned int nbytes_save = 10*8; // 10 volatile gprs |
2933 address/* FunctionDescriptor** */fd = | 3002 address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address(); |
2934 StubRoutines::verify_oop_subroutine_entry_address(); | |
2935 // save tmp | 3003 // save tmp |
2936 mr(R0, tmp); | 3004 mr(R0, tmp); |
2937 // kill tmp | 3005 // kill tmp |
2938 save_LR_CR(tmp); | 3006 save_LR_CR(tmp); |
2939 push_frame_abi112(nbytes_save, tmp); | 3007 push_frame_abi112(nbytes_save, tmp); |