comparison src/cpu/ppc/vm/macroAssembler_ppc.cpp @ 14445:67fa91961822

8029940: PPC64 (part 122): C2 compiler port Reviewed-by: kvn
author goetz
date Wed, 11 Dec 2013 00:06:11 +0100
parents b0133e4187d3
children b858620b0081
comparison
equal deleted inserted replaced
14444:492e67693373 14445:67fa91961822
95 case 1: stb(dst, offs, base); break; 95 case 1: stb(dst, offs, base); break;
96 default: ShouldNotReachHere(); 96 default: ShouldNotReachHere();
97 } 97 }
98 } 98 }
99 99
100 void MacroAssembler::align(int modulus) { 100 void MacroAssembler::align(int modulus, int max, int rem) {
101 while (offset() % modulus != 0) nop(); 101 int padding = (rem + modulus - (offset() % modulus)) % modulus;
102 if (padding > max) return;
103 for (int c = (padding >> 2); c > 0; --c) { nop(); }
102 } 104 }
103 105
104 // Issue instructions that calculate given TOC from global TOC. 106 // Issue instructions that calculate given TOC from global TOC.
105 void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16, 107 void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16,
106 bool add_relocation, bool emit_dummy_addr) { 108 bool add_relocation, bool emit_dummy_addr) {
184 } 186 }
185 } 187 }
186 188
187 #ifdef _LP64 189 #ifdef _LP64
188 // Patch compressed oops or klass constants. 190 // Patch compressed oops or klass constants.
191 // Assembler sequence is
192 // 1) compressed oops:
193 // lis rx = const.hi
194 // ori rx = rx | const.lo
195 // 2) compressed klass:
196 // lis rx = const.hi
197 // clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
198 // ori rx = rx | const.lo
199 // Clrldi will be passed by.
189 int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) { 200 int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
190 assert(UseCompressedOops, "Should only patch compressed oops"); 201 assert(UseCompressedOops, "Should only patch compressed oops");
191 202
192 const address inst2_addr = a; 203 const address inst2_addr = a;
193 const int inst2 = *(int *)inst2_addr; 204 const int inst2 = *(int *)inst2_addr;
194 205
195 // The relocation points to the second instruction, the addi, 206 // The relocation points to the second instruction, the ori,
196 // and the addi reads and writes the same register dst. 207 // and the ori reads and writes the same register dst.
197 const int dst = inv_rt_field(inst2); 208 const int dst = inv_rta_field(inst2);
198 assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); 209 assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
199 // Now, find the preceding addis which writes to dst. 210 // Now, find the preceding addis which writes to dst.
200 int inst1 = 0; 211 int inst1 = 0;
201 address inst1_addr = inst2_addr - BytesPerInstWord; 212 address inst1_addr = inst2_addr - BytesPerInstWord;
202 bool inst1_found = false; 213 bool inst1_found = false;
203 while (inst1_addr >= bound) { 214 while (inst1_addr >= bound) {
208 assert(inst1_found, "inst is not lis"); 219 assert(inst1_found, "inst is not lis");
209 220
210 int xc = (data >> 16) & 0xffff; 221 int xc = (data >> 16) & 0xffff;
211 int xd = (data >> 0) & 0xffff; 222 int xd = (data >> 0) & 0xffff;
212 223
213 set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2 224 set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
214 set_imm((int *)inst2_addr, (short)(xd)); 225 set_imm((int *)inst2_addr, (short)(xd));
226
215 return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr); 227 return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
216 } 228 }
217 229
218 // Get compressed oop or klass constant. 230 // Get compressed oop or klass constant.
219 narrowOop MacroAssembler::get_narrow_oop(address a, address bound) { 231 narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
220 assert(UseCompressedOops, "Should only patch compressed oops"); 232 assert(UseCompressedOops, "Should only patch compressed oops");
221 233
222 const address inst2_addr = a; 234 const address inst2_addr = a;
223 const int inst2 = *(int *)inst2_addr; 235 const int inst2 = *(int *)inst2_addr;
224 236
225 // The relocation points to the second instruction, the addi, 237 // The relocation points to the second instruction, the ori,
226 // and the addi reads and writes the same register dst. 238 // and the ori reads and writes the same register dst.
227 const int dst = inv_rt_field(inst2); 239 const int dst = inv_rta_field(inst2);
228 assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst"); 240 assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be addi reading and writing dst");
229 // Now, find the preceding lis which writes to dst. 241 // Now, find the preceding lis which writes to dst.
230 int inst1 = 0; 242 int inst1 = 0;
231 address inst1_addr = inst2_addr - BytesPerInstWord; 243 address inst1_addr = inst2_addr - BytesPerInstWord;
232 bool inst1_found = false; 244 bool inst1_found = false;
233 245
236 if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;} 248 if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;}
237 inst1_addr -= BytesPerInstWord; 249 inst1_addr -= BytesPerInstWord;
238 } 250 }
239 assert(inst1_found, "inst is not lis"); 251 assert(inst1_found, "inst is not lis");
240 252
241 uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff)); 253 uint xl = ((unsigned int) (get_imm(inst2_addr, 0) & 0xffff));
242 uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16); 254 uint xh = (((get_imm(inst1_addr, 0)) & 0xffff) << 16);
255
243 return (int) (xl | xh); 256 return (int) (xl | xh);
244 } 257 }
245 #endif // _LP64 258 #endif // _LP64
246 259
247 void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) { 260 void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) {
250 // we will end up with a failing NativeCall::verify(x) where x is 263 // we will end up with a failing NativeCall::verify(x) where x is
251 // the address of the constant pool entry. 264 // the address of the constant pool entry.
252 // FIXME: We should insert relocation information for oops at the constant 265 // FIXME: We should insert relocation information for oops at the constant
253 // pool entries instead of inserting it at the loads; patching of a constant 266 // pool entries instead of inserting it at the loads; patching of a constant
254 // pool entry should be less expensive. 267 // pool entry should be less expensive.
255 Unimplemented(); 268 address oop_address = address_constant((address)a.value(), RelocationHolder::none);
256 if (false) { 269 // Relocate at the pc of the load.
257 address oop_address = address_constant((address)a.value(), RelocationHolder::none); 270 relocate(a.rspec());
258 // Relocate at the pc of the load. 271 toc_offset = (int)(oop_address - code()->consts()->start());
259 relocate(a.rspec());
260 toc_offset = (int)(oop_address - code()->consts()->start());
261 }
262 ld_largeoffset_unchecked(dst, toc_offset, toc, true); 272 ld_largeoffset_unchecked(dst, toc_offset, toc, true);
263 } 273 }
264 274
265 bool MacroAssembler::is_load_const_from_method_toc_at(address a) { 275 bool MacroAssembler::is_load_const_from_method_toc_at(address a) {
266 const address inst1_addr = a; 276 const address inst1_addr = a;
530 const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord; 540 const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord;
531 masm.bc(opposite_boint, biint, not_taken_pc); 541 masm.bc(opposite_boint, biint, not_taken_pc);
532 masm.b(dest); 542 masm.b(dest);
533 } 543 }
534 } 544 }
535 ICache::invalidate_range(instruction_addr, code_size); 545 ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
536 } 546 }
537 547
538 // Emit a NOT mt-safe patchable 64 bit absolute call/jump. 548 // Emit a NOT mt-safe patchable 64 bit absolute call/jump.
539 void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) { 549 void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) {
540 // get current pc 550 // get current pc
671 ResourceMark rm; 681 ResourceMark rm;
672 int code_size = MacroAssembler::bxx64_patchable_size; 682 int code_size = MacroAssembler::bxx64_patchable_size;
673 CodeBuffer buf(instruction_addr, code_size); 683 CodeBuffer buf(instruction_addr, code_size);
674 MacroAssembler masm(&buf); 684 MacroAssembler masm(&buf);
675 masm.bxx64_patchable(dest, relocInfo::none, link); 685 masm.bxx64_patchable(dest, relocInfo::none, link);
676 ICache::invalidate_range(instruction_addr, code_size); 686 ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
677 } 687 }
678 688
679 // Get dest address of a bxx64_patchable instruction. 689 // Get dest address of a bxx64_patchable instruction.
680 address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) { 690 address MacroAssembler::get_dest_of_bxx64_patchable_at(address instruction_addr, bool link) {
681 if (is_bxx64_patchable_variant1_at(instruction_addr, link)) { 691 if (is_bxx64_patchable_variant1_at(instruction_addr, link)) {
956 // conventions. 966 // conventions.
957 // We don't use the TOC in generated code, so there is no need to save 967 // We don't use the TOC in generated code, so there is no need to save
958 // and restore its value. 968 // and restore its value.
959 address MacroAssembler::call_c(Register fd) { 969 address MacroAssembler::call_c(Register fd) {
960 return branch_to(fd, /*and_link=*/true, 970 return branch_to(fd, /*and_link=*/true,
971 /*save toc=*/false,
972 /*restore toc=*/false,
973 /*load toc=*/true,
974 /*load env=*/true);
975 }
976
977 address MacroAssembler::call_c_and_return_to_caller(Register fd) {
978 return branch_to(fd, /*and_link=*/false,
961 /*save toc=*/false, 979 /*save toc=*/false,
962 /*restore toc=*/false, 980 /*restore toc=*/false,
963 /*load toc=*/true, 981 /*load toc=*/true,
964 /*load env=*/true); 982 /*load env=*/true);
965 } 983 }
2313 // known pc and don't have to rely on the native call having a 2331 // known pc and don't have to rely on the native call having a
2314 // standard frame linkage where we can find the pc. 2332 // standard frame linkage where we can find the pc.
2315 if (last_Java_pc != noreg) 2333 if (last_Java_pc != noreg)
2316 std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread); 2334 std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
2317 2335
2318 // set last_Java_sp last 2336 // Set last_Java_sp last.
2319 std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread); 2337 std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
2320 } 2338 }
2321 2339
2322 void MacroAssembler::reset_last_Java_frame(void) { 2340 void MacroAssembler::reset_last_Java_frame(void) {
2323 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), 2341 asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
2450 } 2468 }
2451 } else { 2469 } else {
2452 load_const(R30, Universe::narrow_ptrs_base_addr(), tmp); 2470 load_const(R30, Universe::narrow_ptrs_base_addr(), tmp);
2453 ld(R30, 0, R30); 2471 ld(R30, 0, R30);
2454 } 2472 }
2473 }
2474
2475 // Clear Array
2476 // Kills both input registers. tmp == R0 is allowed.
2477 void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
2478 // Procedure for large arrays (uses data cache block zero instruction).
2479 Label startloop, fast, fastloop, small_rest, restloop, done;
2480 const int cl_size = VM_Version::get_cache_line_size(),
2481 cl_dwords = cl_size>>3,
2482 cl_dw_addr_bits = exact_log2(cl_dwords),
2483 dcbz_min = 1; // Min count of dcbz executions, needs to be >0.
2484
2485 //2:
2486 cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
2487 blt(CCR1, small_rest); // Too small.
2488 rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
2489 beq(CCR0, fast); // Already 128byte aligned.
2490
2491 subfic(tmp, tmp, cl_dwords);
2492 mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
2493 subf(cnt_dwords, tmp, cnt_dwords); // rest.
2494 li(tmp, 0);
2495 //10:
2496 bind(startloop); // Clear at the beginning to reach 128byte boundary.
2497 std(tmp, 0, base_ptr); // Clear 8byte aligned block.
2498 addi(base_ptr, base_ptr, 8);
2499 bdnz(startloop);
2500 //13:
2501 bind(fast); // Clear 128byte blocks.
2502 srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0).
2503 andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
2504 mtctr(tmp); // Load counter.
2505 //16:
2506 bind(fastloop);
2507 dcbz(base_ptr); // Clear 128byte aligned block.
2508 addi(base_ptr, base_ptr, cl_size);
2509 bdnz(fastloop);
2510 if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
2511 //20:
2512 bind(small_rest);
2513 cmpdi(CCR0, cnt_dwords, 0); // size 0?
2514 beq(CCR0, done); // rest == 0
2515 li(tmp, 0);
2516 mtctr(cnt_dwords); // Load counter.
2517 //24:
2518 bind(restloop); // Clear rest.
2519 std(tmp, 0, base_ptr); // Clear 8byte aligned block.
2520 addi(base_ptr, base_ptr, 8);
2521 bdnz(restloop);
2522 //27:
2523 bind(done);
2455 } 2524 }
2456 2525
2457 /////////////////////////////////////////// String intrinsics //////////////////////////////////////////// 2526 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
2458 2527
2459 // Search for a single jchar in an jchar[]. 2528 // Search for a single jchar in an jchar[].
2924 // READ: oop. KILL: R0. Volatile floats perhaps. 2993 // READ: oop. KILL: R0. Volatile floats perhaps.
2925 void MacroAssembler::verify_oop(Register oop, const char* msg) { 2994 void MacroAssembler::verify_oop(Register oop, const char* msg) {
2926 if (!VerifyOops) { 2995 if (!VerifyOops) {
2927 return; 2996 return;
2928 } 2997 }
2929 // will be preserved. 2998 // Will be preserved.
2930 Register tmp = R11; 2999 Register tmp = R11;
2931 assert(oop != tmp, "precondition"); 3000 assert(oop != tmp, "precondition");
2932 unsigned int nbytes_save = 10*8; // 10 volatile gprs 3001 unsigned int nbytes_save = 10*8; // 10 volatile gprs
2933 address/* FunctionDescriptor** */fd = 3002 address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
2934 StubRoutines::verify_oop_subroutine_entry_address();
2935 // save tmp 3003 // save tmp
2936 mr(R0, tmp); 3004 mr(R0, tmp);
2937 // kill tmp 3005 // kill tmp
2938 save_LR_CR(tmp); 3006 save_LR_CR(tmp);
2939 push_frame_abi112(nbytes_save, tmp); 3007 push_frame_abi112(nbytes_save, tmp);