comparison src/cpu/sparc/vm/assembler_sparc.cpp @ 113:ba764ed4b6f2

6420645: Create a vm that uses compressed oops for up to 32gb heapsizes Summary: Compressed oops in instances, arrays, and headers. Code contributors are coleenp, phh, never, swamyv Reviewed-by: jmasa, kamg, acorn, tbell, kvn, rasbold
author coleenp
date Sun, 13 Apr 2008 17:43:42 -0400
parents a61af66fc99e
children b130b98db9cf
comparison
equal deleted inserted replaced
110:a49a647afe9a 113:ba764ed4b6f2
1777 delayed()->nop(); 1777 delayed()->nop();
1778 } 1778 }
1779 1779
1780 // Check the klassOop of this object for being in the right area of memory. 1780 // Check the klassOop of this object for being in the right area of memory.
1781 // Cannot do the load in the delay above slot in case O0 is null 1781 // Cannot do the load in the delay above slot in case O0 is null
1782 ld_ptr(Address(O0_obj, 0, oopDesc::klass_offset_in_bytes()), O0_obj); 1782 load_klass(O0_obj, O0_obj);
1783 // assert((klass & klass_mask) == klass_bits); 1783 // assert((klass & klass_mask) == klass_bits);
1784 if( Universe::verify_klass_mask() != Universe::verify_oop_mask() ) 1784 if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
1785 set(Universe::verify_klass_mask(), O2_mask); 1785 set(Universe::verify_klass_mask(), O2_mask);
1786 if( Universe::verify_klass_bits() != Universe::verify_oop_bits() ) 1786 if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
1787 set(Universe::verify_klass_bits(), O3_bits); 1787 set(Universe::verify_klass_bits(), O3_bits);
1788 and3(O0_obj, O2_mask, O4_temp); 1788 and3(O0_obj, O2_mask, O4_temp);
1789 cmp(O4_temp, O3_bits); 1789 cmp(O4_temp, O3_bits);
1790 brx(notEqual, false, pn, fail); 1790 brx(notEqual, false, pn, fail);
1791 delayed()->nop();
1791 // Check the klass's klass 1792 // Check the klass's klass
1792 delayed()->ld_ptr(Address(O0_obj, 0, oopDesc::klass_offset_in_bytes()), O0_obj); 1793 load_klass(O0_obj, O0_obj);
1793 and3(O0_obj, O2_mask, O4_temp); 1794 and3(O0_obj, O2_mask, O4_temp);
1794 cmp(O4_temp, O3_bits); 1795 cmp(O4_temp, O3_bits);
1795 brx(notEqual, false, pn, fail); 1796 brx(notEqual, false, pn, fail);
1796 delayed()->wrccr( O5_save_flags ); // Restore CCR's 1797 delayed()->wrccr( O5_save_flags ); // Restore CCR's
1797 1798
2586 // pointers to allow age to be placed into low bits 2587 // pointers to allow age to be placed into low bits
2587 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 2588 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
2588 and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); 2589 and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
2589 cmp(temp_reg, markOopDesc::biased_lock_pattern); 2590 cmp(temp_reg, markOopDesc::biased_lock_pattern);
2590 brx(Assembler::notEqual, false, Assembler::pn, cas_label); 2591 brx(Assembler::notEqual, false, Assembler::pn, cas_label);
2591 2592 delayed()->nop();
2592 delayed()->ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg); 2593
2594 load_klass(obj_reg, temp_reg);
2593 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); 2595 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
2594 or3(G2_thread, temp_reg, temp_reg); 2596 or3(G2_thread, temp_reg, temp_reg);
2595 xor3(mark_reg, temp_reg, temp_reg); 2597 xor3(mark_reg, temp_reg, temp_reg);
2596 andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg); 2598 andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
2597 if (counters != NULL) { 2599 if (counters != NULL) {
2666 // bias in the current epoch. In other words, we allow transfer of 2668 // bias in the current epoch. In other words, we allow transfer of
2667 // the bias from one thread to another directly in this situation. 2669 // the bias from one thread to another directly in this situation.
2668 // 2670 //
2669 // FIXME: due to a lack of registers we currently blow away the age 2671 // FIXME: due to a lack of registers we currently blow away the age
2670 // bits in this situation. Should attempt to preserve them. 2672 // bits in this situation. Should attempt to preserve them.
2671 ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg); 2673 load_klass(obj_reg, temp_reg);
2672 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); 2674 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
2673 or3(G2_thread, temp_reg, temp_reg); 2675 or3(G2_thread, temp_reg, temp_reg);
2674 casx_under_lock(mark_addr.base(), mark_reg, temp_reg, 2676 casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
2675 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); 2677 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
2676 // If the biasing toward our thread failed, this means that 2678 // If the biasing toward our thread failed, this means that
2698 // bias of this particular object, so it's okay to continue in the 2700 // bias of this particular object, so it's okay to continue in the
2699 // normal locking code. 2701 // normal locking code.
2700 // 2702 //
2701 // FIXME: due to a lack of registers we currently blow away the age 2703 // FIXME: due to a lack of registers we currently blow away the age
2702 // bits in this situation. Should attempt to preserve them. 2704 // bits in this situation. Should attempt to preserve them.
2703 ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg); 2705 load_klass(obj_reg, temp_reg);
2704 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); 2706 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
2705 casx_under_lock(mark_addr.base(), mark_reg, temp_reg, 2707 casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
2706 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); 2708 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
2707 // Fall through to the normal CAS-based lock, because no matter what 2709 // Fall through to the normal CAS-based lock, because no matter what
2708 // the result of the above CAS, some thread must have succeeded in 2710 // the result of the above CAS, some thread must have succeeded in
3404 set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); 3406 set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
3405 st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word 3407 st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
3406 // set klass to intArrayKlass 3408 // set klass to intArrayKlass
3407 set((intptr_t)Universe::intArrayKlassObj_addr(), t2); 3409 set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
3408 ld_ptr(t2, 0, t2); 3410 ld_ptr(t2, 0, t2);
3409 st_ptr(t2, top, oopDesc::klass_offset_in_bytes()); 3411 store_klass(t2, top);
3410 sub(t1, typeArrayOopDesc::header_size(T_INT), t1); 3412 sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
3411 add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1); 3413 add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
3412 sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1); 3414 sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
3413 st(t1, top, arrayOopDesc::length_offset_in_bytes()); 3415 st(t1, top, arrayOopDesc::length_offset_in_bytes());
3414 verify_oop(top); 3416 verify_oop(top);
3532 for (int i = 0; i< StackShadowPages-1; i++) { 3534 for (int i = 0; i< StackShadowPages-1; i++) {
3533 set((-i*offset)+STACK_BIAS, Rscratch); 3535 set((-i*offset)+STACK_BIAS, Rscratch);
3534 st(G0, Rtsp, Rscratch); 3536 st(G0, Rtsp, Rscratch);
3535 } 3537 }
3536 } 3538 }
3539
3540 void MacroAssembler::load_klass(Register s, Register d) {
3541 // The number of bytes in this code is used by
3542 // MachCallDynamicJavaNode::ret_addr_offset()
3543 // if this changes, change that.
3544 if (UseCompressedOops) {
3545 lduw(s, oopDesc::klass_offset_in_bytes(), d);
3546 decode_heap_oop_not_null(d);
3547 } else {
3548 ld_ptr(s, oopDesc::klass_offset_in_bytes(), d);
3549 }
3550 }
3551
3552 // ??? figure out src vs. dst!
3553 void MacroAssembler::store_klass(Register d, Register s1) {
3554 if (UseCompressedOops) {
3555 assert(s1 != d, "not enough registers");
3556 encode_heap_oop_not_null(d);
3557 // Zero out entire klass field first.
3558 st_ptr(G0, s1, oopDesc::klass_offset_in_bytes());
3559 st(d, s1, oopDesc::klass_offset_in_bytes());
3560 } else {
3561 st_ptr(d, s1, oopDesc::klass_offset_in_bytes());
3562 }
3563 }
3564
3565 void MacroAssembler::load_heap_oop(const Address& s, Register d, int offset) {
3566 if (UseCompressedOops) {
3567 lduw(s, d, offset);
3568 decode_heap_oop(d);
3569 } else {
3570 ld_ptr(s, d, offset);
3571 }
3572 }
3573
3574 void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) {
3575 if (UseCompressedOops) {
3576 lduw(s1, s2, d);
3577 decode_heap_oop(d, d);
3578 } else {
3579 ld_ptr(s1, s2, d);
3580 }
3581 }
3582
3583 void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) {
3584 if (UseCompressedOops) {
3585 lduw(s1, simm13a, d);
3586 decode_heap_oop(d, d);
3587 } else {
3588 ld_ptr(s1, simm13a, d);
3589 }
3590 }
3591
3592 void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) {
3593 if (UseCompressedOops) {
3594 assert(s1 != d && s2 != d, "not enough registers");
3595 encode_heap_oop(d);
3596 st(d, s1, s2);
3597 } else {
3598 st_ptr(d, s1, s2);
3599 }
3600 }
3601
3602 void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) {
3603 if (UseCompressedOops) {
3604 assert(s1 != d, "not enough registers");
3605 encode_heap_oop(d);
3606 st(d, s1, simm13a);
3607 } else {
3608 st_ptr(d, s1, simm13a);
3609 }
3610 }
3611
3612 void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) {
3613 if (UseCompressedOops) {
3614 assert(a.base() != d, "not enough registers");
3615 encode_heap_oop(d);
3616 st(d, a, offset);
3617 } else {
3618 st_ptr(d, a, offset);
3619 }
3620 }
3621
3622
3623 void MacroAssembler::encode_heap_oop(Register src, Register dst) {
3624 assert (UseCompressedOops, "must be compressed");
3625 Label done;
3626 if (src == dst) {
3627 // optimize for frequent case src == dst
3628 bpr(rc_nz, true, Assembler::pt, src, done);
3629 delayed() -> sub(src, G6_heapbase, dst); // annuled if not taken
3630 bind(done);
3631 srlx(src, LogMinObjAlignmentInBytes, dst);
3632 } else {
3633 bpr(rc_z, false, Assembler::pn, src, done);
3634 delayed() -> mov(G0, dst);
3635 // could be moved before branch, and annulate delay,
3636 // but may add some unneeded work decoding null
3637 sub(src, G6_heapbase, dst);
3638 srlx(dst, LogMinObjAlignmentInBytes, dst);
3639 bind(done);
3640 }
3641 }
3642
3643
3644 void MacroAssembler::encode_heap_oop_not_null(Register r) {
3645 assert (UseCompressedOops, "must be compressed");
3646 sub(r, G6_heapbase, r);
3647 srlx(r, LogMinObjAlignmentInBytes, r);
3648 }
3649
3650 // Same algorithm as oops.inline.hpp decode_heap_oop.
3651 void MacroAssembler::decode_heap_oop(Register src, Register dst) {
3652 assert (UseCompressedOops, "must be compressed");
3653 Label done;
3654 sllx(src, LogMinObjAlignmentInBytes, dst);
3655 bpr(rc_nz, true, Assembler::pt, dst, done);
3656 delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
3657 bind(done);
3658 }
3659
3660 void MacroAssembler::decode_heap_oop_not_null(Register r) {
3661 // Do not add assert code to this unless you change vtableStubs_sparc.cpp
3662 // pd_code_size_limit.
3663 assert (UseCompressedOops, "must be compressed");
3664 sllx(r, LogMinObjAlignmentInBytes, r);
3665 add(r, G6_heapbase, r);
3666 }
3667
3668 void MacroAssembler::reinit_heapbase() {
3669 if (UseCompressedOops) {
3670 // call indirectly to solve generation ordering problem
3671 Address base(G6_heapbase, (address)Universe::heap_base_addr());
3672 load_ptr_contents(base, G6_heapbase);
3673 }
3674 }