Mercurial > hg > graal-compiler
comparison src/cpu/sparc/vm/assembler_sparc.cpp @ 113:ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
Summary: Compressed oops in instances, arrays, and headers. Code contributors are coleenp, phh, never, swamyv
Reviewed-by: jmasa, kamg, acorn, tbell, kvn, rasbold
author | coleenp |
---|---|
date | Sun, 13 Apr 2008 17:43:42 -0400 |
parents | a61af66fc99e |
children | b130b98db9cf |
comparison legend: equal / deleted / inserted / replaced
110:a49a647afe9a | 113:ba764ed4b6f2 |
---|---|
1777 delayed()->nop(); | 1777 delayed()->nop(); |
1778 } | 1778 } |
1779 | 1779 |
1780 // Check the klassOop of this object for being in the right area of memory. | 1780 // Check the klassOop of this object for being in the right area of memory. |
1781 // Cannot do the load in the delay above slot in case O0 is null | 1781 // Cannot do the load in the delay above slot in case O0 is null |
1782 ld_ptr(Address(O0_obj, 0, oopDesc::klass_offset_in_bytes()), O0_obj); | 1782 load_klass(O0_obj, O0_obj); |
1783 // assert((klass & klass_mask) == klass_bits); | 1783 // assert((klass & klass_mask) == klass_bits); |
1784 if( Universe::verify_klass_mask() != Universe::verify_oop_mask() ) | 1784 if( Universe::verify_klass_mask() != Universe::verify_oop_mask() ) |
1785 set(Universe::verify_klass_mask(), O2_mask); | 1785 set(Universe::verify_klass_mask(), O2_mask); |
1786 if( Universe::verify_klass_bits() != Universe::verify_oop_bits() ) | 1786 if( Universe::verify_klass_bits() != Universe::verify_oop_bits() ) |
1787 set(Universe::verify_klass_bits(), O3_bits); | 1787 set(Universe::verify_klass_bits(), O3_bits); |
1788 and3(O0_obj, O2_mask, O4_temp); | 1788 and3(O0_obj, O2_mask, O4_temp); |
1789 cmp(O4_temp, O3_bits); | 1789 cmp(O4_temp, O3_bits); |
1790 brx(notEqual, false, pn, fail); | 1790 brx(notEqual, false, pn, fail); |
1791 delayed()->nop(); | |
1791 // Check the klass's klass | 1792 // Check the klass's klass |
1792 delayed()->ld_ptr(Address(O0_obj, 0, oopDesc::klass_offset_in_bytes()), O0_obj); | 1793 load_klass(O0_obj, O0_obj); |
1793 and3(O0_obj, O2_mask, O4_temp); | 1794 and3(O0_obj, O2_mask, O4_temp); |
1794 cmp(O4_temp, O3_bits); | 1795 cmp(O4_temp, O3_bits); |
1795 brx(notEqual, false, pn, fail); | 1796 brx(notEqual, false, pn, fail); |
1796 delayed()->wrccr( O5_save_flags ); // Restore CCR's | 1797 delayed()->wrccr( O5_save_flags ); // Restore CCR's |
1797 | 1798 |
2586 // pointers to allow age to be placed into low bits | 2587 // pointers to allow age to be placed into low bits |
2587 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); | 2588 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); |
2588 and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); | 2589 and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); |
2589 cmp(temp_reg, markOopDesc::biased_lock_pattern); | 2590 cmp(temp_reg, markOopDesc::biased_lock_pattern); |
2590 brx(Assembler::notEqual, false, Assembler::pn, cas_label); | 2591 brx(Assembler::notEqual, false, Assembler::pn, cas_label); |
2591 | 2592 delayed()->nop(); |
2592 delayed()->ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg); | 2593 |
2594 load_klass(obj_reg, temp_reg); | |
2593 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); | 2595 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); |
2594 or3(G2_thread, temp_reg, temp_reg); | 2596 or3(G2_thread, temp_reg, temp_reg); |
2595 xor3(mark_reg, temp_reg, temp_reg); | 2597 xor3(mark_reg, temp_reg, temp_reg); |
2596 andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg); | 2598 andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg); |
2597 if (counters != NULL) { | 2599 if (counters != NULL) { |
2666 // bias in the current epoch. In other words, we allow transfer of | 2668 // bias in the current epoch. In other words, we allow transfer of |
2667 // the bias from one thread to another directly in this situation. | 2669 // the bias from one thread to another directly in this situation. |
2668 // | 2670 // |
2669 // FIXME: due to a lack of registers we currently blow away the age | 2671 // FIXME: due to a lack of registers we currently blow away the age |
2670 // bits in this situation. Should attempt to preserve them. | 2672 // bits in this situation. Should attempt to preserve them. |
2671 ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg); | 2673 load_klass(obj_reg, temp_reg); |
2672 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); | 2674 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); |
2673 or3(G2_thread, temp_reg, temp_reg); | 2675 or3(G2_thread, temp_reg, temp_reg); |
2674 casx_under_lock(mark_addr.base(), mark_reg, temp_reg, | 2676 casx_under_lock(mark_addr.base(), mark_reg, temp_reg, |
2675 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); | 2677 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); |
2676 // If the biasing toward our thread failed, this means that | 2678 // If the biasing toward our thread failed, this means that |
2698 // bias of this particular object, so it's okay to continue in the | 2700 // bias of this particular object, so it's okay to continue in the |
2699 // normal locking code. | 2701 // normal locking code. |
2700 // | 2702 // |
2701 // FIXME: due to a lack of registers we currently blow away the age | 2703 // FIXME: due to a lack of registers we currently blow away the age |
2702 // bits in this situation. Should attempt to preserve them. | 2704 // bits in this situation. Should attempt to preserve them. |
2703 ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg); | 2705 load_klass(obj_reg, temp_reg); |
2704 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); | 2706 ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); |
2705 casx_under_lock(mark_addr.base(), mark_reg, temp_reg, | 2707 casx_under_lock(mark_addr.base(), mark_reg, temp_reg, |
2706 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); | 2708 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); |
2707 // Fall through to the normal CAS-based lock, because no matter what | 2709 // Fall through to the normal CAS-based lock, because no matter what |
2708 // the result of the above CAS, some thread must have succeeded in | 2710 // the result of the above CAS, some thread must have succeeded in |
3404 set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); | 3406 set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); |
3405 st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word | 3407 st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word |
3406 // set klass to intArrayKlass | 3408 // set klass to intArrayKlass |
3407 set((intptr_t)Universe::intArrayKlassObj_addr(), t2); | 3409 set((intptr_t)Universe::intArrayKlassObj_addr(), t2); |
3408 ld_ptr(t2, 0, t2); | 3410 ld_ptr(t2, 0, t2); |
3409 st_ptr(t2, top, oopDesc::klass_offset_in_bytes()); | 3411 store_klass(t2, top); |
3410 sub(t1, typeArrayOopDesc::header_size(T_INT), t1); | 3412 sub(t1, typeArrayOopDesc::header_size(T_INT), t1); |
3411 add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1); | 3413 add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1); |
3412 sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1); | 3414 sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1); |
3413 st(t1, top, arrayOopDesc::length_offset_in_bytes()); | 3415 st(t1, top, arrayOopDesc::length_offset_in_bytes()); |
3414 verify_oop(top); | 3416 verify_oop(top); |
3532 for (int i = 0; i< StackShadowPages-1; i++) { | 3534 for (int i = 0; i< StackShadowPages-1; i++) { |
3533 set((-i*offset)+STACK_BIAS, Rscratch); | 3535 set((-i*offset)+STACK_BIAS, Rscratch); |
3534 st(G0, Rtsp, Rscratch); | 3536 st(G0, Rtsp, Rscratch); |
3535 } | 3537 } |
3536 } | 3538 } |
3539 | |
3540 void MacroAssembler::load_klass(Register s, Register d) { | |
3541 // The number of bytes in this code is used by | |
3542 // MachCallDynamicJavaNode::ret_addr_offset() | |
3543 // if this changes, change that. | |
3544 if (UseCompressedOops) { | |
3545 lduw(s, oopDesc::klass_offset_in_bytes(), d); | |
3546 decode_heap_oop_not_null(d); | |
3547 } else { | |
3548 ld_ptr(s, oopDesc::klass_offset_in_bytes(), d); | |
3549 } | |
3550 } | |
3551 | |
// ??? figure out src vs. dst!
// Store the klass pointer held in d into the klass field of the object
// whose base is in s1.
// NOTE(review): with compressed oops this encodes d in place, so the
// caller's full-width value in d is destroyed — confirm callers treat
// d as dead afterwards.
void MacroAssembler::store_klass(Register d, Register s1) {
  if (UseCompressedOops) {
    assert(s1 != d, "not enough registers");
    encode_heap_oop_not_null(d);   // d now holds the narrow (32-bit) form
    // Zero out entire klass field first.
    // The full-width clear guarantees the half of the slot not covered
    // by the 32-bit store below is zero, so load_klass's lduw at the
    // same offset reads a clean narrow value.
    st_ptr(G0, s1, oopDesc::klass_offset_in_bytes());
    st(d, s1, oopDesc::klass_offset_in_bytes());
  } else {
    st_ptr(d, s1, oopDesc::klass_offset_in_bytes());
  }
}
3564 | |
3565 void MacroAssembler::load_heap_oop(const Address& s, Register d, int offset) { | |
3566 if (UseCompressedOops) { | |
3567 lduw(s, d, offset); | |
3568 decode_heap_oop(d); | |
3569 } else { | |
3570 ld_ptr(s, d, offset); | |
3571 } | |
3572 } | |
3573 | |
3574 void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) { | |
3575 if (UseCompressedOops) { | |
3576 lduw(s1, s2, d); | |
3577 decode_heap_oop(d, d); | |
3578 } else { | |
3579 ld_ptr(s1, s2, d); | |
3580 } | |
3581 } | |
3582 | |
3583 void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) { | |
3584 if (UseCompressedOops) { | |
3585 lduw(s1, simm13a, d); | |
3586 decode_heap_oop(d, d); | |
3587 } else { | |
3588 ld_ptr(s1, simm13a, d); | |
3589 } | |
3590 } | |
3591 | |
3592 void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) { | |
3593 if (UseCompressedOops) { | |
3594 assert(s1 != d && s2 != d, "not enough registers"); | |
3595 encode_heap_oop(d); | |
3596 st(d, s1, s2); | |
3597 } else { | |
3598 st_ptr(d, s1, s2); | |
3599 } | |
3600 } | |
3601 | |
3602 void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) { | |
3603 if (UseCompressedOops) { | |
3604 assert(s1 != d, "not enough registers"); | |
3605 encode_heap_oop(d); | |
3606 st(d, s1, simm13a); | |
3607 } else { | |
3608 st_ptr(d, s1, simm13a); | |
3609 } | |
3610 } | |
3611 | |
3612 void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) { | |
3613 if (UseCompressedOops) { | |
3614 assert(a.base() != d, "not enough registers"); | |
3615 encode_heap_oop(d); | |
3616 st(d, a, offset); | |
3617 } else { | |
3618 st_ptr(d, a, offset); | |
3619 } | |
3620 } | |
3621 | |
3622 | |
// Compress the oop in src into dst: dst = (src - G6_heapbase) >> shift,
// with null mapped to null.  The null check is a branch-on-register, so
// condition codes are not touched.
void MacroAssembler::encode_heap_oop(Register src, Register dst) {
  assert (UseCompressedOops, "must be compressed");
  Label done;
  if (src == dst) {
    // optimize for frequent case src == dst
    // Non-null: branch taken, delay slot computes dst = src - heapbase,
    // then the shift below compresses it (src aliases dst here).
    // Null: the subtract is annulled and the shift leaves null as null.
    bpr(rc_nz, true, Assembler::pt, src, done);
    delayed() -> sub(src, G6_heapbase, dst); // annulled if not taken
    bind(done);
    srlx(src, LogMinObjAlignmentInBytes, dst);
  } else {
    // Null: branch over the arithmetic; the (non-annulled) delay slot
    // has already written null into dst.
    bpr(rc_z, false, Assembler::pn, src, done);
    delayed() -> mov(G0, dst);
    // The sub/srlx could be moved before the branch with an annulled
    // delay, but that may add some unneeded work encoding null.
    sub(src, G6_heapbase, dst);
    srlx(dst, LogMinObjAlignmentInBytes, dst);
    bind(done);
  }
}
3642 | |
3643 | |
// Compress an oop known to be non-null, in place:
// r = (r - G6_heapbase) >> shift.  No null check is emitted, so a null
// in r would not round-trip — callers must guarantee non-null.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "must be compressed");
  sub(r, G6_heapbase, r);
  srlx(r, LogMinObjAlignmentInBytes, r);
}
3649 | |
3650 // Same algorithm as oops.inline.hpp decode_heap_oop. | |
3651 void MacroAssembler::decode_heap_oop(Register src, Register dst) { | |
3652 assert (UseCompressedOops, "must be compressed"); | |
3653 Label done; | |
3654 sllx(src, LogMinObjAlignmentInBytes, dst); | |
3655 bpr(rc_nz, true, Assembler::pt, dst, done); | |
3656 delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken | |
3657 bind(done); | |
3658 } | |
3659 | |
// Widen a narrow oop known to be non-null, in place:
// r = (r << shift) + G6_heapbase.  No null check: null would decode to
// the heap base, so callers must guarantee non-null.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
  // pd_code_size_limit.
  assert (UseCompressedOops, "must be compressed");
  sllx(r, LogMinObjAlignmentInBytes, r);
  add(r, G6_heapbase, r);
}
3667 | |
3668 void MacroAssembler::reinit_heapbase() { | |
3669 if (UseCompressedOops) { | |
3670 // call indirectly to solve generation ordering problem | |
3671 Address base(G6_heapbase, (address)Universe::heap_base_addr()); | |
3672 load_ptr_contents(base, G6_heapbase); | |
3673 } | |
3674 } |