Mercurial > hg > truffle
comparison src/cpu/x86/vm/assembler_x86_64.cpp @ 113:ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
Summary: Compressed oops in instances, arrays, and headers. Code contributors are coleenp, phh, never, swamyv
Reviewed-by: jmasa, kamg, acorn, tbell, kvn, rasbold
author | coleenp |
---|---|
date | Sun, 13 Apr 2008 17:43:42 -0400 |
parents | d6fe2e4959d6 |
children | fb75a7673531 |
comparison
equal
deleted
inserted
replaced
110:a49a647afe9a | 113:ba764ed4b6f2 |
---|---|
125 // This should only be used by 64bit instructions that can use rip-relative | 125 // This should only be used by 64bit instructions that can use rip-relative |
126 // it cannot be used by instructions that want an immediate value. | 126 // it cannot be used by instructions that want an immediate value. |
127 | 127 |
128 bool Assembler::reachable(AddressLiteral adr) { | 128 bool Assembler::reachable(AddressLiteral adr) { |
129 int64_t disp; | 129 int64_t disp; |
130 | |
130 // None will force a 64bit literal to the code stream. Likely a placeholder | 131 // None will force a 64bit literal to the code stream. Likely a placeholder |
131 // for something that will be patched later and we need to be certain it will | 132 // for something that will be patched later and we need to be certain it will |
132 // always be reachable. | 133 // always be reachable. |
133 if (adr.reloc() == relocInfo::none) { | 134 if (adr.reloc() == relocInfo::none) { |
134 return false; | 135 return false; |
634 case 0x88: // movb a, r | 635 case 0x88: // movb a, r |
635 case 0x89: // movl a, r | 636 case 0x89: // movl a, r |
636 case 0x8A: // movb r, a | 637 case 0x8A: // movb r, a |
637 case 0x8B: // movl r, a | 638 case 0x8B: // movl r, a |
638 case 0x8F: // popl a | 639 case 0x8F: // popl a |
639 debug_only(has_disp32 = true); | 640 debug_only(has_disp32 = true;) |
640 break; | 641 break; |
641 | 642 |
642 case 0x68: // pushq #32 | 643 case 0x68: // pushq #32 |
643 if (which == end_pc_operand) { | 644 if (which == end_pc_operand) { |
644 return ip + 4; | 645 return ip + 4; |
2889 prefix(REX_W); | 2890 prefix(REX_W); |
2890 emit_byte(0xAB); | 2891 emit_byte(0xAB); |
2891 } | 2892 } |
2892 | 2893 |
2893 // scans rcx double words (m64) at [rdi] for occurrence of rax | 2894 // scans rcx double words (m64) at [rdi] for occurrence of rax |
2894 void Assembler::repne_scan() { | 2895 void Assembler::repne_scanq() { |
2895 // REPNE/REPNZ | 2896 // REPNE/REPNZ |
2896 emit_byte(0xF2); | 2897 emit_byte(0xF2); |
2897 // SCASQ | 2898 // SCASQ |
2898 prefix(REX_W); | 2899 prefix(REX_W); |
2899 emit_byte(0xAF); | 2900 emit_byte(0xAF); |
2900 } | 2901 } |
2902 | |
2903 void Assembler::repne_scanl() { | |
2904 // REPNE/REPNZ | |
2905 emit_byte(0xF2); | |
2906 // SCASL | |
2907 emit_byte(0xAF); | |
2908 } | |
2909 | |
2901 | 2910 |
2902 void Assembler::setb(Condition cc, Register dst) { | 2911 void Assembler::setb(Condition cc, Register dst) { |
2903 assert(0 <= cc && cc < 16, "illegal cc"); | 2912 assert(0 <= cc && cc < 16, "illegal cc"); |
2904 int encode = prefix_and_encode(dst->encoding(), true); | 2913 int encode = prefix_and_encode(dst->encoding(), true); |
2905 emit_byte(0x0F); | 2914 emit_byte(0x0F); |
4595 | 4604 |
4596 pushq(rax); // save rax, restored by receiver | 4605 pushq(rax); // save rax, restored by receiver |
4597 | 4606 |
4598 // pass args on stack, only touch rax | 4607 // pass args on stack, only touch rax |
4599 pushq(reg); | 4608 pushq(reg); |
4600 | |
4601 // avoid using pushptr, as it modifies scratch registers | 4609 // avoid using pushptr, as it modifies scratch registers |
4602 // and our contract is not to modify anything | 4610 // and our contract is not to modify anything |
4603 ExternalAddress buffer((address)b); | 4611 ExternalAddress buffer((address)b); |
4604 movptr(rax, buffer.addr()); | 4612 movptr(rax, buffer.addr()); |
4605 pushq(rax); | 4613 pushq(rax); |
4662 // In order to get locks to work, we need to fake an in_VM state | 4670 // In order to get locks to work, we need to fake an in_VM state |
4663 if (ShowMessageBoxOnError ) { | 4671 if (ShowMessageBoxOnError ) { |
4664 JavaThread* thread = JavaThread::current(); | 4672 JavaThread* thread = JavaThread::current(); |
4665 JavaThreadState saved_state = thread->thread_state(); | 4673 JavaThreadState saved_state = thread->thread_state(); |
4666 thread->set_thread_state(_thread_in_vm); | 4674 thread->set_thread_state(_thread_in_vm); |
4667 ttyLocker ttyl; | |
4668 #ifndef PRODUCT | 4675 #ifndef PRODUCT |
4669 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { | 4676 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { |
4677 ttyLocker ttyl; | |
4670 BytecodeCounter::print(); | 4678 BytecodeCounter::print(); |
4671 } | 4679 } |
4672 #endif | 4680 #endif |
4673 // To see where a verify_oop failed, get $ebx+40/X for this frame. | 4681 // To see where a verify_oop failed, get $ebx+40/X for this frame. |
4674 // XXX correct this offset for amd64 | 4682 // XXX correct this offset for amd64 |
4675 // This is the value of eip which points to where verify_oop will return. | 4683 // This is the value of eip which points to where verify_oop will return. |
4676 if (os::message_box(msg, "Execution stopped, print registers?")) { | 4684 if (os::message_box(msg, "Execution stopped, print registers?")) { |
4685 ttyLocker ttyl; | |
4677 tty->print_cr("rip = 0x%016lx", pc); | 4686 tty->print_cr("rip = 0x%016lx", pc); |
4678 tty->print_cr("rax = 0x%016lx", regs[15]); | 4687 tty->print_cr("rax = 0x%016lx", regs[15]); |
4679 tty->print_cr("rbx = 0x%016lx", regs[12]); | 4688 tty->print_cr("rbx = 0x%016lx", regs[12]); |
4680 tty->print_cr("rcx = 0x%016lx", regs[14]); | 4689 tty->print_cr("rcx = 0x%016lx", regs[14]); |
4681 tty->print_cr("rdx = 0x%016lx", regs[13]); | 4690 tty->print_cr("rdx = 0x%016lx", regs[13]); |
4693 tty->print_cr("r15 = 0x%016lx", regs[0]); | 4702 tty->print_cr("r15 = 0x%016lx", regs[0]); |
4694 BREAKPOINT; | 4703 BREAKPOINT; |
4695 } | 4704 } |
4696 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); | 4705 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); |
4697 } else { | 4706 } else { |
4707 ttyLocker ttyl; | |
4698 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", | 4708 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", |
4699 msg); | 4709 msg); |
4700 } | 4710 } |
4701 } | 4711 } |
4702 | 4712 |
4889 addq(t1, (int)ThreadLocalAllocBuffer::alignment_reserve()); | 4899 addq(t1, (int)ThreadLocalAllocBuffer::alignment_reserve()); |
4890 shlq(t1, log2_intptr(HeapWordSize / sizeof(jint))); | 4900 shlq(t1, log2_intptr(HeapWordSize / sizeof(jint))); |
4891 movq(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); | 4901 movq(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); |
4892 // set klass to intArrayKlass | 4902 // set klass to intArrayKlass |
4893 movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr())); | 4903 movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr())); |
4894 movq(Address(top, oopDesc::klass_offset_in_bytes()), t1); | 4904 store_klass(top, t1); |
4895 | 4905 |
4896 // refill the tlab with an eden allocation | 4906 // refill the tlab with an eden allocation |
4897 bind(do_refill); | 4907 bind(do_refill); |
4898 movq(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); | 4908 movq(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); |
4899 shlq(t1, LogHeapWordSize); | 4909 shlq(t1, LogHeapWordSize); |
4936 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); | 4946 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); |
4937 assert(tmp_reg != noreg, "tmp_reg must be supplied"); | 4947 assert(tmp_reg != noreg, "tmp_reg must be supplied"); |
4938 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); | 4948 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); |
4939 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); | 4949 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); |
4940 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); | 4950 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); |
4941 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); | |
4942 Address saved_mark_addr(lock_reg, 0); | 4951 Address saved_mark_addr(lock_reg, 0); |
4943 | 4952 |
4944 if (PrintBiasedLockingStatistics && counters == NULL) | 4953 if (PrintBiasedLockingStatistics && counters == NULL) |
4945 counters = BiasedLocking::counters(); | 4954 counters = BiasedLocking::counters(); |
4946 | 4955 |
4960 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); | 4969 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); |
4961 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); | 4970 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); |
4962 jcc(Assembler::notEqual, cas_label); | 4971 jcc(Assembler::notEqual, cas_label); |
4963 // The bias pattern is present in the object's header. Need to check | 4972 // The bias pattern is present in the object's header. Need to check |
4964 // whether the bias owner and the epoch are both still current. | 4973 // whether the bias owner and the epoch are both still current. |
4965 movq(tmp_reg, klass_addr); | 4974 load_klass(tmp_reg, obj_reg); |
4966 movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); | 4975 movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); |
4967 orq(tmp_reg, r15_thread); | 4976 orq(tmp_reg, r15_thread); |
4968 xorq(tmp_reg, swap_reg); | 4977 xorq(tmp_reg, swap_reg); |
4969 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); | 4978 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); |
4970 if (counters != NULL) { | 4979 if (counters != NULL) { |
5035 // bias in the current epoch. In other words, we allow transfer of | 5044 // bias in the current epoch. In other words, we allow transfer of |
5036 // the bias from one thread to another directly in this situation. | 5045 // the bias from one thread to another directly in this situation. |
5037 // | 5046 // |
5038 // FIXME: due to a lack of registers we currently blow away the age | 5047 // FIXME: due to a lack of registers we currently blow away the age |
5039 // bits in this situation. Should attempt to preserve them. | 5048 // bits in this situation. Should attempt to preserve them. |
5040 movq(tmp_reg, klass_addr); | 5049 load_klass(tmp_reg, obj_reg); |
5041 movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); | 5050 movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); |
5042 orq(tmp_reg, r15_thread); | 5051 orq(tmp_reg, r15_thread); |
5043 if (os::is_MP()) { | 5052 if (os::is_MP()) { |
5044 lock(); | 5053 lock(); |
5045 } | 5054 } |
5066 // bias of this particular object, so it's okay to continue in the | 5075 // bias of this particular object, so it's okay to continue in the |
5067 // normal locking code. | 5076 // normal locking code. |
5068 // | 5077 // |
5069 // FIXME: due to a lack of registers we currently blow away the age | 5078 // FIXME: due to a lack of registers we currently blow away the age |
5070 // bits in this situation. Should attempt to preserve them. | 5079 // bits in this situation. Should attempt to preserve them. |
5071 movq(tmp_reg, klass_addr); | 5080 load_klass(tmp_reg, obj_reg); |
5072 movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); | 5081 movq(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); |
5073 if (os::is_MP()) { | 5082 if (os::is_MP()) { |
5074 lock(); | 5083 lock(); |
5075 } | 5084 } |
5076 cmpxchgq(tmp_reg, Address(obj_reg, 0)); | 5085 cmpxchgq(tmp_reg, Address(obj_reg, 0)); |
5101 andq(temp_reg, markOopDesc::biased_lock_mask_in_place); | 5110 andq(temp_reg, markOopDesc::biased_lock_mask_in_place); |
5102 cmpq(temp_reg, markOopDesc::biased_lock_pattern); | 5111 cmpq(temp_reg, markOopDesc::biased_lock_pattern); |
5103 jcc(Assembler::equal, done); | 5112 jcc(Assembler::equal, done); |
5104 } | 5113 } |
5105 | 5114 |
5115 | |
5116 void MacroAssembler::load_klass(Register dst, Register src) { | |
5117 if (UseCompressedOops) { | |
5118 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); | |
5119 decode_heap_oop_not_null(dst); | |
5120 } else { | |
5121 movq(dst, Address(src, oopDesc::klass_offset_in_bytes())); | |
5122 } | |
5123 } | |
5124 | |
5125 void MacroAssembler::store_klass(Register dst, Register src) { | |
5126 if (UseCompressedOops) { | |
5127 encode_heap_oop_not_null(src); | |
5128 // zero the entire klass field first as the gap needs to be zeroed too. | |
5129 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), NULL_WORD); | |
5130 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); | |
5131 } else { | |
5132 movq(Address(dst, oopDesc::klass_offset_in_bytes()), src); | |
5133 } | |
5134 } | |
5135 | |
5136 void MacroAssembler::load_heap_oop(Register dst, Address src) { | |
5137 if (UseCompressedOops) { | |
5138 movl(dst, src); | |
5139 decode_heap_oop(dst); | |
5140 } else { | |
5141 movq(dst, src); | |
5142 } | |
5143 } | |
5144 | |
5145 void MacroAssembler::store_heap_oop(Address dst, Register src) { | |
5146 if (UseCompressedOops) { | |
5147 assert(!dst.uses(src), "not enough registers"); | |
5148 encode_heap_oop(src); | |
5149 movl(dst, src); | |
5150 } else { | |
5151 movq(dst, src); | |
5152 } | |
5153 } | |
5154 | |
5155 // Algorithm must match oop.inline.hpp encode_heap_oop. | |
5156 void MacroAssembler::encode_heap_oop(Register r) { | |
5157 assert (UseCompressedOops, "should be compressed"); | |
5158 #ifdef ASSERT | |
5159 Label ok; | |
5160 pushq(rscratch1); // cmpptr trashes rscratch1 | |
5161 cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); | |
5162 jcc(Assembler::equal, ok); | |
5163 stop("MacroAssembler::encode_heap_oop: heap base corrupted?"); | |
5164 bind(ok); | |
5165 popq(rscratch1); | |
5166 #endif | |
5167 verify_oop(r); | |
5168 testq(r, r); | |
5169 cmovq(Assembler::equal, r, r12_heapbase); | |
5170 subq(r, r12_heapbase); | |
5171 shrq(r, LogMinObjAlignmentInBytes); | |
5172 } | |
5173 | |
5174 void MacroAssembler::encode_heap_oop_not_null(Register r) { | |
5175 assert (UseCompressedOops, "should be compressed"); | |
5176 #ifdef ASSERT | |
5177 Label ok; | |
5178 testq(r, r); | |
5179 jcc(Assembler::notEqual, ok); | |
5180 stop("null oop passed to encode_heap_oop_not_null"); | |
5181 bind(ok); | |
5182 #endif | |
5183 verify_oop(r); | |
5184 subq(r, r12_heapbase); | |
5185 shrq(r, LogMinObjAlignmentInBytes); | |
5186 } | |
5187 | |
5188 void MacroAssembler::decode_heap_oop(Register r) { | |
5189 assert (UseCompressedOops, "should be compressed"); | |
5190 #ifdef ASSERT | |
5191 Label ok; | |
5192 pushq(rscratch1); | |
5193 cmpptr(r12_heapbase, | |
5194 ExternalAddress((address)Universe::heap_base_addr())); | |
5195 jcc(Assembler::equal, ok); | |
5196 stop("MacroAssembler::decode_heap_oop: heap base corrupted?"); | |
5197 bind(ok); | |
5198 popq(rscratch1); | |
5199 #endif | |
5200 | |
5201 Label done; | |
5202 shlq(r, LogMinObjAlignmentInBytes); | |
5203 jccb(Assembler::equal, done); | |
5204 addq(r, r12_heapbase); | |
5205 #if 0 | |
5206 // alternate decoding probably a wash. | |
5207 testq(r, r); | |
5208 jccb(Assembler::equal, done); | |
5209 leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); | |
5210 #endif | |
5211 bind(done); | |
5212 verify_oop(r); | |
5213 } | |
5214 | |
5215 void MacroAssembler::decode_heap_oop_not_null(Register r) { | |
5216 assert (UseCompressedOops, "should only be used for compressed headers"); | |
5217 // Cannot assert, unverified entry point counts instructions (see .ad file) | |
5218 // vtableStubs also counts instructions in pd_code_size_limit. | |
5219 assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong"); | |
5220 leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); | |
5221 } | |
5106 | 5222 |
5107 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { | 5223 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { |
5108 switch (cond) { | 5224 switch (cond) { |
5109 // Note some conditions are synonyms for others | 5225 // Note some conditions are synonyms for others |
5110 case Assembler::zero: return Assembler::notZero; | 5226 case Assembler::zero: return Assembler::notZero; |
5171 // The -1 because we already subtracted 1 page. | 5287 // The -1 because we already subtracted 1 page. |
5172 for (int i = 0; i< StackShadowPages-1; i++) { | 5288 for (int i = 0; i< StackShadowPages-1; i++) { |
5173 movq(Address(tmp, (-i*os::vm_page_size())), size ); | 5289 movq(Address(tmp, (-i*os::vm_page_size())), size ); |
5174 } | 5290 } |
5175 } | 5291 } |
5292 | |
5293 void MacroAssembler::reinit_heapbase() { | |
5294 if (UseCompressedOops) { | |
5295 movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); | |
5296 } | |
5297 } |