diff src/cpu/x86/vm/x86_64.ad @ 113:ba764ed4b6f2

6420645: Create a vm that uses compressed oops for up to 32gb heapsizes Summary: Compressed oops in instances, arrays, and headers. Code contributors are coleenp, phh, never, swamyv Reviewed-by: jmasa, kamg, acorn, tbell, kvn, rasbold
author coleenp
date Sun, 13 Apr 2008 17:43:42 -0400
parents 3d62cb85208d
children b130b98db9cf
line wrap: on
line diff
--- a/src/cpu/x86/vm/x86_64.ad	Fri Apr 11 09:56:35 2008 -0400
+++ b/src/cpu/x86/vm/x86_64.ad	Sun Apr 13 17:43:42 2008 -0400
@@ -312,7 +312,6 @@
                   R9,  R9_H,
                   R10, R10_H,
                   R11, R11_H,
-                  R12, R12_H,
                   R13, R13_H,
                   R14, R14_H);
 
@@ -392,7 +391,6 @@
                    R9,  R9_H,
                    R10, R10_H,
                    R11, R11_H,
-                   R12, R12_H,
                    R13, R13_H,
                    R14, R14_H);
 
@@ -406,7 +404,6 @@
                               R9,  R9_H,
                               R10, R10_H,
                               R11, R11_H,
-                              R12, R12_H,
                               R13, R13_H,
                               R14, R14_H);
 
@@ -421,7 +418,6 @@
                           R9,  R9_H,
                           R10, R10_H,
                           R11, R11_H,
-                          R12, R12_H,
                           R13, R13_H,
                           R14, R14_H);
 
@@ -436,7 +432,6 @@
                           R9,  R9_H,
                           R10, R10_H,
                           R11, R11_H,
-                          R12, R12_H,
                           R13, R13_H,
                           R14, R14_H);
 
@@ -449,6 +444,9 @@
 // Singleton class for RDX long register
 reg_class long_rdx_reg(RDX, RDX_H);
 
+// Singleton class for R12 long register (allocation class of operand r12RegL)
+reg_class long_r12_reg(R12, R12_H);
+
 // Class for all int registers (except RSP)
 reg_class int_reg(RAX,
                   RDX,
@@ -461,7 +459,6 @@
                   R9,
                   R10,
                   R11,
-                  R12,
                   R13,
                   R14);
 
@@ -476,7 +473,6 @@
                          R9,
                          R10,
                          R11,
-                         R12,
                          R13,
                          R14);
 
@@ -490,7 +486,6 @@
                              R9,
                              R10,
                              R11,
-                             R12,
                              R13,
                              R14);
 
@@ -1844,8 +1839,14 @@
 #ifndef PRODUCT
 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
-  st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
-               "# Inline cache check", oopDesc::klass_offset_in_bytes());
+  if (UseCompressedOops) {  // printed text stands in for the load_klass(rscratch1, j_rarg0) + cmpq pair used when emitting
+    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
+    st->print_cr("leaq    rscratch1, [r12_heapbase, rscratch1, Address::times_8, 0]");  // index is the narrow klass just loaded into rscratch1
+    st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
+  } else {
+    st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
+                 "# Inline cache check", oopDesc::klass_offset_in_bytes());
+  }
   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
   st->print_cr("\tnop");
   if (!OptoBreakpoint) {
@@ -1860,7 +1861,12 @@
 #ifdef ASSERT
   uint code_size = cbuf.code_size();
 #endif
-  masm.cmpq(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
+  if (UseCompressedOops) {
+    masm.load_klass(rscratch1, j_rarg0);
+    masm.cmpq(rax, rscratch1);
+  } else {
+    masm.cmpq(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
+  }
 
   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
@@ -1871,6 +1877,10 @@
     // Leave space for int3
      nops_cnt += 1;
   }
+  if (UseCompressedOops) {
+    // ??? divisible by 4 is aligned?
+    nops_cnt += 1;
+  }
   masm.nop(nops_cnt);
 
   assert(cbuf.code_size() - code_size == size(ra_),
@@ -1879,7 +1889,11 @@
 
 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 {
-  return OptoBreakpoint ? 11 : 12;
+  if (UseCompressedOops) {  // sizes must equal the bytes actually emitted: the emitting code asserts code_size delta == size(ra_)
+    return OptoBreakpoint ? 19 : 20;  // 8 bytes more than the uncompressed sequence below
+  } else {
+    return OptoBreakpoint ? 11 : 12;
+  }
 }
 
 
@@ -2052,6 +2066,7 @@
     reg ==  RCX_num || reg ==  RCX_H_num ||
     reg ==   R8_num || reg ==   R8_H_num ||
     reg ==   R9_num || reg ==   R9_H_num ||
+    reg ==  R12_num || reg ==  R12_H_num ||
     reg == XMM0_num || reg == XMM0_H_num ||
     reg == XMM1_num || reg == XMM1_H_num ||
     reg == XMM2_num || reg == XMM2_H_num ||
@@ -2087,6 +2102,17 @@
   return LONG_RDX_REG_mask;
 }
 
+static Address build_address(int b, int i, int s, int d) {  // b/i: base/index register encodings, s: scale, d: displacement (callers pass $mem$$base/index/scale/disp)
+  Register index = as_Register(i);
+  Address::ScaleFactor scale = (Address::ScaleFactor)s;
+  if (index == rsp) {  // rsp in the index slot is treated as "no index register"
+    index = noreg;
+    scale = Address::no_scale;
+  }
+  Address addr(as_Register(b), index, scale, d);
+  return addr;
+}
+
 %}
 
 //----------ENCODING BLOCK-----------------------------------------------------
@@ -2545,7 +2571,7 @@
     Register Rrax = as_Register(RAX_enc); // super class
     Register Rrcx = as_Register(RCX_enc); // killed
     Register Rrsi = as_Register(RSI_enc); // sub class
-    Label hit, miss;
+    Label hit, miss, cmiss;
 
     MacroAssembler _masm(&cbuf);
     // Compare super with sub directly, since super is not in its own SSA.
@@ -2562,12 +2588,27 @@
                           Klass::secondary_supers_offset_in_bytes()));
     __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
     __ addq(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-    __ repne_scan();
-    __ jcc(Assembler::notEqual, miss);
-    __ movq(Address(Rrsi,
-                    sizeof(oopDesc) +
-                    Klass::secondary_super_cache_offset_in_bytes()),
-            Rrax);
+    if (UseCompressedOops) {
+      __ encode_heap_oop(Rrax);
+      __ repne_scanl();
+      __ jcc(Assembler::notEqual, cmiss);
+      __ decode_heap_oop(Rrax);
+      __ movq(Address(Rrsi,
+                      sizeof(oopDesc) +
+                      Klass::secondary_super_cache_offset_in_bytes()),
+              Rrax);
+      __ jmp(hit);
+      __ bind(cmiss);
+      __ decode_heap_oop(Rrax);
+      __ jmp(miss);
+    } else {
+      __ repne_scanq();
+      __ jcc(Assembler::notEqual, miss);
+      __ movq(Address(Rrsi,
+                      sizeof(oopDesc) +
+                      Klass::secondary_super_cache_offset_in_bytes()),
+              Rrax);
+    }
     __ bind(hit);
     if ($primary) {
       __ xorq(Rrdi, Rrdi);
@@ -3693,10 +3734,10 @@
     int count_offset  = java_lang_String::count_offset_in_bytes();
     int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
 
-    masm.movq(rax, Address(rsi, value_offset));
+    masm.load_heap_oop(rax, Address(rsi, value_offset));
     masm.movl(rcx, Address(rsi, offset_offset));
     masm.leaq(rax, Address(rax, rcx, Address::times_2, base_offset));
-    masm.movq(rbx, Address(rdi, value_offset));
+    masm.load_heap_oop(rbx, Address(rdi, value_offset));
     masm.movl(rcx, Address(rdi, offset_offset));
     masm.leaq(rbx, Address(rbx, rcx, Address::times_2, base_offset));
 
@@ -4120,6 +4161,7 @@
 %}
 
 
+
 //----------FRAME--------------------------------------------------------------
 // Definition of frame structure and management information.
 //
@@ -4255,6 +4297,7 @@
     static const int lo[Op_RegL + 1] = {
       0,
       0,
+      RAX_num,  // Op_RegN
       RAX_num,  // Op_RegI
       RAX_num,  // Op_RegP
       XMM0_num, // Op_RegF
@@ -4264,13 +4307,14 @@
     static const int hi[Op_RegL + 1] = {
       0,
       0,
+      OptoReg::Bad, // Op_RegN
       OptoReg::Bad, // Op_RegI
       RAX_H_num,    // Op_RegP
       OptoReg::Bad, // Op_RegF
       XMM0_H_num,   // Op_RegD
       RAX_H_num     // Op_RegL
     };
-
+    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
   %}
 %}
@@ -4417,9 +4461,25 @@
   interface(CONST_INTER);
 %}
 
-// Unsigned 31-bit Pointer Immediate
-// Can be used in both 32-bit signed and 32-bit unsigned insns.
-// Works for nulls and markOops; not for relocatable (oop) pointers.
+// Narrow (compressed) Pointer Immediate
+operand immN() %{
+  match(ConN);
+
+  op_cost(10);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// NULL Narrow (compressed) Pointer Immediate: matches ConN only when its narrow constant is 0
+operand immN0() %{
+  predicate(n->get_narrowcon() == 0);
+  match(ConN);
+
+  op_cost(5);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 operand immP31()
 %{
   predicate(!n->as_Type()->type()->isa_oopptr()
@@ -4431,6 +4491,7 @@
   interface(CONST_INTER);
 %}
 
+
 // Long Immediate
 operand immL()
 %{
@@ -4767,6 +4828,23 @@
   interface(REG_INTER);
 %}
 
+// Long operand pinned to R12 (class long_r12_reg); used as the base of indIndexScaleOffsetComp
+operand r12RegL() %{
+  constraint(ALLOC_IN_RC(long_r12_reg));
+  match(RegL);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rRegN() %{
+  constraint(ALLOC_IN_RC(int_reg));  // RegN values (compressed pointers, cf. loadN) are allocated from the int register class
+  match(RegN);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 // It's fine for an instruction input which expects rRegP to match a r15_RegP.
@@ -4822,6 +4900,18 @@
   interface(REG_INTER);
 %}
 
+// Special Registers
+// Return a compressed pointer value; pinned to RAX (also the oldval operand of compareAndSwapN)
+operand rax_RegN()
+%{
+  constraint(ALLOC_IN_RC(int_rax_reg));
+  match(RegN);
+  match(rRegN);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Used in AtomicAdd
 operand rbx_RegP()
 %{
@@ -5112,6 +5202,21 @@
   %}
 %}
 
+// Indirect Memory Operand through a compressed pointer: [R12 base + (narrow src << 3) + offset]
+operand indIndexScaleOffsetComp(rRegN src, immL32 off, r12RegL base) %{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN src base) off);
+
+  op_cost(10);
+  format %{"[$base + $src << 3 + $off] (compressed)" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($src);
+    scale(0x3);
+    disp($off);
+  %}
+%}
+
 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 %{
@@ -5259,7 +5364,8 @@
 // case of this is memory operands.
 
 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
-               indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset);
+               indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
+               indIndexScaleOffsetComp);
 
 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
@@ -5937,10 +6043,28 @@
   ins_pipe(ialu_reg_mem); // XXX
 %}
 
+// Load Compressed Pointer
+instruct loadN(rRegN dst, memory mem, rFlagsReg cr)
+%{
+   match(Set dst (LoadN mem));
+   effect(KILL cr);  // NOTE(review): the encoding is a plain movl, which leaves flags alone -- KILL cr looks unneeded; confirm
+
+   ins_cost(125); // XXX
+   format %{ "movl    $dst, $mem\t# compressed ptr" %}
+   ins_encode %{
+     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
+     Register dst = as_Register($dst$$reg);
+     __ movl(dst, addr);  // 32-bit load; no decode here, the value stays in compressed form
+   %}
+   ins_pipe(ialu_reg_mem); // XXX
+%}
+
+
 // Load Klass Pointer
 instruct loadKlass(rRegP dst, memory mem)
 %{
   match(Set dst (LoadKlass mem));
+  predicate(!n->in(MemNode::Address)->bottom_type()->is_narrow());
 
   ins_cost(125); // XXX
   format %{ "movq    $dst, $mem\t# class" %}
@@ -5949,6 +6073,25 @@
   ins_pipe(ialu_reg_mem); // XXX
 %}
 
+// Load Klass Pointer stored in compressed form; the result in $dst is decoded
+instruct loadKlassComp(rRegP dst, memory mem)
+%{
+  match(Set dst (LoadKlass mem));
+  predicate(n->in(MemNode::Address)->bottom_type()->is_narrow());  // selected only when the address input's type is narrow
+
+  ins_cost(125); // XXX
+  format %{ "movl    $dst, $mem\t# compressed class" %}
+  ins_encode %{
+    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
+    Register dst = as_Register($dst$$reg);
+    __ movl(dst, addr);  // 32-bit load of the compressed klass
+    // klass is never null in the header but this is generated for all
+    // klass loads not just the _klass field in the header.
+    __ decode_heap_oop(dst);
+  %}
+  ins_pipe(ialu_reg_mem); // XXX
+%}
+
 // Load Float
 instruct loadF(regF dst, memory mem)
 %{
@@ -6203,6 +6346,35 @@
   ins_pipe(pipe_slow);
 %}
 
+instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{  // materialize a NULL compressed pointer
+  match(Set dst src);
+  effect(KILL cr);  // xor modifies the flags
+  format %{ "xorq    $dst, $dst\t# compressed NULL ptr" %}
+  ins_encode %{
+    Register dst = $dst$$Register;
+    __ xorq(dst, dst);  // the constant operand is never read; zero is produced by self-xor
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct loadConN(rRegN dst, immN src) %{  // materialize a non-NULL compressed pointer constant
+  match(Set dst src);
+
+  ins_cost(125);
+  format %{ "movl    $dst, $src\t# compressed ptr" %}
+  ins_encode %{
+    address con = (address)$src$$constant;
+    Register dst = $dst$$Register;
+    if (con == NULL) {  // presumably NULL constants always match the cheaper immN0/loadConN0 instead -- confirm
+      ShouldNotReachHere();
+    } else {
+      __ movoop(dst, (jobject)$src$$constant);  // load the full oop first ...
+      __ encode_heap_oop_not_null(dst);  // ... then compress it in place
+    }
+  %}
+  ins_pipe(ialu_reg_fat); // XXX
+%}
+
 instruct loadConF0(regF dst, immF0 src)
 %{
   match(Set dst src);
@@ -6458,6 +6630,22 @@
   ins_pipe(ialu_mem_imm);
 %}
 
+// Store Compressed Pointer
+instruct storeN(memory mem, rRegN src, rFlagsReg cr)
+%{
+  match(Set mem (StoreN mem src));
+  effect(KILL cr);  // NOTE(review): the encoding is a plain movl, which leaves flags alone -- KILL cr looks unneeded; confirm
+
+  ins_cost(125); // XXX
+  format %{ "movl    $mem, $src\t# compressed ptr" %}
+  ins_encode %{
+    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
+    Register src = as_Register($src$$reg);
+    __ movl(addr, src);  // 32-bit store; src already holds the compressed form
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 // Store Integer Immediate
 instruct storeImmI(memory mem, immI src)
 %{
@@ -6805,6 +6993,39 @@
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
+
+// Convert oop pointer into compressed form
+instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
+  match(Set dst (EncodeP src));
+  effect(KILL cr);  // presumably encode_heap_oop clobbers the flags -- confirm in MacroAssembler
+  format %{ "encode_heap_oop $dst,$src" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    if (s != d) {  // encode_heap_oop works in place on one register, so copy src into dst first
+      __ movq(d, s);
+    }
+    __ encode_heap_oop(d);
+  %}
+  ins_pipe(ialu_reg_long);
+%}
+
+instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{  // convert a compressed oop back into a full pointer
+  match(Set dst (DecodeN src));
+  effect(KILL cr);  // decoding an oop kills the condition codes
+  format %{ "decode_heap_oop $dst,$src" %}
+  ins_encode %{
+    Register s = $src$$Register;
+    Register d = $dst$$Register;
+    if (s != d) {  // decode_heap_oop works in place on one register, so copy src into dst first
+      __ movq(d, s);
+    }
+    __ decode_heap_oop(d);
+  %}
+  ins_pipe(ialu_reg_long);
+%}
+
+
 //----------Conditional Move---------------------------------------------------
 // Jump
 // dummy instruction for generating temp registers
@@ -7521,6 +7742,28 @@
 %}
 
 
+instruct compareAndSwapN(rRegI res,
+                          memory mem_ptr,
+                          rax_RegN oldval, rRegN newval,
+                          rFlagsReg cr) %{
+  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));  // 32-bit compare-and-swap of a compressed pointer; res = 1 if the store happened, else 0
+  effect(KILL cr, KILL oldval);  // cmpxchg sets the flags and, on failure, overwrites rax (oldval) with the memory value
+
+  format %{ "cmpxchgl $mem_ptr,$newval\t# "
+            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
+            "sete    $res\n\t"
+            "movzbl  $res, $res" %}
+  opcode(0x0F, 0xB1);  // cmpxchg r/m32, r32
+  ins_encode(lock_prefix,
+             REX_reg_mem(newval, mem_ptr),
+             OpcP, OpcS,
+             reg_mem(newval, mem_ptr),
+             REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete
+             REX_reg_breg(res, res), // movzbl
+             Opcode(0xF), Opcode(0xB6), reg_reg(res, res));
+  ins_pipe( pipe_cmpxchg );
+%}
+
 //----------Subtraction Instructions-------------------------------------------
 
 // Integer Subtraction Instructions
@@ -10771,6 +11014,14 @@
   ins_pipe(ialu_cr_reg_imm);
 %}
 
+instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
+  match(Set cr (CmpN src zero));  // compare a compressed pointer against narrow NULL (immN0)
+
+  format %{ "testl   $src, $src" %}
+  ins_encode %{ __ testl($src$$Register, $src$$Register); %}  // 32-bit self-test sets the flags without needing an immediate
+  ins_pipe(ialu_cr_reg_imm);
+%}
+
 // Yanked all unsigned pointer compare operations.
 // Pointer compares are done with CmpP which is already unsigned.
 
@@ -11018,6 +11269,7 @@
                                      rdi_RegP result)
 %{
   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
+  predicate(!UseCompressedOops); // decoding oop kills condition codes
   effect(KILL rcx, KILL result);
 
   ins_cost(1000);