diff src/cpu/x86/vm/x86_64.ad @ 647:bd441136a5ce

Merge
author kvn
date Thu, 19 Mar 2009 09:13:24 -0700
parents 7bb995fbd3c0 c517646eef23
children d0994e5bebce
line wrap: on
line diff
--- a/src/cpu/x86/vm/x86_64.ad	Wed Mar 18 11:37:48 2009 -0400
+++ b/src/cpu/x86/vm/x86_64.ad	Thu Mar 19 09:13:24 2009 -0700
@@ -326,7 +326,6 @@
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
-                         R12, R12_H,
                          R13, R13_H,
                          R14, R14_H);
 
@@ -340,7 +339,6 @@
                          R9,  R9_H,
                          R10, R10_H,
                          R11, R11_H,
-                         R12, R12_H,
                          R13, R13_H,
                          R14, R14_H);
 
@@ -354,7 +352,6 @@
                              R9,  R9_H,
                              R10, R10_H,
                              R11, R11_H,
-                             R12, R12_H,
                              R13, R13_H,
                              R14, R14_H);
 
@@ -444,9 +441,6 @@
 // Singleton class for RDX long register
 reg_class long_rdx_reg(RDX, RDX_H);
 
-// Singleton class for R12 long register
-reg_class long_r12_reg(R12, R12_H);
-
 // Class for all int registers (except RSP)
 reg_class int_reg(RAX,
                   RDX,
@@ -1842,7 +1836,9 @@
 {
   if (UseCompressedOops) {
     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
-    st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
+    if (Universe::narrow_oop_shift() != 0) {
+      st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
+    }
     st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
   } else {
     st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
@@ -1891,7 +1887,11 @@
 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 {
   if (UseCompressedOops) {
-    return OptoBreakpoint ? 19 : 20;
+    if (Universe::narrow_oop_shift() == 0) {
+      return OptoBreakpoint ? 15 : 16;
+    } else {
+      return OptoBreakpoint ? 19 : 20;
+    }
   } else {
     return OptoBreakpoint ? 11 : 12;
   }
@@ -2575,45 +2575,13 @@
     Register Rrax = as_Register(RAX_enc); // super class
     Register Rrcx = as_Register(RCX_enc); // killed
     Register Rrsi = as_Register(RSI_enc); // sub class
-    Label hit, miss, cmiss;
+    Label miss;
+    const bool set_cond_codes = true;
 
     MacroAssembler _masm(&cbuf);
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to allow platform-specific tweaking on sparc.
-    __ cmpptr(Rrax, Rrsi);
-    __ jcc(Assembler::equal, hit);
-#ifndef PRODUCT
-    __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
-    __ incrementl(Address(Rrcx, 0));
-#endif //PRODUCT
-    __ movptr(Rrdi, Address(Rrsi, 
-                          sizeof(oopDesc) + 
-                          Klass::secondary_supers_offset_in_bytes()));
-    __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
-    __ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
-    if (UseCompressedOops) {
-      __ encode_heap_oop(Rrax);
-      __ repne_scanl();
-      __ jcc(Assembler::notEqual, cmiss);
-      __ decode_heap_oop(Rrax);
-      __ movptr(Address(Rrsi,
-                      sizeof(oopDesc) +
-                      Klass::secondary_super_cache_offset_in_bytes()),
-              Rrax);
-      __ jmp(hit);
-      __ bind(cmiss);
-      __ decode_heap_oop(Rrax);
-      __ jmp(miss);
-    } else {
-      __ repne_scan();
-      __ jcc(Assembler::notEqual, miss);
-      __ movptr(Address(Rrsi,
-                      sizeof(oopDesc) +
-                      Klass::secondary_super_cache_offset_in_bytes()),
-              Rrax);
-    }
-    __ bind(hit);
+    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
+                                     NULL, &miss,
+                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ xorptr(Rrdi, Rrdi);
     }
@@ -4906,15 +4874,6 @@
   interface(REG_INTER);
 %}
 
-
-operand r12RegL() %{
-  constraint(ALLOC_IN_RC(long_r12_reg));
-  match(RegL);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
 operand rRegN() %{
   constraint(ALLOC_IN_RC(int_reg));
   match(RegN);
@@ -5289,21 +5248,6 @@
   %}
 %}
 
-// Indirect Narrow Oop Plus Offset Operand
-operand indNarrowOopOffset(rRegN src, immL32 off) %{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN src) off);
-
-  op_cost(10);
-  format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %}
-  interface(MEMORY_INTER) %{
-    base(0xc); // R12
-    index($src);
-    scale(0x3);
-    disp($off);
-  %}
-%}
-
 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 %{
@@ -5321,6 +5265,158 @@
   %}
 %}
 
+// Indirect Narrow Oop Plus Offset Operand
+// Note: x86 architecture doesn't support "scale * index + offset" without a base
+// we can't free r12 even with Universe::narrow_oop_base() == NULL.
+operand indCompressedOopOffset(rRegN reg, immL32 off) %{
+  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  op_cost(10);
+  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
+  interface(MEMORY_INTER) %{
+    base(0xc); // R12
+    index($reg);
+    scale(0x3);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Operand
+operand indirectNarrow(rRegN reg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(DecodeN reg);
+
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Plus Short Offset Operand
+operand indOffset8Narrow(rRegN reg, immL8 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  format %{ "[$reg + $off (8-bit)]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Long Offset Operand
+operand indOffset32Narrow(rRegN reg, immL32 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+
+  format %{ "[$reg + $off (32-bit)]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x4);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) lreg) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $lreg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Plus Index Register Plus Offset Operand
+operand indIndexNarrow(rRegN reg, rRegL lreg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) lreg);
+
+  op_cost(10);
+  format %{"[$reg + $lreg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register
+operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL lreg scale));
+
+  op_cost(10);
+  format %{"[$reg + $lreg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
+operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $lreg << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
+operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
+  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $idx << $scale]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($idx);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+
 //----------Special Memory Operands--------------------------------------------
 // Stack Slot Operand - This operand is used for loading and storing temporary
 //                      values on the stack where a match requires a value to
@@ -5488,7 +5584,10 @@
 
 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
-               indNarrowOopOffset);
+               indCompressedOopOffset,
+               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
+               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
+               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
 
 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
@@ -6234,9 +6333,7 @@
    ins_cost(125); // XXX
    format %{ "movl    $dst, $mem\t# compressed ptr" %}
    ins_encode %{
-     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-     Register dst = as_Register($dst$$reg);
-     __ movl(dst, addr);
+     __ movl($dst$$Register, $mem$$Address);
    %}
    ins_pipe(ialu_reg_mem); // XXX
 %}
@@ -6262,9 +6359,7 @@
   ins_cost(125); // XXX
   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    Register dst = as_Register($dst$$reg);
-    __ movl(dst, addr);
+    __ movl($dst$$Register, $mem$$Address);
   %}
   ins_pipe(ialu_reg_mem); // XXX
 %}
@@ -6418,6 +6513,102 @@
   ins_pipe(ialu_reg_reg_fat);
 %}
 
+instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
+%{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+// Load Effective Address which uses Narrow (32-bits) oop
+instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110); // XXX
+  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
+instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
 instruct loadConI(rRegI dst, immI src)
 %{
   match(Set dst src);
@@ -6528,8 +6719,7 @@
   effect(KILL cr);
   format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
   ins_encode %{
-    Register dst = $dst$$Register;
-    __ xorq(dst, dst);
+    __ xorq($dst$$Register, $dst$$Register);
   %}
   ins_pipe(ialu_reg);
 %}
@@ -6541,11 +6731,10 @@
   format %{ "movl    $dst, $src\t# compressed ptr" %}
   ins_encode %{
     address con = (address)$src$$constant;
-    Register dst = $dst$$Register;
     if (con == NULL) {
       ShouldNotReachHere();
     } else {
-      __ set_narrow_oop(dst, (jobject)$src$$constant);
+      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
     }
   %}
   ins_pipe(ialu_reg_fat); // XXX
@@ -6794,12 +6983,25 @@
   ins_pipe(ialu_mem_reg);
 %}
 
+instruct storeImmP0(memory mem, immP0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreP mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 // Store NULL Pointer, mark word, or other simple pointer constant.
 instruct storeImmP(memory mem, immP31 src)
 %{
   match(Set mem (StoreP mem src));
 
-  ins_cost(125); // XXX
+  ins_cost(150); // XXX
   format %{ "movq    $mem, $src\t# ptr" %}
   opcode(0xC7); /* C7 /0 */
   ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
@@ -6814,14 +7016,55 @@
   ins_cost(125); // XXX
   format %{ "movl    $mem, $src\t# compressed ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    Register src = as_Register($src$$reg);
-    __ movl(addr, src);
+    __ movl($mem$$Address, $src$$Register);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
+instruct storeImmN0(memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() == NULL);
+  match(Set mem (StoreN mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
   %}
   ins_pipe(ialu_mem_reg);
 %}
 
+instruct storeImmN(memory mem, immN src)
+%{
+  match(Set mem (StoreN mem src));
+
+  ins_cost(150); // XXX
+  format %{ "movl    $mem, $src\t# compressed ptr" %}
+  ins_encode %{
+    address con = (address)$src$$constant;
+    if (con == NULL) {
+      __ movl($mem$$Address, (int32_t)0);
+    } else {
+      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
+    }
+  %}
+  ins_pipe(ialu_mem_imm);
+%}
+
 // Store Integer Immediate
+instruct storeImmI0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreI mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmI(memory mem, immI src)
 %{
   match(Set mem (StoreI mem src));
@@ -6834,6 +7077,19 @@
 %}
 
 // Store Long Immediate
+instruct storeImmL0(memory mem, immL0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreL mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmL(memory mem, immL32 src)
 %{
   match(Set mem (StoreL mem src));
@@ -6846,6 +7102,19 @@
 %}
 
 // Store Short/Char Immediate
+instruct storeImmC0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreC mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movw($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmI16(memory mem, immI16 src)
 %{
   predicate(UseStoreImmI16);
@@ -6859,6 +7128,19 @@
 %}
 
 // Store Byte Immediate
+instruct storeImmB0(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreB mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movb($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmB(memory mem, immI8 src)
 %{
   match(Set mem (StoreB mem src));
@@ -6898,6 +7180,19 @@
 %}
 
 // Store CMS card-mark Immediate
+instruct storeImmCM0_reg(memory mem, immI0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreCM mem zero));
+
+  ins_cost(125); // XXX
+  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movb($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeImmCM0(memory mem, immI0 src)
 %{
   match(Set mem (StoreCM mem src));
@@ -6931,6 +7226,19 @@
 %}
 
 // Store immediate Float value (it is faster than store from XMM register)
+instruct storeF0(memory mem, immF0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreF mem zero));
+
+  ins_cost(25); // XXX
+  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movl($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeF_imm(memory mem, immF src)
 %{
   match(Set mem (StoreF mem src));
@@ -6957,6 +7265,7 @@
 // Store immediate double 0.0 (it is faster than store from XMM register)
 instruct storeD0_imm(memory mem, immD0 src)
 %{
+  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
   match(Set mem (StoreD mem src));
 
   ins_cost(50);
@@ -6966,6 +7275,19 @@
   ins_pipe(ialu_mem_imm);
 %}
 
+instruct storeD0(memory mem, immD0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set mem (StoreD mem zero));
+
+  ins_cost(25); // XXX
+  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
+  ins_encode %{
+    __ movq($mem$$Address, r12);
+  %}
+  ins_pipe(ialu_mem_reg);
+%}
+
 instruct storeSSI(stackSlotI dst, rRegI src)
 %{
   match(Set dst src);
@@ -7077,6 +7399,56 @@
   ins_pipe( ialu_mem_reg );
 %}
 
+
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(rRegI dst, rRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "popcnt  $dst, $src" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct popCountI_mem(rRegI dst, memory mem) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI (LoadI mem)));
+
+  format %{ "popcnt  $dst, $mem" %}
+  ins_encode %{
+    __ popcntl($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(rRegI dst, rRegL src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+
+  format %{ "popcnt  $dst, $src" %}
+  ins_encode %{
+    __ popcntq($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL_mem(rRegI dst, memory mem) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL (LoadL mem)));
+
+  format %{ "popcnt  $dst, $mem" %}
+  ins_encode %{
+    __ popcntq($dst$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+
 //----------MemBar Instructions-----------------------------------------------
 // Memory barrier flavors
 
@@ -7192,9 +7564,7 @@
   effect(KILL cr);
   format %{ "encode_heap_oop_not_null $dst,$src" %}
   ins_encode %{
-    Register s = $src$$Register;
-    Register d = $dst$$Register;
-    __ encode_heap_oop_not_null(d, s);
+    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
   %}
   ins_pipe(ialu_reg_long);
 %}
@@ -7224,7 +7594,11 @@
   ins_encode %{
     Register s = $src$$Register;
     Register d = $dst$$Register;
-    __ decode_heap_oop_not_null(d, s);
+    if (s != d) {
+      __ decode_heap_oop_not_null(d, s);
+    } else {
+      __ decode_heap_oop_not_null(d);
+    }
   %}
   ins_pipe(ialu_reg_long);
 %}
@@ -11389,8 +11763,9 @@
 
 // This will generate a signed flags result. This should be OK since
 // any compare to a zero should be eq/neq.
-instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
-%{
+instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
+%{
+  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
   match(Set cr (CmpP (LoadP op) zero));
 
   ins_cost(500); // XXX
@@ -11401,13 +11776,24 @@
   ins_pipe(ialu_cr_reg_imm);
 %}
 
+instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
+%{
+  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
+  match(Set cr (CmpP (LoadP mem) zero));
+
+  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ cmpq(r12, $mem$$Address);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
 
 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
 %{
   match(Set cr (CmpN op1 op2));
 
   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
-  ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %}
+  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
   ins_pipe(ialu_cr_reg_reg);
 %}
 
@@ -11415,11 +11801,30 @@
 %{
   match(Set cr (CmpN src (LoadN mem)));
 
-  ins_cost(500); // XXX
-  format %{ "cmpl    $src, mem\t# compressed ptr" %}
+  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
+  ins_encode %{
+    __ cmpl($src$$Register, $mem$$Address);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
+
+instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
+  match(Set cr (CmpN op1 op2));
+
+  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
   ins_encode %{
-    Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ cmpl(as_Register($src$$reg), adr);
+    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
+  %}
+  ins_pipe(ialu_cr_reg_imm);
+%}
+
+instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
+%{
+  match(Set cr (CmpN src (LoadN mem)));
+
+  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
+  ins_encode %{
+    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
   %}
   ins_pipe(ialu_cr_reg_mem);
 %}
@@ -11432,15 +11837,27 @@
   ins_pipe(ialu_cr_reg_imm);
 %}
 
-instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero)
-%{
+instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() != NULL);
   match(Set cr (CmpN (LoadN mem) zero));
 
   ins_cost(500); // XXX
   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
   ins_encode %{
-    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ cmpl(addr, (int)0xFFFFFFFF);
+    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
+
+instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
+%{
+  predicate(Universe::narrow_oop_base() == NULL);
+  match(Set cr (CmpN (LoadN mem) zero));
+
+  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
+  ins_encode %{
+    __ cmpl(r12, $mem$$Address);
   %}
   ins_pipe(ialu_cr_reg_mem);
 %}
@@ -11472,7 +11889,6 @@
 %{
   match(Set cr (CmpL op1 (LoadL op2)));
 
-  ins_cost(500); // XXX
   format %{ "cmpq    $op1, $op2" %}
   opcode(0x3B); /* Opcode 3B /r */
   ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
@@ -11733,15 +12149,12 @@
   effect(KILL rcx, KILL cr);
 
   ins_cost(1100);  // slightly larger than the next version
-  format %{ "cmpq    rax, rsi\n\t"
-            "jeq,s   hit\n\t"
-            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
             "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
-    "hit:\n\t"
             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
     "miss:\t" %}
 
@@ -11756,13 +12169,10 @@
                                      rdi_RegP result)
 %{
   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
-  predicate(!UseCompressedOops); // decoding oop kills condition codes
   effect(KILL rcx, KILL result);
 
   ins_cost(1000);
-  format %{ "cmpq    rax, rsi\n\t"
-            "jeq,s   miss\t# Actually a hit; we are done.\n\t"
-            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
             "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
             "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
             "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"