changeset 164:c436414a719e

6703890: Compressed Oops: add LoadNKlass node to generate narrow oops (32-bits) compare instructions Summary: Add LoadNKlass and CMoveN nodes, use CmpN and ConN nodes to generate narrow oops compare instructions. Reviewed-by: never, rasbold
author kvn
date Wed, 21 May 2008 13:46:23 -0700
parents 885ed790ecf0
children 437d03ea40b1
files src/cpu/sparc/vm/assembler_sparc.cpp src/cpu/sparc/vm/assembler_sparc.hpp src/cpu/sparc/vm/relocInfo_sparc.cpp src/cpu/sparc/vm/relocInfo_sparc.hpp src/cpu/sparc/vm/sparc.ad src/cpu/x86/vm/assembler_x86_64.cpp src/cpu/x86/vm/assembler_x86_64.hpp src/cpu/x86/vm/relocInfo_x86.cpp src/cpu/x86/vm/relocInfo_x86.hpp src/cpu/x86/vm/x86_64.ad src/share/vm/adlc/forms.cpp src/share/vm/adlc/formssel.cpp src/share/vm/includeDB_core src/share/vm/opto/classes.hpp src/share/vm/opto/compile.cpp src/share/vm/opto/connode.cpp src/share/vm/opto/connode.hpp src/share/vm/opto/doCall.cpp src/share/vm/opto/escape.cpp src/share/vm/opto/graphKit.cpp src/share/vm/opto/lcm.cpp src/share/vm/opto/library_call.cpp src/share/vm/opto/loopopts.cpp src/share/vm/opto/matcher.cpp src/share/vm/opto/memnode.cpp src/share/vm/opto/memnode.hpp src/share/vm/opto/parse1.cpp src/share/vm/opto/parseHelper.cpp src/share/vm/runtime/globals.hpp
diffstat 29 files changed, 430 insertions(+), 130 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Wed May 21 13:46:23 2008 -0700
@@ -1523,6 +1523,21 @@
   return Address(d, address(obj), oop_Relocation::spec(oop_index));
 }
 
+void  MacroAssembler::set_narrow_oop(jobject obj, Register d) {  // emits a placeholder sethi/add pair into d, patched later with obj's narrow oop
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);  // recorder index that the oop relocation refers to
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+  assert_not_delayed();
+  // Relocation with special format 1 (see relocInfo_sparc.hpp); a non-zero format selects the narrow oop patching path.
+  relocate(rspec, 1);
+  // Assembler::sethi(0x3fffff, d); -- encoded by hand below, the immediate is only a placeholder
+  emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) );
+  // Don't add relocation for 'add'. Do patching during 'sethi' processing (it rewrites both the hi22 and the simm13 fields).
+  add(d, 0x3ff, d);
+
+}
+
 
 void MacroAssembler::align(int modulus) {
   while (offset() % modulus != 0) nop();
@@ -3537,28 +3552,26 @@
   }
 }
 
-void MacroAssembler::load_klass(Register s, Register d) {
+void MacroAssembler::load_klass(Register src_oop, Register klass) {
   // The number of bytes in this code is used by
   // MachCallDynamicJavaNode::ret_addr_offset()
   // if this changes, change that.
   if (UseCompressedOops) {
-    lduw(s, oopDesc::klass_offset_in_bytes(), d);
-    decode_heap_oop_not_null(d);
+    lduw(src_oop, oopDesc::klass_offset_in_bytes(), klass);
+    decode_heap_oop_not_null(klass);
   } else {
-    ld_ptr(s, oopDesc::klass_offset_in_bytes(), d);
+    ld_ptr(src_oop, oopDesc::klass_offset_in_bytes(), klass);
   }
 }
 
-// ??? figure out src vs. dst!
-void MacroAssembler::store_klass(Register d, Register s1) {
+void MacroAssembler::store_klass(Register klass, Register dst_oop) {
   if (UseCompressedOops) {
-    assert(s1 != d, "not enough registers");
-    encode_heap_oop_not_null(d);
-    // Zero out entire klass field first.
-    st_ptr(G0, s1, oopDesc::klass_offset_in_bytes());
-    st(d, s1, oopDesc::klass_offset_in_bytes());
+    assert(dst_oop != klass, "not enough registers");
+    encode_heap_oop_not_null(klass);
+    sllx(klass, BitsPerInt, klass);
+    stx(klass, dst_oop, oopDesc::klass_offset_in_bytes());
   } else {
-    st_ptr(d, s1, oopDesc::klass_offset_in_bytes());
+    st_ptr(klass, dst_oop, oopDesc::klass_offset_in_bytes());
   }
 }
 
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Wed May 21 13:46:23 2008 -0700
@@ -1977,8 +1977,8 @@
   inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); }
 
   // klass oop manipulations if compressed
-  void load_klass(Register  src_oop, Register dst);
-  void store_klass(Register dst_oop, Register s1);
+  void load_klass(Register src_oop, Register klass);
+  void store_klass(Register klass, Register dst_oop);
 
    // oop manipulations
   void load_heap_oop(const Address& s, Register d, int offset = 0);
@@ -2103,6 +2103,8 @@
   inline void set_oop_constant( jobject obj, Register d ); // uses constant_oop_address
   inline void set_oop         ( Address obj_addr );        // same as load_address
 
+  void set_narrow_oop( jobject obj, Register d );
+
   // nop padding
   void align(int modulus);
 
--- a/src/cpu/sparc/vm/relocInfo_sparc.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/sparc/vm/relocInfo_sparc.cpp	Wed May 21 13:46:23 2008 -0700
@@ -87,6 +87,17 @@
 #ifdef _LP64
     jint inst2;
     guarantee(Assembler::inv_op2(inst)==Assembler::sethi_op2, "must be sethi");
+    if (format() != 0) {
+      assert(type() == relocInfo::oop_type, "only narrow oops case");
+      jint np = oopDesc::encode_heap_oop((oop)x);
+      inst &= ~Assembler::hi22(-1);
+      inst |=  Assembler::hi22((intptr_t)np);
+      ip->set_long_at(0, inst);
+      inst2 = ip->long_at( NativeInstruction::nop_instruction_size );
+      guarantee(Assembler::inv_op(inst2)==Assembler::arith_op, "arith op");
+      ip->set_long_at(NativeInstruction::nop_instruction_size, ip->set_data32_simm13( inst2, (intptr_t)np));
+      break;
+    }
     ip->set_data64_sethi( ip->addr_at(0), (intptr_t)x );
 #ifdef COMPILER2
     // [RGV] Someone must have missed putting in a reloc entry for the
--- a/src/cpu/sparc/vm/relocInfo_sparc.hpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/sparc/vm/relocInfo_sparc.hpp	Wed May 21 13:46:23 2008 -0700
@@ -31,7 +31,12 @@
 
     // There is no need for format bits; the instructions are
     // sufficiently self-identifying.
+#ifndef _LP64
     format_width       =  0
+#else
+    // Except narrow oops in 64-bits VM.
+    format_width       =  1
+#endif
   };
 
 
--- a/src/cpu/sparc/vm/sparc.ad	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Wed May 21 13:46:23 2008 -0700
@@ -5486,10 +5486,9 @@
   ins_pipe(iload_mem);
 %}
 
-// Load Klass Pointer
-instruct loadKlassComp(iRegP dst, memory mem) %{
-  match(Set dst (LoadKlass mem));
-  predicate(n->in(MemNode::Address)->bottom_type()->is_ptr_to_narrowoop());
+// Load narrow Klass Pointer
+instruct loadNKlass(iRegN dst, memory mem) %{
+  match(Set dst (LoadNKlass mem));
   ins_cost(MEMORY_REF_COST);
 
   format %{ "LDUW   $mem,$dst\t! compressed klass ptr" %}
@@ -5503,9 +5502,6 @@
      } else {
        __ lduw(base, $mem$$disp, dst);
      }
-     // klass oop never null but this is generated for nonheader klass loads
-     // too which can be null.
-     __ decode_heap_oop(dst);
   %}
   ins_pipe(iload_mem);
 %}
@@ -5609,22 +5605,24 @@
   ins_pipe(loadConP_poll);
 %}
 
+instruct loadConN0(iRegN dst, immN0 src) %{
+  match(Set dst src);  // narrow NULL constant (immN0) into a narrow oop register
+
+  size(4);
+  format %{ "CLR    $dst\t! compressed NULL ptr" %}
+  ins_encode( SetNull( dst ) );
+  ins_pipe(ialu_imm);
+%}
+
 instruct loadConN(iRegN dst, immN src) %{
   match(Set dst src);
-  ins_cost(DEFAULT_COST * 2);
-  format %{ "SET    $src,$dst\t!ptr" %}
+  ins_cost(DEFAULT_COST * 3/2);
+  format %{ "SET    $src,$dst\t! compressed ptr" %}
   ins_encode %{
-    address con = (address)$src$$constant;
     Register dst = $dst$$Register;
-    if (con == NULL) {
-      __ mov(G0, dst);
-    } else {
-      __ set_oop((jobject)$src$$constant, dst);
-      __ encode_heap_oop(dst);
-    }
-  %}
-  ins_pipe(loadConP);
-
+    __ set_narrow_oop((jobject)$src$$constant, dst);
+  %}
+  ins_pipe(ialu_hi_lo_reg);
 %}
 
 instruct loadConL(iRegL dst, immL src, o7RegL tmp) %{
@@ -6258,6 +6256,34 @@
   ins_pipe(ialu_imm);
 %}
 
+// Conditional move for RegN (narrow oop registers). Only cmov(reg,reg) forms are provided.
+instruct cmovNP_reg(cmpOpP cmp, flagsRegP pcc, iRegN dst, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp pcc) (Binary dst src)));  // condition taken from pointer-compare flags (flagsRegP)
+  ins_cost(150);
+  format %{ "MOV$cmp $pcc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
+  ins_pipe(ialu_reg);
+%}
+
+// This instruction also works with CmpN (the CmpN rules set a flagsReg) so we don't need cmovNN_reg.
+instruct cmovNI_reg(cmpOp cmp, flagsReg icc, iRegN dst, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp icc) (Binary dst src)));  // condition taken from integer condition codes
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp  $icc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
+  ins_pipe(ialu_reg);
+%}
+
+instruct cmovNF_reg(cmpOpF cmp, flagsRegF fcc, iRegN dst, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp fcc) (Binary dst src)));  // narrow oop cmov on a float-compare condition (flagsRegF)
+  ins_cost(150);
+  size(4);
+  format %{ "MOV$cmp $fcc,$src,$dst" %}
+  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
+  ins_pipe(ialu_reg);
+%}
+
 // Conditional move
 instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{
   match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
@@ -6275,6 +6301,7 @@
   ins_pipe(ialu_imm);
 %}
 
+// This instruction also works with CmpN so we don't need cmovPN_reg.
 instruct cmovPI_reg(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src) %{
   match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
   ins_cost(150);
@@ -8265,6 +8292,27 @@
   ins_pipe(ialu_cconly_reg_imm);
 %}
 
+// Compare Narrow oops (register with register); the result is written to G0, so only the condition codes are kept.
+instruct compN_iRegN(flagsReg icc, iRegN op1, iRegN op2 ) %{
+  match(Set icc (CmpN op1 op2));
+
+  size(4);
+  format %{ "CMP    $op1,$op2\t! compressed ptr" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_reg);
+%}
+
+instruct compN_iRegN_immN0(flagsReg icc, iRegN op1, immN0 op2 ) %{
+  match(Set icc (CmpN op1 op2));  // narrow oop compared with the narrow NULL constant (immN0)
+
+  size(4);
+  format %{ "CMP    $op1,$op2\t! compressed ptr" %}
+  opcode(Assembler::subcc_op3, Assembler::arith_op);
+  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
+  ins_pipe(ialu_cconly_reg_imm);
+%}
+
 //----------Max and Min--------------------------------------------------------
 // Min Instructions
 // Conditional move for min
@@ -8595,6 +8643,14 @@
   ins_pipe(ialu_imm);
 %}
 
+instruct cmovNL_reg(cmpOp cmp, flagsRegL xcc, iRegN dst, iRegN src) %{
+  match(Set dst (CMoveN (Binary cmp xcc) (Binary dst src)));  // narrow oop cmov on a long-compare condition (flagsRegL)
+  ins_cost(150);
+  format %{ "MOV$cmp  $xcc,$src,$dst" %}
+  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) );
+  ins_pipe(ialu_reg);
+%}
+
 instruct cmovPL_reg(cmpOp cmp, flagsRegL xcc, iRegP dst, iRegP src) %{
   match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
   ins_cost(150);
@@ -8826,16 +8882,6 @@
 %}
 
 
-instruct compP_iRegN_immN0(flagsRegP pcc, iRegN op1, immN0 op2 ) %{
-  match(Set pcc (CmpN op1 op2));
-
-  size(4);
-  format %{ "CMP    $op1,$op2\t! ptr" %}
-  opcode(Assembler::subcc_op3, Assembler::arith_op);
-  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
-  ins_pipe(ialu_cconly_reg_imm);
-%}
-
 // ============================================================================
 // inlined locking and unlocking
 
--- a/src/cpu/x86/vm/assembler_x86_64.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/x86/vm/assembler_x86_64.cpp	Wed May 21 13:46:23 2008 -0700
@@ -683,7 +683,8 @@
 
   case REP8(0xB8): // movl/q r, #32/#64(oop?)
     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
-    assert((which == call32_operand || which == imm64_operand) && is_64bit, "");
+    assert((which == call32_operand || which == imm64_operand) && is_64bit ||
+           which == narrow_oop_operand && !is_64bit, "");
     return ip;
 
   case 0x69: // imul r, a, #32
@@ -909,7 +910,8 @@
   } else if (r->is_call() || format == call32_operand) {
     opnd = locate_operand(inst, call32_operand);
   } else if (r->is_data()) {
-    assert(format == imm64_operand || format == disp32_operand, "format ok");
+    assert(format == imm64_operand || format == disp32_operand ||
+           format == narrow_oop_operand, "format ok");
     opnd = locate_operand(inst, (WhichOperand) format);
   } else {
     assert(format == 0, "cannot specify a format");
@@ -5157,12 +5159,9 @@
 void MacroAssembler::store_klass(Register dst, Register src) {
   if (UseCompressedOops) {
     encode_heap_oop_not_null(src);
-    // zero the entire klass field first as the gap needs to be zeroed too.
-    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), NULL_WORD);
-    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
-  } else {
-    movq(Address(dst, oopDesc::klass_offset_in_bytes()), src);
+    // Store to the wide klass field to zero the gap.
   }
+  movq(Address(dst, oopDesc::klass_offset_in_bytes()), src);
 }
 
 void MacroAssembler::load_heap_oop(Register dst, Address src) {
@@ -5188,13 +5187,15 @@
 void MacroAssembler::encode_heap_oop(Register r) {
   assert (UseCompressedOops, "should be compressed");
 #ifdef ASSERT
-  Label ok;
-  pushq(rscratch1); // cmpptr trashes rscratch1
-  cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
-  jcc(Assembler::equal, ok);
-  stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
-  bind(ok);
-  popq(rscratch1);
+  if (CheckCompressedOops) {
+    Label ok;
+    pushq(rscratch1); // cmpptr trashes rscratch1
+    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
+    jcc(Assembler::equal, ok);
+    stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
+    bind(ok);
+    popq(rscratch1);
+  }
 #endif
   verify_oop(r, "broken oop in encode_heap_oop");
   testq(r, r);
@@ -5206,11 +5207,13 @@
 void MacroAssembler::encode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "should be compressed");
 #ifdef ASSERT
-  Label ok;
-  testq(r, r);
-  jcc(Assembler::notEqual, ok);
-  stop("null oop passed to encode_heap_oop_not_null");
-  bind(ok);
+  if (CheckCompressedOops) {
+    Label ok;
+    testq(r, r);
+    jcc(Assembler::notEqual, ok);
+    stop("null oop passed to encode_heap_oop_not_null");
+    bind(ok);
+  }
 #endif
   verify_oop(r, "broken oop in encode_heap_oop_not_null");
   subq(r, r12_heapbase);
@@ -5220,11 +5223,13 @@
 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
   assert (UseCompressedOops, "should be compressed");
 #ifdef ASSERT
-  Label ok;
-  testq(src, src);
-  jcc(Assembler::notEqual, ok);
-  stop("null oop passed to encode_heap_oop_not_null2");
-  bind(ok);
+  if (CheckCompressedOops) {
+    Label ok;
+    testq(src, src);
+    jcc(Assembler::notEqual, ok);
+    stop("null oop passed to encode_heap_oop_not_null2");
+    bind(ok);
+  }
 #endif
   verify_oop(src, "broken oop in encode_heap_oop_not_null2");
   if (dst != src) {
@@ -5237,14 +5242,16 @@
 void  MacroAssembler::decode_heap_oop(Register r) {
   assert (UseCompressedOops, "should be compressed");
 #ifdef ASSERT
-  Label ok;
-  pushq(rscratch1);
-  cmpptr(r12_heapbase,
-         ExternalAddress((address)Universe::heap_base_addr()));
-  jcc(Assembler::equal, ok);
-  stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
-  bind(ok);
-  popq(rscratch1);
+  if (CheckCompressedOops) {
+    Label ok;
+    pushq(rscratch1);
+    cmpptr(r12_heapbase,
+           ExternalAddress((address)Universe::heap_base_addr()));
+    jcc(Assembler::equal, ok);
+    stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
+    bind(ok);
+    popq(rscratch1);
+  }
 #endif
 
   Label done;
@@ -5277,6 +5284,19 @@
   leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
 }
 
+void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {  // emits a move-immediate into dst whose immediate is relocated as a narrow oop
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+  // movl dst,obj
+  InstructionMark im(this);
+  int encode = prefix_and_encode(dst->encoding());
+  emit_byte(0xB8 | encode);  // opcode handled by the REP8(0xB8) case of locate_operand
+  emit_data(oop_index, rspec, narrow_oop_operand);  // immediate holds oop_index as emitted; the narrow_oop_operand relocation rewrites it
+}
+
+
 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
   switch (cond) {
     // Note some conditions are synonyms for others
--- a/src/cpu/x86/vm/assembler_x86_64.hpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/x86/vm/assembler_x86_64.hpp	Wed May 21 13:46:23 2008 -0700
@@ -490,7 +490,12 @@
     imm64_operand  = 0,          // embedded 64-bit immediate operand
     disp32_operand = 1,          // embedded 32-bit displacement
     call32_operand = 2,          // embedded 32-bit self-relative displacement
+#ifndef AMD64
     _WhichOperand_limit = 3
+#else
+     narrow_oop_operand = 3,     // embedded 32-bit immediate narrow oop
+    _WhichOperand_limit = 4
+#endif
   };
 
   public:
@@ -1114,6 +1119,8 @@
   void encode_heap_oop_not_null(Register dst, Register src);
   void decode_heap_oop_not_null(Register dst, Register src);
 
+  void set_narrow_oop(Register dst, jobject obj);
+
   // Stack frame creation/removal
   void enter();
   void leave();
--- a/src/cpu/x86/vm/relocInfo_x86.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/x86/vm/relocInfo_x86.cpp	Wed May 21 13:46:23 2008 -0700
@@ -30,11 +30,15 @@
 #ifdef AMD64
   x += o;
   typedef Assembler::WhichOperand WhichOperand;
-  WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm64, call32
+  WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm64, call32, narrow oop
   assert(which == Assembler::disp32_operand ||
+         which == Assembler::narrow_oop_operand ||
          which == Assembler::imm64_operand, "format unpacks ok");
   if (which == Assembler::imm64_operand) {
     *pd_address_in_code() = x;
+  } else if (which == Assembler::narrow_oop_operand) {
+    address disp = Assembler::locate_operand(addr(), which);
+    *(int32_t*) disp = oopDesc::encode_heap_oop((oop)x);
   } else {
     // Note:  Use runtime_call_type relocations for call32_operand.
     address ip = addr();
--- a/src/cpu/x86/vm/relocInfo_x86.hpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/x86/vm/relocInfo_x86.hpp	Wed May 21 13:46:23 2008 -0700
@@ -29,5 +29,10 @@
     offset_unit        =  1,
 
     // Encodes Assembler::disp32_operand vs. Assembler::imm32_operand.
+#ifndef AMD64
     format_width       =  1
+#else
+    // vs Assembler::narrow_oop_operand.
+    format_width       =  2
+#endif
   };
--- a/src/cpu/x86/vm/x86_64.ad	Wed May 21 10:45:07 2008 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Wed May 21 13:46:23 2008 -0700
@@ -6063,7 +6063,6 @@
 instruct loadKlass(rRegP dst, memory mem)
 %{
   match(Set dst (LoadKlass mem));
-  predicate(!n->in(MemNode::Address)->bottom_type()->is_ptr_to_narrowoop());
 
   ins_cost(125); // XXX
   format %{ "movq    $dst, $mem\t# class" %}
@@ -6072,23 +6071,17 @@
   ins_pipe(ialu_reg_mem); // XXX
 %}
 
-// Load Klass Pointer
-instruct loadKlassComp(rRegP dst, memory mem, rFlagsReg cr)
-%{
-  match(Set dst (LoadKlass mem));
-  predicate(n->in(MemNode::Address)->bottom_type()->is_ptr_to_narrowoop());
-  effect(KILL cr);
+// Load narrow Klass Pointer (the result stays compressed; this rule no longer decodes it)
+instruct loadNKlass(rRegN dst, memory mem)
+%{
+  match(Set dst (LoadNKlass mem));
 
   ins_cost(125); // XXX
-  format %{ "movl    $dst, $mem\t# compressed class\n\t"
-            "decode_heap_oop $dst,$dst" %}
+  format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
   ins_encode %{
     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
     Register dst = as_Register($dst$$reg);
     __ movl(dst, addr);
-    // klass is never null in the header but this is generated for all
-    // klass loads not just the _klass field in the header.
-    __ decode_heap_oop(dst);
   %}
   ins_pipe(ialu_reg_mem); // XXX
 %}
@@ -6358,21 +6351,18 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct loadConN(rRegN dst, immN src, rFlagsReg cr) %{
+instruct loadConN(rRegN dst, immN src) %{
   match(Set dst src);
-  effect(KILL cr);
 
   ins_cost(125);
-  format %{ "movq    $dst, $src\t# compressed ptr\n\t"
-            "encode_heap_oop_not_null $dst,$dst" %}
+  format %{ "movl    $dst, $src\t# compressed ptr" %}
   ins_encode %{
     address con = (address)$src$$constant;
     Register dst = $dst$$Register;
     if (con == NULL) {
       ShouldNotReachHere();
     } else {
-      __ movoop(dst, (jobject)$src$$constant);
-      __ encode_heap_oop_not_null(dst);
+      __ set_narrow_oop(dst, (jobject)$src$$constant);
     }
   %}
   ins_pipe(ialu_reg_fat); // XXX
@@ -6639,7 +6629,7 @@
   match(Set mem (StoreN mem src));
 
   ins_cost(125); // XXX
-  format %{ "movl    $mem, $src\t# ptr" %}
+  format %{ "movl    $mem, $src\t# compressed ptr" %}
   ins_encode %{
     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
     Register src = as_Register($src$$reg);
@@ -7143,6 +7133,30 @@
 %}
 
 // Conditional move
+instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
+%{
+  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));  // narrow oop cmov on signed-compare flags (rFlagsReg)
+
+  ins_cost(200); // XXX
+  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
+  opcode(0x0F, 0x40);
+  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
+  ins_pipe(pipe_cmov_reg);
+%}
+
+// Conditional move of a narrow oop on unsigned-compare flags (rFlagsRegU)
+instruct cmovN_regU(rRegN dst, rRegN src, rFlagsRegU cr, cmpOpU cop)
+%{
+  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
+
+  ins_cost(200); // XXX
+  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
+  opcode(0x0F, 0x40);
+  ins_encode(REX_reg_reg(dst, src), enc_cmov(cop), reg_reg(dst, src));
+  ins_pipe(pipe_cmov_reg);
+%}
+
+// Conditional move
 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 %{
   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
@@ -11055,14 +11069,50 @@
   ins_pipe(ialu_cr_reg_imm);
 %}
 
+
+instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
+%{
+  match(Set cr (CmpN op1 op2));  // compare two narrow oop registers with cmpl
+
+  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
+  ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %}
+  ins_pipe(ialu_cr_reg_reg);
+%}
+
+instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
+%{
+  match(Set cr (CmpN src (LoadN mem)));  // compare a narrow oop register with a narrow oop read directly from memory
+
+  ins_cost(500); // XXX
+  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
+  ins_encode %{
+    Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
+    __ cmpl(as_Register($src$$reg), adr);
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
+
 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
   match(Set cr (CmpN src zero));
 
-  format %{ "testl   $src, $src" %}
+  format %{ "testl   $src, $src\t# compressed ptr" %}
   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
   ins_pipe(ialu_cr_reg_imm);
 %}
 
+instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero)
+%{
+  match(Set cr (CmpN (LoadN mem) zero));  // narrow oop in memory compared with the narrow NULL constant
+
+  ins_cost(500); // XXX
+  format %{ "cmpl    $mem, 0\t# compressed ptr" %}
+  ins_encode %{
+    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
+    __ cmpl(addr, 0);  // compare with zero; comparing with 0xFFFFFFFF would set ZF for -1, not for NULL
+  %}
+  ins_pipe(ialu_cr_reg_mem);
+%}
+
 // Yanked all unsigned pointer compare operations.
 // Pointer compares are done with CmpP which is already unsigned.
 
--- a/src/share/vm/adlc/forms.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/adlc/forms.cpp	Wed May 21 13:46:23 2008 -0700
@@ -252,6 +252,7 @@
   if( strcmp(opType,"LoadF")==0 )  return Form::idealF;
   if( strcmp(opType,"LoadI")==0 )  return Form::idealI;
   if( strcmp(opType,"LoadKlass")==0 )  return Form::idealP;
+  if( strcmp(opType,"LoadNKlass")==0 ) return Form::idealN;
   if( strcmp(opType,"LoadL")==0 )  return Form::idealL;
   if( strcmp(opType,"LoadL_unaligned")==0 )  return Form::idealL;
   if( strcmp(opType,"LoadPLocked")==0 )  return Form::idealP;
--- a/src/share/vm/adlc/formssel.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/adlc/formssel.cpp	Wed May 21 13:46:23 2008 -0700
@@ -3313,7 +3313,7 @@
     "Store8B","Store4B","Store8C","Store4C","Store2C",
     "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" ,
     "Load8B" ,"Load4B" ,"Load8C" ,"Load4C" ,"Load2C" ,"Load8S", "Load4S","Load2S",
-    "LoadRange", "LoadKlass", "LoadL_unaligned", "LoadD_unaligned",
+    "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
     "LoadPLocked", "LoadLLocked",
     "StorePConditional", "StoreLConditional",
     "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN",
--- a/src/share/vm/includeDB_core	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/includeDB_core	Wed May 21 13:46:23 2008 -0700
@@ -3492,6 +3492,7 @@
 relocInfo_<arch>.cpp                    assembler.inline.hpp
 relocInfo_<arch>.cpp                    assembler_<arch_model>.inline.hpp
 relocInfo_<arch>.cpp                    nativeInst_<arch>.hpp
+relocInfo_<arch>.cpp                    oop.inline.hpp
 relocInfo_<arch>.cpp                    relocInfo.hpp
 relocInfo_<arch>.cpp                    safepoint.hpp
 
--- a/src/share/vm/opto/classes.hpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/classes.hpp	Wed May 21 13:46:23 2008 -0700
@@ -64,6 +64,7 @@
 macro(CMoveI)
 macro(CMoveL)
 macro(CMoveP)
+macro(CMoveN)
 macro(CmpN)
 macro(CmpD)
 macro(CmpD3)
@@ -133,6 +134,7 @@
 macro(LoadF)
 macro(LoadI)
 macro(LoadKlass)
+macro(LoadNKlass)
 macro(LoadL)
 macro(LoadL_unaligned)
 macro(LoadPLocked)
--- a/src/share/vm/opto/compile.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/compile.cpp	Wed May 21 13:46:23 2008 -0700
@@ -1968,6 +1968,7 @@
   case Op_LoadC:
   case Op_LoadI:
   case Op_LoadKlass:
+  case Op_LoadNKlass:
   case Op_LoadL:
   case Op_LoadL_unaligned:
   case Op_LoadPLocked:
@@ -1998,6 +1999,38 @@
     break;
   }
 
+#ifdef _LP64
+  case Op_CmpP:  // CmpP(DecodeN(a), DecodeN(b) or oop constant) -> CmpN on the narrow values
+    if( n->in(1)->Opcode() == Op_DecodeN ) {
+      Compile* C = Compile::current();
+      Node* in2 = NULL;  // narrow second operand; stays NULL when the CmpP is left alone
+      if( n->in(2)->Opcode() == Op_DecodeN ) {
+        in2 = n->in(2)->in(1);
+      } else if ( n->in(2)->Opcode() == Op_ConP ) {
+        const Type* t = n->in(2)->bottom_type();
+        if (t == TypePtr::NULL_PTR) {
+          Node *in1 = n->in(1);
+          uint i = 0;
+          for (; i < in1->outcnt(); i++) {
+            if (in1->raw_out(i)->is_AddP())
+              break;
+          }
+          if (i >= in1->outcnt()) {
+            // 'o' has no AddP use, so CmpP(o, null) is not needed for an implicit NULL check.
+            in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
+          }
+        } else if (t->isa_oopptr()) {
+          in2 = ConNode::make(C, t->is_oopptr()->make_narrowoop());
+        }
+      }
+      if( in2 != NULL ) {
+        Node* cmpN = new (C, 3) CmpNNode(n->in(1)->in(1), in2);
+        n->replace_by( cmpN );
+      }
+    }
+    break;  // do not fall through into case Op_ModI
+#endif
+
   case Op_ModI:
     if (UseDivMod) {
       // Check if a%b and a/b both exist
--- a/src/share/vm/opto/connode.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/connode.cpp	Wed May 21 13:46:23 2008 -0700
@@ -38,12 +38,12 @@
   if (t->isa_narrowoop()) return new (C, 1) ConNNode( t->is_narrowoop() );
   switch( t->basic_type() ) {
   case T_INT:       return new (C, 1) ConINode( t->is_int() );
-  case T_ARRAY:     return new (C, 1) ConPNode( t->is_aryptr() );
   case T_LONG:      return new (C, 1) ConLNode( t->is_long() );
   case T_FLOAT:     return new (C, 1) ConFNode( t->is_float_constant() );
   case T_DOUBLE:    return new (C, 1) ConDNode( t->is_double_constant() );
   case T_VOID:      return new (C, 1) ConNode ( Type::TOP );
   case T_OBJECT:    return new (C, 1) ConPNode( t->is_oopptr() );
+  case T_ARRAY:     return new (C, 1) ConPNode( t->is_aryptr() );
   case T_ADDRESS:   return new (C, 1) ConPNode( t->is_ptr() );
     // Expected cases:  TypePtr::NULL_PTR, any is_rawptr()
     // Also seen: AnyPtr(TopPTR *+top); from command line:
@@ -185,6 +185,7 @@
   case T_LONG:    return new (C, 4) CMoveLNode( bol, left, right, t->is_long() );
   case T_OBJECT:  return new (C, 4) CMovePNode( c, bol, left, right, t->is_oopptr() );
   case T_ADDRESS: return new (C, 4) CMovePNode( c, bol, left, right, t->is_ptr() );
+  case T_NARROWOOP: return new (C, 4) CMoveNNode( c, bol, left, right, t );
   default:
     ShouldNotReachHere();
     return NULL;
@@ -570,7 +571,7 @@
   return bottom_type();
 }
 
-Node* DecodeNNode::decode(PhaseGVN* phase, Node* value) {
+Node* DecodeNNode::decode(PhaseTransform* phase, Node* value) {
   if (value->Opcode() == Op_EncodeP) {
     // (DecodeN (EncodeP p)) -> p
     return value->in(1);
@@ -604,7 +605,7 @@
   return bottom_type();
 }
 
-Node* EncodePNode::encode(PhaseGVN* phase, Node* value) {
+Node* EncodePNode::encode(PhaseTransform* phase, Node* value) {
   if (value->Opcode() == Op_DecodeN) {
     // (EncodeP (DecodeN p)) -> p
     return value->in(1);
--- a/src/share/vm/opto/connode.hpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/connode.hpp	Wed May 21 13:46:23 2008 -0700
@@ -70,11 +70,6 @@
     else
       return new (C, 1) ConPNode( TypeRawPtr::make(con) );
   }
-
-  static ConPNode* make( Compile *C, ciObject* con ) {
-    return new (C, 1) ConPNode( TypeOopPtr::make_from_constant(con) );
-  }
-
 };
 
 
@@ -84,11 +79,6 @@
 public:
   ConNNode( const TypeNarrowOop *t ) : ConNode(t) {}
   virtual int Opcode() const;
-
-  static ConNNode* make( Compile *C, ciObject* con ) {
-    return new (C, 1) ConNNode( TypeNarrowOop::make_from_constant(con) );
-  }
-
 };
 
 
@@ -210,7 +200,14 @@
   virtual int Opcode() const;
 };
 
-//------------------------------ConstraintCastNode-------------------------------------
+//------------------------------CMoveNNode-------------------------------------
+class CMoveNNode : public CMoveNode {  // conditional move node created for T_NARROWOOP types (see connode.cpp)
+public:
+  CMoveNNode( Node *c, Node *bol, Node *left, Node *right, const Type* t ) : CMoveNode(bol,left,right,t) { init_req(Control,c); }  // c is installed as the Control input
+  virtual int Opcode() const;
+};
+
+//------------------------------ConstraintCastNode-----------------------------
 // cast to a different range
 class ConstraintCastNode: public TypeNode {
 public:
@@ -282,7 +279,7 @@
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual uint  ideal_reg() const { return Op_RegN; }
 
-  static Node* encode(PhaseGVN* phase, Node* value);
+  static Node* encode(PhaseTransform* phase, Node* value);
   virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );
 };
 
@@ -302,7 +299,7 @@
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual uint  ideal_reg() const { return Op_RegP; }
 
-  static Node* decode(PhaseGVN* phase, Node* value);
+  static Node* decode(PhaseTransform* phase, Node* value);
 };
 
 //------------------------------Conv2BNode-------------------------------------
--- a/src/share/vm/opto/doCall.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/doCall.cpp	Wed May 21 13:46:23 2008 -0700
@@ -580,7 +580,7 @@
   Node* ex_klass_node = NULL;
   if (has_ex_handler() && !ex_type->klass_is_exact()) {
     Node* p = basic_plus_adr( ex_node, ex_node, oopDesc::klass_offset_in_bytes());
-    ex_klass_node = _gvn.transform(new (C, 3) LoadKlassNode(NULL, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT));
+    ex_klass_node = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT) );
 
     // Compute the exception klass a little more cleverly.
     // Obvious solution is to simple do a LoadKlass from the 'ex_node'.
@@ -592,7 +592,7 @@
       ex_klass_node = new (C, ex_node->req()) PhiNode( ex_node->in(0), TypeKlassPtr::OBJECT );
       for( uint i = 1; i < ex_node->req(); i++ ) {
         Node* p = basic_plus_adr( ex_node->in(i), ex_node->in(i), oopDesc::klass_offset_in_bytes() );
-        Node* k = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT));
+        Node* k = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT) );
         ex_klass_node->init_req( i, k );
       }
       _gvn.set_type(ex_klass_node, TypeKlassPtr::OBJECT);
--- a/src/share/vm/opto/escape.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/escape.cpp	Wed May 21 13:46:23 2008 -0700
@@ -417,11 +417,18 @@
   //       | |
   //       AddP  ( base == address )
   //
+  // case #8. narrow Klass's field reference.
+  //      LoadNKlass
+  //       |
+  //      DecodeN
+  //       | |
+  //       AddP  ( base == address )
+  //
   Node *base = addp->in(AddPNode::Base)->uncast();
   if (base->is_top()) { // The AddP case #3 and #6.
     base = addp->in(AddPNode::Address)->uncast();
     assert(base->Opcode() == Op_ConP || base->Opcode() == Op_ThreadLocal ||
-           base->Opcode() == Op_CastX2P ||
+           base->Opcode() == Op_CastX2P || base->Opcode() == Op_DecodeN ||
            (base->is_Mem() && base->bottom_type() == TypeRawPtr::NOTNULL) ||
            (base->is_Proj() && base->in(0)->is_Allocate()), "sanity");
   }
@@ -1573,6 +1580,7 @@
       if (k->Opcode() == Op_LoadKlass) {
         kt = k->as_Load()->type()->isa_klassptr();
       } else {
+        // Also works for DecodeN(LoadNKlass).
         kt = k->as_Type()->type()->isa_klassptr();
       }
       assert(kt != NULL, "TypeKlassPtr  required.");
@@ -1811,6 +1819,7 @@
       break;
     }
     case Op_LoadKlass:
+    case Op_LoadNKlass:
     {
       add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, true);
       break;
@@ -2025,6 +2034,7 @@
       break;
     }
     case Op_LoadKlass:
+    case Op_LoadNKlass:
     {
       assert(false, "Op_LoadKlass");
       break;
--- a/src/share/vm/opto/graphKit.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Wed May 21 13:46:23 2008 -0700
@@ -532,7 +532,7 @@
         C->log()->elem("hot_throw preallocated='1' reason='%s'",
                        Deoptimization::trap_reason_name(reason));
       const TypeInstPtr* ex_con  = TypeInstPtr::make(ex_obj);
-      Node*              ex_node = _gvn.transform(new (C, 1) ConPNode(ex_con));
+      Node*              ex_node = _gvn.transform( ConNode::make(C, ex_con) );
 
       // Clear the detail message of the preallocated exception object.
       // Weblogic sometimes mutates the detail message of exceptions
@@ -1043,7 +1043,7 @@
   Node* akls = AllocateNode::Ideal_klass(obj, &_gvn);
   if (akls != NULL)  return akls;
   Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
-  return _gvn.transform( new (C, 3) LoadKlassNode(0, immutable_memory(), k_adr, TypeInstPtr::KLASS) );
+  return _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), k_adr, TypeInstPtr::KLASS) );
 }
 
 //-------------------------load_array_length-----------------------------------
@@ -2210,7 +2210,7 @@
   // cache which is mutable so can't use immutable memory.  Other
   // types load from the super-class display table which is immutable.
   Node *kmem = might_be_cache ? memory(p2) : immutable_memory();
-  Node *nkls = _gvn.transform( new (C, 3) LoadKlassNode( NULL, kmem, p2, _gvn.type(p2)->is_ptr(), TypeKlassPtr::OBJECT_OR_NULL ) );
+  Node *nkls = _gvn.transform( LoadKlassNode::make( _gvn, kmem, p2, _gvn.type(p2)->is_ptr(), TypeKlassPtr::OBJECT_OR_NULL ) );
 
   // Compile speed common case: ARE a subtype and we canNOT fail
   if( superklass == nkls )
@@ -2801,7 +2801,6 @@
     // initialization, and source them from the new InitializeNode.
     // This will allow us to observe initializations when they occur,
     // and link them properly (as a group) to the InitializeNode.
-    Node* klass_node = alloc->in(AllocateNode::KlassNode);
     assert(init->in(InitializeNode::Memory) == malloc, "");
     MergeMemNode* minit_in = MergeMemNode::make(C, malloc);
     init->set_req(InitializeNode::Memory, minit_in);
--- a/src/share/vm/opto/lcm.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/lcm.cpp	Wed May 21 13:46:23 2008 -0700
@@ -113,6 +113,7 @@
     case Op_LoadN:
     case Op_LoadS:
     case Op_LoadKlass:
+    case Op_LoadNKlass:
     case Op_LoadRange:
     case Op_LoadD_unaligned:
     case Op_LoadL_unaligned:
--- a/src/share/vm/opto/library_call.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/library_call.cpp	Wed May 21 13:46:23 2008 -0700
@@ -896,7 +896,7 @@
   Node* sourcea       = basic_plus_adr(string_object, string_object, value_offset);
   Node* source        = make_load(no_ctrl, sourcea, source_type, T_OBJECT, string_type->add_offset(value_offset));
 
-  Node* target = _gvn.transform(ConPNode::make(C, target_array));
+  Node* target = _gvn.transform( makecon(TypeOopPtr::make_from_constant(target_array)) );
   jint target_length = target_array->length();
   const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
   const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
@@ -2454,7 +2454,7 @@
   if (region == NULL)  never_see_null = true;
   Node* p = basic_plus_adr(mirror, offset);
   const TypeKlassPtr*  kls_type = TypeKlassPtr::OBJECT_OR_NULL;
-  Node* kls = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type));
+  Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type) );
   _sp += nargs; // any deopt will start just before call to enclosing method
   Node* null_ctl = top();
   kls = null_check_oop(kls, &null_ctl, never_see_null);
@@ -2634,7 +2634,7 @@
       phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
     // If we fall through, it's a plain class.  Get its _super.
     p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc));
-    kls = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL));
+    kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) );
     null_ctl = top();
     kls = null_check_oop(kls, &null_ctl);
     if (null_ctl != top()) {
@@ -2720,7 +2720,7 @@
     args[which_arg] = _gvn.transform(arg);
 
     Node* p = basic_plus_adr(arg, class_klass_offset);
-    Node* kls = new (C, 3) LoadKlassNode(0, immutable_memory(), p, adr_type, kls_type);
+    Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type);
     klasses[which_arg] = _gvn.transform(kls);
   }
 
@@ -4388,7 +4388,7 @@
       // (At this point we can assume disjoint_bases, since types differ.)
       int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
       Node* p1 = basic_plus_adr(dest_klass, ek_offset);
-      Node* n1 = new (C, 3) LoadKlassNode(0, immutable_memory(), p1, TypeRawPtr::BOTTOM);
+      Node* n1 = LoadKlassNode::make(_gvn, immutable_memory(), p1, TypeRawPtr::BOTTOM);
       Node* dest_elem_klass = _gvn.transform(n1);
       Node* cv = generate_checkcast_arraycopy(adr_type,
                                               dest_elem_klass,
--- a/src/share/vm/opto/loopopts.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/loopopts.cpp	Wed May 21 13:46:23 2008 -0700
@@ -464,6 +464,7 @@
     case T_FLOAT:
     case T_DOUBLE:
     case T_ADDRESS:             // (RawPtr)
+    case T_NARROWOOP:
       cost++;
       break;
     case T_OBJECT: {            // Base oops are OK, but not derived oops
--- a/src/share/vm/opto/matcher.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/matcher.cpp	Wed May 21 13:46:23 2008 -0700
@@ -1768,6 +1768,7 @@
       case Op_LoadF:
       case Op_LoadI:
       case Op_LoadKlass:
+      case Op_LoadNKlass:
       case Op_LoadL:
       case Op_LoadS:
       case Op_LoadP:
@@ -1899,6 +1900,7 @@
       case Op_CMoveF:
       case Op_CMoveI:
       case Op_CMoveL:
+      case Op_CMoveN:
       case Op_CMoveP: {
         // Restructure into a binary tree for Matching.  It's possible that
         // we could move this code up next to the graph reshaping for IfNodes
--- a/src/share/vm/opto/memnode.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/memnode.cpp	Wed May 21 13:46:23 2008 -0700
@@ -671,11 +671,13 @@
       case Op_LoadP:            // Loading from within a klass
       case Op_LoadN:            // Loading from within a klass
       case Op_LoadKlass:        // Loading from within a klass
+      case Op_LoadNKlass:       // Loading from within a klass
       case Op_ConP:             // Loading from a klass
       case Op_ConN:             // Loading from a klass
       case Op_CreateEx:         // Sucking up the guts of an exception oop
       case Op_Con:              // Reading from TLS
       case Op_CMoveP:           // CMoveP is pinned
+      case Op_CMoveN:           // CMoveN is pinned
         break;                  // No progress
 
       case Op_Proj:             // Direct call to an allocation routine
@@ -1610,8 +1612,35 @@
 }
 
 //=============================================================================
+//----------------------------LoadKlassNode::make------------------------------
+// Polymorphic factory method: builds the klass load for address 'adr'.
+// If 'adr' points to a narrow oop (_LP64 only) the result comes from
+// DecodeNNode::decode over a GVN-transformed LoadNKlass; otherwise it is a
+// new, not yet transformed LoadKlass.  No control input is ever attached.
+Node *LoadKlassNode::make( PhaseGVN& gvn, Node *mem, Node *adr, const TypePtr* at, const TypeKlassPtr *tk ) {
+  Compile* C = gvn.C;
+  Node *ctl = NULL;
+  // sanity check the alias category against the created node type
+  const TypeOopPtr *adr_type = adr->bottom_type()->isa_oopptr();
+  assert(adr_type != NULL, "expecting TypeOopPtr");
+#ifdef _LP64
+  if (adr_type->is_ptr_to_narrowoop()) {
+    const TypeNarrowOop* narrowtype = tk->is_oopptr()->make_narrowoop();
+    Node* load_klass = gvn.transform(new (C, 3) LoadNKlassNode(ctl, mem, adr, at, narrowtype));
+    return DecodeNNode::decode(&gvn, load_klass);
+  }
+#endif
+  // Every path returns explicitly, so no unreachable fall-through is needed.
+  assert(!adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop");
+  return new (C, 3) LoadKlassNode(ctl, mem, adr, at, tk);
+}
+
 //------------------------------Value------------------------------------------
 const Type *LoadKlassNode::Value( PhaseTransform *phase ) const {
+  return klass_value_common(phase);  // computation shared with LoadNKlassNode::Value
+}
+
+const Type *LoadNode::klass_value_common( PhaseTransform *phase ) const {
   // Either input is TOP ==> the result is TOP
   const Type *t1 = phase->type( in(MemNode::Memory) );
   if (t1 == Type::TOP)  return Type::TOP;
@@ -1743,6 +1772,10 @@
 // To clean up reflective code, simplify k.java_mirror.as_klass to plain k.
 // Also feed through the klass in Allocate(...klass...)._klass.
 Node* LoadKlassNode::Identity( PhaseTransform *phase ) {
+  return klass_identity_common(phase);  // logic shared with LoadNKlassNode::Identity
+}
+
+Node* LoadNode::klass_identity_common(PhaseTransform *phase ) {
   Node* x = LoadNode::Identity(phase);
   if (x != this)  return x;
 
@@ -1801,6 +1834,34 @@
   return this;
 }
 
+
+//------------------------------Value------------------------------------------
+// Narrow-oop flavor of the shared klass Value computation.
+const Type *LoadNKlassNode::Value( PhaseTransform *phase ) const {
+  const Type *klass_type = klass_value_common(phase);
+
+  // The common code can report a wide NULL; map it to the narrow NULL.
+  if (klass_type == TypePtr::NULL_PTR)  return TypeNarrowOop::NULL_PTR;
+  // TOP and types that are already narrow pass through unchanged.
+  if (klass_type == Type::TOP || klass_type->isa_narrowoop())  return klass_type;
+  // Anything else must be an oop pointer type; hand back its narrow form.
+  assert(klass_type->is_oopptr(), "sanity");
+  return klass_type->is_oopptr()->make_narrowoop();
+}
+
+//------------------------------Identity---------------------------------------
+// Applies the simplifications of klass_identity_common (shared with
+// LoadKlassNode::Identity) and narrows the replacement when it is wide.
+Node* LoadNKlassNode::Identity( PhaseTransform *phase ) {
+  Node *replacement = klass_identity_common(phase);
+  const Type *rtype = phase->type( replacement );
+
+  // TOP or an already narrow result can be returned directly.
+  if (rtype == Type::TOP || rtype->isa_narrowoop())  return replacement;
+  // A wide result is passed through EncodePNode::encode before returning.
+  return EncodePNode::encode(phase, replacement);
+}
+
 //------------------------------Value-----------------------------------------
 const Type *LoadRangeNode::Value( PhaseTransform *phase ) const {
   // Either input is TOP ==> the result is TOP
--- a/src/share/vm/opto/memnode.hpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/memnode.hpp	Wed May 21 13:46:23 2008 -0700
@@ -161,6 +161,10 @@
   // then call the virtual add() to set the type.
   virtual const Type *Value( PhaseTransform *phase ) const;
 
+  // Common methods for LoadKlass and LoadNKlass nodes.
+  const Type *klass_value_common( PhaseTransform *phase ) const;
+  Node *klass_identity_common( PhaseTransform *phase );
+
   virtual uint ideal_reg() const;
   virtual const Type *bottom_type() const;
   // Following method is copied from TypeNode:
@@ -362,14 +366,35 @@
 // Load a Klass from an object
 class LoadKlassNode : public LoadPNode {
 public:
-  LoadKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk = TypeKlassPtr::OBJECT )
+  LoadKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk )
     : LoadPNode(c,mem,adr,at,tk) {}
   virtual int Opcode() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual Node *Identity( PhaseTransform *phase );
   virtual bool depends_only_on_test() const { return true; }
+
+  // Polymorphic factory method; for narrow-oop addresses it builds a LoadNKlass and decodes it:
+  static Node* make( PhaseGVN& gvn, Node *mem, Node *adr, const TypePtr* at,
+                     const TypeKlassPtr *tk = TypeKlassPtr::OBJECT );
 };
 
+//------------------------------LoadNKlassNode---------------------------------
+// Load a narrow Klass from an object.  Built by LoadKlassNode::make, which decodes the result.
+class LoadNKlassNode : public LoadNNode {
+public:
+  LoadNKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeNarrowOop *tk )
+    : LoadNNode(c,mem,adr,at,tk) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Op_RegN; }
+  virtual int store_Opcode() const { return Op_StoreN; }
+  virtual BasicType memory_type() const { return T_NARROWOOP; }
+
+  virtual const Type *Value( PhaseTransform *phase ) const;  // narrows klass_value_common()
+  virtual Node *Identity( PhaseTransform *phase );            // narrows klass_identity_common()
+  virtual bool depends_only_on_test() const { return true; }
+};
+
+
 //------------------------------LoadSNode--------------------------------------
 // Load a short (16bits signed) from memory
 class LoadSNode : public LoadNode {
--- a/src/share/vm/opto/parse1.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/parse1.cpp	Wed May 21 13:46:23 2008 -0700
@@ -1901,7 +1901,7 @@
   // finalization.  In general this will fold up since the concrete
   // class is often visible so the access flags are constant.
   Node* klass_addr = basic_plus_adr( receiver, receiver, oopDesc::klass_offset_in_bytes() );
-  Node* klass = _gvn.transform(new (C, 3) LoadKlassNode(NULL, immutable_memory(), klass_addr, TypeInstPtr::KLASS));
+  Node* klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), klass_addr, TypeInstPtr::KLASS) );
 
   Node* access_flags_addr = basic_plus_adr(klass, klass, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
   Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT);
--- a/src/share/vm/opto/parseHelper.cpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/opto/parseHelper.cpp	Wed May 21 13:46:23 2008 -0700
@@ -38,7 +38,7 @@
 
   // Get method
   const TypeInstPtr* method_type = TypeInstPtr::make(TypePtr::Constant, method->klass(), true, method, 0);
-  Node *method_node = _gvn.transform( new (C, 1) ConPNode(method_type) );
+  Node *method_node = _gvn.transform( ConNode::make(C, method_type) );
 
   kill_dead_locals();
 
@@ -143,7 +143,7 @@
   int klass_offset = oopDesc::klass_offset_in_bytes();
   Node* p = basic_plus_adr( ary, ary, klass_offset );
   // p's type is array-of-OOPS plus klass_offset
-  Node* array_klass = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeInstPtr::KLASS));
+  Node* array_klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeInstPtr::KLASS) );
   // Get the array klass
   const TypeKlassPtr *tak = _gvn.type(array_klass)->is_klassptr();
 
@@ -189,7 +189,7 @@
   // Extract the array element class
   int element_klass_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
   Node *p2 = basic_plus_adr(array_klass, array_klass, element_klass_offset);
-  Node *a_e_klass = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p2, tak));
+  Node *a_e_klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p2, tak) );
 
   // Check (the hard way) and throw if not a subklass.
   // Result is ignored, we just need the CFG effects.
--- a/src/share/vm/runtime/globals.hpp	Wed May 21 10:45:07 2008 -0700
+++ b/src/share/vm/runtime/globals.hpp	Wed May 21 13:46:23 2008 -0700
@@ -291,6 +291,9 @@
             "Use 32-bit object references in 64-bit VM. "                   \
             "lp64_product means flag is always constant in 32 bit VM")      \
                                                                             \
+  lp64_product(bool, CheckCompressedOops, trueInDebug,                      \
+            "generate checks in encoding/decoding code")                    \
+                                                                            \
   /* UseMembar is theoretically a temp flag used for memory barrier         \
    * removal testing.  It was supposed to be removed before FCS but has     \
    * been re-added (see 6401008) */                                         \