changeset 168:7793bd37a336

6705887: Compressed Oops: generate x64 addressing and implicit null checks with narrow oops
Summary: Generate addresses and implicit null checks with narrow oops to avoid decoding.
Reviewed-by: jrose, never
author kvn
date Thu, 29 May 2008 12:04:14 -0700
parents feeb96a45707
children 9148c65abefc
files src/cpu/x86/vm/assembler_x86_32.hpp src/cpu/x86/vm/assembler_x86_64.hpp src/cpu/x86/vm/x86_64.ad src/os_cpu/linux_x86/vm/assembler_linux_x86_32.cpp src/os_cpu/linux_x86/vm/assembler_linux_x86_64.cpp src/os_cpu/solaris_x86/vm/assembler_solaris_x86_32.cpp src/os_cpu/solaris_x86/vm/assembler_solaris_x86_64.cpp src/os_cpu/windows_x86/vm/assembler_windows_x86_32.cpp src/os_cpu/windows_x86/vm/assembler_windows_x86_64.cpp src/share/vm/opto/callnode.hpp src/share/vm/opto/chaitin.cpp src/share/vm/opto/compile.cpp src/share/vm/opto/connode.cpp src/share/vm/opto/connode.hpp src/share/vm/opto/escape.cpp src/share/vm/opto/macro.cpp src/share/vm/opto/matcher.cpp src/share/vm/opto/matcher.hpp src/share/vm/opto/memnode.cpp src/share/vm/opto/node.hpp
diffstat 20 files changed, 138 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/x86/vm/assembler_x86_32.hpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/cpu/x86/vm/assembler_x86_32.hpp	Thu May 29 12:04:14 2008 -0700
@@ -1054,7 +1054,7 @@
   // range (0 <= offset <= page_size).
 
   void null_check(Register reg, int offset = -1);
-  static bool needs_explicit_null_check(int offset);
+  static bool needs_explicit_null_check(intptr_t offset);
 
   // Required platform-specific helpers for Label::patch_instructions.
   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
--- a/src/cpu/x86/vm/assembler_x86_64.hpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/cpu/x86/vm/assembler_x86_64.hpp	Thu May 29 12:04:14 2008 -0700
@@ -1028,7 +1028,7 @@
   // is needed if the offset is within a certain range (0 <= offset <=
   // page_size).
   void null_check(Register reg, int offset = -1);
-  static bool needs_explicit_null_check(int offset);
+  static bool needs_explicit_null_check(intptr_t offset);
 
   // Required platform-specific helpers for Label::patch_instructions.
   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
--- a/src/cpu/x86/vm/x86_64.ad	Wed May 28 21:06:24 2008 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Thu May 29 12:04:14 2008 -0700
@@ -5202,15 +5202,15 @@
   %}
 %}
 
-// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffsetComp(rRegN src, immL32 off, r12RegL base) %{
+// Indirect Narrow Oop Plus Offset Operand
+operand indNarrowOopOffset(rRegN src, immL32 off) %{
   constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeN src base) off);
+  match(AddP (DecodeN src) off);
 
   op_cost(10);
-  format %{"[$base + $src << 3 + $off] (compressed)" %}
+  format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %}
   interface(MEMORY_INTER) %{
-    base($base);
+    base(0xc); // R12
     index($src);
     scale(0x3);
     disp($off);
@@ -5365,7 +5365,7 @@
 
 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
                indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
-               indIndexScaleOffsetComp);
+               indNarrowOopOffset);
 
 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
--- a/src/os_cpu/linux_x86/vm/assembler_linux_x86_32.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/os_cpu/linux_x86/vm/assembler_linux_x86_32.cpp	Thu May 29 12:04:14 2008 -0700
@@ -40,7 +40,7 @@
   movptr(thread, tls);
 }
 
-bool MacroAssembler::needs_explicit_null_check(int offset) {
+bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Linux kernel guarantees that the first page is always unmapped. Don't
   // assume anything more than that.
   bool offset_in_first_page =   0 <= offset  &&  offset < os::vm_page_size();
--- a/src/os_cpu/linux_x86/vm/assembler_linux_x86_64.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/os_cpu/linux_x86/vm/assembler_linux_x86_64.cpp	Thu May 29 12:04:14 2008 -0700
@@ -66,8 +66,21 @@
    }
 }
 
-// NOTE: since the linux kernel resides at the low end of
-// user address space, no null pointer check is needed.
-bool MacroAssembler::needs_explicit_null_check(int offset) {
-  return offset < 0 || offset >= 0x100000;
+bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
+  // Exception handler checks the nmethod's implicit null checks table
+  // only when this method returns false.
+  if (UseCompressedOops) {
+    // The first page after heap_base is unmapped. For narrow oop implicit
+    // null checks the incoming 'offset' is really the address
+    // [heap_base + offset], so it is rebased against heap_base below.
+    uintptr_t heap_base = (uintptr_t)Universe::heap_base();
+    if ((uintptr_t)offset >= heap_base) {
+      // Normalize offset for the next check.
+      offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
+    }
+  }
+  // Linux kernel guarantees that the first page is always unmapped. Don't
+  // assume anything more than that.
+  bool offset_in_first_page =   0 <= offset  &&  offset < os::vm_page_size();
+  return !offset_in_first_page;
 }
--- a/src/os_cpu/solaris_x86/vm/assembler_solaris_x86_32.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/os_cpu/solaris_x86/vm/assembler_solaris_x86_32.cpp	Thu May 29 12:04:14 2008 -0700
@@ -80,7 +80,7 @@
   popl(thread);
 }
 
-bool MacroAssembler::needs_explicit_null_check(int offset) {
+bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Identical to Sparc/Solaris code
   bool offset_in_first_page =   0 <= offset  &&  offset < os::vm_page_size();
   return !offset_in_first_page;
--- a/src/os_cpu/solaris_x86/vm/assembler_solaris_x86_64.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/os_cpu/solaris_x86/vm/assembler_solaris_x86_64.cpp	Thu May 29 12:04:14 2008 -0700
@@ -86,8 +86,21 @@
   }
 }
 
-bool MacroAssembler::needs_explicit_null_check(int offset) {
+bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   // Identical to Sparc/Solaris code
+
+  // Exception handler checks the nmethod's implicit null checks table
+  // only when this method returns false.
+  if (UseCompressedOops) {
+    // The first page after heap_base is unmapped. For narrow oop implicit
+    // null checks the incoming 'offset' is really the address
+    // [heap_base + offset], so it is rebased against heap_base below.
+    uintptr_t heap_base = (uintptr_t)Universe::heap_base();
+    if ((uintptr_t)offset >= heap_base) {
+      // Normalize offset for the next check.
+      offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
+    }
+  }
   bool offset_in_first_page = 0 <= offset && offset < os::vm_page_size();
   return !offset_in_first_page;
 }
--- a/src/os_cpu/windows_x86/vm/assembler_windows_x86_32.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/os_cpu/windows_x86/vm/assembler_windows_x86_32.cpp	Thu May 29 12:04:14 2008 -0700
@@ -59,6 +59,6 @@
   movl(thread, Address(thread, ThreadLocalStorage::get_thread_ptr_offset()));
 }
 
-bool MacroAssembler::needs_explicit_null_check(int offset) {
+bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
   return offset < 0 || (int)os::vm_page_size() <= offset;
 }
--- a/src/os_cpu/windows_x86/vm/assembler_windows_x86_64.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/os_cpu/windows_x86/vm/assembler_windows_x86_64.cpp	Thu May 29 12:04:14 2008 -0700
@@ -66,6 +66,18 @@
    }
 }
 
-bool MacroAssembler::needs_explicit_null_check(int offset) {
-  return offset < 0 || (int)os::vm_page_size() <= offset;
+bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
+  // Exception handler checks the nmethod's implicit null checks table
+  // only when this method returns false.
+  if (UseCompressedOops) {
+    // The first page after heap_base is unmapped. For narrow oop implicit
+    // null checks the incoming 'offset' is really the address
+    // [heap_base + offset], so it is rebased against heap_base below.
+    uintptr_t heap_base = (uintptr_t)Universe::heap_base();
+    if ((uintptr_t)offset >= heap_base) {
+      // Normalize offset for the next check.
+      offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
+    }
+  }
+  return offset < 0 || os::vm_page_size() <= offset;
 }
--- a/src/share/vm/opto/callnode.hpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/callnode.hpp	Thu May 29 12:04:14 2008 -0700
@@ -388,9 +388,6 @@
   void               set_next_exception(SafePointNode* n);
   bool                   has_exceptions() const { return next_exception() != NULL; }
 
-  // Does this node have a use of n other than in debug information?
-  virtual bool           has_non_debug_use(Node *n)  {return false; }
-
   // Standard Node stuff
   virtual int            Opcode() const;
   virtual bool           pinned() const { return true; }
@@ -497,7 +494,7 @@
   // Returns true if the call may modify n
   virtual bool        may_modify(const TypePtr *addr_t, PhaseTransform *phase);
   // Does this node have a use of n other than in debug information?
-  virtual bool        has_non_debug_use(Node *n);
+  bool                has_non_debug_use(Node *n);
   // Returns the unique CheckCastPP of a call
   // or result projection is there are several CheckCastPP
   // or returns NULL if there is no one.
--- a/src/share/vm/opto/chaitin.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/chaitin.cpp	Thu May 29 12:04:14 2008 -0700
@@ -1385,7 +1385,7 @@
             cisc->ins_req(1,src);         // Requires a memory edge
           }
           b->_nodes.map(j,cisc);          // Insert into basic block
-          n->replace_by(cisc); // Correct graph
+          n->subsume_by(cisc); // Correct graph
           //
           ++_used_cisc_instructions;
 #ifndef PRODUCT
--- a/src/share/vm/opto/compile.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/compile.cpp	Thu May 29 12:04:14 2008 -0700
@@ -1842,6 +1842,7 @@
 // Implement items 1-5 from final_graph_reshaping below.
 static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
 
+  if ( n->outcnt() == 0 ) return; // dead node
   uint nop = n->Opcode();
 
   // Check for 2-input instruction with "last use" on right input.
@@ -1908,7 +1909,7 @@
     break;
   case Op_Opaque1:              // Remove Opaque Nodes before matching
   case Op_Opaque2:              // Remove Opaque Nodes before matching
-    n->replace_by(n->in(1));
+    n->subsume_by(n->in(1));
     break;
   case Op_CallStaticJava:
   case Op_CallJava:
@@ -2001,24 +2002,34 @@
 
 #ifdef _LP64
   case Op_CmpP:
-    if( n->in(1)->Opcode() == Op_DecodeN ) {
+    // Do this transformation here to preserve CmpPNode::sub() and
+    // other TypePtr related Ideal optimizations (for example, ptr nullness).
+    if( n->in(1)->is_DecodeN() ) {
       Compile* C = Compile::current();
       Node* in2 = NULL;
-      if( n->in(2)->Opcode() == Op_DecodeN ) {
+      if( n->in(2)->is_DecodeN() ) {
         in2 = n->in(2)->in(1);
       } else if ( n->in(2)->Opcode() == Op_ConP ) {
         const Type* t = n->in(2)->bottom_type();
         if (t == TypePtr::NULL_PTR) {
           Node *in1 = n->in(1);
-          uint i = 0;
-          for (; i < in1->outcnt(); i++) {
-            if (in1->raw_out(i)->is_AddP())
-              break;
-          }
-          if (i >= in1->outcnt()) {
+          if (Matcher::clone_shift_expressions) {
+            // x86, ARM and friends can handle 2 adds in addressing mode.
+            // Decode a narrow oop and do implicit NULL check in address
+            // [R12 + narrow_oop_reg<<3 + offset]
+            in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
+          } else {
             // Don't replace CmpP(o ,null) if 'o' is used in AddP
-            // to generate implicit NULL check.
-            in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
+            // to generate implicit NULL check on Sparc where
+            // narrow oops can't be used in address.
+            uint i = 0;
+            for (; i < in1->outcnt(); i++) {
+              if (in1->raw_out(i)->is_AddP())
+                break;
+            }
+            if (i >= in1->outcnt()) {
+              in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
+            }
           }
         } else if (t->isa_oopptr()) {
           in2 = ConNode::make(C, t->is_oopptr()->make_narrowoop());
@@ -2026,7 +2037,7 @@
       }
       if( in2 != NULL ) {
         Node* cmpN = new (C, 3) CmpNNode(n->in(1)->in(1), in2);
-        n->replace_by( cmpN );
+        n->subsume_by( cmpN );
       }
     }
 #endif
@@ -2040,13 +2051,13 @@
         Compile* C = Compile::current();
         if (Matcher::has_match_rule(Op_DivModI)) {
           DivModINode* divmod = DivModINode::make(C, n);
-          d->replace_by(divmod->div_proj());
-          n->replace_by(divmod->mod_proj());
+          d->subsume_by(divmod->div_proj());
+          n->subsume_by(divmod->mod_proj());
         } else {
           // replace a%b with a-((a/b)*b)
           Node* mult = new (C, 3) MulINode(d, d->in(2));
           Node* sub  = new (C, 3) SubINode(d->in(1), mult);
-          n->replace_by( sub );
+          n->subsume_by( sub );
         }
       }
     }
@@ -2061,13 +2072,13 @@
         Compile* C = Compile::current();
         if (Matcher::has_match_rule(Op_DivModL)) {
           DivModLNode* divmod = DivModLNode::make(C, n);
-          d->replace_by(divmod->div_proj());
-          n->replace_by(divmod->mod_proj());
+          d->subsume_by(divmod->div_proj());
+          n->subsume_by(divmod->mod_proj());
         } else {
           // replace a%b with a-((a/b)*b)
           Node* mult = new (C, 3) MulLNode(d, d->in(2));
           Node* sub  = new (C, 3) SubLNode(d->in(1), mult);
-          n->replace_by( sub );
+          n->subsume_by( sub );
         }
       }
     }
@@ -2113,7 +2124,7 @@
       // Replace many operand PackNodes with a binary tree for matching
       PackNode* p = (PackNode*) n;
       Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
-      n->replace_by(btp);
+      n->subsume_by(btp);
     }
     break;
   default:
--- a/src/share/vm/opto/connode.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/connode.cpp	Thu May 29 12:04:14 2008 -0700
@@ -35,7 +35,6 @@
 
 //------------------------------make-------------------------------------------
 ConNode *ConNode::make( Compile* C, const Type *t ) {
-  if (t->isa_narrowoop()) return new (C, 1) ConNNode( t->is_narrowoop() );
   switch( t->basic_type() ) {
   case T_INT:       return new (C, 1) ConINode( t->is_int() );
   case T_LONG:      return new (C, 1) ConLNode( t->is_long() );
@@ -45,6 +44,7 @@
   case T_OBJECT:    return new (C, 1) ConPNode( t->is_oopptr() );
   case T_ARRAY:     return new (C, 1) ConPNode( t->is_aryptr() );
   case T_ADDRESS:   return new (C, 1) ConPNode( t->is_ptr() );
+  case T_NARROWOOP: return new (C, 1) ConNNode( t->is_narrowoop() );
     // Expected cases:  TypePtr::NULL_PTR, any is_rawptr()
     // Also seen: AnyPtr(TopPTR *+top); from command line:
     //   r -XX:+PrintOpto -XX:CIStart=285 -XX:+CompileTheWorld -XX:CompileTheWorldStartAt=660
@@ -557,7 +557,7 @@
   const Type *t = phase->type( in(1) );
   if( t == Type::TOP ) return in(1);
 
-  if (in(1)->Opcode() == Op_EncodeP) {
+  if (in(1)->is_EncodeP()) {
     // (DecodeN (EncodeP p)) -> p
     return in(1)->in(1);
   }
@@ -572,7 +572,7 @@
 }
 
 Node* DecodeNNode::decode(PhaseTransform* phase, Node* value) {
-  if (value->Opcode() == Op_EncodeP) {
+  if (value->is_EncodeP()) {
     // (DecodeN (EncodeP p)) -> p
     return value->in(1);
   }
@@ -591,7 +591,7 @@
   const Type *t = phase->type( in(1) );
   if( t == Type::TOP ) return in(1);
 
-  if (in(1)->Opcode() == Op_DecodeN) {
+  if (in(1)->is_DecodeN()) {
     // (EncodeP (DecodeN p)) -> p
     return in(1)->in(1);
   }
@@ -606,7 +606,7 @@
 }
 
 Node* EncodePNode::encode(PhaseTransform* phase, Node* value) {
-  if (value->Opcode() == Op_DecodeN) {
+  if (value->is_DecodeN()) {
     // (EncodeP (DecodeN p)) -> p
     return value->in(1);
   }
--- a/src/share/vm/opto/connode.hpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/connode.hpp	Thu May 29 12:04:14 2008 -0700
@@ -271,6 +271,7 @@
  public:
   EncodePNode(Node* value, const Type* type):
     TypeNode(type, 2) {
+    init_class_id(Class_EncodeP);
     init_req(0, NULL);
     init_req(1, value);
   }
@@ -291,6 +292,7 @@
  public:
   DecodeNNode(Node* value, const Type* type):
     TypeNode(type, 2) {
+    init_class_id(Class_DecodeN);
     init_req(0, NULL);
     init_req(1, value);
   }
--- a/src/share/vm/opto/escape.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/escape.cpp	Thu May 29 12:04:14 2008 -0700
@@ -428,7 +428,7 @@
   if (base->is_top()) { // The AddP case #3 and #6.
     base = addp->in(AddPNode::Address)->uncast();
     assert(base->Opcode() == Op_ConP || base->Opcode() == Op_ThreadLocal ||
-           base->Opcode() == Op_CastX2P || base->Opcode() == Op_DecodeN ||
+           base->Opcode() == Op_CastX2P || base->is_DecodeN() ||
            (base->is_Mem() && base->bottom_type() == TypeRawPtr::NOTNULL) ||
            (base->is_Proj() && base->in(0)->is_Allocate()), "sanity");
   }
@@ -943,8 +943,8 @@
       tinst = igvn->type(base)->isa_oopptr();
     } else if (n->is_Phi() ||
                n->is_CheckCastPP() ||
-               n->Opcode() == Op_EncodeP ||
-               n->Opcode() == Op_DecodeN ||
+               n->is_EncodeP() ||
+               n->is_DecodeN() ||
                (n->is_ConstraintCast() && n->Opcode() == Op_CastPP)) {
       if (visited.test_set(n->_idx)) {
         assert(n->is_Phi(), "loops only through Phi's");
@@ -1016,8 +1016,8 @@
         alloc_worklist.append_if_missing(use);
       } else if (use->is_Phi() ||
                  use->is_CheckCastPP() ||
-                 use->Opcode() == Op_EncodeP ||
-                 use->Opcode() == Op_DecodeN ||
+                 use->is_EncodeP() ||
+                 use->is_DecodeN() ||
                  (use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) {
         alloc_worklist.append_if_missing(use);
       }
--- a/src/share/vm/opto/macro.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/macro.cpp	Thu May 29 12:04:14 2008 -0700
@@ -458,7 +458,7 @@
         }
       } else if (use->is_SafePoint()) {
         SafePointNode* sfpt = use->as_SafePoint();
-        if (sfpt->has_non_debug_use(res)) {
+        if (sfpt->is_Call() && sfpt->as_Call()->has_non_debug_use(res)) {
           // Object is passed as argument.
           DEBUG_ONLY(disq_node = use;)
           NOT_PRODUCT(fail_eliminate = "Object is passed as argument";)
--- a/src/share/vm/opto/matcher.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/matcher.cpp	Thu May 29 12:04:14 2008 -0700
@@ -52,7 +52,7 @@
 #ifdef ASSERT
   _old2new_map(C->comp_arena()),
 #endif
-  _shared_constants(C->comp_arena()),
+  _shared_nodes(C->comp_arena()),
   _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
   _swallowed(swallowed),
   _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
@@ -1191,7 +1191,7 @@
   uint cnt = n->req();
   uint start = 1;
   if( mem != (Node*)1 ) start = MemNode::Memory+1;
-  if( n->Opcode() == Op_AddP ) {
+  if( n->is_AddP() ) {
     assert( mem == (Node*)1, "" );
     start = AddPNode::Base+1;
   }
@@ -1219,7 +1219,7 @@
   if( t->singleton() ) {
     // Never force constants into registers.  Allow them to match as
     // constants or registers.  Copies of the same value will share
-    // the same register.  See find_shared_constant.
+    // the same register.  See find_shared_node.
     return false;
   } else {                      // Not a constant
     // Stop recursion if they have different Controls.
@@ -1243,12 +1243,10 @@
       if( j == max_scan )       // No post-domination before scan end?
         return true;            // Then break the match tree up
     }
-
-    if (m->Opcode() == Op_DecodeN && m->outcnt() == 2) {
+    if (m->is_DecodeN() && Matcher::clone_shift_expressions) {
       // These are commonly used in address expressions and can
-      // efficiently fold into them in some cases but because they are
-      // consumed by AddP they commonly have two users.
-      if (m->raw_out(0) == m->raw_out(1) && m->raw_out(0)->Opcode() == Op_AddP) return false;
+      // efficiently fold into them on X64 in some cases.
+      return false;
     }
   }
 
@@ -1368,13 +1366,16 @@
 // which reduces the number of copies of a constant in the final
 // program.  The register allocator is free to split uses later to
 // split live ranges.
-MachNode* Matcher::find_shared_constant(Node* leaf, uint rule) {
-  if (!leaf->is_Con()) return NULL;
+MachNode* Matcher::find_shared_node(Node* leaf, uint rule) {
+  if (!leaf->is_Con() && !leaf->is_DecodeN()) return NULL;
 
   // See if this Con has already been reduced using this rule.
-  if (_shared_constants.Size() <= leaf->_idx) return NULL;
-  MachNode* last = (MachNode*)_shared_constants.at(leaf->_idx);
+  if (_shared_nodes.Size() <= leaf->_idx) return NULL;
+  MachNode* last = (MachNode*)_shared_nodes.at(leaf->_idx);
   if (last != NULL && rule == last->rule()) {
+    // Don't expect control change for DecodeN
+    if (leaf->is_DecodeN())
+      return last;
     // Get the new space root.
     Node* xroot = new_node(C->root());
     if (xroot == NULL) {
@@ -1420,9 +1421,9 @@
 MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
   assert( rule >= NUM_OPERANDS, "called with operand rule" );
 
-  MachNode* shared_con = find_shared_constant(s->_leaf, rule);
-  if (shared_con != NULL) {
-    return shared_con;
+  MachNode* shared_node = find_shared_node(s->_leaf, rule);
+  if (shared_node != NULL) {
+    return shared_node;
   }
 
   // Build the object to represent this state & prepare for recursive calls
@@ -1447,7 +1448,7 @@
     mach->ins_req(MemNode::Memory,mem);
 
   // If the _leaf is an AddP, insert the base edge
-  if( leaf->Opcode() == Op_AddP )
+  if( leaf->is_AddP() )
     mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
 
   uint num_proj = _proj_list.size();
@@ -1475,9 +1476,9 @@
     guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
   }
 
-  if (leaf->is_Con()) {
+  if (leaf->is_Con() || leaf->is_DecodeN()) {
     // Record the con for sharing
-    _shared_constants.map(leaf->_idx, ex);
+    _shared_nodes.map(leaf->_idx, ex);
   }
 
   return ex;
@@ -1826,7 +1827,7 @@
             Node *adr = m->in(AddPNode::Address);
 
             // Intel, ARM and friends can handle 2 adds in addressing mode
-            if( clone_shift_expressions && adr->Opcode() == Op_AddP &&
+            if( clone_shift_expressions && adr->is_AddP() &&
                 // AtomicAdd is not an addressing expression.
                 // Cheap to find it by looking for screwy base.
                 !adr->in(AddPNode::Base)->is_top() ) {
--- a/src/share/vm/opto/matcher.hpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/matcher.hpp	Thu May 29 12:04:14 2008 -0700
@@ -48,7 +48,7 @@
   void ReduceOper( State *s, int newrule, Node *&mem, MachNode *mach );
 
   // If this node already matched using "rule", return the MachNode for it.
-  MachNode* find_shared_constant(Node* con, uint rule);
+  MachNode* find_shared_node(Node* n, uint rule);
 
   // Convert a dense opcode number to an expanded rule number
   const int *_reduceOp;
@@ -81,7 +81,7 @@
 
   Node_List &_proj_list;        // For Machine nodes killing many values
 
-  Node_Array _shared_constants;
+  Node_Array _shared_nodes;
 
   debug_only(Node_Array _old2new_map;)   // Map roots of ideal-trees to machine-roots
 
--- a/src/share/vm/opto/memnode.cpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/memnode.cpp	Thu May 29 12:04:14 2008 -0700
@@ -1625,14 +1625,10 @@
     const TypeNarrowOop* narrowtype = tk->is_oopptr()->make_narrowoop();
     Node* load_klass = gvn.transform(new (C, 3) LoadNKlassNode(ctl, mem, adr, at, narrowtype));
     return DecodeNNode::decode(&gvn, load_klass);
-  } else
+  }
 #endif
-  {
-    assert(!adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop");
-    return new (C, 3) LoadKlassNode(ctl, mem, adr, at, tk);
-  }
-  ShouldNotReachHere();
-  return (LoadKlassNode*)NULL;
+  assert(!adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop");
+  return new (C, 3) LoadKlassNode(ctl, mem, adr, at, tk);
 }
 
 //------------------------------Value------------------------------------------
--- a/src/share/vm/opto/node.hpp	Wed May 28 21:06:24 2008 -0700
+++ b/src/share/vm/opto/node.hpp	Thu May 29 12:04:14 2008 -0700
@@ -53,6 +53,8 @@
 class ConNode;
 class CountedLoopNode;
 class CountedLoopEndNode;
+class DecodeNNode;
+class EncodePNode;
 class FastLockNode;
 class FastUnlockNode;
 class IfNode;
@@ -438,6 +440,12 @@
 public:
   // Globally replace this node by a given new node, updating all uses.
   void replace_by(Node* new_node);
+  // Globally replace this node by a given new node, updating all uses
+  // and cutting input edges of old node.
+  void subsume_by(Node* new_node) {
+    replace_by(new_node);
+    disconnect_inputs(NULL);
+  }
   void set_req_X( uint i, Node *n, PhaseIterGVN *igvn );
   // Find the one non-null required input.  RegionNode only
   Node *nonnull_req() const;
@@ -577,6 +585,8 @@
       DEFINE_CLASS_ID(CheckCastPP, Type, 2)
       DEFINE_CLASS_ID(CMove, Type, 3)
       DEFINE_CLASS_ID(SafePointScalarObject, Type, 4)
+      DEFINE_CLASS_ID(DecodeN, Type, 5)
+      DEFINE_CLASS_ID(EncodeP, Type, 6)
 
     DEFINE_CLASS_ID(Mem,   Node, 6)
       DEFINE_CLASS_ID(Load,  Mem, 0)
@@ -685,6 +695,8 @@
   DEFINE_CLASS_QUERY(Cmp)
   DEFINE_CLASS_QUERY(CountedLoop)
   DEFINE_CLASS_QUERY(CountedLoopEnd)
+  DEFINE_CLASS_QUERY(DecodeN)
+  DEFINE_CLASS_QUERY(EncodeP)
   DEFINE_CLASS_QUERY(FastLock)
   DEFINE_CLASS_QUERY(FastUnlock)
   DEFINE_CLASS_QUERY(If)