# HG changeset patch
# User ysr
# Date 1236989204 25200
# Node ID fe24415002812ab6abd4fd64817536fc19acce58
# Parent  09f82af55c3eb537afd1478711c48f6d77caeeda# Parent  4018e98c778a65bb22504e48717a4b1b5556c608
Merge

diff -r 09f82af55c3e -r fe2441500281 src/cpu/sparc/vm/assembler_sparc.cpp
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -2615,6 +2615,158 @@
   }
 }
 
+RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr,
+                                               Register tmp,
+                                               int offset) {
+  // Fast path: the slot already holds a non-zero value, so fold it with the
+  // offset into a constant and emit no code at all.
+  const intptr_t known = *delayed_value_addr;
+  if (known != 0) { return RegisterConstant(known + offset); }
+
+  // Slow path: the slot is still zero at code-generation time, so emit a load
+  // that reads it indirectly at run time (solves the generation ordering problem).
+  Address slot(tmp, (address) delayed_value_addr);
+  load_ptr_contents(slot, tmp);
+#ifdef ASSERT
+  tst(tmp);                      // debug builds: trap if the loaded value is zero
+  breakpoint_trap(zero, xcc);
+#endif
+  if (offset != 0) {
+    add(tmp, offset, tmp);
+  }
+  return RegisterConstant(tmp);
+}
+
+
+void MacroAssembler::regcon_inc_ptr( RegisterConstant& dest, RegisterConstant src, Register temp ) {  // dest += src; dest is updated in place
+  assert(dest.register_or_noreg() != G0, "lost side effect");
+  if ((src.is_constant() && src.as_constant() == 0) ||
+      (src.is_register() && src.as_register() == G0)) {
+    // adding constant zero or register G0: do nothing, emit no code
+  } else if (dest.is_register()) {
+    add(dest.as_register(), ensure_rs2(src, temp), dest.as_register());  // register += (register or constant)
+  } else if (src.is_constant()) {
+    intptr_t res = dest.as_constant() + src.as_constant();  // constant + constant: folded here, no code emitted
+    dest = RegisterConstant(res); // side effect seen by caller
+  } else {
+    assert(temp != noreg, "cannot handle constant += register");
+    add(src.as_register(), ensure_rs2(dest, temp), temp);  // constant + register: the sum is placed in temp
+    dest = RegisterConstant(temp); // side effect seen by caller
+  }
+}
+
+void MacroAssembler::regcon_sll_ptr( RegisterConstant& dest, RegisterConstant src, Register temp ) {  // dest <<= src; dest is updated in place
+  assert(dest.register_or_noreg() != G0, "lost side effect");
+  if (!is_simm13(src.constant_or_zero()))
+    src = (src.as_constant() & 0xFF);  // constant shift count outside the simm13 range: keep only its low 8 bits
+  if ((src.is_constant() && src.as_constant() == 0) ||
+      (src.is_register() && src.as_register() == G0)) {
+    // shifting by constant zero or by register G0: do nothing, emit no code
+  } else if (dest.is_register()) {
+    sll_ptr(dest.as_register(), src, dest.as_register());  // register <<= (register or constant)
+  } else if (src.is_constant()) {
+    intptr_t res = dest.as_constant() << src.as_constant();  // constant << constant: folded here, no code emitted
+    dest = RegisterConstant(res); // side effect seen by caller
+  } else {
+    assert(temp != noreg, "cannot handle constant <<= register");
+    set(dest.as_constant(), temp);  // constant << register: materialize the constant in temp, then shift temp
+    sll_ptr(temp, src, temp);
+    dest = RegisterConstant(temp); // side effect seen by caller
+  }
+}
+
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass (overwritten here, as is scan_temp).
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label (L_no_such_interface).
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Register sethi_temp,
+                                             Label& L_no_such_interface) {
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;  // NOTE(review): not referenced again in this function
+
+  lduw(recv_klass, instanceKlass::vtable_length_offset() * wordSize, scan_temp);
+  // %%% We should store the aligned, prescaled offset in the klassoop.
+  // Then the next several instructions would fold away.
+
+  int round_to_unit = ((HeapWordsPerLong > 1) ? BytesPerLong : 0);  // 0 means no rounding step is emitted
+  int itb_offset = vtable_base;
+  if (round_to_unit != 0) {
+    // hoist first instruction of round_to(scan_temp, BytesPerLong):
+    itb_offset += round_to_unit - wordSize;
+  }
+  int itb_scale = exact_log2(vtableEntry::size() * wordSize);
+  sll(scan_temp, itb_scale,  scan_temp);  // vtable length -> size in bytes
+  add(scan_temp, itb_offset, scan_temp);
+  if (round_to_unit != 0) {
+    // Round up to align_object_offset boundary
+    // see code for instanceKlass::start_of_itable!
+    // Was: round_to(scan_temp, BytesPerLong);
+    // Hoisted: add(scan_temp, BytesPerLong-1, scan_temp);
+    and3(scan_temp, -round_to_unit, scan_temp);
+  }
+  add(recv_klass, scan_temp, scan_temp);  // scan_temp now holds an address, not an offset
+
+  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+  RegisterConstant itable_offset = itable_index;
+  regcon_sll_ptr(itable_offset, exact_log2(itableMethodEntry::size() * wordSize));
+  regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes());
+  add(recv_klass, ensure_rs2(itable_offset, sethi_temp), recv_klass);  // sethi_temp is written only if the offset is a non-simm13 constant
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {  // body is emitted twice: the peeled first probe (peel==1), then the loop (peel==0)
+    // %%%% Could load both offset and interface in one ldx, if they were
+    // in the opposite order.  This would save a load.
+    ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result);
+
+    // Check that this entry is non-null.  A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    bpr(Assembler::rc_z, false, Assembler::pn, method_result, L_no_such_interface);
+    delayed()->cmp(method_result, intf_klass);
+
+    if (peel) {
+      brx(Assembler::equal,    false, Assembler::pt, found_method);
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, search);
+      // (invert the test to fall through to found_method...)
+    }
+    delayed()->add(scan_temp, scan_step, scan_temp);  // delay slot: scan_temp is advanced even on a match (compensated via ito_offset below)
+
+    if (!peel)  break;
+
+    bind(search);  // loop head: target of the notEqual branch emitted on the second pass
+  }
+
+  bind(found_method);
+
+  // Got a hit.
+  int ito_offset = itableOffsetEntry::offset_offset_in_bytes();
+  // scan_temp[-scan_step] points to the vtable offset we need
+  ito_offset -= scan_step;
+  lduw(scan_temp, ito_offset, scan_temp);
+  ld_ptr(recv_klass, scan_temp, method_result);  // recv_klass was already bumped by the scaled itable index above
+}
+
+
 void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
                                           Register temp_reg,
                                           Label& done, Label* slow_case,
@@ -4057,6 +4209,24 @@
   card_table_write(bs->byte_map_base, tmp, store_addr);
 }
 
+// Load d from [s1 + s2] with the instruction matching the operand's size and signedness.
+void MacroAssembler::load_sized_value(Register s1, RegisterConstant s2, Register d,
+                                      int size_in_bytes, bool is_signed) {
+  const int key = is_signed ? ~size_in_bytes : size_in_bytes;  // signed requests are keyed by the complemented size
+  switch (key) {
+  case  1:  ldub(    s1, s2, d ); break;
+  case ~1:  ldsb(    s1, s2, d ); break;
+  case  2:  lduh(    s1, s2, d ); break;
+  case ~2:  ldsh(    s1, s2, d ); break;
+  case  4:  lduw(    s1, s2, d ); break;
+  case ~4:  ldsw(    s1, s2, d ); break;
+  case  8: case ~8:  ld_long( s1, s2, d ); break;  // same 8-byte load either way
+  default:  ShouldNotReachHere();
+  }
+}
+
+
+
 void MacroAssembler::load_klass(Register src_oop, Register klass) {
   // The number of bytes in this code is used by
   // MachCallDynamicJavaNode::ret_addr_offset()
diff -r 09f82af55c3e -r fe2441500281 src/cpu/sparc/vm/assembler_sparc.hpp
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Mar 13 17:06:44 2009 -0700
@@ -384,6 +384,12 @@
 
   inline bool is_simm13(int offset = 0);  // check disp+offset for overflow
 
+  Address plus_disp(int disp) const {     // copy of this address with disp bumped by a small amount
+    Address bumped(*this);
+    bumped._disp += disp;
+    return bumped;
+  }
+
   Address split_disp() const {            // deal with disp overflow
     Address a = (*this);
     int hi_disp = _disp & ~0x3ff;
@@ -1082,6 +1088,7 @@
   inline void add(    Register s1, Register s2, Register d );
   inline void add(    Register s1, int simm13a, Register d, relocInfo::relocType rtype = relocInfo::none);
   inline void add(    Register s1, int simm13a, Register d, RelocationHolder const& rspec);
+  inline void add(    Register s1, RegisterConstant s2, Register d, int offset = 0);
   inline void add(    const Address&  a,              Register d, int offset = 0);
 
   void addcc(  Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3  | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
@@ -1298,6 +1305,16 @@
   inline void ld(   const Address& a, Register d, int offset = 0 );
   inline void ldd(  const Address& a, Register d, int offset = 0 );
 
+  inline void ldub(  Register s1, RegisterConstant s2, Register d );
+  inline void ldsb(  Register s1, RegisterConstant s2, Register d );
+  inline void lduh(  Register s1, RegisterConstant s2, Register d );
+  inline void ldsh(  Register s1, RegisterConstant s2, Register d );
+  inline void lduw(  Register s1, RegisterConstant s2, Register d );
+  inline void ldsw(  Register s1, RegisterConstant s2, Register d );
+  inline void ldx(   Register s1, RegisterConstant s2, Register d );
+  inline void ld(    Register s1, RegisterConstant s2, Register d );
+  inline void ldd(   Register s1, RegisterConstant s2, Register d );
+
   // pp 177
 
   void ldsba(  Register s1, Register s2, int ia, Register d ) {             emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
@@ -1518,6 +1535,13 @@
   inline void st(   Register d, const Address& a, int offset = 0 );
   inline void std(  Register d, const Address& a, int offset = 0 );
 
+  inline void stb(  Register d, Register s1, RegisterConstant s2 );
+  inline void sth(  Register d, Register s1, RegisterConstant s2 );
+  inline void stw(  Register d, Register s1, RegisterConstant s2 );
+  inline void stx(  Register d, Register s1, RegisterConstant s2 );
+  inline void std(  Register d, Register s1, RegisterConstant s2 );
+  inline void st(   Register d, Register s1, RegisterConstant s2 );
+
   // pp 177
 
   void stba(  Register d, Register s1, Register s2, int ia ) {             emit_long( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); }
@@ -1835,6 +1859,7 @@
   // Functions for isolating 64 bit shifts for LP64
   inline void sll_ptr( Register s1, Register s2, Register d );
   inline void sll_ptr( Register s1, int imm6a,   Register d );
+  inline void sll_ptr( Register s1, RegisterConstant s2, Register d );
   inline void srl_ptr( Register s1, Register s2, Register d );
   inline void srl_ptr( Register s1, int imm6a,   Register d );
 
@@ -1940,20 +1965,47 @@
   // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's
   inline void ld_ptr(   Register s1, Register s2, Register d );
   inline void ld_ptr(   Register s1, int simm13a, Register d);
+  inline void ld_ptr(   Register s1, RegisterConstant s2, Register d );
   inline void ld_ptr(  const Address& a, Register d, int offset = 0 );
   inline void st_ptr(  Register d, Register s1, Register s2 );
   inline void st_ptr(  Register d, Register s1, int simm13a);
+  inline void st_ptr(  Register d, Register s1, RegisterConstant s2 );
   inline void st_ptr(  Register d, const Address& a, int offset = 0 );
 
   // ld_long will perform ld for 32 bit VM's and ldx for 64 bit VM's
   // st_long will perform st for 32 bit VM's and stx for 64 bit VM's
   inline void ld_long( Register s1, Register s2, Register d );
   inline void ld_long( Register s1, int simm13a, Register d );
+  inline void ld_long( Register s1, RegisterConstant s2, Register d );
   inline void ld_long( const Address& a, Register d, int offset = 0 );
   inline void st_long( Register d, Register s1, Register s2 );
   inline void st_long( Register d, Register s1, int simm13a );
+  inline void st_long( Register d, Register s1, RegisterConstant s2 );
   inline void st_long( Register d, const Address& a, int offset = 0 );
 
+  // Loading values by size and signed-ness
+  void load_sized_value(Register s1, RegisterConstant s2, Register d,
+                        int size_in_bytes, bool is_signed);
+
+  // Helpers for address formation.
+  // They update the dest in place, whether it is a register or constant.
+  // They emit no code at all if src is a constant zero.
+  // If dest is a constant and src is a register, the temp argument
+  // is required, and becomes the result.
+  // If dest is a register and src is a non-simm13 constant,
+  // the temp argument is required, and is used to materialize the constant.
+  void regcon_inc_ptr( RegisterConstant& dest, RegisterConstant src,
+                       Register temp = noreg );
+  void regcon_sll_ptr( RegisterConstant& dest, RegisterConstant src,
+                       Register temp = noreg );
+  RegisterConstant ensure_rs2(RegisterConstant rs2, Register sethi_temp) {  // return rs2 in a form usable as a register-or-simm13 operand
+    if (is_simm13(rs2.constant_or_zero()))
+      return rs2;               // register or short constant: usable as is, no temp needed
+    guarantee(sethi_temp != noreg, "constant offset overflow");  // temp is required only for a non-simm13 constant
+    set(rs2.as_constant(), sethi_temp);
+    return sethi_temp;
+  }
+
   // --------------------------------------------------
 
  public:
@@ -2267,6 +2319,14 @@
   );
   void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
 
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterConstant itable_index,
+                               Register method_result,
+                               Register temp_reg, Register temp2_reg,
+                               Label& no_such_interface);
+
   // Stack overflow checking
 
   // Note: this clobbers G3_scratch
@@ -2281,6 +2341,8 @@
   // stack overflow + shadow pages.  Clobbers tsp and scratch registers.
   void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch);
 
+  virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, Register tmp, int offset);
+
   void verify_tlab();
 
   Condition negate_condition(Condition cond);
diff -r 09f82af55c3e -r fe2441500281 src/cpu/sparc/vm/assembler_sparc.inline.hpp
--- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Fri Mar 13 17:06:44 2009 -0700
@@ -143,6 +143,49 @@
 inline void Assembler::ld(  Register s1, int simm13a, Register d) { lduw( s1, simm13a, d); }
 #endif
 
+inline void Assembler::ldub(  Register s1, RegisterConstant s2, Register d) {  // unsigned byte load from [s1 + s2]
+  if (s2.is_register())  ldub(s1, s2.as_register(), d);  // register-indexed form
+  else                   ldub(s1, s2.as_constant(), d);  // immediate-offset form
+}
+inline void Assembler::ldsb(  Register s1, RegisterConstant s2, Register d) {
+  if (!s2.is_register()) { ldsb(s1, s2.as_constant(), d); return; }  // immediate-offset form
+  ldsb(s1, s2.as_register(), d);                                     // register-indexed form
+}
+inline void Assembler::lduh(  Register s1, RegisterConstant s2, Register d) {  // unsigned halfword load from [s1 + s2]
+  if (s2.is_register())  lduh(s1, s2.as_register(), d);  // register-indexed form
+  else                   lduh(s1, s2.as_constant(), d);  // immediate-offset form
+}
+inline void Assembler::ldsh(  Register s1, RegisterConstant s2, Register d) {
+  if (!s2.is_register()) { ldsh(s1, s2.as_constant(), d); return; }  // immediate-offset form
+  ldsh(s1, s2.as_register(), d);                                     // register-indexed form
+}
+inline void Assembler::lduw(  Register s1, RegisterConstant s2, Register d) {  // unsigned word load from [s1 + s2]
+  if (s2.is_register())  lduw(s1, s2.as_register(), d);  // register-indexed form
+  else                   lduw(s1, s2.as_constant(), d);  // immediate-offset form
+}
+inline void Assembler::ldsw(  Register s1, RegisterConstant s2, Register d) {
+  if (!s2.is_register()) { ldsw(s1, s2.as_constant(), d); return; }  // immediate-offset form
+  ldsw(s1, s2.as_register(), d);                                     // register-indexed form
+}
+inline void Assembler::ldx(   Register s1, RegisterConstant s2, Register d) {
+  if (!s2.is_register()) { ldx(s1, s2.as_constant(), d); return; }  // immediate-offset form
+  ldx(s1, s2.as_register(), d);                                     // register-indexed form
+}
+inline void Assembler::ld(    Register s1, RegisterConstant s2, Register d) {
+  if (!s2.is_register()) { ld(s1, s2.as_constant(), d); return; }  // immediate-offset form
+  ld(s1, s2.as_register(), d);                                     // register-indexed form
+}
+inline void Assembler::ldd(   Register s1, RegisterConstant s2, Register d) {
+  if (!s2.is_register()) { ldd(s1, s2.as_constant(), d); return; }  // immediate-offset form
+  ldd(s1, s2.as_register(), d);                                     // register-indexed form
+}
+
+// Form the effective address d = s1 + s2 + offset:
+inline void Assembler::add(   Register s1, RegisterConstant s2, Register d, int offset) {
+  if (!s2.is_register()) { add(s1, s2.as_constant() + offset, d); return; }  // constant: offset is folded into the immediate
+  add(s1, s2.as_register(), d);
+  if (offset != 0)  add(d, offset, d);  // register: a non-zero offset needs its own add
+}
 
 inline void Assembler::ld(   const Address& a, Register d, int offset ) { relocate(a.rspec(offset)); ld(   a.base(), a.disp() + offset, d ); }
 inline void Assembler::ldsb( const Address& a, Register d, int offset ) { relocate(a.rspec(offset)); ldsb( a.base(), a.disp() + offset, d ); }
@@ -200,6 +243,27 @@
 inline void Assembler::st(  Register d, Register s1, Register s2) { stw(d, s1, s2); }
 inline void Assembler::st(  Register d, Register s1, int simm13a) { stw(d, s1, simm13a); }
 
+inline void Assembler::stb(  Register d, Register s1, RegisterConstant s2) {
+  if (!s2.is_register()) { stb(d, s1, s2.as_constant()); return; }  // immediate-offset form
+  stb(d, s1, s2.as_register());                                     // register-indexed form
+}
+inline void Assembler::sth(  Register d, Register s1, RegisterConstant s2) {
+  if (!s2.is_register()) { sth(d, s1, s2.as_constant()); return; }  // immediate-offset form
+  sth(d, s1, s2.as_register());                                     // register-indexed form
+}
+inline void Assembler::stx(  Register d, Register s1, RegisterConstant s2) {
+  if (!s2.is_register()) { stx(d, s1, s2.as_constant()); return; }  // immediate-offset form
+  stx(d, s1, s2.as_register());                                     // register-indexed form
+}
+inline void Assembler::std( Register d, Register s1, RegisterConstant s2) {
+  if (!s2.is_register()) { std(d, s1, s2.as_constant()); return; }  // immediate-offset form
+  std(d, s1, s2.as_register());                                     // register-indexed form
+}
+inline void Assembler::stw(  Register d, Register s1, RegisterConstant s2) {  // declared in assembler_sparc.hpp; its definition was missing
+  if (s2.is_register())  stw(d, s1, s2.as_register());  else  stw(d, s1, s2.as_constant());
+}
+inline void Assembler::st(  Register d, Register s1, RegisterConstant s2) { stw(d, s1, s2); }  // st is stw, as for the Register/int overloads
+
 inline void Assembler::stb( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); stb( d, a.base(), a.disp() + offset); }
 inline void Assembler::sth( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); sth( d, a.base(), a.disp() + offset); }
 inline void Assembler::stw( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); stw( d, a.base(), a.disp() + offset); }
@@ -244,6 +308,14 @@
 #endif
 }
 
+inline void MacroAssembler::ld_ptr( Register s1, RegisterConstant s2, Register d ) {
+#ifndef _LP64
+  Assembler::ld(  s1, s2, d);   // 32-bit VM: pointer-sized load is ld
+#else
+  Assembler::ldx( s1, s2, d);   // 64-bit VM: pointer-sized load is ldx
+#endif
+}
+
 inline void MacroAssembler::ld_ptr( const Address& a, Register d, int offset ) {
 #ifdef _LP64
   Assembler::ldx(  a, d, offset );
@@ -268,6 +340,14 @@
 #endif
 }
 
+inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterConstant s2 ) {
+#ifndef _LP64
+  Assembler::st( d, s1, s2);    // 32-bit VM: pointer-sized store is st
+#else
+  Assembler::stx( d, s1, s2);   // 64-bit VM: pointer-sized store is stx
+#endif
+}
+
 inline void MacroAssembler::st_ptr(  Register d, const Address& a, int offset) {
 #ifdef _LP64
   Assembler::stx(  d, a, offset);
@@ -293,6 +373,14 @@
 #endif
 }
 
+inline void MacroAssembler::ld_long( Register s1, RegisterConstant s2, Register d ) {
+#ifndef _LP64
+  Assembler::ldd(s1, s2, d);    // non-LP64 build: long load goes through ldd
+#else
+  Assembler::ldx(s1, s2, d);    // LP64 build: long load goes through ldx
+#endif
+}
+
 inline void MacroAssembler::ld_long( const Address& a, Register d, int offset ) {
 #ifdef _LP64
   Assembler::ldx(a, d, offset );
@@ -317,6 +405,14 @@
 #endif
 }
 
+inline void MacroAssembler::st_long( Register d, Register s1, RegisterConstant s2 ) {
+#ifndef _LP64
+  Assembler::std(d, s1, s2);    // non-LP64 build: long store goes through std
+#else
+  Assembler::stx(d, s1, s2);    // LP64 build: long store goes through stx
+#endif
+}
+
 inline void MacroAssembler::st_long( Register d, const Address& a, int offset ) {
 #ifdef _LP64
   Assembler::stx(d, a, offset);
@@ -359,6 +455,11 @@
 #endif
 }
 
+inline void MacroAssembler::sll_ptr( Register s1, RegisterConstant s2, Register d ) {
+  if (!s2.is_register()) { sll_ptr(s1, s2.as_constant(), d); return; }  // shift count given as a constant
+  sll_ptr(s1, s2.as_register(), d);                                     // shift count held in a register
+}
+
 // Use the right branch for the platform
 
 inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) {
diff -r 09f82af55c3e -r fe2441500281 src/cpu/sparc/vm/sparc.ad
--- a/src/cpu/sparc/vm/sparc.ad	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright 1998-2008 Sun Microsystems, Inc.  All Rights Reserved.
+// Copyright 1998-2009 Sun Microsystems, Inc.  All Rights Reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -5286,55 +5286,91 @@
   ins_cost(MEMORY_REF_COST);
 
   size(4);
-  format %{ "LDSB   $mem,$dst" %}
+  format %{ "LDSB   $mem,$dst\t! byte" %}
+  opcode(Assembler::ldsb_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Byte (8bit signed) into a Long Register
+instruct loadB2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadB mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDSB   $mem,$dst\t! byte -> long" %}
   opcode(Assembler::ldsb_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mask_mem);
 %}
 
-// Load Byte (8bit UNsigned) into an int reg
-instruct loadUB(iRegI dst, memory mem, immI_255 bytemask) %{
-  match(Set dst (AndI (LoadB mem) bytemask));
+// Load Unsigned Byte (8bit UNsigned) into an int reg
+instruct loadUB(iRegI dst, memory mem) %{
+  match(Set dst (LoadUB mem));
   ins_cost(MEMORY_REF_COST);
 
   size(4);
-  format %{ "LDUB   $mem,$dst" %}
+  format %{ "LDUB   $mem,$dst\t! ubyte" %}
+  opcode(Assembler::ldub_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into a Long Register
+instruct loadUB2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadUB mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUB   $mem,$dst\t! ubyte -> long" %}
   opcode(Assembler::ldub_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mask_mem);
 %}
 
-// Load Byte (8bit UNsigned) into a Long Register
-instruct loadUBL(iRegL dst, memory mem, immL_FF bytemask) %{
-  match(Set dst (AndL (ConvI2L (LoadB mem)) bytemask));
+// Load Short (16bit signed) into an int reg
+instruct loadS(iRegI dst, memory mem) %{
+  match(Set dst (LoadS mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDSH   $mem,$dst\t! short" %}
+  opcode(Assembler::ldsh_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Short (16bit signed) into a Long Register
+instruct loadS2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadS mem)));
   ins_cost(MEMORY_REF_COST);
 
   size(4);
-  format %{ "LDUB   $mem,$dst" %}
-  opcode(Assembler::ldub_op3);
+  format %{ "LDSH   $mem,$dst\t! short -> long" %}
+  opcode(Assembler::ldsh_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mask_mem);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned)
+instruct loadUS(iRegI dst, memory mem) %{
+  match(Set dst (LoadUS mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUH   $mem,$dst\t! ushort/char" %}
+  opcode(Assembler::lduh_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mask_mem);
 %}
 
 // Load Unsigned Short/Char (16bit UNsigned) into a Long Register
-instruct loadUS2L(iRegL dst, memory mem, immL_FFFF bytemask) %{
-  match(Set dst (AndL (ConvI2L (LoadUS mem)) bytemask));
+instruct loadUS2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadUS mem)));
   ins_cost(MEMORY_REF_COST);
 
   size(4);
-  format %{ "LDUH   $mem,$dst" %}
-  opcode(Assembler::lduh_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(iload_mask_mem);
-%}
-
-// Load Unsigned Short/Char (16bit unsigned)
-instruct loadUS(iRegI dst, memory mem) %{
-  match(Set dst (LoadUS mem));
-  ins_cost(MEMORY_REF_COST);
-
-  size(4);
-  format %{ "LDUH   $mem,$dst" %}
+  format %{ "LDUH   $mem,$dst\t! ushort/char -> long" %}
   opcode(Assembler::lduh_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mask_mem);
@@ -5344,9 +5380,33 @@
 instruct loadI(iRegI dst, memory mem) %{
   match(Set dst (LoadI mem));
   ins_cost(MEMORY_REF_COST);
-  size(4);
-
-  format %{ "LDUW   $mem,$dst" %}
+
+  size(4);
+  format %{ "LDUW   $mem,$dst\t! int" %}
+  opcode(Assembler::lduw_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mem);
+%}
+
+// Load Integer into a Long Register (matches ConvI2L of a LoadI; encoded with LDSW where loadI uses LDUW)
+instruct loadI2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadI mem)));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDSW   $mem,$dst\t! int -> long" %}
+  opcode(Assembler::ldsw_op3);
+  ins_encode(simple_form3_mem_reg( mem, dst ) );
+  ins_pipe(iload_mem);
+%}
+
+// Load Unsigned Integer into a Long Register
+instruct loadUI2L(iRegL dst, memory mem) %{
+  match(Set dst (LoadUI2L mem));
+  ins_cost(MEMORY_REF_COST);
+
+  size(4);
+  format %{ "LDUW   $mem,$dst\t! uint -> long" %}
   opcode(Assembler::lduw_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
   ins_pipe(iload_mem);
@@ -5356,6 +5416,7 @@
 instruct loadL(iRegL dst, memory mem ) %{
   match(Set dst (LoadL mem));
   ins_cost(MEMORY_REF_COST);
+
   size(4);
   format %{ "LDX    $mem,$dst\t! long" %}
   opcode(Assembler::ldx_op3);
@@ -5471,13 +5532,11 @@
 
    format %{ "LDUW   $mem,$dst\t! compressed ptr" %}
    ins_encode %{
-     Register base = as_Register($mem$$base);
-     Register index = as_Register($mem$$index);
-     Register dst = $dst$$Register;
+     Register index = $mem$$index$$Register;
      if (index != G0) {
-       __ lduw(base, index, dst);
+       __ lduw($mem$$base$$Register, index, $dst$$Register);
      } else {
-       __ lduw(base, $mem$$disp, dst);
+       __ lduw($mem$$base$$Register, $mem$$disp, $dst$$Register);
      }
    %}
    ins_pipe(iload_mem);
@@ -5521,18 +5580,6 @@
   ins_pipe(iload_mem);
 %}
 
-// Load Short (16bit signed)
-instruct loadS(iRegI dst, memory mem) %{
-  match(Set dst (LoadS mem));
-  ins_cost(MEMORY_REF_COST);
-
-  size(4);
-  format %{ "LDSH   $mem,$dst" %}
-  opcode(Assembler::ldsh_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(iload_mask_mem);
-%}
-
 // Load Double
 instruct loadD(regD dst, memory mem) %{
   match(Set dst (LoadD mem));
diff -r 09f82af55c3e -r fe2441500281 src/cpu/sparc/vm/vtableStubs_sparc.cpp
--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -106,6 +106,15 @@
   __ delayed()->nop();
 
   masm->flush();
+
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  vtable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
+  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
+
   s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
@@ -113,9 +122,9 @@
 
 // NOTE:  %%%% if any change is made to this stub make sure that the function
 //             pd_code_size_limit is changed to ensure the correct size for VtableStub
-VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   const int sparc_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(sparc_code_length) VtableStub(false, vtable_index);
+  VtableStub* s = new(sparc_code_length) VtableStub(false, itable_index);
   ResourceMark rm;
   CodeBuffer cb(s->entry_point(), sparc_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -139,7 +148,6 @@
   // are passed in the %o registers.  Instead, longs are passed in G1 and G4
   // and so those registers are not available here.
   __ save(SP,-frame::register_save_words*wordSize,SP);
-  Register I0_receiver = I0;    // Location of receiver after save
 
 #ifndef PRODUCT
   if (CountCompiledCalls) {
@@ -151,63 +159,31 @@
   }
 #endif /* PRODUCT */
 
-  // load start of itable entries into L0 register
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  __ ld(Address(G3_klassOop, 0, instanceKlass::vtable_length_offset() * wordSize), L0);
-
-  // %%% Could store the aligned, prescaled offset in the klassoop.
-  __ sll(L0, exact_log2(vtableEntry::size() * wordSize), L0);
-  // see code for instanceKlass::start_of_itable!
-  const int vtable_alignment = align_object_offset(1);
-  assert(vtable_alignment == 1 || vtable_alignment == 2, "");
-  const int odd_bit = vtableEntry::size() * wordSize;
-  if (vtable_alignment == 2) {
-    __ and3(L0, odd_bit, L1);   // isolate the odd bit
-  }
-  __ add(G3_klassOop, L0, L0);
-  if (vtable_alignment == 2) {
-    __ add(L0, L1, L0);         // double the odd bit, to align up
-  }
+  Label throw_icce;
 
-  // Loop over all itable entries until desired interfaceOop (G5_interface) found
-  __ bind(search);
-
-  // %%%% Could load both offset and interface in one ldx, if they were
-  // in the opposite order.  This would save a load.
-  __ ld_ptr(L0, base + itableOffsetEntry::interface_offset_in_bytes(), L1);
-
-  // If the entry is NULL then we've reached the end of the table
-  // without finding the expected interface, so throw an exception
-  Label throw_icce;
-  __ bpr(Assembler::rc_z, false, Assembler::pn, L1, throw_icce);
-  __ delayed()->cmp(G5_interface, L1);
-  __ brx(Assembler::notEqual, true, Assembler::pn, search);
-  __ delayed()->add(L0, itableOffsetEntry::size() * wordSize, L0);
-
-  // entry found and L0 points to it, move offset of vtable for interface into L0
-  __ ld(L0, base + itableOffsetEntry::offset_offset_in_bytes(), L0);
-
-  // Compute itableMethodEntry and get methodOop(G5_method) and entrypoint(L0) for compiler
-  const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + itableMethodEntry::method_offset_in_bytes();
-  __ add(G3_klassOop, L0, L1);
-  __ ld_ptr(L1, method_offset, G5_method);
+  Register L5_method = L5;
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             G3_klassOop, G5_interface, itable_index,
+                             // outputs: method, scan temp. reg
+                             L5_method, L2, L3,
+                             throw_icce);
 
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L01;
-    __ ld_ptr(L1, method_offset, G5_method);
-    __ bpr(Assembler::rc_nz, false, Assembler::pt, G5_method, L01);
+    __ bpr(Assembler::rc_nz, false, Assembler::pt, L5_method, L01);
     __ delayed()->nop();
     __ stop("methodOop is null");
     __ bind(L01);
-    __ verify_oop(G5_method);
+    __ verify_oop(L5_method);
   }
 #endif
 
   // If the following load is through a NULL pointer, we'll take an OS
   // exception that should translate into an AbstractMethodError.  We need the
   // window count to be correct at that time.
-  __ restore();                 // Restore registers BEFORE the AME point
+  __ restore(L5_method, 0, G5_method);
+  // Restore registers *before* the AME point.
 
   address ame_addr = __ pc();   // if the vtable entry is null, the method is abstract
   __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3_scratch);
@@ -225,6 +201,12 @@
 
   masm->flush();
 
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  itable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
   guarantee(__ pc() <= s->code_end(), "overflowed buffer");
 
   s->set_exception_points(npe_addr, ame_addr);
@@ -243,8 +225,7 @@
                         (UseCompressedOops ? 2*BytesPerInstWord : 0);
       return basic + slop;
     } else {
-      // save, ld, ld, sll, and, add, add, ld, cmp, br, add, ld, add, ld, ld, jmp, restore, sethi, jmpl, restore
-      const int basic = (20 LP64_ONLY(+ 6)) * BytesPerInstWord +
+      const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
                         // shift;add for load_klass
                         (UseCompressedOops ? 2*BytesPerInstWord : 0);
       return (basic + slop);
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/assembler_x86.cpp
--- a/src/cpu/x86/vm/assembler_x86.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,13 +129,19 @@
 // Convert the raw encoding form into the form expected by the constructor for
 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 // that to noreg for the Address constructor.
-Address Address::make_raw(int base, int index, int scale, int disp) {
+Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
+  RelocationHolder rspec;
+  if (disp_is_oop) {
+    rspec = Relocation::spec_simple(relocInfo::oop_type);
+  }
   bool valid_index = index != rsp->encoding();
   if (valid_index) {
     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
+    madr._rspec = rspec;
     return madr;
   } else {
     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
+    madr._rspec = rspec;
     return madr;
   }
 }
@@ -3892,6 +3898,21 @@
   emit_operand(src, dst);
 }
 
+void Assembler::movsbq(Register dst, Address src) {  // movsx: sign-extend the byte at src into dst
+  InstructionMark im(this);
+  prefixq(src, dst);        // presumably emits the REX.W (64-bit operand) prefix -- defined outside this hunk
+  emit_byte(0x0F);          // two-byte opcode 0F BE = MOVSX from an 8-bit source
+  emit_byte(0xBE);
+  emit_operand(dst, src);   // operand bytes addressing src, with dst as the register field
+}
+
+void Assembler::movsbq(Register dst, Register src) {  // movsx: sign-extend the low byte of src into dst
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());  // presumably emits REX.W and packs both register numbers -- confirm
+  emit_byte(0x0F);          // two-byte opcode 0F BE = MOVSX from an 8-bit source
+  emit_byte(0xBE);
+  emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct form)
+}
+
 void Assembler::movslq(Register dst, int32_t imm32) {
   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
@@ -3925,6 +3946,51 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::movswq(Register dst, Address src) {  // movsx: sign-extend the 16-bit value at src into dst
+  InstructionMark im(this);
+  prefixq(src, dst);        // presumably emits the REX.W (64-bit operand) prefix -- defined outside this hunk
+  emit_byte(0x0F);          // two-byte opcode 0F BF = MOVSX from a 16-bit source
+  emit_byte(0xBF);
+  emit_operand(dst, src);   // operand bytes addressing src, with dst as the register field
+}
+
+void Assembler::movswq(Register dst, Register src) {  // movsx: sign-extend the low 16 bits of src into dst
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());  // presumably emits REX.W and packs both register numbers -- confirm
+  emit_byte(0x0F);          // two-byte opcode 0F BF = MOVSX from a 16-bit source
+  emit_byte(0xBF);
+  emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct form)
+}
+
+void Assembler::movzbq(Register dst, Address src) {  // movzx: zero-extend the byte at src into dst
+  InstructionMark im(this);
+  prefixq(src, dst);        // presumably emits the REX.W (64-bit operand) prefix -- defined outside this hunk
+  emit_byte(0x0F);          // two-byte opcode 0F B6 = MOVZX from an 8-bit source
+  emit_byte(0xB6);
+  emit_operand(dst, src);   // operand bytes addressing src, with dst as the register field
+}
+
+void Assembler::movzbq(Register dst, Register src) {  // movzx: zero-extend the low byte of src into dst
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());  // presumably emits REX.W and packs both register numbers -- confirm
+  emit_byte(0x0F);          // two-byte opcode 0F B6 = MOVZX from an 8-bit source
+  emit_byte(0xB6);
+  emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct form)
+}
+
+void Assembler::movzwq(Register dst, Address src) {  // movzx: zero-extend the 16-bit value at src into dst
+  InstructionMark im(this);
+  prefixq(src, dst);        // presumably emits the REX.W (64-bit operand) prefix -- defined outside this hunk
+  emit_byte(0x0F);          // two-byte opcode 0F B7 = MOVZX from a 16-bit source
+  emit_byte(0xB7);
+  emit_operand(dst, src);   // operand bytes addressing src, with dst as the register field
+}
+
+void Assembler::movzwq(Register dst, Register src) {  // movzx: zero-extend the low 16 bits of src into dst
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());  // presumably emits REX.W and packs both register numbers -- confirm
+  emit_byte(0x0F);          // two-byte opcode 0F B7 = MOVZX from a 16-bit source
+  emit_byte(0xB7);
+  emit_byte(0xC0 | encode); // ModRM byte with mod=11 (register-direct form)
+}
+
 void Assembler::negq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
   emit_byte(0xF7);
@@ -6197,8 +6263,11 @@
   return off;
 }
 
-// word => int32 which seems bad for 64bit
-int MacroAssembler::load_signed_word(Register dst, Address src) {
+// Note: load_signed_short used to be called load_signed_word.
+// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
+// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
+// The term "word" in HotSpot means a 32- or 64-bit machine word.
+int MacroAssembler::load_signed_short(Register dst, Address src) {
   int off;
   if (LP64_ONLY(true ||) VM_Version::is_P6()) {
     // This is dubious to me since it seems safe to do a signed 16 => 64 bit
@@ -6207,7 +6276,7 @@
     off = offset();
     movswl(dst, src); // movsxw
   } else {
-    off = load_unsigned_word(dst, src);
+    off = load_unsigned_short(dst, src);
     shll(dst, 16);
     sarl(dst, 16);
   }
@@ -6229,7 +6298,8 @@
   return off;
 }
 
-int MacroAssembler::load_unsigned_word(Register dst, Address src) {
+// Note: load_unsigned_short used to be called load_unsigned_word.
+int MacroAssembler::load_unsigned_short(Register dst, Address src) {
   // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
   // and "3.9 Partial Register Penalties", p. 22).
   int off;
@@ -6244,6 +6314,28 @@
   return off;
 }
 
+void MacroAssembler::load_sized_value(Register dst, Address src,
+                                      int size_in_bytes, bool is_signed) {  // emit a load of size_in_bytes from src into dst
+  switch (size_in_bytes ^ (is_signed ? -1 : 0)) {  // x ^ -1 == ~x: signed requests dispatch on ~size, unsigned on size
+#ifndef _LP64
+  // For case 8, caller is responsible for manually loading
+  // the second word into another register.
+  case ~8:  // fall through:
+  case  8:  movl(                dst, src ); break;
+#else
+  case ~8:  // fall through:
+  case  8:  movq(                dst, src ); break;
+#endif
+  case ~4:  // fall through:
+  case  4:  movl(                dst, src ); break;  // the same movl serves signed and unsigned 4-byte loads
+  case ~2:  load_signed_short(   dst, src ); break;
+  case  2:  load_unsigned_short( dst, src ); break;
+  case ~1:  load_signed_byte(    dst, src ); break;
+  case  1:  load_unsigned_byte(  dst, src ); break;
+  default:  ShouldNotReachHere();  // any size other than 1, 2, 4 or 8
+  }
+}
+
 void MacroAssembler::mov32(AddressLiteral dst, Register src) {
   if (reachable(dst)) {
     movl(as_Address(dst), src);
@@ -7050,6 +7142,81 @@
 }
 
 
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label. Overwrites recv_klass and scan_temp.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Label& L_no_such_interface) {
+  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;
+  Address::ScaleFactor times_vte_scale = Address::times_ptr;
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));  // used below as an index scaled by times_vte_scale
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
+  if (HeapWordsPerLong > 1) {
+    // Round up to align_object_offset boundary
+    // see code for instanceKlass::start_of_itable!
+    round_to(scan_temp, BytesPerLong);
+  }
+
+  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
+
+  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
+  //   if (scan->interface() == intf) {
+  //     result = (klass + scan->offset() + itable_index);
+  //   }
+  // }
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {  // generation-time loop: emits the probe twice (entry probe, then loop-back probe)
+    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
+    cmpptr(intf_klass, method_result);
+
+    if (peel) {
+      jccb(Assembler::equal, found_method);
+    } else {
+      jccb(Assembler::notEqual, search);
+      // (invert the test to fall through to found_method...)
+    }
+
+    if (!peel)  break;
+
+    bind(search);  // loop head of the emitted code; target of the second probe's back branch
+
+    // Check that the previous entry is non-null.  A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    testptr(method_result, method_result);
+    jcc(Assembler::zero, L_no_such_interface);
+    addptr(scan_temp, scan_step);
+  }
+
+  bind(found_method);
+
+  // Got a hit.
+  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));  // scan_temp = scan->offset()
+  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));  // recv_klass already includes the scaled itable_index
+}
+
+
 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
   ucomisd(dst, as_Address(src));
 }
@@ -7095,6 +7262,31 @@
 }
 
 
+RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr,
+                                               Register tmp,
+                                               int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterConstant(value + offset);  // already known at generation time: fold into a constant
+
+  // load indirectly to solve generation ordering problem
+  movptr(tmp, ExternalAddress((address) delayed_value_addr));
+
+#ifdef ASSERT
+  Label L;
+  testptr(tmp, tmp);  // pointer-width test to match the movptr load; testl would ignore the upper half on LP64
+  jccb(Assembler::notZero, L);
+  hlt();  // reached only if the loaded value is still zero when the generated code runs
+  bind(L);
+#endif
+
+  if (offset != 0)
+    addptr(tmp, offset);
+
+  return RegisterConstant(tmp);  // value (plus offset) is available in tmp at run time
+}
+
+
 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
   if (!VerifyOops) return;
 
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/assembler_x86.hpp
--- a/src/cpu/x86/vm/assembler_x86.hpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -153,6 +153,21 @@
     times_8  =  3,
     times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
   };
+  static ScaleFactor times(int size) {  // map an element size of 1, 2, 4 or 8 to the matching ScaleFactor
+    assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
+    if (size == 8)  return times_8;
+    if (size == 4)  return times_4;
+    if (size == 2)  return times_2;
+    return times_1;  // only size == 1 remains when the assert above holds
+  }
+  static int scale_size(ScaleFactor scale) {  // inverse of times(): the multiplier (1, 2, 4 or 8) for a ScaleFactor
+    assert(scale != no_scale, "");
+    assert(((1 << (int)times_1) == 1 &&
+            (1 << (int)times_2) == 2 &&
+            (1 << (int)times_4) == 4 &&
+            (1 << (int)times_8) == 8), "");  // checks that each enum value is log2 of its multiplier
+    return (1 << (int)scale);
+  }
 
  private:
   Register         _base;
@@ -197,6 +212,22 @@
            "inconsistent address");
   }
 
+  Address(Register base, RegisterConstant index, ScaleFactor scale = times_1, int disp = 0)  // index may be a register or a constant
+    : _base (base),
+      _index(index.register_or_noreg()),
+      _scale(scale),
+      _disp (disp + (index.constant_or_zero() * scale_size(scale))) {  // a constant index is pre-scaled and folded into the displacement
+    if (!index.is_register())  scale = Address::no_scale;  // changes only the local used by the assert; _scale keeps the caller's value
+    assert(!_index->is_valid() == (scale == Address::no_scale),
+           "inconsistent address");
+  }
+
+  Address plus_disp(int disp) const {  // returns a copy of this address with disp added to its displacement
+    Address a = (*this);
+    a._disp += disp;  // only the copy is modified; this object is unchanged
+    return a;
+  }
+
   // The following two overloads are used in connection with the
   // ByteSize type (see sizes.hpp).  They simplify the use of
   // ByteSize'd arguments in assembly code. Note that their equivalent
@@ -224,6 +255,17 @@
     assert(!index->is_valid() == (scale == Address::no_scale),
            "inconsistent address");
   }
+
+  Address(Register base, RegisterConstant index, ScaleFactor scale, ByteSize disp)  // ByteSize-displacement form; index may be a register or a constant
+    : _base (base),
+      _index(index.register_or_noreg()),
+      _scale(scale),
+      _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) {  // a constant index is pre-scaled and folded into the displacement
+    if (!index.is_register())  scale = Address::no_scale;  // changes only the local used by the assert; _scale keeps the caller's value
+    assert(!_index->is_valid() == (scale == Address::no_scale),
+           "inconsistent address");
+  }
+
 #endif // ASSERT
 
   // accessors
@@ -236,11 +278,10 @@
   // Convert the raw encoding form into the form expected by the constructor for
   // Address.  An index of 4 (rsp) corresponds to having no index, so convert
   // that to noreg for the Address constructor.
-  static Address make_raw(int base, int index, int scale, int disp);
+  static Address make_raw(int base, int index, int scale, int disp, bool disp_is_oop);
 
   static Address make_array(ArrayAddress);
 
-
  private:
   bool base_needs_rex() const {
     return _base != noreg && _base->encoding() >= 8;
@@ -1097,6 +1138,9 @@
   void movsbl(Register dst, Register src);
 
 #ifdef _LP64
+  void movsbq(Register dst, Address src);
+  void movsbq(Register dst, Register src);
+
   // Move signed 32bit immediate to 64bit extending sign
   void movslq(Address dst, int32_t imm64);
   void movslq(Register dst, int32_t imm64);
@@ -1109,6 +1153,11 @@
   void movswl(Register dst, Address src);
   void movswl(Register dst, Register src);
 
+#ifdef _LP64
+  void movswq(Register dst, Address src);
+  void movswq(Register dst, Register src);
+#endif
+
   void movw(Address dst, int imm16);
   void movw(Register dst, Address src);
   void movw(Address dst, Register src);
@@ -1116,9 +1165,19 @@
   void movzbl(Register dst, Address src);
   void movzbl(Register dst, Register src);
 
+#ifdef _LP64
+  void movzbq(Register dst, Address src);
+  void movzbq(Register dst, Register src);
+#endif
+
   void movzwl(Register dst, Address src);
   void movzwl(Register dst, Register src);
 
+#ifdef _LP64
+  void movzwq(Register dst, Address src);
+  void movzwq(Register dst, Register src);
+#endif
+
   void mull(Address src);
   void mull(Register src);
 
@@ -1393,17 +1452,20 @@
 
   // The following 4 methods return the offset of the appropriate move instruction
 
-  // Support for fast byte/word loading with zero extension (depending on particular CPU)
+  // Support for fast byte/short loading with zero extension (depending on particular CPU)
   int load_unsigned_byte(Register dst, Address src);
-  int load_unsigned_word(Register dst, Address src);
-
-  // Support for fast byte/word loading with sign extension (depending on particular CPU)
+  int load_unsigned_short(Register dst, Address src);
+
+  // Support for fast byte/short loading with sign extension (depending on particular CPU)
   int load_signed_byte(Register dst, Address src);
-  int load_signed_word(Register dst, Address src);
+  int load_signed_short(Register dst, Address src);
 
   // Support for sign-extension (hi:lo = extend_sign(lo))
   void extend_sign(Register hi, Register lo);
 
+  // Loading values by size and signed-ness
+  void load_sized_value(Register dst, Address src, int size_in_bytes, bool is_signed);
+
   // Support for inc/dec with optimal instruction selection depending on value
 
   void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
@@ -1721,6 +1783,14 @@
   );
   void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
 
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterConstant itable_index,
+                               Register method_result,
+                               Register scan_temp,
+                               Label& no_such_interface);
+
   //----
   void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
 
@@ -1763,6 +1833,10 @@
   // stack overflow + shadow pages.  Also, clobbers tmp
   void bang_stack_size(Register size, Register tmp);
 
+  virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr,
+                                         Register tmp,
+                                         int offset);
+
   // Support for serializing memory accesses between threads
   void serialize_memory(Register thread, Register tmp);
 
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -554,8 +554,8 @@
   __ jcc (Assembler::zero, noLoop);
 
   // compare first characters
-  __ load_unsigned_word(rcx, Address(rdi, 0));
-  __ load_unsigned_word(rbx, Address(rsi, 0));
+  __ load_unsigned_short(rcx, Address(rdi, 0));
+  __ load_unsigned_short(rbx, Address(rsi, 0));
   __ subl(rcx, rbx);
   __ jcc(Assembler::notZero, haveResult);
   // starting loop
@@ -574,8 +574,8 @@
   Label loop;
   __ align(wordSize);
   __ bind(loop);
-  __ load_unsigned_word(rcx, Address(rdi, rax, Address::times_2, 0));
-  __ load_unsigned_word(rbx, Address(rsi, rax, Address::times_2, 0));
+  __ load_unsigned_short(rcx, Address(rdi, rax, Address::times_2, 0));
+  __ load_unsigned_short(rbx, Address(rsi, rax, Address::times_2, 0));
   __ subl(rcx, rbx);
   __ jcc(Assembler::notZero, haveResult);
   __ increment(rax);
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/cppInterpreter_x86.cpp
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -513,7 +513,7 @@
     // compute full expression stack limit
 
     const Address size_of_stack    (rbx, methodOopDesc::max_stack_offset());
-    __ load_unsigned_word(rdx, size_of_stack);                            // get size of expression stack in words
+    __ load_unsigned_short(rdx, size_of_stack);                           // get size of expression stack in words
     __ negptr(rdx);                                                       // so we can subtract in next step
     // Allocate expression stack
     __ lea(rsp, Address(rsp, rdx, Address::times_ptr));
@@ -659,7 +659,7 @@
     // Always give one monitor to allow us to start interp if sync method.
     // Any additional monitors need a check when moving the expression stack
     const int one_monitor = frame::interpreter_frame_monitor_size() * wordSize;
-  __ load_unsigned_word(rax, size_of_stack);                            // get size of expression stack in words
+  __ load_unsigned_short(rax, size_of_stack);                           // get size of expression stack in words
   __ lea(rax, Address(noreg, rax, Interpreter::stackElementScale(), one_monitor));
   __ lea(rax, Address(rax, rdx, Interpreter::stackElementScale(), overhead_size));
 
@@ -863,13 +863,13 @@
     __ bind(notByte);
     __ cmpl(rdx, stos);
     __ jcc(Assembler::notEqual, notShort);
-    __ load_signed_word(rax, field_address);
+    __ load_signed_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notShort);
     __ cmpl(rdx, ctos);
     __ jcc(Assembler::notEqual, notChar);
-    __ load_unsigned_word(rax, field_address);
+    __ load_unsigned_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notChar);
@@ -937,7 +937,7 @@
   const Register locals = rdi;
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: methodOop
   // rcx: size of parameters
@@ -1062,7 +1062,7 @@
   // allocate space for parameters
   __ movptr(method, STATE(_method));
   __ verify_oop(method);
-  __ load_unsigned_word(t, Address(method, methodOopDesc::size_of_parameters_offset()));
+  __ load_unsigned_short(t, Address(method, methodOopDesc::size_of_parameters_offset()));
   __ shll(t, 2);
 #ifdef _LP64
   __ subptr(rsp, t);
@@ -1659,11 +1659,11 @@
   // const Address monitor(rbp, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock));
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: methodOop
   // rcx: size of parameters
-  __ load_unsigned_word(rdx, size_of_locals);                      // get size of locals in words
+  __ load_unsigned_short(rdx, size_of_locals);                     // get size of locals in words
 
   __ subptr(rdx, rcx);                                             // rdx = no. of additional locals
 
@@ -1949,7 +1949,7 @@
   __ movptr(rbx, STATE(_result._to_call._callee));
 
   // callee left args on top of expression stack, remove them
-  __ load_unsigned_word(rcx, Address(rbx, methodOopDesc::size_of_parameters_offset()));
+  __ load_unsigned_short(rcx, Address(rbx, methodOopDesc::size_of_parameters_offset()));
   __ lea(rsp, Address(rsp, rcx, Address::times_ptr));
 
   __ movl(rcx, Address(rbx, methodOopDesc::result_index_offset()));
@@ -2119,7 +2119,7 @@
   // Make it look like call_stub calling conventions
 
   // Get (potential) receiver
-  __ load_unsigned_word(rcx, size_of_parameters);                     // get size of parameters in words
+  __ load_unsigned_short(rcx, size_of_parameters);                   // get size of parameters in words
 
   ExternalAddress recursive(CAST_FROM_FN_PTR(address, RecursiveInterpreterActivation));
   __ pushptr(recursive.addr());                                      // make it look good in the debugger
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/interp_masm_x86_32.cpp
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -192,7 +192,7 @@
 void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
   assert(cache != index, "must use different registers");
-  load_unsigned_word(index, Address(rsi, bcp_offset));
+  load_unsigned_short(index, Address(rsi, bcp_offset));
   movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
   assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
   shlptr(index, 2); // convert from field index to ConstantPoolCacheEntry index
@@ -202,7 +202,7 @@
 void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
   assert(cache != tmp, "must use different register");
-  load_unsigned_word(tmp, Address(rsi, bcp_offset));
+  load_unsigned_short(tmp, Address(rsi, bcp_offset));
   assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
                                // convert from field index to ConstantPoolCacheEntry index
                                // and from word offset to byte offset
@@ -1031,7 +1031,7 @@
 
   // If the mdp is valid, it will point to a DataLayout header which is
   // consistent with the bcp.  The converse is highly probable also.
-  load_unsigned_word(rdx, Address(rcx, in_bytes(DataLayout::bci_offset())));
+  load_unsigned_short(rdx, Address(rcx, in_bytes(DataLayout::bci_offset())));
   addptr(rdx, Address(rbx, methodOopDesc::const_offset()));
   lea(rdx, Address(rdx, constMethodOopDesc::codes_offset()));
   cmpptr(rdx, rsi);
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/interp_masm_x86_64.cpp
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -190,7 +190,7 @@
                                                            int bcp_offset) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
   assert(cache != index, "must use different registers");
-  load_unsigned_word(index, Address(r13, bcp_offset));
+  load_unsigned_short(index, Address(r13, bcp_offset));
   movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
   // convert from field index to ConstantPoolCacheEntry index
@@ -203,7 +203,7 @@
                                                                int bcp_offset) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
   assert(cache != tmp, "must use different register");
-  load_unsigned_word(tmp, Address(r13, bcp_offset));
+  load_unsigned_short(tmp, Address(r13, bcp_offset));
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
   // convert from field index to ConstantPoolCacheEntry index
   // and from word offset to byte offset
@@ -1063,8 +1063,8 @@
 
   // If the mdp is valid, it will point to a DataLayout header which is
   // consistent with the bcp.  The converse is highly probable also.
-  load_unsigned_word(c_rarg2,
-                     Address(c_rarg3, in_bytes(DataLayout::bci_offset())));
+  load_unsigned_short(c_rarg2,
+                      Address(c_rarg3, in_bytes(DataLayout::bci_offset())));
   addptr(c_rarg2, Address(rbx, methodOopDesc::const_offset()));
   lea(c_rarg2, Address(c_rarg2, constMethodOopDesc::codes_offset()));
   cmpptr(c_rarg2, r13);
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/templateInterpreter_x86_32.cpp
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -662,13 +662,13 @@
     __ bind(notByte);
     __ cmpl(rdx, stos);
     __ jcc(Assembler::notEqual, notShort);
-    __ load_signed_word(rax, field_address);
+    __ load_signed_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notShort);
     __ cmpl(rdx, ctos);
     __ jcc(Assembler::notEqual, notChar);
-    __ load_unsigned_word(rax, field_address);
+    __ load_unsigned_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notChar);
@@ -723,7 +723,7 @@
   const Address access_flags      (rbx, methodOopDesc::access_flags_offset());
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // native calls don't need the stack size check since they have no expression stack
   // and the arguments are already on the stack and we only add a handful of words
@@ -838,7 +838,7 @@
   // allocate space for parameters
   __ get_method(method);
   __ verify_oop(method);
-  __ load_unsigned_word(t, Address(method, methodOopDesc::size_of_parameters_offset()));
+  __ load_unsigned_short(t, Address(method, methodOopDesc::size_of_parameters_offset()));
   __ shlptr(t, Interpreter::logStackElementSize());
   __ addptr(t, 2*wordSize);     // allocate two more slots for JNIEnv and possible mirror
   __ subptr(rsp, t);
@@ -1155,14 +1155,14 @@
   const Address access_flags      (rbx, methodOopDesc::access_flags_offset());
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx,: methodOop
   // rcx: size of parameters
 
   // rsi: sender_sp (could differ from sp+wordSize if we were called via c2i )
 
-  __ load_unsigned_word(rdx, size_of_locals);       // get size of locals in words
+  __ load_unsigned_short(rdx, size_of_locals);       // get size of locals in words
   __ subl(rdx, rcx);                                // rdx = no. of additional locals
 
   // see if we've got enough room on the stack for locals plus overhead.
@@ -1558,7 +1558,7 @@
     // Compute size of arguments for saving when returning to deoptimized caller
     __ get_method(rax);
     __ verify_oop(rax);
-    __ load_unsigned_word(rax, Address(rax, in_bytes(methodOopDesc::size_of_parameters_offset())));
+    __ load_unsigned_short(rax, Address(rax, in_bytes(methodOopDesc::size_of_parameters_offset())));
     __ shlptr(rax, Interpreter::logStackElementSize());
     __ restore_locals();
     __ subptr(rdi, rax);
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/templateInterpreter_x86_64.cpp
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -650,7 +650,7 @@
     __ cmpl(rdx, stos);
     __ jcc(Assembler::notEqual, notShort);
     // stos
-    __ load_signed_word(rax, field_address);
+    __ load_signed_short(rax, field_address);
     __ jmp(xreturn_path);
 
     __ bind(notShort);
@@ -662,7 +662,7 @@
     __ bind(okay);
 #endif
     // ctos
-    __ load_unsigned_word(rax, field_address);
+    __ load_unsigned_short(rax, field_address);
 
     __ bind(xreturn_path);
 
@@ -702,7 +702,7 @@
   const Address access_flags      (rbx, methodOopDesc::access_flags_offset());
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // native calls don't need the stack size check since they have no
   // expression stack and the arguments are already on the stack and
@@ -819,9 +819,9 @@
   // allocate space for parameters
   __ get_method(method);
   __ verify_oop(method);
-  __ load_unsigned_word(t,
-                        Address(method,
-                                methodOopDesc::size_of_parameters_offset()));
+  __ load_unsigned_short(t,
+                         Address(method,
+                                 methodOopDesc::size_of_parameters_offset()));
   __ shll(t, Interpreter::logStackElementSize());
 
   __ subptr(rsp, t);
@@ -1165,13 +1165,13 @@
   const Address access_flags(rbx, methodOopDesc::access_flags_offset());
 
   // get parameter size (always needed)
-  __ load_unsigned_word(rcx, size_of_parameters);
+  __ load_unsigned_short(rcx, size_of_parameters);
 
   // rbx: methodOop
   // rcx: size of parameters
   // r13: sender_sp (could differ from sp+wordSize if we were called via c2i )
 
-  __ load_unsigned_word(rdx, size_of_locals); // get size of locals in words
+  __ load_unsigned_short(rdx, size_of_locals); // get size of locals in words
   __ subl(rdx, rcx); // rdx = no. of additional locals
 
   // YYY
@@ -1583,7 +1583,7 @@
     // Compute size of arguments for saving when returning to
     // deoptimized caller
     __ get_method(rax);
-    __ load_unsigned_word(rax, Address(rax, in_bytes(methodOopDesc::
+    __ load_unsigned_short(rax, Address(rax, in_bytes(methodOopDesc::
                                                 size_of_parameters_offset())));
     __ shll(rax, Interpreter::logStackElementSize());
     __ restore_locals(); // XXX do we need this?
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/templateTable_x86_32.cpp
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -296,7 +296,7 @@
 
 void TemplateTable::sipush() {
   transition(vtos, itos);
-  __ load_unsigned_word(rax, at_bcp(1));
+  __ load_unsigned_short(rax, at_bcp(1));
   __ bswapl(rax);
   __ sarl(rax, 16);
 }
@@ -662,7 +662,7 @@
   index_check(rdx, rax);  // kills rbx,
   // rax,: index
   // can do better code for P5 - may want to improve this at some point
-  __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+  __ load_unsigned_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   __ mov(rax, rbx);
 }
 
@@ -677,7 +677,7 @@
   // rdx: array
   index_check(rdx, rax);
   // rax,: index
-  __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+  __ load_unsigned_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
   __ mov(rax, rbx);
 }
 
@@ -687,7 +687,7 @@
   index_check(rdx, rax);  // kills rbx,
   // rax,: index
   // can do better code for P5 - may want to improve this at some point
-  __ load_signed_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
+  __ load_signed_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
   __ mov(rax, rbx);
 }
 
@@ -2310,7 +2310,7 @@
   __ cmpl(flags, ctos );
   __ jcc(Assembler::notEqual, notChar);
 
-  __ load_unsigned_word(rax, lo );
+  __ load_unsigned_short(rax, lo );
   __ push(ctos);
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_cgetfield, rcx, rbx);
@@ -2322,7 +2322,7 @@
   __ cmpl(flags, stos );
   __ jcc(Assembler::notEqual, notShort);
 
-  __ load_signed_word(rax, lo );
+  __ load_signed_short(rax, lo );
   __ push(stos);
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_sgetfield, rcx, rbx);
@@ -2830,8 +2830,8 @@
   // access field
   switch (bytecode()) {
     case Bytecodes::_fast_bgetfield: __ movsbl(rax, lo );                 break;
-    case Bytecodes::_fast_sgetfield: __ load_signed_word(rax, lo );       break;
-    case Bytecodes::_fast_cgetfield: __ load_unsigned_word(rax, lo );     break;
+    case Bytecodes::_fast_sgetfield: __ load_signed_short(rax, lo );      break;
+    case Bytecodes::_fast_cgetfield: __ load_unsigned_short(rax, lo );    break;
     case Bytecodes::_fast_igetfield: __ movl(rax, lo);                    break;
     case Bytecodes::_fast_lgetfield: __ stop("should not be rewritten");  break;
     case Bytecodes::_fast_fgetfield: __ fld_s(lo);                        break;
@@ -3055,35 +3055,44 @@
   // profile this call
   __ profile_virtual_call(rdx, rsi, rdi);
 
-  __ mov(rdi, rdx); // Save klassOop in rdi
-
-  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == (1 << (int)Address::times_ptr), "adjust the scaling in the code below");
-  __ movl(rsi, Address(rdx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable
-  __ lea(rdx, Address(rdx, rsi, Address::times_4, base));
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    __ round_to(rdx, BytesPerLong);
-  }
-
-  Label entry, search, interface_ok;
-
-  __ jmpb(entry);
-  __ bind(search);
-  __ addptr(rdx, itableOffsetEntry::size() * wordSize);
-
-  __ bind(entry);
-
-  // Check that the entry is non-null.  A null entry means that the receiver
-  // class doesn't implement the interface, and wasn't the same as the
-  // receiver class checked when the interface was resolved.
-  __ push(rdx);
-  __ movptr(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testptr(rdx, rdx);
-  __ jcc(Assembler::notZero, interface_ok);
+  Label no_such_interface, no_such_method;
+
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             rdx, rax, rbx,
+                             // outputs: method, scan temp. reg
+                             rbx, rsi,
+                             no_such_interface);
+
+  // rbx,: methodOop to call
+  // rcx: receiver
+  // Check for abstract method error
+  // Note: This should be done more efficiently via a throw_abstract_method_error
+  //       interpreter entry point and a conditional jump to it in case of a null
+  //       method.
+  __ testptr(rbx, rbx);
+  __ jcc(Assembler::zero, no_such_method);
+
+  // do the call
+  // rcx: receiver
+  // rbx,: methodOop
+  __ jump_from_interpreted(rbx, rdx);
+  __ should_not_reach_here();
+
+  // exception handling code follows...
+  // note: must restore interpreter registers to canonical
+  //       state for exception handling to work correctly!
+
+  __ bind(no_such_method);
   // throw exception
-  __ pop(rdx);           // pop saved register first.
+  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
+  __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  __ bind(no_such_interface);
+  // throw exception
   __ pop(rbx);           // pop return address (pushed by prepare_invoke)
   __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
@@ -3091,42 +3100,6 @@
                    InterpreterRuntime::throw_IncompatibleClassChangeError));
   // the call_VM checks for exception, so we should never return here.
   __ should_not_reach_here();
-  __ bind(interface_ok);
-
-    __ pop(rdx);
-
-    __ cmpptr(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-    __ jcc(Assembler::notEqual, search);
-
-    __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));
-    __ addptr(rdx, rdi); // Add offset to klassOop
-    assert(itableMethodEntry::size() * wordSize == (1 << (int)Address::times_ptr), "adjust the scaling in the code below");
-    __ movptr(rbx, Address(rdx, rbx, Address::times_ptr));
-    // rbx,: methodOop to call
-    // rcx: receiver
-    // Check for abstract method error
-    // Note: This should be done more efficiently via a throw_abstract_method_error
-    //       interpreter entry point and a conditional jump to it in case of a null
-    //       method.
-    { Label L;
-      __ testptr(rbx, rbx);
-      __ jcc(Assembler::notZero, L);
-      // throw exception
-          // note: must restore interpreter registers to canonical
-          //       state for exception handling to work correctly!
-          __ pop(rbx);           // pop return address (pushed by prepare_invoke)
-          __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
-          __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
-      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
-      // the call_VM checks for exception, so we should never return here.
-      __ should_not_reach_here();
-      __ bind(L);
-    }
-
-  // do the call
-  // rcx: receiver
-  // rbx,: methodOop
-  __ jump_from_interpreted(rbx, rdx);
 }
 
 //----------------------------------------------------------------------------------------------------
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/templateTable_x86_64.cpp
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -307,7 +307,7 @@
 
 void TemplateTable::sipush() {
   transition(vtos, itos);
-  __ load_unsigned_word(rax, at_bcp(1));
+  __ load_unsigned_short(rax, at_bcp(1));
   __ bswapl(rax);
   __ sarl(rax, 16);
 }
@@ -645,10 +645,10 @@
   // eax: index
   // rdx: array
   index_check(rdx, rax); // kills rbx
-  __ load_unsigned_word(rax,
-                        Address(rdx, rax,
-                                Address::times_2,
-                                arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+  __ load_unsigned_short(rax,
+                         Address(rdx, rax,
+                                 Address::times_2,
+                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 }
 
 // iload followed by caload frequent pair
@@ -663,10 +663,10 @@
   // rdx: array
   __ pop_ptr(rdx);
   index_check(rdx, rax); // kills rbx
-  __ load_unsigned_word(rax,
-                        Address(rdx, rax,
-                                Address::times_2,
-                                arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+  __ load_unsigned_short(rax,
+                         Address(rdx, rax,
+                                 Address::times_2,
+                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
 }
 
 void TemplateTable::saload() {
@@ -675,10 +675,10 @@
   // eax: index
   // rdx: array
   index_check(rdx, rax); // kills rbx
-  __ load_signed_word(rax,
-                      Address(rdx, rax,
-                              Address::times_2,
-                              arrayOopDesc::base_offset_in_bytes(T_SHORT)));
+  __ load_signed_short(rax,
+                       Address(rdx, rax,
+                               Address::times_2,
+                               arrayOopDesc::base_offset_in_bytes(T_SHORT)));
 }
 
 void TemplateTable::iload(int n) {
@@ -2276,7 +2276,7 @@
   __ cmpl(flags, ctos);
   __ jcc(Assembler::notEqual, notChar);
   // ctos
-  __ load_unsigned_word(rax, field);
+  __ load_unsigned_short(rax, field);
   __ push(ctos);
   // Rewrite bytecode to be faster
   if (!is_static) {
@@ -2288,7 +2288,7 @@
   __ cmpl(flags, stos);
   __ jcc(Assembler::notEqual, notShort);
   // stos
-  __ load_signed_word(rax, field);
+  __ load_signed_short(rax, field);
   __ push(stos);
   // Rewrite bytecode to be faster
   if (!is_static) {
@@ -2751,10 +2751,10 @@
     __ movsbl(rax, field);
     break;
   case Bytecodes::_fast_sgetfield:
-    __ load_signed_word(rax, field);
+    __ load_signed_short(rax, field);
     break;
   case Bytecodes::_fast_cgetfield:
-    __ load_unsigned_word(rax, field);
+    __ load_unsigned_short(rax, field);
     break;
   case Bytecodes::_fast_fgetfield:
     __ movflt(xmm0, field);
@@ -3010,97 +3010,55 @@
   // profile this call
   __ profile_virtual_call(rdx, r13, r14);
 
-  __ mov(r14, rdx); // Save klassOop in r14
-
-  // Compute start of first itableOffsetEntry (which is at the end of
-  // the vtable)
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  // Get length of vtable
-  assert(vtableEntry::size() * wordSize == 8,
-         "adjust the scaling in the code below");
-  __ movl(r13, Address(rdx,
-                       instanceKlass::vtable_length_offset() * wordSize));
-  __ lea(rdx, Address(rdx, r13, Address::times_8, base));
-
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    __ round_to(rdx, BytesPerLong);
-  }
-
-  Label entry, search, interface_ok;
-
-  __ jmpb(entry);
-  __ bind(search);
-  __ addptr(rdx, itableOffsetEntry::size() * wordSize);
-
-  __ bind(entry);
-
-  // Check that the entry is non-null.  A null entry means that the
-  // receiver class doesn't implement the interface, and wasn't the
-  // same as the receiver class checked when the interface was
-  // resolved.
-  __ push(rdx);
-  __ movptr(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testptr(rdx, rdx);
-  __ jcc(Assembler::notZero, interface_ok);
+  Label no_such_interface, no_such_method;
+
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             rdx, rax, rbx,
+                             // outputs: method, scan temp. reg
+                             rbx, r13,
+                             no_such_interface);
+
+  // rbx: methodOop to call
+  // rcx: receiver
+  // Check for abstract method error
+  // Note: This should be done more efficiently via a throw_abstract_method_error
+  //       interpreter entry point and a conditional jump to it in case of a null
+  //       method.
+  __ testptr(rbx, rbx);
+  __ jcc(Assembler::zero, no_such_method);
+
+  // do the call
+  // rcx: receiver
+  // rbx: methodOop
+  __ jump_from_interpreted(rbx, rdx);
+  __ should_not_reach_here();
+
+  // exception handling code follows...
+  // note: must restore interpreter registers to canonical
+  //       state for exception handling to work correctly!
+
+  __ bind(no_such_method);
   // throw exception
-  __ pop(rdx); // pop saved register first.
-  __ pop(rbx); // pop return address (pushed by prepare_invoke)
-  __ restore_bcp(); // r13 must be correct for exception handler (was
-                    // destroyed)
-  __ restore_locals(); // make sure locals pointer is correct as well
-                       // (was destroyed)
+  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
+  __ restore_bcp();      // r13 must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  __ bind(no_such_interface);
+  // throw exception
+  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
+  __ restore_bcp();      // r13 must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
                    InterpreterRuntime::throw_IncompatibleClassChangeError));
   // the call_VM checks for exception, so we should never return here.
   __ should_not_reach_here();
-  __ bind(interface_ok);
-
-  __ pop(rdx);
-
-  __ cmpptr(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ jcc(Assembler::notEqual, search);
-
-  __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes()));
-
-  __ addptr(rdx, r14); // Add offset to klassOop
-  assert(itableMethodEntry::size() * wordSize == 8,
-         "adjust the scaling in the code below");
-  __ movptr(rbx, Address(rdx, rbx, Address::times_8));
-  // rbx: methodOop to call
-  // rcx: receiver
-  // Check for abstract method error
-  // Note: This should be done more efficiently via a
-  // throw_abstract_method_error interpreter entry point and a
-  // conditional jump to it in case of a null method.
-  {
-    Label L;
-    __ testptr(rbx, rbx);
-    __ jcc(Assembler::notZero, L);
-    // throw exception
-    // note: must restore interpreter registers to canonical
-    //       state for exception handling to work correctly!
-    __ pop(rbx);  // pop return address (pushed by prepare_invoke)
-    __ restore_bcp(); // r13 must be correct for exception handler
-                      // (was destroyed)
-    __ restore_locals(); // make sure locals pointer is correct as
-                         // well (was destroyed)
-    __ call_VM(noreg,
-               CAST_FROM_FN_PTR(address,
-                             InterpreterRuntime::throw_AbstractMethodError));
-    // the call_VM checks for exception, so we should never return here.
-    __ should_not_reach_here();
-    __ bind(L);
-  }
-
-  __ movptr(rcx, Address(rbx, methodOopDesc::interpreter_entry_offset()));
-
-  // do the call
-  // rcx: receiver
-  // rbx: methodOop
-  __ jump_from_interpreted(rbx, rdx);
+  return;
 }
 
+
 //-----------------------------------------------------------------------------
 // Allocation
 
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/vtableStubs_x86_32.cpp
--- a/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/vtableStubs_x86_32.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -34,10 +34,16 @@
 extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
 #endif
 
-// used by compiler only; may use only caller saved registers rax, rbx, rcx.
-// rdx holds first int arg, rsi, rdi, rbp are callee-save & must be preserved.
-// Leave receiver in rcx; required behavior when +OptoArgsInRegisters
-// is modifed to put first oop in rcx.
+// These stubs are used by the compiler only.
+// Argument registers, which must be preserved:
+//   rcx - receiver (always first argument)
+//   rdx - second argument (if any)
+// Other registers that might be usable:
+//   rax - inline cache register (is interface for itable stub)
+//   rbx - method (used when calling out to interpreter)
+// Available now, but may become callee-save at some point:
+//   rsi, rdi
+// Note that rax and rdx are also used for return values.
 //
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
   const int i486_code_length = VtableStub::pd_code_size_limit(true);
@@ -94,16 +100,25 @@
   __ jmp( Address(method, methodOopDesc::from_compiled_offset()));
 
   masm->flush();
+
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  vtable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
+  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
+
   s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 
-VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   // Note well: pd_code_size_limit is the absolute minimum we can get away with.  If you
   //            add code here, bump the code stub size returned by pd_code_size_limit!
   const int i486_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(i486_code_length) VtableStub(false, vtable_index);
+  VtableStub* s = new(i486_code_length) VtableStub(false, itable_index);
   ResourceMark rm;
   CodeBuffer cb(s->entry_point(), i486_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -123,50 +138,19 @@
 
   // get receiver klass (also an implicit null-check)
   address npe_addr = __ pc();
-  __ movptr(rbx, Address(rcx, oopDesc::klass_offset_in_bytes()));
-
-  __ mov(rsi, rbx);   // Save klass in free register
-  // Most registers are in use, so save a few
-  __ push(rdx);
-  // compute itable entry offset (in words)
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below");
-  __ movl(rdx, Address(rbx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable
-  __ lea(rbx, Address(rbx, rdx, Address::times_ptr, base));
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    __ round_to(rbx, BytesPerLong);
-  }
-
-  Label hit, next, entry, throw_icce;
-
-  __ jmpb(entry);
+  __ movptr(rsi, Address(rcx, oopDesc::klass_offset_in_bytes()));
 
-  __ bind(next);
-  __ addptr(rbx, itableOffsetEntry::size() * wordSize);
-
-  __ bind(entry);
-
-  // If the entry is NULL then we've reached the end of the table
-  // without finding the expected interface, so throw an exception
-  __ movptr(rdx, Address(rbx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testptr(rdx, rdx);
-  __ jcc(Assembler::zero, throw_icce);
-  __ cmpptr(rax, rdx);
-  __ jcc(Assembler::notEqual, next);
-
-  // We found a hit, move offset into rbx,
-  __ movl(rdx, Address(rbx, itableOffsetEntry::offset_offset_in_bytes()));
-
-  // Compute itableMethodEntry.
-  const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + itableMethodEntry::method_offset_in_bytes();
+  // Most registers are in use; we'll use rax, rbx, rsi, rdi
+  // (If we need to make rsi, rdi callee-save, do a push/pop here.)
+  const Register method = rbx;
+  Label throw_icce;
 
   // Get methodOop and entrypoint for compiler
-  const Register method = rbx;
-  __ movptr(method, Address(rsi, rdx, Address::times_1, method_offset));
-
-  // Restore saved register, before possible trap.
-  __ pop(rdx);
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             rsi, rax, itable_index,
+                             // outputs: method, scan temp. reg
+                             method, rdi,
+                             throw_icce);
 
   // method (rbx): methodOop
   // rcx: receiver
@@ -187,12 +171,15 @@
   __ jmp(Address(method, methodOopDesc::from_compiled_offset()));
 
   __ bind(throw_icce);
-  // Restore saved register
-  __ pop(rdx);
   __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
-
   masm->flush();
 
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  itable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
   guarantee(__ pc() <= s->code_end(), "overflowed buffer");
 
   s->set_exception_points(npe_addr, ame_addr);
@@ -207,7 +194,7 @@
     return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0);
   } else {
     // Itable stub size
-    return (DebugVtables ? 144 : 64) + (CountCompiledCalls ? 6 : 0);
+    return (DebugVtables ? 256 : 66) + (CountCompiledCalls ? 6 : 0);
   }
 }
 
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/vtableStubs_x86_64.cpp
--- a/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -98,17 +98,26 @@
   __ jmp( Address(rbx, methodOopDesc::from_compiled_offset()));
 
   __ flush();
+
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  vtable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
+  guarantee(__ pc() <= s->code_end(), "overflowed buffer");
+
   s->set_exception_points(npe_addr, ame_addr);
   return s;
 }
 
 
-VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   // Note well: pd_code_size_limit is the absolute minimum we can get
   // away with.  If you add code here, bump the code stub size
   // returned by pd_code_size_limit!
   const int amd64_code_length = VtableStub::pd_code_size_limit(false);
-  VtableStub* s = new(amd64_code_length) VtableStub(false, vtable_index);
+  VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index);
   ResourceMark rm;
   CodeBuffer cb(s->entry_point(), amd64_code_length);
   MacroAssembler* masm = new MacroAssembler(&cb);
@@ -131,68 +140,28 @@
   // get receiver klass (also an implicit null-check)
   address npe_addr = __ pc();
 
-  __ load_klass(rbx, j_rarg0);
+  // Most registers are in use; we'll use rax, rbx, r10, r11
+  // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them)
+  __ load_klass(r10, j_rarg0);
 
   // If we take a trap while this arg is on the stack we will not
   // be able to walk the stack properly. This is not an issue except
   // when there are mistakes in this assembly code that could generate
   // a spurious fault. Ask me how I know...
 
-  __ push(j_rarg1);     // Most registers are in use, so save one
-
-  // compute itable entry offset (in words)
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  assert(vtableEntry::size() * wordSize == 8,
-         "adjust the scaling in the code below");
-  // Get length of vtable
-  __ movl(j_rarg1,
-          Address(rbx, instanceKlass::vtable_length_offset() * wordSize));
-  __ lea(rbx, Address(rbx, j_rarg1, Address::times_8, base));
-
-  if (HeapWordsPerLong > 1) {
-    // Round up to align_object_offset boundary
-    __ round_to(rbx, BytesPerLong);
-  }
-  Label hit, next, entry, throw_icce;
-
-  __ jmpb(entry);
-
-  __ bind(next);
-  __ addptr(rbx, itableOffsetEntry::size() * wordSize);
-
-  __ bind(entry);
-
-  // If the entry is NULL then we've reached the end of the table
-  // without finding the expected interface, so throw an exception
-  __ movptr(j_rarg1, Address(rbx, itableOffsetEntry::interface_offset_in_bytes()));
-  __ testptr(j_rarg1, j_rarg1);
-  __ jcc(Assembler::zero, throw_icce);
-  __ cmpptr(rax, j_rarg1);
-  __ jccb(Assembler::notEqual, next);
-
-  // We found a hit, move offset into j_rarg1
-  __ movl(j_rarg1, Address(rbx, itableOffsetEntry::offset_offset_in_bytes()));
-
-  // Compute itableMethodEntry
-  const int method_offset =
-    (itableMethodEntry::size() * wordSize * vtable_index) +
-    itableMethodEntry::method_offset_in_bytes();
+  const Register method = rbx;
+  Label throw_icce;
 
   // Get methodOop and entrypoint for compiler
-
-  // Get klass pointer again
-  __ load_klass(rax, j_rarg0);
-
-  const Register method = rbx;
-  __ movptr(method, Address(rax, j_rarg1, Address::times_1, method_offset));
-
-  // Restore saved register, before possible trap.
-  __ pop(j_rarg1);
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             r10, rax, itable_index,
+                             // outputs: method, scan temp. reg
+                             method, r11,
+                             throw_icce);
 
   // method (rbx): methodOop
   // j_rarg0: receiver
 
-
 #ifdef ASSERT
   if (DebugVtables) {
     Label L2;
@@ -211,12 +180,16 @@
   __ jmp(Address(method, methodOopDesc::from_compiled_offset()));
 
   __ bind(throw_icce);
-  // Restore saved register
-  __ pop(j_rarg1);
   __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
 
   __ flush();
 
+  if (PrintMiscellaneous && (WizardMode || Verbose)) {
+    tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d",
+                  itable_index, s->entry_point(),
+                  (int)(s->code_end() - s->entry_point()),
+                  (int)(s->code_end() - __ pc()));
+  }
   guarantee(__ pc() <= s->code_end(), "overflowed buffer");
 
   s->set_exception_points(npe_addr, ame_addr);
@@ -230,7 +203,7 @@
            (UseCompressedOops ? 16 : 0);  // 1 leaq can be 3 bytes + 1 long
   } else {
     // Itable stub size
-    return (DebugVtables ? 636 : 72) + (CountCompiledCalls ? 13 : 0) +
+    return (DebugVtables ? 512 : 72) + (CountCompiledCalls ? 13 : 0) +
            (UseCompressedOops ? 32 : 0);  // 2 leaqs
   }
 }
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/x86_32.ad
--- a/src/cpu/x86/vm/x86_32.ad	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+// Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -3126,14 +3126,12 @@
 
   enc_class movq_ld(regXD dst, memory mem) %{
     MacroAssembler _masm(&cbuf);
-    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ movq(as_XMMRegister($dst$$reg), madr);
+    __ movq($dst$$XMMRegister, $mem$$Address);
   %}
 
   enc_class movq_st(memory mem, regXD src) %{
     MacroAssembler _masm(&cbuf);
-    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ movq(madr, as_XMMRegister($src$$reg));
+    __ movq($mem$$Address, $src$$XMMRegister);
   %}
 
   enc_class pshufd_8x8(regX dst, regX src) %{
@@ -3751,8 +3749,8 @@
     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
 
     // Load first characters
-    masm.load_unsigned_word(rcx, Address(rbx, 0));
-    masm.load_unsigned_word(rdi, Address(rax, 0));
+    masm.load_unsigned_short(rcx, Address(rbx, 0));
+    masm.load_unsigned_short(rdi, Address(rax, 0));
 
     // Compare first characters
     masm.subl(rcx, rdi);
@@ -3782,8 +3780,8 @@
 
     // Compare the rest of the characters
     masm.bind(WHILE_HEAD_LABEL);
-    masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
-    masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
+    masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
+    masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
     masm.subl(rcx, rdi);
     masm.jcc(Assembler::notZero, POP_LABEL);
     masm.incrementl(rsi);
@@ -3840,8 +3838,8 @@
     masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
 
     // Compare 2-byte "tail" at end of arrays
-    masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
-    masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
+    masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
+    masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
     masm.cmpl(tmp1Reg, tmp2Reg);
     masm.jcc(Assembler::notEqual, FALSE_LABEL);
     masm.testl(resultReg, resultReg);
@@ -6396,21 +6394,94 @@
   match(Set dst (LoadB mem));
 
   ins_cost(125);
-  format %{ "MOVSX8 $dst,$mem" %}
-  opcode(0xBE, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
-%}
-
-// Load Byte (8bit UNsigned)
-instruct loadUB(xRegI dst, memory mem, immI_255 bytemask) %{
-  match(Set dst (AndI (LoadB mem) bytemask));
+  format %{ "MOVSX8 $dst,$mem\t# byte" %}
+
+  ins_encode %{
+    __ movsbl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Byte (8bit signed) into Long Register
+instruct loadB2L(eRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadB mem)));
+
+  ins_cost(375);
+  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
+            "MOV    $dst.hi,$dst.lo\n\t"
+            "SAR    $dst.hi,7" %}
+
+  ins_encode %{
+    __ movsbl($dst$$Register, $mem$$Address);
+    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
+    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign extended.
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Byte (8bit UNsigned)
+instruct loadUB(xRegI dst, memory mem) %{
+  match(Set dst (LoadUB mem));
 
   ins_cost(125);
-  format %{ "MOVZX8 $dst,$mem" %}
-  opcode(0xB6, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
+  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
+
+  ins_encode %{
+    __ movzbl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Byte (8 bit UNsigned) into Long Register
+instruct loadUB2L(eRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUB mem)));
+
+  ins_cost(250);
+  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+
+  ins_encode %{
+    __ movzbl($dst$$Register, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Short (16bit signed)
+instruct loadS(eRegI dst, memory mem) %{
+  match(Set dst (LoadS mem));
+
+  ins_cost(125);
+  format %{ "MOVSX  $dst,$mem\t# short" %}
+
+  ins_encode %{
+    __ movswl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Short (16bit signed) into Long Register
+instruct loadS2L(eRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadS mem)));
+
+  ins_cost(375);
+  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
+            "MOV    $dst.hi,$dst.lo\n\t"
+            "SAR    $dst.hi,15" %}
+
+  ins_encode %{
+    __ movswl($dst$$Register, $mem$$Address);
+    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
+    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign extended.
+  %}
+
+  ins_pipe(ialu_reg_mem);
 %}
 
 // Load Unsigned Short/Char (16bit unsigned)
@@ -6418,10 +6489,30 @@
   match(Set dst (LoadUS mem));
 
   ins_cost(125);
-  format %{ "MOVZX  $dst,$mem" %}
-  opcode(0xB7, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
+  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
+
+  ins_encode %{
+    __ movzwl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
+instruct loadUS2L(eRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUS mem)));
+
+  ins_cost(250);
+  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+
+  ins_encode %{
+    __ movzwl($dst$$Register, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
+  %}
+
+  ins_pipe(ialu_reg_mem);
 %}
 
 // Load Integer
@@ -6429,10 +6520,47 @@
   match(Set dst (LoadI mem));
 
   ins_cost(125);
-  format %{ "MOV    $dst,$mem" %}
-  opcode(0x8B);
-  ins_encode( OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
+  format %{ "MOV    $dst,$mem\t# int" %}
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Integer into Long Register
+instruct loadI2L(eRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadI mem)));
+
+  ins_cost(375);
+  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
+            "MOV    $dst.hi,$dst.lo\n\t"
+            "SAR    $dst.hi,31" %}
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
+    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Integer into Long Register
+instruct loadUI2L(eRegL dst, memory mem) %{
+  match(Set dst (LoadUI2L mem));
+
+  ins_cost(250);
+  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
+            "XOR    $dst.hi,$dst.hi" %}
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
+  %}
+
+  ins_pipe(ialu_reg_mem);
 %}
 
 // Load Long.  Cannot clobber address while loading, so restrict address
@@ -6442,11 +6570,17 @@
   match(Set dst (LoadL mem));
 
   ins_cost(250);
-  format %{ "MOV    $dst.lo,$mem\n\t"
+  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
             "MOV    $dst.hi,$mem+4" %}
-  opcode(0x8B, 0x8B);
-  ins_encode( OpcP, RegMem(dst,mem), OpcS, RegMem_Hi(dst,mem));
-  ins_pipe( ialu_reg_long_mem );
+
+  ins_encode %{
+    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
+    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
+    __ movl($dst$$Register, Amemlo);
+    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
+  %}
+
+  ins_pipe(ialu_reg_long_mem);
 %}
 
 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
@@ -6521,17 +6655,6 @@
   ins_pipe( ialu_reg_mem );
 %}
 
-// Load Short (16bit signed)
-instruct loadS(eRegI dst, memory mem) %{
-  match(Set dst (LoadS mem));
-
-  ins_cost(125);
-  format %{ "MOVSX  $dst,$mem" %}
-  opcode(0xBF, 0x0F);
-  ins_encode( OpcS, OpcP, RegMem(dst,mem));
-  ins_pipe( ialu_reg_mem );
-%}
-
 // Load Double
 instruct loadD(regD dst, memory mem) %{
   predicate(UseSSE<=1);
@@ -7957,7 +8080,7 @@
     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
     if( os::is_MP() )
       __ lock();
-    __ cmpxchg8(Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp));
+    __ cmpxchg8($mem$$Address);
     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
   %}
   ins_pipe( pipe_cmpxchg );
@@ -11467,6 +11590,7 @@
 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
   match(Set dst (ConvI2L src));
   effect(KILL cr);
+  ins_cost(375);
   format %{ "MOV    $dst.lo,$src\n\t"
             "MOV    $dst.hi,$src\n\t"
             "SAR    $dst.hi,31" %}
@@ -11478,6 +11602,7 @@
 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
   match(Set dst (AndL (ConvI2L src) mask) );
   effect( KILL flags );
+  ins_cost(250);
   format %{ "MOV    $dst.lo,$src\n\t"
             "XOR    $dst.hi,$dst.hi" %}
   opcode(0x33); // XOR
@@ -11489,6 +11614,7 @@
 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
   match(Set dst (AndL src mask) );
   effect( KILL flags );
+  ins_cost(250);
   format %{ "MOV    $dst.lo,$src.lo\n\t"
             "XOR    $dst.hi,$dst.hi\n\t" %}
   opcode(0x33); // XOR
diff -r 09f82af55c3e -r fe2441500281 src/cpu/x86/vm/x86_64.ad
--- a/src/cpu/x86/vm/x86_64.ad	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
+// Copyright 2003-2009 Sun Microsystems, Inc.  All Rights Reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -3462,14 +3462,12 @@
 
   enc_class movq_ld(regD dst, memory mem) %{
     MacroAssembler _masm(&cbuf);
-    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ movq(as_XMMRegister($dst$$reg), madr);
+    __ movq($dst$$XMMRegister, $mem$$Address);
   %}
 
   enc_class movq_st(memory mem, regD src) %{
     MacroAssembler _masm(&cbuf);
-    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
-    __ movq(madr, as_XMMRegister($src$$reg));
+    __ movq($mem$$Address, $src$$XMMRegister);
   %}
 
   enc_class pshufd_8x8(regF dst, regF src) %{
@@ -3765,8 +3763,8 @@
     masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
 
     // Load first characters
-    masm.load_unsigned_word(rcx, Address(rbx, 0));
-    masm.load_unsigned_word(rdi, Address(rax, 0));
+    masm.load_unsigned_short(rcx, Address(rbx, 0));
+    masm.load_unsigned_short(rdi, Address(rax, 0));
 
     // Compare first characters
     masm.subl(rcx, rdi);
@@ -3796,8 +3794,8 @@
 
     // Compare the rest of the characters
     masm.bind(WHILE_HEAD_LABEL);
-    masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
-    masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
+    masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
+    masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
     masm.subl(rcx, rdi);
     masm.jcc(Assembler::notZero, POP_LABEL);
     masm.increment(rsi);
@@ -3854,8 +3852,8 @@
     masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
 
     // Compare 2-byte "tail" at end of arrays
-    masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
-    masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
+    masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
+    masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
     masm.cmpl(tmp1Reg, tmp2Reg);
     masm.jcc(Assembler::notEqual, FALSE_LABEL);
     masm.testl(resultReg, resultReg);
@@ -6031,70 +6029,88 @@
 
   ins_cost(125);
   format %{ "movsbl  $dst, $mem\t# byte" %}
-  opcode(0x0F, 0xBE);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movsbl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Byte (8 bit signed) into long
-// instruct loadB2L(rRegL dst, memory mem)
-// %{
-//   match(Set dst (ConvI2L (LoadB mem)));
-
-//   ins_cost(125);
-//   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
-//   opcode(0x0F, 0xBE);
-//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
-//   ins_pipe(ialu_reg_mem);
-// %}
-
-// Load Byte (8 bit UNsigned)
-instruct loadUB(rRegI dst, memory mem, immI_255 bytemask)
-%{
-  match(Set dst (AndI (LoadB mem) bytemask));
+// Load Byte (8 bit signed) into Long Register
+instruct loadB2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadB mem)));
+
+  ins_cost(125);
+  format %{ "movsbq  $dst, $mem\t# byte -> long" %}
+
+  ins_encode %{
+    __ movsbq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Byte (8 bit UNsigned)
+instruct loadUB(rRegI dst, memory mem)
+%{
+  match(Set dst (LoadUB mem));
 
   ins_cost(125);
   format %{ "movzbl  $dst, $mem\t# ubyte" %}
-  opcode(0x0F, 0xB6);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movzbl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Byte (8 bit UNsigned) into long
-// instruct loadUB2L(rRegL dst, memory mem, immI_255 bytemask)
-// %{
-//   match(Set dst (ConvI2L (AndI (LoadB mem) bytemask)));
-
-//   ins_cost(125);
-//   format %{ "movzbl  $dst, $mem\t# ubyte -> long" %}
-//   opcode(0x0F, 0xB6);
-//   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
-//   ins_pipe(ialu_reg_mem);
-// %}
+// Load Unsigned Byte (8 bit UNsigned) into Long Register
+instruct loadUB2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUB mem)));
+
+  ins_cost(125);
+  format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
+
+  ins_encode %{
+    __ movzbq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
 
 // Load Short (16 bit signed)
 instruct loadS(rRegI dst, memory mem)
 %{
   match(Set dst (LoadS mem));
 
-  ins_cost(125); // XXX
+  ins_cost(125);
   format %{ "movswl $dst, $mem\t# short" %}
-  opcode(0x0F, 0xBF);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movswl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Short (16 bit signed) into long
-// instruct loadS2L(rRegL dst, memory mem)
-// %{
-//   match(Set dst (ConvI2L (LoadS mem)));
-
-//   ins_cost(125); // XXX
-//   format %{ "movswq $dst, $mem\t# short -> long" %}
-//   opcode(0x0F, 0xBF);
-//   ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
-//   ins_pipe(ialu_reg_mem);
-// %}
+// Load Short (16 bit signed) into Long Register
+instruct loadS2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadS mem)));
+
+  ins_cost(125);
+  format %{ "movswq $dst, $mem\t# short -> long" %}
+
+  ins_encode %{
+    __ movswq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
 
 // Load Unsigned Short/Char (16 bit UNsigned)
 instruct loadUS(rRegI dst, memory mem)
@@ -6103,32 +6119,71 @@
 
   ins_cost(125);
   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
-  opcode(0x0F, 0xB7);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movzwl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
-// Load Unsigned Short/Char (16 bit UNsigned) into long
-// instruct loadUS2L(rRegL dst, memory mem)
-// %{
-//   match(Set dst (ConvI2L (LoadUS mem)));
-
-//   ins_cost(125);
-//   format %{ "movzwl  $dst, $mem\t# ushort/char -> long" %}
-//   opcode(0x0F, 0xB7);
-//   ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem));
-//   ins_pipe(ialu_reg_mem);
-// %}
+// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
+instruct loadUS2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadUS mem)));
+
+  ins_cost(125);
+  format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
+
+  ins_encode %{
+    __ movzwq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
 
 // Load Integer
 instruct loadI(rRegI dst, memory mem)
 %{
   match(Set dst (LoadI mem));
 
-  ins_cost(125); // XXX
+  ins_cost(125);
   format %{ "movl    $dst, $mem\t# int" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Integer into Long Register
+instruct loadI2L(rRegL dst, memory mem)
+%{
+  match(Set dst (ConvI2L (LoadI mem)));
+
+  ins_cost(125);
+  format %{ "movslq  $dst, $mem\t# int -> long" %}
+
+  ins_encode %{
+    __ movslq($dst$$Register, $mem$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+// Load Unsigned Integer into Long Register
+instruct loadUI2L(rRegL dst, memory mem)
+%{
+  match(Set dst (LoadUI2L mem));
+
+  ins_cost(125);
+  format %{ "movl    $dst, $mem\t# uint -> long" %}
+
+  ins_encode %{
+    __ movl($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem);
 %}
 
@@ -6137,10 +6192,13 @@
 %{
   match(Set dst (LoadL mem));
 
-  ins_cost(125); // XXX
+  ins_cost(125);
   format %{ "movq    $dst, $mem\t# long" %}
-  opcode(0x8B);
-  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+
+  ins_encode %{
+    __ movq($dst$$Register, $mem$$Address);
+  %}
+
   ins_pipe(ialu_reg_mem); // XXX
 %}
 
@@ -10804,16 +10862,6 @@
 //   ins_pipe(ialu_reg_reg);
 // %}
 
-instruct convI2L_reg_mem(rRegL dst, memory src)
-%{
-  match(Set dst (ConvI2L (LoadI src)));
-
-  format %{ "movslq  $dst, $src\t# i2l" %}
-  opcode(0x63); // needs REX.W
-  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst,src));
-  ins_pipe(ialu_reg_mem);
-%}
-
 // Zero-extend convert int to long
 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
 %{
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/adlc/forms.cpp
--- a/src/share/vm/adlc/forms.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/adlc/forms.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -70,6 +70,7 @@
   else return (_iter <_cur-1 ? _names[++_iter] : NULL);
 }
 const char  *NameList::current() { return (_iter < _cur ? _names[_iter] : NULL); }
+const char  *NameList::peek(int skip) { return (_iter + skip < _cur ? _names[_iter + skip] : NULL); }  // entry 'skip' positions past the current one, without moving _iter; NULL when _iter + skip is not below _cur
 
 // Return 'true' if current entry is signal
 bool  NameList::current_is_signal() {
@@ -248,11 +249,13 @@
 // True if 'opType', an ideal name, loads or stores.
 Form::DataType Form::is_load_from_memory(const char *opType) const {
   if( strcmp(opType,"LoadB")==0 )  return Form::idealB;
+  if( strcmp(opType,"LoadUB")==0 )  return Form::idealB;
   if( strcmp(opType,"LoadUS")==0 )  return Form::idealC;
   if( strcmp(opType,"LoadD")==0 )  return Form::idealD;
   if( strcmp(opType,"LoadD_unaligned")==0 )  return Form::idealD;
   if( strcmp(opType,"LoadF")==0 )  return Form::idealF;
   if( strcmp(opType,"LoadI")==0 )  return Form::idealI;
+  if( strcmp(opType,"LoadUI2L")==0 )  return Form::idealI;
   if( strcmp(opType,"LoadKlass")==0 )  return Form::idealP;
   if( strcmp(opType,"LoadNKlass")==0 ) return Form::idealN;
   if( strcmp(opType,"LoadL")==0 )  return Form::idealL;
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/adlc/forms.hpp
--- a/src/share/vm/adlc/forms.hpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/adlc/forms.hpp	Fri Mar 13 17:06:44 2009 -0700
@@ -342,6 +342,7 @@
   void  reset();                   // Reset iteration
   const char *iter();              // after reset(), first element : else next
   const char *current();           // return current element in iteration.
+  const char *peek(int skip = 1);  // returns element + skip in iteration if there is one
 
   bool  current_is_signal();       // Return 'true' if current entry is signal
   bool  is_signal(const char *entry); // Return true if entry is a signal
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/adlc/formssel.cpp
--- a/src/share/vm/adlc/formssel.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/adlc/formssel.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -3310,8 +3310,8 @@
   static const char *needs_ideal_memory_list[] = {
     "StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" ,
     "StoreB","StoreC","Store" ,"StoreFP",
-    "LoadI" ,"LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF"  ,
-    "LoadB" ,"LoadUS" ,"LoadS" ,"Load"   ,
+    "LoadI", "LoadUI2L", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF"  ,
+    "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load"   ,
     "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B",
     "Store8B","Store4B","Store8C","Store4C","Store2C",
     "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" ,
@@ -3431,10 +3431,16 @@
     const InstructForm *form2_inst = form2 ? form2->is_instruction() : NULL;
     const char *name_left  = mRule2->_lChild ? mRule2->_lChild->_opType : NULL;
     const char *name_right = mRule2->_rChild ? mRule2->_rChild->_opType : NULL;
+    DataType data_type = Form::none;
+    if (form->is_operand()) {
+      // Make sure the loadX matches the type of the reg
+      data_type = form->ideal_to_Reg_type(form->is_operand()->ideal_type(globals));
+    }
     // Detect reg vs (loadX memory)
     if( form->is_cisc_reg(globals)
         && form2_inst
-        && (is_load_from_memory(mRule2->_opType) != Form::none) // reg vs. (load memory)
+        && data_type != Form::none
+        && (is_load_from_memory(mRule2->_opType) == data_type) // reg vs. (load memory)
         && (name_left != NULL)       // NOT (load)
         && (name_right == NULL) ) {  // NOT (load memory foo)
       const Form *form2_left = name_left ? globals[name_left] : NULL;
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/adlc/output_c.cpp
--- a/src/share/vm/adlc/output_c.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/adlc/output_c.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -2139,8 +2139,59 @@
         // A subfield variable, '$$' prefix
         emit_field( rep_var );
       } else {
-        // A replacement variable, '$' prefix
-        emit_rep_var( rep_var );
+        if (_strings_to_emit.peek() != NULL &&
+            strcmp(_strings_to_emit.peek(), "$Address") == 0) {
+          fprintf(_fp, "Address::make_raw(");
+
+          emit_rep_var( rep_var );
+          fprintf(_fp,"->base(ra_,this,idx%d), ", _operand_idx);
+
+          _reg_status = LITERAL_ACCESSED;
+          emit_rep_var( rep_var );
+          fprintf(_fp,"->index(ra_,this,idx%d), ", _operand_idx);
+
+          _reg_status = LITERAL_ACCESSED;
+          emit_rep_var( rep_var );
+          fprintf(_fp,"->scale(), ");
+
+          _reg_status = LITERAL_ACCESSED;
+          emit_rep_var( rep_var );
+          Form::DataType stack_type = _operand ? _operand->is_user_name_for_sReg() : Form::none;
+          if( _operand  && _operand_idx==0 && stack_type != Form::none ) {
+            fprintf(_fp,"->disp(ra_,this,0), ");
+          } else {
+            fprintf(_fp,"->disp(ra_,this,idx%d), ", _operand_idx);
+          }
+
+          _reg_status = LITERAL_ACCESSED;
+          emit_rep_var( rep_var );
+          fprintf(_fp,"->disp_is_oop())");
+
+          // skip trailing $Address
+          _strings_to_emit.iter();
+        } else {
+          // A replacement variable, '$' prefix
+          const char* next = _strings_to_emit.peek();
+          const char* next2 = _strings_to_emit.peek(2);
+          if (next != NULL && next2 != NULL && strcmp(next2, "$Register") == 0 &&
+              (strcmp(next, "$base") == 0 || strcmp(next, "$index") == 0)) {
+            // handle $rep_var$$base$$Register and $rep_var$$index$$Register by
+            // producing as_Register(opnd_array(#)->base(ra_,this,idx1)).
+            fprintf(_fp, "as_Register(");
+            // emit the operand reference
+            emit_rep_var( rep_var );
+            rep_var = _strings_to_emit.iter();
+            assert(strcmp(rep_var, "$base") == 0 || strcmp(rep_var, "$index") == 0, "bad pattern");
+            // handle base or index
+            emit_field(rep_var);
+            rep_var = _strings_to_emit.iter();
+            assert(strcmp(rep_var, "$Register") == 0, "bad pattern");
+            // close up the parens
+            fprintf(_fp, ")");
+          } else {
+            emit_rep_var( rep_var );
+          }
+        }
       } // end replacement and/or subfield
     }
   }
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/asm/assembler.cpp
--- a/src/share/vm/asm/assembler.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/asm/assembler.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -239,6 +239,78 @@
   }
 }
 
+struct DelayedConstant {  // one table slot: a value-producing function plus its cached result
+  typedef void (*value_fn_t)();  // type-erased function pointer; update_all() casts it back according to 'type'
+  BasicType type;  // T_INT and T_ADDRESS are the kinds update_all() knows how to call
+  intptr_t value;  // cached result; update_all() treats 0 as "not computed yet"
+  value_fn_t value_fn;  // NULL marks a free slot (see add())
+  // This limit of 20 is generous for initial uses.
+  // The limit needs to be large enough to store the field offsets
+  // into classes which do not have statically fixed layouts.
+  // (Initial use is for method handle object offsets.)
+  // Look for uses of "delayed_value" in the source code
+  // and make sure this number is generous enough to handle all of them.
+  enum { DC_LIMIT = 20 };
+  static DelayedConstant delayed_constants[DC_LIMIT];  // the fixed-size slot table
+  static DelayedConstant* add(BasicType type, value_fn_t value_fn);  // find or claim the slot for (type, value_fn)
+  bool match(BasicType t, value_fn_t cfn) {  // true if this slot was registered for exactly (t, cfn)
+    return type == t && value_fn == cfn;
+  }
+  static void update_all();  // compute every registered value that is still 0
+};
+
+DelayedConstant DelayedConstant::delayed_constants[DC_LIMIT];
+// Default C structure initialization rules have the following effect here:
+// = { { (BasicType)0, (intptr_t)NULL }, ... };
+
+DelayedConstant* DelayedConstant::add(BasicType type,
+                                      DelayedConstant::value_fn_t cfn) {  // find or claim the slot for (type, cfn)
+  for (int i = 0; i < DC_LIMIT; i++) {
+    DelayedConstant* dcon = &delayed_constants[i];
+    if (dcon->match(type, cfn))
+      return dcon;  // already registered: hand back the existing slot
+    if (dcon->value_fn == NULL) {  // free slot: try to claim it
+      // (cmpxchg not because this is multi-threaded but because I'm paranoid)
+      if (Atomic::cmpxchg_ptr(CAST_FROM_FN_PTR(void*, cfn), &dcon->value_fn, NULL) == NULL) {
+        dcon->type = type;  // the type is filled in only after the slot has been claimed
+        return dcon;
+      }
+    }
+  }
+  // If this assert is hit (in pre-integration testing!) then re-evaluate
+  // the comment on the definition of DC_LIMIT.
+  guarantee(false, "too many delayed constants");
+  return NULL;
+}
+
+void DelayedConstant::update_all() {
+  typedef int     (*int_fn_t)();
+  typedef address (*address_fn_t)();
+  // Fill in every registered slot whose value is still the "unknown" marker 0.
+  for (DelayedConstant* slot = delayed_constants; slot < delayed_constants + DC_LIMIT; slot++) {
+    if (slot->value_fn == NULL || slot->value != 0)  continue;  // unused, or already resolved
+    switch (slot->type) {
+    case T_INT:     slot->value = (intptr_t) ((int_fn_t)    slot->value_fn)(); break;
+    case T_ADDRESS: slot->value = (intptr_t) ((address_fn_t)slot->value_fn)(); break;
+    // any other type: nothing is called and the slot stays at 0
+    }
+  }
+}
+
+intptr_t* AbstractAssembler::delayed_value_addr(int(*value_fn)()) {
+  // Register (or look up) the int-returning function and hand back its value cell.
+  return &DelayedConstant::add(T_INT, (DelayedConstant::value_fn_t) value_fn)->value;
+}
+intptr_t* AbstractAssembler::delayed_value_addr(address(*value_fn)()) {
+  // Same as above for an address-returning function.
+  return &DelayedConstant::add(T_ADDRESS, (DelayedConstant::value_fn_t) value_fn)->value;
+}
+void AbstractAssembler::update_delayed_values() {
+  DelayedConstant::update_all();  // compute all registered values that are still 0
+}
+
+
+
 
 void AbstractAssembler::block_comment(const char* comment) {
   if (sect() == CodeBuffer::SECT_INSTS) {
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/asm/assembler.hpp
--- a/src/share/vm/asm/assembler.hpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/asm/assembler.hpp	Fri Mar 13 17:06:44 2009 -0700
@@ -140,6 +140,28 @@
   }
 };
 
+// A "register or constant" operand for code which has to assemble both
+// constant and non-constant operands, when the distinction cannot be made
+// statically.  Both fields are always stored; _r == noreg means "constant".
+class RegisterConstant VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _r;  // the register, or noreg when this holds a constant
+  intptr_t _c;  // the constant; 0 when constructed from a register
+
+ public:
+  RegisterConstant(): _r(noreg), _c(0) {}  // default: the constant 0
+  RegisterConstant(Register r): _r(r), _c(0) {}  // note: passing noreg yields the constant 0
+  RegisterConstant(intptr_t c): _r(noreg), _c(c) {}
+
+  Register as_register() const { assert(is_register(),""); return _r; }  // checked: asserts is_register()
+  intptr_t as_constant() const { assert(is_constant(),""); return _c; }  // checked: asserts is_constant()
+
+  Register register_or_noreg() const { return _r; }  // unchecked: noreg for a constant
+  intptr_t constant_or_zero() const  { return _c; }  // unchecked: 0 for a register
+
+  bool is_register() const { return _r != noreg; }
+  bool is_constant() const { return _r == noreg; }
+};
 
 // The Abstract Assembler: Pure assembler doing NO optimizations on the
 // instruction level; i.e., what you write is what you get.
@@ -280,6 +302,26 @@
   inline address address_constant(Label& L);
   inline address address_table_constant(GrowableArray<Label*> label);
 
+  // Bootstrapping aid to cope with delayed determination of constants.
+  // Returns a static address which will eventually contain the constant.
+  // The value zero (NULL) stands instead of a constant which is still uncomputed.
+  // Thus, the eventual value of the constant must not be zero.
+  // This is fine, since this is designed for embedding object field
+  // offsets in code which must be generated before the object class is loaded.
+  // Field offsets are never zero, since an object's header (mark word)
+  // is located at offset zero.
+  RegisterConstant delayed_value(int(*value_fn)(), Register tmp, int offset = 0) {
+    return delayed_value(delayed_value_addr(value_fn), tmp, offset);
+  }
+  RegisterConstant delayed_value(address(*value_fn)(), Register tmp, int offset = 0) {
+    return delayed_value(delayed_value_addr(value_fn), tmp, offset);
+  }
+  virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, Register tmp, int offset) = 0;
+  // Last overloading is platform-dependent; look in assembler_<arch>.cpp.
+  static intptr_t* delayed_value_addr(int(*constant_fn)());
+  static intptr_t* delayed_value_addr(address(*constant_fn)());
+  static void update_delayed_values();
+
   // Bang stack to trigger StackOverflowError at a safe location
   // implementation delegates to machine-specific bang_stack_with_offset
   void generate_stack_overflow_check( int frame_size_in_bytes );
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/opto/classes.hpp
--- a/src/share/vm/opto/classes.hpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/opto/classes.hpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,11 +129,13 @@
 macro(LShiftI)
 macro(LShiftL)
 macro(LoadB)
+macro(LoadUB)
 macro(LoadUS)
 macro(LoadD)
 macro(LoadD_unaligned)
 macro(LoadF)
 macro(LoadI)
+macro(LoadUI2L)
 macro(LoadKlass)
 macro(LoadNKlass)
 macro(LoadL)
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/opto/compile.cpp
--- a/src/share/vm/opto/compile.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/opto/compile.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2005,8 +2005,10 @@
   case Op_StoreP:
   case Op_StoreN:
   case Op_LoadB:
+  case Op_LoadUB:
   case Op_LoadUS:
   case Op_LoadI:
+  case Op_LoadUI2L:
   case Op_LoadKlass:
   case Op_LoadNKlass:
   case Op_LoadL:
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/opto/gcm.cpp
--- a/src/share/vm/opto/gcm.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/opto/gcm.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1901,7 +1901,8 @@
   for (int i = 0; i < _members.length(); i++) {
     CFGElement* s = _members.at(i);
     float block_freq = s->_freq * loop_freq;
-    if (block_freq < MIN_BLOCK_FREQUENCY) block_freq = MIN_BLOCK_FREQUENCY;
+    if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY)
+      block_freq = MIN_BLOCK_FREQUENCY;
     s->_freq = block_freq;
   }
   CFGLoop* ch = _child;
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/opto/memnode.cpp
--- a/src/share/vm/opto/memnode.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/opto/memnode.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -778,7 +778,7 @@
            adr_type->offset() == arrayOopDesc::length_offset_in_bytes()),
          "use LoadRangeNode instead");
   switch (bt) {
-  case T_BOOLEAN:
+  case T_BOOLEAN: return new (C, 3) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int()    );
   case T_BYTE:    return new (C, 3) LoadBNode (ctl, mem, adr, adr_type, rt->is_int()    );
   case T_INT:     return new (C, 3) LoadINode (ctl, mem, adr, adr_type, rt->is_int()    );
   case T_CHAR:    return new (C, 3) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int()    );
@@ -1616,6 +1616,22 @@
   return LoadNode::Ideal(phase, can_reshape);
 }
 
+//--------------------------LoadUBNode::Ideal-------------------------------------
+//
+//  When can_see_stored_value() finds a stored value for this load whose
+//  type is not already within the load's type, return that value masked
+//  with 0xFF in place of the load.  When no masking is needed the
+//  replacement is left to LoadNode::Identity().
+//
+Node* LoadUBNode::Ideal(PhaseGVN* phase, bool can_reshape) {
+  Node* stored = can_see_stored_value(in(MemNode::Memory), phase);
+  const bool needs_mask = (stored != NULL) && !phase->type(stored)->higher_equal(_type);
+  if (needs_mask) {  // stored value is not known to fit the load's type: keep only the low byte
+    return new (phase->C, 3) AndINode(stored, phase->intcon(0xFF));
+  }
+  return LoadNode::Ideal(phase, can_reshape);  // Identity handles the no-truncation case
+}
+
 //--------------------------LoadUSNode::Ideal-------------------------------------
 //
 //  If the previous store is to the same address as this load,
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/opto/memnode.hpp
--- a/src/share/vm/opto/memnode.hpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/opto/memnode.hpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -207,6 +207,19 @@
   virtual BasicType memory_type() const { return T_BYTE; }
 };
 
+//------------------------------LoadUBNode-------------------------------------
+// Load an unsigned byte (8 bits unsigned) from memory
+class LoadUBNode : public LoadNode {
+public:
+  LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti = TypeInt::UBYTE )
+    : LoadNode(c, mem, adr, at, ti) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Op_RegI; }  // the result is an int-register value
+  virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);  // defined in memnode.cpp
+  virtual int store_Opcode() const { return Op_StoreB; }  // the store kind paired with this load
+  virtual BasicType memory_type() const { return T_BYTE; }  // the memory element is a byte
+};
+
 //------------------------------LoadUSNode-------------------------------------
 // Load an unsigned short/char (16bits unsigned) from memory
 class LoadUSNode : public LoadNode {
@@ -232,6 +245,18 @@
   virtual BasicType memory_type() const { return T_INT; }
 };
 
+//------------------------------LoadUI2LNode-----------------------------------
+// Load an unsigned integer (32 bits) from memory; the result is a long (default type TypeLong::UINT)
+class LoadUI2LNode : public LoadNode {
+public:
+  LoadUI2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong* t = TypeLong::UINT)
+    : LoadNode(c, mem, adr, at, t) {}
+  virtual int Opcode() const;
+  virtual uint ideal_reg() const { return Op_RegL; }  // the result is a long-register value ...
+  virtual int store_Opcode() const { return Op_StoreI; }  // ... but the memory accessed is an int, so pair with StoreI
+  virtual BasicType memory_type() const { return T_INT; }  // 32-bit memory element, not T_LONG
+};
+
 //------------------------------LoadRangeNode----------------------------------
 // Load an array length from the array
 class LoadRangeNode : public LoadINode {
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/opto/mulnode.cpp
--- a/src/share/vm/opto/mulnode.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/opto/mulnode.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -486,20 +486,23 @@
     return new (phase->C, 3) AndINode(ldus, phase->intcon(mask&0xFFFF));
   }
 
-  // Masking sign bits off of a Byte?  Let the matcher use an unsigned load
-  if( lop == Op_LoadB &&
-      (!in(0) && load->in(0)) &&
-      (mask == 0x000000FF) ) {
-    // Associate this node with the LoadB, so the matcher can see them together.
-    // If we don't do this, it is common for the LoadB to have one control
-    // edge, and the store or call containing this AndI to have a different
-    // control edge.  This will cause Label_Root to group the AndI with
-    // the encoding store or call, so the matcher has no chance to match
-    // this AndI together with the LoadB.  Setting the control edge here
-    // prevents Label_Root from grouping the AndI with the store or call,
-    // if it has a control edge that is inconsistent with the LoadB.
-    set_req(0, load->in(0));
-    return this;
+  // Masking sign bits off of a Byte?  Do an unsigned byte load.
+  if (lop == Op_LoadB && mask == 0x000000FF) {
+    return new (phase->C, 3) LoadUBNode(load->in(MemNode::Control),
+                                        load->in(MemNode::Memory),
+                                        load->in(MemNode::Address),
+                                        load->adr_type());
+  }
+
+  // Masking sign bits off of a Byte plus additional lower bits?  Do
+  // an unsigned byte load plus an and.
+  if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
+    Node* ldub = new (phase->C, 3) LoadUBNode(load->in(MemNode::Control),
+                                              load->in(MemNode::Memory),
+                                              load->in(MemNode::Address),
+                                              load->adr_type());
+    ldub = phase->transform(ldub);
+    return new (phase->C, 3) AndINode(ldub, phase->intcon(mask));
   }
 
   // Masking off sign bits?  Dont make them!
@@ -599,12 +602,21 @@
   if( !t2 || !t2->is_con() ) return MulNode::Ideal(phase, can_reshape);
   const jlong mask = t2->get_con();
 
-  Node *rsh = in(1);
-  uint rop = rsh->Opcode();
+  Node* in1 = in(1);
+  uint op = in1->Opcode();
+
+  // Masking sign bits off of an integer?  Do an unsigned integer to long load.
+  if (op == Op_ConvI2L && in1->in(1)->Opcode() == Op_LoadI && mask == 0x00000000FFFFFFFFL) {
+    Node* load = in1->in(1);
+    return new (phase->C, 3) LoadUI2LNode(load->in(MemNode::Control),
+                                          load->in(MemNode::Memory),
+                                          load->in(MemNode::Address),
+                                          load->adr_type());
+  }
 
   // Masking off sign bits?  Dont make them!
-  if( rop == Op_RShiftL ) {
-    const TypeInt *t12 = phase->type(rsh->in(2))->isa_int();
+  if (op == Op_RShiftL) {
+    const TypeInt *t12 = phase->type(in1->in(2))->isa_int();
     if( t12 && t12->is_con() ) { // Shift is by a constant
       int shift = t12->get_con();
       shift &= BitsPerJavaLong - 1;  // semantics of Java shifts
@@ -613,7 +625,7 @@
       // bits survive.  NO sign-extension bits survive the maskings.
       if( (sign_bits_mask & mask) == 0 ) {
         // Use zero-fill shift instead
-        Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(rsh->in(1),rsh->in(2)));
+        Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(in1->in(1), in1->in(2)));
         return new (phase->C, 3) AndLNode( zshift, in(2) );
       }
     }
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/opto/type.cpp
--- a/src/share/vm/opto/type.cpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/opto/type.cpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -226,6 +226,7 @@
   TypeInt::CC_LE   = TypeInt::make(-1, 0, WidenMin);
   TypeInt::CC_GE   = TypeInt::make( 0, 1, WidenMin);  // == TypeInt::BOOL
   TypeInt::BYTE    = TypeInt::make(-128,127,     WidenMin); // Bytes
+  TypeInt::UBYTE   = TypeInt::make(0, 255,       WidenMin); // Unsigned Bytes
   TypeInt::CHAR    = TypeInt::make(0,65535,      WidenMin); // Java chars
   TypeInt::SHORT   = TypeInt::make(-32768,32767, WidenMin); // Java shorts
   TypeInt::POS     = TypeInt::make(0,max_jint,   WidenMin); // Non-neg values
@@ -1022,6 +1023,7 @@
 const TypeInt *TypeInt::CC_LE;  // [-1,0]
 const TypeInt *TypeInt::CC_GE;  // [0,1] == BOOL (!)
 const TypeInt *TypeInt::BYTE;   // Bytes, -128 to 127
+const TypeInt *TypeInt::UBYTE;  // Unsigned Bytes, 0 to 255
 const TypeInt *TypeInt::CHAR;   // Java chars, 0-65535
 const TypeInt *TypeInt::SHORT;  // Java shorts, -32768-32767
 const TypeInt *TypeInt::POS;    // Positive 32-bit integers or zero
diff -r 09f82af55c3e -r fe2441500281 src/share/vm/opto/type.hpp
--- a/src/share/vm/opto/type.hpp	Fri Mar 13 13:56:01 2009 -0700
+++ b/src/share/vm/opto/type.hpp	Fri Mar 13 17:06:44 2009 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -415,6 +415,7 @@
   static const TypeInt *CC_LE;  // [-1,0]
   static const TypeInt *CC_GE;  // [0,1] == BOOL (!)
   static const TypeInt *BYTE;
+  static const TypeInt *UBYTE;
   static const TypeInt *CHAR;
   static const TypeInt *SHORT;
   static const TypeInt *POS;
diff -r 09f82af55c3e -r fe2441500281 test/compiler/6797305/Test6797305.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/6797305/Test6797305.java	Fri Mar 13 17:06:44 2009 -0700
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6797305
+ * @summary Add LoadUB and LoadUI opcode class
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test6797305.loadB,Test6797305.loadB2L,Test6797305.loadUB,Test6797305.loadUBmask,Test6797305.loadUB2L,Test6797305.loadS,Test6797305.loadS2L,Test6797305.loadUS,Test6797305.loadUSmask,Test6797305.loadUS2L,Test6797305.loadI,Test6797305.loadI2L,Test6797305.loadUI2L,Test6797305.loadL Test6797305
+ */
+
+public class Test6797305 {  // checks each load* helper listed in the @run CompileOnly option
+    static final byte[]  ba = new byte[]  { -1 };  // -1: signed and unsigned loads must differ
+    static final short[] sa = new short[] { -1 };
+    static final int[]   ia = new int[]   { -1 };
+    static final long[]  la = new long[]  { -1 };
+
+    public static void main(String[] args)  // throws InternalError on the first mismatching load
+    {
+        long b = loadB(ba);
+        if (b != -1)
+            throw new InternalError("loadB failed: " + b + " != " + -1);
+
+        long b2l = loadB2L(ba);
+        if (b2l != -1L)
+            throw new InternalError("loadB2L failed: " + b2l + " != " + -1L);
+
+        int ub = loadUB(ba);
+        if (ub != 0xFF)
+            throw new InternalError("loadUB failed: " + ub + " != " + 0xFF);
+
+        int ubmask = loadUBmask(ba);
+        if (ubmask != 0xFE)
+            throw new InternalError("loadUBmask failed: " + ubmask + " != " + 0xFE);
+
+        long ub2l = loadUB2L(ba);
+        if (ub2l != 0xFFL)
+            throw new InternalError("loadUB2L failed: " + ub2l + " != " + 0xFFL);
+
+        int s = loadS(sa);
+        if (s != -1)
+            throw new InternalError("loadS failed: " + s + " != " + -1);
+
+        long s2l = loadS2L(sa);
+        if (s2l != -1L)
+            throw new InternalError("loadS2L failed: " + s2l + " != " + -1L);
+
+        int us = loadUS(sa);
+        if (us != 0xFFFF)
+            throw new InternalError("loadUS failed: " + us + " != " + 0xFFFF);
+
+        int usmask = loadUSmask(sa);
+        if (usmask != 0xFFFE)
+            throw new InternalError("loadUSmask failed: " + usmask + " != " + 0xFFFE);
+
+        long us2l = loadUS2L(sa);
+        if (us2l != 0xFFFFL)
+            throw new InternalError("loadUS2L failed: " + us2l + " != " + 0xFFFFL);
+
+        int i = loadI(ia);
+        if (i != -1)
+            throw new InternalError("loadI failed: " + i + " != " + -1);
+
+        long i2l = loadI2L(ia);
+        if (i2l != -1L)
+            throw new InternalError("loadI2L failed: " + i2l + " != " + -1L);
+
+        long ui2l = loadUI2L(ia);
+        if (ui2l != 0xFFFFFFFFL)
+            throw new InternalError("loadUI2L failed: " + ui2l + " != " + 0xFFFFFFFFL);
+
+        long l = loadL(la);
+        if (l != -1L)
+            throw new InternalError("loadL failed: " + l + " != " + -1L);
+    }
+
+    static int  loadB     (byte[] ba)  { return ba[0];               }
+    static long loadB2L   (byte[] ba)  { return ba[0];               }
+    static int  loadUB    (byte[] ba)  { return ba[0] & 0xFF;        }
+    static int  loadUBmask(byte[] ba)  { return ba[0] & 0xFE;        }
+    static long loadUB2L  (byte[] ba)  { return ba[0] & 0xFF;        }
+
+    static int  loadS     (short[] sa) { return sa[0];               }
+    static long loadS2L   (short[] sa) { return sa[0];               }
+    static int  loadUS    (short[] sa) { return sa[0] & 0xFFFF;      }
+    static int  loadUSmask(short[] sa) { return sa[0] & 0xFFFE;      }
+    static long loadUS2L  (short[] sa) { return sa[0] & 0xFFFF;      }
+
+    static int  loadI     (int[] ia)   { return ia[0];               }
+    static long loadI2L   (int[] ia)   { return ia[0];               }
+    static long loadUI2L  (int[] ia)   { return ia[0] & 0xFFFFFFFFL; }
+
+    static long loadL     (long[] la)  { return la[0];               }
+}