# HG changeset patch
# User dholmes
# Date 1305638996 14400
# Node ID 8bec9b249a6e8c059006287e79336d6e1d53b413
# Parent  03b943e6c02530270d3d5fc8925c0011f115c610# Parent  c149193c768b8b7233da4c3a3fdc0756b975848e
Merge

diff -r 03b943e6c025 -r 8bec9b249a6e .hgtags
--- a/.hgtags	Sun May 15 23:57:15 2011 -0400
+++ b/.hgtags	Tue May 17 09:29:56 2011 -0400
@@ -170,3 +170,5 @@
 d283b82966712b353fa307845a1316da42a355f4 hs21-b10
 5d07913abd59261c77f24cc04a759cb75d804099 jdk7-b141
 3aea9e9feb073f5500e031be6186666bcae89aa2 hs21-b11
+9ad1548c6b63d596c411afc35147ffd5254426d9 jdk7-b142
+9ad1548c6b63d596c411afc35147ffd5254426d9 hs21-b12
diff -r 03b943e6c025 -r 8bec9b249a6e make/hotspot_version
--- a/make/hotspot_version	Sun May 15 23:57:15 2011 -0400
+++ b/make/hotspot_version	Tue May 17 09:29:56 2011 -0400
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=21
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=12
+HS_BUILD_NUMBER=13
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/sparc/vm/cppInterpreter_sparc.cpp
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Tue May 17 09:29:56 2011 -0400
@@ -2176,6 +2176,7 @@
                                            int tempcount, // Number of slots on java expression stack in use
                                            int popframe_extra_args,
                                            int moncount,  // Number of active monitors
+                                           int caller_actual_parameters,
                                            int callee_param_size,
                                            int callee_locals_size,
                                            frame* caller,
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/sparc/vm/frame_sparc.cpp
--- a/src/cpu/sparc/vm/frame_sparc.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/sparc/vm/frame_sparc.cpp	Tue May 17 09:29:56 2011 -0400
@@ -811,7 +811,7 @@
 #ifdef ASSERT
 
 #define DESCRIBE_FP_OFFSET(name) \
-  values.describe(-1, fp() + frame::name##_offset, #name)
+  values.describe(frame_no, fp() + frame::name##_offset, #name)
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
   for (int w = 0; w < frame::register_save_words; w++) {
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/sparc/vm/interpreter_sparc.cpp
--- a/src/cpu/sparc/vm/interpreter_sparc.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/sparc/vm/interpreter_sparc.cpp	Tue May 17 09:29:56 2011 -0400
@@ -423,25 +423,6 @@
   return true;
 }
 
-// This method tells the deoptimizer how big an interpreted frame must be:
-int AbstractInterpreter::size_activation(methodOop method,
-                                         int tempcount,
-                                         int popframe_extra_args,
-                                         int moncount,
-                                         int callee_param_count,
-                                         int callee_locals,
-                                         bool is_top_frame) {
-  return layout_activation(method,
-                           tempcount,
-                           popframe_extra_args,
-                           moncount,
-                           callee_param_count,
-                           callee_locals,
-                           (frame*)NULL,
-                           (frame*)NULL,
-                           is_top_frame);
-}
-
 void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
 
   // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/sparc/vm/methodHandles_sparc.cpp
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Tue May 17 09:29:56 2011 -0400
@@ -142,18 +142,8 @@
   Register O2_form    = O2_scratch;
   Register O3_adapter = O3_scratch;
   __ load_heap_oop(Address(O0_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,               O1_scratch)), O2_form);
-  // load_heap_oop(Address(O2_form,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, O1_scratch)), O3_adapter);
-  // deal with old JDK versions:
-  __ add(          Address(O2_form,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, O1_scratch)), O3_adapter);
-  __ cmp(O3_adapter, O2_form);
-  Label sorry_no_invoke_generic;
-  __ brx(Assembler::lessUnsigned, false, Assembler::pn, sorry_no_invoke_generic);
-  __ delayed()->nop();
-
-  __ load_heap_oop(Address(O3_adapter, 0), O3_adapter);
-  __ tst(O3_adapter);
-  __ brx(Assembler::zero, false, Assembler::pn, sorry_no_invoke_generic);
-  __ delayed()->nop();
+  __ load_heap_oop(Address(O2_form,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, O1_scratch)), O3_adapter);
+  __ verify_oop(O3_adapter);
   __ st_ptr(O3_adapter, Address(O4_argbase, 1 * Interpreter::stackElementSize));
   // As a trusted first argument, pass the type being called, so the adapter knows
   // the actual types of the arguments and return values.
@@ -164,12 +154,6 @@
   trace_method_handle(_masm, "invokeGeneric");
   __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
 
-  __ bind(sorry_no_invoke_generic); // no invokeGeneric implementation available!
-  __ mov(O0_mtype, G5_method_type);  // required by throw_WrongMethodType
-  // mov(G3_method_handle, G3_method_handle);  // already in this register
-  __ jump_to(AddressLiteral(Interpreter::throw_WrongMethodType_entry()), O1_scratch);
-  __ delayed()->nop();
-
   return entry_point;
 }
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/sparc/vm/templateInterpreter_sparc.cpp
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Tue May 17 09:29:56 2011 -0400
@@ -1623,6 +1623,7 @@
                                            int tempcount,
                                            int popframe_extra_args,
                                            int moncount,
+                                           int caller_actual_parameters,
                                            int callee_param_count,
                                            int callee_local_count,
                                            frame* caller,
@@ -1698,21 +1699,35 @@
                      popframe_extra_args;
 
     int local_words = method->max_locals() * Interpreter::stackElementWords;
+    NEEDS_CLEANUP;
     intptr_t* locals;
-    if (caller->is_compiled_frame()) {
-      // Compiled frames do not allocate a varargs area so place them
-      // next to the register save area.
-      locals = fp + frame::register_save_words + local_words - 1;
-      // Caller wants his own SP back
-      int caller_frame_size = caller->cb()->frame_size();
-      *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
+    if (caller->is_interpreted_frame()) {
+      // Can force the locals area to end up properly overlapping the top of the expression stack.
+      intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1;
+      // Note that this computation means we replace size_of_parameters() values from the caller
+      // interpreter frame's expression stack with our argument locals
+      int parm_words  = caller_actual_parameters * Interpreter::stackElementWords;
+      locals = Lesp_ptr + parm_words;
+      int delta = local_words - parm_words;
+      int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
+      *interpreter_frame->register_addr(I5_savedSP)    = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
     } else {
-      assert(caller->is_interpreted_frame() || caller->is_entry_frame(), "only possible cases");
-      // The entry and interpreter frames are laid out like normal C
-      // frames so place the locals adjacent to the varargs area.
-      locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
-      if (caller->is_interpreted_frame()) {
-        *interpreter_frame->register_addr(I5_savedSP)    = (intptr_t) (fp + rounded_cls) - STACK_BIAS;
+      assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
+      // Don't have Lesp available; lay out locals block in the caller
+      // adjacent to the register window save area.
+      //
+      // Compiled frames do not allocate a varargs area which is why this if
+      // statement is needed.
+      //
+      if (caller->is_compiled_frame()) {
+        locals = fp + frame::register_save_words + local_words - 1;
+      } else {
+        locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
+      }
+      if (!caller->is_entry_frame()) {
+        // Caller wants his own SP back
+        int caller_frame_size = caller->cb()->frame_size();
+        *interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
       }
     }
     if (TraceDeoptimization) {
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/assembler_x86.hpp
--- a/src/cpu/x86/vm/assembler_x86.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Tue May 17 09:29:56 2011 -0400
@@ -234,6 +234,20 @@
     a._disp += disp;
     return a;
   }
+  Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
+    Address a = (*this);
+    a._disp += disp.constant_or_zero() * scale_size(scale);
+    if (disp.is_register()) {
+      assert(!a.index()->is_valid(), "competing indexes");
+      a._index = disp.as_register();
+      a._scale = scale;
+    }
+    return a;
+  }
+  bool is_same_address(Address a) const {
+    // disregard _rspec
+    return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
+  }
 
   // The following two overloads are used in connection with the
   // ByteSize type (see sizes.hpp).  They simplify the use of
@@ -2029,6 +2043,10 @@
   void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
   void addptr(Register dst, int32_t src);
   void addptr(Register dst, Register src);
+  void addptr(Register dst, RegisterOrConstant src) {
+    if (src.is_constant()) addptr(dst, (int) src.as_constant());
+    else                   addptr(dst,       src.as_register());
+  }
 
   void andptr(Register dst, int32_t src);
   void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
@@ -2090,7 +2108,10 @@
   void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
   void subptr(Register dst, int32_t src);
   void subptr(Register dst, Register src);
-
+  void subptr(Register dst, RegisterOrConstant src) {
+    if (src.is_constant()) subptr(dst, (int) src.as_constant());
+    else                   subptr(dst,       src.as_register());
+  }
 
   void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
   void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
@@ -2288,6 +2309,11 @@
 
   void movptr(Address dst, Register src);
 
+  void movptr(Register dst, RegisterOrConstant src) {
+    if (src.is_constant()) movptr(dst, src.as_constant());
+    else                   movptr(dst, src.as_register());
+  }
+
 #ifdef _LP64
   // Generally the next two are only used for moving NULL
   // Although there are situations in initializing the mark word where
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/cppInterpreter_x86.cpp
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Tue May 17 09:29:56 2011 -0400
@@ -2339,14 +2339,15 @@
 }
 
 int AbstractInterpreter::layout_activation(methodOop method,
-                                                int tempcount,  //
-                                                int popframe_extra_args,
-                                                int moncount,
-                                                int callee_param_count,
-                                                int callee_locals,
-                                                frame* caller,
-                                                frame* interpreter_frame,
-                                                bool is_top_frame) {
+                                           int tempcount,  //
+                                           int popframe_extra_args,
+                                           int moncount,
+                                           int caller_actual_parameters,
+                                           int callee_param_count,
+                                           int callee_locals,
+                                           frame* caller,
+                                           frame* interpreter_frame,
+                                           bool is_top_frame) {
 
   assert(popframe_extra_args == 0, "FIX ME");
   // NOTE this code must exactly mimic what InterpreterGenerator::generate_compute_interpreter_state()
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/frame_x86.cpp
--- a/src/cpu/x86/vm/frame_x86.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/frame_x86.cpp	Tue May 17 09:29:56 2011 -0400
@@ -339,7 +339,6 @@
   return fr;
 }
 
-
 //------------------------------------------------------------------------------
 // frame::verify_deopt_original_pc
 //
@@ -361,6 +360,55 @@
 }
 #endif
 
+//------------------------------------------------------------------------------
+// frame::adjust_unextended_sp
+void frame::adjust_unextended_sp() {
+  // If we are returning to a compiled MethodHandle call site, the
+  // saved_fp will in fact be a saved value of the unextended SP.  The
+  // simplest way to tell whether we are returning to such a call site
+  // is as follows:
+
+  nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
+  if (sender_nm != NULL) {
+    // If the sender PC is a deoptimization point, get the original
+    // PC.  For MethodHandle call site the unextended_sp is stored in
+    // saved_fp.
+    if (sender_nm->is_deopt_mh_entry(_pc)) {
+      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
+      _unextended_sp = _fp;
+    }
+    else if (sender_nm->is_deopt_entry(_pc)) {
+      DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
+    }
+    else if (sender_nm->is_method_handle_return(_pc)) {
+      _unextended_sp = _fp;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// frame::update_map_with_saved_link
+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
+  // The interpreter and compiler(s) always save EBP/RBP in a known
+  // location on entry. We must record where that location is
+  // so that if EBP/RBP was live on callout from c2 we can find
+  // the saved copy no matter what it called.
+
+  // Since the interpreter always saves EBP/RBP if we record where it is then
+  // we don't have to always save EBP/RBP on entry and exit to c2 compiled
+  // code, on entry will be enough.
+  map->set_location(rbp->as_VMReg(), (address) link_addr);
+#ifdef AMD64
+  // this is weird "H" ought to be at a higher address however the
+  // oopMaps seem to have the "H" regs at the same address as the
+  // vanilla register.
+  // XXXX make this go away
+  if (true) {
+    map->set_location(rbp->as_VMReg()->next(), (address) link_addr);
+  }
+#endif // AMD64
+}
+
 
 //------------------------------------------------------------------------------
 // frame::sender_for_interpreter_frame
@@ -372,54 +420,13 @@
   // This is the sp before any possible extension (adapter/locals).
   intptr_t* unextended_sp = interpreter_frame_sender_sp();
 
-  // Stored FP.
-  intptr_t* saved_fp = link();
-
-  address sender_pc = this->sender_pc();
-  CodeBlob* sender_cb = CodeCache::find_blob_unsafe(sender_pc);
-  assert(sender_cb, "sanity");
-  nmethod* sender_nm = sender_cb->as_nmethod_or_null();
-
-  if (sender_nm != NULL) {
-    // If the sender PC is a deoptimization point, get the original
-    // PC.  For MethodHandle call site the unextended_sp is stored in
-    // saved_fp.
-    if (sender_nm->is_deopt_mh_entry(sender_pc)) {
-      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, saved_fp));
-      unextended_sp = saved_fp;
-    }
-    else if (sender_nm->is_deopt_entry(sender_pc)) {
-      DEBUG_ONLY(verify_deopt_original_pc(sender_nm, unextended_sp));
-    }
-    else if (sender_nm->is_method_handle_return(sender_pc)) {
-      unextended_sp = saved_fp;
-    }
-  }
-
-  // The interpreter and compiler(s) always save EBP/RBP in a known
-  // location on entry. We must record where that location is
-  // so this if EBP/RBP was live on callout from c2 we can find
-  // the saved copy no matter what it called.
-
-  // Since the interpreter always saves EBP/RBP if we record where it is then
-  // we don't have to always save EBP/RBP on entry and exit to c2 compiled
-  // code, on entry will be enough.
 #ifdef COMPILER2
   if (map->update_map()) {
-    map->set_location(rbp->as_VMReg(), (address) addr_at(link_offset));
-#ifdef AMD64
-    // this is weird "H" ought to be at a higher address however the
-    // oopMaps seems to have the "H" regs at the same address and the
-    // vanilla register.
-    // XXXX make this go away
-    if (true) {
-      map->set_location(rbp->as_VMReg()->next(), (address)addr_at(link_offset));
-    }
-#endif // AMD64
+    update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
   }
 #endif // COMPILER2
 
-  return frame(sender_sp, unextended_sp, saved_fp, sender_pc);
+  return frame(sender_sp, unextended_sp, link(), sender_pc());
 }
 
 
@@ -427,6 +434,7 @@
 // frame::sender_for_compiled_frame
 frame frame::sender_for_compiled_frame(RegisterMap* map) const {
   assert(map != NULL, "map must be set");
+  assert(!is_ricochet_frame(), "caller must handle this");
 
   // frame owned by optimizing compiler
   assert(_cb->frame_size() >= 0, "must have non-zero frame size");
@@ -438,31 +446,7 @@
 
   // This is the saved value of EBP which may or may not really be an FP.
   // It is only an FP if the sender is an interpreter frame (or C1?).
-  intptr_t* saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset);
-
-  // If we are returning to a compiled MethodHandle call site, the
-  // saved_fp will in fact be a saved value of the unextended SP.  The
-  // simplest way to tell whether we are returning to such a call site
-  // is as follows:
-  CodeBlob* sender_cb = CodeCache::find_blob_unsafe(sender_pc);
-  assert(sender_cb, "sanity");
-  nmethod* sender_nm = sender_cb->as_nmethod_or_null();
-
-  if (sender_nm != NULL) {
-    // If the sender PC is a deoptimization point, get the original
-    // PC.  For MethodHandle call site the unextended_sp is stored in
-    // saved_fp.
-    if (sender_nm->is_deopt_mh_entry(sender_pc)) {
-      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, saved_fp));
-      unextended_sp = saved_fp;
-    }
-    else if (sender_nm->is_deopt_entry(sender_pc)) {
-      DEBUG_ONLY(verify_deopt_original_pc(sender_nm, unextended_sp));
-    }
-    else if (sender_nm->is_method_handle_return(sender_pc)) {
-      unextended_sp = saved_fp;
-    }
-  }
+  intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset);
 
   if (map->update_map()) {
     // Tell GC to use argument oopmaps for some runtime stubs that need it.
@@ -472,23 +456,15 @@
     if (_cb->oop_maps() != NULL) {
       OopMapSet::update_register_map(this, map);
     }
+
     // Since the prolog does the save and restore of EBP there is no oopmap
     // for it so we must fill in its location as if there was an oopmap entry
     // since if our caller was compiled code there could be live jvm state in it.
-    map->set_location(rbp->as_VMReg(), (address) (sender_sp - frame::sender_sp_offset));
-#ifdef AMD64
-    // this is weird "H" ought to be at a higher address however the
-    // oopMaps seems to have the "H" regs at the same address and the
-    // vanilla register.
-    // XXXX make this go away
-    if (true) {
-      map->set_location(rbp->as_VMReg()->next(), (address) (sender_sp - frame::sender_sp_offset));
-    }
-#endif // AMD64
+    update_map_with_saved_link(map, saved_fp_addr);
   }
 
   assert(sender_sp != sp(), "must have changed");
-  return frame(sender_sp, unextended_sp, saved_fp, sender_pc);
+  return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
 }
 
 
@@ -502,6 +478,7 @@
   if (is_entry_frame())       return sender_for_entry_frame(map);
   if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
   assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+  if (is_ricochet_frame())    return sender_for_ricochet_frame(map);
 
   if (_cb != NULL) {
     return sender_for_compiled_frame(map);
@@ -673,7 +650,7 @@
 #ifdef ASSERT
 
 #define DESCRIBE_FP_OFFSET(name) \
-  values.describe(-1, fp() + frame::name##_offset, #name)
+  values.describe(frame_no, fp() + frame::name##_offset, #name)
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
   if (is_interpreted_frame()) {
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/frame_x86.hpp
--- a/src/cpu/x86/vm/frame_x86.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/frame_x86.hpp	Tue May 17 09:29:56 2011 -0400
@@ -164,6 +164,7 @@
   // original sp we use that convention.
 
   intptr_t*     _unextended_sp;
+  void adjust_unextended_sp();
 
   intptr_t* ptr_at_addr(int offset) const {
     return (intptr_t*) addr_at(offset);
@@ -197,6 +198,9 @@
   // expression stack tos if we are nested in a java call
   intptr_t* interpreter_frame_last_sp() const;
 
+  // helper to update a map with callee-saved RBP
+  static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
+
 #ifndef CC_INTERP
   // deoptimization support
   void interpreter_frame_set_last_sp(intptr_t* sp);
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/frame_x86.inline.hpp
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Tue May 17 09:29:56 2011 -0400
@@ -62,6 +62,7 @@
   _pc = pc;
   assert(pc != NULL, "no pc?");
   _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
 
   address original_pc = nmethod::get_deopt_original_pc(this);
   if (original_pc != NULL) {
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/interpreter_x86.hpp
--- a/src/cpu/x86/vm/interpreter_x86.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/interpreter_x86.hpp	Tue May 17 09:29:56 2011 -0400
@@ -26,7 +26,9 @@
 #define CPU_X86_VM_INTERPRETER_X86_HPP
 
  public:
-  static Address::ScaleFactor stackElementScale() { return Address::times_4; }
+  static Address::ScaleFactor stackElementScale() {
+    return NOT_LP64(Address::times_4) LP64_ONLY(Address::times_8);
+  }
 
   // Offset from rsp (which points to the last stack element)
   static int expr_offset_in_bytes(int i) { return stackElementSize * i; }
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/interpreter_x86_32.cpp
--- a/src/cpu/x86/vm/interpreter_x86_32.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/interpreter_x86_32.cpp	Tue May 17 09:29:56 2011 -0400
@@ -242,26 +242,6 @@
   return entry_point;
 }
 
-
-// This method tells the deoptimizer how big an interpreted frame must be:
-int AbstractInterpreter::size_activation(methodOop method,
-                                         int tempcount,
-                                         int popframe_extra_args,
-                                         int moncount,
-                                         int callee_param_count,
-                                         int callee_locals,
-                                         bool is_top_frame) {
-  return layout_activation(method,
-                           tempcount,
-                           popframe_extra_args,
-                           moncount,
-                           callee_param_count,
-                           callee_locals,
-                           (frame*) NULL,
-                           (frame*) NULL,
-                           is_top_frame);
-}
-
 void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
 
   // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/interpreter_x86_64.cpp
--- a/src/cpu/x86/vm/interpreter_x86_64.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/interpreter_x86_64.cpp	Tue May 17 09:29:56 2011 -0400
@@ -362,20 +362,6 @@
 
 }
 
-// This method tells the deoptimizer how big an interpreted frame must be:
-int AbstractInterpreter::size_activation(methodOop method,
-                                         int tempcount,
-                                         int popframe_extra_args,
-                                         int moncount,
-                                         int callee_param_count,
-                                         int callee_locals,
-                                         bool is_top_frame) {
-  return layout_activation(method,
-                           tempcount, popframe_extra_args, moncount,
-                           callee_param_count, callee_locals,
-                           (frame*) NULL, (frame*) NULL, is_top_frame);
-}
-
 void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
 
   // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/methodHandles_x86.cpp
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Tue May 17 09:29:56 2011 -0400
@@ -69,23 +69,475 @@
   return me;
 }
 
+// stack walking support
+
+frame MethodHandles::ricochet_frame_sender(const frame& fr, RegisterMap *map) {
+  RicochetFrame* f = RicochetFrame::from_frame(fr);
+  if (map->update_map())
+    frame::update_map_with_saved_link(map, &f->_sender_link);
+  return frame(f->extended_sender_sp(), f->exact_sender_sp(), f->sender_link(), f->sender_pc());
+}
+
+void MethodHandles::ricochet_frame_oops_do(const frame& fr, OopClosure* blk, const RegisterMap* reg_map) {
+  RicochetFrame* f = RicochetFrame::from_frame(fr);
+
+  // pick up the argument type descriptor:
+  Thread* thread = Thread::current();
+  Handle cookie(thread, f->compute_saved_args_layout(true, true));
+
+  // process fixed part
+  blk->do_oop((oop*)f->saved_target_addr());
+  blk->do_oop((oop*)f->saved_args_layout_addr());
+
+  // process variable arguments:
+  if (cookie.is_null())  return;  // no arguments to describe
+
+  // the cookie is actually the invokeExact method for my target
+  // his argument signature is what I'm interested in
+  assert(cookie->is_method(), "");
+  methodHandle invoker(thread, methodOop(cookie()));
+  assert(invoker->name() == vmSymbols::invokeExact_name(), "must be this kind of method");
+  assert(!invoker->is_static(), "must have MH argument");
+  int slot_count = invoker->size_of_parameters();
+  assert(slot_count >= 1, "must include 'this'");
+  intptr_t* base = f->saved_args_base();
+  intptr_t* retval = NULL;
+  if (f->has_return_value_slot())
+    retval = f->return_value_slot_addr();
+  int slot_num = slot_count;
+  intptr_t* loc = &base[slot_num -= 1];
+  //blk->do_oop((oop*) loc);   // original target, which is irrelevant
+  int arg_num = 0;
+  for (SignatureStream ss(invoker->signature()); !ss.is_done(); ss.next()) {
+    if (ss.at_return_type())  continue;
+    BasicType ptype = ss.type();
+    if (ptype == T_ARRAY)  ptype = T_OBJECT; // fold all refs to T_OBJECT
+    assert(ptype >= T_BOOLEAN && ptype <= T_OBJECT, "not array or void");
+    loc = &base[slot_num -= type2size[ptype]];
+    bool is_oop = (ptype == T_OBJECT && loc != retval);
+    if (is_oop)  blk->do_oop((oop*)loc);
+    arg_num += 1;
+  }
+  assert(slot_num == 0, "must have processed all the arguments");
+}
+
+oop MethodHandles::RicochetFrame::compute_saved_args_layout(bool read_cache, bool write_cache) {
+  oop cookie = NULL;
+  if (read_cache) {
+    cookie = saved_args_layout();
+    if (cookie != NULL)  return cookie;
+  }
+  oop target = saved_target();
+  oop mtype  = java_lang_invoke_MethodHandle::type(target);
+  oop mtform = java_lang_invoke_MethodType::form(mtype);
+  cookie = java_lang_invoke_MethodTypeForm::vmlayout(mtform);
+  if (write_cache)  {
+    (*saved_args_layout_addr()) = cookie;
+  }
+  return cookie;
+}
+
+void MethodHandles::RicochetFrame::generate_ricochet_blob(MacroAssembler* _masm,
+                                                          // output params:
+                                                          int* frame_size_in_words,
+                                                          int* bounce_offset,
+                                                          int* exception_offset) {
+  (*frame_size_in_words) = RicochetFrame::frame_size_in_bytes() / wordSize;
+
+  address start = __ pc();
+
 #ifdef ASSERT
-static void verify_argslot(MacroAssembler* _masm, Register argslot_reg,
-                           const char* error_message) {
+  __ hlt(); __ hlt(); __ hlt();
+  // here's a hint of something special:
+  __ push(MAGIC_NUMBER_1);
+  __ push(MAGIC_NUMBER_2);
+#endif //ASSERT
+  __ hlt();  // not reached
+
+  // A return PC has just been popped from the stack.
+  // Return values are in registers.
+  // The ebp points into the RicochetFrame, which contains
+  // a cleanup continuation we must return to.
+
+  (*bounce_offset) = __ pc() - start;
+  BLOCK_COMMENT("ricochet_blob.bounce");
+
+  if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+  trace_method_handle(_masm, "return/ricochet_blob.bounce");
+
+  __ jmp(frame_address(continuation_offset_in_bytes()));
+  __ hlt();
+  DEBUG_ONLY(__ push(MAGIC_NUMBER_2));
+
+  (*exception_offset) = __ pc() - start;
+  BLOCK_COMMENT("ricochet_blob.exception");
+
+  // compare this to Interpreter::rethrow_exception_entry, which is parallel code
+  // for example, see TemplateInterpreterGenerator::generate_throw_exception
+  // Live registers in:
+  //   rax: exception
+  //   rdx: return address/pc that threw exception (ignored, always equal to bounce addr)
+  __ verify_oop(rax);
+
+  // no need to empty_FPU_stack or reinit_heapbase, since caller frame will do the same if needed
+
+  // Take down the frame.
+
+  // Cf. InterpreterMacroAssembler::remove_activation.
+  leave_ricochet_frame(_masm, /*rcx_recv=*/ noreg,
+                       saved_last_sp_register(),
+                       /*sender_pc_reg=*/ rdx);
+
+  // In between activations - previous activation type unknown yet
+  // compute continuation point - the continuation point expects the
+  // following registers set up:
+  //
+  // rax: exception
+  // rdx: return address/pc that threw exception
+  // rsp: expression stack of caller
+  // rbp: ebp of caller
+  __ push(rax);                                  // save exception
+  __ push(rdx);                                  // save return address
+  Register thread_reg = LP64_ONLY(r15_thread) NOT_LP64(rdi);
+  NOT_LP64(__ get_thread(thread_reg));
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address,
+                                   SharedRuntime::exception_handler_for_return_address),
+                  thread_reg, rdx);
+  __ mov(rbx, rax);                              // save exception handler
+  __ pop(rdx);                                   // restore return address
+  __ pop(rax);                                   // restore exception
+  __ jmp(rbx);                                   // jump to exception
+                                                 // handler of caller
+}
+
+void MethodHandles::RicochetFrame::enter_ricochet_frame(MacroAssembler* _masm,   // emits the pushes that build a RicochetFrame
+                                                        Register rcx_recv,       // method handle; its vmtarget and conversion fields are read
+                                                        Register rax_argv,       // pushed as saved_args_base
+                                                        address return_handler,  // pushed as continuation
+                                                        Register rbx_temp) {     // scratch; overwritten
+  const Register saved_last_sp = saved_last_sp_register();
+  Address rcx_mh_vmtarget(    rcx_recv, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes() );
+  Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() );
+
+  // Push the RicochetFrame a word at a time.
+  // This creates something similar to an interpreter frame.
+  // Cf. TemplateInterpreterGenerator::generate_fixed_frame.
+  BLOCK_COMMENT("push RicochetFrame {");
+  DEBUG_ONLY(int rfo = (int) sizeof(RicochetFrame));  // debug-only running offset, counted down one word per field
+  assert((rfo -= wordSize) == RicochetFrame::sender_pc_offset_in_bytes(), "");  // nothing is pushed for sender_pc here; presumably the return address is already on the stack
+#define RF_FIELD(push_value, name)                                      \
+  { push_value;                                                         \
+    assert((rfo -= wordSize) == RicochetFrame::name##_offset_in_bytes(), ""); }
+  RF_FIELD(__ push(rbp),                   sender_link);
+  RF_FIELD(__ push(saved_last_sp),         exact_sender_sp);  // rsi/r13
+  RF_FIELD(__ pushptr(rcx_amh_conversion), conversion);
+  RF_FIELD(__ push(rax_argv),              saved_args_base);   // can be updated if args are shifted
+  RF_FIELD(__ push((int32_t) NULL_WORD),   saved_args_layout); // cache for GC layout cookie
+  if (UseCompressedOops) {  // go through load_heap_oop instead of pushing the field's raw memory word
+    __ load_heap_oop(rbx_temp, rcx_mh_vmtarget);
+    RF_FIELD(__ push(rbx_temp),            saved_target);
+  } else {
+    RF_FIELD(__ pushptr(rcx_mh_vmtarget),  saved_target);
+  }
+  __ lea(rbx_temp, ExternalAddress(return_handler));
+  RF_FIELD(__ push(rbx_temp),              continuation);
+#undef RF_FIELD
+  assert(rfo == 0, "fully initialized the RicochetFrame");
+  // compute new frame pointer:
+  __ lea(rbp, Address(rsp, RicochetFrame::sender_link_offset_in_bytes()));  // rbp = address of the sender_link slot just pushed
+  // Push guard word #1 in debug mode.
+  DEBUG_ONLY(__ push((int32_t) RicochetFrame::MAGIC_NUMBER_1));
+  // For debugging, leave behind an indication of which stub built this frame.
+  DEBUG_ONLY({ Label L; __ call(L, relocInfo::none); __ bind(L); });
+  BLOCK_COMMENT("} RicochetFrame");
+}
+
+void MethodHandles::RicochetFrame::leave_ricochet_frame(MacroAssembler* _masm,    // emits the reloads that take the frame down
+                                                        Register rcx_recv,        // if valid, receives the frame's saved_target
+                                                        Register new_sp_reg,      // value installed into rsp at the end
+                                                        Register sender_pc_reg) { // receives the frame's sender_pc
+  assert_different_registers(rcx_recv, new_sp_reg, sender_pc_reg);
+  const Register saved_last_sp = saved_last_sp_register();
+  // Take down the frame.
+  // Cf. InterpreterMacroAssembler::remove_activation.
+  BLOCK_COMMENT("end_ricochet_frame {");
+  // TO DO: If (exact_sender_sp - extended_sender_sp) > THRESH, compact the frame down.
+  // This will keep stack in bounds even with unlimited tailcalls, each with an adapter.
+  if (rcx_recv->is_valid())  // callers may pass noreg to skip reloading the target
+    __ movptr(rcx_recv,    RicochetFrame::frame_address(RicochetFrame::saved_target_offset_in_bytes()));
+  __ movptr(sender_pc_reg, RicochetFrame::frame_address(RicochetFrame::sender_pc_offset_in_bytes()));
+  __ movptr(saved_last_sp, RicochetFrame::frame_address(RicochetFrame::exact_sender_sp_offset_in_bytes()));
+  __ movptr(rbp,           RicochetFrame::frame_address(RicochetFrame::sender_link_offset_in_bytes()));  // rbp last: frame_address() is presumably rbp-relative -- confirm
+  __ mov(rsp, new_sp_reg);  // cut the stack back to the sp chosen by the caller
+  BLOCK_COMMENT("} end_ricochet_frame");
+}
+
+// Emit code to verify that RBP is pointing at a valid ricochet frame.
+#ifdef ASSERT
+enum {
+  ARG_LIMIT = 255, SLOP = 4,  // ARG_LIMIT is presumably the maximum argument count -- confirm
+  // use this parameter for checking for garbage stack movements:
+  UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)  // = 259 slots
+  // the slop defends against false alarms due to fencepost errors
+};
+
+void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {  // emits run-time checks of the ricochet frame
+  // The stack should look like this:
+  //    ... keep1 | dest=42 | keep2 | RF | magic | handler | magic | recursive args |
+  // Check various invariants.
+  verify_offsets();  // checked immediately, at code-generation time
+
+  Register rdi_temp = rdi;
+  Register rcx_temp = rcx;
+  { __ push(rdi_temp); __ push(rcx_temp); }  // save the two scratch registers
+#define UNPUSH_TEMPS \
+  { __ pop(rcx_temp);  __ pop(rdi_temp); }
+
+  Address magic_number_1_addr  = RicochetFrame::frame_address(RicochetFrame::magic_number_1_offset_in_bytes());
+  Address magic_number_2_addr  = RicochetFrame::frame_address(RicochetFrame::magic_number_2_offset_in_bytes());
+  Address continuation_addr    = RicochetFrame::frame_address(RicochetFrame::continuation_offset_in_bytes());
+  Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
+  Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
+
+  Label L_bad, L_ok;
+  BLOCK_COMMENT("verify_clean {");
+  // Magic numbers must check out:
+  __ cmpptr(magic_number_1_addr, (int32_t) MAGIC_NUMBER_1);
+  __ jcc(Assembler::notEqual, L_bad);
+  __ cmpptr(magic_number_2_addr, (int32_t) MAGIC_NUMBER_2);
+  __ jcc(Assembler::notEqual, L_bad);
+
+  // Arguments pointer must look reasonable:
+  __ movptr(rcx_temp, saved_args_base_addr);
+  __ cmpptr(rcx_temp, rbp);
+  __ jcc(Assembler::below, L_bad);  // bad if saved_args_base < rbp (unsigned)
+  __ subptr(rcx_temp, UNREASONABLE_STACK_MOVE * Interpreter::stackElementSize);
+  __ cmpptr(rcx_temp, rbp);
+  __ jcc(Assembler::above, L_bad);  // bad if saved_args_base is more than the limit above rbp
+
+  load_conversion_dest_type(_masm, rdi_temp, conversion_addr);
+  __ cmpl(rdi_temp, T_VOID);
+  __ jcc(Assembler::equal, L_ok);   // dest type T_VOID: skip the placeholder check
+  __ movptr(rcx_temp, saved_args_base_addr);  // reload: rcx_temp was modified by the subptr above
+  load_conversion_vminfo(_masm, rdi_temp, conversion_addr);
+  __ cmpptr(Address(rcx_temp, rdi_temp, Interpreter::stackElementScale()),  // the slot saved_args_base[vminfo]
+            (int32_t) RETURN_VALUE_PLACEHOLDER);
+  __ jcc(Assembler::equal, L_ok);
+  __ BIND(L_bad);
+  UNPUSH_TEMPS;  // restore the scratch registers before stopping
+  __ stop("damaged ricochet frame");
+  __ BIND(L_ok);
+  UNPUSH_TEMPS;  // restore the scratch registers on the success path
+  BLOCK_COMMENT("} verify_clean");
+
+#undef UNPUSH_TEMPS
+
+}
+#endif //ASSERT
+
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {  // klass_reg: in = Class oop, out = its klass field
+  if (VerifyMethodHandles)
+    verify_klass(_masm, klass_reg, SystemDictionaryHandles::Class_klass(),
+                 "AMH argument is a Class");
+  __ load_heap_oop(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes()));  // overwrites klass_reg with the loaded oop
+}
+
+void MethodHandles::load_conversion_vminfo(MacroAssembler* _masm, Register reg, Address conversion_field_addr) {
+  int bits   = BitsPerByte;
+  int offset = (CONV_VMINFO_SHIFT / bits);  // index of the byte that holds the field
+  int shift  = (CONV_VMINFO_SHIFT % bits);  // bit position of the field inside that byte
+  __ load_unsigned_byte(reg, conversion_field_addr.plus_disp(offset));  // reg = that single byte, zero-extended
+  assert(CONV_VMINFO_MASK == right_n_bits(bits - shift), "else change type of previous load");
+  assert(shift == 0, "no shift needed");  // field starts on a byte boundary, so the loaded byte is the value
+}
+
+void MethodHandles::load_conversion_dest_type(MacroAssembler* _masm, Register reg, Address conversion_field_addr) {
+  int bits   = BitsPerByte;
+  int offset = (CONV_DEST_TYPE_SHIFT / bits);  // index of the byte that holds the field
+  int shift  = (CONV_DEST_TYPE_SHIFT % bits);  // bit position of the field inside that byte
+  __ load_unsigned_byte(reg, conversion_field_addr.plus_disp(offset));  // reg = that single byte, zero-extended
+  assert(CONV_TYPE_MASK == right_n_bits(bits - shift), "else change type of previous load");
+  __ shrl(reg, shift);  // drop the bits below the field
+  DEBUG_ONLY(int conv_type_bits = (int) exact_log2(CONV_TYPE_MASK+1));
+  assert((shift + conv_type_bits) == bits, "left justified in byte");  // field fills the top of the byte, so no mask is needed after the shift
+}
+
+void MethodHandles::load_stack_move(MacroAssembler* _masm,
+                                    Register rdi_stack_move,   // out: signed stack-move field of the conversion word
+                                    Register rcx_amh,          // in: object whose AdapterMethodHandle conversion field is read
+                                    bool might_be_negative) {  // on LP64, also sign-extend the 32-bit result to 64 bits
+  BLOCK_COMMENT("load_stack_move");
+  Address rcx_amh_conversion(rcx_amh, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
+  __ movl(rdi_stack_move, rcx_amh_conversion);
+  __ sarl(rdi_stack_move, CONV_STACK_MOVE_SHIFT);  // arithmetic shift: keeps the sign of the field
+#ifdef _LP64
+  if (might_be_negative) {
+    // clean high bits of stack motion register (was loaded as an int)
+    __ movslq(rdi_stack_move, rdi_stack_move);
+  }
+#endif //_LP64
+  if (VerifyMethodHandles) {
+    Label L_ok, L_bad;
+    int32_t stack_move_limit = 0x4000;  // extra-large
+    __ cmpptr(rdi_stack_move, stack_move_limit);
+    __ jcc(Assembler::greaterEqual, L_bad);   // value >= 0x4000 is garbage
+    __ cmpptr(rdi_stack_move, -stack_move_limit);
+    __ jcc(Assembler::greater, L_ok);         // accept only -0x4000 < value < 0x4000
+    __ bind(L_bad);
+    __ stop("load_stack_move of garbage value");
+    __ BIND(L_ok);
+  }
+}
+
+#ifndef PRODUCT
+void MethodHandles::RicochetFrame::verify_offsets() {  // non-PRODUCT layout check; asserts only, emits no code
+  // Check compatibility of this struct with the more generally used offsets of class frame:
+  int ebp_off = sender_link_offset_in_bytes();  // offset from struct base to local rbp value
+  assert(ebp_off + wordSize*frame::interpreter_frame_method_offset      == saved_args_base_offset_in_bytes(), "");  // saved_args_base sits in the interpreter frame's method slot
+  assert(ebp_off + wordSize*frame::interpreter_frame_last_sp_offset     == conversion_offset_in_bytes(), "");       // conversion sits in the last_sp slot
+  assert(ebp_off + wordSize*frame::interpreter_frame_sender_sp_offset   == exact_sender_sp_offset_in_bytes(), "");  // exact_sender_sp sits in the sender_sp slot
+  // These last two have to be exact:
+  assert(ebp_off + wordSize*frame::link_offset                          == sender_link_offset_in_bytes(), "");
+  assert(ebp_off + wordSize*frame::return_addr_offset                   == sender_pc_offset_in_bytes(), "");
+}
+
+void MethodHandles::RicochetFrame::verify() const {  // non-PRODUCT consistency check of this frame's fields
+  verify_offsets();
+  assert(magic_number_1() == MAGIC_NUMBER_1, "");
+  assert(magic_number_2() == MAGIC_NUMBER_2, "");
+  if (!Universe::heap()->is_gc_active()) {  // the oop fields are only inspected while no GC is active
+    if (saved_args_layout() != NULL) {      // NULL is allowed for both oop fields
+      assert(saved_args_layout()->is_method(), "must be valid oop");
+    }
+    if (saved_target() != NULL) {
+      assert(java_lang_invoke_MethodHandle::is_instance(saved_target()), "checking frame value");
+    }
+  }
+  int conv_op = adapter_conversion_op(conversion());
+  assert(conv_op == java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS ||  // only these three conversion ops are accepted
+         conv_op == java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS ||
+         conv_op == java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF,
+         "must be a sane conversion");
+  if (has_return_value_slot()) {  // when present, the slot must still hold the placeholder
+    assert(*return_value_slot_addr() == RETURN_VALUE_PLACEHOLDER, "");
+  }
+}
+#endif //PRODUCT
+
+#ifdef ASSERT
+void MethodHandles::verify_argslot(MacroAssembler* _masm,
+                                   Register argslot_reg,         // address to be range-checked
+                                   const char* error_message) {  // passed to stop() when the check fails
   // Verify that argslot lies within (rsp, rbp].
   Label L_ok, L_bad;
-  BLOCK_COMMENT("{ verify_argslot");
+  BLOCK_COMMENT("verify_argslot {");
   __ cmpptr(argslot_reg, rbp);
   __ jccb(Assembler::above, L_bad);
   __ cmpptr(rsp, argslot_reg);
   __ jccb(Assembler::below, L_ok);
   __ bind(L_bad);
   __ stop(error_message);
-  __ bind(L_ok);
+  __ BIND(L_ok);
   BLOCK_COMMENT("} verify_argslot");
 }
-#endif
+
+void MethodHandles::verify_argslots(MacroAssembler* _masm,
+                                    RegisterOrConstant arg_slots,  // slot count, as a register or a constant
+                                    Register arg_slot_base_reg,    // address of the first slot
+                                    bool negate_argslots,          // if true, the count is negated before it is added to the base
+                                    const char* error_message) {   // passed to stop() when the check fails
+  // Verify that [argslot..argslot+size) lies within (rsp, rbp).
+  Label L_ok, L_bad;
+  Register rdi_temp = rdi;
+  BLOCK_COMMENT("verify_argslots {");
+  __ push(rdi_temp);  // rdi is borrowed as scratch and restored by the pop below
+  if (negate_argslots) {
+    if (arg_slots.is_constant()) {
+      arg_slots = -1 * arg_slots.as_constant();
+    } else {
+      __ movptr(rdi_temp, arg_slots);
+      __ negptr(rdi_temp);
+      arg_slots = rdi_temp;
+    }
+  }
+  __ lea(rdi_temp, Address(arg_slot_base_reg, arg_slots, Interpreter::stackElementScale()));  // end address of the range
+  __ cmpptr(rdi_temp, rbp);
+  __ pop(rdi_temp);                 // pop does not modify the flags set by the cmpptr above
+  __ jcc(Assembler::above, L_bad);  // bad if the end address is above rbp
+  __ cmpptr(rsp, arg_slot_base_reg);
+  __ jcc(Assembler::below, L_ok);   // ok only if rsp is below the base address
+  __ bind(L_bad);
+  __ stop(error_message);
+  __ BIND(L_ok);
+  BLOCK_COMMENT("} verify_argslots");
+}
 
+// Make sure that arg_slots has the same sign as the given direction.
+// If arg_slots is an assembly-time constant, or direction is 0, also allow it to be zero.
+void MethodHandles::verify_stack_move(MacroAssembler* _masm,
+                                      RegisterOrConstant arg_slots, int direction) {  // direction: > 0, < 0, or 0 (treated as +1 with zero allowed)
+  bool allow_zero = arg_slots.is_constant();
+  if (direction == 0) { direction = +1; allow_zero = true; }
+  assert(stack_move_unit() == -1, "else add extra checks here");
+  if (arg_slots.is_register()) {  // register operand: emit run-time checks
+    Label L_ok, L_bad;
+    BLOCK_COMMENT("verify_stack_move {");
+    // testl(arg_slots.as_register(), -stack_move_unit() - 1);  // no need
+    // jcc(Assembler::notZero, L_bad);
+    __ cmpptr(arg_slots.as_register(), (int32_t) NULL_WORD);
+    if (direction > 0) {
+      __ jcc(allow_zero ? Assembler::less : Assembler::lessEqual, L_bad);        // wrong sign (or zero when not allowed)
+      __ cmpptr(arg_slots.as_register(), (int32_t) UNREASONABLE_STACK_MOVE);
+      __ jcc(Assembler::less, L_ok);                                             // and below the sanity limit
+    } else {
+      __ jcc(allow_zero ? Assembler::greater : Assembler::greaterEqual, L_bad);  // wrong sign (or zero when not allowed)
+      __ cmpptr(arg_slots.as_register(), (int32_t) -UNREASONABLE_STACK_MOVE);
+      __ jcc(Assembler::greater, L_ok);                                          // and above the negated sanity limit
+    }
+    __ bind(L_bad);
+    if (direction > 0)
+      __ stop("assert arg_slots > 0");
+    else
+      __ stop("assert arg_slots < 0");
+    __ BIND(L_ok);
+    BLOCK_COMMENT("} verify_stack_move");
+  } else {  // constant operand: checked here with asserts, no code is emitted
+    intptr_t size = arg_slots.as_constant();
+    if (direction < 0)  size = -size;
+    assert(size >= 0, "correct direction of constant move");
+    assert(size < UNREASONABLE_STACK_MOVE, "reasonable size of constant move");
+  }
+}
+
+void MethodHandles::verify_klass(MacroAssembler* _masm,
+                                 Register obj, KlassHandle klass,  // obj: oop to test; klass: the expected class
+                                 const char* error_message) {      // passed to stop() when the check fails
+  oop* klass_addr = klass.raw_value();
+  assert(klass_addr >= SystemDictionaryHandles::Object_klass().raw_value() &&
+         klass_addr <= SystemDictionaryHandles::Long_klass().raw_value(),
+         "must be one of the SystemDictionaryHandles");
+  Register temp = rdi;
+  Label L_ok, L_bad;
+  BLOCK_COMMENT("verify_klass {");
+  __ verify_oop(obj);
+  __ testptr(obj, obj);
+  __ jcc(Assembler::zero, L_bad);  // null fails the check; temp has not been pushed yet
+  __ push(temp);
+  __ load_klass(temp, obj);
+  __ cmpptr(temp, ExternalAddress((address) klass_addr));
+  __ jcc(Assembler::equal, L_ok);  // obj's klass is the expected klass
+  intptr_t super_check_offset = klass->super_check_offset();
+  __ movptr(temp, Address(temp, super_check_offset));  // word of obj's klass at the expected klass's super_check_offset
+  __ cmpptr(temp, ExternalAddress((address) klass_addr));
+  __ jcc(Assembler::equal, L_ok);
+  __ pop(temp);  // rebalance the stack here: L_bad is also reached from before the push
+  __ bind(L_bad);
+  __ stop(error_message);
+  __ BIND(L_ok);
+  __ pop(temp);  // L_ok is only reached with temp still pushed
+  BLOCK_COMMENT("} verify_klass");
+}
+#endif //ASSERT
 
 // Code generation
 address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm) {
@@ -116,6 +568,9 @@
   address entry_point = __ pc();
 
   // fetch the MethodType from the method handle into rax (the 'check' register)
+  // FIXME: Interpreter should transmit pre-popped stack pointer, to locate base of arg list.
+  // This would simplify several touchy bits of code.
+  // See 6984712: JSR 292 method handle calls need a clean argument base pointer
   {
     Register tem = rbx_method;
     for (jint* pchase = methodOopDesc::method_type_offsets_chain(); (*pchase) != -1; pchase++) {
@@ -128,17 +583,23 @@
   __ load_heap_oop(rdx_temp, Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, rdi_temp)));
   Register rdx_vmslots = rdx_temp;
   __ movl(rdx_vmslots, Address(rdx_temp, __ delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, rdi_temp)));
-  __ movptr(rcx_recv, __ argument_address(rdx_vmslots));
+  Address mh_receiver_slot_addr = __ argument_address(rdx_vmslots);
+  __ movptr(rcx_recv, mh_receiver_slot_addr);
 
   trace_method_handle(_masm, "invokeExact");
 
   __ check_method_handle_type(rax_mtype, rcx_recv, rdi_temp, wrong_method_type);
+
+  // Nobody uses the MH receiver slot after this.  Make sure.
+  DEBUG_ONLY(__ movptr(mh_receiver_slot_addr, (int32_t)0x999999));
+
   __ jump_to_method_handle_entry(rcx_recv, rdi_temp);
 
   // for invokeGeneric (only), apply argument and result conversions on the fly
   __ bind(invoke_generic_slow_path);
 #ifdef ASSERT
-  { Label L;
+  if (VerifyMethodHandles) {
+    Label L;
     __ cmpb(Address(rbx_method, methodOopDesc::intrinsic_id_offset_in_bytes()), (int) vmIntrinsics::_invokeGeneric);
     __ jcc(Assembler::equal, L);
     __ stop("bad methodOop::intrinsic_id");
@@ -150,22 +611,14 @@
   // make room on the stack for another pointer:
   Register rcx_argslot = rcx_recv;
   __ lea(rcx_argslot, __ argument_address(rdx_vmslots, 1));
-  insert_arg_slots(_masm, 2 * stack_move_unit(), _INSERT_REF_MASK,
+  insert_arg_slots(_masm, 2 * stack_move_unit(),
                    rcx_argslot, rbx_temp, rdx_temp);
 
   // load up an adapter from the calling type (Java weaves this)
-  __ load_heap_oop(rdx_temp, Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, rdi_temp)));
   Register rdx_adapter = rdx_temp;
-  // __ load_heap_oop(rdx_adapter, Address(rdx_temp, java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes()));
-  // deal with old JDK versions:
-  __ lea(rdi_temp, Address(rdx_temp, __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, rdi_temp)));
-  __ cmpptr(rdi_temp, rdx_temp);
-  Label sorry_no_invoke_generic;
-  __ jcc(Assembler::below, sorry_no_invoke_generic);
-
-  __ load_heap_oop(rdx_adapter, Address(rdi_temp, 0));
-  __ testptr(rdx_adapter, rdx_adapter);
-  __ jcc(Assembler::zero, sorry_no_invoke_generic);
+  __ load_heap_oop(rdx_temp,    Address(rax_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,               rdi_temp)));
+  __ load_heap_oop(rdx_adapter, Address(rdx_temp,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, rdi_temp)));
+  __ verify_oop(rdx_adapter);
   __ movptr(Address(rcx_argslot, 1 * Interpreter::stackElementSize), rdx_adapter);
   // As a trusted first argument, pass the type being called, so the adapter knows
   // the actual types of the arguments and return values.
@@ -176,49 +629,31 @@
   trace_method_handle(_masm, "invokeGeneric");
   __ jump_to_method_handle_entry(rcx, rdi_temp);
 
-  __ bind(sorry_no_invoke_generic); // no invokeGeneric implementation available!
-  __ movptr(rcx_recv, Address(rcx_argslot, -1 * Interpreter::stackElementSize));  // recover original MH
-  __ push(rax_mtype);       // required mtype
-  __ push(rcx_recv);        // bad mh (1st stacked argument)
-  __ jump(ExternalAddress(Interpreter::throw_WrongMethodType_entry()));
+  return entry_point;
+}
 
-  return entry_point;
+// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
+static RegisterOrConstant constant(int value) {
+  return RegisterOrConstant(value);  // explicit constant form; presumably a bare 0 is ambiguous between overloads -- confirm
 }
 
 // Helper to insert argument slots into the stack.
-// arg_slots must be a multiple of stack_move_unit() and <= 0
+// arg_slots must be a multiple of stack_move_unit() and < 0
+// rax_argslot is decremented to point to the new (shifted) location of the argslot
+// But, rdx_temp ends up holding the original value of rax_argslot.
 void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
                                      RegisterOrConstant arg_slots,
-                                     int arg_mask,
                                      Register rax_argslot,
-                                     Register rbx_temp, Register rdx_temp, Register temp3_reg) {
-  assert(temp3_reg == noreg, "temp3 not required");
+                                     Register rbx_temp, Register rdx_temp) {
+  // allow constant zero
+  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
+    return;
   assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
                              (!arg_slots.is_register() ? rsp : arg_slots.as_register()));
-
-#ifdef ASSERT
-  verify_argslot(_masm, rax_argslot, "insertion point must fall within current frame");
-  if (arg_slots.is_register()) {
-    Label L_ok, L_bad;
-    __ cmpptr(arg_slots.as_register(), (int32_t) NULL_WORD);
-    __ jccb(Assembler::greater, L_bad);
-    __ testl(arg_slots.as_register(), -stack_move_unit() - 1);
-    __ jccb(Assembler::zero, L_ok);
-    __ bind(L_bad);
-    __ stop("assert arg_slots <= 0 and clear low bits");
-    __ bind(L_ok);
-  } else {
-    assert(arg_slots.as_constant() <= 0, "");
-    assert(arg_slots.as_constant() % -stack_move_unit() == 0, "");
-  }
-#endif //ASSERT
-
-#ifdef _LP64
-  if (arg_slots.is_register()) {
-    // clean high bits of stack motion register (was loaded as an int)
-    __ movslq(arg_slots.as_register(), arg_slots.as_register());
-  }
-#endif
+  if (VerifyMethodHandles)
+    verify_argslot(_masm, rax_argslot, "insertion point must fall within current frame");
+  if (VerifyMethodHandles)
+    verify_stack_move(_masm, arg_slots, -1);
 
   // Make space on the stack for the inserted argument(s).
   // Then pull down everything shallower than rax_argslot.
@@ -230,59 +665,39 @@
   //   argslot -= size;
   BLOCK_COMMENT("insert_arg_slots {");
   __ mov(rdx_temp, rsp);                        // source pointer for copy
-  __ lea(rsp, Address(rsp, arg_slots, Address::times_ptr));
+  __ lea(rsp, Address(rsp, arg_slots, Interpreter::stackElementScale()));
   {
     Label loop;
     __ BIND(loop);
     // pull one word down each time through the loop
     __ movptr(rbx_temp, Address(rdx_temp, 0));
-    __ movptr(Address(rdx_temp, arg_slots, Address::times_ptr), rbx_temp);
+    __ movptr(Address(rdx_temp, arg_slots, Interpreter::stackElementScale()), rbx_temp);
     __ addptr(rdx_temp, wordSize);
     __ cmpptr(rdx_temp, rax_argslot);
-    __ jccb(Assembler::less, loop);
+    __ jcc(Assembler::below, loop);
   }
 
   // Now move the argslot down, to point to the opened-up space.
-  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Address::times_ptr));
+  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Interpreter::stackElementScale()));
   BLOCK_COMMENT("} insert_arg_slots");
 }
 
 // Helper to remove argument slots from the stack.
-// arg_slots must be a multiple of stack_move_unit() and >= 0
+// arg_slots must be a multiple of stack_move_unit() and > 0
 void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
-                                    RegisterOrConstant arg_slots,
-                                    Register rax_argslot,
-                                     Register rbx_temp, Register rdx_temp, Register temp3_reg) {
-  assert(temp3_reg == noreg, "temp3 not required");
+                                     RegisterOrConstant arg_slots,
+                                     Register rax_argslot,
+                                     Register rbx_temp, Register rdx_temp) {
+  // allow constant zero
+  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
+    return;
   assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
                              (!arg_slots.is_register() ? rsp : arg_slots.as_register()));
-
-#ifdef ASSERT
-  // Verify that [argslot..argslot+size) lies within (rsp, rbp).
-  __ lea(rbx_temp, Address(rax_argslot, arg_slots, Address::times_ptr));
-  verify_argslot(_masm, rbx_temp, "deleted argument(s) must fall within current frame");
-  if (arg_slots.is_register()) {
-    Label L_ok, L_bad;
-    __ cmpptr(arg_slots.as_register(), (int32_t) NULL_WORD);
-    __ jccb(Assembler::less, L_bad);
-    __ testl(arg_slots.as_register(), -stack_move_unit() - 1);
-    __ jccb(Assembler::zero, L_ok);
-    __ bind(L_bad);
-    __ stop("assert arg_slots >= 0 and clear low bits");
-    __ bind(L_ok);
-  } else {
-    assert(arg_slots.as_constant() >= 0, "");
-    assert(arg_slots.as_constant() % -stack_move_unit() == 0, "");
-  }
-#endif //ASSERT
-
-#ifdef _LP64
-  if (false) {                  // not needed, since register is positive
-    // clean high bits of stack motion register (was loaded as an int)
-    if (arg_slots.is_register())
-      __ movslq(arg_slots.as_register(), arg_slots.as_register());
-  }
-#endif
+  if (VerifyMethodHandles)
+    verify_argslots(_masm, arg_slots, rax_argslot, false,
+                    "deleted argument(s) must fall within current frame");
+  if (VerifyMethodHandles)
+    verify_stack_move(_masm, arg_slots, +1);
 
   BLOCK_COMMENT("remove_arg_slots {");
   // Pull up everything shallower than rax_argslot.
@@ -299,54 +714,344 @@
     __ BIND(loop);
     // pull one word up each time through the loop
     __ movptr(rbx_temp, Address(rdx_temp, 0));
-    __ movptr(Address(rdx_temp, arg_slots, Address::times_ptr), rbx_temp);
+    __ movptr(Address(rdx_temp, arg_slots, Interpreter::stackElementScale()), rbx_temp);
     __ addptr(rdx_temp, -wordSize);
     __ cmpptr(rdx_temp, rsp);
-    __ jccb(Assembler::greaterEqual, loop);
+    __ jcc(Assembler::aboveEqual, loop);
   }
 
   // Now move the argslot up, to point to the just-copied block.
-  __ lea(rsp, Address(rsp, arg_slots, Address::times_ptr));
+  __ lea(rsp, Address(rsp, arg_slots, Interpreter::stackElementScale()));
   // And adjust the argslot address to point at the deletion point.
-  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Address::times_ptr));
+  __ lea(rax_argslot, Address(rax_argslot, arg_slots, Interpreter::stackElementScale()));
   BLOCK_COMMENT("} remove_arg_slots");
 }
 
+// Helper to copy argument slots to the top of the stack.
+// The sequence starts with rax_argslot and is counted by slot_count
+// slot_count must be a multiple of stack_move_unit() and >= 0
+// This function blows the temps but does not change rax_argslot.
+void MethodHandles::push_arg_slots(MacroAssembler* _masm,
+                                   Register rax_argslot,           // address of the first slot to copy; not modified
+                                   RegisterOrConstant slot_count,  // number of slots to push
+                                   int skip_words_count,           // 0 or 1: words at TOS that stay on top of the pushed copies
+                                   Register rbx_temp, Register rdx_temp) {  // scratch registers
+  assert_different_registers(rax_argslot, rbx_temp, rdx_temp,
+                             (!slot_count.is_register() ? rbp : slot_count.as_register()),
+                             rsp);
+  assert(Interpreter::stackElementSize == wordSize, "else change this code");
+
+  if (VerifyMethodHandles)
+    verify_stack_move(_masm, slot_count, 0);
+
+  // allow constant zero
+  if (slot_count.is_constant() && slot_count.as_constant() == 0)
+    return;
+
+  BLOCK_COMMENT("push_arg_slots {");
+
+  Register rbx_top = rbx_temp;
+
+  // There is at most 1 word to carry down with the TOS.
+  switch (skip_words_count) {
+  case 1: __ pop(rdx_temp); break;  // park the TOS word while the copies are pushed
+  case 0:                   break;
+  default: ShouldNotReachHere();
+  }
+
+  if (slot_count.is_constant()) {
+    for (int i = slot_count.as_constant() - 1; i >= 0; i--) {  // highest-addressed slot first, so the copy keeps the same order in memory
+      __ pushptr(Address(rax_argslot, i * wordSize));
+    }
+  } else {
+    Label L_plural, L_loop, L_break;
+    // Emit code to dynamically check for the common cases, zero and one slot.
+    __ cmpl(slot_count.as_register(), (int32_t) 1);
+    __ jccb(Assembler::greater, L_plural);
+    __ jccb(Assembler::less, L_break);   // count < 1: nothing to push
+    __ pushptr(Address(rax_argslot, 0));  // exactly one slot
+    __ jmpb(L_break);
+    __ BIND(L_plural);
+
+    // Loop for 2 or more:
+    //   rbx = &rax[slot_count]
+    //   while (rbx > rax)  *(--rsp) = *(--rbx)
+    __ lea(rbx_top, Address(rax_argslot, slot_count, Address::times_ptr));
+    __ BIND(L_loop);
+    __ subptr(rbx_top, wordSize);
+    __ pushptr(Address(rbx_top, 0));
+    __ cmpptr(rbx_top, rax_argslot);
+    __ jcc(Assembler::above, L_loop);
+    __ bind(L_break);
+  }
+  switch (skip_words_count) {
+  case 1: __ push(rdx_temp); break;  // put the parked word back on top
+  case 0:                    break;
+  default: ShouldNotReachHere();
+  }
+  BLOCK_COMMENT("} push_arg_slots");
+}
+
+// in-place movement; no change to rsp
+// blows rax_temp, rdx_temp
+void MethodHandles::move_arg_slots_up(MacroAssembler* _masm,
+                                      Register rbx_bottom,  // invariant
+                                      Address  top_addr,     // can use rax_temp
+                                      RegisterOrConstant positive_distance_in_slots,  // how far each word is moved, in slots
+                                      Register rax_temp, Register rdx_temp) {
+  BLOCK_COMMENT("move_arg_slots_up {");
+  assert_different_registers(rbx_bottom,
+                             rax_temp, rdx_temp,
+                             positive_distance_in_slots.register_or_noreg());
+  Label L_loop, L_break;
+  Register rax_top = rax_temp;
+  if (!top_addr.is_same_address(Address(rax_top, 0)))  // skip the lea when rax already holds the top address
+    __ lea(rax_top, top_addr);
+  // Detect empty (or broken) loop:
+#ifdef ASSERT
+  if (VerifyMethodHandles) {
+    // Verify that &bottom < &top (non-empty interval)
+    Label L_ok, L_bad;
+    if (positive_distance_in_slots.is_register()) {
+      __ cmpptr(positive_distance_in_slots.as_register(), (int32_t) 0);
+      __ jcc(Assembler::lessEqual, L_bad);
+    }
+    __ cmpptr(rbx_bottom, rax_top);
+    __ jcc(Assembler::below, L_ok);
+    __ bind(L_bad);
+    __ stop("valid bounds (copy up)");
+    __ BIND(L_ok);
+  }
+#endif
+  __ cmpptr(rbx_bottom, rax_top);
+  __ jccb(Assembler::aboveEqual, L_break);  // empty interval: nothing to move
+  // work rax down to rbx, copying contiguous data upwards
+  // In pseudo-code:
+  //   [rbx, rax) = &[bottom, top)
+  //   while (rax > rbx) { --rax; *(rax + distance) = *(rax + 0); }
+  __ BIND(L_loop);
+  __ subptr(rax_top, wordSize);
+  __ movptr(rdx_temp, Address(rax_top, 0));
+  __ movptr(          Address(rax_top, positive_distance_in_slots, Address::times_ptr), rdx_temp);
+  __ cmpptr(rax_top, rbx_bottom);
+  __ jcc(Assembler::above, L_loop);
+  assert(Interpreter::stackElementSize == wordSize, "else change loop");
+  __ bind(L_break);
+  BLOCK_COMMENT("} move_arg_slots_up");
+}
+
+// in-place movement; no change to rsp
+// blows rax_temp, rdx_temp
+void MethodHandles::move_arg_slots_down(MacroAssembler* _masm,
+                                        Address  bottom_addr,  // can use rax_temp
+                                        Register rbx_top,      // invariant
+                                        RegisterOrConstant negative_distance_in_slots,  // how far each word is moved, in slots (must be < 0)
+                                        Register rax_temp, Register rdx_temp) {
+  BLOCK_COMMENT("move_arg_slots_down {");
+  assert_different_registers(rbx_top,
+                             negative_distance_in_slots.register_or_noreg(),
+                             rax_temp, rdx_temp);
+  Label L_loop, L_break;
+  Register rax_bottom = rax_temp;
+  if (!bottom_addr.is_same_address(Address(rax_bottom, 0)))  // skip the lea when rax already holds the bottom address
+    __ lea(rax_bottom, bottom_addr);
+  // Detect empty (or broken) loop:
+#ifdef ASSERT
+  assert(!negative_distance_in_slots.is_constant() || negative_distance_in_slots.as_constant() < 0, "");
+  if (VerifyMethodHandles) {
+    // Verify that &bottom < &top (non-empty interval)
+    Label L_ok, L_bad;
+    if (negative_distance_in_slots.is_register()) {
+      __ cmpptr(negative_distance_in_slots.as_register(), (int32_t) 0);
+      __ jcc(Assembler::greaterEqual, L_bad);
+    }
+    __ cmpptr(rax_bottom, rbx_top);
+    __ jcc(Assembler::below, L_ok);
+    __ bind(L_bad);
+    __ stop("valid bounds (copy down)");
+    __ BIND(L_ok);
+  }
+#endif
+  __ cmpptr(rax_bottom, rbx_top);
+  __ jccb(Assembler::aboveEqual, L_break);  // empty interval: nothing to move
+  // work rax up to rbx, copying contiguous data downwards
+  // In pseudo-code:
+  //   [rax, rbx) = &[bottom, top)
+  //   while (rax < rbx) *(rax + negative_distance) = *(rax + 0), rax++;
+  __ BIND(L_loop);
+  __ movptr(rdx_temp, Address(rax_bottom, 0));
+  __ movptr(          Address(rax_bottom, negative_distance_in_slots, Address::times_ptr), rdx_temp);
+  __ addptr(rax_bottom, wordSize);
+  __ cmpptr(rax_bottom, rbx_top);
+  __ jcc(Assembler::below, L_loop);
+  assert(Interpreter::stackElementSize == wordSize, "else change loop");
+  __ bind(L_break);
+  BLOCK_COMMENT("} move_arg_slots_down");
+}
+
+// Copy from a field or array element to a stacked argument slot.
+// is_element says whether caller is loading an array element instead of an instance field; it only selects the block comment text.
+void MethodHandles::move_typed_arg(MacroAssembler* _masm,
+                                   BasicType type, bool is_element,
+                                   Address slot_dest, Address value_src,  // destination stack slot, source field/element
+                                   Register rbx_temp, Register rdx_temp) {  // both temps may be overwritten
+  BLOCK_COMMENT(!is_element ? "move_typed_arg {" : "move_typed_arg { (array element)");
+  if (type == T_OBJECT || type == T_ARRAY) {  // references: load with load_heap_oop, store as a pointer-sized word
+    __ load_heap_oop(rbx_temp, value_src);
+    __ movptr(slot_dest, rbx_temp);
+  } else if (type != T_VOID) {  // T_VOID: nothing is moved
+    int  arg_size      = type2aelembytes(type);
+    bool arg_is_signed = is_signed_subword_type(type);
+    int  slot_size     = (arg_size > wordSize) ? arg_size : wordSize;  // the store always covers at least one full word
+    __ load_sized_value(  rdx_temp,  value_src, arg_size, arg_is_signed, rbx_temp);
+    __ store_sized_value( slot_dest, rdx_temp,  slot_size,               rbx_temp);
+  }
+  BLOCK_COMMENT("} move_typed_arg");
+}
+
+void MethodHandles::move_return_value(MacroAssembler* _masm, BasicType type,  // type selects which return register is read
+                                      Address return_slot) {  // memory location that receives the returned value
+  BLOCK_COMMENT("move_return_value {");
+  // Old versions of the JVM must clean the FPU stack after every return.
+#ifndef _LP64
+#ifdef COMPILER2
+  // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
+  if ((type == T_FLOAT && UseSSE < 1) || (type == T_DOUBLE && UseSSE < 2)) {
+    for (int i = 1; i < 8; i++) {  // free registers 1..7 only; register 0 is stored by fstp_s/fstp_d below
+        __ ffree(i);
+    }
+  } else if (UseSSE < 2) {
+    __ empty_FPU_stack();
+  }
+#endif //COMPILER2
+#endif //!_LP64
+
+  // Look at the type and pull the value out of the corresponding register.
+  if (type == T_VOID) {
+    // nothing to do
+  } else if (type == T_OBJECT) {
+    __ movptr(return_slot, rax);
+  } else if (type == T_INT || is_subword_type(type)) {
+    // write the whole word, even if only 32 bits is significant
+    __ movptr(return_slot, rax);
+  } else if (type == T_LONG) {
+    // store the value by parts
+    // Note: We assume longs are contiguous (if misaligned) on the interpreter stack.
+    __ store_sized_value(return_slot, rax, BytesPerLong, rdx);
+  } else if (NOT_LP64((type == T_FLOAT  && UseSSE < 1) ||
+                      (type == T_DOUBLE && UseSSE < 2) ||)
+             false) {
+    // Use old x86 FPU registers:
+    if (type == T_FLOAT)
+      __ fstp_s(return_slot);
+    else
+      __ fstp_d(return_slot);
+  } else if (type == T_FLOAT) {
+    __ movflt(return_slot, xmm0);
+  } else if (type == T_DOUBLE) {
+    __ movdbl(return_slot, xmm0);
+  } else {
+    ShouldNotReachHere();  // any other type (T_ARRAY included) is not handled here
+  }
+  BLOCK_COMMENT("} move_return_value");
+}
+
+
 #ifndef PRODUCT
 extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
+                              oop mh,                // value of rcx; printed as a method handle unless this is a return adapter
+                              intptr_t* saved_regs,  // start of the register words dumped when Verbose
+                              intptr_t* entry_sp,    // printed as "sp"; also the base of the Verbose stack dump
                               intptr_t* saved_sp,
-                              oop mh,
-                              intptr_t* sp) {
+                              intptr_t* saved_bp) {  // base for the last_sp and monitor-block-top slot reads below
   // called as a leaf from native code: do not block the JVM!
-  intptr_t* entry_sp = sp + LP64_ONLY(16) NOT_LP64(8);
-  tty->print_cr("MH %s mh="INTPTR_FORMAT" sp="INTPTR_FORMAT" saved_sp="INTPTR_FORMAT")",
-                adaptername, (intptr_t)mh, (intptr_t)entry_sp, saved_sp);
+  bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have rcx_mh
+  intptr_t* last_sp = (intptr_t*) saved_bp[frame::interpreter_frame_last_sp_offset];
+  intptr_t* base_sp = last_sp;  // default: the printed stack_size (base_sp - last_sp) is 0
+  typedef MethodHandles::RicochetFrame RicochetFrame;
+  RicochetFrame* rfp = (RicochetFrame*)((address)saved_bp - RicochetFrame::sender_link_offset_in_bytes());  // treat saved_bp as the address of a RicochetFrame's sender link
+  if (!UseRicochetFrames || Universe::heap()->is_in((address) rfp->saved_args_base())) {
+    // Probably an interpreter frame.
+    base_sp = (intptr_t*) saved_bp[frame::interpreter_frame_monitor_block_top_offset];
+  }
+  intptr_t    mh_reg = (intptr_t)mh;
+  const char* mh_reg_name = "rcx_mh";
+  if (!has_mh)  mh_reg_name = "rcx";
+  tty->print_cr("MH %s %s="PTR_FORMAT" sp=("PTR_FORMAT"+"INTX_FORMAT") stack_size="INTX_FORMAT" bp="PTR_FORMAT,
+                adaptername, mh_reg_name, mh_reg,
+                (intptr_t)entry_sp, (intptr_t)(saved_sp - entry_sp), (intptr_t)(base_sp - last_sp), (intptr_t)saved_bp);
   if (Verbose) {
-    print_method_handle(mh);
+    tty->print(" reg dump: ");
+    int saved_regs_count = (entry_sp-1) - saved_regs;  // number of words between saved_regs and entry_sp-1
+    // 32 bit: rdi rsi rbp rsp; rbx rdx rcx (*) rax
+    int i;
+    for (i = 0; i <= saved_regs_count; i++) {  // inclusive bound: prints saved_regs_count + 1 words
+      if (i > 0 && i % 4 == 0 && i != saved_regs_count) {  // start a new output line after every fourth word
+        tty->cr();
+        tty->print("   + dump: ");
+      }
+      tty->print(" %d: "PTR_FORMAT, i, saved_regs[i]);
+    }
+    tty->cr();
+    if (last_sp != saved_sp && last_sp != NULL)
+      tty->print_cr("*** last_sp="PTR_FORMAT, (intptr_t)last_sp);
+    int stack_dump_count = 16;  // dump at least 16 words
+    if (stack_dump_count < (int)(saved_bp + 2 - saved_sp))  // grow the dump to reach saved_bp + 2, measured from saved_sp
+      stack_dump_count = (int)(saved_bp + 2 - saved_sp);
+    if (stack_dump_count > 64)  stack_dump_count = 48;  // NOTE(review): counts of 49..64 pass through but larger ones drop to 48 - confirm the intended cap
+    for (i = 0; i < stack_dump_count; i += 4) {  // four words per output line, read from entry_sp
+      tty->print_cr(" dump at SP[%d] "PTR_FORMAT": "PTR_FORMAT" "PTR_FORMAT" "PTR_FORMAT" "PTR_FORMAT,
+                    i, (intptr_t) &entry_sp[i+0], entry_sp[i+0], entry_sp[i+1], entry_sp[i+2], entry_sp[i+3]);
+    }
+    if (has_mh)
+      print_method_handle(mh);
   }
 }
+
+// The stub wraps the arguments in a struct on the stack to avoid
+// dealing with the different calling conventions for passing 6
+// arguments.  The record is built by pushes in trace_method_handle, so field order here must mirror the reverse push order.
+struct MethodHandleStubArguments {
+  const char* adaptername;  // pushed last, so it sits at the record's base address
+  oopDesc* mh;              // value of rcx at stub entry
+  intptr_t* saved_regs;     // address of the pusha register block
+  intptr_t* entry_sp;
+  intptr_t* saved_sp;
+  intptr_t* saved_bp;       // pushed first, so it sits at the highest address
+};
+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) {  // single-pointer entry point called from the generated stub
+  trace_method_handle_stub(args->adaptername,  // unpack the record and forward the fields in parameter order
+                           args->mh,
+                           args->saved_regs,
+                           args->entry_sp,
+                           args->saved_sp,
+                           args->saved_bp);
+}
+
 void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
   if (!TraceMethodHandles)  return;
   BLOCK_COMMENT("trace_method_handle {");
+  __ push(rax);               // free rax as a temp; restored by the final pop(rax)
+  __ lea(rax, Address(rsp, wordSize * NOT_LP64(6) LP64_ONLY(14))); // entry_sp
   __ pusha();
-#ifdef _LP64
-  // Pass arguments carefully since the registers overlap with the calling convention.
+  __ mov(rbx, rsp);           // rbx = address of the register block just written by pusha
+  __ enter();
+  // incoming state:
   // rcx: method handle
-  // r13: saved sp
-  __ mov(c_rarg2, rcx); // mh
-  __ mov(c_rarg1, r13); // saved sp
-  __ mov(c_rarg3, rsp); // sp
-  __ movptr(c_rarg0, (intptr_t) adaptername);
-  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), c_rarg0, c_rarg1, c_rarg2, c_rarg3);
-#else
-  // arguments:
-  // rcx: method handle
-  // rsi: saved sp
-  __ movptr(rbx, (intptr_t) adaptername);
-  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), rbx, rsi, rcx, rsp);
-#endif
+  // r13 or rsi: saved sp
+  // To avoid calling convention issues, build a record (MethodHandleStubArguments, fields pushed last-to-first) on the stack and pass the pointer to that instead.
+  __ push(rbp);               // saved_bp
+  __ push(saved_last_sp_register()); // saved_sp: r13 on LP64, rsi on 32-bit (was rsi unconditionally, wrong on LP64)
+  __ push(rax);               // entry_sp
+  __ push(rbx);               // pusha saved_regs
+  __ push(rcx);               // mh
+  __ push(rcx);               // adaptername (placeholder word, overwritten by the next instruction)
+  __ movptr(Address(rsp, 0), (intptr_t) adaptername);
+  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub_wrapper), rsp);  // rsp = address of the record
+  __ leave();                 // undo enter(), which also drops the record pushed above
   __ popa();
+  __ pop(rax);
   BLOCK_COMMENT("} trace_method_handle");
 }
 #endif //PRODUCT
@@ -358,13 +1063,20 @@
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
+          //OP_PRIM_TO_REF is below...
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
          |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
-         //|(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS) //BUG!
+          //OP_COLLECT_ARGS is below...
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS)
+         |(!UseRicochetFrames ? 0 :
+           java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() <= 0 ? 0 :
+           ((1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF)
+           |(1<<java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS)
+           |(1<<java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS)
+            ))
          );
-  // FIXME: MethodHandlesTest gets a crash if we enable OP_SPREAD_ARGS.
 }
 
 //------------------------------------------------------------------------------
@@ -373,6 +1085,8 @@
 // Generate an "entry" field for a method handle.
 // This determines how the method handle will respond to calls.
 void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) {
+  MethodHandles::EntryKind ek_orig = ek_original_kind(ek);
+
   // Here is the register state during an interpreted call,
   // as set up by generate_method_handle_interpreter_entry():
   // - rbx: garbage temp (was MethodHandle.invoke methodOop, unused)
@@ -385,10 +1099,11 @@
   const Register rax_argslot = rax;
   const Register rbx_temp    = rbx;
   const Register rdx_temp    = rdx;
+  const Register rdi_temp    = rdi;
 
   // This guy is set up by prepare_to_jump_from_interpreted (from interpreted calls)
   // and gen_c2i_adapter (from compiled calls):
-  const Register saved_last_sp = LP64_ONLY(r13) NOT_LP64(rsi);
+  const Register saved_last_sp = saved_last_sp_register();
 
   // Argument registers for _raise_exception.
   // 32-bit: Pass first two oop/int args in registers ECX and EDX.
@@ -421,6 +1136,13 @@
     return;
   }
 
+#ifdef ASSERT
+  __ push((int32_t) 0xEEEEEEEE);
+  __ push((int32_t) (intptr_t) entry_name(ek));
+  LP64_ONLY(__ push((int32_t) high((intptr_t) entry_name(ek))));
+  __ push((int32_t) 0x33333333);
+#endif //ASSERT
+
   address interp_entry = __ pc();
 
   trace_method_handle(_masm, entry_name(ek));
@@ -536,7 +1258,6 @@
       __ load_klass(rax_klass, rcx_recv);
       __ verify_oop(rax_klass);
 
-      Register rdi_temp   = rdi;
       Register rbx_method = rbx_index;
 
       // get interface klass
@@ -572,16 +1293,14 @@
   case _bound_long_direct_mh:
     {
       bool direct_to_method = (ek >= _bound_ref_direct_mh);
-      BasicType arg_type  = T_ILLEGAL;
-      int       arg_mask  = _INSERT_NO_MASK;
-      int       arg_slots = -1;
-      get_ek_bound_mh_info(ek, arg_type, arg_mask, arg_slots);
+      BasicType arg_type  = ek_bound_mh_arg_type(ek);
+      int       arg_slots = type2size[arg_type];
 
       // make room for the new argument:
       __ movl(rax_argslot, rcx_bmh_vmargslot);
       __ lea(rax_argslot, __ argument_address(rax_argslot));
 
-      insert_arg_slots(_masm, arg_slots * stack_move_unit(), arg_mask, rax_argslot, rbx_temp, rdx_temp);
+      insert_arg_slots(_masm, arg_slots * stack_move_unit(), rax_argslot, rbx_temp, rdx_temp);
 
       // store bound argument into the new stack slot:
       __ load_heap_oop(rbx_temp, rcx_bmh_argument);
@@ -589,9 +1308,10 @@
         __ movptr(Address(rax_argslot, 0), rbx_temp);
       } else {
         Address prim_value_addr(rbx_temp, java_lang_boxing_object::value_offset_in_bytes(arg_type));
-        const int arg_size = type2aelembytes(arg_type);
-        __ load_sized_value(rdx_temp, prim_value_addr, arg_size, is_signed_subword_type(arg_type), rbx_temp);
-        __ store_sized_value(Address(rax_argslot, 0), rdx_temp, arg_size, rbx_temp);
+        move_typed_arg(_masm, arg_type, false,
+                       Address(rax_argslot, 0),
+                       prim_value_addr,
+                       rbx_temp, rdx_temp);
       }
 
       if (direct_to_method) {
@@ -628,7 +1348,7 @@
 
       // What class are we casting to?
       __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
-      __ load_heap_oop(rbx_klass, Address(rbx_klass, java_lang_Class::klass_offset_in_bytes()));
+      load_klass_from_Class(_masm, rbx_klass);
 
       Label done;
       __ movptr(rdx_temp, vmarg);
@@ -663,6 +1383,7 @@
 
   case _adapter_prim_to_prim:
   case _adapter_ref_to_prim:
+  case _adapter_prim_to_ref:
     // handled completely by optimized cases
     __ stop("init_AdapterMethodHandle should not issue this");
     break;
@@ -714,8 +1435,7 @@
 
       // Do the requested conversion and store the value.
       Register rbx_vminfo = rbx_temp;
-      __ movl(rbx_vminfo, rcx_amh_conversion);
-      assert(CONV_VMINFO_SHIFT == 0, "preshifted");
+      load_conversion_vminfo(_masm, rbx_vminfo, rcx_amh_conversion);
 
       // get the new MH:
       __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
@@ -753,7 +1473,7 @@
 
       // on a little-endian machine we keep the first slot and add another after
       __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
-      insert_arg_slots(_masm, stack_move_unit(), _INSERT_INT_MASK,
+      insert_arg_slots(_masm, stack_move_unit(),
                        rax_argslot, rbx_temp, rdx_temp);
       Address vmarg1(rax_argslot, -Interpreter::stackElementSize);
       Address vmarg2 = vmarg1.plus_disp(Interpreter::stackElementSize);
@@ -805,7 +1525,7 @@
       __ movl(rax_argslot, rcx_amh_vmargslot);
       __ lea(rax_argslot, __ argument_address(rax_argslot, 1));
       if (ek == _adapter_opt_f2d) {
-        insert_arg_slots(_masm, stack_move_unit(), _INSERT_INT_MASK,
+        insert_arg_slots(_masm, stack_move_unit(),
                          rax_argslot, rbx_temp, rdx_temp);
       }
       Address vmarg(rax_argslot, -Interpreter::stackElementSize);
@@ -823,7 +1543,7 @@
 #else //_LP64
       if (ek == _adapter_opt_f2d) {
         __ fld_s(vmarg);        // load float to ST0
-        __ fstp_s(vmarg);       // store single
+        __ fstp_d(vmarg);       // store double
       } else {
         __ fld_d(vmarg);        // load double to ST0
         __ fstp_s(vmarg);       // store single
@@ -840,10 +1560,6 @@
     }
     break;
 
-  case _adapter_prim_to_ref:
-    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
-    break;
-
   case _adapter_swap_args:
   case _adapter_rot_args:
     // handled completely by optimized cases
@@ -857,8 +1573,8 @@
   case _adapter_opt_rot_2_up:
   case _adapter_opt_rot_2_down:
     {
-      int swap_bytes = 0, rotate = 0;
-      get_ek_adapter_opt_swap_rot_info(ek, swap_bytes, rotate);
+      int swap_slots = ek_adapter_opt_swap_slots(ek);
+      int rotate     = ek_adapter_opt_swap_mode(ek);
 
       // 'argslot' is the position of the first argument to swap
       __ movl(rax_argslot, rcx_amh_vmargslot);
@@ -866,83 +1582,69 @@
 
       // 'vminfo' is the second
       Register rbx_destslot = rbx_temp;
-      __ movl(rbx_destslot, rcx_amh_conversion);
-      assert(CONV_VMINFO_SHIFT == 0, "preshifted");
-      __ andl(rbx_destslot, CONV_VMINFO_MASK);
+      load_conversion_vminfo(_masm, rbx_destslot, rcx_amh_conversion);
       __ lea(rbx_destslot, __ argument_address(rbx_destslot));
-      DEBUG_ONLY(verify_argslot(_masm, rbx_destslot, "swap point must fall within current frame"));
+      if (VerifyMethodHandles)
+        verify_argslot(_masm, rbx_destslot, "swap point must fall within current frame");
 
+      assert(Interpreter::stackElementSize == wordSize, "else rethink use of wordSize here");
       if (!rotate) {
-        for (int i = 0; i < swap_bytes; i += wordSize) {
-          __ movptr(rdx_temp, Address(rax_argslot , i));
-          __ push(rdx_temp);
-          __ movptr(rdx_temp, Address(rbx_destslot, i));
-          __ movptr(Address(rax_argslot, i), rdx_temp);
-          __ pop(rdx_temp);
-          __ movptr(Address(rbx_destslot, i), rdx_temp);
+        // simple swap
+        for (int i = 0; i < swap_slots; i++) {
+          __ movptr(rdi_temp, Address(rax_argslot,  i * wordSize));
+          __ movptr(rdx_temp, Address(rbx_destslot, i * wordSize));
+          __ movptr(Address(rax_argslot,  i * wordSize), rdx_temp);
+          __ movptr(Address(rbx_destslot, i * wordSize), rdi_temp);
         }
       } else {
-        // push the first chunk, which is going to get overwritten
-        for (int i = swap_bytes; (i -= wordSize) >= 0; ) {
-          __ movptr(rdx_temp, Address(rax_argslot, i));
-          __ push(rdx_temp);
+        // A rotate is actually a pair of moves, with an "odd slot" (or pair)
+        // changing place with a series of other slots.
+        // First, push the "odd slot", which is going to get overwritten
+        for (int i = swap_slots - 1; i >= 0; i--) {
+          // handle one with rdi_temp instead of a push:
+          if (i == 0)  __ movptr(rdi_temp, Address(rax_argslot, i * wordSize));
+          else         __ pushptr(         Address(rax_argslot, i * wordSize));
         }
-
         if (rotate > 0) {
-          // rotate upward
-          __ subptr(rax_argslot, swap_bytes);
-#ifdef ASSERT
-          {
-            // Verify that argslot > destslot, by at least swap_bytes.
-            Label L_ok;
-            __ cmpptr(rax_argslot, rbx_destslot);
-            __ jccb(Assembler::aboveEqual, L_ok);
-            __ stop("source must be above destination (upward rotation)");
-            __ bind(L_ok);
-          }
-#endif
+          // Here is rotate > 0:
+          // (low mem)                                          (high mem)
+          //     | dest:     more_slots...     | arg: odd_slot :arg+1 |
+          // =>
+          //     | dest: odd_slot | dest+1: more_slots...      :arg+1 |
           // work argslot down to destslot, copying contiguous data upwards
           // pseudo-code:
           //   rax = src_addr - swap_bytes
           //   rbx = dest_addr
           //   while (rax >= rbx) *(rax + swap_bytes) = *(rax + 0), rax--;
-          Label loop;
-          __ bind(loop);
-          __ movptr(rdx_temp, Address(rax_argslot, 0));
-          __ movptr(Address(rax_argslot, swap_bytes), rdx_temp);
-          __ addptr(rax_argslot, -wordSize);
-          __ cmpptr(rax_argslot, rbx_destslot);
-          __ jccb(Assembler::aboveEqual, loop);
+          move_arg_slots_up(_masm,
+                            rbx_destslot,
+                            Address(rax_argslot, 0),
+                            swap_slots,
+                            rax_argslot, rdx_temp);
         } else {
-          __ addptr(rax_argslot, swap_bytes);
-#ifdef ASSERT
-          {
-            // Verify that argslot < destslot, by at least swap_bytes.
-            Label L_ok;
-            __ cmpptr(rax_argslot, rbx_destslot);
-            __ jccb(Assembler::belowEqual, L_ok);
-            __ stop("source must be below destination (downward rotation)");
-            __ bind(L_ok);
-          }
-#endif
+          // Here is the other direction, rotate < 0:
+          // (low mem)                                          (high mem)
+          //     | arg: odd_slot | arg+1: more_slots...       :dest+1 |
+          // =>
+          //     | arg:    more_slots...     | dest: odd_slot :dest+1 |
           // work argslot up to destslot, copying contiguous data downwards
           // pseudo-code:
           //   rax = src_addr + swap_bytes
           //   rbx = dest_addr
           //   while (rax <= rbx) *(rax - swap_bytes) = *(rax + 0), rax++;
-          Label loop;
-          __ bind(loop);
-          __ movptr(rdx_temp, Address(rax_argslot, 0));
-          __ movptr(Address(rax_argslot, -swap_bytes), rdx_temp);
-          __ addptr(rax_argslot, wordSize);
-          __ cmpptr(rax_argslot, rbx_destslot);
-          __ jccb(Assembler::belowEqual, loop);
+          __ addptr(rbx_destslot, wordSize);
+          move_arg_slots_down(_masm,
+                              Address(rax_argslot, swap_slots * wordSize),
+                              rbx_destslot,
+                              -swap_slots,
+                              rax_argslot, rdx_temp);
+
+          __ subptr(rbx_destslot, wordSize);
         }
-
         // pop the original first chunk into the destination slot, now free
-        for (int i = 0; i < swap_bytes; i += wordSize) {
-          __ pop(rdx_temp);
-          __ movptr(Address(rbx_destslot, i), rdx_temp);
+        for (int i = 0; i < swap_slots; i++) {
+          if (i == 0)  __ movptr(Address(rbx_destslot, i * wordSize), rdi_temp);
+          else         __ popptr(Address(rbx_destslot, i * wordSize));
         }
       }
 
@@ -958,53 +1660,22 @@
       __ lea(rax_argslot, __ argument_address(rax_argslot));
 
       // 'stack_move' is negative number of words to duplicate
-      Register rdx_stack_move = rdx_temp;
-      __ movl2ptr(rdx_stack_move, rcx_amh_conversion);
-      __ sarptr(rdx_stack_move, CONV_STACK_MOVE_SHIFT);
-
-      int argslot0_num = 0;
-      Address argslot0 = __ argument_address(RegisterOrConstant(argslot0_num));
-      assert(argslot0.base() == rsp, "");
-      int pre_arg_size = argslot0.disp();
-      assert(pre_arg_size % wordSize == 0, "");
-      assert(pre_arg_size > 0, "must include PC");
-
-      // remember the old rsp+1 (argslot[0])
-      Register rbx_oldarg = rbx_temp;
-      __ lea(rbx_oldarg, argslot0);
+      Register rdi_stack_move = rdi_temp;
+      load_stack_move(_masm, rdi_stack_move, rcx_recv, true);
 
-      // move rsp down to make room for dups
-      __ lea(rsp, Address(rsp, rdx_stack_move, Address::times_ptr));
-
-      // compute the new rsp+1 (argslot[0])
-      Register rdx_newarg = rdx_temp;
-      __ lea(rdx_newarg, argslot0);
-
-      __ push(rdi);             // need a temp
-      // (preceding push must be done after arg addresses are taken!)
-
-      // pull down the pre_arg_size data (PC)
-      for (int i = -pre_arg_size; i < 0; i += wordSize) {
-        __ movptr(rdi, Address(rbx_oldarg, i));
-        __ movptr(Address(rdx_newarg, i), rdi);
+      if (VerifyMethodHandles) {
+        verify_argslots(_masm, rdi_stack_move, rax_argslot, true,
+                        "copied argument(s) must fall within current frame");
       }
 
-      // copy from rax_argslot[0...] down to new_rsp[1...]
-      // pseudo-code:
-      //   rbx = old_rsp+1
-      //   rdx = new_rsp+1
-      //   rax = argslot
-      //   while (rdx < rbx) *rdx++ = *rax++
-      Label loop;
-      __ bind(loop);
-      __ movptr(rdi, Address(rax_argslot, 0));
-      __ movptr(Address(rdx_newarg, 0), rdi);
-      __ addptr(rax_argslot, wordSize);
-      __ addptr(rdx_newarg, wordSize);
-      __ cmpptr(rdx_newarg, rbx_oldarg);
-      __ jccb(Assembler::less, loop);
+      // insert location is always the bottom of the argument list:
+      Address insert_location = __ argument_address(constant(0));
+      int pre_arg_words = insert_location.disp() / wordSize;   // return PC is pushed
+      assert(insert_location.base() == rsp, "");
 
-      __ pop(rdi);              // restore temp
+      __ negl(rdi_stack_move);
+      push_arg_slots(_masm, rax_argslot, rdi_stack_move,
+                     pre_arg_words, rbx_temp, rdx_temp);
 
       __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
       __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
@@ -1017,63 +1688,583 @@
       __ movl(rax_argslot, rcx_amh_vmargslot);
       __ lea(rax_argslot, __ argument_address(rax_argslot));
 
-      __ push(rdi);             // need a temp
       // (must do previous push after argslot address is taken)
 
       // 'stack_move' is number of words to drop
-      Register rdi_stack_move = rdi;
-      __ movl2ptr(rdi_stack_move, rcx_amh_conversion);
-      __ sarptr(rdi_stack_move, CONV_STACK_MOVE_SHIFT);
+      Register rdi_stack_move = rdi_temp;
+      load_stack_move(_masm, rdi_stack_move, rcx_recv, false);
       remove_arg_slots(_masm, rdi_stack_move,
                        rax_argslot, rbx_temp, rdx_temp);
 
-      __ pop(rdi);              // restore temp
-
       __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
       __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
     }
     break;
 
   case _adapter_collect_args:
-    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
-    break;
-
+  case _adapter_fold_args:
   case _adapter_spread_args:
     // handled completely by optimized cases
     __ stop("init_AdapterMethodHandle should not issue this");
     break;
 
+  case _adapter_opt_collect_ref:
+  case _adapter_opt_collect_int:
+  case _adapter_opt_collect_long:
+  case _adapter_opt_collect_float:
+  case _adapter_opt_collect_double:
+  case _adapter_opt_collect_void:
+  case _adapter_opt_collect_0_ref:
+  case _adapter_opt_collect_1_ref:
+  case _adapter_opt_collect_2_ref:
+  case _adapter_opt_collect_3_ref:
+  case _adapter_opt_collect_4_ref:
+  case _adapter_opt_collect_5_ref:
+  case _adapter_opt_filter_S0_ref:
+  case _adapter_opt_filter_S1_ref:
+  case _adapter_opt_filter_S2_ref:
+  case _adapter_opt_filter_S3_ref:
+  case _adapter_opt_filter_S4_ref:
+  case _adapter_opt_filter_S5_ref:
+  case _adapter_opt_collect_2_S0_ref:
+  case _adapter_opt_collect_2_S1_ref:
+  case _adapter_opt_collect_2_S2_ref:
+  case _adapter_opt_collect_2_S3_ref:
+  case _adapter_opt_collect_2_S4_ref:
+  case _adapter_opt_collect_2_S5_ref:
+  case _adapter_opt_fold_ref:
+  case _adapter_opt_fold_int:
+  case _adapter_opt_fold_long:
+  case _adapter_opt_fold_float:
+  case _adapter_opt_fold_double:
+  case _adapter_opt_fold_void:
+  case _adapter_opt_fold_1_ref:
+  case _adapter_opt_fold_2_ref:
+  case _adapter_opt_fold_3_ref:
+  case _adapter_opt_fold_4_ref:
+  case _adapter_opt_fold_5_ref:
+    {
+      // Given a fresh incoming stack frame, build a new ricochet frame.
+      // On entry, TOS points at a return PC, and RBP is the caller's frame ptr.
+      // RSI/R13 has the caller's exact stack pointer, which we must also preserve.
+      // RCX contains an AdapterMethodHandle of the indicated kind.
+
+      // Relevant AMH fields:
+      // amh.vmargslot:
+      //   points to the trailing edge of the arguments
+      //   to filter, collect, or fold.  For a boxing operation,
+      //   it points just after the single primitive value.
+      // amh.argument:
+      //   recursively called MH, on |collect| arguments
+      // amh.vmtarget:
+      //   final destination MH, on return value, etc.
+      // amh.conversion.dest:
+      //   tells what is the type of the return value
+      //   (not needed here, since dest is also derived from ek)
+      // amh.conversion.vminfo:
+      //   points to the trailing edge of the return value
+      //   when the vmtarget is to be called; this is
+      //   equal to vmargslot + (retained ? |collect| : 0)
+
+      // Pass 0 or more argument slots to the recursive target.
+      int collect_count_constant = ek_adapter_opt_collect_count(ek);
+
+      // The collected arguments are copied from the saved argument list:
+      int collect_slot_constant = ek_adapter_opt_collect_slot(ek);
+
+      assert(ek_orig == _adapter_collect_args ||
+             ek_orig == _adapter_fold_args, "");
+      bool retain_original_args = (ek_orig == _adapter_fold_args);
+
+      // The return value is replaced (or inserted) at the 'vminfo' argslot.
+      // Sometimes we can compute this statically.
+      int dest_slot_constant = -1;
+      if (!retain_original_args)
+        dest_slot_constant = collect_slot_constant;
+      else if (collect_slot_constant >= 0 && collect_count_constant >= 0)
+        // We are preserving all the arguments, and the return value is prepended,
+        // so the return slot is to the left (above) the |collect| sequence.
+        dest_slot_constant = collect_slot_constant + collect_count_constant;
+
+      // Replace all those slots by the result of the recursive call.
+      // The result type can be one of ref, int, long, float, double, void.
+      // In the case of void, nothing is pushed on the stack after return.
+      BasicType dest = ek_adapter_opt_collect_type(ek);
+      assert(dest == type2wfield[dest], "dest is a stack slot type");
+      int dest_count = type2size[dest];
+      assert(dest_count == 1 || dest_count == 2 || (dest_count == 0 && dest == T_VOID), "dest has a size");
+
+      // Choose a return continuation.
+      EntryKind ek_ret = _adapter_opt_return_any;
+      if (dest != T_CONFLICT && OptimizeMethodHandles) {
+        switch (dest) {
+        case T_INT    : ek_ret = _adapter_opt_return_int;     break;
+        case T_LONG   : ek_ret = _adapter_opt_return_long;    break;
+        case T_FLOAT  : ek_ret = _adapter_opt_return_float;   break;
+        case T_DOUBLE : ek_ret = _adapter_opt_return_double;  break;
+        case T_OBJECT : ek_ret = _adapter_opt_return_ref;     break;
+        case T_VOID   : ek_ret = _adapter_opt_return_void;    break;
+        default       : ShouldNotReachHere();
+        }
+        if (dest == T_OBJECT && dest_slot_constant >= 0) {
+          EntryKind ek_try = EntryKind(_adapter_opt_return_S0_ref + dest_slot_constant);
+          if (ek_try <= _adapter_opt_return_LAST &&
+              ek_adapter_opt_return_slot(ek_try) == dest_slot_constant) {
+            ek_ret = ek_try;
+          }
+        }
+        assert(ek_adapter_opt_return_type(ek_ret) == dest, "");
+      }
+
+      // Already pushed:  ... keep1 | collect | keep2 | sender_pc |
+      // push(sender_pc);
+
+      // Compute argument base:
+      Register rax_argv = rax_argslot;
+      __ lea(rax_argv, __ argument_address(constant(0)));
+
+      // Push a few extra argument words, if we need them to store the return value.
+      {
+        int extra_slots = 0;
+        if (retain_original_args) {
+          extra_slots = dest_count;
+        } else if (collect_count_constant == -1) {
+          extra_slots = dest_count;  // collect_count might be zero; be generous
+        } else if (dest_count > collect_count_constant) {
+          extra_slots = (dest_count - collect_count_constant);
+        } else {
+          // else we know we have enough dead space in |collect| to repurpose for return values
+        }
+        DEBUG_ONLY(extra_slots += 1);
+        if (extra_slots > 0) {
+          __ pop(rbx_temp);   // return value
+          __ subptr(rsp, (extra_slots * Interpreter::stackElementSize));
+          // Push guard word #2 in debug mode.
+          DEBUG_ONLY(__ movptr(Address(rsp, 0), (int32_t) RicochetFrame::MAGIC_NUMBER_2));
+          __ push(rbx_temp);
+        }
+      }
+
+      RicochetFrame::enter_ricochet_frame(_masm, rcx_recv, rax_argv,
+                                          entry(ek_ret)->from_interpreted_entry(), rbx_temp);
+
+      // Now pushed:  ... keep1 | collect | keep2 | RF |
+      // some handy frame slots:
+      Address exact_sender_sp_addr = RicochetFrame::frame_address(RicochetFrame::exact_sender_sp_offset_in_bytes());
+      Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
+      Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
+
+#ifdef ASSERT
+      if (VerifyMethodHandles && dest != T_CONFLICT) {
+        BLOCK_COMMENT("verify AMH.conv.dest");
+        load_conversion_dest_type(_masm, rbx_temp, conversion_addr);
+        Label L_dest_ok;
+        __ cmpl(rbx_temp, (int) dest);
+        __ jcc(Assembler::equal, L_dest_ok);
+        if (dest == T_INT) {
+          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
+            if (is_subword_type(BasicType(bt))) {
+              __ cmpl(rbx_temp, (int) bt);
+              __ jcc(Assembler::equal, L_dest_ok);
+            }
+          }
+        }
+        __ stop("bad dest in AMH.conv");
+        __ BIND(L_dest_ok);
+      }
+#endif //ASSERT
+
+      // Find out where the original copy of the recursive argument sequence begins.
+      Register rax_coll = rax_argv;
+      {
+        RegisterOrConstant collect_slot = collect_slot_constant;
+        if (collect_slot_constant == -1) {
+          __ movl(rdi_temp, rcx_amh_vmargslot);
+          collect_slot = rdi_temp;
+        }
+        if (collect_slot_constant != 0)
+          __ lea(rax_coll, Address(rax_argv, collect_slot, Interpreter::stackElementScale()));
+        // rax_coll now points at the trailing edge of |collect| and leading edge of |keep2|
+      }
+
+      // Replace the old AMH with the recursive MH.  (No going back now.)
+      // In the case of a boxing call, the recursive call is to a 'boxer' method,
+      // such as Integer.valueOf or Long.valueOf.  In the case of a filter
+      // or collect call, it will take one or more arguments, transform them,
+      // and return some result, to store back into argument_base[vminfo].
+      __ load_heap_oop(rcx_recv, rcx_amh_argument);
+      if (VerifyMethodHandles)  verify_method_handle(_masm, rcx_recv);
+
+      // Push a space for the recursively called MH first:
+      __ push((int32_t)NULL_WORD);
+
+      // Calculate |collect|, the number of arguments we are collecting.
+      Register rdi_collect_count = rdi_temp;
+      RegisterOrConstant collect_count;
+      if (collect_count_constant >= 0) {
+        collect_count = collect_count_constant;
+      } else {
+        __ load_method_handle_vmslots(rdi_collect_count, rcx_recv, rdx_temp);
+        collect_count = rdi_collect_count;
+      }
+#ifdef ASSERT
+      if (VerifyMethodHandles && collect_count_constant >= 0) {
+        __ load_method_handle_vmslots(rbx_temp, rcx_recv, rdx_temp);
+        Label L_count_ok;
+        __ cmpl(rbx_temp, collect_count_constant);
+        __ jcc(Assembler::equal, L_count_ok);
+        __ stop("bad vminfo in AMH.conv");
+        __ BIND(L_count_ok);
+      }
+#endif //ASSERT
+
+      // copy |collect| slots directly to TOS:
+      push_arg_slots(_masm, rax_coll, collect_count, 0, rbx_temp, rdx_temp);
+      // Now pushed:  ... keep1 | collect | keep2 | RF... | collect |
+      // rax_coll still points at the trailing edge of |collect| and leading edge of |keep2|
+
+      // If necessary, adjust the saved arguments to make room for the eventual return value.
+      // Normal adjustment:  ... keep1 | +dest+ | -collect- | keep2 | RF... | collect |
+      // If retaining args:  ... keep1 | +dest+ |  collect  | keep2 | RF... | collect |
+      // In the non-retaining case, this might move keep2 either up or down.
+      // We don't have to copy the whole | RF... collect | complex,
+      // but we must adjust RF.saved_args_base.
+      // Also, from now on, we will forget about the original copy of |collect|.
+      // If we are retaining it, we will treat it as part of |keep2|.
+      // For clarity we will define |keep3| = |collect|keep2| or |keep2|.
+
+      BLOCK_COMMENT("adjust trailing arguments {");
+      // Compare the sizes of |+dest+| and |-collect-|, which are opposed opening and closing movements.
+      int                open_count  = dest_count;
+      RegisterOrConstant close_count = collect_count_constant;
+      Register rdi_close_count = rdi_collect_count;
+      if (retain_original_args) {
+        close_count = constant(0);
+      } else if (collect_count_constant == -1) {
+        close_count = rdi_collect_count;
+      }
+
+      // How many slots need moving?  This is simply dest_slot (0 => no |keep3|).
+      RegisterOrConstant keep3_count;
+      Register rsi_keep3_count = rsi;  // can repair from RF.exact_sender_sp
+      if (dest_slot_constant >= 0) {
+        keep3_count = dest_slot_constant;
+      } else  {
+        load_conversion_vminfo(_masm, rsi_keep3_count, conversion_addr);
+        keep3_count = rsi_keep3_count;
+      }
+#ifdef ASSERT
+      if (VerifyMethodHandles && dest_slot_constant >= 0) {
+        load_conversion_vminfo(_masm, rbx_temp, conversion_addr);
+        Label L_vminfo_ok;
+        __ cmpl(rbx_temp, dest_slot_constant);
+        __ jcc(Assembler::equal, L_vminfo_ok);
+        __ stop("bad vminfo in AMH.conv");
+        __ BIND(L_vminfo_ok);
+      }
+#endif //ASSERT
+
+      // tasks remaining:
+      bool move_keep3 = (!keep3_count.is_constant() || keep3_count.as_constant() != 0);
+      bool stomp_dest = (NOT_DEBUG(dest == T_OBJECT) DEBUG_ONLY(dest_count != 0));
+      bool fix_arg_base = (!close_count.is_constant() || open_count != close_count.as_constant());
+
+      if (stomp_dest | fix_arg_base) {
+        // we will probably need an updated rax_argv value
+        if (collect_slot_constant >= 0) {
+          // rax_coll already holds the leading edge of |keep2|, so tweak it
+          assert(rax_coll == rax_argv, "elided a move");
+          if (collect_slot_constant != 0)
+            __ subptr(rax_argv, collect_slot_constant * Interpreter::stackElementSize);
+        } else {
+          // Just reload from RF.saved_args_base.
+          __ movptr(rax_argv, saved_args_base_addr);
+        }
+      }
+
+      // Old and new argument locations (based at slot 0).
+      // Net shift (&new_argv - &old_argv) is (close_count - open_count).
+      bool zero_open_count = (open_count == 0);  // remember this bit of info
+      if (move_keep3 && fix_arg_base) {
+        // It will be easier to have everything in one register:
+        if (close_count.is_register()) {
+          // Deduct open_count from close_count register to get a clean +/- value.
+          __ subptr(close_count.as_register(), open_count);
+        } else {
+          close_count = close_count.as_constant() - open_count;
+        }
+        open_count = 0;
+      }
+      Address old_argv(rax_argv, 0);
+      Address new_argv(rax_argv, close_count,  Interpreter::stackElementScale(),
+                                - open_count * Interpreter::stackElementSize);
+
+      // First decide if any actual data are to be moved.
+      // We can skip if (a) |keep3| is empty, or (b) the argument list size didn't change.
+      // (As it happens, all movements involve an argument list size change.)
+
+      // If there are variable parameters, use dynamic checks to skip around the whole mess.
+      Label L_done;
+      if (!keep3_count.is_constant()) {
+        __ testl(keep3_count.as_register(), keep3_count.as_register());
+        __ jcc(Assembler::zero, L_done);
+      }
+      if (!close_count.is_constant()) {
+        __ cmpl(close_count.as_register(), open_count);
+        __ jcc(Assembler::equal, L_done);
+      }
+
+      if (move_keep3 && fix_arg_base) {
+        bool emit_move_down = false, emit_move_up = false, emit_guard = false;
+        if (!close_count.is_constant()) {
+          emit_move_down = emit_guard = !zero_open_count;
+          emit_move_up   = true;
+        } else if (open_count != close_count.as_constant()) {
+          emit_move_down = (open_count > close_count.as_constant());
+          emit_move_up   = !emit_move_down;
+        }
+        Label L_move_up;
+        if (emit_guard) {
+          __ cmpl(close_count.as_register(), open_count);
+          __ jcc(Assembler::greater, L_move_up);
+        }
+
+        if (emit_move_down) {
+          // Move arguments down if |+dest+| > |-collect-|
+          // (This is rare, except when arguments are retained.)
+          // This opens space for the return value.
+          if (keep3_count.is_constant()) {
+            for (int i = 0; i < keep3_count.as_constant(); i++) {
+              __ movptr(rdx_temp, old_argv.plus_disp(i * Interpreter::stackElementSize));
+              __ movptr(          new_argv.plus_disp(i * Interpreter::stackElementSize), rdx_temp);
+            }
+          } else {
+            Register rbx_argv_top = rbx_temp;
+            __ lea(rbx_argv_top, old_argv.plus_disp(keep3_count, Interpreter::stackElementScale()));
+            move_arg_slots_down(_masm,
+                                old_argv,     // beginning of old argv
+                                rbx_argv_top, // end of old argv
+                                close_count,  // distance to move down (must be negative)
+                                rax_argv, rdx_temp);
+            // Used argv as an iteration variable; reload from RF.saved_args_base.
+            __ movptr(rax_argv, saved_args_base_addr);
+          }
+        }
+
+        if (emit_guard) {
+          __ jmp(L_done);  // assumes emit_move_up is true also
+          __ BIND(L_move_up);
+        }
+
+        if (emit_move_up) {
+
+          // Move arguments up if |+dest+| < |-collect-|
+          // (This is usual, except when |keep3| is empty.)
+          // This closes up the space occupied by the now-deleted collect values.
+          if (keep3_count.is_constant()) {
+            for (int i = keep3_count.as_constant() - 1; i >= 0; i--) {
+              __ movptr(rdx_temp, old_argv.plus_disp(i * Interpreter::stackElementSize));
+              __ movptr(          new_argv.plus_disp(i * Interpreter::stackElementSize), rdx_temp);
+            }
+          } else {
+            Address argv_top = old_argv.plus_disp(keep3_count, Interpreter::stackElementScale());
+            move_arg_slots_up(_masm,
+                              rax_argv,     // beginning of old argv
+                              argv_top,     // end of old argv
+                              close_count,  // distance to move up (must be positive)
+                              rbx_temp, rdx_temp);
+          }
+        }
+      }
+      __ BIND(L_done);
+
+      if (fix_arg_base) {
+        // adjust RF.saved_args_base by adding (close_count - open_count)
+        if (!new_argv.is_same_address(Address(rax_argv, 0)))
+          __ lea(rax_argv, new_argv);
+        __ movptr(saved_args_base_addr, rax_argv);
+      }
+
+      if (stomp_dest) {
+        // Stomp the return slot, so it doesn't hold garbage.
+        // This isn't strictly necessary, but it may help detect bugs.
+        int forty_two = RicochetFrame::RETURN_VALUE_PLACEHOLDER;
+        __ movptr(Address(rax_argv, keep3_count, Address::times_ptr),
+                  (int32_t) forty_two);
+        // uses rsi_keep3_count
+      }
+      BLOCK_COMMENT("} adjust trailing arguments");
+
+      BLOCK_COMMENT("do_recursive_call");
+      __ mov(saved_last_sp, rsp);    // set rsi/r13 for callee
+      __ pushptr(ExternalAddress(SharedRuntime::ricochet_blob()->bounce_addr()).addr());
+      // The globally unique bounce address has two purposes:
+      // 1. It helps the JVM recognize this frame (frame::is_ricochet_frame).
+      // 2. When returned to, it cuts back the stack and redirects control flow
+      //    to the return handler.
+      // The return handler will further cut back the stack when it takes
+      // down the RF.  Perhaps there is a way to streamline this further.
+
+      // State during recursive call:
+      // ... keep1 | dest | dest=42 | keep3 | RF... | collect | bounce_pc |
+      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+
+      break;
+    }
+
+  case _adapter_opt_return_ref:
+  case _adapter_opt_return_int:
+  case _adapter_opt_return_long:
+  case _adapter_opt_return_float:
+  case _adapter_opt_return_double:
+  case _adapter_opt_return_void:
+  case _adapter_opt_return_S0_ref:
+  case _adapter_opt_return_S1_ref:
+  case _adapter_opt_return_S2_ref:
+  case _adapter_opt_return_S3_ref:
+  case _adapter_opt_return_S4_ref:
+  case _adapter_opt_return_S5_ref:
+    {
+      BasicType dest_type_constant = ek_adapter_opt_return_type(ek);
+      int       dest_slot_constant = ek_adapter_opt_return_slot(ek);
+
+      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+
+      if (dest_slot_constant == -1) {
+        // The current stub is a general handler for this dest_type.
+        // It can be called from _adapter_opt_return_any below.
+        // Stash the address in a little table.
+        assert((dest_type_constant & CONV_TYPE_MASK) == dest_type_constant, "oob");
+        address return_handler = __ pc();
+        _adapter_return_handlers[dest_type_constant] = return_handler;
+        if (dest_type_constant == T_INT) {
+          // do the subword types too
+          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
+            if (is_subword_type(BasicType(bt)) &&
+                _adapter_return_handlers[bt] == NULL) {
+              _adapter_return_handlers[bt] = return_handler;
+            }
+          }
+        }
+      }
+
+      Register rbx_arg_base = rbx_temp;
+      assert_different_registers(rax, rdx,  // possibly live return value registers
+                                 rdi_temp, rbx_arg_base);
+
+      Address conversion_addr      = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
+      Address saved_args_base_addr = RicochetFrame::frame_address(RicochetFrame::saved_args_base_offset_in_bytes());
+
+      __ movptr(rbx_arg_base, saved_args_base_addr);
+      RegisterOrConstant dest_slot = dest_slot_constant;
+      if (dest_slot_constant == -1) {
+        load_conversion_vminfo(_masm, rdi_temp, conversion_addr);
+        dest_slot = rdi_temp;
+      }
+      // Store the result back into the argslot.
+      // This code uses the interpreter calling sequence, in which the return value
+      // is usually left in the TOS register, as defined by InterpreterMacroAssembler::pop.
+      // There are certain irregularities with floating point values, which can be seen
+      // in TemplateInterpreterGenerator::generate_return_entry_for.
+      move_return_value(_masm, dest_type_constant, Address(rbx_arg_base, dest_slot, Interpreter::stackElementScale()));
+
+      RicochetFrame::leave_ricochet_frame(_masm, rcx_recv, rbx_arg_base, rdx_temp);
+      __ push(rdx_temp);  // repush the return PC
+
+      // Load the final target and go.
+      if (VerifyMethodHandles)  verify_method_handle(_masm, rcx_recv);
+      __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+      __ hlt(); // --------------------
+      break;
+    }
+
+  case _adapter_opt_return_any:
+    {
+      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
+      Register rdi_conv = rdi_temp;
+      assert_different_registers(rax, rdx,  // possibly live return value registers
+                                 rdi_conv, rbx_temp);
+
+      Address conversion_addr = RicochetFrame::frame_address(RicochetFrame::conversion_offset_in_bytes());
+      load_conversion_dest_type(_masm, rdi_conv, conversion_addr);
+      __ lea(rbx_temp, ExternalAddress((address) &_adapter_return_handlers[0]));
+      __ movptr(rbx_temp, Address(rbx_temp, rdi_conv, Address::times_ptr));
+
+#ifdef ASSERT
+      { Label L_badconv;
+        __ testptr(rbx_temp, rbx_temp);
+        __ jccb(Assembler::zero, L_badconv);
+        __ jmp(rbx_temp);
+        __ bind(L_badconv);
+        __ stop("bad method handle return");
+      }
+#else //ASSERT
+      __ jmp(rbx_temp);
+#endif //ASSERT
+      break;
+    }
+
   case _adapter_opt_spread_0:
-  case _adapter_opt_spread_1:
-  case _adapter_opt_spread_more:
+  case _adapter_opt_spread_1_ref:
+  case _adapter_opt_spread_2_ref:
+  case _adapter_opt_spread_3_ref:
+  case _adapter_opt_spread_4_ref:
+  case _adapter_opt_spread_5_ref:
+  case _adapter_opt_spread_ref:
+  case _adapter_opt_spread_byte:
+  case _adapter_opt_spread_char:
+  case _adapter_opt_spread_short:
+  case _adapter_opt_spread_int:
+  case _adapter_opt_spread_long:
+  case _adapter_opt_spread_float:
+  case _adapter_opt_spread_double:
     {
       // spread an array out into a group of arguments
-      int length_constant = get_ek_adapter_opt_spread_info(ek);
+      int length_constant = ek_adapter_opt_spread_count(ek);
+      bool length_can_be_zero = (length_constant == 0);
+      if (length_constant < 0) {
+        // some adapters with variable length must handle the zero case
+        if (!OptimizeMethodHandles ||
+            ek_adapter_opt_spread_type(ek) != T_OBJECT)
+          length_can_be_zero = true;
+      }
 
       // find the address of the array argument
       __ movl(rax_argslot, rcx_amh_vmargslot);
       __ lea(rax_argslot, __ argument_address(rax_argslot));
 
-      // grab some temps
-      { __ push(rsi); __ push(rdi); }
-      // (preceding pushes must be done after argslot address is taken!)
-#define UNPUSH_RSI_RDI \
-      { __ pop(rdi); __ pop(rsi); }
+      // grab another temp
+      Register rsi_temp = rsi;
+      { if (rsi_temp == saved_last_sp)  __ push(saved_last_sp); }
+      // (preceding push must be done after argslot address is taken!)
+#define UNPUSH_RSI \
+      { if (rsi_temp == saved_last_sp)  __ pop(saved_last_sp); }
 
       // arx_argslot points both to the array and to the first output arg
       vmarg = Address(rax_argslot, 0);
 
       // Get the array value.
-      Register  rsi_array       = rsi;
+      Register  rsi_array       = rsi_temp;
       Register  rdx_array_klass = rdx_temp;
-      BasicType elem_type       = T_OBJECT;
+      BasicType elem_type = ek_adapter_opt_spread_type(ek);
+      int       elem_slots = type2size[elem_type];  // 1 or 2
+      int       array_slots = 1;  // array is always a T_OBJECT
       int       length_offset   = arrayOopDesc::length_offset_in_bytes();
       int       elem0_offset    = arrayOopDesc::base_offset_in_bytes(elem_type);
       __ movptr(rsi_array, vmarg);
-      Label skip_array_check;
-      if (length_constant == 0) {
+
+      Label L_array_is_empty, L_insert_arg_space, L_copy_args, L_args_done;
+      if (length_can_be_zero) {
+        // handle the null pointer case, if zero is allowed
+        Label L_skip;
+        if (length_constant < 0) {
+          load_conversion_vminfo(_masm, rbx_temp, rcx_amh_conversion);
+          __ testl(rbx_temp, rbx_temp);
+          __ jcc(Assembler::notZero, L_skip);
+        }
         __ testptr(rsi_array, rsi_array);
-        __ jcc(Assembler::zero, skip_array_check);
+        __ jcc(Assembler::zero, L_array_is_empty);
+        __ bind(L_skip);
       }
       __ null_check(rsi_array, oopDesc::klass_offset_in_bytes());
       __ load_klass(rdx_array_klass, rsi_array);
@@ -1081,22 +2272,20 @@
       // Check the array type.
       Register rbx_klass = rbx_temp;
       __ load_heap_oop(rbx_klass, rcx_amh_argument); // this is a Class object!
-      __ load_heap_oop(rbx_klass, Address(rbx_klass, java_lang_Class::klass_offset_in_bytes()));
+      load_klass_from_Class(_masm, rbx_klass);
 
       Label ok_array_klass, bad_array_klass, bad_array_length;
-      __ check_klass_subtype(rdx_array_klass, rbx_klass, rdi, ok_array_klass);
+      __ check_klass_subtype(rdx_array_klass, rbx_klass, rdi_temp, ok_array_klass);
       // If we get here, the type check failed!
       __ jmp(bad_array_klass);
-      __ bind(ok_array_klass);
+      __ BIND(ok_array_klass);
 
       // Check length.
       if (length_constant >= 0) {
         __ cmpl(Address(rsi_array, length_offset), length_constant);
       } else {
         Register rbx_vminfo = rbx_temp;
-        __ movl(rbx_vminfo, rcx_amh_conversion);
-        assert(CONV_VMINFO_SHIFT == 0, "preshifted");
-        __ andl(rbx_vminfo, CONV_VMINFO_MASK);
+        load_conversion_vminfo(_masm, rbx_vminfo, rcx_amh_conversion);
         __ cmpl(rbx_vminfo, Address(rsi_array, length_offset));
       }
       __ jcc(Assembler::notEqual, bad_array_length);
@@ -1108,90 +2297,104 @@
         // Form a pointer to the end of the affected region.
         __ lea(rdx_argslot_limit, Address(rax_argslot, Interpreter::stackElementSize));
         // 'stack_move' is negative number of words to insert
-        Register rdi_stack_move = rdi;
-        __ movl2ptr(rdi_stack_move, rcx_amh_conversion);
-        __ sarptr(rdi_stack_move, CONV_STACK_MOVE_SHIFT);
+        // This number already accounts for elem_slots.
+        Register rdi_stack_move = rdi_temp;
+        load_stack_move(_masm, rdi_stack_move, rcx_recv, true);
+        __ cmpptr(rdi_stack_move, 0);
+        assert(stack_move_unit() < 0, "else change this comparison");
+        __ jcc(Assembler::less, L_insert_arg_space);
+        __ jcc(Assembler::equal, L_copy_args);
+        // single argument case, with no array movement
+        __ BIND(L_array_is_empty);
+        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
+                         rax_argslot, rbx_temp, rdx_temp);
+        __ jmp(L_args_done);  // no spreading to do
+        __ BIND(L_insert_arg_space);
+        // come here in the usual case, stack_move < 0 (2 or more spread arguments)
         Register rsi_temp = rsi_array;  // spill this
-        insert_arg_slots(_masm, rdi_stack_move, -1,
+        insert_arg_slots(_masm, rdi_stack_move,
                          rax_argslot, rbx_temp, rsi_temp);
-        // reload the array (since rsi was killed)
-        __ movptr(rsi_array, vmarg);
-      } else if (length_constant > 1) {
-        int arg_mask = 0;
-        int new_slots = (length_constant - 1);
-        for (int i = 0; i < new_slots; i++) {
-          arg_mask <<= 1;
-          arg_mask |= _INSERT_REF_MASK;
-        }
-        insert_arg_slots(_masm, new_slots * stack_move_unit(), arg_mask,
+        // reload the array since rsi was killed
+        // reload from rdx_argslot_limit since rax_argslot is now decremented
+        __ movptr(rsi_array, Address(rdx_argslot_limit, -Interpreter::stackElementSize));
+      } else if (length_constant >= 1) {
+        int new_slots = (length_constant * elem_slots) - array_slots;
+        insert_arg_slots(_masm, new_slots * stack_move_unit(),
                          rax_argslot, rbx_temp, rdx_temp);
-      } else if (length_constant == 1) {
-        // no stack resizing required
       } else if (length_constant == 0) {
-        remove_arg_slots(_masm, -stack_move_unit(),
+        __ BIND(L_array_is_empty);
+        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
                          rax_argslot, rbx_temp, rdx_temp);
+      } else {
+        ShouldNotReachHere();
       }
 
       // Copy from the array to the new slots.
       // Note: Stack change code preserves integrity of rax_argslot pointer.
       // So even after slot insertions, rax_argslot still points to first argument.
+      // Beware:  Arguments that are shallow on the stack are deep in the array,
+      // and vice versa.  So a downward-growing stack (the usual) has to be copied
+      // elementwise in reverse order from the source array.
+      __ BIND(L_copy_args);
       if (length_constant == -1) {
         // [rax_argslot, rdx_argslot_limit) is the area we are inserting into.
+        // Array element [0] goes at rdx_argslot_limit[-wordSize].
         Register rsi_source = rsi_array;
         __ lea(rsi_source, Address(rsi_array, elem0_offset));
+        Register rdx_fill_ptr = rdx_argslot_limit;
         Label loop;
-        __ bind(loop);
-        __ movptr(rbx_temp, Address(rsi_source, 0));
-        __ movptr(Address(rax_argslot, 0), rbx_temp);
+        __ BIND(loop);
+        __ addptr(rdx_fill_ptr, -Interpreter::stackElementSize * elem_slots);
+        move_typed_arg(_masm, elem_type, true,
+                       Address(rdx_fill_ptr, 0), Address(rsi_source, 0),
+                       rbx_temp, rdi_temp);
         __ addptr(rsi_source, type2aelembytes(elem_type));
-        __ addptr(rax_argslot, Interpreter::stackElementSize);
-        __ cmpptr(rax_argslot, rdx_argslot_limit);
-        __ jccb(Assembler::less, loop);
+        __ cmpptr(rdx_fill_ptr, rax_argslot);
+        __ jcc(Assembler::above, loop);
       } else if (length_constant == 0) {
-        __ bind(skip_array_check);
         // nothing to copy
       } else {
         int elem_offset = elem0_offset;
-        int slot_offset = 0;
+        int slot_offset = length_constant * Interpreter::stackElementSize;
         for (int index = 0; index < length_constant; index++) {
-          __ movptr(rbx_temp, Address(rsi_array, elem_offset));
-          __ movptr(Address(rax_argslot, slot_offset), rbx_temp);
+          slot_offset -= Interpreter::stackElementSize * elem_slots;  // fill backward
+          move_typed_arg(_masm, elem_type, true,
+                         Address(rax_argslot, slot_offset), Address(rsi_array, elem_offset),
+                         rbx_temp, rdi_temp);
           elem_offset += type2aelembytes(elem_type);
-           slot_offset += Interpreter::stackElementSize;
         }
       }
+      __ BIND(L_args_done);
 
       // Arguments are spread.  Move to next method handle.
-      UNPUSH_RSI_RDI;
+      UNPUSH_RSI;
       __ load_heap_oop(rcx_recv, rcx_mh_vmtarget);
       __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
 
       __ bind(bad_array_klass);
-      UNPUSH_RSI_RDI;
+      UNPUSH_RSI;
       assert(!vmarg.uses(rarg2_required), "must be different registers");
-      __ movptr(rarg2_required, Address(rdx_array_klass, java_mirror_offset));  // required type
-      __ movptr(rarg1_actual,   vmarg);                                         // bad array
-      __ movl(  rarg0_code,     (int) Bytecodes::_aaload);                      // who is complaining?
+      __ load_heap_oop( rarg2_required, Address(rdx_array_klass, java_mirror_offset));  // required type
+      __ movptr(        rarg1_actual,   vmarg);                                         // bad array
+      __ movl(          rarg0_code,     (int) Bytecodes::_aaload);                      // who is complaining?
       __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
 
       __ bind(bad_array_length);
-      UNPUSH_RSI_RDI;
+      UNPUSH_RSI;
       assert(!vmarg.uses(rarg2_required), "must be different registers");
-      __ mov   (rarg2_required, rcx_recv);                       // AMH requiring a certain length
-      __ movptr(rarg1_actual,   vmarg);                          // bad array
-      __ movl(  rarg0_code,     (int) Bytecodes::_arraylength);  // who is complaining?
+      __ mov(    rarg2_required, rcx_recv);                       // AMH requiring a certain length
+      __ movptr( rarg1_actual,   vmarg);                          // bad array
+      __ movl(   rarg0_code,     (int) Bytecodes::_arraylength);  // who is complaining?
       __ jump(ExternalAddress(from_interpreted_entry(_raise_exception)));
+#undef UNPUSH_RSI
 
-#undef UNPUSH_RSI_RDI
+      break;
     }
-    break;
 
-  case _adapter_flyby:
-  case _adapter_ricochet:
-    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
-    break;
-
-  default:  ShouldNotReachHere();
+  default:
+    // do not require all platforms to recognize all adapter types
+    __ nop();
+    return;
   }
   __ hlt();
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/methodHandles_x86.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/methodHandles_x86.hpp	Tue May 17 09:29:56 2011 -0400
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Platform-specific definitions for method handles.
+// These definitions are inlined into class MethodHandles.
+
+public:
+
+// The stack just after the recursive call from a ricochet frame
+// looks something like this.  Offsets are marked in words, not bytes.
+// rsi (r13 on LP64) is part of the interpreter calling sequence
+// which tells the callee where my real rsp is (for frame walking).
+// (...lower memory addresses)
+// rsp:     [ return pc                 ]   always the global RicochetBlob::bounce_addr
+// rsp+1:   [ recursive arg N           ]
+// rsp+2:   [ recursive arg N-1         ]
+// ...
+// rsp+N:   [ recursive arg 1           ]
+// rsp+N+1: [ recursive method handle   ]
+// ...
+// rbp-6:   [ cleanup continuation pc   ]   <-- (struct RicochetFrame)
+// rbp-5:   [ saved target MH           ]   the MH we will call on the saved args
+// rbp-4:   [ saved args layout oop     ]   an int[] array which describes argument layout
+// rbp-3:   [ saved args pointer        ]   address of transformed adapter arg M (slot 0)
+// rbp-2:   [ conversion                ]   information about how the return value is used
+// rbp-1:   [ exact sender sp           ]   exact TOS (rsi/r13) of original sender frame
+// rbp+0:   [ saved sender fp           ]   (for original sender of AMH)
+// rbp+1:   [ saved sender pc           ]   (back to original sender of AMH)
+// rbp+2:   [ transformed adapter arg M ]   <-- (extended TOS of original sender)
+// rbp+3:   [ transformed adapter arg M-1]
+// ...
+// rbp+M+1: [ transformed adapter arg 1 ]
+// rbp+M+2: [ padding                   ] <-- (rbp + saved args base offset)
+// ...      [ optional padding]
+// (higher memory addresses...)
+//
+// The arguments originally passed by the original sender
+// are lost, and arbitrary amounts of stack motion might have
+// happened due to argument transformation.
+// (This is done by C2I/I2C adapters and non-direct method handles.)
+// This is why there is an unpredictable amount of memory between
+// the extended and exact TOS of the sender.
+// The ricochet adapter itself will also (in general) perform
+// transformations before the recursive call.
+//
+// The transformed and saved arguments, immediately above the saved
+// return PC, are a well-formed method handle invocation ready to execute.
+// When the GC needs to walk the stack, these arguments are described
+// via the saved arg types oop, an int[] array with a private format.
+// This array is derived from the type of the transformed adapter
+// method handle, which also sits at the base of the saved argument
+// bundle.  Since the GC may not be able to fish out the int[]
+// array, it is pushed explicitly on the stack.  This may be
+// an unnecessary expense.
+//
+// The following register conventions are significant at this point:
+// rsp       the thread stack, as always; preserved by caller
+// rsi/r13   exact TOS of recursive frame (contents of [rbp-2])
+// rcx       recursive method handle (contents of [rsp+N+1])
+// rbp       preserved by caller (not used by caller)
+// Unless otherwise specified, all registers can be blown by the call.
+//
+// If this frame must be walked, the transformed adapter arguments
+// will be found with the help of the saved arguments descriptor.
+//
+// Therefore, the descriptor must match the referenced arguments.
+// The arguments must be followed by at least one word of padding,
+// which will be necessary to complete the final method handle call.
+// That word is not treated as holding an oop.
+//
+// The word pointed to by the return argument pointer is not
+// treated as an oop, even if it points to a saved argument.
+// This allows the saved argument list to have a "hole" in it
+// to receive an oop from the recursive call.
+// (The hole might temporarily contain RETURN_VALUE_PLACEHOLDER.)
+//
+// When the recursive callee returns, RicochetBlob::bounce_addr will
+// immediately jump to the continuation stored in the RF.
+// This continuation will merge the recursive return value
+// into the saved argument list.  At that point, the original
+// rsi, rbp, and rsp will be reloaded, the ricochet frame will
+// disappear, and the final target of the adapter method handle
+// will be invoked on the transformed argument list.
+
+class RicochetFrame {
+  friend class MethodHandles;
+
+ private:
+  intptr_t* _continuation;          // what to do when control gets back here
+  oopDesc*  _saved_target;          // target method handle to invoke on saved_args
+  oopDesc*  _saved_args_layout;     // caching point for MethodTypeForm.vmlayout cookie
+  intptr_t* _saved_args_base;       // base of pushed arguments (slot 0, arg N) (-3)
+  intptr_t  _conversion;            // misc. information from original AdapterMethodHandle (-2)
+  intptr_t* _exact_sender_sp;       // parallel to interpreter_frame_sender_sp (-1)
+  intptr_t* _sender_link;           // *must* coincide with frame::link_offset (0)
+  address   _sender_pc;             // *must* coincide with frame::return_addr_offset (1)
+
+ public:
+  intptr_t* continuation() const        { return _continuation; }
+  oop       saved_target() const        { return _saved_target; }
+  oop       saved_args_layout() const   { return _saved_args_layout; }
+  intptr_t* saved_args_base() const     { return _saved_args_base; }
+  intptr_t  conversion() const          { return _conversion; }
+  intptr_t* exact_sender_sp() const     { return _exact_sender_sp; }
+  intptr_t* sender_link() const         { return _sender_link; }
+  address   sender_pc() const           { return _sender_pc; }
+
+  intptr_t* extended_sender_sp() const  { return saved_args_base(); }
+
+  intptr_t  return_value_slot_number() const {
+    return adapter_conversion_vminfo(conversion());
+  }
+  BasicType return_value_type() const {
+    return adapter_conversion_dest_type(conversion());
+  }
+  bool has_return_value_slot() const {
+    return return_value_type() != T_VOID;
+  }
+  intptr_t* return_value_slot_addr() const {
+    assert(has_return_value_slot(), "");
+    return saved_arg_slot_addr(return_value_slot_number());
+  }
+  intptr_t* saved_target_slot_addr() const {
+    return saved_arg_slot_addr(saved_args_length());
+  }
+  intptr_t* saved_arg_slot_addr(int slot) const {
+    assert(slot >= 0, "");
+    return (intptr_t*)( (address)saved_args_base() + (slot * Interpreter::stackElementSize) );
+  }
+
+  jint      saved_args_length() const;
+  jint      saved_arg_offset(int arg) const;
+
+  // GC interface
+  oop*  saved_target_addr()                     { return (oop*)&_saved_target; }
+  oop*  saved_args_layout_addr()                { return (oop*)&_saved_args_layout; }
+
+  oop  compute_saved_args_layout(bool read_cache, bool write_cache);
+
+  // Compiler/assembler interface.
+  static int continuation_offset_in_bytes()     { return offset_of(RicochetFrame, _continuation); }
+  static int saved_target_offset_in_bytes()     { return offset_of(RicochetFrame, _saved_target); }
+  static int saved_args_layout_offset_in_bytes(){ return offset_of(RicochetFrame, _saved_args_layout); }
+  static int saved_args_base_offset_in_bytes()  { return offset_of(RicochetFrame, _saved_args_base); }
+  static int conversion_offset_in_bytes()       { return offset_of(RicochetFrame, _conversion); }
+  static int exact_sender_sp_offset_in_bytes()  { return offset_of(RicochetFrame, _exact_sender_sp); }
+  static int sender_link_offset_in_bytes()      { return offset_of(RicochetFrame, _sender_link); }
+  static int sender_pc_offset_in_bytes()        { return offset_of(RicochetFrame, _sender_pc); }
+
+  // This value is not used for much, but it apparently must be nonzero.
+  static int frame_size_in_bytes()              { return sender_link_offset_in_bytes(); }
+
+#ifdef ASSERT
+  // The magic number is supposed to help find ricochet frames within the bytes of stack dumps.
+  enum { MAGIC_NUMBER_1 = 0xFEED03E, MAGIC_NUMBER_2 = 0xBEEF03E };
+  static int magic_number_1_offset_in_bytes()   { return -wordSize; }
+  static int magic_number_2_offset_in_bytes()   { return sizeof(RicochetFrame); }
+  intptr_t magic_number_1() const               { return *(intptr_t*)((address)this + magic_number_1_offset_in_bytes()); };
+  intptr_t magic_number_2() const               { return *(intptr_t*)((address)this + magic_number_2_offset_in_bytes()); };
+#endif //ASSERT
+
+  enum { RETURN_VALUE_PLACEHOLDER = (NOT_DEBUG(0) DEBUG_ONLY(42)) };
+
+  static void verify_offsets() NOT_DEBUG_RETURN;
+  void verify() const NOT_DEBUG_RETURN; // check for MAGIC_NUMBER, etc.
+  void zap_arguments() NOT_DEBUG_RETURN;
+
+  static void generate_ricochet_blob(MacroAssembler* _masm,
+                                     // output params:
+                                     int* frame_size_in_words, int* bounce_offset, int* exception_offset);
+
+  static void enter_ricochet_frame(MacroAssembler* _masm,
+                                   Register rcx_recv,
+                                   Register rax_argv,
+                                   address return_handler,
+                                   Register rbx_temp);
+  static void leave_ricochet_frame(MacroAssembler* _masm,
+                                   Register rcx_recv,
+                                   Register new_sp_reg,
+                                   Register sender_pc_reg);
+
+  static Address frame_address(int offset = 0) {
+    // The RicochetFrame is found by subtracting a constant offset from rbp.
+    return Address(rbp, - sender_link_offset_in_bytes() + offset);
+  }
+
+  static RicochetFrame* from_frame(const frame& fr) {
+    address bp = (address) fr.fp();
+    RicochetFrame* rf = (RicochetFrame*)(bp - sender_link_offset_in_bytes());
+    rf->verify();
+    return rf;
+  }
+
+  static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
+};
+
+// Additional helper methods for MethodHandles code generation:
+public:
+  static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg);
+  static void load_conversion_vminfo(MacroAssembler* _masm, Register reg, Address conversion_field_addr);
+  static void load_conversion_dest_type(MacroAssembler* _masm, Register reg, Address conversion_field_addr);
+
+  static void load_stack_move(MacroAssembler* _masm,
+                              Register rdi_stack_move,
+                              Register rcx_amh,
+                              bool might_be_negative);
+
+  static void insert_arg_slots(MacroAssembler* _masm,
+                               RegisterOrConstant arg_slots,
+                               Register rax_argslot,
+                               Register rbx_temp, Register rdx_temp);
+
+  static void remove_arg_slots(MacroAssembler* _masm,
+                               RegisterOrConstant arg_slots,
+                               Register rax_argslot,
+                               Register rbx_temp, Register rdx_temp);
+
+  static void push_arg_slots(MacroAssembler* _masm,
+                                   Register rax_argslot,
+                                   RegisterOrConstant slot_count,
+                                   int skip_words_count,
+                                   Register rbx_temp, Register rdx_temp);
+
+  static void move_arg_slots_up(MacroAssembler* _masm,
+                                Register rbx_bottom,  // invariant
+                                Address  top_addr,    // can use rax_temp
+                                RegisterOrConstant positive_distance_in_slots,
+                                Register rax_temp, Register rdx_temp);
+
+  static void move_arg_slots_down(MacroAssembler* _masm,
+                                  Address  bottom_addr,  // can use rax_temp
+                                  Register rbx_top,      // invariant
+                                  RegisterOrConstant negative_distance_in_slots,
+                                  Register rax_temp, Register rdx_temp);
+
+  static void move_typed_arg(MacroAssembler* _masm,
+                             BasicType type, bool is_element,
+                             Address slot_dest, Address value_src,
+                             Register rbx_temp, Register rdx_temp);
+
+  static void move_return_value(MacroAssembler* _masm, BasicType type,
+                                Address return_slot);
+
+  static void verify_argslot(MacroAssembler* _masm, Register argslot_reg,
+                             const char* error_message) NOT_DEBUG_RETURN;
+
+  static void verify_argslots(MacroAssembler* _masm,
+                              RegisterOrConstant argslot_count,
+                              Register argslot_reg,
+                              bool negate_argslot,
+                              const char* error_message) NOT_DEBUG_RETURN;
+
+  static void verify_stack_move(MacroAssembler* _masm,
+                                RegisterOrConstant arg_slots,
+                                int direction) NOT_DEBUG_RETURN;
+
+  static void verify_klass(MacroAssembler* _masm,
+                           Register obj, KlassHandle klass,
+                           const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
+
+  static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) {
+    verify_klass(_masm, mh_reg, SystemDictionaryHandles::MethodHandle_klass(),
+                 "reference is a MH");
+  }
+
+  static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
+
+  static Register saved_last_sp_register() {
+    // Should be in sharedRuntime, not here.
+    return LP64_ONLY(r13) NOT_LP64(rsi);
+  }
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/sharedRuntime_x86_32.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Tue May 17 09:29:56 2011 -0400
@@ -2253,6 +2253,31 @@
   return 0;
 }
 
+//----------------------------generate_ricochet_blob---------------------------
+void SharedRuntime::generate_ricochet_blob() {
+  if (!EnableInvokeDynamic)  return;  // leave it as a null
+
+  // allocate space for the code
+  ResourceMark rm;
+  // setup code generation tools
+  CodeBuffer   buffer("ricochet_blob", 256, 256);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  int frame_size_in_words = -1, bounce_offset = -1, exception_offset = -1;
+  MethodHandles::RicochetFrame::generate_ricochet_blob(masm, &frame_size_in_words, &bounce_offset, &exception_offset);
+
+  // -------------
+  // make sure all code is generated
+  masm->flush();
+
+  // failed to generate?
+  if (frame_size_in_words < 0 || bounce_offset < 0 || exception_offset < 0) {
+    assert(false, "bad ricochet blob");
+    return;
+  }
+
+  _ricochet_blob = RicochetBlob::create(&buffer, bounce_offset, exception_offset, frame_size_in_words);
+}
 
 //------------------------------generate_deopt_blob----------------------------
 void SharedRuntime::generate_deopt_blob() {
@@ -2996,6 +3021,8 @@
     generate_handler_blob(CAST_FROM_FN_PTR(address,
                    SafepointSynchronize::handle_polling_page_exception), true);
 
+  generate_ricochet_blob();
+
   generate_deopt_blob();
 #ifdef COMPILER2
   generate_uncommon_trap_blob();
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/sharedRuntime_x86_64.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue May 17 09:29:56 2011 -0400
@@ -2530,6 +2530,32 @@
 }
 
 
+//----------------------------generate_ricochet_blob---------------------------
+void SharedRuntime::generate_ricochet_blob() {
+  if (!EnableInvokeDynamic)  return;  // leave it as a null
+
+  // allocate space for the code
+  ResourceMark rm;
+  // setup code generation tools
+  CodeBuffer   buffer("ricochet_blob", 512, 512);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  int frame_size_in_words = -1, bounce_offset = -1, exception_offset = -1;
+  MethodHandles::RicochetFrame::generate_ricochet_blob(masm, &frame_size_in_words, &bounce_offset, &exception_offset);
+
+  // -------------
+  // make sure all code is generated
+  masm->flush();
+
+  // failed to generate?
+  if (frame_size_in_words < 0 || bounce_offset < 0 || exception_offset < 0) {
+    assert(false, "bad ricochet blob");
+    return;
+  }
+
+  _ricochet_blob = RicochetBlob::create(&buffer, bounce_offset, exception_offset, frame_size_in_words);
+}
+
 //------------------------------generate_deopt_blob----------------------------
 void SharedRuntime::generate_deopt_blob() {
   // Allocate space for the code
@@ -3205,6 +3231,8 @@
     generate_handler_blob(CAST_FROM_FN_PTR(address,
                    SafepointSynchronize::handle_polling_page_exception), true);
 
+  generate_ricochet_blob();
+
   generate_deopt_blob();
 
 #ifdef COMPILER2
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/stubRoutines_x86_32.hpp
--- a/src/cpu/x86/vm/stubRoutines_x86_32.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.hpp	Tue May 17 09:29:56 2011 -0400
@@ -36,7 +36,7 @@
 
 // MethodHandles adapters
 enum method_handles_platform_dependent_constants {
-  method_handles_adapters_code_size = 10000
+  method_handles_adapters_code_size = 30000 DEBUG_ONLY(+ 10000)
 };
 
 class x86 {
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/stubRoutines_x86_64.hpp
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Tue May 17 09:29:56 2011 -0400
@@ -38,7 +38,7 @@
 
 // MethodHandles adapters
 enum method_handles_platform_dependent_constants {
-  method_handles_adapters_code_size = 40000
+  method_handles_adapters_code_size = 80000 DEBUG_ONLY(+ 120000)
 };
 
 class x86 {
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/templateInterpreter_x86_32.cpp
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Tue May 17 09:29:56 2011 -0400
@@ -1589,6 +1589,7 @@
                                            int tempcount,
                                            int popframe_extra_args,
                                            int moncount,
+                                           int caller_actual_parameters,
                                            int callee_param_count,
                                            int callee_locals,
                                            frame* caller,
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/x86/vm/templateInterpreter_x86_64.cpp
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Tue May 17 09:29:56 2011 -0400
@@ -1603,6 +1603,7 @@
                                            int tempcount,
                                            int popframe_extra_args,
                                            int moncount,
+                                           int caller_actual_parameters,
                                            int callee_param_count,
                                            int callee_locals,
                                            frame* caller,
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/zero/vm/cppInterpreter_zero.cpp
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Tue May 17 09:29:56 2011 -0400
@@ -1427,6 +1427,7 @@
                                            int       tempcount,
                                            int       popframe_extra_args,
                                            int       moncount,
+                                           int       caller_actual_parameters,
                                            int       callee_param_count,
                                            int       callee_locals,
                                            frame*    caller,
diff -r 03b943e6c025 -r 8bec9b249a6e src/cpu/zero/vm/interpreter_zero.cpp
--- a/src/cpu/zero/vm/interpreter_zero.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/cpu/zero/vm/interpreter_zero.cpp	Tue May 17 09:29:56 2011 -0400
@@ -82,24 +82,6 @@
   return true;
 }
 
-int AbstractInterpreter::size_activation(methodOop method,
-                                         int tempcount,
-                                         int popframe_extra_args,
-                                         int moncount,
-                                         int callee_param_count,
-                                         int callee_locals,
-                                         bool is_top_frame) {
-  return layout_activation(method,
-                           tempcount,
-                           popframe_extra_args,
-                           moncount,
-                           callee_param_count,
-                           callee_locals,
-                           (frame*) NULL,
-                           (frame*) NULL,
-                           is_top_frame);
-}
-
 void Deoptimization::unwind_callee_save_values(frame* f,
                                                vframeArray* vframe_array) {
 }
diff -r 03b943e6c025 -r 8bec9b249a6e src/os/linux/vm/os_linux.cpp
--- a/src/os/linux/vm/os_linux.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/os/linux/vm/os_linux.cpp	Tue May 17 09:29:56 2011 -0400
@@ -2850,7 +2850,7 @@
         char chars[257];
         long x = 0;
         if (fgets(chars, sizeof(chars), fp)) {
-          if (sscanf(chars, "%lx-%*lx", &x) == 1
+          if (sscanf(chars, "%lx-%*x", &x) == 1
               && x == (long)p) {
             if (strstr (chars, "hugepage")) {
               result = true;
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/c1/c1_InstructionPrinter.cpp
--- a/src/share/vm/c1/c1_InstructionPrinter.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/c1/c1_InstructionPrinter.cpp	Tue May 17 09:29:56 2011 -0400
@@ -132,17 +132,22 @@
     if (value->is_null_object()) {
       output()->print("null");
     } else if (!value->is_loaded()) {
-      output()->print("<unloaded object 0x%x>", value);
+      output()->print("<unloaded object " PTR_FORMAT ">", value);
     } else if (value->is_method()) {
       ciMethod* m = (ciMethod*)value;
       output()->print("<method %s.%s>", m->holder()->name()->as_utf8(), m->name()->as_utf8());
     } else {
-      output()->print("<object 0x%x>", value->constant_encoding());
+      output()->print("<object " PTR_FORMAT ">", value->constant_encoding());
     }
   } else if (type->as_InstanceConstant() != NULL) {
-    output()->print("<instance 0x%x>", type->as_InstanceConstant()->value()->constant_encoding());
+    ciInstance* value = type->as_InstanceConstant()->value();
+    if (value->is_loaded()) {
+      output()->print("<instance " PTR_FORMAT ">", value->constant_encoding());
+    } else {
+      output()->print("<unloaded instance " PTR_FORMAT ">", value);
+    }
   } else if (type->as_ArrayConstant() != NULL) {
-    output()->print("<array 0x%x>", type->as_ArrayConstant()->value()->constant_encoding());
+    output()->print("<array " PTR_FORMAT ">", type->as_ArrayConstant()->value()->constant_encoding());
   } else if (type->as_ClassConstant() != NULL) {
     ciInstanceKlass* klass = type->as_ClassConstant()->value();
     if (!klass->is_loaded()) {
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/c1/c1_Optimizer.cpp
--- a/src/share/vm/c1/c1_Optimizer.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/c1/c1_Optimizer.cpp	Tue May 17 09:29:56 2011 -0400
@@ -252,26 +252,28 @@
         Constant::CompareResult t_compare_res = x_tval_const->compare(cond, y_const);
         Constant::CompareResult f_compare_res = x_fval_const->compare(cond, y_const);
 
-        guarantee(t_compare_res != Constant::not_comparable && f_compare_res != Constant::not_comparable, "incomparable constants in IfOp");
-
-        Value new_tval = t_compare_res == Constant::cond_true ? tval : fval;
-        Value new_fval = f_compare_res == Constant::cond_true ? tval : fval;
+        // not_comparable here is a valid return in case we're comparing unloaded oop constants
+        if (t_compare_res != Constant::not_comparable && f_compare_res != Constant::not_comparable) {
+          Value new_tval = t_compare_res == Constant::cond_true ? tval : fval;
+          Value new_fval = f_compare_res == Constant::cond_true ? tval : fval;
 
-        _ifop_count++;
-        if (new_tval == new_fval) {
-          return new_tval;
-        } else {
-          return new IfOp(x_ifop->x(), x_ifop_cond, x_ifop->y(), new_tval, new_fval);
+          _ifop_count++;
+          if (new_tval == new_fval) {
+            return new_tval;
+          } else {
+            return new IfOp(x_ifop->x(), x_ifop_cond, x_ifop->y(), new_tval, new_fval);
+          }
         }
       }
     } else {
       Constant* x_const = x->as_Constant();
       if (x_const != NULL) {         // x and y are constants
         Constant::CompareResult x_compare_res = x_const->compare(cond, y_const);
-        guarantee(x_compare_res != Constant::not_comparable, "incomparable constants in IfOp");
-
-        _ifop_count++;
-        return x_compare_res == Constant::cond_true ? tval : fval;
+        // not_comparable here is a valid return in case we're comparing unloaded oop constants
+        if (x_compare_res != Constant::not_comparable) {
+          _ifop_count++;
+          return x_compare_res == Constant::cond_true ? tval : fval;
+        }
       }
     }
   }
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/ci/ciMethodData.hpp
--- a/src/share/vm/ci/ciMethodData.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/ci/ciMethodData.hpp	Tue May 17 09:29:56 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -233,7 +233,10 @@
 
 public:
   bool is_method_data()  { return true; }
-  bool is_empty() { return _state == empty_state; }
+
+  void set_mature() { _state = mature_state; }
+
+  bool is_empty()  { return _state == empty_state; }
   bool is_mature() { return _state == mature_state; }
 
   int creation_mileage() { return _orig.creation_mileage(); }
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/ci/ciMethodHandle.cpp
--- a/src/share/vm/ci/ciMethodHandle.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/ci/ciMethodHandle.cpp	Tue May 17 09:29:56 2011 -0400
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "ci/ciClassList.hpp"
 #include "ci/ciInstance.hpp"
+#include "ci/ciMethodData.hpp"
 #include "ci/ciMethodHandle.hpp"
 #include "ci/ciUtilities.hpp"
 #include "prims/methodHandleWalk.hpp"
@@ -36,13 +37,13 @@
 // ciMethodHandle::get_adapter
 //
 // Return an adapter for this MethodHandle.
-ciMethod* ciMethodHandle::get_adapter(bool is_invokedynamic) const {
+ciMethod* ciMethodHandle::get_adapter_impl(bool is_invokedynamic) const {
   VM_ENTRY_MARK;
   Handle h(get_oop());
   methodHandle callee(_callee->get_methodOop());
   // We catch all exceptions here that could happen in the method
   // handle compiler and stop the VM.
-  MethodHandleCompiler mhc(h, callee, is_invokedynamic, THREAD);
+  MethodHandleCompiler mhc(h, callee, _profile->count(), is_invokedynamic, THREAD);
   if (!HAS_PENDING_EXCEPTION) {
     methodHandle m = mhc.compile(THREAD);
     if (!HAS_PENDING_EXCEPTION) {
@@ -58,6 +59,22 @@
   return NULL;
 }
 
+// ------------------------------------------------------------------
+// ciMethodHandle::get_adapter
+//
+// Return an adapter for this MethodHandle.
+ciMethod* ciMethodHandle::get_adapter(bool is_invokedynamic) const {
+  ciMethod* result = get_adapter_impl(is_invokedynamic);
+  if (result) {
+    // Fake up the MDO maturity.
+    ciMethodData* mdo = result->method_data();
+    if (mdo != NULL && _caller->method_data() != NULL && _caller->method_data()->is_mature()) {
+      mdo->set_mature();
+    }
+  }
+  return result;
+}
+
 
 // ------------------------------------------------------------------
 // ciMethodHandle::print_impl
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/ci/ciMethodHandle.hpp
--- a/src/share/vm/ci/ciMethodHandle.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/ci/ciMethodHandle.hpp	Tue May 17 09:29:56 2011 -0400
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_CI_CIMETHODHANDLE_HPP
 #define SHARE_VM_CI_CIMETHODHANDLE_HPP
 
+#include "ci/ciCallProfile.hpp"
 #include "ci/ciInstance.hpp"
 #include "prims/methodHandles.hpp"
 
@@ -33,32 +34,37 @@
 // The class represents a java.lang.invoke.MethodHandle object.
 class ciMethodHandle : public ciInstance {
 private:
-  ciMethod* _callee;
+  ciMethod*      _callee;
+  ciMethod*      _caller;
+  ciCallProfile* _profile;
 
   // Return an adapter for this MethodHandle.
-  ciMethod* get_adapter(bool is_invokedynamic) const;
+  ciMethod* get_adapter_impl(bool is_invokedynamic) const;
+  ciMethod* get_adapter(     bool is_invokedynamic) const;
 
 protected:
   void print_impl(outputStream* st);
 
 public:
-  ciMethodHandle(instanceHandle h_i) : ciInstance(h_i) {};
+  ciMethodHandle(instanceHandle h_i) :
+    ciInstance(h_i),
+    _callee(NULL),
+    _caller(NULL),
+    _profile(NULL)
+  {}
 
   // What kind of ciObject is this?
   bool is_method_handle() const { return true; }
 
-  ciMethod* callee() const { return _callee; }
-  void  set_callee(ciMethod* m) { _callee = m; }
+  void set_callee(ciMethod* m)                  { _callee  = m;       }
+  void set_caller(ciMethod* m)                  { _caller  = m;       }
+  void set_call_profile(ciCallProfile* profile) { _profile = profile; }
 
   // Return an adapter for a MethodHandle call.
-  ciMethod* get_method_handle_adapter() const {
-    return get_adapter(false);
-  }
+  ciMethod* get_method_handle_adapter() const { return get_adapter(false); }
 
   // Return an adapter for an invokedynamic call.
-  ciMethod* get_invokedynamic_adapter() const {
-    return get_adapter(true);
-  }
+  ciMethod* get_invokedynamic_adapter() const { return get_adapter(true);  }
 };
 
 #endif // SHARE_VM_CI_CIMETHODHANDLE_HPP
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/classfile/javaClasses.cpp
--- a/src/share/vm/classfile/javaClasses.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/classfile/javaClasses.cpp	Tue May 17 09:29:56 2011 -0400
@@ -2602,6 +2602,7 @@
 // Support for java_lang_invoke_MethodTypeForm
 
 int java_lang_invoke_MethodTypeForm::_vmslots_offset;
+int java_lang_invoke_MethodTypeForm::_vmlayout_offset;
 int java_lang_invoke_MethodTypeForm::_erasedType_offset;
 int java_lang_invoke_MethodTypeForm::_genericInvoker_offset;
 
@@ -2609,6 +2610,7 @@
   klassOop k = SystemDictionary::MethodTypeForm_klass();
   if (k != NULL) {
     compute_optional_offset(_vmslots_offset,    k, vmSymbols::vmslots_name(),    vmSymbols::int_signature(), true);
+    compute_optional_offset(_vmlayout_offset,   k, vmSymbols::vmlayout_name(),   vmSymbols::object_signature());
     compute_optional_offset(_erasedType_offset, k, vmSymbols::erasedType_name(), vmSymbols::java_lang_invoke_MethodType_signature(), true);
     compute_optional_offset(_genericInvoker_offset, k, vmSymbols::genericInvoker_name(), vmSymbols::java_lang_invoke_MethodHandle_signature(), true);
     if (_genericInvoker_offset == 0)  _genericInvoker_offset = -1;  // set to explicit "empty" value
@@ -2617,9 +2619,31 @@
 
 int java_lang_invoke_MethodTypeForm::vmslots(oop mtform) {
   assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
+  assert(_vmslots_offset > 0, "");
   return mtform->int_field(_vmslots_offset);
 }
 
+oop java_lang_invoke_MethodTypeForm::vmlayout(oop mtform) {  // returns the oop stored in mtform's "vmlayout" field
+  assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
+  assert(_vmlayout_offset > 0, "");  // offset comes from compute_optional_offset(); it must be positive before use
+  return mtform->obj_field(_vmlayout_offset);
+}
+
+oop java_lang_invoke_MethodTypeForm::init_vmlayout(oop mtform, oop cookie) {  // set-once: returns cookie if we installed it, else the value already present
+  assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
+  oop previous = vmlayout(mtform);  // plain read first, so an already-initialized field skips the compare-exchange
+  if (previous != NULL) {
+    return previous;  // someone else beat us to it
+  }
+  HeapWord* cookie_addr = (HeapWord*) mtform->obj_field_addr<oop>(_vmlayout_offset);  // address of the vmlayout field inside mtform
+  OrderAccess::storestore();  // make sure our copy is fully committed
+  previous = oopDesc::atomic_compare_exchange_oop(cookie, cookie_addr, previous);  // previous is NULL here; presumably yields the prior field value -- confirm in oop.hpp
+  if (previous != NULL) {
+    return previous;  // someone else beat us to it
+  }
+  return cookie;
+}
+
 oop java_lang_invoke_MethodTypeForm::erasedType(oop mtform) {
   assert(mtform->klass() == SystemDictionary::MethodTypeForm_klass(), "MTForm only");
   return mtform->obj_field(_erasedType_offset);
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/classfile/javaClasses.hpp
--- a/src/share/vm/classfile/javaClasses.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/classfile/javaClasses.hpp	Tue May 17 09:29:56 2011 -0400
@@ -949,18 +949,19 @@
     OP_CHECK_CAST    = 0x2, // ref-to-ref conversion; requires a Class argument
     OP_PRIM_TO_PRIM  = 0x3, // converts from one primitive to another
     OP_REF_TO_PRIM   = 0x4, // unboxes a wrapper to produce a primitive
-    OP_PRIM_TO_REF   = 0x5, // boxes a primitive into a wrapper (NYI)
+    OP_PRIM_TO_REF   = 0x5, // boxes a primitive into a wrapper
     OP_SWAP_ARGS     = 0x6, // swap arguments (vminfo is 2nd arg)
     OP_ROT_ARGS      = 0x7, // rotate arguments (vminfo is displaced arg)
     OP_DUP_ARGS      = 0x8, // duplicates one or more arguments (at TOS)
     OP_DROP_ARGS     = 0x9, // remove one or more argument slots
-    OP_COLLECT_ARGS  = 0xA, // combine one or more arguments into a varargs (NYI)
+    OP_COLLECT_ARGS  = 0xA, // combine arguments using an auxiliary function
     OP_SPREAD_ARGS   = 0xB, // expand in place a varargs array (of known size)
-    OP_FLYBY         = 0xC, // operate first on reified argument list (NYI)
-    OP_RICOCHET      = 0xD, // run an adapter chain on the return value (NYI)
+    OP_FOLD_ARGS     = 0xC, // combine but do not remove arguments; prepend result
+    //OP_UNUSED_13   = 0xD, // unused code, perhaps for reified argument lists
     CONV_OP_LIMIT    = 0xE, // limit of CONV_OP enumeration
 
     CONV_OP_MASK     = 0xF00, // this nybble contains the conversion op field
+    CONV_TYPE_MASK   = 0x0F,  // fits T_ADDRESS and below
     CONV_VMINFO_MASK = 0x0FF, // LSB is reserved for JVM use
     CONV_VMINFO_SHIFT     =  0, // position of bits in CONV_VMINFO_MASK
     CONV_OP_SHIFT         =  8, // position of bits in CONV_OP_MASK
@@ -1089,6 +1090,7 @@
 
  private:
   static int _vmslots_offset;           // number of argument slots needed
+  static int _vmlayout_offset;          // object describing internal calling sequence
   static int _erasedType_offset;        // erasedType = canonical MethodType
   static int _genericInvoker_offset;    // genericInvoker = adapter for invokeGeneric
 
@@ -1100,8 +1102,12 @@
   static oop            erasedType(oop mtform);
   static oop            genericInvoker(oop mtform);
 
+  static oop            vmlayout(oop mtform);
+  static oop       init_vmlayout(oop mtform, oop cookie);
+
   // Accessors for code generation:
   static int vmslots_offset_in_bytes()          { return _vmslots_offset; }
+  static int vmlayout_offset_in_bytes()         { return _vmlayout_offset; }
   static int erasedType_offset_in_bytes()       { return _erasedType_offset; }
   static int genericInvoker_offset_in_bytes()   { return _genericInvoker_offset; }
 };
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/classfile/systemDictionary.cpp
--- a/src/share/vm/classfile/systemDictionary.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/classfile/systemDictionary.cpp	Tue May 17 09:29:56 2011 -0400
@@ -2362,8 +2362,15 @@
       spe = invoke_method_table()->find_entry(index, hash, signature, name_id);
       if (spe == NULL)
         spe = invoke_method_table()->add_entry(index, hash, signature, name_id);
-      if (spe->property_oop() == NULL)
+      if (spe->property_oop() == NULL) {
         spe->set_property_oop(m());
+        // Link m to its method type, if that type is suitably generic.
+        oop mtform = java_lang_invoke_MethodType::form(mt());
+        if (mtform != NULL && mt() == java_lang_invoke_MethodTypeForm::erasedType(mtform)
+            && java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() > 0) {
+          java_lang_invoke_MethodTypeForm::init_vmlayout(mtform, m());
+        }
+      }
     } else {
       non_cached_result = m;
     }
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/classfile/vmSymbols.hpp
--- a/src/share/vm/classfile/vmSymbols.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/classfile/vmSymbols.hpp	Tue May 17 09:29:56 2011 -0400
@@ -341,6 +341,7 @@
   template(vmtarget_name,                             "vmtarget")                                 \
   template(vmentry_name,                              "vmentry")                                  \
   template(vmslots_name,                              "vmslots")                                  \
+  template(vmlayout_name,                             "vmlayout")                                 \
   template(vmindex_name,                              "vmindex")                                  \
   template(vmargslot_name,                            "vmargslot")                                \
   template(flags_name,                                "flags")                                    \
@@ -393,6 +394,7 @@
   template(void_signature,                            "V")                                        \
   template(byte_array_signature,                      "[B")                                       \
   template(char_array_signature,                      "[C")                                       \
+  template(int_array_signature,                       "[I")                                       \
   template(object_void_signature,                     "(Ljava/lang/Object;)V")                    \
   template(object_int_signature,                      "(Ljava/lang/Object;)I")                    \
   template(object_boolean_signature,                  "(Ljava/lang/Object;)Z")                    \
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/code/codeBlob.cpp
--- a/src/share/vm/code/codeBlob.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/code/codeBlob.cpp	Tue May 17 09:29:56 2011 -0400
@@ -152,6 +152,32 @@
 }
 
 
+void CodeBlob::trace_new_stub(CodeBlob* stub, const char* name1, const char* name2) {
+  // Common bookkeeping after creating a stub (which may be NULL). Do not hold the CodeCache lock during name formatting.
+  assert(!CodeCache_lock->owned_by_self(), "release CodeCache before registering the stub");
+
+  if (stub != NULL) {
+    char stub_id[256];  // display name: name1 immediately followed by name2
+    assert(strlen(name1) + strlen(name2) < sizeof(stub_id), "");  // the concatenated name must fit in stub_id
+    jio_snprintf(stub_id, sizeof(stub_id), "%s%s", name1, name2);
+    if (PrintStubCode) {
+      tty->print_cr("Decoding %s " INTPTR_FORMAT, stub_id, (intptr_t) stub);
+      Disassembler::decode(stub->code_begin(), stub->code_end());
+    }
+    Forte::register_stub(stub_id, stub->code_begin(), stub->code_end());
+
+    if (JvmtiExport::should_post_dynamic_code_generated()) {
+      const char* stub_name = name2;  // the event is posted under name2 when it is non-empty ...
+      if (name2[0] == '\0')  stub_name = name1;  // ... and under name1 otherwise
+      JvmtiExport::post_dynamic_code_generated(stub_name, stub->code_begin(), stub->code_end());
+    }
+  }
+
+  // Track memory usage statistic after releasing CodeCache_lock (done even when stub is NULL)
+  MemoryService::track_code_cache_memory_usage();
+}
+
+
 void CodeBlob::flush() {
   if (_oop_maps) {
     FREE_C_HEAP_ARRAY(unsigned char, _oop_maps);
@@ -312,23 +338,7 @@
     stub = new (size) RuntimeStub(stub_name, cb, size, frame_complete, frame_size, oop_maps, caller_must_gc_arguments);
   }
 
-  // Do not hold the CodeCache lock during name formatting.
-  if (stub != NULL) {
-    char stub_id[256];
-    jio_snprintf(stub_id, sizeof(stub_id), "RuntimeStub - %s", stub_name);
-    if (PrintStubCode) {
-      tty->print_cr("Decoding %s " INTPTR_FORMAT, stub_id, stub);
-      Disassembler::decode(stub->code_begin(), stub->code_end());
-    }
-    Forte::register_stub(stub_id, stub->code_begin(), stub->code_end());
-
-    if (JvmtiExport::should_post_dynamic_code_generated()) {
-      JvmtiExport::post_dynamic_code_generated(stub_name, stub->code_begin(), stub->code_end());
-    }
-  }
-
-  // Track memory usage statistic after releasing CodeCache_lock
-  MemoryService::track_code_cache_memory_usage();
+  trace_new_stub(stub, "RuntimeStub - ", stub_name);
 
   return stub;
 }
@@ -340,6 +350,50 @@
   return p;
 }
 
+// operator new shared by all singletons: storage comes from the CodeCache, and a failed allocation is fatal.
+void* SingletonBlob::operator new(size_t s, unsigned size) {
+  void* p = CodeCache::allocate(size);  // 's' is not used; the explicit 'size' argument is what gets allocated
+  if (!p) fatal("Initial size of CodeCache is too small");
+  return p;
+}
+
+
+//----------------------------------------------------------------------------------------------------
+// Implementation of RicochetBlob
+
+RicochetBlob::RicochetBlob(
+  CodeBuffer* cb,
+  int         size,
+  int         bounce_offset,     // added to code_begin() by bounce_addr()
+  int         exception_offset,  // added to code_begin() by exception_addr()
+  int         frame_size
+)
+: SingletonBlob("RicochetBlob", cb, sizeof(RicochetBlob), size, frame_size, (OopMapSet*) NULL)  // no OopMapSet is supplied
+{
+  _bounce_offset = bounce_offset;
+  _exception_offset = exception_offset;
+}
+
+
+RicochetBlob* RicochetBlob::create(
+  CodeBuffer* cb,
+  int         bounce_offset,     // forwarded unchanged to the constructor
+  int         exception_offset,  // forwarded unchanged to the constructor
+  int         frame_size)
+{
+  RicochetBlob* blob = NULL;
+  ThreadInVMfromUnknown __tiv;  // get to VM state in case we block on CodeCache_lock
+  {
+    MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);  // locker is scoped to this inner block only
+    unsigned int size = allocation_size(cb, sizeof(RicochetBlob));
+    blob = new (size) RicochetBlob(cb, size, bounce_offset, exception_offset, frame_size);  // allocates via SingletonBlob::operator new(size_t, unsigned)
+  }
+
+  trace_new_stub(blob, "RicochetBlob");  // kept outside the locked block: trace_new_stub asserts CodeCache_lock is not owned
+
+  return blob;
+}
+
 
 //----------------------------------------------------------------------------------------------------
 // Implementation of DeoptimizationBlob
@@ -386,34 +440,12 @@
                                          frame_size);
   }
 
-  // Do not hold the CodeCache lock during name formatting.
-  if (blob != NULL) {
-    char blob_id[256];
-    jio_snprintf(blob_id, sizeof(blob_id), "DeoptimizationBlob@" PTR_FORMAT, blob->code_begin());
-    if (PrintStubCode) {
-      tty->print_cr("Decoding %s " INTPTR_FORMAT, blob_id, blob);
-      Disassembler::decode(blob->code_begin(), blob->code_end());
-    }
-    Forte::register_stub(blob_id, blob->code_begin(), blob->code_end());
-
-    if (JvmtiExport::should_post_dynamic_code_generated()) {
-      JvmtiExport::post_dynamic_code_generated("DeoptimizationBlob", blob->code_begin(), blob->code_end());
-    }
-  }
-
-  // Track memory usage statistic after releasing CodeCache_lock
-  MemoryService::track_code_cache_memory_usage();
+  trace_new_stub(blob, "DeoptimizationBlob");
 
   return blob;
 }
 
 
-void* DeoptimizationBlob::operator new(size_t s, unsigned size) {
-  void* p = CodeCache::allocate(size);
-  if (!p) fatal("Initial size of CodeCache is too small");
-  return p;
-}
-
 //----------------------------------------------------------------------------------------------------
 // Implementation of UncommonTrapBlob
 
@@ -441,33 +473,12 @@
     blob = new (size) UncommonTrapBlob(cb, size, oop_maps, frame_size);
   }
 
-  // Do not hold the CodeCache lock during name formatting.
-  if (blob != NULL) {
-    char blob_id[256];
-    jio_snprintf(blob_id, sizeof(blob_id), "UncommonTrapBlob@" PTR_FORMAT, blob->code_begin());
-    if (PrintStubCode) {
-      tty->print_cr("Decoding %s " INTPTR_FORMAT, blob_id, blob);
-      Disassembler::decode(blob->code_begin(), blob->code_end());
-    }
-    Forte::register_stub(blob_id, blob->code_begin(), blob->code_end());
-
-    if (JvmtiExport::should_post_dynamic_code_generated()) {
-      JvmtiExport::post_dynamic_code_generated("UncommonTrapBlob", blob->code_begin(), blob->code_end());
-    }
-  }
-
-  // Track memory usage statistic after releasing CodeCache_lock
-  MemoryService::track_code_cache_memory_usage();
+  trace_new_stub(blob, "UncommonTrapBlob");
 
   return blob;
 }
 
 
-void* UncommonTrapBlob::operator new(size_t s, unsigned size) {
-  void* p = CodeCache::allocate(size);
-  if (!p) fatal("Initial size of CodeCache is too small");
-  return p;
-}
 #endif // COMPILER2
 
 
@@ -498,33 +509,12 @@
     blob = new (size) ExceptionBlob(cb, size, oop_maps, frame_size);
   }
 
-  // We do not need to hold the CodeCache lock during name formatting
-  if (blob != NULL) {
-    char blob_id[256];
-    jio_snprintf(blob_id, sizeof(blob_id), "ExceptionBlob@" PTR_FORMAT, blob->code_begin());
-    if (PrintStubCode) {
-      tty->print_cr("Decoding %s " INTPTR_FORMAT, blob_id, blob);
-      Disassembler::decode(blob->code_begin(), blob->code_end());
-    }
-    Forte::register_stub(blob_id, blob->code_begin(), blob->code_end());
-
-    if (JvmtiExport::should_post_dynamic_code_generated()) {
-      JvmtiExport::post_dynamic_code_generated("ExceptionBlob", blob->code_begin(), blob->code_end());
-    }
-  }
-
-  // Track memory usage statistic after releasing CodeCache_lock
-  MemoryService::track_code_cache_memory_usage();
+  trace_new_stub(blob, "ExceptionBlob");
 
   return blob;
 }
 
 
-void* ExceptionBlob::operator new(size_t s, unsigned size) {
-  void* p = CodeCache::allocate(size);
-  if (!p) fatal("Initial size of CodeCache is too small");
-  return p;
-}
 #endif // COMPILER2
 
 
@@ -554,35 +544,12 @@
     blob = new (size) SafepointBlob(cb, size, oop_maps, frame_size);
   }
 
-  // We do not need to hold the CodeCache lock during name formatting.
-  if (blob != NULL) {
-    char blob_id[256];
-    jio_snprintf(blob_id, sizeof(blob_id), "SafepointBlob@" PTR_FORMAT, blob->code_begin());
-    if (PrintStubCode) {
-      tty->print_cr("Decoding %s " INTPTR_FORMAT, blob_id, blob);
-      Disassembler::decode(blob->code_begin(), blob->code_end());
-    }
-    Forte::register_stub(blob_id, blob->code_begin(), blob->code_end());
-
-    if (JvmtiExport::should_post_dynamic_code_generated()) {
-      JvmtiExport::post_dynamic_code_generated("SafepointBlob", blob->code_begin(), blob->code_end());
-    }
-  }
-
-  // Track memory usage statistic after releasing CodeCache_lock
-  MemoryService::track_code_cache_memory_usage();
+  trace_new_stub(blob, "SafepointBlob");
 
   return blob;
 }
 
 
-void* SafepointBlob::operator new(size_t s, unsigned size) {
-  void* p = CodeCache::allocate(size);
-  if (!p) fatal("Initial size of CodeCache is too small");
-  return p;
-}
-
-
 //----------------------------------------------------------------------------------------------------
 // Verification and printing
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/code/codeBlob.hpp
--- a/src/share/vm/code/codeBlob.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/code/codeBlob.hpp	Tue May 17 09:29:56 2011 -0400
@@ -35,6 +35,7 @@
 // Suptypes are:
 //   nmethod            : Compiled Java methods (include method that calls to native code)
 //   RuntimeStub        : Call to VM runtime methods
+//   RicochetBlob       : Used for blocking MethodHandle adapters
 //   DeoptimizationBlob : Used for deoptimizatation
 //   ExceptionBlob      : Used for stack unrolling
 //   SafepointBlob      : Used to handle illegal instruction exceptions
@@ -95,12 +96,13 @@
   void flush();
 
   // Typing
-  virtual bool is_buffer_blob() const                 { return false; }
-  virtual bool is_nmethod() const                     { return false; }
-  virtual bool is_runtime_stub() const                { return false; }
-  virtual bool is_deoptimization_stub() const         { return false; }
-  virtual bool is_uncommon_trap_stub() const          { return false; }
-  virtual bool is_exception_stub() const              { return false; }
+  virtual bool is_buffer_blob() const            { return false; }
+  virtual bool is_nmethod() const                { return false; }
+  virtual bool is_runtime_stub() const           { return false; }
+  virtual bool is_ricochet_stub() const          { return false; }
+  virtual bool is_deoptimization_stub() const    { return false; }
+  virtual bool is_uncommon_trap_stub() const     { return false; }
+  virtual bool is_exception_stub() const         { return false; }
   virtual bool is_safepoint_stub() const              { return false; }
   virtual bool is_adapter_blob() const                { return false; }
   virtual bool is_method_handles_adapter_blob() const { return false; }
@@ -182,6 +184,9 @@
   virtual void print_on(outputStream* st) const;
   virtual void print_value_on(outputStream* st) const;
 
+  // Deal with Disassembler, Forte, JvmtiExport, MemoryService.
+  static void trace_new_stub(CodeBlob* blob, const char* name1, const char* name2 = "");
+
   // Print the comment associated with offset on stream, if there is one
   virtual void print_block_comment(outputStream* stream, address block_begin) {
     intptr_t offset = (intptr_t)(block_begin - code_begin());
@@ -318,7 +323,11 @@
 
 class SingletonBlob: public CodeBlob {
   friend class VMStructs;
-  public:
+
+ protected:
+  void* operator new(size_t s, unsigned size);
+
+ public:
    SingletonBlob(
      const char* name,
      CodeBuffer* cb,
@@ -341,6 +350,50 @@
 
 
 //----------------------------------------------------------------------------------------------------
+// RicochetBlob
+// Holds an arbitrary argument list indefinitely while Java code executes recursively.
+
+class RicochetBlob: public SingletonBlob {
+  friend class VMStructs;
+ private:
+
+  int _bounce_offset;     // offset from code_begin(); see bounce_addr()
+  int _exception_offset;  // offset from code_begin(); see exception_addr()
+
+  // Creation support (private: instances are obtained through create())
+  RicochetBlob(
+    CodeBuffer* cb,
+    int         size,
+    int         bounce_offset,
+    int         exception_offset,
+    int         frame_size
+  );
+
+ public:
+  // Creation
+  static RicochetBlob* create(
+    CodeBuffer* cb,
+    int         bounce_offset,
+    int         exception_offset,
+    int         frame_size
+  );
+
+  // Typing
+  bool is_ricochet_stub() const { return true; }  // overrides the CodeBlob default, which returns false
+
+  // GC for args
+  void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map, OopClosure* f) { /* Nothing to do */ }
+
+  address bounce_addr() const           { return code_begin() + _bounce_offset; }
+  address exception_addr() const        { return code_begin() + _exception_offset; }
+  bool returns_to_bounce_addr(address pc) const {  // true if pc, or pc + frame::pc_return_offset, equals bounce_addr()
+    address bounce_pc = bounce_addr();
+    return (pc == bounce_pc || (pc + frame::pc_return_offset) == bounce_pc);
+  }
+};
+
+
+//----------------------------------------------------------------------------------------------------
 // DeoptimizationBlob
 
 class DeoptimizationBlob: public SingletonBlob {
@@ -363,8 +416,6 @@
     int         frame_size
   );
 
-  void* operator new(size_t s, unsigned size);
-
  public:
   // Creation
   static DeoptimizationBlob* create(
@@ -378,7 +429,6 @@
 
   // Typing
   bool is_deoptimization_stub() const { return true; }
-  const DeoptimizationBlob *as_deoptimization_stub() const { return this; }
   bool exception_address_is_unpack_entry(address pc) const {
     address unpack_pc = unpack();
     return (pc == unpack_pc || (pc + frame::pc_return_offset) == unpack_pc);
@@ -426,8 +476,6 @@
     int         frame_size
   );
 
-  void* operator new(size_t s, unsigned size);
-
  public:
   // Creation
   static UncommonTrapBlob* create(
@@ -458,8 +506,6 @@
     int         frame_size
   );
 
-  void* operator new(size_t s, unsigned size);
-
  public:
   // Creation
   static ExceptionBlob* create(
@@ -491,8 +537,6 @@
     int         frame_size
   );
 
-  void* operator new(size_t s, unsigned size);
-
  public:
   // Creation
   static SafepointBlob* create(
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/code/codeCache.cpp
--- a/src/share/vm/code/codeCache.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/code/codeCache.cpp	Tue May 17 09:29:56 2011 -0400
@@ -796,6 +796,7 @@
   int nmethodCount = 0;
   int runtimeStubCount = 0;
   int adapterCount = 0;
+  int ricochetStubCount = 0;
   int deoptimizationStubCount = 0;
   int uncommonTrapStubCount = 0;
   int bufferBlobCount = 0;
@@ -840,6 +841,8 @@
       }
     } else if (cb->is_runtime_stub()) {
       runtimeStubCount++;
+    } else if (cb->is_ricochet_stub()) {
+      ricochetStubCount++;
     } else if (cb->is_deoptimization_stub()) {
       deoptimizationStubCount++;
     } else if (cb->is_uncommon_trap_stub()) {
@@ -876,6 +879,7 @@
   tty->print_cr("runtime_stubs: %d",runtimeStubCount);
   tty->print_cr("adapters: %d",adapterCount);
   tty->print_cr("buffer blobs: %d",bufferBlobCount);
+  tty->print_cr("ricochet_stubs: %d",ricochetStubCount);
   tty->print_cr("deoptimization_stubs: %d",deoptimizationStubCount);
   tty->print_cr("uncommon_traps: %d",uncommonTrapStubCount);
   tty->print_cr("\nnmethod size distribution (non-zombie java)");
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/compiler/disassembler.cpp
--- a/src/share/vm/compiler/disassembler.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/compiler/disassembler.cpp	Tue May 17 09:29:56 2011 -0400
@@ -283,10 +283,10 @@
         st->print("Stub::%s", desc->name());
         if (desc->begin() != adr)
           st->print("%+d 0x%p",adr - desc->begin(), adr);
-        else if (WizardMode) st->print(" " INTPTR_FORMAT, adr);
+        else if (WizardMode) st->print(" " PTR_FORMAT, adr);
         return;
       }
-      st->print("Stub::<unknown> " INTPTR_FORMAT, adr);
+      st->print("Stub::<unknown> " PTR_FORMAT, adr);
       return;
     }
 
@@ -314,8 +314,8 @@
     }
   }
 
-  // Fall through to a simple numeral.
-  st->print(INTPTR_FORMAT, (intptr_t)adr);
+  // Fall through to a simple (hexadecimal) numeral.
+  st->print(PTR_FORMAT, adr);
 }
 
 void decode_env::print_insn_labels() {
@@ -326,7 +326,7 @@
     cb->print_block_comment(st, p);
   }
   if (_print_pc) {
-    st->print("  " INTPTR_FORMAT ": ", (intptr_t) p);
+    st->print("  " PTR_FORMAT ": ", p);
   }
 }
 
@@ -432,7 +432,7 @@
 void Disassembler::decode(CodeBlob* cb, outputStream* st) {
   if (!load_library())  return;
   decode_env env(cb, st);
-  env.output()->print_cr("Decoding CodeBlob " INTPTR_FORMAT, cb);
+  env.output()->print_cr("Decoding CodeBlob " PTR_FORMAT, cb);
   env.decode_instructions(cb->code_begin(), cb->code_end());
 }
 
@@ -446,7 +446,7 @@
 void Disassembler::decode(nmethod* nm, outputStream* st) {
   if (!load_library())  return;
   decode_env env(nm, st);
-  env.output()->print_cr("Decoding compiled method " INTPTR_FORMAT ":", nm);
+  env.output()->print_cr("Decoding compiled method " PTR_FORMAT ":", nm);
   env.output()->print_cr("Code:");
 
 #ifdef SHARK
@@ -478,9 +478,9 @@
     int offset = 0;
     for (address p = nm->consts_begin(); p < nm->consts_end(); p += 4, offset += 4) {
       if ((offset % 8) == 0) {
-        env.output()->print_cr("  " INTPTR_FORMAT " (offset: %4d): " PTR32_FORMAT "   " PTR64_FORMAT, (intptr_t) p, offset, *((int32_t*) p), *((int64_t*) p));
+        env.output()->print_cr("  " PTR_FORMAT " (offset: %4d): " PTR32_FORMAT "   " PTR64_FORMAT, p, offset, *((int32_t*) p), *((int64_t*) p));
       } else {
-        env.output()->print_cr("  " INTPTR_FORMAT " (offset: %4d): " PTR32_FORMAT,                    (intptr_t) p, offset, *((int32_t*) p));
+        env.output()->print_cr("  " PTR_FORMAT " (offset: %4d): " PTR32_FORMAT,                    p, offset, *((int32_t*) p));
       }
     }
   }
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp
--- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Tue May 17 09:29:56 2011 -0400
@@ -29,13 +29,14 @@
 #include "memory/sharedHeap.hpp"
 #include "memory/space.inline.hpp"
 #include "memory/universe.hpp"
+#include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
 
 void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
-                                                             DirtyCardToOopClosure* dcto_cl,
-                                                             ClearNoncleanCardWrapper* cl,
+                                                             OopsInGenClosure* cl,
+                                                             CardTableRS* ct,
                                                              int n_threads) {
   assert(n_threads > 0, "Error: expected n_threads > 0");
   assert((n_threads == 1 && ParallelGCThreads == 0) ||
@@ -49,14 +50,14 @@
                           lowest_non_clean_base_chunk_index,
                           lowest_non_clean_chunk_size);
 
-  int n_strides = n_threads * StridesPerThread;
+  int n_strides = n_threads * ParGCStridesPerThread;
   SequentialSubTasksDone* pst = sp->par_seq_tasks();
   pst->set_n_threads(n_threads);
   pst->set_n_tasks(n_strides);
 
   int stride = 0;
   while (!pst->is_task_claimed(/* reference */ stride)) {
-    process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
+    process_stride(sp, mr, stride, n_strides, cl, ct,
                    lowest_non_clean,
                    lowest_non_clean_base_chunk_index,
                    lowest_non_clean_chunk_size);
@@ -79,13 +80,13 @@
 process_stride(Space* sp,
                MemRegion used,
                jint stride, int n_strides,
-               DirtyCardToOopClosure* dcto_cl,
-               ClearNoncleanCardWrapper* cl,
+               OopsInGenClosure* cl,
+               CardTableRS* ct,
                jbyte** lowest_non_clean,
                uintptr_t lowest_non_clean_base_chunk_index,
                size_t    lowest_non_clean_chunk_size) {
-  // We don't have to go downwards here; it wouldn't help anyway,
-  // because of parallelism.
+  // We go from lower to higher addresses here; going downwards wouldn't help
+  // that much because of the strided parallelism pattern used here.
 
   // Find the first card address of the first chunk in the stride that is
   // at least "bottom" of the used region.
@@ -98,25 +99,35 @@
   if ((uintptr_t)stride >= start_chunk_stride_num) {
     chunk_card_start = (jbyte*)(start_card +
                                 (stride - start_chunk_stride_num) *
-                                CardsPerStrideChunk);
+                                ParGCCardsPerStrideChunk);
   } else {
     // Go ahead to the next chunk group boundary, then to the requested stride.
     chunk_card_start = (jbyte*)(start_card +
                                 (n_strides - start_chunk_stride_num + stride) *
-                                CardsPerStrideChunk);
+                                ParGCCardsPerStrideChunk);
   }
 
   while (chunk_card_start < end_card) {
-    // We don't have to go downwards here; it wouldn't help anyway,
-    // because of parallelism.  (We take care with "min_done"; see below.)
+    // Even though we go from lower to higher addresses below, the
+    // strided parallelism can interleave the actual processing of the
+    // dirty pages in various ways. For a specific chunk within this
+    // stride, we take care to avoid double scanning or missing a card
+    // by suitably initializing the "min_done" field in process_chunk_boundaries()
+    // below, together with the dirty region extension accomplished in
+    // DirtyCardToOopClosure::do_MemRegion().
+    jbyte*    chunk_card_end = chunk_card_start + ParGCCardsPerStrideChunk;
     // Invariant: chunk_mr should be fully contained within the "used" region.
-    jbyte*    chunk_card_end = chunk_card_start + CardsPerStrideChunk;
     MemRegion chunk_mr       = MemRegion(addr_for(chunk_card_start),
                                          chunk_card_end >= end_card ?
                                            used.end() : addr_for(chunk_card_end));
     assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)");
     assert(used.contains(chunk_mr), "chunk_mr should be subset of used");
 
+    DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(),
+                                                     cl->gen_boundary());
+    ClearNoncleanCardWrapper clear_cl(dcto_cl, ct);
+
+
     // Process the chunk.
     process_chunk_boundaries(sp,
                              dcto_cl,
@@ -126,17 +137,30 @@
                              lowest_non_clean_base_chunk_index,
                              lowest_non_clean_chunk_size);
 
+    // We want the LNC array updates above in process_chunk_boundaries
+    // to be visible before any of the card table value changes as a
+    // result of the dirty card iteration below.
+    OrderAccess::storestore();
+
     // We do not call the non_clean_card_iterate_serial() version because
-    // we want to clear the cards, and the ClearNoncleanCardWrapper closure
-    // itself does the work of finding contiguous dirty ranges of cards to
-    // process (and clear).
-    cl->do_MemRegion(chunk_mr);
+    // we want to clear the cards: clear_cl here does the work of finding
+    // contiguous dirty ranges of cards to process and clear.
+    clear_cl.do_MemRegion(chunk_mr);
 
     // Find the next chunk of the stride.
-    chunk_card_start += CardsPerStrideChunk * n_strides;
+    chunk_card_start += ParGCCardsPerStrideChunk * n_strides;
   }
 }
 
+
+// If you want a talkative process_chunk_boundaries,
+// then #define NOISY(x) x   (i.e. change the definition in the #else branch below)
+#ifdef NOISY  // refuse to build if some other header already defined NOISY
+#error "Encountered a global preprocessor flag, NOISY, which might clash with local definition to follow"
+#else
+#define NOISY(x)  // default: expands to nothing, so NOISY(...) tracing statements disappear
+#endif
+
 void
 CardTableModRefBS::
 process_chunk_boundaries(Space* sp,
@@ -147,126 +171,232 @@
                          uintptr_t lowest_non_clean_base_chunk_index,
                          size_t    lowest_non_clean_chunk_size)
 {
-  // We must worry about the chunk boundaries.
+  // We must worry about non-array objects that cross chunk boundaries,
+  // because such objects are both precisely and imprecisely marked:
+  // .. if the head of such an object is dirty, the entire object
+  //    needs to be scanned, under the interpretation that this
+  //    was an imprecise mark
+  // .. if the head of such an object is not dirty, we can assume
+  //    precise marking and it's efficient to scan just the dirty
+  //    cards.
+  // In either case, each scanned reference must be scanned exactly
+  // once so as to avoid cloning of a young referent. For efficiency,
+  // our closures depend on this property and do not protect against
+  // double scans.
 
-  // First, set our max_to_do:
-  HeapWord* max_to_do = NULL;
   uintptr_t cur_chunk_index = addr_to_chunk_index(chunk_mr.start());
   cur_chunk_index           = cur_chunk_index - lowest_non_clean_base_chunk_index;
 
+  NOISY(tty->print_cr("===========================================================================");)
+  NOISY(tty->print_cr(" process_chunk_boundary: Called with [" PTR_FORMAT "," PTR_FORMAT ")",
+                      chunk_mr.start(), chunk_mr.end());)
+
+  // First, set "our" lowest_non_clean entry, which would be
+  // used by the thread scanning an adjoining left chunk with
+  // a non-array object straddling the mutual boundary.
+  // Find the object that spans our boundary, if one exists.
+  // first_block is the block possibly straddling our left boundary.
+  HeapWord* first_block = sp->block_start(chunk_mr.start());
+  assert((chunk_mr.start() != used.start()) || (first_block == chunk_mr.start()),
+         "First chunk should always have a co-initial block");
+  // Does the block straddle the chunk's left boundary, and is it
+  // a non-array object?
+  if (first_block < chunk_mr.start()        // first block straddles left bdry
+      && sp->block_is_obj(first_block)      // first block is an object
+      && !(oop(first_block)->is_objArray()  // first block is not an array (arrays are precisely dirtied)
+           || oop(first_block)->is_typeArray())) {
+    // Find our least non-clean card, so that a left neighbour
+    // does not scan an object straddling the mutual boundary
+    // too far to the right, and attempt to scan a portion of
+    // that object twice.
+    jbyte* first_dirty_card = NULL;
+    jbyte* last_card_of_first_obj =
+        byte_for(first_block + sp->block_size(first_block) - 1);
+    jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start());
+    jbyte* last_card_of_cur_chunk = byte_for(chunk_mr.last());
+    jbyte* last_card_to_check =
+      (jbyte*) MIN2((intptr_t) last_card_of_cur_chunk,
+                    (intptr_t) last_card_of_first_obj);
+    // Note that this does not need to go beyond our last card
+    // if our first object completely straddles this chunk.
+    for (jbyte* cur = first_card_of_cur_chunk;
+         cur <= last_card_to_check; cur++) {
+      jbyte val = *cur;
+      if (card_will_be_scanned(val)) {
+        first_dirty_card = cur; break;
+      } else {
+        assert(!card_may_have_been_dirty(val), "Error");
+      }
+    }
+    if (first_dirty_card != NULL) {
+      NOISY(tty->print_cr(" LNC: Found a dirty card at " PTR_FORMAT " in current chunk",
+                    first_dirty_card);)
+      assert(0 <= cur_chunk_index && cur_chunk_index < lowest_non_clean_chunk_size,
+             "Bounds error.");
+      assert(lowest_non_clean[cur_chunk_index] == NULL,
+             "Write exactly once : value should be stable hereafter for this round");
+      lowest_non_clean[cur_chunk_index] = first_dirty_card;
+    } NOISY(else {
+      tty->print_cr(" LNC: Found no dirty card in current chunk; leaving LNC entry NULL");
+      // In the future, we could have this thread look for a non-NULL value to copy from its
+      // right neighbour (up to the end of the first object).
+      if (last_card_of_cur_chunk < last_card_of_first_obj) {
+        tty->print_cr(" LNC: BEWARE!!! first obj straddles past right end of chunk:\n"
+                      "   might be efficient to get value from right neighbour?");
+      }
+    })
+  } else {
+    // In this case we can help our neighbour by just asking them
+    // to stop at our first card (even though it may not be dirty).
+    NOISY(tty->print_cr(" LNC: first block is not a non-array object; setting LNC to first card of current chunk");)
+    assert(lowest_non_clean[cur_chunk_index] == NULL, "Write once : value should be stable hereafter");
+    jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start());
+    lowest_non_clean[cur_chunk_index] = first_card_of_cur_chunk;
+  }
+  NOISY(tty->print_cr(" process_chunk_boundary: lowest_non_clean[" INTPTR_FORMAT "] = " PTR_FORMAT
+                "   which corresponds to the heap address " PTR_FORMAT,
+                cur_chunk_index, lowest_non_clean[cur_chunk_index],
+                (lowest_non_clean[cur_chunk_index] != NULL)
+                ? addr_for(lowest_non_clean[cur_chunk_index])
+                : NULL);)
+  NOISY(tty->print_cr("---------------------------------------------------------------------------");)
+
+  // Next, set our own max_to_do, which will strictly/exclusively bound
+  // the highest address that we will scan past the right end of our chunk.
+  HeapWord* max_to_do = NULL;
   if (chunk_mr.end() < used.end()) {
-    // This is not the last chunk in the used region.  What is the last
-    // object?
-    HeapWord* last_block = sp->block_start(chunk_mr.end());
+    // This is not the last chunk in the used region.
+    // What is our last block? We check the first block of
+    // the next (right) chunk rather than strictly check our last block
+    // because it's potentially more efficient to do so.
+    HeapWord* const last_block = sp->block_start(chunk_mr.end());
     assert(last_block <= chunk_mr.end(), "In case this property changes.");
-    if (last_block == chunk_mr.end()
-        || !sp->block_is_obj(last_block)) {
+    if ((last_block == chunk_mr.end())     // our last block does not straddle boundary
+        || !sp->block_is_obj(last_block)   // last_block isn't an object
+        || oop(last_block)->is_objArray()  // last_block is an array (precisely marked)
+        || oop(last_block)->is_typeArray()) {
       max_to_do = chunk_mr.end();
-
+      NOISY(tty->print_cr(" process_chunk_boundary: Last block on this card is not a non-array object;\n"
+                         "   max_to_do left at " PTR_FORMAT, max_to_do);)
     } else {
-      // It is an object and starts before the end of the current chunk.
+      assert(last_block < chunk_mr.end(), "Tautology");
+      // It is a non-array object that straddles the right boundary of this chunk.
       // last_obj_card is the card corresponding to the start of the last object
       // in the chunk.  Note that the last object may not start in
       // the chunk.
-      jbyte* last_obj_card = byte_for(last_block);
-      if (!card_may_have_been_dirty(*last_obj_card)) {
-        // The card containing the head is not dirty.  Any marks in
+      jbyte* const last_obj_card = byte_for(last_block);
+      const jbyte val = *last_obj_card;
+      if (!card_will_be_scanned(val)) {
+        assert(!card_may_have_been_dirty(val), "Error");
+        // The card containing the head is not dirty.  Any marks on
         // subsequent cards still in this chunk must have been made
-        // precisely; we can cap processing at the end.
+        // precisely; we can cap processing at the end of our chunk.
         max_to_do = chunk_mr.end();
+        NOISY(tty->print_cr(" process_chunk_boundary: Head of last object on this card is not dirty;\n"
+                            "   max_to_do left at " PTR_FORMAT,
+                            max_to_do);)
       } else {
         // The last object must be considered dirty, and extends onto the
         // following chunk.  Look for a dirty card in that chunk that will
         // bound our processing.
         jbyte* limit_card = NULL;
-        size_t last_block_size = sp->block_size(last_block);
-        jbyte* last_card_of_last_obj =
+        const size_t last_block_size = sp->block_size(last_block);
+        jbyte* const last_card_of_last_obj =
           byte_for(last_block + last_block_size - 1);
-        jbyte* first_card_of_next_chunk = byte_for(chunk_mr.end());
+        jbyte* const first_card_of_next_chunk = byte_for(chunk_mr.end());
         // This search potentially goes a long distance looking
-        // for the next card that will be scanned.  For example,
-        // an object that is an array of primitives will not
-        // have any cards covering regions interior to the array
-        // that will need to be scanned. The scan can be terminated
-        // at the last card of the next chunk.  That would leave
-        // limit_card as NULL and would result in "max_to_do"
-        // being set with the LNC value or with the end
-        // of the last block.
-        jbyte* last_card_of_next_chunk = first_card_of_next_chunk +
-          CardsPerStrideChunk;
-        assert(byte_for(chunk_mr.end()) - byte_for(chunk_mr.start())
-          == CardsPerStrideChunk, "last card of next chunk may be wrong");
-        jbyte* last_card_to_check = (jbyte*) MIN2(last_card_of_last_obj,
-                                                  last_card_of_next_chunk);
+        // for the next card that will be scanned, terminating
+        // at the end of the last_block, if no earlier dirty card
+        // is found.
+        assert(byte_for(chunk_mr.end()) - byte_for(chunk_mr.start()) == ParGCCardsPerStrideChunk,
+               "last card of next chunk may be wrong");
         for (jbyte* cur = first_card_of_next_chunk;
-             cur <= last_card_to_check; cur++) {
-          if (card_will_be_scanned(*cur)) {
+             cur <= last_card_of_last_obj; cur++) {
+          const jbyte val = *cur;
+          if (card_will_be_scanned(val)) {
+            NOISY(tty->print_cr(" Found a non-clean card " PTR_FORMAT " with value 0x%x",
+                                cur, (int)val);)
             limit_card = cur; break;
+          } else {
+            assert(!card_may_have_been_dirty(val), "Error: card can't be skipped");
           }
         }
-        assert(0 <= cur_chunk_index+1 &&
-               cur_chunk_index+1 < lowest_non_clean_chunk_size,
+        if (limit_card != NULL) {
+          max_to_do = addr_for(limit_card);
+          assert(limit_card != NULL && max_to_do != NULL, "Error");
+          NOISY(tty->print_cr(" process_chunk_boundary: Found a dirty card at " PTR_FORMAT
+                        "   max_to_do set at " PTR_FORMAT " which is before end of last block in chunk: "
+                        PTR_FORMAT " + " PTR_FORMAT " = " PTR_FORMAT,
+                        limit_card, max_to_do, last_block, last_block_size, (last_block+last_block_size));)
+        } else {
+          // The following is a pessimistic value, because it's possible
+          // that a dirty card on a subsequent chunk has been cleared by
+          // the time we get to look at it; we'll correct for that further below,
+          // using the LNC array which records the least non-clean card
+          // before cards were cleared in a particular chunk.
+          limit_card = last_card_of_last_obj;
+          max_to_do = last_block + last_block_size;
+          assert(limit_card != NULL && max_to_do != NULL, "Error");
+          NOISY(tty->print_cr(" process_chunk_boundary: Found no dirty card before end of last block in chunk\n"
+                              "   Setting limit_card to " PTR_FORMAT
+                              " and max_to_do " PTR_FORMAT " + " PTR_FORMAT " = " PTR_FORMAT,
+                              limit_card, last_block, last_block_size, max_to_do);)
+        }
+        assert(0 < cur_chunk_index+1 && cur_chunk_index+1 < lowest_non_clean_chunk_size,
                "Bounds error.");
-        // LNC for the next chunk
-        jbyte* lnc_card = lowest_non_clean[cur_chunk_index+1];
-        if (limit_card == NULL) {
-          limit_card = lnc_card;
-        }
-        if (limit_card != NULL) {
+        // It is possible that a dirty card for the last object may have been
+        // cleared before we had a chance to examine it. In that case, the value
+        // will have been logged in the LNC for that chunk.
+        // We need to examine as many chunks to the right as this object
+        // covers.
+        const uintptr_t last_chunk_index_to_check = addr_to_chunk_index(last_block + last_block_size - 1)
+                                                    - lowest_non_clean_base_chunk_index;
+        DEBUG_ONLY(const uintptr_t last_chunk_index = addr_to_chunk_index(used.last())
+                                                      - lowest_non_clean_base_chunk_index;)
+        assert(last_chunk_index_to_check <= last_chunk_index,
+               err_msg("Out of bounds: last_chunk_index_to_check " INTPTR_FORMAT
+                       " exceeds last_chunk_index " INTPTR_FORMAT,
+                       last_chunk_index_to_check, last_chunk_index));
+        for (uintptr_t lnc_index = cur_chunk_index + 1;
+             lnc_index <= last_chunk_index_to_check;
+             lnc_index++) {
+          jbyte* lnc_card = lowest_non_clean[lnc_index];
           if (lnc_card != NULL) {
-            limit_card = (jbyte*)MIN2((intptr_t)limit_card,
-                                      (intptr_t)lnc_card);
-          }
-          max_to_do = addr_for(limit_card);
-        } else {
-          max_to_do = last_block + last_block_size;
+            // we can stop at the first non-NULL entry we find
+            if (lnc_card <= limit_card) {
+              NOISY(tty->print_cr(" process_chunk_boundary: LNC card " PTR_FORMAT " is lower than limit_card " PTR_FORMAT ";\n"
+                                  "   max_to_do will be lowered to " PTR_FORMAT " from " PTR_FORMAT,
+                                  lnc_card, limit_card, addr_for(lnc_card), max_to_do);)
+              limit_card = lnc_card;
+              max_to_do = addr_for(limit_card);
+              assert(limit_card != NULL && max_to_do != NULL, "Error");
+            }
+            // Whether or not it lowered our limit, the first non-NULL entry ends the search
+            break;
+          }  // else continue to look for a non-NULL entry if any
         }
+        assert(limit_card != NULL && max_to_do != NULL, "Error");
       }
+      assert(max_to_do != NULL, "OOPS 1 !");
     }
-    assert(max_to_do != NULL, "OOPS!");
+    assert(max_to_do != NULL, "OOPS 2!");
   } else {
     max_to_do = used.end();
+    NOISY(tty->print_cr(" process_chunk_boundary: Last chunk of this space;\n"
+                  "   max_to_do left at " PTR_FORMAT,
+                  max_to_do);)
   }
+  assert(max_to_do != NULL, "OOPS 3!");
   // Now we can set the closure we're using so it doesn't to beyond
   // max_to_do.
   dcto_cl->set_min_done(max_to_do);
 #ifndef PRODUCT
   dcto_cl->set_last_bottom(max_to_do);
 #endif
+  NOISY(tty->print_cr("===========================================================================\n");)
+}
 
-  // Now we set *our" lowest_non_clean entry.
-  // Find the object that spans our boundary, if one exists.
-  // Nothing to do on the first chunk.
-  if (chunk_mr.start() > used.start()) {
-    // first_block is the block possibly spanning the chunk start
-    HeapWord* first_block = sp->block_start(chunk_mr.start());
-    // Does the block span the start of the chunk and is it
-    // an object?
-    if (first_block < chunk_mr.start() &&
-        sp->block_is_obj(first_block)) {
-      jbyte* first_dirty_card = NULL;
-      jbyte* last_card_of_first_obj =
-          byte_for(first_block + sp->block_size(first_block) - 1);
-      jbyte* first_card_of_cur_chunk = byte_for(chunk_mr.start());
-      jbyte* last_card_of_cur_chunk = byte_for(chunk_mr.last());
-      jbyte* last_card_to_check =
-        (jbyte*) MIN2((intptr_t) last_card_of_cur_chunk,
-                      (intptr_t) last_card_of_first_obj);
-      for (jbyte* cur = first_card_of_cur_chunk;
-           cur <= last_card_to_check; cur++) {
-        if (card_will_be_scanned(*cur)) {
-          first_dirty_card = cur; break;
-        }
-      }
-      if (first_dirty_card != NULL) {
-        assert(0 <= cur_chunk_index &&
-                 cur_chunk_index < lowest_non_clean_chunk_size,
-               "Bounds error.");
-        lowest_non_clean[cur_chunk_index] = first_dirty_card;
-      }
-    }
-  }
-}
+#undef NOISY
 
 void
 CardTableModRefBS::
@@ -283,8 +413,8 @@
   // LNC array for the covered region.  Any later expansion can't affect
   // the used_at_save_marks region.
   // (I observed a bug in which the first thread to execute this would
-  // resize, and then it would cause "expand_and_allocates" that would
-  // Increase the number of chunks in the covered region.  Then a second
+  // resize, and then it would cause "expand_and_allocate" that would
+  // increase the number of chunks in the covered region.  Then a second
   // thread would come and execute this, see that the size didn't match,
   // and free and allocate again.  So the first thread would be using a
   // freed "_lowest_non_clean" array.)
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/gc_implementation/parNew/parOopClosures.inline.hpp
--- a/src/share/vm/gc_implementation/parNew/parOopClosures.inline.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/gc_implementation/parNew/parOopClosures.inline.hpp	Tue May 17 09:29:56 2011 -0400
@@ -77,7 +77,23 @@
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
     if ((HeapWord*)obj < _boundary) {
-      assert(!_g->to()->is_in_reserved(obj), "Scanning field twice?");
+#ifndef PRODUCT
+      if (_g->to()->is_in_reserved(obj)) {
+        tty->print_cr("Scanning field (" PTR_FORMAT ") twice?", p);
+        GenCollectedHeap* gch =  (GenCollectedHeap*)Universe::heap();
+        Space* sp = gch->space_containing(p);
+        oop obj = oop(sp->block_start(p));
+        assert((HeapWord*)obj < (HeapWord*)p, "Error");
+        tty->print_cr("Object: " PTR_FORMAT, obj);
+        tty->print_cr("-------");
+        obj->print();
+        tty->print_cr("-----");
+        tty->print_cr("Heap:");
+        tty->print_cr("-----");
+        gch->print();
+        ShouldNotReachHere();
+      }
+#endif
       // OK, we need to ensure that it is copied.
       // We read the klass and mark in this order, so that we can reliably
       // get the size of the object: if the mark we read is not a
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/interpreter/abstractInterpreter.hpp
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Tue May 17 09:29:56 2011 -0400
@@ -175,19 +175,32 @@
                                     int temps,
                                     int popframe_args,
                                     int monitors,
+                                    int caller_actual_parameters,
                                     int callee_params,
                                     int callee_locals,
-                                    bool is_top_frame);
+                                    bool is_top_frame) {
+    return layout_activation(method,
+                             temps,
+                             popframe_args,
+                             monitors,
+                             caller_actual_parameters,
+                             callee_params,
+                             callee_locals,
+                             (frame*)NULL,
+                             (frame*)NULL,
+                             is_top_frame);
+  }
 
   static int       layout_activation(methodOop method,
-                                      int temps,
-                                      int popframe_args,
-                                      int monitors,
-                                      int callee_params,
-                                      int callee_locals,
-                                      frame* caller,
-                                      frame* interpreter_frame,
-                                      bool is_top_frame);
+                                     int temps,
+                                     int popframe_args,
+                                     int monitors,
+                                     int caller_actual_parameters,
+                                     int callee_params,
+                                     int callee_locals,
+                                     frame* caller,
+                                     frame* interpreter_frame,
+                                     bool is_top_frame);
 
   // Runtime support
   static bool       is_not_reached(                       methodHandle method, int bci);
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/memory/blockOffsetTable.cpp
--- a/src/share/vm/memory/blockOffsetTable.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/memory/blockOffsetTable.cpp	Tue May 17 09:29:56 2011 -0400
@@ -541,20 +541,33 @@
     // to go back by.
     size_t n_cards_back = entry_to_cards_back(offset);
     q -= (N_words * n_cards_back);
-    assert(q >= _sp->bottom(), "Went below bottom!");
+    assert(q >= _sp->bottom(),
+           err_msg("q = " PTR_FORMAT " crossed below bottom = " PTR_FORMAT,
+                   q, _sp->bottom()));
+    assert(q < _sp->end(),
+           err_msg("q = " PTR_FORMAT " crossed above end = " PTR_FORMAT,
+                   q, _sp->end()));
     index -= n_cards_back;
     offset = _array->offset_array(index);
   }
   assert(offset < N_words, "offset too large");
   index--;
   q -= offset;
+  assert(q >= _sp->bottom(),
+         err_msg("q = " PTR_FORMAT " crossed below bottom = " PTR_FORMAT,
+                 q, _sp->bottom()));
+  assert(q < _sp->end(),
+         err_msg("q = " PTR_FORMAT " crossed above end = " PTR_FORMAT,
+                 q, _sp->end()));
   HeapWord* n = q;
 
   while (n <= addr) {
     debug_only(HeapWord* last = q);   // for debugging
     q = n;
     n += _sp->block_size(n);
-    assert(n > q, err_msg("Looping at: " INTPTR_FORMAT, n));
+    assert(n > q,
+           err_msg("Looping at n = " PTR_FORMAT " with last = " PTR_FORMAT " _sp = [" PTR_FORMAT "," PTR_FORMAT ")",
+                   n, last, _sp->bottom(), _sp->end()));
   }
   assert(q <= addr, err_msg("wrong order for current (" INTPTR_FORMAT ") <= arg (" INTPTR_FORMAT ")", q, addr));
   assert(addr <= n, err_msg("wrong order for arg (" INTPTR_FORMAT ") <= next (" INTPTR_FORMAT ")", addr, n));
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/memory/cardTableModRefBS.cpp
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Tue May 17 09:29:56 2011 -0400
@@ -455,25 +455,29 @@
   return true;
 }
 
-
 void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp,
                                                                  MemRegion mr,
-                                                                 DirtyCardToOopClosure* dcto_cl,
-                                                                 ClearNoncleanCardWrapper* cl) {
+                                                                 OopsInGenClosure* cl,
+                                                                 CardTableRS* ct) {
   if (!mr.is_empty()) {
     int n_threads = SharedHeap::heap()->n_par_threads();
     if (n_threads > 0) {
 #ifndef SERIALGC
-      non_clean_card_iterate_parallel_work(sp, mr, dcto_cl, cl, n_threads);
+      non_clean_card_iterate_parallel_work(sp, mr, cl, ct, n_threads);
 #else  // SERIALGC
       fatal("Parallel gc not supported here.");
 #endif // SERIALGC
     } else {
       // We do not call the non_clean_card_iterate_serial() version below because
       // we want to clear the cards (which non_clean_card_iterate_serial() does not
-      // do for us), and the ClearNoncleanCardWrapper closure itself does the work
-      // of finding contiguous dirty ranges of cards to process (and clear).
-      cl->do_MemRegion(mr);
+      // do for us): clear_cl here does the work of finding contiguous dirty ranges
+      // of cards to process and clear.
+
+      DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(),
+                                                       cl->gen_boundary());
+      ClearNoncleanCardWrapper clear_cl(dcto_cl, ct);
+
+      clear_cl.do_MemRegion(mr);
     }
   }
 }
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/memory/cardTableModRefBS.hpp
--- a/src/share/vm/memory/cardTableModRefBS.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/memory/cardTableModRefBS.hpp	Tue May 17 09:29:56 2011 -0400
@@ -150,7 +150,9 @@
   // Mapping from address to card marking array entry
   jbyte* byte_for(const void* p) const {
     assert(_whole_heap.contains(p),
-           "out of bounds access to card marking array");
+           err_msg("Attempt to access p = "PTR_FORMAT" out of bounds of "
+                   " card marking array's _whole_heap = ["PTR_FORMAT","PTR_FORMAT")",
+                   p, _whole_heap.start(), _whole_heap.end()));
     jbyte* result = &byte_map_base[uintptr_t(p) >> card_shift];
     assert(result >= _byte_map && result < _byte_map + _byte_map_size,
            "out of bounds accessor for card marking array");
@@ -173,18 +175,17 @@
   // A variant of the above that will operate in a parallel mode if
   // worker threads are available, and clear the dirty cards as it
   // processes them.
-  // ClearNoncleanCardWrapper cl must wrap the DirtyCardToOopClosure dcto_cl,
-  // which may itself be modified by the method.
+  // XXX ??? MemRegionClosure above vs OopsInGenClosure below XXX
+  // XXX some new_dcto_cl's take OopClosure's, plus as above there are
+  // some MemRegionClosures. Clean this up everywhere. XXX
   void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr,
-                                                DirtyCardToOopClosure* dcto_cl,
-                                                ClearNoncleanCardWrapper* cl);
+                                                OopsInGenClosure* cl, CardTableRS* ct);
 
  private:
   // Work method used to implement non_clean_card_iterate_possibly_parallel()
   // above in the parallel case.
   void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
-                                            DirtyCardToOopClosure* dcto_cl,
-                                            ClearNoncleanCardWrapper* cl,
+                                            OopsInGenClosure* cl, CardTableRS* ct,
                                             int n_threads);
 
  protected:
@@ -198,11 +199,6 @@
 
   // *** Support for parallel card scanning.
 
-  enum SomeConstantsForParallelism {
-    StridesPerThread    = 2,
-    CardsPerStrideChunk = 256
-  };
-
   // This is an array, one element per covered region of the card table.
   // Each entry is itself an array, with one element per chunk in the
   // covered region.  Each entry of these arrays is the lowest non-clean
@@ -235,7 +231,7 @@
   // covers the given address.
   uintptr_t addr_to_chunk_index(const void* addr) {
     uintptr_t card = (uintptr_t) byte_for(addr);
-    return card / CardsPerStrideChunk;
+    return card / ParGCCardsPerStrideChunk;
   }
 
   // Apply cl, which must either itself apply dcto_cl or be dcto_cl,
@@ -243,8 +239,8 @@
   void process_stride(Space* sp,
                       MemRegion used,
                       jint stride, int n_strides,
-                      DirtyCardToOopClosure* dcto_cl,
-                      ClearNoncleanCardWrapper* cl,
+                      OopsInGenClosure* cl,
+                      CardTableRS* ct,
                       jbyte** lowest_non_clean,
                       uintptr_t lowest_non_clean_base_chunk_index,
                       size_t lowest_non_clean_chunk_size);
@@ -457,14 +453,18 @@
     size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte));
     HeapWord* result = (HeapWord*) (delta << card_shift);
     assert(_whole_heap.contains(result),
-           "out of bounds accessor from card marking array");
+           err_msg("Returning result = "PTR_FORMAT" out of bounds of "
+                   " card marking array's _whole_heap = ["PTR_FORMAT","PTR_FORMAT")",
+                   result, _whole_heap.start(), _whole_heap.end()));
     return result;
   }
 
   // Mapping from address to card marking array index.
   size_t index_for(void* p) {
     assert(_whole_heap.contains(p),
-           "out of bounds access to card marking array");
+           err_msg("Attempt to access p = "PTR_FORMAT" out of bounds of "
+                   " card marking array's _whole_heap = ["PTR_FORMAT","PTR_FORMAT")",
+                   p, _whole_heap.start(), _whole_heap.end()));
     return byte_for(p) - _byte_map;
   }
 
@@ -482,7 +482,7 @@
   void verify_dirty_region(MemRegion mr) PRODUCT_RETURN;
 
   static size_t par_chunk_heapword_alignment() {
-    return CardsPerStrideChunk * card_size_in_words;
+    return ParGCCardsPerStrideChunk * card_size_in_words;
   }
 
 };
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/memory/cardTableRS.cpp
--- a/src/share/vm/memory/cardTableRS.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/memory/cardTableRS.cpp	Tue May 17 09:29:56 2011 -0400
@@ -162,7 +162,7 @@
 }
 
 ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
-  MemRegionClosure* dirty_card_closure, CardTableRS* ct) :
+  DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct) :
     _dirty_card_closure(dirty_card_closure), _ct(ct) {
     _is_par = (SharedHeap::heap()->n_par_threads() > 0);
 }
@@ -246,10 +246,6 @@
 
 void CardTableRS::younger_refs_in_space_iterate(Space* sp,
                                                 OopsInGenClosure* cl) {
-  DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs->precision(),
-                                                   cl->gen_boundary());
-  ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
-
   const MemRegion urasm = sp->used_region_at_save_marks();
 #ifdef ASSERT
   // Convert the assertion check to a warning if we are running
@@ -275,10 +271,10 @@
     if (!urasm.equals(urasm2)) {
       warning("CMS+ParNew: Flickering used_region_at_save_marks()!!");
     }
+    ShouldNotReachHere();
   }
 #endif
-  _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm,
-                                                   dcto_cl, &clear_cl);
+  _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, cl, this);
 }
 
 void CardTableRS::clear_into_younger(Generation* gen, bool clear_perm) {
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/memory/cardTableRS.hpp
--- a/src/share/vm/memory/cardTableRS.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/memory/cardTableRS.hpp	Tue May 17 09:29:56 2011 -0400
@@ -31,7 +31,6 @@
 
 class Space;
 class OopsInGenClosure;
-class DirtyCardToOopClosure;
 
 // This kind of "GenRemSet" uses a card table both as shared data structure
 // for a mod ref barrier set and for the rem set information.
@@ -167,7 +166,7 @@
 };
 
 class ClearNoncleanCardWrapper: public MemRegionClosure {
-  MemRegionClosure* _dirty_card_closure;
+  DirtyCardToOopClosure* _dirty_card_closure;
   CardTableRS* _ct;
   bool _is_par;
 private:
@@ -179,7 +178,7 @@
   inline bool clear_card_parallel(jbyte* entry);
 
 public:
-  ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, CardTableRS* ct);
+  ClearNoncleanCardWrapper(DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct);
   void do_MemRegion(MemRegion mr);
 };
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/memory/space.cpp
--- a/src/share/vm/memory/space.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/memory/space.cpp	Tue May 17 09:29:56 2011 -0400
@@ -97,6 +97,14 @@
   }
 }
 
+// We get called with "mr" representing the dirty region
+// that we want to process. Because of imprecise marking,
+// we may need to extend the incoming "mr" to the right,
+// and scan more. However, because we may already have
+// scanned some of that extended region, we may need to
+// trim its right-end back some so we do not scan what
+// we (or another worker thread) may already have scanned
+// or may be planning to scan.
 void DirtyCardToOopClosure::do_MemRegion(MemRegion mr) {
 
   // Some collectors need to do special things whenever their dirty
@@ -148,7 +156,7 @@
   // e.g. the dirty card region is entirely in a now free object
   // -- something that could happen with a concurrent sweeper.
   bottom = MIN2(bottom, top);
-  mr     = MemRegion(bottom, top);
+  MemRegion extended_mr = MemRegion(bottom, top);
   assert(bottom <= top &&
          (_precision != CardTableModRefBS::ObjHeadPreciseArray ||
           _min_done == NULL ||
@@ -156,8 +164,8 @@
          "overlap!");
 
   // Walk the region if it is not empty; otherwise there is nothing to do.
-  if (!mr.is_empty()) {
-    walk_mem_region(mr, bottom_obj, top);
+  if (!extended_mr.is_empty()) {
+    walk_mem_region(extended_mr, bottom_obj, top);
   }
 
   // An idempotent closure might be applied in any order, so we don't
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/oops/constantPoolKlass.cpp
--- a/src/share/vm/oops/constantPoolKlass.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/oops/constantPoolKlass.cpp	Tue May 17 09:29:56 2011 -0400
@@ -285,10 +285,9 @@
 void constantPoolKlass::oop_push_contents(PSPromotionManager* pm, oop obj) {
   assert(obj->is_constantPool(), "should be constant pool");
   constantPoolOop cp = (constantPoolOop) obj;
-  if (cp->tags() != NULL &&
-      (!JavaObjectsInPerm || (EnableInvokeDynamic && cp->has_pseudo_string()))) {
+  if (cp->tags() != NULL) {
     for (int i = 1; i < cp->length(); ++i) {
-      if (cp->tag_at(i).is_string()) {
+      if (cp->is_pointer_entry(i)) {
         oop* base = cp->obj_at_addr_raw(i);
         if (PSScavenge::should_scavenge(base)) {
           pm->claim_or_forward_depth(base);
@@ -342,6 +341,11 @@
         anObj->print_value_on(st);
         st->print(" {0x%lx}", (address)anObj);
         break;
+      case JVM_CONSTANT_Object :
+        anObj = cp->object_at(index);
+        anObj->print_value_on(st);
+        st->print(" {0x%lx}", (address)anObj);
+        break;
       case JVM_CONSTANT_Integer :
         st->print("%d", cp->int_at(index));
         break;
@@ -432,23 +436,21 @@
   guarantee(cp->is_perm(), "should be in permspace");
   if (!cp->partially_loaded()) {
     for (int i = 0; i< cp->length();  i++) {
+      constantTag tag = cp->tag_at(i);
       CPSlot entry = cp->slot_at(i);
-      if (cp->tag_at(i).is_klass()) {
+      if (tag.is_klass()) {
         if (entry.is_oop()) {
           guarantee(entry.get_oop()->is_perm(),     "should be in permspace");
           guarantee(entry.get_oop()->is_klass(),    "should be klass");
         }
-      }
-      if (cp->tag_at(i).is_unresolved_klass()) {
+      } else if (tag.is_unresolved_klass()) {
         if (entry.is_oop()) {
           guarantee(entry.get_oop()->is_perm(),     "should be in permspace");
           guarantee(entry.get_oop()->is_klass(),    "should be klass");
         }
-      }
-      if (cp->tag_at(i).is_symbol()) {
+      } else if (tag.is_symbol()) {
         guarantee(entry.get_symbol()->refcount() != 0, "should have nonzero reference count");
-      }
-      if (cp->tag_at(i).is_unresolved_string()) {
+      } else if (tag.is_unresolved_string()) {
         if (entry.is_oop()) {
           guarantee(entry.get_oop()->is_perm(),     "should be in permspace");
           guarantee(entry.get_oop()->is_instance(), "should be instance");
@@ -456,8 +458,7 @@
         else {
           guarantee(entry.get_symbol()->refcount() != 0, "should have nonzero reference count");
         }
-      }
-      if (cp->tag_at(i).is_string()) {
+      } else if (tag.is_string()) {
         if (!cp->has_pseudo_string()) {
           if (entry.is_oop()) {
             guarantee(!JavaObjectsInPerm || entry.get_oop()->is_perm(),
@@ -467,8 +468,11 @@
         } else {
           // can be non-perm, can be non-instance (array)
         }
+      } else if (tag.is_object()) {
+        assert(entry.get_oop()->is_oop(), "should be some valid oop");
+      } else {
+        assert(!cp->is_pointer_entry(i), "unhandled oop type in constantPoolKlass::verify_on");
       }
-      // FIXME: verify JSR 292 tags JVM_CONSTANT_MethodHandle, etc.
     }
     guarantee(cp->tags()->is_perm(),         "should be in permspace");
     guarantee(cp->tags()->is_typeArray(),    "should be type array");
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/opto/bytecodeInfo.cpp
--- a/src/share/vm/opto/bytecodeInfo.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/opto/bytecodeInfo.cpp	Tue May 17 09:29:56 2011 -0400
@@ -89,7 +89,7 @@
 }
 
 // positive filter: should send be inlined?  returns NULL, if yes, or rejection msg
-const char* InlineTree::shouldInline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const {
+const char* InlineTree::should_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const {
   // Allows targeted inlining
   if(callee_method->should_inline()) {
     *wci_result = *(WarmCallInfo::always_hot());
@@ -102,8 +102,7 @@
 
   // positive filter: should send be inlined?  returns NULL (--> yes)
   // or rejection msg
-  int max_size = C->max_inline_size();
-  int size     = callee_method->code_size();
+  int size = callee_method->code_size();
 
   // Check for too many throws (and not too huge)
   if(callee_method->interpreter_throwout_count() > InlineThrowCount &&
@@ -120,18 +119,36 @@
     return NULL;  // size and frequency are represented in a new way
   }
 
+  int default_max_inline_size = C->max_inline_size();
+  int inline_small_code_size  = InlineSmallCode / 4;
+  int max_inline_size         = default_max_inline_size;
+
   int call_site_count  = method()->scale_count(profile.count());
   int invoke_count     = method()->interpreter_invocation_count();
-  assert( invoke_count != 0, "Require invokation count greater than zero");
-  int freq = call_site_count/invoke_count;
+
+  // Bytecoded method handle adapters do not have interpreter
+  // profiling data, only made-up MDO data.  Get the counter from
+  // there.
+  if (caller_method->is_method_handle_adapter()) {
+    assert(method()->method_data_or_null(), "must have an MDO");
+    ciMethodData* mdo = method()->method_data();
+    ciProfileData* mha_profile = mdo->bci_to_data(caller_bci);
+    assert(mha_profile, "must exist");
+    CounterData* cd = mha_profile->as_CounterData();
+    invoke_count = cd->count();
+    call_site_count = invoke_count;  // use the same value
+  }
+
+  assert(invoke_count != 0, "require invocation count greater than zero");
+  int freq = call_site_count / invoke_count;
 
   // bump the max size if the call is frequent
   if ((freq >= InlineFrequencyRatio) ||
       (call_site_count >= InlineFrequencyCount) ||
       is_init_with_ea(callee_method, caller_method, C)) {
 
-    max_size = C->freq_inline_size();
-    if (size <= max_size && TraceFrequencyInlining) {
+    max_inline_size = C->freq_inline_size();
+    if (size <= max_inline_size && TraceFrequencyInlining) {
       CompileTask::print_inline_indent(inline_depth());
       tty->print_cr("Inlined frequent method (freq=%d count=%d):", freq, call_site_count);
       CompileTask::print_inline_indent(inline_depth());
@@ -141,11 +158,11 @@
   } else {
     // Not hot.  Check for medium-sized pre-existing nmethod at cold sites.
     if (callee_method->has_compiled_code() &&
-        callee_method->instructions_size(CompLevel_full_optimization) > InlineSmallCode/4)
+        callee_method->instructions_size(CompLevel_full_optimization) > inline_small_code_size)
       return "already compiled into a medium method";
   }
-  if (size > max_size) {
-    if (max_size > C->max_inline_size())
+  if (size > max_inline_size) {
+    if (max_inline_size > default_max_inline_size)
       return "hot method too big";
     return "too big";
   }
@@ -154,7 +171,7 @@
 
 
 // negative filter: should send NOT be inlined?  returns NULL, ok to inline, or rejection msg
-const char* InlineTree::shouldNotInline(ciMethod *callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const {
+const char* InlineTree::should_not_inline(ciMethod *callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const {
   // negative filter: should send NOT be inlined?  returns NULL (--> inline) or rejection msg
   if (!UseOldInlining) {
     const char* fail = NULL;
@@ -269,14 +286,13 @@
   }
 
   const char *msg = NULL;
-  if ((msg = shouldInline(callee_method, caller_method, caller_bci,
-                          profile, wci_result)) != NULL) {
+  msg = should_inline(callee_method, caller_method, caller_bci, profile, wci_result);
+  if (msg != NULL)
     return msg;
-  }
-  if ((msg = shouldNotInline(callee_method, caller_method,
-                             wci_result)) != NULL) {
+
+  msg = should_not_inline(callee_method, caller_method, wci_result);
+  if (msg != NULL)
     return msg;
-  }
 
   if (InlineAccessors && callee_method->is_accessor()) {
     // accessor methods are not subject to any of the following limits.
@@ -492,9 +508,8 @@
       new_depth_adjust -= 1;  // don't count method handle calls from java.lang.invoke implem
     }
     if (new_depth_adjust != 0 && PrintInlining) {
-      stringStream nm1; caller_jvms->method()->print_name(&nm1);
-      stringStream nm2; callee_method->print_name(&nm2);
-      tty->print_cr("discounting inlining depth from %s to %s", nm1.base(), nm2.base());
+      CompileTask::print_inline_indent(inline_depth());
+      tty->print_cr(" \\-> discounting inline depth");
     }
     if (new_depth_adjust != 0 && C->log()) {
       int id1 = C->log()->identify(caller_jvms->method());
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/opto/doCall.cpp
--- a/src/share/vm/opto/doCall.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/opto/doCall.cpp	Tue May 17 09:29:56 2011 -0400
@@ -62,7 +62,10 @@
 CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual,
                                        JVMState* jvms, bool allow_inline,
                                        float prof_factor) {
-  CallGenerator* cg;
+  CallGenerator*  cg;
+  ciMethod*       caller   = jvms->method();
+  int             bci      = jvms->bci();
+  Bytecodes::Code bytecode = caller->java_code_at_bci(bci);
   guarantee(call_method != NULL, "failed method resolution");
 
   // Dtrace currently doesn't work unless all calls are vanilla
@@ -73,7 +76,7 @@
   // Note: When we get profiling during stage-1 compiles, we want to pull
   // from more specific profile data which pertains to this inlining.
   // Right now, ignore the information in jvms->caller(), and do method[bci].
-  ciCallProfile profile = jvms->method()->call_profile_at_bci(jvms->bci());
+  ciCallProfile profile = caller->call_profile_at_bci(bci);
 
   // See how many times this site has been invoked.
   int site_count = profile.count();
@@ -116,7 +119,7 @@
   // MethodHandle.invoke* are native methods which obviously don't
   // have bytecodes and so normal inlining fails.
   if (call_method->is_method_handle_invoke()) {
-    if (jvms->method()->java_code_at_bci(jvms->bci()) != Bytecodes::_invokedynamic) {
+    if (bytecode != Bytecodes::_invokedynamic) {
       GraphKit kit(jvms);
       Node* n = kit.argument(0);
 
@@ -125,17 +128,19 @@
         ciObject* const_oop = oop_ptr->const_oop();
         ciMethodHandle* method_handle = const_oop->as_method_handle();
 
-        // Set the actually called method to have access to the class
-        // and signature in the MethodHandleCompiler.
+        // Set the callee so that the class and signature are available
+        // in the MethodHandleCompiler.
         method_handle->set_callee(call_method);
+        method_handle->set_caller(caller);
+        method_handle->set_call_profile(&profile);
 
         // Get an adapter for the MethodHandle.
         ciMethod* target_method = method_handle->get_method_handle_adapter();
-        CallGenerator* hit_cg = NULL;
-        if (target_method != NULL)
-          hit_cg = this->call_generator(target_method, vtable_index, false, jvms, true, prof_factor);
-        if (hit_cg != NULL && hit_cg->is_inline())
-          return hit_cg;
+        if (target_method != NULL) {
+          CallGenerator* hit_cg = this->call_generator(target_method, vtable_index, false, jvms, true, prof_factor);
+          if (hit_cg != NULL && hit_cg->is_inline())
+            return hit_cg;
+        }
       }
 
       return CallGenerator::for_direct_call(call_method);
@@ -148,18 +153,20 @@
       ciCallSite*     call_site     = str.get_call_site();
       ciMethodHandle* method_handle = call_site->get_target();
 
-      // Set the actually called method to have access to the class
-      // and signature in the MethodHandleCompiler.
+      // Set the callee so that the class and signature are available
+      // in the MethodHandleCompiler.
       method_handle->set_callee(call_method);
+      method_handle->set_caller(caller);
+      method_handle->set_call_profile(&profile);
 
       // Get an adapter for the MethodHandle.
       ciMethod* target_method = method_handle->get_invokedynamic_adapter();
-      CallGenerator* hit_cg = NULL;
-      if (target_method != NULL)
-        hit_cg = this->call_generator(target_method, vtable_index, false, jvms, true, prof_factor);
-      if (hit_cg != NULL && hit_cg->is_inline()) {
-        CallGenerator* miss_cg = CallGenerator::for_dynamic_call(call_method);
-        return CallGenerator::for_predicted_dynamic_call(method_handle, miss_cg, hit_cg, prof_factor);
+      if (target_method != NULL) {
+        CallGenerator* hit_cg = this->call_generator(target_method, vtable_index, false, jvms, true, prof_factor);
+        if (hit_cg != NULL && hit_cg->is_inline()) {
+          CallGenerator* miss_cg = CallGenerator::for_dynamic_call(call_method);
+          return CallGenerator::for_predicted_dynamic_call(method_handle, miss_cg, hit_cg, prof_factor);
+        }
       }
 
       // If something failed, generate a normal dynamic call.
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/opto/loopTransform.cpp
--- a/src/share/vm/opto/loopTransform.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/opto/loopTransform.cpp	Tue May 17 09:29:56 2011 -0400
@@ -1230,7 +1230,7 @@
         set_ctrl(new_limit, C->root());
       } else {
         // Limit is not constant.
-        {
+        if (loop_head->unrolled_count() == 1) { // only for first unroll
           // Separate limit by Opaque node in case it is an incremented
           // variable from previous loop to avoid using pre-incremented
           // value which could increase register pressure.
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/opto/parse.hpp
--- a/src/share/vm/opto/parse.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/opto/parse.hpp	Tue May 17 09:29:56 2011 -0400
@@ -68,8 +68,8 @@
                                            JVMState* caller_jvms,
                                            int caller_bci);
   const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result);
-  const char* shouldInline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const;
-  const char* shouldNotInline(ciMethod* callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const;
+  const char* should_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const;
+  const char* should_not_inline(ciMethod* callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const;
   void        print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const;
 
   InlineTree *caller_tree()       const { return _caller_tree;  }
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/prims/jvmtiTagMap.cpp
--- a/src/share/vm/prims/jvmtiTagMap.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/prims/jvmtiTagMap.cpp	Tue May 17 09:29:56 2011 -0400
@@ -3158,6 +3158,9 @@
         if (fr->is_entry_frame()) {
           last_entry_frame = fr;
         }
+        if (fr->is_ricochet_frame()) {
+          fr->oops_ricochet_do(blk, vf->register_map());
+        }
       }
 
       vf = vf->sender();
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/prims/methodHandleWalk.cpp
--- a/src/share/vm/prims/methodHandleWalk.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/prims/methodHandleWalk.cpp	Tue May 17 09:29:56 2011 -0400
@@ -31,6 +31,11 @@
  * JSR 292 reference implementation: method handle structure analysis
  */
 
+#ifdef PRODUCT
+#define print_method_handle(mh) {}
+#else //PRODUCT
+extern "C" void print_method_handle(oop mh);
+#endif //PRODUCT
 
 // -----------------------------------------------------------------------------
 // MethodHandleChain
@@ -206,8 +211,10 @@
         lose("bad argument index", CHECK_(empty));
       }
 
+      bool retain_original_args = false;  // used by fold/collect logic
+
       // perform the adapter action
-      switch (chain().adapter_conversion_op()) {
+      switch (conv_op) {
       case java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY:
         // No changes to arguments; pass the bits through.
         break;
@@ -216,51 +223,36 @@
         // To keep the verifier happy, emit bitwise ("raw") conversions as needed.
         // See MethodHandles::same_basic_type_for_arguments for allowed conversions.
         Handle incoming_mtype(THREAD, chain().method_type_oop());
-        oop outgoing_mh_oop = chain().vmtarget_oop();
-        if (!java_lang_invoke_MethodHandle::is_instance(outgoing_mh_oop))
-          lose("outgoing target not a MethodHandle", CHECK_(empty));
-        Handle outgoing_mtype(THREAD, java_lang_invoke_MethodHandle::type(outgoing_mh_oop));
-        outgoing_mh_oop = NULL;  // GC safety
+        Handle outgoing_mtype;
+        {
+          oop outgoing_mh_oop = chain().vmtarget_oop();
+          if (!java_lang_invoke_MethodHandle::is_instance(outgoing_mh_oop))
+            lose("outgoing target not a MethodHandle", CHECK_(empty));
+          outgoing_mtype = Handle(THREAD, java_lang_invoke_MethodHandle::type(outgoing_mh_oop));
+        }
 
         int nptypes = java_lang_invoke_MethodType::ptype_count(outgoing_mtype());
         if (nptypes != java_lang_invoke_MethodType::ptype_count(incoming_mtype()))
           lose("incoming and outgoing parameter count do not agree", CHECK_(empty));
 
+        // Argument types.
         for (int i = 0, slot = _outgoing.length() - 1; slot >= 0; slot--) {
           SlotState* arg_state = slot_state(slot);
           if (arg_state->_type == T_VOID)  continue;
-          ArgToken arg = _outgoing.at(slot)._arg;
 
-          klassOop  in_klass  = NULL;
-          klassOop  out_klass = NULL;
-          BasicType inpbt  = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::ptype(incoming_mtype(), i), &in_klass);
-          BasicType outpbt = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::ptype(outgoing_mtype(), i), &out_klass);
-          assert(inpbt == arg.basic_type(), "sanity");
-
-          if (inpbt != outpbt) {
-            vmIntrinsics::ID iid = vmIntrinsics::for_raw_conversion(inpbt, outpbt);
-            if (iid == vmIntrinsics::_none) {
-              lose("no raw conversion method", CHECK_(empty));
-            }
-            ArgToken arglist[2];
-            arglist[0] = arg;         // outgoing 'this'
-            arglist[1] = ArgToken();  // sentinel
-            arg = make_invoke(NULL, iid, Bytecodes::_invokestatic, false, 1, &arglist[0], CHECK_(empty));
-            change_argument(inpbt, slot, outpbt, arg);
-          }
-
+          klassOop  src_klass = NULL;
+          klassOop  dst_klass = NULL;
+          BasicType src = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::ptype(incoming_mtype(), i), &src_klass);
+          BasicType dst = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::ptype(outgoing_mtype(), i), &dst_klass);
+          retype_raw_argument_type(src, dst, slot, CHECK_(empty));
           i++;  // We need to skip void slots at the top of the loop.
         }
 
-        BasicType inrbt  = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(incoming_mtype()));
-        BasicType outrbt = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(outgoing_mtype()));
-        if (inrbt != outrbt) {
-          if (inrbt == T_INT && outrbt == T_VOID) {
-            // See comments in MethodHandles::same_basic_type_for_arguments.
-          } else {
-            assert(false, "IMPLEMENT ME");
-            lose("no raw conversion method", CHECK_(empty));
-          }
+        // Return type.
+        {
+          BasicType src = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(incoming_mtype()));
+          BasicType dst = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(outgoing_mtype()));
+          retype_raw_return_type(src, dst, CHECK_(empty));
         }
         break;
       }
@@ -273,7 +265,7 @@
         assert(dest == arg_state->_type, "");
         ArgToken arg = arg_state->_arg;
         ArgToken new_arg = make_conversion(T_OBJECT, dest_klass, Bytecodes::_checkcast, arg, CHECK_(empty));
-        assert(arg.index() == new_arg.index(), "should be the same index");
+        assert(arg.token_type() >= tt_symbolic || arg.index() == new_arg.index(), "should be the same index");
         debug_only(dest_klass = (klassOop)badOop);
         break;
       }
@@ -332,7 +324,7 @@
         ArgToken arglist[2];
         arglist[0] = arg;         // outgoing value
         arglist[1] = ArgToken();  // sentinel
-        arg = make_invoke(NULL, boxer, Bytecodes::_invokevirtual, false, 1, &arglist[0], CHECK_(empty));
+        arg = make_invoke(NULL, boxer, Bytecodes::_invokestatic, false, 1, &arglist[0], CHECK_(empty));
         change_argument(src, arg_slot, T_OBJECT, arg);
         break;
       }
@@ -404,8 +396,54 @@
         break;
       }
 
-      case java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS: { //NYI, may GC
-        lose("unimplemented", CHECK_(empty));
+      case java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS:
+        retain_original_args = true;   // and fall through:
+      case java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS: {
+        // call argument MH recursively
+        //{static int x; if (!x++) print_method_handle(chain().method_handle_oop()); --x;}
+        Handle recursive_mh(THREAD, chain().adapter_arg_oop());
+        if (!java_lang_invoke_MethodHandle::is_instance(recursive_mh())) {
+          lose("recursive target not a MethodHandle", CHECK_(empty));
+        }
+        Handle recursive_mtype(THREAD, java_lang_invoke_MethodHandle::type(recursive_mh()));
+        int argc = java_lang_invoke_MethodType::ptype_count(recursive_mtype());
+        int coll_slots = java_lang_invoke_MethodHandle::vmslots(recursive_mh());
+        BasicType rtype = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(recursive_mtype()));
+        ArgToken* arglist = NEW_RESOURCE_ARRAY(ArgToken, 1 + argc + 1);  // 1+: mh, +1: sentinel
+        arglist[0] = make_oop_constant(recursive_mh(), CHECK_(empty));
+        if (arg_slot < 0 || coll_slots < 0 || arg_slot + coll_slots > _outgoing.length()) {
+          lose("bad fold/collect arg slot", CHECK_(empty));
+        }
+        for (int i = 0, slot = arg_slot + coll_slots - 1; slot >= arg_slot; slot--) {
+          SlotState* arg_state = slot_state(slot);
+          BasicType  arg_type  = arg_state->_type;
+          if (arg_type == T_VOID)  continue;
+          ArgToken arg = _outgoing.at(slot)._arg;
+          if (i >= argc) { lose("bad fold/collect arg", CHECK_(empty)); }
+          arglist[1+i] = arg;
+          if (!retain_original_args)
+            change_argument(arg_type, slot, T_VOID, ArgToken(tt_void));
+        }
+        arglist[1+argc] = ArgToken();  // sentinel
+        oop invoker = java_lang_invoke_MethodTypeForm::vmlayout(
+                          java_lang_invoke_MethodType::form(recursive_mtype()) );
+        if (invoker == NULL || !invoker->is_method()) {
+          lose("bad vmlayout slot", CHECK_(empty));
+        }
+        // FIXME: consider inlining the invokee at the bytecode level
+        ArgToken ret = make_invoke(methodOop(invoker), vmIntrinsics::_none,
+                                   Bytecodes::_invokevirtual, false, 1+argc, &arglist[0], CHECK_(empty));
+        DEBUG_ONLY(invoker = NULL);
+        if (rtype == T_OBJECT) {
+          klassOop rklass = java_lang_Class::as_klassOop( java_lang_invoke_MethodType::rtype(recursive_mtype()) );
+          if (rklass != SystemDictionary::Object_klass() &&
+              !Klass::cast(rklass)->is_interface()) {
+            // preserve type safety
+            ret = make_conversion(T_OBJECT, rklass, Bytecodes::_checkcast, ret, CHECK_(empty));
+          }
+        }
+        int ret_slot = arg_slot + (retain_original_args ? coll_slots : 0);
+        change_argument(T_VOID, ret_slot, rtype, ret);
         break;
       }
 
@@ -452,9 +490,18 @@
                     Bytecodes::_invokestatic, false, 3, &arglist[0], CHECK_(empty));
 
         // Spread out the array elements.
-        Bytecodes::Code aload_op = Bytecodes::_aaload;
-        if (element_type != T_OBJECT) {
-          lose("primitive array NYI", CHECK_(empty));
+        Bytecodes::Code aload_op = Bytecodes::_nop;
+        switch (element_type) {
+        case T_INT:       aload_op = Bytecodes::_iaload; break;
+        case T_LONG:      aload_op = Bytecodes::_laload; break;
+        case T_FLOAT:     aload_op = Bytecodes::_faload; break;
+        case T_DOUBLE:    aload_op = Bytecodes::_daload; break;
+        case T_OBJECT:    aload_op = Bytecodes::_aaload; break;
+        case T_BOOLEAN:   // fall through:
+        case T_BYTE:      aload_op = Bytecodes::_baload; break;
+        case T_CHAR:      aload_op = Bytecodes::_caload; break;
+        case T_SHORT:     aload_op = Bytecodes::_saload; break;
+        default:          lose("primitive array NYI", CHECK_(empty));
         }
         int ap = arg_slot;
         for (int i = 0; i < spread_length; i++) {
@@ -467,11 +514,6 @@
         break;
       }
 
-      case java_lang_invoke_AdapterMethodHandle::OP_FLYBY: //NYI, runs Java code
-      case java_lang_invoke_AdapterMethodHandle::OP_RICOCHET: //NYI, runs Java code
-        lose("unimplemented", CHECK_(empty));
-        break;
-
       default:
         lose("bad adapter conversion", CHECK_(empty));
         break;
@@ -495,7 +537,7 @@
           lose("bad bound value", CHECK_(empty));
         }
       }
-      debug_only(arg_oop = badOop);
+      DEBUG_ONLY(arg_oop = badOop);
       change_argument(T_VOID, arg_slot, arg_type, arg);
     }
 
@@ -538,11 +580,10 @@
   }
   for (int i = 0; i < nptypes; i++) {
     klassOop  arg_type_klass = NULL;
-    BasicType arg_type = java_lang_Class::as_BasicType(
-                java_lang_invoke_MethodType::ptype(mtype(), i), &arg_type_klass);
+    BasicType arg_type = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::ptype(mtype(), i), &arg_type_klass);
     int index = new_local_index(arg_type);
     ArgToken arg = make_parameter(arg_type, arg_type_klass, index, CHECK);
-    debug_only(arg_type_klass = (klassOop) NULL);
+    DEBUG_ONLY(arg_type_klass = (klassOop) NULL);
     _outgoing.at_put(argp, make_state(arg_type, arg));
     if (type2size[arg_type] == 2) {
       // add the extra slot, so we can model the JVM stack
@@ -552,8 +593,7 @@
   }
   // call make_parameter at the end of the list for the return type
   klassOop  ret_type_klass = NULL;
-  BasicType ret_type = java_lang_Class::as_BasicType(
-              java_lang_invoke_MethodType::rtype(mtype()), &ret_type_klass);
+  BasicType ret_type = java_lang_Class::as_BasicType(java_lang_invoke_MethodType::rtype(mtype()), &ret_type_klass);
   ArgToken  ret = make_parameter(ret_type, ret_type_klass, -1, CHECK);
   // ignore ret; client can catch it if needed
 }
@@ -605,11 +645,54 @@
 
 
 // -----------------------------------------------------------------------------
+// MethodHandleWalker::retype_raw_conversion
+//
+// Do the raw retype conversions for OP_RETYPE_RAW.
+void MethodHandleWalker::retype_raw_conversion(BasicType src, BasicType dst, bool for_return, int slot, TRAPS) {
+  if (src != dst) {
+    if (MethodHandles::same_basic_type_for_returns(src, dst, /*raw*/ true)) {
+      if (MethodHandles::is_float_fixed_reinterpretation_cast(src, dst)) {
+        if (for_return)  Untested("MHW return raw conversion");  // still untested
+        vmIntrinsics::ID iid = vmIntrinsics::for_raw_conversion(src, dst);
+        if (iid == vmIntrinsics::_none) {
+          lose("no raw conversion method", CHECK);
+        }
+        ArgToken arglist[2];
+        if (!for_return) {
+          // argument type conversion
+          ArgToken arg = _outgoing.at(slot)._arg;
+          assert(arg.token_type() >= tt_symbolic || src == arg.basic_type(), "sanity");
+          arglist[0] = arg;         // outgoing 'this'
+          arglist[1] = ArgToken();  // sentinel
+          arg = make_invoke(NULL, iid, Bytecodes::_invokestatic, false, 1, &arglist[0], CHECK);
+          change_argument(src, slot, dst, arg);
+        } else {
+          // return type conversion
+          klassOop arg_klass = NULL;
+          arglist[0] = make_parameter(src, arg_klass, -1, CHECK);  // return value
+          arglist[1] = ArgToken();                                 // sentinel
+          (void) make_invoke(NULL, iid, Bytecodes::_invokestatic, false, 1, &arglist[0], CHECK);
+        }
+      } else {
+        // Nothing to do.
+      }
+    } else if (src == T_OBJECT && is_java_primitive(dst)) {
+      // ref-to-prim (would discard ref, push zero): not expected here, so bail out
+      lose("requested ref-to-prim conversion not expected", CHECK);
+    } else {
+      lose("requested raw conversion not allowed", CHECK);
+    }
+  }
+}
+
+
+// -----------------------------------------------------------------------------
 // MethodHandleCompiler
 
-MethodHandleCompiler::MethodHandleCompiler(Handle root, methodHandle callee, bool is_invokedynamic, TRAPS)
+MethodHandleCompiler::MethodHandleCompiler(Handle root, methodHandle callee, int invoke_count, bool is_invokedynamic, TRAPS)
   : MethodHandleWalker(root, is_invokedynamic, THREAD),
     _callee(callee),
+    _invoke_count(invoke_count),
     _thread(THREAD),
     _bytecode(THREAD, 50),
     _constants(THREAD, 10),
@@ -709,6 +792,7 @@
   case Bytecodes::_astore_1:
   case Bytecodes::_astore_2:
   case Bytecodes::_astore_3:
+  case Bytecodes::_iand:
   case Bytecodes::_i2l:
   case Bytecodes::_i2f:
   case Bytecodes::_i2d:
@@ -935,7 +1019,11 @@
     break;
 
   default:
-    ShouldNotReachHere();
+    if (op == Bytecodes::_illegal)
+      lose("no such primitive conversion", THREAD);
+    else
+      lose("bad primitive conversion op", THREAD);
+    return make_prim_constant(type, &zero_jvalue, THREAD);
   }
 
   return make_parameter(type, tk, index, THREAD);
@@ -946,7 +1034,9 @@
 // MethodHandleCompiler
 //
 
-static jvalue zero_jvalue;
+// Values used by the compiler.
+jvalue MethodHandleCompiler::zero_jvalue = { 0 };
+jvalue MethodHandleCompiler::one_jvalue  = { 1 };
 
 // Emit bytecodes for the given invoke instruction.
 MethodHandleWalker::ArgToken
@@ -954,18 +1044,18 @@
                                   Bytecodes::Code op, bool tailcall,
                                   int argc, MethodHandleWalker::ArgToken* argv,
                                   TRAPS) {
+  ArgToken zero;
   if (m == NULL) {
     // Get the intrinsic methodOop.
     m = vmIntrinsics::method_for(iid);
     if (m == NULL) {
-      ArgToken zero;
       lose(vmIntrinsics::name_at(iid), CHECK_(zero));
     }
   }
 
-  klassOop  klass   = m->method_holder();
-  Symbol* name      = m->name();
-  Symbol* signature = m->signature();
+  klassOop klass     = m->method_holder();
+  Symbol*  name      = m->name();
+  Symbol*  signature = m->signature();
 
   if (tailcall) {
     // Actually, in order to make these methods more recognizable,
@@ -1031,7 +1121,6 @@
     if (rbt != _rtype) {
       if (rbt == T_VOID) {
         // push a zero of the right sort
-        ArgToken zero;
         if (_rtype == T_OBJECT) {
           zero = make_oop_constant(NULL, CHECK_(zero));
         } else {
@@ -1041,9 +1130,27 @@
       } else if (_rtype == T_VOID) {
         // We'll emit a _return with something on the stack.
         // It's OK to ignore what's on the stack.
+      } else if (rbt == T_INT && is_subword_type(_rtype)) {
+        // Convert value to match return type.
+        switch (_rtype) {
+        case T_BOOLEAN: {
+          // boolean is treated as a one-bit unsigned integer.
+          // Cf. API documentation: java/lang/invoke/MethodHandles.html#explicitCastArguments
+          ArgToken one = make_prim_constant(T_INT, &one_jvalue, CHECK_(zero));
+          emit_load_constant(one);
+          emit_bc(Bytecodes::_iand);
+          break;
+        }
+        case T_BYTE:    emit_bc(Bytecodes::_i2b); break;
+        case T_CHAR:    emit_bc(Bytecodes::_i2c); break;
+        case T_SHORT:   emit_bc(Bytecodes::_i2s); break;
+        default: ShouldNotReachHere();
+        }
+      } else if (is_subword_type(rbt) && (is_subword_type(_rtype) || (_rtype == T_INT))) {
+        // The subword type was returned as an int and will be passed
+        // on as an int.
       } else {
-        tty->print_cr("*** rbt=%d != rtype=%d", rbt, _rtype);
-        assert(false, "IMPLEMENT ME");
+        lose("unknown conversion", CHECK_(zero));
       }
     }
     switch (_rtype) {
@@ -1173,7 +1280,7 @@
 
 
 methodHandle MethodHandleCompiler::get_method_oop(TRAPS) const {
-  methodHandle nullHandle;
+  methodHandle empty;
   // Create a method that holds the generated bytecode.  invokedynamic
   // has no receiver, normal MH calls do.
   int flags_bits;
@@ -1182,13 +1289,16 @@
   else
     flags_bits = (/*JVM_MH_INVOKE_BITS |*/ JVM_ACC_PUBLIC | JVM_ACC_FINAL | JVM_ACC_SYNTHETIC);
 
-  methodOop m_oop = oopFactory::new_method(bytecode_length(),
-                                           accessFlags_from(flags_bits),
-                                           0, 0, 0, oopDesc::IsSafeConc, CHECK_(nullHandle));
-  methodHandle m(THREAD, m_oop);
-  m_oop = NULL;  // oop not GC safe
+  // Create a new method
+  methodHandle m;
+  {
+    methodOop m_oop = oopFactory::new_method(bytecode_length(),
+                                             accessFlags_from(flags_bits),
+                                             0, 0, 0, oopDesc::IsSafeConc, CHECK_(empty));
+    m = methodHandle(THREAD, m_oop);
+  }
 
-  constantPoolHandle cpool = get_constant_pool(CHECK_(nullHandle));
+  constantPoolHandle cpool = get_constant_pool(CHECK_(empty));
   m->set_constants(cpool());
 
   m->set_name_index(_name_index);
@@ -1203,16 +1313,34 @@
   typeArrayHandle exception_handlers(THREAD, Universe::the_empty_int_array());
   m->set_exception_table(exception_handlers());
 
-  // Set the carry bit of the invocation counter to force inlining of
-  // the adapter.
-  InvocationCounter* ic = m->invocation_counter();
-  ic->set_carry_flag();
-
   // Rewrite the method and set up the constant pool cache.
-  objArrayOop m_array = oopFactory::new_system_objArray(1, CHECK_(nullHandle));
+  objArrayOop m_array = oopFactory::new_system_objArray(1, CHECK_(empty));
   objArrayHandle methods(THREAD, m_array);
   methods->obj_at_put(0, m());
-  Rewriter::rewrite(_target_klass(), cpool, methods, CHECK_(nullHandle));  // Use fake class.
+  Rewriter::rewrite(_target_klass(), cpool, methods, CHECK_(empty));  // Use fake class.
+
+  // Set the invocation counter's count to the invoke count of the
+  // original call site.
+  InvocationCounter* ic = m->invocation_counter();
+  ic->set(InvocationCounter::wait_for_compile, _invoke_count);
+
+  // Create a new MDO
+  {
+    methodDataOop mdo = oopFactory::new_methodData(m, CHECK_(empty));
+    assert(m->method_data() == NULL, "there should not be an MDO yet");
+    m->set_method_data(mdo);
+
+    // Iterate over all profile data and set the count of the counter
+    // data entries to the original call site counter.
+    for (ProfileData* profile_data = mdo->first_data();
+         mdo->is_valid(profile_data);
+         profile_data = mdo->next_data(profile_data)) {
+      if (profile_data->is_CounterData()) {
+        CounterData* counter_data = profile_data->as_CounterData();
+        counter_data->set_count(_invoke_count);
+      }
+    }
+  }
 
 #ifndef PRODUCT
   if (TraceMethodHandles) {
@@ -1228,7 +1356,6 @@
 
 #ifndef PRODUCT
 
-#if 0
 // MH printer for debugging.
 
 class MethodHandlePrinter : public MethodHandleWalker {
@@ -1236,6 +1363,7 @@
   outputStream* _out;
   bool          _verbose;
   int           _temp_num;
+  int           _param_state;
   stringStream  _strbuf;
   const char* strbuf() {
     const char* s = _strbuf.as_string();
@@ -1243,14 +1371,21 @@
     return s;
   }
   ArgToken token(const char* str) {
-    return (ArgToken) str;
+    jvalue string_con;
+    string_con.j = (intptr_t) str;  // stash the C-string pointer in the jlong field of the jvalue
+    return ArgToken(tt_symbolic, T_LONG, string_con);  // wrap it as a tt_symbolic token typed T_LONG
+  }
+  const char* string(ArgToken token) {
+    return (const char*) (intptr_t) token.get_jlong();  // inverse of token(): recover the pointer stored in the jlong
   }
   void start_params() {
+    _param_state <<= 1;  // open a parameter list: shift in a fresh low bit (0 = no parameter printed yet; make_parameter sets it)
     _out->print("(");
   }
   void end_params() {
     if (_verbose)  _out->print("\n");
     _out->print(") => {");
+    _param_state >>= 1;  // close the list: drop its bit so the previous list's bit is the low bit again
   }
   void put_type_name(BasicType type, klassOop tk, outputStream* s) {
     const char* kname = NULL;
@@ -1270,9 +1405,10 @@
 
 public:
   MethodHandlePrinter(Handle root, bool verbose, outputStream* out, TRAPS)
-    : MethodHandleWalker(root, THREAD),
+    : MethodHandleWalker(root, false, THREAD),
       _out(out),
       _verbose(verbose),
+      _param_state(0),
       _temp_num(0)
   {
     start_params();
@@ -1280,9 +1416,10 @@
   virtual ArgToken make_parameter(BasicType type, klassOop tk, int argnum, TRAPS) {
     if (argnum < 0) {
       end_params();
-      return NULL;
+      return token("return");
     }
-    if (argnum == 0) {
+    if ((_param_state & 1) == 0) {
+      _param_state |= 1;
       _out->print(_verbose ? "\n  " : "");
     } else {
       _out->print(_verbose ? ",\n  " : ", ");
@@ -1312,8 +1449,15 @@
     java_lang_boxing_object::print(type, con, &_strbuf);
     return maybe_make_temp("constant", type, "k");
   }
-  virtual ArgToken make_conversion(BasicType type, klassOop tk, Bytecodes::Code op, ArgToken src, TRAPS) {
-    _strbuf.print("%s(%s", Bytecodes::name(op), (const char*)src);
+  void print_bytecode_name(Bytecodes::Code op) {  // append a printable name for op to _strbuf
+    if (Bytecodes::is_defined(op))
+      _strbuf.print("%s", Bytecodes::name(op));
+    else
+      _strbuf.print("bytecode_%d", (int) op);  // op is not a defined bytecode: print its numeric value instead of calling name()
+  }
+  virtual ArgToken make_conversion(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& src, TRAPS) {
+    print_bytecode_name(op);
+    _strbuf.print("(%s", string(src));
     if (tk != NULL) {
       _strbuf.print(", ");
       put_type_name(type, tk, &_strbuf);
@@ -1321,8 +1465,8 @@
     _strbuf.print(")");
     return maybe_make_temp("convert", type, "v");
   }
-  virtual ArgToken make_fetch(BasicType type, klassOop tk, Bytecodes::Code op, ArgToken base, ArgToken offset, TRAPS) {
-    _strbuf.print("%s(%s, %s", Bytecodes::name(op), (const char*)base, (const char*)offset);
+  virtual ArgToken make_fetch(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& base, const ArgToken& offset, TRAPS) {
+    _strbuf.print("%s(%s, %s", Bytecodes::name(op), string(base), string(offset));
     if (tk != NULL) {
       _strbuf.print(", ");
       put_type_name(type, tk, &_strbuf);
@@ -1333,7 +1477,8 @@
   virtual ArgToken make_invoke(methodOop m, vmIntrinsics::ID iid,
                                Bytecodes::Code op, bool tailcall,
                                int argc, ArgToken* argv, TRAPS) {
-    Symbol* name, sig;
+    Symbol* name;
+    Symbol* sig;
     if (m != NULL) {
       name = m->name();
       sig  = m->signature();
@@ -1343,7 +1488,7 @@
     }
     _strbuf.print("%s %s%s(", Bytecodes::name(op), name->as_C_string(), sig->as_C_string());
     for (int i = 0; i < argc; i++) {
-      _strbuf.print("%s%s", (i > 0 ? ", " : ""), (const char*)argv[i]);
+      _strbuf.print("%s%s", (i > 0 ? ", " : ""), string(argv[i]));
     }
     _strbuf.print(")");
     if (!tailcall) {
@@ -1381,24 +1526,20 @@
     if (HAS_PENDING_EXCEPTION) {
       oop ex = PENDING_EXCEPTION;
       CLEAR_PENDING_EXCEPTION;
-      out->print("\n*** ");
-      if (ex != Universe::virtual_machine_error_instance())
-        ex->print_on(out);
-      else
-        out->print("lose: %s", printer.lose_message());
-      out->print("\n}\n");
+      out->print(" *** ");
+      if (printer.lose_message() != NULL)  out->print("%s ", printer.lose_message());
+      out->print("}");
     }
     out->print("\n");
   }
 };
-#endif // 0
 
 extern "C"
 void print_method_handle(oop mh) {
   if (!mh->is_oop()) {
-    tty->print_cr("*** not a method handle: "INTPTR_FORMAT, (intptr_t)mh);
+    tty->print_cr("*** not a method handle: "PTR_FORMAT, (intptr_t)mh);
   } else if (java_lang_invoke_MethodHandle::is_instance(mh)) {
-    //MethodHandlePrinter::print(mh);
+    MethodHandlePrinter::print(mh);
   } else {
     tty->print("*** not a method handle: ");
     mh->print();
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/prims/methodHandleWalk.hpp
--- a/src/share/vm/prims/methodHandleWalk.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/prims/methodHandleWalk.hpp	Tue May 17 09:29:56 2011 -0400
@@ -113,6 +113,7 @@
     tt_parameter,
     tt_temporary,
     tt_constant,
+    tt_symbolic,
     tt_illegal
   };
 
@@ -164,6 +165,10 @@
   bool              _for_invokedynamic;
   int               _local_index;
 
+  // This array is kept in an unusual order, indexed by low-level "slot number".
+  // TOS is always _outgoing.at(0), so simple pushes and pops shift the whole _outgoing array.
+  // If there is a receiver in the current argument list, it is at _outgoing.at(_outgoing.length()-1).
+  // If a value at _outgoing.at(n) is T_LONG or T_DOUBLE, the value at _outgoing.at(n+1) is T_VOID.
   GrowableArray<SlotState> _outgoing;       // current outgoing parameter slots
   int                      _outgoing_argc;  // # non-empty outgoing slots
 
@@ -173,6 +178,11 @@
   // Insert or delete a second empty slot as needed.
   void change_argument(BasicType old_type, int slot, BasicType new_type, const ArgToken& new_arg);
 
+  // Raw retype conversions for OP_RAW_RETYPE.  The two wrappers forward to retype_raw_conversion: arguments pass for_return=false and a slot, returns pass for_return=true and slot -1.
+  void retype_raw_conversion(BasicType src, BasicType dst, bool for_return, int slot, TRAPS);
+  void retype_raw_argument_type(BasicType src, BasicType dst, int slot, TRAPS) { retype_raw_conversion(src, dst, false, slot, CHECK); }
+  void retype_raw_return_type(  BasicType src, BasicType dst,           TRAPS) { retype_raw_conversion(src, dst, true,  -1,   CHECK); }
+
   SlotState* slot_state(int slot) {
     if (slot < 0 || slot >= _outgoing.length())
       return NULL;
@@ -221,12 +231,12 @@
   int max_locals() const { return _local_index; }
 
   // plug-in abstract interpretation steps:
-  virtual ArgToken make_parameter( BasicType type, klassOop tk, int argnum, TRAPS ) = 0;
-  virtual ArgToken make_prim_constant( BasicType type, jvalue* con, TRAPS ) = 0;
-  virtual ArgToken make_oop_constant( oop con, TRAPS ) = 0;
-  virtual ArgToken make_conversion( BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& src, TRAPS ) = 0;
-  virtual ArgToken make_fetch( BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& base, const ArgToken& offset, TRAPS ) = 0;
-  virtual ArgToken make_invoke( methodOop m, vmIntrinsics::ID iid, Bytecodes::Code op, bool tailcall, int argc, ArgToken* argv, TRAPS ) = 0;
+  virtual ArgToken make_parameter(BasicType type, klassOop tk, int argnum, TRAPS) = 0;
+  virtual ArgToken make_prim_constant(BasicType type, jvalue* con, TRAPS) = 0;
+  virtual ArgToken make_oop_constant(oop con, TRAPS) = 0;
+  virtual ArgToken make_conversion(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& src, TRAPS) = 0;
+  virtual ArgToken make_fetch(BasicType type, klassOop tk, Bytecodes::Code op, const ArgToken& base, const ArgToken& offset, TRAPS) = 0;
+  virtual ArgToken make_invoke(methodOop m, vmIntrinsics::ID iid, Bytecodes::Code op, bool tailcall, int argc, ArgToken* argv, TRAPS) = 0;
 
   // For make_invoke, the methodOop can be NULL if the intrinsic ID
   // is something other than vmIntrinsics::_none.
@@ -247,11 +257,16 @@
 class MethodHandleCompiler : public MethodHandleWalker {
 private:
   methodHandle _callee;
+  int          _invoke_count;  // count the original call site has been executed
   KlassHandle  _rklass;        // Return type for casting.
   BasicType    _rtype;
   KlassHandle  _target_klass;
   Thread*      _thread;
 
+  // Values used by the compiler.
+  static jvalue zero_jvalue;
+  static jvalue one_jvalue;
+
   // Fake constant pool entry.
   class ConstantValue {
   private:
@@ -416,7 +431,7 @@
   methodHandle get_method_oop(TRAPS) const;
 
 public:
-  MethodHandleCompiler(Handle root, methodHandle call_method, bool for_invokedynamic, TRAPS);
+  MethodHandleCompiler(Handle root, methodHandle callee, int invoke_count, bool for_invokedynamic, TRAPS);
 
   // Compile the given MH chain into bytecode.
   methodHandle compile(TRAPS);
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/prims/methodHandles.cpp
--- a/src/share/vm/prims/methodHandles.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/prims/methodHandles.cpp	Tue May 17 09:29:56 2011 -0400
@@ -66,8 +66,8 @@
   "adapter_drop_args",
   "adapter_collect_args",
   "adapter_spread_args",
-  "adapter_flyby",
-  "adapter_ricochet",
+  "adapter_fold_args",
+  "adapter_unused_13",
 
   // optimized adapter types:
   "adapter_swap_args/1",
@@ -83,9 +83,76 @@
   "adapter_prim_to_prim/f2d",
   "adapter_ref_to_prim/unboxi",
   "adapter_ref_to_prim/unboxl",
-  "adapter_spread_args/0",
-  "adapter_spread_args/1",
-  "adapter_spread_args/more",
+
+  // return value handlers for collect/filter/fold adapters:
+  "return/ref",
+  "return/int",
+  "return/long",
+  "return/float",
+  "return/double",
+  "return/void",
+  "return/S0/ref",
+  "return/S1/ref",
+  "return/S2/ref",
+  "return/S3/ref",
+  "return/S4/ref",
+  "return/S5/ref",
+  "return/any",
+
+  // spreading (array length cases 0, 1, ...)
+  "adapter_spread/0",
+  "adapter_spread/1/ref",
+  "adapter_spread/2/ref",
+  "adapter_spread/3/ref",
+  "adapter_spread/4/ref",
+  "adapter_spread/5/ref",
+  "adapter_spread/ref",
+  "adapter_spread/byte",
+  "adapter_spread/char",
+  "adapter_spread/short",
+  "adapter_spread/int",
+  "adapter_spread/long",
+  "adapter_spread/float",
+  "adapter_spread/double",
+
+  // blocking filter/collect conversions:
+  "adapter_collect/ref",
+  "adapter_collect/int",
+  "adapter_collect/long",
+  "adapter_collect/float",
+  "adapter_collect/double",
+  "adapter_collect/void",
+  "adapter_collect/0/ref",
+  "adapter_collect/1/ref",
+  "adapter_collect/2/ref",
+  "adapter_collect/3/ref",
+  "adapter_collect/4/ref",
+  "adapter_collect/5/ref",
+  "adapter_filter/S0/ref",
+  "adapter_filter/S1/ref",
+  "adapter_filter/S2/ref",
+  "adapter_filter/S3/ref",
+  "adapter_filter/S4/ref",
+  "adapter_filter/S5/ref",
+  "adapter_collect/2/S0/ref",
+  "adapter_collect/2/S1/ref",
+  "adapter_collect/2/S2/ref",
+  "adapter_collect/2/S3/ref",
+  "adapter_collect/2/S4/ref",
+  "adapter_collect/2/S5/ref",
+
+  // blocking fold conversions:
+  "adapter_fold/ref",
+  "adapter_fold/int",
+  "adapter_fold/long",
+  "adapter_fold/float",
+  "adapter_fold/double",
+  "adapter_fold/void",
+  "adapter_fold/1/ref",
+  "adapter_fold/2/ref",
+  "adapter_fold/3/ref",
+  "adapter_fold/4/ref",
+  "adapter_fold/5/ref",
 
   NULL
 };
@@ -96,13 +163,23 @@
 
 jobject MethodHandles::_raise_exception_method;
 
+address MethodHandles::_adapter_return_handlers[CONV_TYPE_MASK+1];
+
 #ifdef ASSERT
 bool MethodHandles::spot_check_entry_names() {
   assert(!strcmp(entry_name(_invokestatic_mh), "invokestatic"), "");
   assert(!strcmp(entry_name(_bound_ref_mh), "bound_ref"), "");
   assert(!strcmp(entry_name(_adapter_retype_only), "adapter_retype_only"), "");
-  assert(!strcmp(entry_name(_adapter_ricochet), "adapter_ricochet"), "");
+  assert(!strcmp(entry_name(_adapter_fold_args), "adapter_fold_args"), "");
   assert(!strcmp(entry_name(_adapter_opt_unboxi), "adapter_ref_to_prim/unboxi"), "");
+  assert(!strcmp(entry_name(_adapter_opt_spread_char), "adapter_spread/char"), "");
+  assert(!strcmp(entry_name(_adapter_opt_spread_double), "adapter_spread/double"), "");
+  assert(!strcmp(entry_name(_adapter_opt_collect_int), "adapter_collect/int"), "");
+  assert(!strcmp(entry_name(_adapter_opt_collect_0_ref), "adapter_collect/0/ref"), "");
+  assert(!strcmp(entry_name(_adapter_opt_collect_2_S3_ref), "adapter_collect/2/S3/ref"), "");
+  assert(!strcmp(entry_name(_adapter_opt_filter_S5_ref), "adapter_filter/S5/ref"), "");
+  assert(!strcmp(entry_name(_adapter_opt_fold_3_ref), "adapter_fold/3/ref"), "");
+  assert(!strcmp(entry_name(_adapter_opt_fold_void), "adapter_fold/void"), "");
   return true;
 }
 #endif
@@ -112,6 +189,9 @@
 // MethodHandles::generate_adapters
 //
 void MethodHandles::generate_adapters() {
+#ifdef TARGET_ARCH_NYI_6939861
+  if (FLAG_IS_DEFAULT(UseRicochetFrames))  UseRicochetFrames = false;
+#endif
   if (!EnableInvokeDynamic || SystemDictionary::MethodHandle_klass() == NULL)  return;
 
   assert(_adapter_code == NULL, "generate only once");
@@ -126,7 +206,6 @@
   g.generate();
 }
 
-
 //------------------------------------------------------------------------------
 // MethodHandlesAdapterGenerator::generate
 //
@@ -135,12 +214,62 @@
   for (MethodHandles::EntryKind ek = MethodHandles::_EK_FIRST;
        ek < MethodHandles::_EK_LIMIT;
        ek = MethodHandles::EntryKind(1 + (int)ek)) {
-    StubCodeMark mark(this, "MethodHandle", MethodHandles::entry_name(ek));
-    MethodHandles::generate_method_handle_stub(_masm, ek);
+    if (MethodHandles::ek_supported(ek)) {
+      StubCodeMark mark(this, "MethodHandle", MethodHandles::entry_name(ek));
+      MethodHandles::generate_method_handle_stub(_masm, ek);
+    }
   }
 }
 
 
+#ifdef TARGET_ARCH_NYI_6939861
+// these defs belong in methodHandles_<arch>.cpp
+frame MethodHandles::ricochet_frame_sender(const frame& fr, RegisterMap *map) {  // stub, compiled only when TARGET_ARCH_NYI_6939861 is defined
+  ShouldNotCallThis();
+  return fr;  // hands back the input frame; presumably only to satisfy the return type -- confirm ShouldNotCallThis() does not return
+}
+void MethodHandles::ricochet_frame_oops_do(const frame& fr, OopClosure* f, const RegisterMap* reg_map) {  // stub, compiled only when TARGET_ARCH_NYI_6939861 is defined; never applies f
+  ShouldNotCallThis();
+}
+#endif //TARGET_ARCH_NYI_6939861
+
+
+//------------------------------------------------------------------------------
+// MethodHandles::ek_supported
+// Returns false for entry kinds that are unavailable in this configuration, true for all others.
+bool MethodHandles::ek_supported(MethodHandles::EntryKind ek) {
+  MethodHandles::EntryKind ek_orig = MethodHandles::ek_original_kind(ek);  // the decision is keyed on ek's original kind
+  switch (ek_orig) {
+  case _adapter_unused_13:
+    return false;  // not defined yet
+  case _adapter_prim_to_ref:
+    return UseRicochetFrames && conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF);  // needs ricochet frames and the conversion op
+  case _adapter_collect_args:
+    return UseRicochetFrames && conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS);  // needs ricochet frames and the conversion op
+  case _adapter_fold_args:
+    return UseRicochetFrames && conv_op_supported(java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS);  // needs ricochet frames and the conversion op
+  case _adapter_opt_return_any:
+    return UseRicochetFrames;  // depends on the UseRicochetFrames flag alone
+#ifdef TARGET_ARCH_NYI_6939861
+  // ports before 6939861 supported only three kinds of spread ops
+  case _adapter_spread_args:
+    // restrict spreads to three kinds:
+    switch (ek) {  // note: switches on ek itself, not ek_orig
+    case _adapter_opt_spread_0:
+    case _adapter_opt_spread_1:
+    case _adapter_opt_spread_more:
+      break;  // one of the three allowed kinds: leave the inner switch and end at return true
+    default:
+      return false;
+      break;  // unreachable after the return above
+    }
+    break;
+#endif //TARGET_ARCH_NYI_6939861
+  }
+  return true;  // every entry kind not rejected above is supported
+}
+
+
 void MethodHandles::set_enabled(bool z) {
   if (_enabled != z) {
     guarantee(z && EnableInvokeDynamic, "can only enable once, and only if -XX:+EnableInvokeDynamic");
@@ -970,6 +1099,14 @@
   return Klass::cast(SystemDictionary::Object_klass())->java_mirror();
 }
 
+bool MethodHandles::is_float_fixed_reinterpretation_cast(BasicType src, BasicType dst) {  // true only for the pairs float<->int and double<->long
+  if (src == T_FLOAT)   return dst == T_INT;
+  if (src == T_INT)     return dst == T_FLOAT;
+  if (src == T_DOUBLE)  return dst == T_LONG;
+  if (src == T_LONG)    return dst == T_DOUBLE;
+  return false;  // src is none of float, int, double, long
+}
+
 bool MethodHandles::same_basic_type_for_arguments(BasicType src,
                                                   BasicType dst,
                                                   bool raw,
@@ -996,10 +1133,8 @@
       return true;            // remaining case: byte fits in short
   }
   // allow float/fixed reinterpretation casts
-  if (src == T_FLOAT)   return dst == T_INT;
-  if (src == T_INT)     return dst == T_FLOAT;
-  if (src == T_DOUBLE)  return dst == T_LONG;
-  if (src == T_LONG)    return dst == T_DOUBLE;
+  if (is_float_fixed_reinterpretation_cast(src, dst))
+    return true;
   return false;
 }
 
@@ -1270,7 +1405,7 @@
                                                       int argnum,
                                                       bool raw) {
   const char* err = NULL;
-  bool for_return = (argnum < 0);
+  const bool for_return = (argnum < 0);
 
   // just in case:
   if (src_type == T_ARRAY)  src_type = T_OBJECT;
@@ -1279,17 +1414,17 @@
   // Produce some nice messages if VerifyMethodHandles is turned on:
   if (!same_basic_type_for_arguments(src_type, dst_type, raw, for_return)) {
     if (src_type == T_OBJECT) {
-      if (raw && dst_type == T_INT && is_always_null_type(src_klass))
-        return NULL;    // OK to convert a null pointer to a garbage int
-      err = ((argnum >= 0)
+      if (raw && is_java_primitive(dst_type))
+        return NULL;    // ref-to-prim discards ref and returns zero
+      err = (!for_return
              ? "type mismatch: passing a %s for method argument #%d, which expects primitive %s"
              : "type mismatch: returning a %s, but caller expects primitive %s");
     } else if (dst_type == T_OBJECT) {
-      err = ((argnum >= 0)
+      err = (!for_return
              ? "type mismatch: passing a primitive %s for method argument #%d, which expects %s"
              : "type mismatch: returning a primitive %s, but caller expects %s");
     } else {
-      err = ((argnum >= 0)
+      err = (!for_return
              ? "type mismatch: passing a %s for method argument #%d, which expects %s"
              : "type mismatch: returning a %s, but caller expects %s");
     }
@@ -1298,11 +1433,11 @@
     if (!class_cast_needed(dst_klass, src_klass)) {
       if (raw)
         return NULL;    // reverse cast is OK; the MH target is trusted to enforce it
-      err = ((argnum >= 0)
+      err = (!for_return
              ? "cast required: passing a %s for method argument #%d, which expects %s"
              : "cast required: returning a %s, but caller expects %s");
     } else {
-      err = ((argnum >= 0)
+      err = (!for_return
              ? "reference mismatch: passing a %s for method argument #%d, which expects %s"
              : "reference mismatch: returning a %s, but caller expects %s");
     }
@@ -1323,7 +1458,7 @@
 
   size_t msglen = strlen(err) + strlen(src_name) + strlen(dst_name) + (argnum < 10 ? 1 : 11);
   char* msg = NEW_RESOURCE_ARRAY(char, msglen + 1);
-  if (argnum >= 0) {
+  if (!for_return) {
     assert(strstr(err, "%d") != NULL, "");
     jio_snprintf(msg, msglen, err, src_name, argnum, dst_name);
   } else {
@@ -1564,6 +1699,8 @@
   if (m->is_abstract()) { THROW(vmSymbols::java_lang_AbstractMethodError()); }
 
   java_lang_invoke_MethodHandle::init_vmslots(mh());
+  int vmargslot = m->size_of_parameters() - 1;
+  assert(java_lang_invoke_BoundMethodHandle::vmargslot(mh()) == vmargslot, "");
 
   if (VerifyMethodHandles) {
     verify_BoundMethodHandle_with_receiver(mh, m, CHECK);
@@ -1642,14 +1779,9 @@
     DEBUG_ONLY(int this_pushes = decode_MethodHandle_stack_pushes(mh()));
     if (direct_to_method) {
       assert(this_pushes == slots_pushed, "BMH pushes one or two stack slots");
-      assert(slots_pushed <= MethodHandlePushLimit, "");
     } else {
       int target_pushes = decode_MethodHandle_stack_pushes(target());
       assert(this_pushes == slots_pushed + target_pushes, "BMH stack motion must be correct");
-      // do not blow the stack; use a Java-based adapter if this limit is exceeded
-      // FIXME
-      // if (slots_pushed + target_pushes > MethodHandlePushLimit)
-      //   err = "too many bound parameters";
     }
   }
 
@@ -1672,10 +1804,11 @@
   }
 
   java_lang_invoke_MethodHandle::init_vmslots(mh());
+  int argslot = java_lang_invoke_BoundMethodHandle::vmargslot(mh());
 
   if (VerifyMethodHandles) {
     int insert_after = argnum - 1;
-    verify_vmargslot(mh, insert_after, java_lang_invoke_BoundMethodHandle::vmargslot(mh()), CHECK);
+    verify_vmargslot(mh, insert_after, argslot, CHECK);
     verify_vmslots(mh, CHECK);
   }
 
@@ -1769,6 +1902,7 @@
   Handle target(THREAD,    java_lang_invoke_AdapterMethodHandle::vmtarget(mh()));
   Handle src_mtype(THREAD, java_lang_invoke_MethodHandle::type(mh()));
   Handle dst_mtype(THREAD, java_lang_invoke_MethodHandle::type(target()));
+  Handle arg_mtype;
 
   const char* err = NULL;
 
@@ -1777,25 +1911,29 @@
     switch (ek) {
     case _adapter_check_cast:     // target type of cast
     case _adapter_ref_to_prim:    // wrapper type from which to unbox
-    case _adapter_prim_to_ref:    // wrapper type to box into
-    case _adapter_collect_args:   // array type to collect into
     case _adapter_spread_args:    // array type to spread from
       if (!java_lang_Class::is_instance(argument())
           || java_lang_Class::is_primitive(argument()))
         { err = "adapter requires argument of type java.lang.Class"; break; }
-      if (ek == _adapter_collect_args ||
-          ek == _adapter_spread_args) {
+      if (ek == _adapter_spread_args) {
         // Make sure it is a suitable collection type.  (Array, for now.)
         Klass* ak = Klass::cast(java_lang_Class::as_klassOop(argument()));
-        if (!ak->oop_is_objArray()) {
-          { err = "adapter requires argument of type java.lang.Class<Object[]>"; break; }
-        }
+        if (!ak->oop_is_array())
+          { err = "spread adapter requires argument representing an array class"; break; }
+        BasicType et = arrayKlass::cast(ak->as_klassOop())->element_type();
+        if (et != dest && stack_move <= 0)
+          { err = "spread adapter requires array class argument of correct type"; break; }
       }
       break;
-    case _adapter_flyby:
-    case _adapter_ricochet:
+    case _adapter_prim_to_ref:    // boxer MH to use
+    case _adapter_collect_args:   // method handle which collects the args
+    case _adapter_fold_args:      // method handle which collects the args
+      if (!UseRicochetFrames) {
+        { err = "box/collect/fold operators are not supported"; break; }
+      }
       if (!java_lang_invoke_MethodHandle::is_instance(argument()))
         { err = "MethodHandle adapter argument required"; break; }
+      arg_mtype = Handle(THREAD, java_lang_invoke_MethodHandle::type(argument()));
       break;
     default:
       if (argument.not_null())
@@ -1806,6 +1944,7 @@
 
   if (err == NULL) {
     // Check that the src/dest types are supplied if needed.
+    // Also check relevant parameter or return types.
     switch (ek) {
     case _adapter_check_cast:
       if (src != T_OBJECT || dest != T_OBJECT) {
@@ -1828,8 +1967,7 @@
       }
       break;
     case _adapter_prim_to_ref:
-      if (!is_java_primitive(src) || dest != T_OBJECT
-          || argument() != Klass::cast(SystemDictionary::box_klass(src))->java_mirror()) {
+      if (!is_java_primitive(src) || dest != T_OBJECT) {
         err = "adapter requires primitive src conversion subfield"; break;
       }
       break;
@@ -1840,14 +1978,12 @@
           err = "adapter requires src/dest conversion subfields for swap"; break;
         }
         int swap_size = type2size[src];
-        oop src_mtype  = java_lang_invoke_AdapterMethodHandle::type(mh());
-        oop dest_mtype = java_lang_invoke_AdapterMethodHandle::type(target());
-        int slot_limit = java_lang_invoke_AdapterMethodHandle::vmslots(target());
+        int slot_limit = java_lang_invoke_MethodHandle::vmslots(target());
         int src_slot   = argslot;
         int dest_slot  = vminfo;
         bool rotate_up = (src_slot > dest_slot); // upward rotation
         int src_arg    = argnum;
-        int dest_arg   = argument_slot_to_argnum(dest_mtype, dest_slot);
+        int dest_arg   = argument_slot_to_argnum(dst_mtype(), dest_slot);
         verify_vmargslot(mh, dest_arg, dest_slot, CHECK);
         if (!(dest_slot >= src_slot + swap_size) &&
             !(src_slot >= dest_slot + swap_size)) {
@@ -1855,8 +1991,8 @@
         } else if (ek == _adapter_swap_args && !(src_slot > dest_slot)) {
           err = "source of swap must be deeper in stack";
         } else if (ek == _adapter_swap_args) {
-          err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype, dest_arg),
-                                           java_lang_invoke_MethodType::ptype(dest_mtype, src_arg),
+          err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), dest_arg),
+                                           java_lang_invoke_MethodType::ptype(dst_mtype(), src_arg),
                                            dest_arg);
         } else if (ek == _adapter_rot_args) {
           if (rotate_up) {
@@ -1864,8 +2000,8 @@
             // rotate up: [dest_slot..src_slot-ss] --> [dest_slot+ss..src_slot]
             // that is:   [src_arg+1..dest_arg] --> [src_arg..dest_arg-1]
             for (int i = src_arg+1; i <= dest_arg && err == NULL; i++) {
-              err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype, i),
-                                               java_lang_invoke_MethodType::ptype(dest_mtype, i-1),
+              err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), i),
+                                               java_lang_invoke_MethodType::ptype(dst_mtype(), i-1),
                                                i);
             }
           } else { // rotate down
@@ -1873,28 +2009,54 @@
             // rotate down: [src_slot+ss..dest_slot] --> [src_slot..dest_slot-ss]
             // that is:     [dest_arg..src_arg-1] --> [dst_arg+1..src_arg]
             for (int i = dest_arg; i <= src_arg-1 && err == NULL; i++) {
-              err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype, i),
-                                               java_lang_invoke_MethodType::ptype(dest_mtype, i+1),
+              err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), i),
+                                               java_lang_invoke_MethodType::ptype(dst_mtype(), i+1),
                                                i);
             }
           }
         }
         if (err == NULL)
-          err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype, src_arg),
-                                           java_lang_invoke_MethodType::ptype(dest_mtype, dest_arg),
+          err = check_argument_type_change(java_lang_invoke_MethodType::ptype(src_mtype(), src_arg),
+                                           java_lang_invoke_MethodType::ptype(dst_mtype(), dest_arg),
                                            src_arg);
       }
       break;
+    case _adapter_spread_args:
     case _adapter_collect_args:
-    case _adapter_spread_args:
+    case _adapter_fold_args:
       {
-        BasicType coll_type = (ek == _adapter_collect_args) ? dest : src;
-        BasicType elem_type = (ek == _adapter_collect_args) ? src : dest;
-        if (coll_type != T_OBJECT || elem_type != T_OBJECT) {
-          err = "adapter requires src/dest subfields"; break;
-          // later:
-          // - consider making coll be a primitive array
-          // - consider making coll be a heterogeneous collection
+        bool is_spread = (ek == _adapter_spread_args);
+        bool is_fold   = (ek == _adapter_fold_args);
+        BasicType coll_type = is_spread ? src : dest;
+        BasicType elem_type = is_spread ? dest : src;
+        // coll_type is type of args in collected form (or T_VOID if none)
+        // elem_type is common type of args in spread form (or T_VOID if missing or heterogeneous)
+        if (coll_type == 0 || elem_type == 0) {
+          err = "adapter requires src/dest subfields for spread or collect"; break;
+        }
+        if (is_spread && coll_type != T_OBJECT) {
+          err = "spread adapter requires object type for argument bundle"; break;
+        }
+        Handle spread_mtype = (is_spread ? dst_mtype : src_mtype);
+        int spread_slot = argslot;
+        int spread_arg  = argnum;
+        int slots_pushed = stack_move / stack_move_unit();
+        int coll_slot_count = type2size[coll_type];
+        int spread_slot_count = (is_spread ? slots_pushed : -slots_pushed) + coll_slot_count;
+        if (is_fold)  spread_slot_count = argument_slot_count(arg_mtype());
+        if (!is_spread) {
+          int init_slots = argument_slot_count(src_mtype());
+          int coll_slots = argument_slot_count(arg_mtype());
+          if (spread_slot_count > init_slots ||
+              spread_slot_count != coll_slots) {
+            err = "collect adapter has inconsistent arg counts"; break;
+          }
+          int next_slots = argument_slot_count(dst_mtype());
+          int unchanged_slots_in  = (init_slots - spread_slot_count);
+          int unchanged_slots_out = (next_slots - coll_slot_count - (is_fold ? spread_slot_count : 0));
+          if (unchanged_slots_in != unchanged_slots_out) {
+            err = "collect adapter continuation has inconsistent arg counts"; break;
+          }
         }
       }
       break;
@@ -1929,8 +2091,9 @@
       }
       break;
     case _adapter_collect_args:
-      if (slots_pushed > 1) {
-        err = "adapter requires conversion subfield slots_pushed <= 1";
+    case _adapter_fold_args:
+      if (slots_pushed > 2) {
+        err = "adapter requires conversion subfield slots_pushed <= 2";
       }
       break;
     case _adapter_spread_args:
@@ -1950,32 +2113,36 @@
   }
 
   if (err == NULL) {
-    // Make sure this adapter does not push too deeply.
+    // Make sure this adapter's stack pushing is accurately recorded.
     int slots_pushed = stack_move / stack_move_unit();
     int this_vmslots = java_lang_invoke_MethodHandle::vmslots(mh());
     int target_vmslots = java_lang_invoke_MethodHandle::vmslots(target());
+    int target_pushes = decode_MethodHandle_stack_pushes(target());
     if (slots_pushed != (target_vmslots - this_vmslots)) {
       err = "stack_move inconsistent with previous and current MethodType vmslots";
-    } else if (slots_pushed > 0)  {
-      // verify stack_move against MethodHandlePushLimit
-      int target_pushes = decode_MethodHandle_stack_pushes(target());
-      // do not blow the stack; use a Java-based adapter if this limit is exceeded
-      if (slots_pushed + target_pushes > MethodHandlePushLimit) {
-        err = "adapter pushes too many parameters";
+    } else {
+      int this_pushes = decode_MethodHandle_stack_pushes(mh());
+      if (slots_pushed + target_pushes != this_pushes) {
+        if (this_pushes == 0)
+          err = "adapter push count not initialized";
+        else
+          err = "adapter push count is wrong";
       }
     }
 
     // While we're at it, check that the stack motion decoder works:
-    DEBUG_ONLY(int target_pushes = decode_MethodHandle_stack_pushes(target()));
     DEBUG_ONLY(int this_pushes = decode_MethodHandle_stack_pushes(mh()));
     assert(this_pushes == slots_pushed + target_pushes, "AMH stack motion must be correct");
   }
 
   if (err == NULL && vminfo != 0) {
     switch (ek) {
-      case _adapter_swap_args:
-      case _adapter_rot_args:
-        break;                // OK
+    case _adapter_swap_args:
+    case _adapter_rot_args:
+    case _adapter_prim_to_ref:
+    case _adapter_collect_args:
+    case _adapter_fold_args:
+      break;                // OK
     default:
       err = "vminfo subfield is reserved to the JVM";
     }
@@ -2019,13 +2186,15 @@
 }
 
 void MethodHandles::init_AdapterMethodHandle(Handle mh, Handle target, int argnum, TRAPS) {
-  int  argslot    = java_lang_invoke_AdapterMethodHandle::vmargslot(mh());
-  jint conversion = java_lang_invoke_AdapterMethodHandle::conversion(mh());
-  jint conv_op    = adapter_conversion_op(conversion);
+  Handle argument   = java_lang_invoke_AdapterMethodHandle::argument(mh());
+  int    argslot    = java_lang_invoke_AdapterMethodHandle::vmargslot(mh());
+  jint   conversion = java_lang_invoke_AdapterMethodHandle::conversion(mh());
+  jint   conv_op    = adapter_conversion_op(conversion);
 
   // adjust the adapter code to the internal EntryKind enumeration:
   EntryKind ek_orig = adapter_entry_kind(conv_op);
   EntryKind ek_opt  = ek_orig;  // may be optimized
+  EntryKind ek_try;             // temp
 
   // Finalize the vmtarget field (Java initialized it to null).
   if (!java_lang_invoke_MethodHandle::is_instance(target())) {
@@ -2034,17 +2203,23 @@
   }
   java_lang_invoke_AdapterMethodHandle::set_vmtarget(mh(), target());
 
-  if (VerifyMethodHandles) {
-    verify_AdapterMethodHandle(mh, argnum, CHECK);
-  }
-
   int stack_move = adapter_conversion_stack_move(conversion);
   BasicType src  = adapter_conversion_src_type(conversion);
   BasicType dest = adapter_conversion_dest_type(conversion);
   int vminfo     = adapter_conversion_vminfo(conversion); // should be zero
 
+  int slots_pushed = stack_move / stack_move_unit();
+
+  if (VerifyMethodHandles) {
+    verify_AdapterMethodHandle(mh, argnum, CHECK);
+  }
+
   const char* err = NULL;
 
+  if (!conv_op_supported(conv_op)) {
+    err = "adapter not yet implemented in the JVM";
+  }
+
   // Now it's time to finish the case analysis and pick a MethodHandleEntry.
   switch (ek_orig) {
   case _adapter_retype_only:
@@ -2073,20 +2248,20 @@
         } else if (src == T_DOUBLE && dest == T_FLOAT) {
           ek_opt = _adapter_opt_d2f;
         } else {
-          assert(false, "");
+          goto throw_not_impl;        // runs user code, hence could block
         }
         break;
       case 1 *4+ 2:
-        if (src == T_INT && dest == T_LONG) {
+        if ((src == T_INT || is_subword_type(src)) && dest == T_LONG) {
           ek_opt = _adapter_opt_i2l;
         } else if (src == T_FLOAT && dest == T_DOUBLE) {
           ek_opt = _adapter_opt_f2d;
         } else {
-          assert(false, "");
+          goto throw_not_impl;        // runs user code, hence could block
         }
         break;
       default:
-        assert(false, "");
+        goto throw_not_impl;        // runs user code, hence could block
         break;
       }
     }
@@ -2103,14 +2278,54 @@
         ek_opt = _adapter_opt_unboxl;
         break;
       default:
-        assert(false, "");
+        goto throw_not_impl;
         break;
       }
     }
     break;
 
   case _adapter_prim_to_ref:
-    goto throw_not_impl;        // allocates, hence could block
+    {  // prim_to_ref: choose the optimized collect/filter entry used for this adapter
+      assert(UseRicochetFrames, "else don't come here");
+      // vminfo will be the location to insert the return value
+      vminfo = argslot;
+      ek_opt = _adapter_opt_collect_ref;  // generic choice; kept as-is when OptimizeMethodHandles is off
+      ensure_vmlayout_field(target, CHECK);
+      // for MethodHandleWalk:
+      if (java_lang_invoke_AdapterMethodHandle::is_instance(argument()))
+        ensure_vmlayout_field(argument, CHECK);
+      if (!OptimizeMethodHandles)  break;  // leaves the enclosing switch (ek_orig)
+      switch (type2size[src]) {  // dispatch on the slot count of the source type
+      case 1:
+        ek_try = EntryKind(_adapter_opt_filter_S0_ref + argslot);  // candidate: filter_S{argslot}_ref
+        if (ek_try < _adapter_opt_collect_LAST &&
+            ek_adapter_opt_collect_slot(ek_try) == argslot) {  // accept only if the candidate is really for this slot
+          assert(ek_adapter_opt_collect_count(ek_try) == 1 &&
+                 ek_adapter_opt_collect_type(ek_try) == T_OBJECT, "");
+          ek_opt = ek_try;
+          break;
+        }
+        // else downgrade to variable slot:
+        ek_opt = _adapter_opt_collect_1_ref;
+        break;
+      case 2:
+        ek_try = EntryKind(_adapter_opt_collect_2_S0_ref + argslot);  // candidate: collect_2_S{argslot}_ref
+        if (ek_try < _adapter_opt_collect_LAST &&  // NOTE(review): LAST is collect_2_S5_ref, so strict '<' never selects it (argslot 5) -- confirm intended
+            ek_adapter_opt_collect_slot(ek_try) == argslot) {
+          assert(ek_adapter_opt_collect_count(ek_try) == 2 &&
+                 ek_adapter_opt_collect_type(ek_try) == T_OBJECT, "");
+          ek_opt = ek_try;
+          break;
+        }
+        // else downgrade to variable slot:
+        ek_opt = _adapter_opt_collect_2_ref;
+        break;
+      default:
+        goto throw_not_impl;  // any other slot count is rejected
+        break;
+      }
+    }
+    break;
 
   case _adapter_swap_args:
   case _adapter_rot_args:
@@ -2130,35 +2345,184 @@
                   rotate > 0 ? _adapter_opt_rot_2_up : _adapter_opt_rot_2_down);
         break;
       default:
-        assert(false, "");
+        goto throw_not_impl;
         break;
       }
     }
     break;
 
-  case _adapter_collect_args:
-    goto throw_not_impl;        // allocates, hence could block
-
   case _adapter_spread_args:
     {
+#ifdef TARGET_ARCH_NYI_6939861
+      // ports before 6939861 supported only three kinds of spread ops
+      if (!UseRicochetFrames) {
+        int array_size   = slots_pushed + 1;  // the array slot itself is replaced, hence the +1
+        assert(array_size >= 0, "");
+        vminfo = array_size;
+        switch (array_size) {
+        case 0:   ek_opt = _adapter_opt_spread_0;       break;
+        case 1:   ek_opt = _adapter_opt_spread_1;       break;
+        default:  ek_opt = _adapter_opt_spread_more;    break;
+        }
+        break;
+      }
+#endif //TARGET_ARCH_NYI_6939861
       // vminfo will be the required length of the array
-      int slots_pushed = stack_move / stack_move_unit();
-      int array_size   = slots_pushed + 1;
-      assert(array_size >= 0, "");
+      int array_size = (slots_pushed + 1) / (type2size[dest] == 2 ? 2 : 1);  // two-slot element types halve the element count
       vminfo = array_size;
-      switch (array_size) {
-      case 0:   ek_opt = _adapter_opt_spread_0;       break;
-      case 1:   ek_opt = _adapter_opt_spread_1;       break;
-      default:  ek_opt = _adapter_opt_spread_more;    break;
+      // general case
+      switch (dest) {  // pick the variable-length spreader by element type
+      case T_BOOLEAN : // fall through to T_BYTE:
+      case T_BYTE    : ek_opt = _adapter_opt_spread_byte;    break;
+      case T_CHAR    : ek_opt = _adapter_opt_spread_char;    break;
+      case T_SHORT   : ek_opt = _adapter_opt_spread_short;   break;
+      case T_INT     : ek_opt = _adapter_opt_spread_int;     break;
+      case T_LONG    : ek_opt = _adapter_opt_spread_long;    break;
+      case T_FLOAT   : ek_opt = _adapter_opt_spread_float;   break;
+      case T_DOUBLE  : ek_opt = _adapter_opt_spread_double;  break;
+      case T_OBJECT  : ek_opt = _adapter_opt_spread_ref;     break;
+      case T_VOID    : if (array_size != 0)  goto throw_not_impl;  // T_VOID is accepted only for an empty spread
+                       ek_opt = _adapter_opt_spread_ref;     break;
+      default        : goto throw_not_impl;
       }
-      if ((vminfo & CONV_VMINFO_MASK) != vminfo)
-        goto throw_not_impl;    // overflow
+      assert(array_size == 0 ||  // it doesn't matter what the spreader is
+             (ek_adapter_opt_spread_count(ek_opt) == -1 &&
+              (ek_adapter_opt_spread_type(ek_opt) == dest ||
+               (ek_adapter_opt_spread_type(ek_opt) == T_BYTE && dest == T_BOOLEAN))),
+             err_msg("dest=%d ek_opt=%d", dest, ek_opt));
+
+      if (array_size <= 0) {  // a negative size also lands here; vminfo then fails the mask check after the switch
+        // since the general case does not handle length 0, this case is required:
+        ek_opt = _adapter_opt_spread_0;
+        break;
+      }
+      if (dest == T_OBJECT) {
+        ek_try = EntryKind(_adapter_opt_spread_1_ref - 1 + array_size);  // candidate: spread_{array_size}_ref
+        if (ek_try < _adapter_opt_spread_LAST &&
+            ek_adapter_opt_spread_count(ek_try) == array_size) {  // accept only a fixed-count entry of exactly this size
+          assert(ek_adapter_opt_spread_type(ek_try) == dest, "");
+          ek_opt = ek_try;
+          break;
+        }
+      }
+      break;  // otherwise keep the variable-length spreader chosen above
     }
     break;
 
-  case _adapter_flyby:
-  case _adapter_ricochet:
-    goto throw_not_impl;        // runs Java code, hence could block
+  case _adapter_collect_args:  // choose the optimized entry for a collect adapter
+    {
+      assert(UseRicochetFrames, "else don't come here");
+      int elem_slots = argument_slot_count(java_lang_invoke_MethodHandle::type(argument()));  // slots consumed by the collector
+      // vminfo will be the location to insert the return value
+      vminfo = argslot;
+      ensure_vmlayout_field(target, CHECK);
+      ensure_vmlayout_field(argument, CHECK);
+
+      // general case:
+      switch (dest) {  // dest is the type of the collected value
+      default       : if (!is_subword_type(dest))  goto throw_not_impl;
+                    // else fall through:
+      case T_INT    : ek_opt = _adapter_opt_collect_int;     break;
+      case T_LONG   : ek_opt = _adapter_opt_collect_long;    break;
+      case T_FLOAT  : ek_opt = _adapter_opt_collect_float;   break;
+      case T_DOUBLE : ek_opt = _adapter_opt_collect_double;  break;
+      case T_OBJECT : ek_opt = _adapter_opt_collect_ref;     break;
+      case T_VOID   : ek_opt = _adapter_opt_collect_void;    break;
+      }
+      assert(ek_adapter_opt_collect_slot(ek_opt) == -1 &&
+             ek_adapter_opt_collect_count(ek_opt) == -1 &&
+             (ek_adapter_opt_collect_type(ek_opt) == dest ||
+              (ek_adapter_opt_collect_type(ek_opt) == T_INT && is_subword_type(dest))),  // explicit grouping of && inside ||
+             "");
+
+      if (dest == T_OBJECT && elem_slots == 1 && OptimizeMethodHandles) {
+        // filter operation on a ref
+        ek_try = EntryKind(_adapter_opt_filter_S0_ref + argslot);  // candidate: filter_S{argslot}_ref
+        if (ek_try < _adapter_opt_collect_LAST &&
+            ek_adapter_opt_collect_slot(ek_try) == argslot) {
+          assert(ek_adapter_opt_collect_count(ek_try) == elem_slots &&
+                 ek_adapter_opt_collect_type(ek_try) == dest, "");
+          ek_opt = ek_try;
+          break;
+        }
+        ek_opt = _adapter_opt_collect_1_ref;  // no fixed-slot entry: use the any-slot N=1 collector
+        break;
+      }
+
+      if (dest == T_OBJECT && elem_slots == 2 && OptimizeMethodHandles) {
+        // filter of two arguments
+        ek_try = EntryKind(_adapter_opt_collect_2_S0_ref + argslot);  // candidate: collect_2_S{argslot}_ref
+        if (ek_try < _adapter_opt_collect_LAST &&  // NOTE(review): strict '<' never selects collect_2_S5_ref (== LAST) -- confirm intended
+            ek_adapter_opt_collect_slot(ek_try) == argslot) {
+          assert(ek_adapter_opt_collect_count(ek_try) == elem_slots &&
+                 ek_adapter_opt_collect_type(ek_try) == dest, "");
+          ek_opt = ek_try;
+          break;
+        }
+        ek_opt = _adapter_opt_collect_2_ref;  // no fixed-slot entry: use the any-slot N=2 collector
+        break;
+      }
+
+      if (dest == T_OBJECT && OptimizeMethodHandles) {
+        // try to use a fixed length adapter
+        ek_try = EntryKind(_adapter_opt_collect_0_ref + elem_slots);  // candidate: collect_{elem_slots}_ref
+        if (ek_try < _adapter_opt_collect_LAST &&
+            ek_adapter_opt_collect_count(ek_try) == elem_slots) {  // rejects candidates that ran past the collect_N_ref run
+          assert(ek_adapter_opt_collect_slot(ek_try) == -1 &&
+                 ek_adapter_opt_collect_type(ek_try) == dest, "");
+          ek_opt = ek_try;
+          break;
+        }
+      }
+
+      break;  // keep the general-case entry chosen by the switch above
+    }
+
+  case _adapter_fold_args:  // choose the optimized entry for a fold adapter
+    {
+      assert(UseRicochetFrames, "else don't come here");
+      int elem_slots = argument_slot_count(java_lang_invoke_MethodHandle::type(argument()));  // slots read by the folding function
+      // vminfo will be the location to insert the return value
+      vminfo = argslot + elem_slots;
+      ensure_vmlayout_field(target, CHECK);
+      ensure_vmlayout_field(argument, CHECK);
+
+      switch (dest) {  // general case, selected by the type of the folded value
+      default       : if (!is_subword_type(dest))  goto throw_not_impl;
+                    // else fall through:
+      case T_INT    : ek_opt = _adapter_opt_fold_int;     break;
+      case T_LONG   : ek_opt = _adapter_opt_fold_long;    break;
+      case T_FLOAT  : ek_opt = _adapter_opt_fold_float;   break;
+      case T_DOUBLE : ek_opt = _adapter_opt_fold_double;  break;
+      case T_OBJECT : ek_opt = _adapter_opt_fold_ref;     break;
+      case T_VOID   : ek_opt = _adapter_opt_fold_void;    break;
+      }
+      assert(ek_adapter_opt_collect_slot(ek_opt) == -1 &&
+             ek_adapter_opt_collect_count(ek_opt) == -1 &&
+             (ek_adapter_opt_collect_type(ek_opt) == dest ||
+              (ek_adapter_opt_collect_type(ek_opt) == T_INT && is_subword_type(dest))),  // explicit grouping of && inside ||
+             "");
+
+      if (dest == T_OBJECT && elem_slots == 0 && OptimizeMethodHandles) {
+        // if there are no args, just pretend it's a collect
+        ek_opt = _adapter_opt_collect_0_ref;
+        break;
+      }
+
+      if (dest == T_OBJECT && OptimizeMethodHandles) {
+        // try to use a fixed length adapter
+        ek_try = EntryKind(_adapter_opt_fold_1_ref - 1 + elem_slots);  // candidate: fold_{elem_slots}_ref
+        if (ek_try < _adapter_opt_fold_LAST &&  // NOTE(review): LAST is fold_5_ref, so strict '<' never selects it -- confirm intended
+            ek_adapter_opt_collect_count(ek_try) == elem_slots) {
+          assert(ek_adapter_opt_collect_slot(ek_try) == -1 &&
+                 ek_adapter_opt_collect_type(ek_try) == dest, "");
+          ek_opt = ek_try;
+          break;
+        }
+      }
+
+      break;  // keep the general-case entry chosen by the switch above
+    }
 
   default:
     // should have failed much earlier; must be a missing case here
@@ -2166,11 +2530,36 @@
     // and fall through:
 
   throw_not_impl:
-    // FIXME: these adapters are NYI
-    err = "adapter not yet implemented in the JVM";
+    if (err == NULL)
+      err = "unknown adapter type";
     break;
   }
 
+  if (err == NULL && (vminfo & CONV_VMINFO_MASK) != vminfo) {  // vminfo has bits outside the mask (this also catches negative values)
+    // should not happen, since vminfo is used to encode arg/slot indexes < 255
+    err = "vminfo overflow";
+  }
+
+  if (err == NULL && !have_entry(ek_opt)) {  // the selected entry kind has no entry available
+    err = "adapter stub for this kind of method handle is missing";
+  }
+
+  if (err == NULL && ek_opt == ek_orig) {  // the case analysis above left the kind unchanged
+    switch (ek_opt) {  // no default: kinds not listed here are accepted unchanged
+    case _adapter_prim_to_prim:
+    case _adapter_ref_to_prim:
+    case _adapter_prim_to_ref:
+    case _adapter_swap_args:
+    case _adapter_rot_args:
+    case _adapter_collect_args:
+    case _adapter_fold_args:
+    case _adapter_spread_args:
+      // should be handled completely by optimized cases; see above
+      err = "init_AdapterMethodHandle should not issue this";
+      break;
+    }
+  }
+
   if (err != NULL) {
     throw_InternalError_for_bad_conversion(conversion, err, THREAD);
     return;
@@ -2190,6 +2579,26 @@
   // Java code can publish it in global data structures.
 }
 
+void MethodHandles::ensure_vmlayout_field(Handle target, TRAPS) {  // fills in MethodTypeForm.vmlayout for target's type if it is still NULL
+  Handle mtype(THREAD, java_lang_invoke_MethodHandle::type(target()));  // MethodType of the given method handle
+  Handle mtform(THREAD, java_lang_invoke_MethodType::form(mtype()));    // its MethodTypeForm
+  if (mtform.is_null()) { THROW(vmSymbols::java_lang_InternalError()); }  // a missing form is reported as InternalError
+  if (java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() > 0) {  // presumably: the vmlayout field exists in this JDK -- TODO confirm
+    if (java_lang_invoke_MethodTypeForm::vmlayout(mtform()) == NULL) {  // nothing to do if it was already set
+      // fill it in
+      Handle erased_mtype(THREAD, java_lang_invoke_MethodTypeForm::erasedType(mtform()));
+      TempNewSymbol erased_signature
+        = java_lang_invoke_MethodType::as_signature(erased_mtype(), /*intern:*/true, CHECK);
+      methodOop cookie  // invokeExact method found for the erased signature; stored below as the vmlayout value
+        = SystemDictionary::find_method_handle_invoke(vmSymbols::invokeExact_name(),
+                                                      erased_signature,
+                                                      SystemDictionaryHandles::Object_klass(),
+                                                      THREAD);  // NOTE(review): THREAD rather than CHECK, unlike as_signature above -- confirm a failed lookup may reach init_vmlayout
+      java_lang_invoke_MethodTypeForm::init_vmlayout(mtform(), cookie);
+    }
+  }
+}
+
 //
 // Here are the native methods on sun.invoke.MethodHandleImpl.
 // They are the private interface between this JVM and the HotSpot-specific
@@ -2360,8 +2769,10 @@
 
 #ifndef PRODUCT
 #define EACH_NAMED_CON(template) \
-    template(MethodHandles,GC_JVM_PUSH_LIMIT) \
-    template(MethodHandles,GC_JVM_STACK_MOVE_UNIT) \
+  /* hold back this one until JDK stabilizes */ \
+  /* template(MethodHandles,GC_JVM_PUSH_LIMIT) */  \
+  /* hold back this one until JDK stabilizes */ \
+  /* template(MethodHandles,GC_JVM_STACK_MOVE_UNIT) */ \
     template(MethodHandles,ETF_HANDLE_OR_METHOD_NAME) \
     template(MethodHandles,ETF_DIRECT_HANDLE) \
     template(MethodHandles,ETF_METHOD_NAME) \
@@ -2385,9 +2796,8 @@
     template(java_lang_invoke_AdapterMethodHandle,OP_DROP_ARGS) \
     template(java_lang_invoke_AdapterMethodHandle,OP_COLLECT_ARGS) \
     template(java_lang_invoke_AdapterMethodHandle,OP_SPREAD_ARGS) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_FLYBY) \
-    template(java_lang_invoke_AdapterMethodHandle,OP_RICOCHET) \
-    template(java_lang_invoke_AdapterMethodHandle,CONV_OP_LIMIT) \
+      /* hold back this one until JDK stabilizes */ \
+      /*template(java_lang_invoke_AdapterMethodHandle,CONV_OP_LIMIT)*/  \
     template(java_lang_invoke_AdapterMethodHandle,CONV_OP_MASK) \
     template(java_lang_invoke_AdapterMethodHandle,CONV_VMINFO_MASK) \
     template(java_lang_invoke_AdapterMethodHandle,CONV_VMINFO_SHIFT) \
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/prims/methodHandles.hpp
--- a/src/share/vm/prims/methodHandles.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/prims/methodHandles.hpp	Tue May 17 09:29:56 2011 -0400
@@ -66,8 +66,8 @@
     _adapter_drop_args     = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS,
     _adapter_collect_args  = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS,
     _adapter_spread_args   = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS,
-    _adapter_flyby         = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_FLYBY,
-    _adapter_ricochet      = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_RICOCHET,
+    _adapter_fold_args     = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS,
+    _adapter_unused_13     = _adapter_mh_first + 13,  //hole in the CONV_OP enumeration
     _adapter_mh_last       = _adapter_mh_first + java_lang_invoke_AdapterMethodHandle::CONV_OP_LIMIT - 1,
 
     // Optimized adapter types
@@ -93,10 +93,99 @@
     _adapter_opt_unboxi,
     _adapter_opt_unboxl,
 
-    // spreading (array length cases 0, 1, >=2)
-    _adapter_opt_spread_0,
-    _adapter_opt_spread_1,
-    _adapter_opt_spread_more,
+    // %% Maybe tame the following with a VM_SYMBOLS_DO type macro?
+
+    // how a blocking adapter returns (platform-dependent)
+    _adapter_opt_return_ref,
+    _adapter_opt_return_int,
+    _adapter_opt_return_long,
+    _adapter_opt_return_float,
+    _adapter_opt_return_double,
+    _adapter_opt_return_void,
+    _adapter_opt_return_S0_ref,  // return ref to S=0 (last slot)
+    _adapter_opt_return_S1_ref,  // return ref to S=1 (2nd-to-last slot)
+    _adapter_opt_return_S2_ref,
+    _adapter_opt_return_S3_ref,
+    _adapter_opt_return_S4_ref,
+    _adapter_opt_return_S5_ref,
+    _adapter_opt_return_any,     // dynamically select r/i/l/f/d
+    _adapter_opt_return_FIRST = _adapter_opt_return_ref,
+    _adapter_opt_return_LAST  = _adapter_opt_return_any,
+
+    // spreading (array length cases 0, 1, ...)
+    _adapter_opt_spread_0,       // spread empty array to N=0 arguments
+    _adapter_opt_spread_1_ref,   // spread Object[] to N=1 argument
+    _adapter_opt_spread_2_ref,   // spread Object[] to N=2 arguments
+    _adapter_opt_spread_3_ref,   // spread Object[] to N=3 arguments
+    _adapter_opt_spread_4_ref,   // spread Object[] to N=4 arguments
+    _adapter_opt_spread_5_ref,   // spread Object[] to N=5 arguments
+    _adapter_opt_spread_ref,     // spread Object[] to N arguments
+    _adapter_opt_spread_byte,    // spread byte[] or boolean[] to N arguments
+    _adapter_opt_spread_char,    // spread char[], etc., to N arguments
+    _adapter_opt_spread_short,   // spread short[], etc., to N arguments
+    _adapter_opt_spread_int,     // spread int[], short[], etc., to N arguments
+    _adapter_opt_spread_long,    // spread long[] to N arguments
+    _adapter_opt_spread_float,   // spread float[] to N arguments
+    _adapter_opt_spread_double,  // spread double[] to N arguments
+    _adapter_opt_spread_FIRST = _adapter_opt_spread_0,
+    _adapter_opt_spread_LAST  = _adapter_opt_spread_double,
+
+    // blocking filter/collect conversions
+    // These collect N arguments and replace them (at slot S) by a return value
+    // which is passed to the final target, along with the unaffected arguments.
+    // collect_{N}_{T} collects N arguments at any position into a T value
+    // collect_{N}_S{S}_{T} collects N arguments at slot S into a T value
+    // collect_{T} collects any number of arguments at any position
+    // filter_S{S}_{T} is the same as collect_1_S{S}_{T} (a unary collection)
+    // (collect_2 is also usable as a filter, with long or double arguments)
+    _adapter_opt_collect_ref,    // combine N arguments, replace with a reference
+    _adapter_opt_collect_int,    // combine N arguments, replace with an int, short, etc.
+    _adapter_opt_collect_long,   // combine N arguments, replace with a long
+    _adapter_opt_collect_float,  // combine N arguments, replace with a float
+    _adapter_opt_collect_double, // combine N arguments, replace with a double
+    _adapter_opt_collect_void,   // combine N arguments, replace with nothing
+    // if there is a small fixed number to push, do so without a loop:
+    _adapter_opt_collect_0_ref,  // collect N=0 arguments, insert a reference
+    _adapter_opt_collect_1_ref,  // collect N=1 argument, replace with a reference
+    _adapter_opt_collect_2_ref,  // combine N=2 arguments, replace with a reference
+    _adapter_opt_collect_3_ref,  // combine N=3 arguments, replace with a reference
+    _adapter_opt_collect_4_ref,  // combine N=4 arguments, replace with a reference
+    _adapter_opt_collect_5_ref,  // combine N=5 arguments, replace with a reference
+    // filters are an important special case because they never move arguments:
+    _adapter_opt_filter_S0_ref,  // filter N=1 argument at S=0, replace with a reference
+    _adapter_opt_filter_S1_ref,  // filter N=1 argument at S=1, replace with a reference
+    _adapter_opt_filter_S2_ref,  // filter N=1 argument at S=2, replace with a reference
+    _adapter_opt_filter_S3_ref,  // filter N=1 argument at S=3, replace with a reference
+    _adapter_opt_filter_S4_ref,  // filter N=1 argument at S=4, replace with a reference
+    _adapter_opt_filter_S5_ref,  // filter N=1 argument at S=5, replace with a reference
+    // these move arguments, but they are important for boxing
+    _adapter_opt_collect_2_S0_ref,  // combine last N=2 arguments, replace with a reference
+    _adapter_opt_collect_2_S1_ref,  // combine N=2 arguments at S=1, replace with a reference
+    _adapter_opt_collect_2_S2_ref,  // combine N=2 arguments at S=2, replace with a reference
+    _adapter_opt_collect_2_S3_ref,  // combine N=2 arguments at S=3, replace with a reference
+    _adapter_opt_collect_2_S4_ref,  // combine N=2 arguments at S=4, replace with a reference
+    _adapter_opt_collect_2_S5_ref,  // combine N=2 arguments at S=5, replace with a reference
+    _adapter_opt_collect_FIRST = _adapter_opt_collect_ref,
+    _adapter_opt_collect_LAST  = _adapter_opt_collect_2_S5_ref,
+
+    // blocking folding conversions
+    // these are like collects, but retain all the N arguments for the final target
+    //_adapter_opt_fold_0_ref,   // same as _adapter_opt_collect_0_ref
+    // fold_{N}_{T} processes N arguments at any position into a T value, which it inserts
+    // fold_{T} processes any number of arguments at any position
+    _adapter_opt_fold_ref,       // process N arguments, prepend a reference
+    _adapter_opt_fold_int,       // process N arguments, prepend an int, short, etc.
+    _adapter_opt_fold_long,      // process N arguments, prepend a long
+    _adapter_opt_fold_float,     // process N arguments, prepend a float
+    _adapter_opt_fold_double,    // process N arguments, prepend a double
+    _adapter_opt_fold_void,      // process N arguments, but leave the list unchanged
+    _adapter_opt_fold_1_ref,     // process N=1 argument, prepend a reference
+    _adapter_opt_fold_2_ref,     // process N=2 arguments, prepend a reference
+    _adapter_opt_fold_3_ref,     // process N=3 arguments, prepend a reference
+    _adapter_opt_fold_4_ref,     // process N=4 arguments, prepend a reference
+    _adapter_opt_fold_5_ref,     // process N=5 arguments, prepend a reference
+    _adapter_opt_fold_FIRST = _adapter_opt_fold_ref,
+    _adapter_opt_fold_LAST  = _adapter_opt_fold_5_ref,
 
     _EK_LIMIT,
     _EK_FIRST = 0
@@ -110,6 +199,7 @@
   enum {  // import java_lang_invoke_AdapterMethodHandle::CONV_OP_*
     CONV_OP_LIMIT         = java_lang_invoke_AdapterMethodHandle::CONV_OP_LIMIT,
     CONV_OP_MASK          = java_lang_invoke_AdapterMethodHandle::CONV_OP_MASK,
+    CONV_TYPE_MASK        = java_lang_invoke_AdapterMethodHandle::CONV_TYPE_MASK,
     CONV_VMINFO_MASK      = java_lang_invoke_AdapterMethodHandle::CONV_VMINFO_MASK,
     CONV_VMINFO_SHIFT     = java_lang_invoke_AdapterMethodHandle::CONV_VMINFO_SHIFT,
     CONV_OP_SHIFT         = java_lang_invoke_AdapterMethodHandle::CONV_OP_SHIFT,
@@ -123,6 +213,7 @@
   static MethodHandleEntry* _entries[_EK_LIMIT];
   static const char*        _entry_names[_EK_LIMIT+1];
   static jobject            _raise_exception_method;
+  static address            _adapter_return_handlers[CONV_TYPE_MASK+1];
 
   // Adapters.
   static MethodHandlesAdapterBlob* _adapter_code;
@@ -147,39 +238,195 @@
   }
 
   // Some adapter helper functions.
-  static void get_ek_bound_mh_info(EntryKind ek, BasicType& arg_type, int& arg_mask, int& arg_slots) {
+  static EntryKind ek_original_kind(EntryKind ek) {  // map an optimized entry kind to the general kind it specializes
+    if (ek <= _adapter_mh_last)  return ek;  // kinds up to _adapter_mh_last map to themselves
     switch (ek) {
-    case _bound_int_mh        : // fall-thru
-    case _bound_int_direct_mh : arg_type = T_INT;    arg_mask = _INSERT_INT_MASK;  break;
-    case _bound_long_mh       : // fall-thru
-    case _bound_long_direct_mh: arg_type = T_LONG;   arg_mask = _INSERT_LONG_MASK; break;
-    case _bound_ref_mh        : // fall-thru
-    case _bound_ref_direct_mh : arg_type = T_OBJECT; arg_mask = _INSERT_REF_MASK;  break;
-    default: ShouldNotReachHere();
+    case _adapter_opt_swap_1:
+    case _adapter_opt_swap_2:
+      return _adapter_swap_args;
+    case _adapter_opt_rot_1_up:
+    case _adapter_opt_rot_1_down:
+    case _adapter_opt_rot_2_up:
+    case _adapter_opt_rot_2_down:
+      return _adapter_rot_args;
+    case _adapter_opt_i2i:
+    case _adapter_opt_l2i:
+    case _adapter_opt_d2f:
+    case _adapter_opt_i2l:
+    case _adapter_opt_f2d:
+      return _adapter_prim_to_prim;
+    case _adapter_opt_unboxi:
+    case _adapter_opt_unboxl:
+      return _adapter_ref_to_prim;
     }
-    arg_slots = type2size[arg_type];
+    if (ek >= _adapter_opt_spread_FIRST && ek <= _adapter_opt_spread_LAST)  // remaining groups are tested as FIRST..LAST ranges
+      return _adapter_spread_args;
+    if (ek >= _adapter_opt_collect_FIRST && ek <= _adapter_opt_collect_LAST)
+      return _adapter_collect_args;
+    if (ek >= _adapter_opt_fold_FIRST && ek <= _adapter_opt_fold_LAST)
+      return _adapter_fold_args;
+    if (ek >= _adapter_opt_return_FIRST && ek <= _adapter_opt_return_LAST)
+      return _adapter_opt_return_any;
+    assert(false, "oob");  // ek matched none of the cases or ranges above
+    return _EK_LIMIT;  // result if execution continues past the assert
+  }
+
+  static bool ek_supported(MethodHandles::EntryKind ek);
+
+  static BasicType ek_bound_mh_arg_type(EntryKind ek) {  // BasicType selected by a bound-MH entry kind
+    switch (ek) {
+    case _bound_int_mh         : // fall-thru
+    case _bound_int_direct_mh  : return T_INT;
+    case _bound_long_mh        : // fall-thru
+    case _bound_long_direct_mh : return T_LONG;
+    default                    : return T_OBJECT;  // every other kind is reported as T_OBJECT; no error is raised
+    }
+  }
+
+  static int ek_adapter_opt_swap_slots(EntryKind ek) {  // slot count (1 or 2) for a swap/rot entry kind
+    switch (ek) {
+    case _adapter_opt_swap_1        : return  1;
+    case _adapter_opt_swap_2        : return  2;
+    case _adapter_opt_rot_1_up      : return  1;
+    case _adapter_opt_rot_1_down    : return  1;
+    case _adapter_opt_rot_2_up      : return  2;
+    case _adapter_opt_rot_2_down    : return  2;
+    default : ShouldNotReachHere();   return -1;  // only the six swap/rot kinds above are accepted
+    }
+  }
+
+  static int ek_adapter_opt_swap_mode(EntryKind ek) {  // 0 for a swap kind, +1 for rot "up", -1 for rot "down"
+    switch (ek) {
+    case _adapter_opt_swap_1       : return  0;
+    case _adapter_opt_swap_2       : return  0;
+    case _adapter_opt_rot_1_up     : return  1;
+    case _adapter_opt_rot_1_down   : return -1;
+    case _adapter_opt_rot_2_up     : return  1;
+    case _adapter_opt_rot_2_down   : return -1;
+    default : ShouldNotReachHere();  return  0;  // only the six swap/rot kinds above are accepted
+    }
   }
 
-  static void get_ek_adapter_opt_swap_rot_info(EntryKind ek, int& swap_bytes, int& rotate) {
-    int swap_slots = 0;
+  static int ek_adapter_opt_collect_count(EntryKind ek) {  // fixed argument count N of a collect/filter/fold kind, or -1
+    assert(ek >= _adapter_opt_collect_FIRST && ek <= _adapter_opt_collect_LAST ||  // ek must lie in the collect range ...
+           ek >= _adapter_opt_fold_FIRST    && ek <= _adapter_opt_fold_LAST, "");  // ... or in the fold range
     switch (ek) {
-    case _adapter_opt_swap_1:     swap_slots = 1; rotate =  0; break;
-    case _adapter_opt_swap_2:     swap_slots = 2; rotate =  0; break;
-    case _adapter_opt_rot_1_up:   swap_slots = 1; rotate =  1; break;
-    case _adapter_opt_rot_1_down: swap_slots = 1; rotate = -1; break;
-    case _adapter_opt_rot_2_up:   swap_slots = 2; rotate =  1; break;
-    case _adapter_opt_rot_2_down: swap_slots = 2; rotate = -1; break;
-    default: ShouldNotReachHere();
+    case _adapter_opt_collect_0_ref    : return  0;
+    case _adapter_opt_filter_S0_ref    :
+    case _adapter_opt_filter_S1_ref    :
+    case _adapter_opt_filter_S2_ref    :
+    case _adapter_opt_filter_S3_ref    :
+    case _adapter_opt_filter_S4_ref    :
+    case _adapter_opt_filter_S5_ref    :
+    case _adapter_opt_fold_1_ref       :
+    case _adapter_opt_collect_1_ref    : return  1;  // every filter_S* kind counts as one argument
+    case _adapter_opt_collect_2_S0_ref :
+    case _adapter_opt_collect_2_S1_ref :
+    case _adapter_opt_collect_2_S2_ref :
+    case _adapter_opt_collect_2_S3_ref :
+    case _adapter_opt_collect_2_S4_ref :
+    case _adapter_opt_collect_2_S5_ref :
+    case _adapter_opt_fold_2_ref       :
+    case _adapter_opt_collect_2_ref    : return  2;
+    case _adapter_opt_fold_3_ref       :
+    case _adapter_opt_collect_3_ref    : return  3;
+    case _adapter_opt_fold_4_ref       :
+    case _adapter_opt_collect_4_ref    : return  4;
+    case _adapter_opt_fold_5_ref       :
+    case _adapter_opt_collect_5_ref    : return  5;
+    default                            : return -1;  // sentinel value for "variable"
     }
-    // Return the size of the stack slots to move in bytes.
-    swap_bytes = swap_slots * Interpreter::stackElementSize;
+  }
+
+  static int ek_adapter_opt_collect_slot(EntryKind ek) {  // the S<k> index (0..5) named by a collect_2_S*/filter_S* kind, or -1
+    assert(ek >= _adapter_opt_collect_FIRST && ek <= _adapter_opt_collect_LAST ||  // ek must lie in the collect range ...
+           ek >= _adapter_opt_fold_FIRST    && ek <= _adapter_opt_fold_LAST, "");  // ... or in the fold range
+    switch (ek) {
+    case _adapter_opt_collect_2_S0_ref  :
+    case _adapter_opt_filter_S0_ref     : return 0;
+    case _adapter_opt_collect_2_S1_ref  :
+    case _adapter_opt_filter_S1_ref     : return 1;
+    case _adapter_opt_collect_2_S2_ref  :
+    case _adapter_opt_filter_S2_ref     : return 2;
+    case _adapter_opt_collect_2_S3_ref  :
+    case _adapter_opt_filter_S3_ref     : return 3;
+    case _adapter_opt_collect_2_S4_ref  :
+    case _adapter_opt_filter_S4_ref     : return 4;
+    case _adapter_opt_collect_2_S5_ref  :
+    case _adapter_opt_filter_S5_ref     : return 5;
+    default                             : return -1;  // sentinel value for "variable"
+    }
   }
 
-  static int get_ek_adapter_opt_spread_info(EntryKind ek) {
+  static BasicType ek_adapter_opt_collect_type(EntryKind ek) {  // BasicType named by a collect/fold entry kind
+    assert(ek >= _adapter_opt_collect_FIRST && ek <= _adapter_opt_collect_LAST ||  // ek must lie in the collect range ...
+           ek >= _adapter_opt_fold_FIRST    && ek <= _adapter_opt_fold_LAST, "");  // ... or in the fold range
+    switch (ek) {
+    case _adapter_opt_fold_int          :
+    case _adapter_opt_collect_int       : return T_INT;
+    case _adapter_opt_fold_long         :
+    case _adapter_opt_collect_long      : return T_LONG;
+    case _adapter_opt_fold_float        :
+    case _adapter_opt_collect_float     : return T_FLOAT;
+    case _adapter_opt_fold_double       :
+    case _adapter_opt_collect_double    : return T_DOUBLE;
+    case _adapter_opt_fold_void         :
+    case _adapter_opt_collect_void      : return T_VOID;
+    default                             : return T_OBJECT;  // all remaining collect/fold kinds
+    }
+  }
+
+  static int ek_adapter_opt_return_slot(EntryKind ek) {  // the S<k> index (0..5) named by a return_S*_ref kind, or -1
+    assert(ek >= _adapter_opt_return_FIRST && ek <= _adapter_opt_return_LAST, "");  // ek must lie in the return range
+    switch (ek) {
+    case _adapter_opt_return_S0_ref : return 0;
+    case _adapter_opt_return_S1_ref : return 1;
+    case _adapter_opt_return_S2_ref : return 2;
+    case _adapter_opt_return_S3_ref : return 3;
+    case _adapter_opt_return_S4_ref : return 4;
+    case _adapter_opt_return_S5_ref : return 5;
+    default                         : return -1;  // sentinel value for "variable"
+    }
+  }
+
+  static BasicType ek_adapter_opt_return_type(EntryKind ek) {  // BasicType named by a return entry kind
+    assert(ek >= _adapter_opt_return_FIRST && ek <= _adapter_opt_return_LAST, "");  // ek must lie in the return range
     switch (ek) {
-    case _adapter_opt_spread_0: return  0;
-    case _adapter_opt_spread_1: return  1;
-    default                   : return -1;
+    case _adapter_opt_return_int    : return T_INT;
+    case _adapter_opt_return_long   : return T_LONG;
+    case _adapter_opt_return_float  : return T_FLOAT;
+    case _adapter_opt_return_double : return T_DOUBLE;
+    case _adapter_opt_return_void   : return T_VOID;
+    case _adapter_opt_return_any    : return T_CONFLICT;  // sentinel value for "variable"
+    default                         : return T_OBJECT;  // all remaining return kinds
+    }
+  }
+
+  static int ek_adapter_opt_spread_count(EntryKind ek) {  // fixed element count (0..5) of a spread kind, or -1
+    assert(ek >= _adapter_opt_spread_FIRST && ek <= _adapter_opt_spread_LAST, "");  // ek must lie in the spread range
+    switch (ek) {
+    case _adapter_opt_spread_0     : return  0;
+    case _adapter_opt_spread_1_ref : return  1;
+    case _adapter_opt_spread_2_ref : return  2;
+    case _adapter_opt_spread_3_ref : return  3;
+    case _adapter_opt_spread_4_ref : return  4;
+    case _adapter_opt_spread_5_ref : return  5;
+    default                        : return -1;  // sentinel value for "variable"
+    }
+  }
+
+  static BasicType ek_adapter_opt_spread_type(EntryKind ek) {  // BasicType named by a spread entry kind
+    assert(ek >= _adapter_opt_spread_FIRST && ek <= _adapter_opt_spread_LAST, "");  // ek must lie in the spread range
+    switch (ek) {
+    // (there is no _adapter_opt_spread_boolean; we use byte)
+    case _adapter_opt_spread_byte   : return T_BYTE;
+    case _adapter_opt_spread_char   : return T_CHAR;
+    case _adapter_opt_spread_short  : return T_SHORT;
+    case _adapter_opt_spread_int    : return T_INT;
+    case _adapter_opt_spread_long   : return T_LONG;
+    case _adapter_opt_spread_float  : return T_FLOAT;
+    case _adapter_opt_spread_double : return T_DOUBLE;
+    default                         : return T_OBJECT;  // all remaining spread kinds
     }
   }
 
@@ -228,12 +475,21 @@
   // Bit mask of conversion_op values.  May vary by platform.
   static int adapter_conversion_ops_supported_mask();
 
+  static bool conv_op_supported(int conv_op) {  // true when conv_op's bit is set in the supported-ops mask
+    assert(conv_op_valid(conv_op), "");  // caller must pass a valid conversion op
+    return ((adapter_conversion_ops_supported_mask() & nth_bit(conv_op)) != 0);
+  }
+
   // Offset in words that the interpreter stack pointer moves when an argument is pushed.
   // The stack_move value must always be a multiple of this.
   static int stack_move_unit() {
     return frame::interpreter_frame_expression_stack_direction() * Interpreter::stackElementWords;
   }
 
+  // Adapter frame traversal.  (Implementation-specific.)
+  static frame ricochet_frame_sender(const frame& fr, RegisterMap* reg_map);
+  static void ricochet_frame_oops_do(const frame& fr, OopClosure* blk, const RegisterMap* reg_map);
+
   enum { CONV_VMINFO_SIGN_FLAG = 0x80 };
   // Shift values for prim-to-prim conversions.
   static int adapter_prim_to_prim_subword_vminfo(BasicType dest) {
@@ -429,6 +685,7 @@
 
   // Fill in the fields of an AdapterMethodHandle mh.  (MH.type must be pre-filled.)
   static void init_AdapterMethodHandle(Handle mh, Handle target, int argnum, TRAPS);
+  static void ensure_vmlayout_field(Handle target, TRAPS);
 
 #ifdef ASSERT
   static bool spot_check_entry_names();
@@ -441,6 +698,8 @@
                                               KlassHandle receiver_klass,
                                               TRAPS);
 
+public:
+  static bool is_float_fixed_reinterpretation_cast(BasicType src, BasicType dst);
   static bool same_basic_type_for_arguments(BasicType src, BasicType dst,
                                             bool raw = false,
                                             bool for_return = false);
@@ -448,12 +707,54 @@
     return same_basic_type_for_arguments(src, dst, raw, true);
   }
 
-  enum {                        // arg_mask values
+  static Symbol* convert_to_signature(oop type_str, bool polymorphic, TRAPS);
+
+#ifdef TARGET_ARCH_x86
+# include "methodHandles_x86.hpp"
+#endif
+#ifdef TARGET_ARCH_sparc
+#define TARGET_ARCH_NYI_6939861 1 //FIXME
+//# include "methodHandles_sparc.hpp"
+#endif
+#ifdef TARGET_ARCH_zero
+#define TARGET_ARCH_NYI_6939861 1 //FIXME
+//# include "methodHandles_zero.hpp"
+#endif
+#ifdef TARGET_ARCH_arm
+#define TARGET_ARCH_NYI_6939861 1 //FIXME
+//# include "methodHandles_arm.hpp"
+#endif
+#ifdef TARGET_ARCH_ppc
+#define TARGET_ARCH_NYI_6939861 1 //FIXME
+//# include "methodHandles_ppc.hpp"
+#endif
+
+#ifdef TARGET_ARCH_NYI_6939861
+  // Here are some backward compatible declarations until the 6939861 ports are updated.
+  #define _adapter_flyby    (_EK_LIMIT + 10)
+  #define _adapter_ricochet (_EK_LIMIT + 11)
+  #define _adapter_opt_spread_1    _adapter_opt_spread_1_ref
+  #define _adapter_opt_spread_more _adapter_opt_spread_ref
+  enum {
     _INSERT_NO_MASK   = -1,
     _INSERT_REF_MASK  = 0,
     _INSERT_INT_MASK  = 1,
     _INSERT_LONG_MASK = 3
   };
+  static void get_ek_bound_mh_info(EntryKind ek, BasicType& arg_type, int& arg_mask, int& arg_slots) {  // out-param wrapper over ek_bound_mh_arg_type
+    arg_type = ek_bound_mh_arg_type(ek);
+    arg_mask = 0;  // always 0 here, whatever arg_type is
+    arg_slots = type2size[arg_type];  // looked up in the type2size table
+  }
+  static void get_ek_adapter_opt_swap_rot_info(EntryKind ek, int& swap_bytes, int& rotate) {  // out-param wrapper over the ek_adapter_opt_swap_* helpers
+    int swap_slots = ek_adapter_opt_swap_slots(ek);
+    rotate = ek_adapter_opt_swap_mode(ek);  // 0, +1 or -1
+    swap_bytes = swap_slots * Interpreter::stackElementSize;  // slot count scaled by Interpreter::stackElementSize
+  }
+  static int get_ek_adapter_opt_spread_info(EntryKind ek) {  // forwards unchanged to ek_adapter_opt_spread_count
+    return ek_adapter_opt_spread_count(ek);
+  }
+
   static void insert_arg_slots(MacroAssembler* _masm,
                                RegisterOrConstant arg_slots,
                                int arg_mask,
@@ -466,8 +767,7 @@
                                Register temp_reg, Register temp2_reg, Register temp3_reg = noreg);
 
   static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
-
-  static Symbol* convert_to_signature(oop type_str, bool polymorphic, TRAPS);
+#endif //TARGET_ARCH_NYI_6939861
 };
 
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/advancedThresholdPolicy.cpp
--- a/src/share/vm/runtime/advancedThresholdPolicy.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp	Tue May 17 09:29:56 2011 -0400
@@ -1,7 +1,26 @@
 /*
-* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
-* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
-*/
+ * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
 
 #include "precompiled.hpp"
 #include "runtime/advancedThresholdPolicy.hpp"
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/advancedThresholdPolicy.hpp
--- a/src/share/vm/runtime/advancedThresholdPolicy.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/advancedThresholdPolicy.hpp	Tue May 17 09:29:56 2011 -0400
@@ -1,7 +1,26 @@
 /*
-* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
-* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
-*/
+ * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
 
 #ifndef SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
 #define SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/deoptimization.cpp	Tue May 17 09:29:56 2011 -0400
@@ -90,12 +90,14 @@
 
 Deoptimization::UnrollBlock::UnrollBlock(int  size_of_deoptimized_frame,
                                          int  caller_adjustment,
+                                         int  caller_actual_parameters,
                                          int  number_of_frames,
                                          intptr_t* frame_sizes,
                                          address* frame_pcs,
                                          BasicType return_type) {
   _size_of_deoptimized_frame = size_of_deoptimized_frame;
   _caller_adjustment         = caller_adjustment;
+  _caller_actual_parameters  = caller_actual_parameters;
   _number_of_frames          = number_of_frames;
   _frame_sizes               = frame_sizes;
   _frame_pcs                 = frame_pcs;
@@ -373,6 +375,28 @@
     popframe_extra_args = in_words(thread->popframe_preserved_args_size_in_words());
   }
 
+  // Find the current pc for sender of the deoptee. Since the sender may have been deoptimized
+  // itself since the deoptee vframeArray was created we must get a fresh value of the pc rather
+  // than simply use array->sender.pc(). This requires us to walk the current set of frames
+  //
+  frame deopt_sender = stub_frame.sender(&dummy_map); // First is the deoptee frame
+  deopt_sender = deopt_sender.sender(&dummy_map);     // Now deoptee caller
+
+  // It's possible that the number of parameters at the call site is
+  // different from the number of arguments in the callee when method
+  // handles are used.  If the caller is interpreted get the real
+  // value so that the proper amount of space can be added to its
+  // frame.
+  int caller_actual_parameters = callee_parameters;
+  if (deopt_sender.is_interpreted_frame()) {
+    methodHandle method = deopt_sender.interpreter_frame_method();
+    Bytecode_invoke cur = Bytecode_invoke_check(method,
+                                                deopt_sender.interpreter_frame_bci());
+    Symbol* signature = method->constants()->signature_ref_at(cur.index());
+    ArgumentSizeComputer asc(signature);
+    caller_actual_parameters = asc.size() + (cur.has_receiver() ? 1 : 0);
+  }
+
   //
   // frame_sizes/frame_pcs[0] oldest frame (int or c2i)
   // frame_sizes/frame_pcs[1] next oldest frame (int)
@@ -391,7 +415,13 @@
     // frame[number_of_frames - 1 ] = on_stack_size(youngest)
     // frame[number_of_frames - 2 ] = on_stack_size(sender(youngest))
     // frame[number_of_frames - 3 ] = on_stack_size(sender(sender(youngest)))
-    frame_sizes[number_of_frames - 1 - index] = BytesPerWord * array->element(index)->on_stack_size(callee_parameters,
+    int caller_parms = callee_parameters;
+    if (index == array->frames() - 1) {
+      // Use the value from the interpreted caller
+      caller_parms = caller_actual_parameters;
+    }
+    frame_sizes[number_of_frames - 1 - index] = BytesPerWord * array->element(index)->on_stack_size(caller_parms,
+                                                                                                    callee_parameters,
                                                                                                     callee_locals,
                                                                                                     index == 0,
                                                                                                     popframe_extra_args);
@@ -418,28 +448,6 @@
   // Compute information for handling adapters and adjusting the frame size of the caller.
   int caller_adjustment = 0;
 
-  // Find the current pc for sender of the deoptee. Since the sender may have been deoptimized
-  // itself since the deoptee vframeArray was created we must get a fresh value of the pc rather
-  // than simply use array->sender.pc(). This requires us to walk the current set of frames
-  //
-  frame deopt_sender = stub_frame.sender(&dummy_map); // First is the deoptee frame
-  deopt_sender = deopt_sender.sender(&dummy_map);     // Now deoptee caller
-
-  // It's possible that the number of paramters at the call site is
-  // different than number of arguments in the callee when method
-  // handles are used.  If the caller is interpreted get the real
-  // value so that the proper amount of space can be added to it's
-  // frame.
-  int sender_callee_parameters = callee_parameters;
-  if (deopt_sender.is_interpreted_frame()) {
-    methodHandle method = deopt_sender.interpreter_frame_method();
-    Bytecode_invoke cur = Bytecode_invoke_check(method,
-                                                deopt_sender.interpreter_frame_bci());
-    Symbol* signature = method->constants()->signature_ref_at(cur.index());
-    ArgumentSizeComputer asc(signature);
-    sender_callee_parameters = asc.size() + (cur.has_receiver() ? 1 : 0);
-  }
-
   // Compute the amount the oldest interpreter frame will have to adjust
   // its caller's stack by. If the caller is a compiled frame then
   // we pretend that the callee has no parameters so that the
@@ -454,11 +462,11 @@
 
   if (deopt_sender.is_compiled_frame()) {
     caller_adjustment = last_frame_adjust(0, callee_locals);
-  } else if (callee_locals > sender_callee_parameters) {
+  } else if (callee_locals > caller_actual_parameters) {
     // The caller frame may need extending to accommodate
     // non-parameter locals of the first unpacked interpreted frame.
     // Compute that adjustment.
-    caller_adjustment = last_frame_adjust(sender_callee_parameters, callee_locals);
+    caller_adjustment = last_frame_adjust(caller_actual_parameters, callee_locals);
   }
 
   // If the sender is deoptimized the we must retrieve the address of the handler
@@ -473,6 +481,7 @@
 
   UnrollBlock* info = new UnrollBlock(array->frame_size() * BytesPerWord,
                                       caller_adjustment * BytesPerWord,
+                                      caller_actual_parameters,
                                       number_of_frames,
                                       frame_sizes,
                                       frame_pcs,
@@ -570,7 +579,7 @@
   UnrollBlock* info = array->unroll_block();
 
   // Unpack the interpreter frames and any adapter frame (c2 only) we might create.
-  array->unpack_to_stack(stub_frame, exec_mode);
+  array->unpack_to_stack(stub_frame, exec_mode, info->caller_actual_parameters());
 
   BasicType bt = info->return_type();
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/deoptimization.hpp
--- a/src/share/vm/runtime/deoptimization.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/deoptimization.hpp	Tue May 17 09:29:56 2011 -0400
@@ -138,6 +138,9 @@
     intptr_t* _register_block;            // Block for storing callee-saved registers.
     BasicType _return_type;               // Tells if we have to restore double or long return value
     intptr_t  _initial_fp;                // FP of the sender frame
+    int       _caller_actual_parameters;  // The number of actual arguments at the
+                                          // interpreted caller of the deoptimized frame
+
     // The following fields are used as temps during the unpacking phase
     // (which is tight on registers, especially on x86). They really ought
     // to be PD variables but that involves moving this class into its own
@@ -149,6 +152,7 @@
     // Constructor
     UnrollBlock(int  size_of_deoptimized_frame,
                 int  caller_adjustment,
+                int  caller_actual_parameters,
                 int  number_of_frames,
                 intptr_t* frame_sizes,
                 address* frames_pcs,
@@ -168,6 +172,8 @@
 
     void set_initial_fp(intptr_t fp) { _initial_fp = fp; }
 
+    int caller_actual_parameters() const { return _caller_actual_parameters; }  // read-only accessor for _caller_actual_parameters
+
     // Accessors used by the code generator for the unpack stub.
     static int size_of_deoptimized_frame_offset_in_bytes() { return offset_of(UnrollBlock, _size_of_deoptimized_frame); }
     static int caller_adjustment_offset_in_bytes()         { return offset_of(UnrollBlock, _caller_adjustment);         }
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/frame.cpp
--- a/src/share/vm/runtime/frame.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/frame.cpp	Tue May 17 09:29:56 2011 -0400
@@ -33,6 +33,7 @@
 #include "oops/methodOop.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/oop.inline2.hpp"
+#include "prims/methodHandles.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
@@ -169,6 +170,11 @@
 }
 
 // type testers
+bool frame::is_ricochet_frame() const {  // does this frame belong to the shared ricochet blob?
+  RicochetBlob* rcb = SharedRuntime::ricochet_blob();
+  return (_cb == rcb && rcb != NULL && rcb->returns_to_bounce_addr(_pc));  // same blob, blob exists, and _pc passes returns_to_bounce_addr
+}
+
 bool frame::is_deoptimized_frame() const {
   assert(_deopt_state != unknown, "not answerable");
   return _deopt_state == is_deoptimized;
@@ -341,12 +347,18 @@
 
 frame frame::real_sender(RegisterMap* map) const {
   frame result = sender(map);
-  while (result.is_runtime_frame()) {
+  while (result.is_runtime_frame() ||
+         result.is_ricochet_frame()) {  // keep walking past runtime and ricochet frames
     result = result.sender(map);
   }
   return result;
 }
 
+frame frame::sender_for_ricochet_frame(RegisterMap* map) const {  // sender lookup for a ricochet frame
+  assert(is_ricochet_frame(), "");  // only valid on a ricochet frame
+  return MethodHandles::ricochet_frame_sender(*this, map);  // delegated to MethodHandles
+}
+
 // Note: called by profiler - NOT for current thread
 frame frame::profile_find_Java_sender_frame(JavaThread *thread) {
 // If we don't recognize this frame, walk back up the stack until we do
@@ -529,6 +541,7 @@
 const char* frame::print_name() const {
   if (is_native_frame())      return "Native";
   if (is_interpreted_frame()) return "Interpreted";
+  if (is_ricochet_frame())    return "Ricochet";
   if (is_compiled_frame()) {
     if (is_deoptimized_frame()) return "Deoptimized";
     return "Compiled";
@@ -715,6 +728,8 @@
       st->print("v  ~RuntimeStub::%s", ((RuntimeStub *)_cb)->name());
     } else if (_cb->is_deoptimization_stub()) {
       st->print("v  ~DeoptimizationBlob");
+    } else if (_cb->is_ricochet_stub()) {
+      st->print("v  ~RichochetBlob");
     } else if (_cb->is_exception_stub()) {
       st->print("v  ~ExceptionBlob");
     } else if (_cb->is_safepoint_stub()) {
@@ -978,6 +993,9 @@
 
 void frame::oops_code_blob_do(OopClosure* f, CodeBlobClosure* cf, const RegisterMap* reg_map) {
   assert(_cb != NULL, "sanity check");
+  if (_cb == SharedRuntime::ricochet_blob()) {
+    oops_ricochet_do(f, reg_map);
+  }
   if (_cb->oop_maps() != NULL) {
     OopMapSet::oops_do(this, reg_map, f);
 
@@ -996,6 +1014,11 @@
     cf->do_code_blob(_cb);
 }
 
+void frame::oops_ricochet_do(OopClosure* f, const RegisterMap* map) {  // apply closure f to a ricochet frame
+  assert(is_ricochet_frame(), "");  // only valid on a ricochet frame
+  MethodHandles::ricochet_frame_oops_do(*this, f, map);  // delegated to MethodHandles
+}
+
 class CompiledArgumentOopFinder: public SignatureInfo {
  protected:
   OopClosure*     _f;
@@ -1400,7 +1423,7 @@
 }
 
 
-bool FrameValues::validate() {
+void FrameValues::validate() {
   _values.sort(compare);
   bool error = false;
   FrameValue prev;
@@ -1423,19 +1446,32 @@
       prev = fv;
     }
   }
-  return error;
+  assert(!error, "invalid layout");
 }
 
 
 void FrameValues::print() {
   _values.sort(compare);
-  intptr_t* v0 = _values.at(0).location;
-  intptr_t* v1 = _values.at(_values.length() - 1).location;
+  JavaThread* thread = JavaThread::current();
+
+  // Sometimes values like the fp can be invalid values if the
+  // register map wasn't updated during the walk.  Trim out values
+  // that aren't actually in the stack of the thread.
+  int min_index = 0;
+  int max_index = _values.length() - 1;
+  intptr_t* v0 = _values.at(min_index).location;
+  while (!thread->is_in_stack((address)v0)) {
+    v0 = _values.at(++min_index).location;
+  }
+  intptr_t* v1 = _values.at(max_index).location;
+  while (!thread->is_in_stack((address)v1)) {
+    v1 = _values.at(--max_index).location;
+  }
   intptr_t* min = MIN2(v0, v1);
   intptr_t* max = MAX2(v0, v1);
   intptr_t* cur = max;
   intptr_t* last = NULL;
-  for (int i = _values.length() - 1; i >= 0; i--) {
+  for (int i = max_index; i >= min_index; i--) {
     FrameValue fv = _values.at(i);
     while (cur > fv.location) {
       tty->print_cr(" " INTPTR_FORMAT ": " INTPTR_FORMAT, cur, *cur);
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/frame.hpp
--- a/src/share/vm/runtime/frame.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/frame.hpp	Tue May 17 09:29:56 2011 -0400
@@ -135,6 +135,7 @@
   bool is_interpreted_frame()    const;
   bool is_java_frame()           const;
   bool is_entry_frame()          const;             // Java frame called from C?
+  bool is_ricochet_frame()       const;
   bool is_native_frame()         const;
   bool is_runtime_frame()        const;
   bool is_compiled_frame()       const;
@@ -175,6 +176,7 @@
   // Helper methods for better factored code in frame::sender
   frame sender_for_compiled_frame(RegisterMap* map) const;
   frame sender_for_entry_frame(RegisterMap* map) const;
+  frame sender_for_ricochet_frame(RegisterMap* map) const;
   frame sender_for_interpreter_frame(RegisterMap* map) const;
   frame sender_for_native_frame(RegisterMap* map) const;
 
@@ -400,6 +402,7 @@
   // Oops-do's
   void oops_compiled_arguments_do(Symbol* signature, bool has_receiver, const RegisterMap* reg_map, OopClosure* f);
   void oops_interpreted_do(OopClosure* f, const RegisterMap* map, bool query_oop_map_cache = true);
+  void oops_ricochet_do(OopClosure* f, const RegisterMap* map);
 
  private:
   void oops_interpreted_arguments_do(Symbol* signature, bool has_receiver, OopClosure* f);
@@ -508,7 +511,7 @@
   // Used by frame functions to describe locations.
   void describe(int owner, intptr_t* location, const char* description, int priority = 0);
 
-  bool validate();
+  void validate();
   void print();
 };
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/globals.hpp	Tue May 17 09:29:56 2011 -0400
@@ -1460,8 +1460,10 @@
   product(intx, ParallelGCBufferWastePct, 10,                               \
           "wasted fraction of parallel allocation buffer.")                 \
                                                                             \
-  product(bool, ParallelGCRetainPLAB, true,                                 \
-          "Retain parallel allocation buffers across scavenges.")           \
+  diagnostic(bool, ParallelGCRetainPLAB, false,                             \
+             "Retain parallel allocation buffers across scavenges; "        \
+             " -- disabled because this currently conflicts with "          \
+             " parallel card scanning under certain conditions ")           \
                                                                             \
   product(intx, TargetPLABWastePct, 10,                                     \
           "target wasted space in last buffer as pct of overall allocation")\
@@ -1495,7 +1497,15 @@
   product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
-  product(uintx, CMSParPromoteBlocksToClaim, 16,                             \
+  diagnostic(intx, ParGCStridesPerThread, 2,                                \
+          "The number of strides per worker thread that we divide up the "  \
+          "card table scanning work into")                                  \
+                                                                            \
+  diagnostic(intx, ParGCCardsPerStrideChunk, 256,                           \
+          "The number of cards in each chunk of the parallel chunks used "  \
+          "during card table scanning")                                     \
+                                                                            \
+  product(uintx, CMSParPromoteBlocksToClaim, 16,                            \
           "Number of blocks to attempt to claim when refilling CMS LAB for "\
           "parallel GC.")                                                   \
                                                                             \
@@ -3708,6 +3718,10 @@
   diagnostic(bool, OptimizeMethodHandles, true,                             \
           "when constructing method handles, try to improve them")          \
                                                                             \
+  diagnostic(bool, UseRicochetFrames, true,                                 \
+          "use ricochet stack frames for method handle combination, "       \
+          "if the platform supports them")                                  \
+                                                                            \
   experimental(bool, TrustFinalNonStaticFields, false,                      \
           "trust final non-static declarations for constant folding")       \
                                                                             \
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/sharedRuntime.cpp
--- a/src/share/vm/runtime/sharedRuntime.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Tue May 17 09:29:56 2011 -0400
@@ -88,6 +88,8 @@
 HS_DTRACE_PROBE_DECL7(hotspot, method__return, int,
                       char*, int, char*, int, char*, int);
 
+RicochetBlob*      SharedRuntime::_ricochet_blob = NULL;
+
 // Implementation of SharedRuntime
 
 #ifndef PRODUCT
@@ -460,6 +462,10 @@
   if (Interpreter::contains(return_address)) {
     return Interpreter::rethrow_exception_entry();
   }
+  // Ricochet frame unwind code
+  if (SharedRuntime::ricochet_blob() != NULL && SharedRuntime::ricochet_blob()->returns_to_bounce_addr(return_address)) {
+    return SharedRuntime::ricochet_blob()->exception_addr();
+  }
 
   guarantee(blob == NULL || !blob->is_runtime_stub(), "caller should have skipped stub");
   guarantee(!VtableStubs::contains(return_address), "NULL exceptions in vtables should have been handled already!");
@@ -1174,6 +1180,7 @@
   assert(stub_frame.is_runtime_frame(), "sanity check");
   frame caller_frame = stub_frame.sender(&reg_map);
   assert(!caller_frame.is_interpreted_frame() && !caller_frame.is_entry_frame(), "unexpected frame");
+  assert(!caller_frame.is_ricochet_frame(), "unexpected frame");
 #endif /* ASSERT */
 
   methodHandle callee_method;
@@ -1222,6 +1229,7 @@
 
   if (caller_frame.is_interpreted_frame() ||
       caller_frame.is_entry_frame()       ||
+      caller_frame.is_ricochet_frame()    ||
       is_mh_invoke_via_adapter) {
     methodOop callee = thread->callee_target();
     guarantee(callee != NULL && callee->is_method(), "bad handshake");
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/sharedRuntime.hpp
--- a/src/share/vm/runtime/sharedRuntime.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/sharedRuntime.hpp	Tue May 17 09:29:56 2011 -0400
@@ -58,6 +58,8 @@
   static RuntimeStub* _resolve_virtual_call_blob;
   static RuntimeStub* _resolve_static_call_blob;
 
+  static RicochetBlob* _ricochet_blob;
+
   static SafepointBlob* _polling_page_safepoint_handler_blob;
   static SafepointBlob* _polling_page_return_handler_blob;
 #ifdef COMPILER2
@@ -213,6 +215,16 @@
     return _resolve_static_call_blob->entry_point();
   }
 
+  static RicochetBlob* ricochet_blob() {
+#ifdef X86
+    // Currently only implemented on x86
+    assert(!EnableInvokeDynamic || _ricochet_blob != NULL, "oops");
+#endif
+    return _ricochet_blob;
+  }
+
+  static void generate_ricochet_blob();
+
   static SafepointBlob* polling_page_return_handler_blob()     { return _polling_page_return_handler_blob; }
   static SafepointBlob* polling_page_safepoint_handler_blob()  { return _polling_page_safepoint_handler_blob; }
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/simpleThresholdPolicy.cpp
--- a/src/share/vm/runtime/simpleThresholdPolicy.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/simpleThresholdPolicy.cpp	Tue May 17 09:29:56 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/simpleThresholdPolicy.hpp
--- a/src/share/vm/runtime/simpleThresholdPolicy.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/simpleThresholdPolicy.hpp	Tue May 17 09:29:56 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/vframeArray.cpp
--- a/src/share/vm/runtime/vframeArray.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/vframeArray.cpp	Tue May 17 09:29:56 2011 -0400
@@ -154,7 +154,8 @@
 
 int unpack_counter = 0;
 
-void vframeArrayElement::unpack_on_stack(int callee_parameters,
+void vframeArrayElement::unpack_on_stack(int caller_actual_parameters,
+                                         int callee_parameters,
                                          int callee_locals,
                                          frame* caller,
                                          bool is_top_frame,
@@ -270,6 +271,7 @@
                                  temps + callee_parameters,
                                  popframe_preserved_args_size_in_words,
                                  locks,
+                                 caller_actual_parameters,
                                  callee_parameters,
                                  callee_locals,
                                  caller,
@@ -415,7 +417,8 @@
 
 }
 
-int vframeArrayElement::on_stack_size(int callee_parameters,
+int vframeArrayElement::on_stack_size(int caller_actual_parameters,
+                                      int callee_parameters,
                                       int callee_locals,
                                       bool is_top_frame,
                                       int popframe_extra_stack_expression_els) const {
@@ -426,6 +429,7 @@
                                       temps + callee_parameters,
                                       popframe_extra_stack_expression_els,
                                       locks,
+                                      caller_actual_parameters,
                                       callee_parameters,
                                       callee_locals,
                                       is_top_frame);
@@ -496,7 +500,7 @@
   }
 }
 
-void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode) {
+void vframeArray::unpack_to_stack(frame &unpack_frame, int exec_mode, int caller_actual_parameters) {
   // stack picture
   //   unpack_frame
   //   [new interpreter frames ] (frames are skeletal but walkable)
@@ -525,7 +529,8 @@
   for (index = frames() - 1; index >= 0 ; index--) {
     int callee_parameters = index == 0 ? 0 : element(index-1)->method()->size_of_parameters();
     int callee_locals     = index == 0 ? 0 : element(index-1)->method()->max_locals();
-    element(index)->unpack_on_stack(callee_parameters,
+    element(index)->unpack_on_stack(caller_actual_parameters,
+                                    callee_parameters,
                                     callee_locals,
                                     &caller_frame,
                                     index == 0,
@@ -534,6 +539,7 @@
       Deoptimization::unwind_callee_save_values(element(index)->iframe(), this);
     }
     caller_frame = *element(index)->iframe();
+    caller_actual_parameters = callee_parameters;
   }
 
 
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/runtime/vframeArray.hpp
--- a/src/share/vm/runtime/vframeArray.hpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/runtime/vframeArray.hpp	Tue May 17 09:29:56 2011 -0400
@@ -83,13 +83,15 @@
 
   // Returns the on stack word size for this frame
   // callee_parameters is the number of callee locals residing inside this frame
-  int on_stack_size(int callee_parameters,
+  int on_stack_size(int caller_actual_parameters,
+                    int callee_parameters,
                     int callee_locals,
                     bool is_top_frame,
                     int popframe_extra_stack_expression_els) const;
 
   // Unpacks the element to skeletal interpreter frame
-  void unpack_on_stack(int callee_parameters,
+  void unpack_on_stack(int caller_actual_parameters,
+                       int callee_parameters,
                        int callee_locals,
                        frame* caller,
                        bool is_top_frame,
@@ -190,7 +192,7 @@
   int frame_size() const { return _frame_size; }
 
   // Unpack the array on the stack passed in stack interval
-  void unpack_to_stack(frame &unpack_frame, int exec_mode);
+  void unpack_to_stack(frame &unpack_frame, int exec_mode, int caller_actual_parameters);
 
   // Deallocates monitor chunks allocated during deoptimization.
   // This should be called when the array is not used anymore.
diff -r 03b943e6c025 -r 8bec9b249a6e src/share/vm/services/heapDumper.cpp
--- a/src/share/vm/services/heapDumper.cpp	Sun May 15 23:57:15 2011 -0400
+++ b/src/share/vm/services/heapDumper.cpp	Tue May 17 09:29:56 2011 -0400
@@ -1649,6 +1649,9 @@
         if (fr->is_entry_frame()) {
           last_entry_frame = fr;
         }
+        if (fr->is_ricochet_frame()) {
+          fr->oops_ricochet_do(&blk, vf->register_map());
+        }
       }
       vf = vf->sender();
     }
diff -r 03b943e6c025 -r 8bec9b249a6e test/compiler/7042153/Test7042153.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7042153/Test7042153.java	Tue May 17 09:29:56 2011 -0400
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 7042153
+ * @summary Bad folding of IfOps with unloaded constant arguments in C1
+ *
+ * @run main/othervm -Xcomp Test7042153
+ */
+
+import java.lang.reflect.*;
+
+public class Test7042153 {
+  static public class Bar { }
+  static public class Foo { }
+
+  static volatile boolean z;
+  public static void main(String [] args) {
+    Class cx = Bar.class;
+    Class cy = Foo.class;
+    z = (cx == cy);
+  }
+}