diff src/cpu/x86/vm/sharedRuntime_x86_64.cpp @ 6948:e522a00b91aa

Merge with http://hg.openjdk.java.net/hsx/hsx25/hotspot/ after NPG - C++ build works
author Doug Simon <doug.simon@oracle.com>
date Mon, 12 Nov 2012 23:14:12 +0100
parents ea845fd3c820 18fb7da42534
children 5d0bb7d52783
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Nov 12 18:11:17 2012 +0100
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Nov 12 23:14:12 2012 +0100
@@ -29,7 +29,7 @@
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
 #include "interpreter/interpreter.hpp"
-#include "oops/compiledICHolderOop.hpp"
+#include "oops/compiledICHolder.hpp"
 #include "prims/jvmtiRedefineClassesTrace.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/vframeArray.hpp"
@@ -116,8 +116,8 @@
   };
 
  public:
-  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
-  static void restore_live_registers(MacroAssembler* masm);
+  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
+  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
 
   // Offsets into the register save area
   // Used by deoptimization when it is managing result register
@@ -135,7 +135,19 @@
   static void restore_result_registers(MacroAssembler* masm);
 };
 
-OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+  int vect_words = 0;
+#ifdef COMPILER2
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    // Save upper half of YMM registers
+    vect_words = 16 * 16 / wordSize;
+    additional_frame_words += vect_words;
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2");
+#endif
 
   // Always make the frame size 16-byte aligned
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
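
A note on the sizing above: each of the sixteen YMM registers has a 16-byte upper half, so 256 bytes are reserved, expressed in machine words. A standalone sketch of the arithmetic, assuming an LP64 target where wordSize is 8:

    #include <cassert>

    int main() {
      const int wordSize   = 8;                   // LP64 assumption
      const int vect_words = 16 * 16 / wordSize;  // 16 regs x 16-byte upper halves
      assert(vect_words == 32);                   // 32 words == 256 bytes, matching
      return 0;                                   // the stub's subptr(rsp, 256)
    }
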
@@ -156,6 +168,27 @@
 
   __ enter();          // rsp becomes 16-byte aligned here
   __ push_CPU_state(); // Push a multiple of 16 bytes
+
+  if (vect_words > 0) {
+    assert(vect_words*wordSize == 256, "");
+    __ subptr(rsp, 256); // Save upper half of YMM registers
+    __ vextractf128h(Address(rsp,  0),xmm0);
+    __ vextractf128h(Address(rsp, 16),xmm1);
+    __ vextractf128h(Address(rsp, 32),xmm2);
+    __ vextractf128h(Address(rsp, 48),xmm3);
+    __ vextractf128h(Address(rsp, 64),xmm4);
+    __ vextractf128h(Address(rsp, 80),xmm5);
+    __ vextractf128h(Address(rsp, 96),xmm6);
+    __ vextractf128h(Address(rsp,112),xmm7);
+    __ vextractf128h(Address(rsp,128),xmm8);
+    __ vextractf128h(Address(rsp,144),xmm9);
+    __ vextractf128h(Address(rsp,160),xmm10);
+    __ vextractf128h(Address(rsp,176),xmm11);
+    __ vextractf128h(Address(rsp,192),xmm12);
+    __ vextractf128h(Address(rsp,208),xmm13);
+    __ vextractf128h(Address(rsp,224),xmm14);
+    __ vextractf128h(Address(rsp,240),xmm15);
+  }
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
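
The run of vextractf128h stores above spills the high 128 bits of ymm0 through ymm15 into consecutive 16-byte slots just allocated below rsp. A rough user-land analogue for a single register, using AVX intrinsics (the buffer is a hypothetical stand-in for the stub's stack slots; compile with AVX enabled):

    #include <immintrin.h>

    // Store the upper 128 bits of one 256-bit register into a 16-byte slot,
    // the same effect as one vextractf128h(Address(rsp, n), xmmN) above.
    void save_upper_half(__m256 ymm, float slot[4]) {
      __m128 hi = _mm256_extractf128_ps(ymm, 1);  // imm = 1 selects the high half
      _mm_storeu_ps(slot, hi);                    // unaligned 16-byte store
    }
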
@@ -168,112 +201,111 @@
 
   OopMapSet *oop_maps = new OopMapSet();
   OopMap* map = new OopMap(frame_size_in_slots, 0);
-  map->set_callee_saved(VMRegImpl::stack2reg( rax_off  + additional_frame_slots), rax->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( rcx_off  + additional_frame_slots), rcx->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( rdx_off  + additional_frame_slots), rdx->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( rbx_off  + additional_frame_slots), rbx->as_VMReg());
+
+#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
+
+  map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
   // rbp location is known implicitly by the frame sender code, needs no oopmap
   // and the location where rbp was saved is ignored
-  map->set_callee_saved(VMRegImpl::stack2reg( rsi_off  + additional_frame_slots), rsi->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( rdi_off  + additional_frame_slots), rdi->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( r8_off   + additional_frame_slots), r8->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( r9_off   + additional_frame_slots), r9->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( r10_off  + additional_frame_slots), r10->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( r11_off  + additional_frame_slots), r11->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( r12_off  + additional_frame_slots), r12->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( r13_off  + additional_frame_slots), r13->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( r14_off  + additional_frame_slots), r14->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg( r15_off  + additional_frame_slots), r15->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm0_off  + additional_frame_slots), xmm0->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm1_off  + additional_frame_slots), xmm1->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm2_off  + additional_frame_slots), xmm2->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm3_off  + additional_frame_slots), xmm3->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm4_off  + additional_frame_slots), xmm4->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm5_off  + additional_frame_slots), xmm5->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm6_off  + additional_frame_slots), xmm6->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm7_off  + additional_frame_slots), xmm7->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm8_off  + additional_frame_slots), xmm8->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm9_off  + additional_frame_slots), xmm9->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm10_off + additional_frame_slots), xmm10->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm11_off + additional_frame_slots), xmm11->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm12_off + additional_frame_slots), xmm12->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm13_off + additional_frame_slots), xmm13->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm14_off + additional_frame_slots), xmm14->as_VMReg());
-  map->set_callee_saved(VMRegImpl::stack2reg(xmm15_off + additional_frame_slots), xmm15->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
+  map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
 
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
-    map->set_callee_saved(VMRegImpl::stack2reg( raxH_off  + additional_frame_slots),
-                          rax->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( rcxH_off  + additional_frame_slots),
-                          rcx->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( rdxH_off  + additional_frame_slots),
-                          rdx->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( rbxH_off  + additional_frame_slots),
-                          rbx->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
     // rbp location is known implicitly by the frame sender code, needs no oopmap
-    map->set_callee_saved(VMRegImpl::stack2reg( rsiH_off  + additional_frame_slots),
-                          rsi->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( rdiH_off  + additional_frame_slots),
-                          rdi->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( r8H_off   + additional_frame_slots),
-                          r8->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( r9H_off   + additional_frame_slots),
-                          r9->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( r10H_off  + additional_frame_slots),
-                          r10->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( r11H_off  + additional_frame_slots),
-                          r11->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( r12H_off  + additional_frame_slots),
-                          r12->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( r13H_off  + additional_frame_slots),
-                          r13->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( r14H_off  + additional_frame_slots),
-                          r14->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg( r15H_off  + additional_frame_slots),
-                          r15->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm0H_off  + additional_frame_slots),
-                          xmm0->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm1H_off  + additional_frame_slots),
-                          xmm1->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm2H_off  + additional_frame_slots),
-                          xmm2->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm3H_off  + additional_frame_slots),
-                          xmm3->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm4H_off  + additional_frame_slots),
-                          xmm4->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm5H_off  + additional_frame_slots),
-                          xmm5->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm6H_off  + additional_frame_slots),
-                          xmm6->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm7H_off  + additional_frame_slots),
-                          xmm7->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm8H_off  + additional_frame_slots),
-                          xmm8->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm9H_off  + additional_frame_slots),
-                          xmm9->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm10H_off + additional_frame_slots),
-                          xmm10->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm11H_off + additional_frame_slots),
-                          xmm11->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm12H_off + additional_frame_slots),
-                          xmm12->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm13H_off + additional_frame_slots),
-                          xmm13->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm14H_off + additional_frame_slots),
-                          xmm14->as_VMReg()->next());
-    map->set_callee_saved(VMRegImpl::stack2reg(xmm15H_off + additional_frame_slots),
-                          xmm15->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
+    map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
   }
 
   return map;
 }
 
-void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
   if (frame::arg_reg_save_area_bytes != 0) {
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }
+#ifdef COMPILER2
+  if (restore_vectors) {
+    // Restore upper half of YMM registers.
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    __ vinsertf128h(xmm0, Address(rsp,  0));
+    __ vinsertf128h(xmm1, Address(rsp, 16));
+    __ vinsertf128h(xmm2, Address(rsp, 32));
+    __ vinsertf128h(xmm3, Address(rsp, 48));
+    __ vinsertf128h(xmm4, Address(rsp, 64));
+    __ vinsertf128h(xmm5, Address(rsp, 80));
+    __ vinsertf128h(xmm6, Address(rsp, 96));
+    __ vinsertf128h(xmm7, Address(rsp,112));
+    __ vinsertf128h(xmm8, Address(rsp,128));
+    __ vinsertf128h(xmm9, Address(rsp,144));
+    __ vinsertf128h(xmm10, Address(rsp,160));
+    __ vinsertf128h(xmm11, Address(rsp,176));
+    __ vinsertf128h(xmm12, Address(rsp,192));
+    __ vinsertf128h(xmm13, Address(rsp,208));
+    __ vinsertf128h(xmm14, Address(rsp,224));
+    __ vinsertf128h(xmm15, Address(rsp,240));
+    __ addptr(rsp, 256);
+  }
+#else
+  assert(!restore_vectors, "vectors are generated only by C2");
+#endif
   // Recover CPU state
   __ pop_CPU_state();
   // Get the rbp described implicitly by the calling convention (no oopMap)
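
The STACK_OFFSET macro in the hunk above is pure de-duplication: every callee-saved map entry previously spelled out VMRegImpl::stack2reg(x + additional_frame_slots), and the restore path mirrors the save path the same way (vinsertf128h undoing vextractf128h, then addptr reclaiming the 256 bytes). A minimal sketch of the macro's expansion, with a stand-in for stack2reg:

    static int stack2reg(int slot) { return slot; }  // stand-in for VMRegImpl::stack2reg
    static const int additional_frame_slots = 4;     // hypothetical value

    #define STACK_OFFSET(x) stack2reg((x) + additional_frame_slots)

    int main() {
      const int rax_off = 0;
      // Expands to stack2reg((rax_off) + additional_frame_slots).
      return STACK_OFFSET(rax_off) == 4 ? 0 : 1;
    }
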
@@ -298,6 +330,12 @@
   __ addptr(rsp, return_offset_in_bytes());
 }
 
+// Is the vector's size (in bytes) bigger than the size saved by default?
+// The 16-byte XMM registers are saved by default by fxsave/fxrstor instructions.
+bool SharedRuntime::is_wide_vector(int size) {
+  return size > 16;
+}
+
 // The java_calling_convention describes stack locations as ideal slots on
 // a frame with no abi restrictions. Since we must observe abi restrictions
 // (like the placement of the register window) the slots must be biased by
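
The new predicate encodes the fxsave/fxrstor boundary that motivates this whole change: push_CPU_state/pop_CPU_state cover only the 128-bit XMM state, so any vector wider than 16 bytes needs the explicit YMM spill code above. A small usage sketch:

    #include <cassert>

    // Mirrors SharedRuntime::is_wide_vector: 16 bytes is the per-register
    // XMM state covered by fxsave/fxrstor.
    static bool is_wide_vector(int size_in_bytes) { return size_in_bytes > 16; }

    int main() {
      assert(!is_wide_vector(16));  // plain XMM vector: saved by default
      assert( is_wide_vector(32));  // 256-bit YMM vector: needs save_vectors
      return 0;
    }
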
@@ -414,8 +452,7 @@
 // Patch the callers callsite with entry to compiled code if it exists.
 static void patch_callers_callsite(MacroAssembler *masm) {
   Label L;
-  __ verify_oop(rbx);
-  __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD);
+  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
   __ jcc(Assembler::equal, L);
 
   // Save the current stack pointer
@@ -429,8 +466,6 @@
   __ andptr(rsp, -(StackAlignmentInBytes));
   __ push_CPU_state();
 
-
-  __ verify_oop(rbx);
   // VM needs caller's callsite
   // VM needs target method
   // This needs to be a long call since we will relocate this adapter to
@@ -587,7 +622,7 @@
   }
 
   // Schedule the branch target address early.
-  __ movptr(rcx, Address(rbx, in_bytes(methodOopDesc::interpreter_entry_offset())));
+  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
   __ jmp(rcx);
 }
 
@@ -699,7 +734,7 @@
 
   // Will jump to the compiled code just as if compiled code was doing it.
   // Pre-load the register-jump target early, to schedule it better.
-  __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));
+  __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
 
 #ifdef GRAAL
   // check if this call should be routed towards a specific entry point
@@ -804,8 +839,8 @@
 
   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
 
-  // put methodOop where a c2i would expect it, should we end up there
-  // only needed because c2 resolve stubs return methodOop as a result in
+  // put Method* where a c2i would expect it, should we end up there
+  // only needed because c2 resolve stubs return Method* as a result in
   // rax
   __ mov(rax, rbx);
   __ jmp(r11);
@@ -823,7 +858,7 @@
   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 
   // -------------------------------------------------------------------------
-  // Generate a C2I adapter.  On entry we know rbx holds the methodOop during calls
+  // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
   // to the interpreter.  The args start out packed in the compiled layout.  They
   // need to be unpacked into the interpreter layout.  This will almost always
   // require some stack space.  We grow the current (compiled) stack, then repack
@@ -840,12 +875,9 @@
   Register temp = rbx;
 
   {
-    __ verify_oop(holder);
     __ load_klass(temp, receiver);
-    __ verify_oop(temp);
-
-    __ cmpptr(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
-    __ movptr(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
+    __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
+    __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
     __ jcc(Assembler::equal, ok);
     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
@@ -853,7 +885,7 @@
     // Method might have been compiled since the call site was patched to
     // interpreted; if that is the case, treat it as a miss so we can get
     // the call site corrected.
-    __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD);
+    __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
     __ jcc(Assembler::equal, skip_fixup);
     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
   }
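
The receiver check above is the inline-cache guard rewritten for the metadata world: the holder is now a native CompiledICHolder rather than a compiledICHolderOop, but the control flow is unchanged. A plain-C++ sketch of that flow, with hypothetical structs standing in for the HotSpot types:

    struct Klass;
    struct Method;

    // Hypothetical stand-in: the real CompiledICHolder exposes these through
    // holder_klass_offset() / holder_method_offset() accessors.
    struct CompiledICHolder {
      Klass*  holder_klass;   // expected receiver klass
      Method* holder_method;  // Method* to call on a hit
    };

    // Mirrors the cmpptr/jcc/jump sequence above.
    Method* ic_check(Klass* receiver_klass, const CompiledICHolder* holder,
                     Method* (*ic_miss_stub)()) {
      if (receiver_klass != holder->holder_klass) {
        return ic_miss_stub();       // SharedRuntime::get_ic_miss_stub()
      }
      return holder->holder_method;  // hit: dispatch with the cached Method*
    }
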
@@ -920,6 +952,7 @@
       case T_OBJECT:
       case T_ARRAY:
       case T_ADDRESS:
+      case T_METADATA:
         if (int_args < Argument::n_int_register_parameters_c) {
           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 #ifdef _WIN64
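
T_METADATA joining the pointer cases is the calling-convention side of NPG: Method* and Klass* arguments now travel through c_calling_convention exactly like object pointers. A trimmed sketch of the classification:

    // Trimmed BasicType sketch: metadata pointers are grouped with the other
    // pointer-width kinds and take an integer argument register while any remain.
    enum BasicType { T_OBJECT, T_ARRAY, T_ADDRESS, T_METADATA, T_INT, T_FLOAT };

    static bool is_pointer_arg(BasicType bt) {
      switch (bt) {
        case T_OBJECT: case T_ARRAY: case T_ADDRESS: case T_METADATA:
          return true;   // pointer-sized: eligible for INT_ArgReg registers
        default:
          return false;
      }
    }
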
@@ -1609,12 +1642,12 @@
 };
 
 static void verify_oop_args(MacroAssembler* masm,
-                            int total_args_passed,
+                            methodHandle method,
                             const BasicType* sig_bt,
                             const VMRegPair* regs) {
   Register temp_reg = rbx;  // not part of any compiled calling seq
   if (VerifyOops) {
-    for (int i = 0; i < total_args_passed; i++) {
+    for (int i = 0; i < method->size_of_parameters(); i++) {
       if (sig_bt[i] == T_OBJECT ||
           sig_bt[i] == T_ARRAY) {
         VMReg r = regs[i].first();
@@ -1631,35 +1664,32 @@
 }
 
 static void gen_special_dispatch(MacroAssembler* masm,
-                                 int total_args_passed,
-                                 int comp_args_on_stack,
-                                 vmIntrinsics::ID special_dispatch,
+                                 methodHandle method,
                                  const BasicType* sig_bt,
                                  const VMRegPair* regs) {
-  verify_oop_args(masm, total_args_passed, sig_bt, regs);
+  verify_oop_args(masm, method, sig_bt, regs);
+  vmIntrinsics::ID iid = method->intrinsic_id();
 
   // Now write the args into the outgoing interpreter space
   bool     has_receiver   = false;
   Register receiver_reg   = noreg;
   int      member_arg_pos = -1;
   Register member_reg     = noreg;
-  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
+  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
   if (ref_kind != 0) {
-    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
+    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
     member_reg = rbx;  // known to be free at this point
     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
-  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
+  } else if (iid == vmIntrinsics::_invokeBasic) {
     has_receiver = true;
   } else {
-    guarantee(false, err_msg("special_dispatch=%d", special_dispatch));
+    fatal(err_msg_res("unexpected intrinsic id %d", iid));
   }
 
   if (member_reg != noreg) {
     // Load the member_arg into register, if necessary.
-    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
-    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
+    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
     VMReg r = regs[member_arg_pos].first();
-    assert(r->is_valid(), "bad member arg");
     if (r->is_stack()) {
       __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
     } else {
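
The refactor above replaces the loose (total_args_passed, special_dispatch) parameters with the methodHandle itself, deriving the intrinsic id and parameter count on the spot. A sketch of the resulting classification, with the ref-kind receiver test passed in as a flag (hypothetical signature):

    #include <cstdlib>

    struct DispatchInfo {
      bool has_receiver;
      int  member_arg_pos;  // -1 when there is no trailing MemberName
    };

    // A nonzero ref_kind marks a signature-polymorphic linkTo* intrinsic whose
    // last argument is the MemberName; invokeBasic has a receiver but no
    // MemberName; anything else is a fatal error.
    DispatchInfo classify(int ref_kind, bool ref_kind_has_receiver,
                          bool is_invoke_basic, int size_of_parameters) {
      if (ref_kind != 0) {
        return { ref_kind_has_receiver, size_of_parameters - 1 };
      }
      if (is_invoke_basic) {
        return { true, -1 };
      }
      std::abort();  // mirrors fatal(err_msg_res("unexpected intrinsic id %d", iid))
    }
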
@@ -1670,7 +1700,7 @@
 
   if (has_receiver) {
     // Make sure the receiver is loaded into a register.
-    assert(total_args_passed > 0, "oob");
+    assert(method->size_of_parameters() > 0, "oob");
     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
     VMReg r = regs[0].first();
     assert(r->is_valid(), "bad receiver arg");
@@ -1678,7 +1708,7 @@
       // Porting note:  This assumes that compiled calling conventions always
       // pass the receiver oop in a register.  If this is not true on some
       // platform, pick a temp and load the receiver from stack.
-      assert(false, "receiver always in a register");
+      fatal("receiver always in a register");
       receiver_reg = j_rarg0;  // known to be free at this point
       __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
     } else {
@@ -1688,7 +1718,7 @@
   }
 
   // Figure out which address we are really jumping to:
-  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
+  MethodHandles::generate_method_handle_dispatch(masm, iid,
                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
 }
 
@@ -1724,8 +1754,6 @@
 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                 methodHandle method,
                                                 int compile_id,
-                                                int total_in_args,
-                                                int comp_args_on_stack,
                                                 BasicType* in_sig_bt,
                                                 VMRegPair* in_regs,
                                                 BasicType ret_type) {
@@ -1734,9 +1762,7 @@
     intptr_t start = (intptr_t)__ pc();
     int vep_offset = ((intptr_t)__ pc()) - start;
     gen_special_dispatch(masm,
-                         total_in_args,
-                         comp_args_on_stack,
-                         method->intrinsic_id(),
+                         method,
                          in_sig_bt,
                          in_regs);
     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
@@ -1770,6 +1796,7 @@
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
 
+  const int total_in_args = method->size_of_parameters();
   int total_c_args = total_in_args;
   if (!is_critical_native) {
     total_c_args += 1;
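
With total_in_args now read off the method, the C-signature sizing is unchanged: every non-critical native call gains a hidden JNIEnv* argument, and static methods additionally gain the class mirror. A sketch of that count (the static-method increment sits just past the end of this hunk):

    // Hidden-argument count for a normal JNI wrapper: JNIEnv* is always
    // prepended, and static methods also receive the class mirror (jclass).
    int total_c_arg_count(int total_in_args, bool is_static, bool is_critical_native) {
      int total_c_args = total_in_args;
      if (!is_critical_native) {
        total_c_args += 1;      // hidden JNIEnv*
        if (is_static) {
          total_c_args += 1;    // hidden jclass
        }
      }
      return total_c_args;
    }
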
@@ -2207,7 +2234,7 @@
   if (method->is_static() && !is_critical_native) {
 
     //  load oop into a register
-    __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
+    __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
 
     // Now handlize the static class mirror; it's known not-null.
     __ movptr(Address(rsp, klass_offset), oop_handle_reg);
@@ -2239,7 +2266,7 @@
     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
     // protect the args we've loaded
     save_args(masm, total_c_args, c_arg, out_regs);
-    __ movoop(c_rarg1, JNIHandles::make_local(method()));
+    __ mov_metadata(c_rarg1, method());
     __ call_VM_leaf(
       CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
       r15_thread, c_rarg1);
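
The movoop-to-mov_metadata switch is the NPG change in miniature: a Method* is no longer a Java-heap oop, so it needs no JNI handle to stay valid across the VM-leaf call and can be materialized as a raw metadata pointer. A type-level sketch of the distinction (hypothetical types):

    // Hypothetical stand-ins: oops are GC-managed and may move, so pre-NPG code
    // wrapped the methodOop in a local JNI handle before the call; Method* now
    // lives in native Metaspace and its address is stable.
    struct oopDesc;  typedef oopDesc* oop;  // Java-heap object: handle required
    struct Method;                          // native metadata: raw pointer is safe

    void pass_method(Method* m, Method** c_rarg1) {
      *c_rarg1 = m;  // new shape: mov_metadata, a direct pointer load
    }
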
@@ -2250,7 +2277,7 @@
   if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
     // protect the args we've loaded
     save_args(masm, total_c_args, c_arg, out_regs);
-    __ movoop(c_rarg1, JNIHandles::make_local(method()));
+    __ mov_metadata(c_rarg1, method());
     __ call_VM_leaf(
       CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
       r15_thread, c_rarg1);
@@ -2503,7 +2530,7 @@
   {
     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
     save_native_result(masm, ret_type, stack_slots);
-    __ movoop(c_rarg1, JNIHandles::make_local(method()));
+    __ mov_metadata(c_rarg1, method());
     __ call_VM_leaf(
          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
          r15_thread, c_rarg1);
@@ -3301,7 +3328,6 @@
   return 0;
 }
 
-
 //------------------------------generate_deopt_blob----------------------------
 void SharedRuntime::generate_deopt_blob() {
   // Allocate space for the code
@@ -3842,7 +3868,7 @@
 // Generate a special Compile2Runtime blob that saves all registers,
 // and setup oopmap.
 //
-SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
   assert(StubRoutines::forward_exception_entry() != NULL,
          "must be generated before");
 
@@ -3857,6 +3883,8 @@
   address start   = __ pc();
   address call_pc = NULL;
   int frame_size_in_words;
+  bool cause_return = (poll_type == POLL_AT_RETURN);
+  bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
 
   // Make room for return address (or push it again)
   if (!cause_return) {
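
generate_handler_blob trades its bool for an int poll_type so a third variant can be distinguished; the two flags above are decoded from it. A sketch of that decoding, assuming the SharedRuntime constants POLL_AT_RETURN, POLL_AT_LOOP, and POLL_AT_VECTOR_LOOP:

    // POLL_AT_VECTOR_LOOP is the case this change adds: a safepoint poll inside
    // a vectorized loop, where live YMM upper halves must survive the blob.
    enum PollType { POLL_AT_RETURN, POLL_AT_LOOP, POLL_AT_VECTOR_LOOP };

    struct HandlerBlobFlags { bool cause_return; bool save_vectors; };

    HandlerBlobFlags decode_poll_type(int poll_type) {
      return { poll_type == POLL_AT_RETURN,         // poll sits at a return
               poll_type == POLL_AT_VECTOR_LOOP };  // wide vectors are live
    }
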
@@ -3864,7 +3892,7 @@
   }
 
   // Save registers, fpu state, and flags
-  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
 
   // The following is basically a call_VM.  However, we need the precise
   // address of the call in order to generate an oopmap. Hence, we do all the
@@ -3901,7 +3929,7 @@
 
   // Exception pending
 
-  RegisterSaver::restore_live_registers(masm);
+  RegisterSaver::restore_live_registers(masm, save_vectors);
 
   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 
@@ -3909,7 +3937,7 @@
   __ bind(noException);
 
   // Normal exit, restore registers and exit.
-  RegisterSaver::restore_live_registers(masm);
+  RegisterSaver::restore_live_registers(masm, save_vectors);
 
   __ ret(0);
 
@@ -3970,8 +3998,8 @@
   __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
   __ jcc(Assembler::notEqual, pending);
 
-  // get the returned methodOop
-  __ movptr(rbx, Address(r15_thread, JavaThread::vm_result_offset()));
+  // get the returned Method*
+  __ get_vm_result_2(rbx, r15_thread);
   __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);
 
   __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
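
get_vm_result_2 replaces the raw load of JavaThread::vm_result_offset(): a returned Method* is metadata, so it comes back in the thread's second result slot. A sketch of the helper's effect, assuming it loads the slot and then clears it (field name hypothetical):

    // Sketch of MacroAssembler::get_vm_result_2's effect: fetch the thread-local
    // metadata result and clear the slot so a stale value cannot leak into a
    // later call.
    struct Metadata;
    struct JavaThread { Metadata* vm_result_2; };

    Metadata* get_vm_result_2(JavaThread* thread) {
      Metadata* m = thread->vm_result_2;
      thread->vm_result_2 = nullptr;
      return m;
    }
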