# HG changeset patch
# User acorn
# Date 1300128236 25200
# Node ID 4775a1e3e923d5d183fddf6b09460400163d291d
# Parent  02e6fc2effd815daccc8a070308ff244bbdf7eb4
# Parent  3d5a546351ef2b2a7876af8a9f5942563f40a6a1
Merge

diff -r 02e6fc2effd8 -r 4775a1e3e923 src/share/vm/code/nmethod.cpp
--- a/src/share/vm/code/nmethod.cpp	Fri Mar 11 22:41:33 2011 -0800
+++ b/src/share/vm/code/nmethod.cpp	Mon Mar 14 11:43:56 2011 -0700
@@ -170,7 +170,7 @@
   int pc_desc_resets;   // number of resets (= number of caches)
   int pc_desc_queries;  // queries to nmethod::find_pc_desc
   int pc_desc_approx;   // number of those which have approximate true
-  int pc_desc_repeats;  // number of _last_pc_desc hits
+  int pc_desc_repeats;  // number of _pc_descs[0] hits
   int pc_desc_hits;     // number of LRU cache hits
   int pc_desc_tests;    // total number of PcDesc examinations
   int pc_desc_searches; // total number of quasi-binary search steps
@@ -278,40 +278,44 @@
 
 void PcDescCache::reset_to(PcDesc* initial_pc_desc) {
   if (initial_pc_desc == NULL) {
-    _last_pc_desc = NULL;  // native method
+    _pc_descs[0] = NULL; // native method; no PcDescs at all
     return;
   }
   NOT_PRODUCT(++nmethod_stats.pc_desc_resets);
   // reset the cache by filling it with benign (non-null) values
   assert(initial_pc_desc->pc_offset() < 0, "must be sentinel");
-  _last_pc_desc = initial_pc_desc + 1;  // first valid one is after sentinel
   for (int i = 0; i < cache_size; i++)
     _pc_descs[i] = initial_pc_desc;
 }
 
 PcDesc* PcDescCache::find_pc_desc(int pc_offset, bool approximate) {
   NOT_PRODUCT(++nmethod_stats.pc_desc_queries);
-  NOT_PRODUCT(if (approximate) ++nmethod_stats.pc_desc_approx);
+  NOT_PRODUCT(if (approximate) ++nmethod_stats.pc_desc_approx);
+
+  // Note: one might think that caching the most recently
+  // read value separately would be a win, but one would be
+  // wrong.  When many threads are updating it, the cache
+  // line it's in would bounce between caches, negating
+  // any benefit.
 
   // In order to prevent race conditions do not load cache elements
   // repeatedly, but use a local copy:
   PcDesc* res;
 
-  // Step one:  Check the most recently returned value.
-  res = _last_pc_desc;
-  if (res == NULL) return NULL;  // native method; no PcDescs at all
+  // Step one:  Check the most recently added value.
+  res = _pc_descs[0];
+  if (res == NULL) return NULL;  // native method; no PcDescs at all
   if (match_desc(res, pc_offset, approximate)) {
     NOT_PRODUCT(++nmethod_stats.pc_desc_repeats);
     return res;
   }
 
-  // Step two:  Check the LRU cache.
-  for (int i = 0; i < cache_size; i++) {
+  // Step two:  Check the rest of the LRU cache.
+  for (int i = 1; i < cache_size; ++i) {
     res = _pc_descs[i];
-    if (res->pc_offset() < 0) break;  // optimization: skip empty cache
+    if (res->pc_offset() < 0) break;  // optimization: skip empty cache
     if (match_desc(res, pc_offset, approximate)) {
       NOT_PRODUCT(++nmethod_stats.pc_desc_hits);
-      _last_pc_desc = res;  // record this cache hit in case of repeat
       return res;
     }
   }
@@ -322,24 +326,23 @@
 
 void PcDescCache::add_pc_desc(PcDesc* pc_desc) {
   NOT_PRODUCT(++nmethod_stats.pc_desc_adds);
-  // Update the LRU cache by shifting pc_desc forward:
+  // Update the LRU cache by shifting pc_desc forward.
   for (int i = 0; i < cache_size; i++) {
     PcDesc* next = _pc_descs[i];
     _pc_descs[i] = pc_desc;
     pc_desc = next;
   }
-  // Note: Do not update _last_pc_desc.  It fronts for the LRU cache.
 }
 
 // adjust pcs_size so that it is a multiple of both oopSize and
 // sizeof(PcDesc) (assumes that if sizeof(PcDesc) is not a multiple
 // of oopSize, then 2*sizeof(PcDesc) is)
-static int adjust_pcs_size(int pcs_size) {
+static int adjust_pcs_size(int pcs_size) {
   int nsize = round_to(pcs_size, oopSize);
   if ((nsize % sizeof(PcDesc)) != 0) {
     nsize = pcs_size + sizeof(PcDesc);
   }
-  assert((nsize % oopSize) == 0, "correct alignment");
+  assert((nsize % oopSize) == 0, "correct alignment");
   return nsize;
 }
 
diff -r 02e6fc2effd8 -r 4775a1e3e923 src/share/vm/code/nmethod.hpp
--- a/src/share/vm/code/nmethod.hpp	Fri Mar 11 22:41:33 2011 -0800
+++ b/src/share/vm/code/nmethod.hpp	Mon Mar 14 11:43:56 2011 -0700
@@ -69,14 +69,13 @@
 
   friend class VMStructs;
  private:
   enum { cache_size = 4 };
-  PcDesc* _last_pc_desc;         // most recent pc_desc found
   PcDesc* _pc_descs[cache_size]; // last cache_size pc_descs found
  public:
-  PcDescCache() { debug_only(_last_pc_desc = NULL); }
+  PcDescCache() { debug_only(_pc_descs[0] = NULL); }
   void    reset_to(PcDesc* initial_pc_desc);
   PcDesc* find_pc_desc(int pc_offset, bool approximate);
   void    add_pc_desc(PcDesc* pc_desc);
-  PcDesc* last_pc_desc() { return _last_pc_desc; }
+  PcDesc* last_pc_desc() { return _pc_descs[0]; }
 };
 
@@ -178,7 +177,7 @@
   unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes?
 
   // Protected by Patching_lock
-  unsigned char _state;                      // {alive, not_entrant, zombie, unloaded)
+  unsigned char _state;                      // {alive, not_entrant, zombie, unloaded}
 
 #ifdef ASSERT
   bool _oops_are_stale;  // indicates that it's no longer safe to access oops section
diff -r 02e6fc2effd8 -r 4775a1e3e923 src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp	Fri Mar 11 22:41:33 2011 -0800
+++ b/src/share/vm/runtime/deoptimization.cpp	Mon Mar 14 11:43:56 2011 -0700
@@ -101,9 +101,9 @@
   _frame_pcs                 = frame_pcs;
   _register_block            = NEW_C_HEAP_ARRAY(intptr_t, RegisterMap::reg_count * 2);
   _return_type               = return_type;
+  _initial_fp                = 0;
   // PD (x86 only)
   _counter_temp              = 0;
-  _initial_fp                = 0;
   _unpack_kind               = 0;
   _sender_sp_temp            = 0;
 
@@ -459,18 +459,9 @@
                                       frame_sizes,
                                       frame_pcs,
                                       return_type);
-#if defined(IA32) || defined(AMD64)
-  // We need a way to pass fp to the unpacking code so the skeletal frames
-  // come out correct. This is only needed for x86 because of c2 using ebp
-  // as an allocatable register. So this update is useless (and harmless)
-  // on the other platforms. It would be nice to do this in a different
-  // way but even the old style deoptimization had a problem with deriving
-  // this value. NEEDS_CLEANUP
-  // Note: now that c1 is using c2's deopt blob we must do this on all
-  // x86 based platforms
-  intptr_t** fp_addr = (intptr_t**) (((address)info) + info->initial_fp_offset_in_bytes());
-  *fp_addr = array->sender().fp(); // was adapter_caller
-#endif /* IA32 || AMD64 */
+  // On some platforms, we need a way to pass fp to the unpacking code
+  // so the skeletal frames come out correct.
+  info->set_initial_fp((intptr_t) array->sender().fp());
 
   if (array->frames() > 1) {
     if (VerifyStack && TraceDeoptimization) {
diff -r 02e6fc2effd8 -r 4775a1e3e923 src/share/vm/runtime/deoptimization.hpp
--- a/src/share/vm/runtime/deoptimization.hpp	Fri Mar 11 22:41:33 2011 -0800
+++ b/src/share/vm/runtime/deoptimization.hpp	Mon Mar 14 11:43:56 2011 -0700
@@ -136,12 +136,12 @@
     address*  _frame_pcs;                 // Array of frame pc's, in bytes, for unrolling the stack
     intptr_t* _register_block;            // Block for storing callee-saved registers.
     BasicType _return_type;               // Tells if we have to restore double or long return value
+    intptr_t  _initial_fp;                // FP of the sender frame
     // The following fields are used as temps during the unpacking phase
     // (which is tight on registers, especially on x86). They really ought
     // to be PD variables but that involves moving this class into its own
     // file to use the pd include mechanism. Maybe in a later cleanup ...
     intptr_t  _counter_temp;              // SHOULD BE PD VARIABLE (x86 frame count temp)
-    intptr_t  _initial_fp;                // SHOULD BE PD VARIABLE (x86/c2 initial ebp)
    intptr_t  _unpack_kind;               // SHOULD BE PD VARIABLE (x86 unpack kind)
     intptr_t  _sender_sp_temp;            // SHOULD BE PD VARIABLE (x86 sender_sp)
    public:
@@ -165,6 +165,8 @@
     // Returns the total size of frames
     int size_of_frames() const;
 
+    void set_initial_fp(intptr_t fp) { _initial_fp = fp; }
+
     // Accessors used by the code generator for the unpack stub.
     static int size_of_deoptimized_frame_offset_in_bytes() { return offset_of(UnrollBlock, _size_of_deoptimized_frame); }
     static int caller_adjustment_offset_in_bytes()         { return offset_of(UnrollBlock, _caller_adjustment); }
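
The heart of the nmethod.cpp change is dropping the shared _last_pc_desc field: every lookup hit used to store into it, so under contention the cache line holding it bounced between CPUs, as the new comment in the diff explains. After the patch, slot 0 of the LRU array plays that role and the lookup path performs no stores at all. The standalone C++ sketch below (hypothetical names, types, and sizes; not HotSpot code) illustrates the resulting shape: find() only reads, and add() is the lone writer, shifting entries so the newest lands in slot 0.

#include <cstdio>

// Hypothetical stand-in for HotSpot's PcDesc: maps a pc offset to debug info.
struct Desc {
  int pc_offset;
};

// Sketch of the patched PcDescCache layout: no separate "last hit" pointer.
// Slot 0 is simply the most recently added entry, so a repeat query hits it
// first without any bookkeeping write on the lookup path.
class SmallLruCache {
  enum { cache_size = 4 };
  Desc* _descs[cache_size];

 public:
  explicit SmallLruCache(Desc* sentinel) {
    // Fill with a benign sentinel (pc_offset < 0), mirroring reset_to().
    for (int i = 0; i < cache_size; i++) _descs[i] = sentinel;
  }

  // Read-only lookup: no stores, so concurrent readers never ping-pong
  // the cache line the way updating a shared "last hit" field would.
  Desc* find(int pc_offset) {
    for (int i = 0; i < cache_size; i++) {
      Desc* res = _descs[i];          // local copy, loaded once
      if (res->pc_offset < 0) break;  // sentinel: rest of cache is empty
      if (res->pc_offset == pc_offset) return res;
    }
    return nullptr;
  }

  // The only writer: shift entries toward the back, newest into slot 0,
  // just like the add_pc_desc() loop in the diff.
  void add(Desc* d) {
    for (int i = 0; i < cache_size; i++) {
      Desc* next = _descs[i];
      _descs[i] = d;
      d = next;
    }
  }
};

int main() {
  Desc sentinel{-1}, a{8}, b{16};
  SmallLruCache cache(&sentinel);
  cache.add(&a);
  cache.add(&b);  // b is now the most recent, in slot 0
  std::printf("find(16): %s\n", cache.find(16) ? "hit" : "miss");
  std::printf("find(8):  %s\n", cache.find(8)  ? "hit" : "miss");
  std::printf("find(99): %s\n", cache.find(99) ? "hit" : "miss");
  return 0;
}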
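
The adjust_pcs_size helper the diff touches (whitespace only) rounds the PcDesc-array size up to a multiple of both oopSize and sizeof(PcDesc), relying on the stated assumption that when sizeof(PcDesc) is not itself a multiple of oopSize, 2*sizeof(PcDesc) is. A quick standalone check of that arithmetic, with oopSize and sizeof(PcDesc) stood in by the purely illustrative constants 8 and 12:

#include <cassert>
#include <cstdio>

// Illustrative stand-ins; real values depend on platform and VM build.
const int kOopSize  = 8;   // pretend oopSize
const int kDescSize = 12;  // pretend sizeof(PcDesc); note 2*12 is a multiple of 8

// round_to(x, n): round x up to a multiple of n (n is a power of two).
static int round_to(int x, int n) {
  return (x + n - 1) & ~(n - 1);
}

// Same shape as adjust_pcs_size in the diff: make the result a multiple
// of both kOopSize and kDescSize.
static int adjust_pcs_size(int pcs_size) {
  int nsize = round_to(pcs_size, kOopSize);
  if ((nsize % kDescSize) != 0) {
    nsize = pcs_size + kDescSize;
  }
  assert((nsize % kOopSize) == 0 && (nsize % kDescSize) == 0);
  return nsize;
}

int main() {
  // pcs_size is always a whole number of descriptors (k * kDescSize):
  // even k is already a multiple of both; odd k gets bumped by one desc.
  for (int k = 1; k <= 6; k++) {
    int in = k * kDescSize;
    std::printf("%d descs: %3d -> %3d\n", k, in, adjust_pcs_size(in));
  }
  return 0;
}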