# HG changeset patch
# User acorn
# Date 1300128236 25200
# Node ID 4775a1e3e923d5d183fddf6b09460400163d291d
# Parent  02e6fc2effd815daccc8a070308ff244bbdf7eb4
# Parent  3d5a546351ef2b2a7876af8a9f5942563f40a6a1
Merge

diff -r 02e6fc2effd8 -r 4775a1e3e923 src/share/vm/code/nmethod.cpp
--- a/src/share/vm/code/nmethod.cpp	Fri Mar 11 22:41:33 2011 -0800
+++ b/src/share/vm/code/nmethod.cpp	Mon Mar 14 11:43:56 2011 -0700
@@ -170,7 +170,7 @@
   int pc_desc_resets;   // number of resets (= number of caches)
   int pc_desc_queries;  // queries to nmethod::find_pc_desc
   int pc_desc_approx;   // number of those which have approximate true
-  int pc_desc_repeats;  // number of _last_pc_desc hits
+  int pc_desc_repeats;  // number of _pc_descs[0] hits
   int pc_desc_hits;     // number of LRU cache hits
   int pc_desc_tests;    // total number of PcDesc examinations
   int pc_desc_searches; // total number of quasi-binary search steps
@@ -278,40 +278,44 @@
 
 void PcDescCache::reset_to(PcDesc* initial_pc_desc) {
   if (initial_pc_desc == NULL) {
-    _last_pc_desc = NULL;  // native method
+    _pc_descs[0] = NULL; // native method; no PcDescs at all
     return;
   }
   NOT_PRODUCT(++nmethod_stats.pc_desc_resets);
   // reset the cache by filling it with benign (non-null) values
   assert(initial_pc_desc->pc_offset() < 0, "must be sentinel");
-  _last_pc_desc = initial_pc_desc + 1;  // first valid one is after sentinel
   for (int i = 0; i < cache_size; i++)
     _pc_descs[i] = initial_pc_desc;
 }
 
 PcDesc* PcDescCache::find_pc_desc(int pc_offset, bool approximate) {
   NOT_PRODUCT(++nmethod_stats.pc_desc_queries);
-  NOT_PRODUCT(if (approximate) ++nmethod_stats.pc_desc_approx);
+  NOT_PRODUCT(if (approximate) ++nmethod_stats.pc_desc_approx);
+
+  // Note: one might think that caching the most recently
+  // read value separately would be a win, but one would be
+  // wrong.  When many threads are updating it, the cache
+  // line it's in would bounce between caches, negating
+  // any benefit.
 
   // In order to prevent race conditions do not load cache elements
   // repeatedly, but use a local copy:
   PcDesc* res;
 
-  // Step one:  Check the most recently returned value.
-  res = _last_pc_desc;
-  if (res == NULL) return NULL;  // native method; no PcDescs at all
+  // Step one:  Check the most recently added value.
+  res = _pc_descs[0];
+  if (res == NULL) return NULL;  // native method; no PcDescs at all
   if (match_desc(res, pc_offset, approximate)) {
     NOT_PRODUCT(++nmethod_stats.pc_desc_repeats);
     return res;
   }
 
-  // Step two:  Check the LRU cache.
-  for (int i = 0; i < cache_size; i++) {
+  // Step two:  Check the rest of the LRU cache.
+  for (int i = 1; i < cache_size; ++i) {
     res = _pc_descs[i];
-    if (res->pc_offset() < 0) break;  // optimization: skip empty cache
+    if (res->pc_offset() < 0) break;  // optimization: skip empty cache
     if (match_desc(res, pc_offset, approximate)) {
       NOT_PRODUCT(++nmethod_stats.pc_desc_hits);
-      _last_pc_desc = res;  // record this cache hit in case of repeat
       return res;
     }
   }
@@ -322,24 +326,23 @@
 
 void PcDescCache::add_pc_desc(PcDesc* pc_desc) {
   NOT_PRODUCT(++nmethod_stats.pc_desc_adds);
-  // Update the LRU cache by shifting pc_desc forward:
+  // Update the LRU cache by shifting pc_desc forward.
   for (int i = 0; i < cache_size; i++) {
     PcDesc* next = _pc_descs[i];
     _pc_descs[i] = pc_desc;
     pc_desc = next;
   }
-  // Note: Do not update _last_pc_desc.  It fronts for the LRU cache.
 }
 
 // adjust pcs_size so that it is a multiple of both oopSize and
 // sizeof(PcDesc) (assumes that if sizeof(PcDesc) is not a multiple
 // of oopSize, then 2*sizeof(PcDesc) is)
-static int adjust_pcs_size(int pcs_size) {
+static int adjust_pcs_size(int pcs_size) {
   int nsize = round_to(pcs_size, oopSize);
   if ((nsize % sizeof(PcDesc)) != 0) {
     nsize = pcs_size + sizeof(PcDesc);
   }
-  assert((nsize % oopSize) == 0, "correct alignment");
+  assert((nsize % oopSize) == 0, "correct alignment");
   return nsize;
 }
 
diff -r 02e6fc2effd8 -r 4775a1e3e923 src/share/vm/code/nmethod.hpp
--- a/src/share/vm/code/nmethod.hpp	Fri Mar 11 22:41:33 2011 -0800
+++ b/src/share/vm/code/nmethod.hpp	Mon Mar 14 11:43:56 2011 -0700
@@ -69,14 +69,13 @@
 
   friend class VMStructs;
  private:
   enum { cache_size = 4 };
-  PcDesc* _last_pc_desc;         // most recent pc_desc found
   PcDesc* _pc_descs[cache_size]; // last cache_size pc_descs found
  public:
-  PcDescCache() { debug_only(_last_pc_desc = NULL); }
+  PcDescCache() { debug_only(_pc_descs[0] = NULL); }
   void    reset_to(PcDesc* initial_pc_desc);
   PcDesc* find_pc_desc(int pc_offset, bool approximate);
   void    add_pc_desc(PcDesc* pc_desc);
-  PcDesc* last_pc_desc() { return _last_pc_desc; }
+  PcDesc* last_pc_desc() { return _pc_descs[0]; }
 };
 
@@ -178,7 +177,7 @@
   unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes?
 
   // Protected by Patching_lock
-  unsigned char _state;                      // {alive, not_entrant, zombie, unloaded)
+  unsigned char _state;                      // {alive, not_entrant, zombie, unloaded}
 
 #ifdef ASSERT
   bool _oops_are_stale;  // indicates that it's no longer safe to access oops section
diff -r 02e6fc2effd8 -r 4775a1e3e923 src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp	Fri Mar 11 22:41:33 2011 -0800
+++ b/src/share/vm/runtime/deoptimization.cpp	Mon Mar 14 11:43:56 2011 -0700
@@ -101,9 +101,9 @@
   _frame_pcs                 = frame_pcs;
   _register_block            = NEW_C_HEAP_ARRAY(intptr_t, RegisterMap::reg_count * 2);
   _return_type               = return_type;
+  _initial_fp                = 0;
   // PD (x86 only)
   _counter_temp              = 0;
-  _initial_fp                = 0;
   _unpack_kind               = 0;
   _sender_sp_temp            = 0;
 
@@ -459,18 +459,9 @@
                                       frame_sizes,
                                       frame_pcs,
                                       return_type);
-#if defined(IA32) || defined(AMD64)
-  // We need a way to pass fp to the unpacking code so the skeletal frames
-  // come out correct. This is only needed for x86 because of c2 using ebp
-  // as an allocatable register. So this update is useless (and harmless)
-  // on the other platforms. It would be nice to do this in a different
-  // way but even the old style deoptimization had a problem with deriving
-  // this value. NEEDS_CLEANUP
-  // Note: now that c1 is using c2's deopt blob we must do this on all
-  // x86 based platforms
-  intptr_t** fp_addr = (intptr_t**) (((address)info) + info->initial_fp_offset_in_bytes());
-  *fp_addr = array->sender().fp(); // was adapter_caller
-#endif /* IA32 || AMD64 */
+  // On some platforms, we need a way to pass fp to the unpacking code
+  // so the skeletal frames come out correct.
+  info->set_initial_fp((intptr_t) array->sender().fp());
 
   if (array->frames() > 1) {
     if (VerifyStack && TraceDeoptimization) {
diff -r 02e6fc2effd8 -r 4775a1e3e923 src/share/vm/runtime/deoptimization.hpp
--- a/src/share/vm/runtime/deoptimization.hpp	Fri Mar 11 22:41:33 2011 -0800
+++ b/src/share/vm/runtime/deoptimization.hpp	Mon Mar 14 11:43:56 2011 -0700
@@ -136,12 +136,12 @@
     address*  _frame_pcs;                 // Array of frame pc's, in bytes, for unrolling the stack
     intptr_t* _register_block;            // Block for storing callee-saved registers.
     BasicType _return_type;               // Tells if we have to restore double or long return value
+    intptr_t  _initial_fp;                // FP of the sender frame
     // The following fields are used as temps during the unpacking phase
     // (which is tight on registers, especially on x86). They really ought
     // to be PD variables but that involves moving this class into its own
     // file to use the pd include mechanism. Maybe in a later cleanup ...
     intptr_t  _counter_temp;              // SHOULD BE PD VARIABLE (x86 frame count temp)
-    intptr_t  _initial_fp;                // SHOULD BE PD VARIABLE (x86/c2 initial ebp)
    intptr_t  _unpack_kind;               // SHOULD BE PD VARIABLE (x86 unpack kind)
     intptr_t  _sender_sp_temp;            // SHOULD BE PD VARIABLE (x86 sender_sp)
    public:
@@ -165,6 +165,8 @@
     // Returns the total size of frames
     int size_of_frames() const;
 
+    void set_initial_fp(intptr_t fp) { _initial_fp = fp; }
+
     // Accessors used by the code generator for the unpack stub.
     static int size_of_deoptimized_frame_offset_in_bytes() { return offset_of(UnrollBlock, _size_of_deoptimized_frame); }
     static int caller_adjustment_offset_in_bytes()         { return offset_of(UnrollBlock, _caller_adjustment); }
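
The heart of the nmethod.cpp change is dropping the shared _last_pc_desc field: every lookup hit used to store into it, so under contention the cache line holding it bounced between CPUs, as the new comment in the diff explains. After the patch, slot 0 of the LRU array plays that role and the lookup path performs no stores at all. The standalone C++ sketch below (hypothetical names, types, and sizes; not HotSpot code) illustrates the resulting shape: find() only reads, and add() is the lone writer, shifting entries so the newest lands in slot 0.

#include <cstdio>

// Hypothetical stand-in for HotSpot's PcDesc: maps a pc offset to debug info.
struct Desc {
  int pc_offset;
};

// Sketch of the patched PcDescCache layout: no separate "last hit" pointer.
// Slot 0 is simply the most recently added entry, so a repeat query hits it
// first without any bookkeeping write on the lookup path.
class SmallLruCache {
  enum { cache_size = 4 };
  Desc* _descs[cache_size];

 public:
  explicit SmallLruCache(Desc* sentinel) {
    // Fill with a benign sentinel (pc_offset < 0), mirroring reset_to().
    for (int i = 0; i < cache_size; i++) _descs[i] = sentinel;
  }

  // Read-only lookup: no stores, so concurrent readers never ping-pong
  // the cache line the way updating a shared "last hit" field would.
  Desc* find(int pc_offset) {
    for (int i = 0; i < cache_size; i++) {
      Desc* res = _descs[i];          // local copy, loaded once
      if (res->pc_offset < 0) break;  // sentinel: rest of cache is empty
      if (res->pc_offset == pc_offset) return res;
    }
    return nullptr;
  }

  // The only writer: shift entries toward the back, newest into slot 0,
  // just like the add_pc_desc() loop in the diff.
  void add(Desc* d) {
    for (int i = 0; i < cache_size; i++) {
      Desc* next = _descs[i];
      _descs[i] = d;
      d = next;
    }
  }
};

int main() {
  Desc sentinel{-1}, a{8}, b{16};
  SmallLruCache cache(&sentinel);
  cache.add(&a);
  cache.add(&b);  // b is now the most recent, in slot 0
  std::printf("find(16): %s\n", cache.find(16) ? "hit" : "miss");
  std::printf("find(8):  %s\n", cache.find(8)  ? "hit" : "miss");
  std::printf("find(99): %s\n", cache.find(99) ? "hit" : "miss");
  return 0;
}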
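
The adjust_pcs_size helper the diff touches (whitespace only) rounds the PcDesc-array size up to a multiple of both oopSize and sizeof(PcDesc), relying on the stated assumption that when sizeof(PcDesc) is not itself a multiple of oopSize, 2*sizeof(PcDesc) is. A quick standalone check of that arithmetic, with oopSize and sizeof(PcDesc) stood in by the purely illustrative constants 8 and 12:

#include <cassert>
#include <cstdio>

// Illustrative stand-ins; real values depend on platform and VM build.
const int kOopSize  = 8;   // pretend oopSize
const int kDescSize = 12;  // pretend sizeof(PcDesc); note 2*12 is a multiple of 8

// round_to(x, n): round x up to a multiple of n (n is a power of two).
static int round_to(int x, int n) {
  return (x + n - 1) & ~(n - 1);
}

// Same shape as adjust_pcs_size in the diff: make the result a multiple
// of both kOopSize and kDescSize.
static int adjust_pcs_size(int pcs_size) {
  int nsize = round_to(pcs_size, kOopSize);
  if ((nsize % kDescSize) != 0) {
    nsize = pcs_size + kDescSize;
  }
  assert((nsize % kOopSize) == 0 && (nsize % kDescSize) == 0);
  return nsize;
}

int main() {
  // pcs_size is always a whole number of descriptors (k * kDescSize):
  // even k is already a multiple of both; odd k gets bumped by one desc.
  for (int k = 1; k <= 6; k++) {
    int in = k * kDescSize;
    std::printf("%d descs: %3d -> %3d\n", k, in, adjust_pcs_size(in));
  }
  return 0;
}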