diff src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @ 4912:a9647476d1a4

7132029: G1: mixed GC phase lasts for longer than it should Summary: Revamp of the mechanism that chooses old regions for inclusion in the CSet. It simplifies the code and introduces min and max bounds on the number of old regions added to the CSet at each mixed GC to avoid pathological cases. It also ensures that when we do a mixed GC we'll always find old regions to add to the CSet (i.e., it eliminates the case where a mixed GC will collect no old regions which can happen today). Reviewed-by: johnc, brutisso
author tonyp
date Wed, 15 Feb 2012 13:06:53 -0500
parents eff609af17d7
children 21595f05bc93
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jan 18 09:50:16 2012 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Feb 15 13:06:53 2012 -0500
@@ -206,7 +206,6 @@
 
   _initiate_conc_mark_if_possible(false),
   _during_initial_mark_pause(false),
-  _should_revert_to_young_gcs(false),
   _last_young_gc(false),
   _last_gc_was_young(false),
 
@@ -295,9 +294,6 @@
   _par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
   _par_last_gc_worker_other_times_ms = new double[_parallel_gc_threads];
 
-  // start conservatively
-  _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
-
   int index;
   if (ParallelGCThreads == 0)
     index = 0;
@@ -629,16 +625,9 @@
       // possible to maximize how many old regions we can add to it.
     }
   } else {
-    if (gcs_are_young()) {
-      young_list_target_length = _young_list_fixed_length;
-    } else {
-      // A bit arbitrary: during mixed GCs we allocate half
-      // the young regions to try to add old regions to the CSet.
-      young_list_target_length = _young_list_fixed_length / 2;
-      // We choose to accept that we might go under the desired min
-      // length given that we intentionally ask for a smaller young gen.
-      desired_min_length = absolute_min_length;
-    }
+    // The user asked for a fixed young gen so we'll fix the young gen
+    // whether the next GC is young or mixed.
+    young_list_target_length = _young_list_fixed_length;
   }
 
   // Make sure we don't go over the desired max length, nor under the
@@ -872,7 +861,6 @@
   // transitions and make sure we start with young GCs after the Full GC.
   set_gcs_are_young(true);
   _last_young_gc = false;
-  _should_revert_to_young_gcs = false;
   clear_initiate_conc_mark_if_possible();
   clear_during_initial_mark_pause();
   _known_garbage_bytes = 0;
@@ -889,7 +877,7 @@
   // Reset survivors SurvRateGroup.
   _survivor_surv_rate_group->reset();
   update_young_list_target_length();
-  _collectionSetChooser->updateAfterFullCollection();
+  _collectionSetChooser->clearMarkedHeapRegions();
 }
 
 void G1CollectorPolicy::record_stop_world_start() {
@@ -1000,7 +988,6 @@
 }
 
 void G1CollectorPolicy::record_concurrent_mark_cleanup_completed() {
-  _should_revert_to_young_gcs = false;
   _last_young_gc = true;
   _in_marking_window = false;
 }
@@ -1205,9 +1192,7 @@
   last_pause_included_initial_mark = during_initial_mark_pause();
   if (last_pause_included_initial_mark) {
     record_concurrent_mark_init_end(0.0);
-  }
-
-  if (!_last_young_gc && need_to_start_conc_mark("end of GC")) {
+  } else if (!_last_young_gc && need_to_start_conc_mark("end of GC")) {
     // Note: this might have already been set, if during the last
     // pause we decided to start a cycle but at the beginning of
     // this pause we decided to postpone it. That's OK.
@@ -1492,12 +1477,14 @@
   }
 
   if (_last_young_gc) {
+    // This is supposed to to be the "last young GC" before we start
+    // doing mixed GCs. Here we decide whether to start mixed GCs or not.
+
     if (!last_pause_included_initial_mark) {
-      ergo_verbose2(ErgoMixedGCs,
-                    "start mixed GCs",
-                    ergo_format_byte_perc("known garbage"),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
-      set_gcs_are_young(false);
+      if (next_gc_should_be_mixed("start mixed GCs",
+                                  "do not start mixed GCs")) {
+        set_gcs_are_young(false);
+      }
     } else {
       ergo_verbose0(ErgoMixedGCs,
                     "do not start mixed GCs",
@@ -1507,39 +1494,14 @@
   }
 
   if (!_last_gc_was_young) {
-    if (_should_revert_to_young_gcs) {
-      ergo_verbose2(ErgoMixedGCs,
-                    "end mixed GCs",
-                    ergo_format_reason("mixed GCs end requested")
-                    ergo_format_byte_perc("known garbage"),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
-      set_gcs_are_young(true);
-    } else if (_known_garbage_ratio < 0.05) {
-      ergo_verbose3(ErgoMixedGCs,
-               "end mixed GCs",
-               ergo_format_reason("known garbage percent lower than threshold")
-               ergo_format_byte_perc("known garbage")
-               ergo_format_perc("threshold"),
-               _known_garbage_bytes, _known_garbage_ratio * 100.0,
-               0.05 * 100.0);
-      set_gcs_are_young(true);
-    } else if (adaptive_young_list_length() &&
-              (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) {
-      ergo_verbose5(ErgoMixedGCs,
-                    "end mixed GCs",
-                    ergo_format_reason("current GC efficiency lower than "
-                                       "predicted young GC efficiency")
-                    ergo_format_double("GC efficiency factor")
-                    ergo_format_double("current GC efficiency")
-                    ergo_format_double("predicted young GC efficiency")
-                    ergo_format_byte_perc("known garbage"),
-                    get_gc_eff_factor(), cur_efficiency,
-                    predict_young_gc_eff(),
-                    _known_garbage_bytes, _known_garbage_ratio * 100.0);
+    // This is a mixed GC. Here we decide whether to continue doing
+    // mixed GCs or not.
+
+    if (!next_gc_should_be_mixed("continue mixed GCs",
+                                 "do not continue mixed GCs")) {
       set_gcs_are_young(true);
     }
   }
-  _should_revert_to_young_gcs = false;
 
   if (_last_gc_was_young && !_during_marking) {
     _young_gc_eff_seq->add(cur_efficiency);
@@ -1648,15 +1610,6 @@
 
     _pending_cards_seq->add((double) _pending_cards);
     _rs_lengths_seq->add((double) _max_rs_lengths);
-
-    double expensive_region_limit_ms =
-      (double) MaxGCPauseMillis - predict_constant_other_time_ms();
-    if (expensive_region_limit_ms < 0.0) {
-      // this means that the other time was predicted to be longer than
-      // than the max pause time
-      expensive_region_limit_ms = (double) MaxGCPauseMillis;
-    }
-    _expensive_region_limit_ms = expensive_region_limit_ms;
   }
 
   _in_marking_window = new_in_marking_window;
@@ -1838,13 +1791,11 @@
   if (hr->is_marked())
     bytes_to_copy = hr->max_live_bytes();
   else {
-    guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1,
-               "invariant" );
+    assert(hr->is_young() && hr->age_in_surv_rate_group() != -1, "invariant");
     int age = hr->age_in_surv_rate_group();
     double yg_surv_rate = predict_yg_surv_rate(age, hr->surv_rate_group());
     bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate);
   }
-
   return bytes_to_copy;
 }
 
@@ -1860,22 +1811,6 @@
   _recorded_rs_lengths = rs_lengths;
 }
 
-void G1CollectorPolicy::check_if_region_is_too_expensive(double
-                                                           predicted_time_ms) {
-  // I don't think we need to do this when in young GC mode since
-  // marking will be initiated next time we hit the soft limit anyway...
-  if (predicted_time_ms > _expensive_region_limit_ms) {
-    ergo_verbose2(ErgoMixedGCs,
-              "request mixed GCs end",
-              ergo_format_reason("predicted region time higher than threshold")
-              ergo_format_ms("predicted region time")
-              ergo_format_ms("threshold"),
-              predicted_time_ms, _expensive_region_limit_ms);
-    // no point in doing another mixed GC
-    _should_revert_to_young_gcs = true;
-  }
-}
-
 void G1CollectorPolicy::update_recent_gc_times(double end_time_sec,
                                                double elapsed_ms) {
   _recent_gc_times_ms->add(elapsed_ms);
@@ -2274,12 +2209,12 @@
 }
 
 class KnownGarbageClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   CollectionSetChooser* _hrSorted;
 
 public:
   KnownGarbageClosure(CollectionSetChooser* hrSorted) :
-    _hrSorted(hrSorted)
-  {}
+    _g1h(G1CollectedHeap::heap()), _hrSorted(hrSorted) { }
 
   bool doHeapRegion(HeapRegion* r) {
     // We only include humongous regions in collection
@@ -2288,11 +2223,10 @@
 
     // Do we have any marking information for this region?
     if (r->is_marked()) {
-      // We don't include humongous regions in collection
-      // sets because we collect them immediately at the end of a marking
-      // cycle.  We also don't include young regions because we *must*
-      // include them in the next collection pause.
-      if (!r->isHumongous() && !r->is_young()) {
+      // We will skip any region that's currently used as an old GC
+      // alloc region (we should not consider those for collection
+      // before we fill them up).
+      if (_hrSorted->shouldAdd(r) && !_g1h->is_old_gc_alloc_region(r)) {
         _hrSorted->addMarkedHeapRegion(r);
       }
     }
@@ -2301,8 +2235,10 @@
 };
 
 class ParKnownGarbageHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
   CollectionSetChooser* _hrSorted;
   jint _marked_regions_added;
+  size_t _reclaimable_bytes_added;
   jint _chunk_size;
   jint _cur_chunk_idx;
   jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end)
@@ -2320,6 +2256,7 @@
     assert(_cur_chunk_idx < _cur_chunk_end, "postcondition");
     _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r);
     _marked_regions_added++;
+    _reclaimable_bytes_added += r->reclaimable_bytes();
     _cur_chunk_idx++;
   }
 
@@ -2327,10 +2264,10 @@
   ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted,
                            jint chunk_size,
                            int worker) :
-    _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
-    _marked_regions_added(0), _cur_chunk_idx(0), _cur_chunk_end(0),
-    _invokes(0)
-  {}
+      _g1h(G1CollectedHeap::heap()),
+      _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker),
+      _marked_regions_added(0), _reclaimable_bytes_added(0),
+      _cur_chunk_idx(0), _cur_chunk_end(0), _invokes(0) { }
 
   bool doHeapRegion(HeapRegion* r) {
     // We only include humongous regions in collection
@@ -2340,17 +2277,17 @@
 
     // Do we have any marking information for this region?
     if (r->is_marked()) {
-      // We don't include humongous regions in collection
-      // sets because we collect them immediately at the end of a marking
-      // cycle.
-      // We also do not include young regions in collection sets
-      if (!r->isHumongous() && !r->is_young()) {
+      // We will skip any region that's currently used as an old GC
+      // alloc region (we should not consider those for collection
+      // before we fill them up).
+      if (_hrSorted->shouldAdd(r) && !_g1h->is_old_gc_alloc_region(r)) {
         add_region(r);
       }
     }
     return false;
   }
   jint marked_regions_added() { return _marked_regions_added; }
+  size_t reclaimable_bytes_added() { return _reclaimable_bytes_added; }
   int invokes() { return _invokes; }
 };
 
@@ -2362,8 +2299,7 @@
   ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) :
     AbstractGangTask("ParKnownGarbageTask"),
     _hrSorted(hrSorted), _chunk_size(chunk_size),
-    _g1(G1CollectedHeap::heap())
-  {}
+    _g1(G1CollectedHeap::heap()) { }
 
   void work(uint worker_id) {
     ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted,
@@ -2374,7 +2310,9 @@
                                          _g1->workers()->active_workers(),
                                          HeapRegion::InitialClaimValue);
     jint regions_added = parKnownGarbageCl.marked_regions_added();
-    _hrSorted->incNumMarkedHeapRegions(regions_added);
+    size_t reclaimable_bytes_added =
+                                   parKnownGarbageCl.reclaimable_bytes_added();
+    _hrSorted->updateTotals(regions_added, reclaimable_bytes_added);
     if (G1PrintParCleanupStats) {
       gclog_or_tty->print_cr("     Thread %d called %d times, added %d regions to list.",
                  worker_id, parKnownGarbageCl.invokes(), regions_added);
@@ -2658,7 +2596,43 @@
 }
 #endif // !PRODUCT
 
-void G1CollectorPolicy::choose_collection_set(double target_pause_time_ms) {
+bool G1CollectorPolicy::next_gc_should_be_mixed(const char* true_action_str,
+                                                const char* false_action_str) {
+  CollectionSetChooser* cset_chooser = _collectionSetChooser;
+  if (cset_chooser->isEmpty()) {
+    ergo_verbose0(ErgoMixedGCs,
+                  false_action_str,
+                  ergo_format_reason("candidate old regions not available"));
+    return false;
+  }
+  size_t reclaimable_bytes = cset_chooser->remainingReclaimableBytes();
+  size_t capacity_bytes = _g1->capacity();
+  double perc = (double) reclaimable_bytes * 100.0 / (double) capacity_bytes;
+  double threshold = (double) G1OldReclaimableThresholdPercent;
+  if (perc < threshold) {
+    ergo_verbose4(ErgoMixedGCs,
+              false_action_str,
+              ergo_format_reason("reclaimable percentage lower than threshold")
+              ergo_format_region("candidate old regions")
+              ergo_format_byte_perc("reclaimable")
+              ergo_format_perc("threshold"),
+              cset_chooser->remainingRegions(),
+              reclaimable_bytes, perc, threshold);
+    return false;
+  }
+
+  ergo_verbose4(ErgoMixedGCs,
+                true_action_str,
+                ergo_format_reason("candidate old regions available")
+                ergo_format_region("candidate old regions")
+                ergo_format_byte_perc("reclaimable")
+                ergo_format_perc("threshold"),
+                cset_chooser->remainingRegions(),
+                reclaimable_bytes, perc, threshold);
+  return true;
+}
+
+void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) {
   // Set this here - in case we're not doing young collections.
   double non_young_start_time_sec = os::elapsedTime();
 
@@ -2672,7 +2646,6 @@
 
   double base_time_ms = predict_base_elapsed_time_ms(_pending_cards);
   double predicted_pause_time_ms = base_time_ms;
-
   double time_remaining_ms = target_pause_time_ms - base_time_ms;
 
   ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
@@ -2682,22 +2655,6 @@
                 ergo_format_ms("target pause time"),
                 base_time_ms, time_remaining_ms, target_pause_time_ms);
 
-  // the 10% and 50% values are arbitrary...
-  double threshold = 0.10 * target_pause_time_ms;
-  if (time_remaining_ms < threshold) {
-    double prev_time_remaining_ms = time_remaining_ms;
-    time_remaining_ms = 0.50 * target_pause_time_ms;
-    ergo_verbose3(ErgoCSetConstruction,
-                  "adjust remaining time",
-                  ergo_format_reason("remaining time lower than threshold")
-                  ergo_format_ms("remaining time")
-                  ergo_format_ms("threshold")
-                  ergo_format_ms("adjusted remaining time"),
-                  prev_time_remaining_ms, threshold, time_remaining_ms);
-  }
-
-  size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes;
-
   HeapRegion* hr;
   double young_start_time_sec = os::elapsedTime();
 
@@ -2752,78 +2709,97 @@
   non_young_start_time_sec = young_end_time_sec;
 
   if (!gcs_are_young()) {
-    bool should_continue = true;
-    NumberSeq seq;
-    double avg_prediction = 100000000000000000.0; // something very large
-
-    double prev_predicted_pause_time_ms = predicted_pause_time_ms;
-    do {
-      // Note that add_old_region_to_cset() increments the
-      // _old_cset_region_length field and cset_region_length() returns the
-      // sum of _eden_cset_region_length, _survivor_cset_region_length, and
-      // _old_cset_region_length. So, as old regions are added to the
-      // CSet, _old_cset_region_length will be incremented and
-      // cset_region_length(), which is used below, will always reflect
-      // the the total number of regions added up to this point to the CSet.
-
-      hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
-                                                      avg_prediction);
-      if (hr != NULL) {
-        _g1->old_set_remove(hr);
-        double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
-        time_remaining_ms -= predicted_time_ms;
-        predicted_pause_time_ms += predicted_time_ms;
-        add_old_region_to_cset(hr);
-        seq.add(predicted_time_ms);
-        avg_prediction = seq.avg() + seq.sd();
+    CollectionSetChooser* cset_chooser = _collectionSetChooser;
+    assert(cset_chooser->verify(), "CSet Chooser verification - pre");
+    const size_t min_old_cset_length = cset_chooser->calcMinOldCSetLength();
+    const size_t max_old_cset_length = cset_chooser->calcMaxOldCSetLength();
+
+    size_t expensive_region_num = 0;
+    bool check_time_remaining = adaptive_young_list_length();
+    HeapRegion* hr = cset_chooser->peek();
+    while (hr != NULL) {
+      if (old_cset_region_length() >= max_old_cset_length) {
+        // Added maximum number of old regions to the CSet.
+        ergo_verbose2(ErgoCSetConstruction,
+                      "finish adding old regions to CSet",
+                      ergo_format_reason("old CSet region num reached max")
+                      ergo_format_region("old")
+                      ergo_format_region("max"),
+                      old_cset_region_length(), max_old_cset_length);
+        break;
       }
 
-      should_continue = true;
-      if (hr == NULL) {
-        // No need for an ergo verbose message here,
-        // getNextMarkRegion() does this when it returns NULL.
-        should_continue = false;
+      double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
+      if (check_time_remaining) {
+        if (predicted_time_ms > time_remaining_ms) {
+          // Too expensive for the current CSet.
+
+          if (old_cset_region_length() >= min_old_cset_length) {
+            // We have added the minimum number of old regions to the CSet,
+            // we are done with this CSet.
+            ergo_verbose4(ErgoCSetConstruction,
+                          "finish adding old regions to CSet",
+                          ergo_format_reason("predicted time is too high")
+                          ergo_format_ms("predicted time")
+                          ergo_format_ms("remaining time")
+                          ergo_format_region("old")
+                          ergo_format_region("min"),
+                          predicted_time_ms, time_remaining_ms,
+                          old_cset_region_length(), min_old_cset_length);
+            break;
+          }
+
+          // We'll add it anyway given that we haven't reached the
+          // minimum number of old regions.
+          expensive_region_num += 1;
+        }
       } else {
-        if (adaptive_young_list_length()) {
-          if (time_remaining_ms < 0.0) {
-            ergo_verbose1(ErgoCSetConstruction,
-                          "stop adding old regions to CSet",
-                          ergo_format_reason("remaining time is lower than 0")
-                          ergo_format_ms("remaining time"),
-                          time_remaining_ms);
-            should_continue = false;
-          }
-        } else {
-          if (cset_region_length() >= _young_list_fixed_length) {
-            ergo_verbose2(ErgoCSetConstruction,
-                          "stop adding old regions to CSet",
-                          ergo_format_reason("CSet length reached target")
-                          ergo_format_region("CSet")
-                          ergo_format_region("young target"),
-                          cset_region_length(), _young_list_fixed_length);
-            should_continue = false;
-          }
+        if (old_cset_region_length() >= min_old_cset_length) {
+          // In the non-auto-tuning case, we'll finish adding regions
+          // to the CSet if we reach the minimum.
+          ergo_verbose2(ErgoCSetConstruction,
+                        "finish adding old regions to CSet",
+                        ergo_format_reason("old CSet region num reached min")
+                        ergo_format_region("old")
+                        ergo_format_region("min"),
+                        old_cset_region_length(), min_old_cset_length);
+          break;
         }
       }
-    } while (should_continue);
-
-    if (!adaptive_young_list_length() &&
-        cset_region_length() < _young_list_fixed_length) {
-      ergo_verbose2(ErgoCSetConstruction,
-                    "request mixed GCs end",
-                    ergo_format_reason("CSet length lower than target")
-                    ergo_format_region("CSet")
-                    ergo_format_region("young target"),
-                    cset_region_length(), _young_list_fixed_length);
-      _should_revert_to_young_gcs  = true;
+
+      // We will add this region to the CSet.
+      time_remaining_ms -= predicted_time_ms;
+      predicted_pause_time_ms += predicted_time_ms;
+      cset_chooser->remove_and_move_to_next(hr);
+      _g1->old_set_remove(hr);
+      add_old_region_to_cset(hr);
+
+      hr = cset_chooser->peek();
+    }
+    if (hr == NULL) {
+      ergo_verbose0(ErgoCSetConstruction,
+                    "finish adding old regions to CSet",
+                    ergo_format_reason("candidate old regions not available"));
     }
 
-    ergo_verbose2(ErgoCSetConstruction | ErgoHigh,
-                  "add old regions to CSet",
-                  ergo_format_region("old")
-                  ergo_format_ms("predicted old region time"),
-                  old_cset_region_length(),
-                  predicted_pause_time_ms - prev_predicted_pause_time_ms);
+    if (expensive_region_num > 0) {
+      // We print the information once here at the end, predicated on
+      // whether we added any apparently expensive regions or not, to
+      // avoid generating output per region.
+      ergo_verbose4(ErgoCSetConstruction,
+                    "added expensive regions to CSet",
+                    ergo_format_reason("old CSet region num not reached min")
+                    ergo_format_region("old")
+                    ergo_format_region("expensive")
+                    ergo_format_region("min")
+                    ergo_format_ms("remaining time"),
+                    old_cset_region_length(),
+                    expensive_region_num,
+                    min_old_cset_length,
+                    time_remaining_ms);
+    }
+
+    assert(cset_chooser->verify(), "CSet Chooser verification - post");
   }
 
   stop_incremental_cset_building();