view src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp @ 23024:cbc7c4c9e11c

8048179: Early reclaim of large objects that are referenced by a few objects Summary: Push the remembered sets of large objects with few referenced into the dirty card queue at the beginning of the evacuation so that they may end up with zero remembered set entries at the end of the collection, and are potentially reclaimed. Also improve timing measurements of the early reclaim mechanism, and shorten flag names. Reviewed-by: brutisso, jmasa, dfazunen
author tschatzl
date Wed, 07 Jan 2015 15:15:37 +0100
parents 3ca53859c3c7
children a5b77ac78ad2
line wrap: on
line source

/*
 * Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1StringDedup.hpp"
#include "memory/allocation.hpp"
#include "runtime/os.hpp"

// Helper class for avoiding interleaved logging
class LineBuffer: public StackObj {

private:
  static const int BUFFER_LEN = 1024;
  static const int INDENT_CHARS = 3;
  char _buffer[BUFFER_LEN];
  int _indent_level;
  int _cur;

  void vappend(const char* format, va_list ap)  ATTRIBUTE_PRINTF(2, 0) {
    int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
    if (res != -1) {
      _cur += res;
    } else {
      DEBUG_ONLY(warning("buffer too small in LineBuffer");)
      _buffer[BUFFER_LEN -1] = 0;
      _cur = BUFFER_LEN; // vsnprintf above should not add to _buffer if we are called again
    }
  }

public:
  explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
    for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
      _buffer[_cur] = ' ';
    }
  }

#ifndef PRODUCT
  ~LineBuffer() {
    assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
  }
#endif

  void append(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
    va_list ap;
    va_start(ap, format);
    vappend(format, ap);
    va_end(ap);
  }

  void print_cr() {
    gclog_or_tty->print_cr("%s", _buffer);
    _cur = _indent_level * INDENT_CHARS;
  }

  void append_and_print_cr(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
    va_list ap;
    va_start(ap, format);
    vappend(format, ap);
    va_end(ap);
    print_cr();
  }
};

template <class T>
class WorkerDataArray  : public CHeapObj<mtGC> {
  friend class G1GCParPhasePrinter;
  T*          _data;
  uint        _length;
  const char* _title;
  bool        _print_sum;
  int         _log_level;
  uint        _indent_level;
  bool        _enabled;

  WorkerDataArray<size_t>* _thread_work_items;

  NOT_PRODUCT(T uninitialized();)

  // We are caching the sum and average to only have to calculate them once.
  // This is not done in an MT-safe way. It is intended to allow single
  // threaded code to call sum() and average() multiple times in any order
  // without having to worry about the cost.
  bool   _has_new_data;
  T      _sum;
  T      _min;
  T      _max;
  double _average;

 public:
  WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level) :
    _title(title), _length(0), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level),
    _has_new_data(true), _thread_work_items(NULL), _enabled(true) {
    assert(length > 0, "Must have some workers to store data for");
    _length = length;
    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
  }

  ~WorkerDataArray() {
    FREE_C_HEAP_ARRAY(T, _data, mtGC);
  }

  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
    _thread_work_items = thread_work_items;
  }

  WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; }

  void set(uint worker_i, T value) {
    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
    assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title));
    _data[worker_i] = value;
    _has_new_data = true;
  }

  void set_thread_work_item(uint worker_i, size_t value) {
    assert(_thread_work_items != NULL, "No sub count");
    _thread_work_items->set(worker_i, value);
  }

  T get(uint worker_i) {
    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %d", worker_i));
    return _data[worker_i];
  }

  void add(uint worker_i, T value) {
    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %d", worker_i));
    _data[worker_i] += value;
    _has_new_data = true;
  }

  double average(){
    calculate_totals();
    return _average;
  }

  T sum() {
    calculate_totals();
    return _sum;
  }

  T minimum() {
    calculate_totals();
    return _min;
  }

  T maximum() {
    calculate_totals();
    return _max;
  }

  void reset() PRODUCT_RETURN;
  void verify() PRODUCT_RETURN;

  void set_enabled(bool enabled) { _enabled = enabled; }

  int log_level() { return _log_level;  }

 private:

  void calculate_totals(){
    if (!_has_new_data) {
      return;
    }

    _sum = (T)0;
    _min = _data[0];
    _max = _min;
    for (uint i = 0; i < _length; ++i) {
      T val = _data[i];
      _sum += val;
      _min = MIN2(_min, val);
      _max = MAX2(_max, val);
    }
    _average = (double)_sum / (double)_length;
    _has_new_data = false;
  }
};


#ifndef PRODUCT

template <>
size_t WorkerDataArray<size_t>::uninitialized() {
  return (size_t)-1;
}

template <>
double WorkerDataArray<double>::uninitialized() {
  return -1.0;
}

template <class T>
void WorkerDataArray<T>::reset() {
  for (uint i = 0; i < _length; i++) {
    _data[i] = WorkerDataArray<T>::uninitialized();
  }
  if (_thread_work_items != NULL) {
    _thread_work_items->reset();
  }
}

template <class T>
void WorkerDataArray<T>::verify() {
  if (!_enabled) {
    return;
  }

  for (uint i = 0; i < _length; i++) {
    assert(_data[i] != WorkerDataArray<T>::uninitialized(),
        err_msg("Invalid data for worker %u in '%s'", i, _title));
  }
  if (_thread_work_items != NULL) {
    _thread_work_items->verify();
  }
}

#endif

G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
  _max_gc_threads(max_gc_threads)
{
  assert(max_gc_threads > 0, "Must have some GC threads");

  _gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Start (ms)", false, G1Log::LevelFiner, 2);
  _gc_par_phases[ExtRootScan] = new WorkerDataArray<double>(max_gc_threads, "Ext Root Scanning (ms)", true, G1Log::LevelFiner, 2);

  // Root scanning phases
  _gc_par_phases[ThreadRoots] = new WorkerDataArray<double>(max_gc_threads, "Thread Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[StringTableRoots] = new WorkerDataArray<double>(max_gc_threads, "StringTable Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[UniverseRoots] = new WorkerDataArray<double>(max_gc_threads, "Universe Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[JNIRoots] = new WorkerDataArray<double>(max_gc_threads, "JNI Handles Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[ObjectSynchronizerRoots] = new WorkerDataArray<double>(max_gc_threads, "ObjectSynchronizer Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[FlatProfilerRoots] = new WorkerDataArray<double>(max_gc_threads, "FlatProfiler Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[ManagementRoots] = new WorkerDataArray<double>(max_gc_threads, "Management Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[SystemDictionaryRoots] = new WorkerDataArray<double>(max_gc_threads, "SystemDictionary Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[CLDGRoots] = new WorkerDataArray<double>(max_gc_threads, "CLDG Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[JVMTIRoots] = new WorkerDataArray<double>(max_gc_threads, "JVMTI Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[CodeCacheRoots] = new WorkerDataArray<double>(max_gc_threads, "CodeCache Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[CMRefRoots] = new WorkerDataArray<double>(max_gc_threads, "CM RefProcessor Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[WaitForStrongCLD] = new WorkerDataArray<double>(max_gc_threads, "Wait For Strong CLD (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[WeakCLDRoots] = new WorkerDataArray<double>(max_gc_threads, "Weak CLD Roots (ms)", true, G1Log::LevelFinest, 3);
  _gc_par_phases[SATBFiltering] = new WorkerDataArray<double>(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFinest, 3);

  _gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2);
  _gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2);
  _gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2);
  _gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2);
  _gc_par_phases[Termination] = new WorkerDataArray<double>(max_gc_threads, "Termination (ms)", true, G1Log::LevelFiner, 2);
  _gc_par_phases[GCWorkerTotal] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Total (ms)", true, G1Log::LevelFiner, 2);
  _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms)", false, G1Log::LevelFiner, 2);
  _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms)", true, G1Log::LevelFiner, 2);

  _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers", true, G1Log::LevelFiner, 3);
  _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers);

  _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts", true, G1Log::LevelFinest, 3);
  _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts);

  _gc_par_phases[StringDedupQueueFixup] = new WorkerDataArray<double>(max_gc_threads, "Queue Fixup (ms)", true, G1Log::LevelFiner, 2);
  _gc_par_phases[StringDedupTableFixup] = new WorkerDataArray<double>(max_gc_threads, "Table Fixup (ms)", true, G1Log::LevelFiner, 2);

  _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>(max_gc_threads, "Parallel Redirty", true, G1Log::LevelFinest, 3);
  _redirtied_cards = new WorkerDataArray<size_t>(max_gc_threads, "Redirtied Cards", true, G1Log::LevelFinest, 3);
  _gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards);
}

void G1GCPhaseTimes::note_gc_start(uint active_gc_threads, bool mark_in_progress) {
  assert(active_gc_threads > 0, "The number of threads must be > 0");
  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads");
  _active_gc_threads = active_gc_threads;

  for (int i = 0; i < GCParPhasesSentinel; i++) {
    _gc_par_phases[i]->reset();
  }

  _gc_par_phases[StringDedupQueueFixup]->set_enabled(G1StringDedup::is_enabled());
  _gc_par_phases[StringDedupTableFixup]->set_enabled(G1StringDedup::is_enabled());
}

void G1GCPhaseTimes::note_gc_end() {
  for (uint i = 0; i < _active_gc_threads; i++) {
    double worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i);
    record_time_secs(GCWorkerTotal, i , worker_time);

    double worker_known_time =
        _gc_par_phases[ExtRootScan]->get(i) +
        _gc_par_phases[SATBFiltering]->get(i) +
        _gc_par_phases[UpdateRS]->get(i) +
        _gc_par_phases[ScanRS]->get(i) +
        _gc_par_phases[CodeRoots]->get(i) +
        _gc_par_phases[ObjCopy]->get(i) +
        _gc_par_phases[Termination]->get(i);

    record_time_secs(Other, i, worker_time - worker_known_time);
  }

  for (int i = 0; i < GCParPhasesSentinel; i++) {
    _gc_par_phases[i]->verify();
  }
}

void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
}

void G1GCPhaseTimes::print_stats(int level, const char* str, size_t value) {
  LineBuffer(level).append_and_print_cr("[%s: "SIZE_FORMAT"]", str, value);
}

void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %u]", str, value, workers);
}

double G1GCPhaseTimes::accounted_time_ms() {
    // Subtract the root region scanning wait time. It's initialized to
    // zero at the start of the pause.
    double misc_time_ms = _root_region_scan_wait_time_ms;

    misc_time_ms += _cur_collection_par_time_ms;

    // Now subtract the time taken to fix up roots in generated code
    misc_time_ms += _cur_collection_code_root_fixup_time_ms;

    // Strong code root purge time
    misc_time_ms += _cur_strong_code_root_purge_time_ms;

    if (G1StringDedup::is_enabled()) {
      // String dedup fixup time
      misc_time_ms += _cur_string_dedup_fixup_time_ms;
    }

    // Subtract the time taken to clean the card table from the
    // current value of "other time"
    misc_time_ms += _cur_clear_ct_time_ms;

    return misc_time_ms;
}

// record the time a phase took in seconds
void G1GCPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) {
  _gc_par_phases[phase]->set(worker_i, secs);
}

// add a number of seconds to a phase
void G1GCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs) {
  _gc_par_phases[phase]->add(worker_i, secs);
}

void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) {
  _gc_par_phases[phase]->set_thread_work_item(worker_i, count);
}

// return the average time for a phase in milliseconds
double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
  return _gc_par_phases[phase]->average() * 1000.0;
}

double G1GCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) {
  return _gc_par_phases[phase]->get(worker_i) * 1000.0;
}

double G1GCPhaseTimes::sum_time_ms(GCParPhases phase) {
  return _gc_par_phases[phase]->sum() * 1000.0;
}

double G1GCPhaseTimes::min_time_ms(GCParPhases phase) {
  return _gc_par_phases[phase]->minimum() * 1000.0;
}

double G1GCPhaseTimes::max_time_ms(GCParPhases phase) {
  return _gc_par_phases[phase]->maximum() * 1000.0;
}

size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_i) {
  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
  return _gc_par_phases[phase]->thread_work_items()->get(worker_i);
}

size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
  return _gc_par_phases[phase]->thread_work_items()->sum();
}

double G1GCPhaseTimes::average_thread_work_items(GCParPhases phase) {
  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
  return _gc_par_phases[phase]->thread_work_items()->average();
}

size_t G1GCPhaseTimes::min_thread_work_items(GCParPhases phase) {
  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
  return _gc_par_phases[phase]->thread_work_items()->minimum();
}

size_t G1GCPhaseTimes::max_thread_work_items(GCParPhases phase) {
  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
  return _gc_par_phases[phase]->thread_work_items()->maximum();
}

class G1GCParPhasePrinter : public StackObj {
  G1GCPhaseTimes* _phase_times;
 public:
  G1GCParPhasePrinter(G1GCPhaseTimes* phase_times) : _phase_times(phase_times) {}

  void print(G1GCPhaseTimes::GCParPhases phase_id) {
    WorkerDataArray<double>* phase = _phase_times->_gc_par_phases[phase_id];

    if (phase->_log_level > G1Log::level() || !phase->_enabled) {
      return;
    }

    if (phase->_length == 1) {
      print_single_length(phase_id, phase);
    } else {
      print_multi_length(phase_id, phase);
    }
  }

 private:

  void print_single_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
    // No need for min, max, average and sum for only one worker
    LineBuffer buf(phase->_indent_level);
    buf.append_and_print_cr("[%s:  %.1lf]", phase->_title, _phase_times->get_time_ms(phase_id, 0));

    if (phase->_thread_work_items != NULL) {
      LineBuffer buf2(phase->_thread_work_items->_indent_level);
      buf2.append_and_print_cr("[%s:  "SIZE_FORMAT"]", phase->_thread_work_items->_title, _phase_times->sum_thread_work_items(phase_id));
    }
  }

  void print_time_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
    for (uint i = 0; i < phase->_length; ++i) {
      buf.append("  %.1lf", _phase_times->get_time_ms(phase_id, i));
    }
    buf.print_cr();
  }

  void print_count_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
    for (uint i = 0; i < thread_work_items->_length; ++i) {
      buf.append("  " SIZE_FORMAT, _phase_times->get_thread_work_item(phase_id, i));
    }
    buf.print_cr();
  }

  void print_thread_work_items(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
    LineBuffer buf(thread_work_items->_indent_level);
    buf.append("[%s:", thread_work_items->_title);

    if (G1Log::finest()) {
      print_count_values(buf, phase_id, thread_work_items);
    }

    assert(thread_work_items->_print_sum, err_msg("%s does not have print sum true even though it is a count", thread_work_items->_title));

    buf.append_and_print_cr(" Min: " SIZE_FORMAT ", Avg: %.1lf, Max: " SIZE_FORMAT ", Diff: " SIZE_FORMAT ", Sum: " SIZE_FORMAT "]",
        _phase_times->min_thread_work_items(phase_id), _phase_times->average_thread_work_items(phase_id), _phase_times->max_thread_work_items(phase_id),
        _phase_times->max_thread_work_items(phase_id) - _phase_times->min_thread_work_items(phase_id), _phase_times->sum_thread_work_items(phase_id));
  }

  void print_multi_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
    LineBuffer buf(phase->_indent_level);
    buf.append("[%s:", phase->_title);

    if (G1Log::finest()) {
      print_time_values(buf, phase_id, phase);
    }

    buf.append(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf",
        _phase_times->min_time_ms(phase_id), _phase_times->average_time_ms(phase_id), _phase_times->max_time_ms(phase_id),
        _phase_times->max_time_ms(phase_id) - _phase_times->min_time_ms(phase_id));

    if (phase->_print_sum) {
      // for things like the start and end times the sum is not
      // that relevant
      buf.append(", Sum: %.1lf", _phase_times->sum_time_ms(phase_id));
    }

    buf.append_and_print_cr("]");

    if (phase->_thread_work_items != NULL) {
      print_thread_work_items(phase_id, phase->_thread_work_items);
    }
  }
};

void G1GCPhaseTimes::print(double pause_time_sec) {
  G1GCParPhasePrinter par_phase_printer(this);

  if (_root_region_scan_wait_time_ms > 0.0) {
    print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
  }

  print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
  for (int i = 0; i <= GCMainParPhasesLast; i++) {
    par_phase_printer.print((GCParPhases) i);
  }

  print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
  print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
  if (G1StringDedup::is_enabled()) {
    print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads);
    for (int i = StringDedupPhasesFirst; i <= StringDedupPhasesLast; i++) {
      par_phase_printer.print((GCParPhases) i);
    }
  }
  print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
  double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
  print_stats(1, "Other", misc_time_ms);
  if (_cur_verify_before_time_ms > 0.0) {
    print_stats(2, "Verify Before", _cur_verify_before_time_ms);
  }
  if (G1CollectedHeap::heap()->evacuation_failed()) {
    double evac_fail_handling = _cur_evac_fail_recalc_used + _cur_evac_fail_remove_self_forwards +
      _cur_evac_fail_restore_remsets;
    print_stats(2, "Evacuation Failure", evac_fail_handling);
    if (G1Log::finest()) {
      print_stats(3, "Recalculate Used", _cur_evac_fail_recalc_used);
      print_stats(3, "Remove Self Forwards", _cur_evac_fail_remove_self_forwards);
      print_stats(3, "Restore RemSet", _cur_evac_fail_restore_remsets);
    }
  }
  print_stats(2, "Choose CSet",
    (_recorded_young_cset_choice_time_ms +
    _recorded_non_young_cset_choice_time_ms));
  print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
  print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
  print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
  par_phase_printer.print(RedirtyCards);

  if (G1EagerReclaimHumongousObjects) {
    print_stats(2, "Humongous Register", _cur_fast_reclaim_humongous_register_time_ms);
    if (G1Log::finest()) {
      print_stats(3, "Humongous Total", _cur_fast_reclaim_humongous_total);
      print_stats(3, "Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
    }
    print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
    if (G1Log::finest()) {
      print_stats(3, "Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed);
    }
  }
  print_stats(2, "Free CSet",
    (_recorded_young_free_cset_time_ms +
    _recorded_non_young_free_cset_time_ms));
  if (G1Log::finest()) {
    print_stats(3, "Young Free CSet", _recorded_young_free_cset_time_ms);
    print_stats(3, "Non-Young Free CSet", _recorded_non_young_free_cset_time_ms);
  }
  if (_cur_verify_after_time_ms > 0.0) {
    print_stats(2, "Verify After", _cur_verify_after_time_ms);
  }
}

G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) :
    _phase_times(phase_times), _phase(phase), _worker_id(worker_id) {
  if (_phase_times != NULL) {
    _start_time = os::elapsedTime();
  }
}

G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() {
  if (_phase_times != NULL) {
    _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time);
  }
}