view src/share/vm/gc_implementation/g1/g1StringDedupTable.cpp @ 20211:82693fb204a5

8038930: G1CodeRootSet::test fails with assert(_num_chunks_handed_out == 0) failed: No elements must have been handed out yet Summary: The test incorrectly assumed that it had been started with no other previous compilation activity. Fix this by allowing multiple code root free chunk lists, and use one separate from the global one to perform the test. Reviewed-by: brutisso
author tschatzl
date Wed, 16 Apr 2014 10:14:50 +0200
parents 595c0f60d50d
children 04a62a3d51d7
line wrap: on
line source

/*
 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/altHashing.hpp"
#include "classfile/javaClasses.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/g1StringDedupTable.hpp"
#include "memory/gcLocker.hpp"
#include "memory/padded.inline.hpp"
#include "oops/typeArrayOop.hpp"
#include "runtime/mutexLocker.hpp"

//
// Freelist in the deduplication table entry cache. Links table
// entries together using their _next fields.
//
class G1StringDedupEntryFreeList : public CHeapObj<mtGC> {
private:
  G1StringDedupEntry* _list;
  size_t              _length;

public:
  G1StringDedupEntryFreeList() :
    _list(NULL),
    _length(0) {
  }

  void add(G1StringDedupEntry* entry) {
    entry->set_next(_list);
    _list = entry;
    _length++;
  }

  G1StringDedupEntry* remove() {
    G1StringDedupEntry* entry = _list;
    if (entry != NULL) {
      _list = entry->next();
      _length--;
    }
    return entry;
  }

  size_t length() {
    return _length;
  }
};

//
// Cache of deduplication table entries. This cache provides fast allocation and
// reuse of table entries to lower the pressure on the underlying allocator.
// But more importantly, it provides fast/deferred freeing of table entries. This
// is important because freeing of table entries is done during stop-the-world
// phases and it is not uncommon for large number of entries to be freed at once.
// Tables entries that are freed during these phases are placed onto a freelist in
// the cache. The deduplication thread, which executes in a concurrent phase, will
// later reuse or free the underlying memory for these entries.
//
// The cache allows for single-threaded allocations and multi-threaded frees.
// Allocations are synchronized by StringDedupTable_lock as part of a table
// modification.
//
class G1StringDedupEntryCache : public CHeapObj<mtGC> {
private:
  // One freelist per GC worker to allow lock less freeing of
  // entries while doing a parallel scan of the table. Using
  // PaddedEnd to avoid false sharing.
  PaddedEnd<G1StringDedupEntryFreeList>* _lists;
  size_t                                 _nlists;

public:
  G1StringDedupEntryCache();
  ~G1StringDedupEntryCache();

  // Get a table entry from the cache freelist, or allocate a new
  // entry if the cache is empty.
  G1StringDedupEntry* alloc();

  // Insert a table entry into the cache freelist.
  void free(G1StringDedupEntry* entry, uint worker_id);

  // Returns current number of entries in the cache.
  size_t size();

  // If the cache has grown above the given max size, trim it down
  // and deallocate the memory occupied by trimmed of entries.
  void trim(size_t max_size);
};

G1StringDedupEntryCache::G1StringDedupEntryCache() {
  _nlists = MAX2(ParallelGCThreads, (size_t)1);
  _lists = PaddedArray<G1StringDedupEntryFreeList, mtGC>::create_unfreeable((uint)_nlists);
}

G1StringDedupEntryCache::~G1StringDedupEntryCache() {
  ShouldNotReachHere();
}

G1StringDedupEntry* G1StringDedupEntryCache::alloc() {
  for (size_t i = 0; i < _nlists; i++) {
    G1StringDedupEntry* entry = _lists[i].remove();
    if (entry != NULL) {
      return entry;
    }
  }
  return new G1StringDedupEntry();
}

void G1StringDedupEntryCache::free(G1StringDedupEntry* entry, uint worker_id) {
  assert(entry->obj() != NULL, "Double free");
  assert(worker_id < _nlists, "Invalid worker id");
  entry->set_obj(NULL);
  entry->set_hash(0);
  _lists[worker_id].add(entry);
}

size_t G1StringDedupEntryCache::size() {
  size_t size = 0;
  for (size_t i = 0; i < _nlists; i++) {
    size += _lists[i].length();
  }
  return size;
}

void G1StringDedupEntryCache::trim(size_t max_size) {
  size_t cache_size = 0;
  for (size_t i = 0; i < _nlists; i++) {
    G1StringDedupEntryFreeList* list = &_lists[i];
    cache_size += list->length();
    while (cache_size > max_size) {
      G1StringDedupEntry* entry = list->remove();
      assert(entry != NULL, "Should not be null");
      cache_size--;
      delete entry;
    }
  }
}

G1StringDedupTable*      G1StringDedupTable::_table = NULL;
G1StringDedupEntryCache* G1StringDedupTable::_entry_cache = NULL;

const size_t             G1StringDedupTable::_min_size = (1 << 10);   // 1024
const size_t             G1StringDedupTable::_max_size = (1 << 24);   // 16777216
const double             G1StringDedupTable::_grow_load_factor = 2.0; // Grow table at 200% load
const double             G1StringDedupTable::_shrink_load_factor = _grow_load_factor / 3.0; // Shrink table at 67% load
const double             G1StringDedupTable::_max_cache_factor = 0.1; // Cache a maximum of 10% of the table size
const uintx              G1StringDedupTable::_rehash_multiple = 60;   // Hash bucket has 60 times more collisions than expected
const uintx              G1StringDedupTable::_rehash_threshold = (uintx)(_rehash_multiple * _grow_load_factor);

uintx                    G1StringDedupTable::_entries_added = 0;
uintx                    G1StringDedupTable::_entries_removed = 0;
uintx                    G1StringDedupTable::_resize_count = 0;
uintx                    G1StringDedupTable::_rehash_count = 0;

G1StringDedupTable::G1StringDedupTable(size_t size, jint hash_seed) :
  _size(size),
  _entries(0),
  _grow_threshold((uintx)(size * _grow_load_factor)),
  _shrink_threshold((uintx)(size * _shrink_load_factor)),
  _rehash_needed(false),
  _hash_seed(hash_seed) {
  assert(is_power_of_2(size), "Table size must be a power of 2");
  _buckets = NEW_C_HEAP_ARRAY(G1StringDedupEntry*, _size, mtGC);
  memset(_buckets, 0, _size * sizeof(G1StringDedupEntry*));
}

G1StringDedupTable::~G1StringDedupTable() {
  FREE_C_HEAP_ARRAY(G1StringDedupEntry*, _buckets, mtGC);
}

void G1StringDedupTable::create() {
  assert(_table == NULL, "One string deduplication table allowed");
  _entry_cache = new G1StringDedupEntryCache();
  _table = new G1StringDedupTable(_min_size);
}

void G1StringDedupTable::add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list) {
  G1StringDedupEntry* entry = _entry_cache->alloc();
  entry->set_obj(value);
  entry->set_hash(hash);
  entry->set_next(*list);
  *list = entry;
  _entries++;
}

void G1StringDedupTable::remove(G1StringDedupEntry** pentry, uint worker_id) {
  G1StringDedupEntry* entry = *pentry;
  *pentry = entry->next();
  _entry_cache->free(entry, worker_id);
}

void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) {
  G1StringDedupEntry* entry = *pentry;
  *pentry = entry->next();
  unsigned int hash = entry->hash();
  size_t index = dest->hash_to_index(hash);
  G1StringDedupEntry** list = dest->bucket(index);
  entry->set_next(*list);
  *list = entry;
}

bool G1StringDedupTable::equals(typeArrayOop value1, typeArrayOop value2) {
  return (value1 == value2 ||
          (value1->length() == value2->length() &&
           (!memcmp(value1->base(T_CHAR),
                    value2->base(T_CHAR),
                    value1->length() * sizeof(jchar)))));
}

typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, unsigned int hash,
                                        G1StringDedupEntry** list, uintx &count) {
  for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) {
    if (entry->hash() == hash) {
      typeArrayOop existing_value = entry->obj();
      if (equals(value, existing_value)) {
        // Match found
        return existing_value;
      }
    }
    count++;
  }

  // Not found
  return NULL;
}

typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, unsigned int hash) {
  size_t index = hash_to_index(hash);
  G1StringDedupEntry** list = bucket(index);
  uintx count = 0;

  // Lookup in list
  typeArrayOop existing_value = lookup(value, hash, list, count);

  // Check if rehash is needed
  if (count > _rehash_threshold) {
    _rehash_needed = true;
  }

  if (existing_value == NULL) {
    // Not found, add new entry
    add(value, hash, list);

    // Update statistics
    _entries_added++;
  }

  return existing_value;
}

unsigned int G1StringDedupTable::hash_code(typeArrayOop value) {
  unsigned int hash;
  int length = value->length();
  const jchar* data = (jchar*)value->base(T_CHAR);

  if (use_java_hash()) {
    hash = java_lang_String::hash_code(data, length);
  } else {
    hash = AltHashing::murmur3_32(_table->_hash_seed, data, length);
  }

  return hash;
}

void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) {
  assert(java_lang_String::is_instance(java_string), "Must be a string");
  No_Safepoint_Verifier nsv;

  stat.inc_inspected();

  typeArrayOop value = java_lang_String::value(java_string);
  if (value == NULL) {
    // String has no value
    stat.inc_skipped();
    return;
  }

  unsigned int hash = 0;

  if (use_java_hash()) {
    // Get hash code from cache
    hash = java_lang_String::hash(java_string);
  }

  if (hash == 0) {
    // Compute hash
    hash = hash_code(value);
    stat.inc_hashed();
  }

  if (use_java_hash() && hash != 0) {
    // Store hash code in cache
    java_lang_String::set_hash(java_string, hash);
  }

  typeArrayOop existing_value = lookup_or_add(value, hash);
  if (existing_value == value) {
    // Same value, already known
    stat.inc_known();
    return;
  }

  // Get size of value array
  uintx size_in_bytes = value->size() * HeapWordSize;
  stat.inc_new(size_in_bytes);

  if (existing_value != NULL) {
    // Enqueue the reference to make sure it is kept alive. Concurrent mark might
    // otherwise declare it dead if there are no other strong references to this object.
    G1SATBCardTableModRefBS::enqueue(existing_value);

    // Existing value found, deduplicate string
    java_lang_String::set_value(java_string, existing_value);

    if (G1CollectedHeap::heap()->is_in_young(value)) {
      stat.inc_deduped_young(size_in_bytes);
    } else {
      stat.inc_deduped_old(size_in_bytes);
    }
  }
}

G1StringDedupTable* G1StringDedupTable::prepare_resize() {
  size_t size = _table->_size;

  // Check if the hashtable needs to be resized
  if (_table->_entries > _table->_grow_threshold) {
    // Grow table, double the size
    size *= 2;
    if (size > _max_size) {
      // Too big, don't resize
      return NULL;
    }
  } else if (_table->_entries < _table->_shrink_threshold) {
    // Shrink table, half the size
    size /= 2;
    if (size < _min_size) {
      // Too small, don't resize
      return NULL;
    }
  } else if (StringDeduplicationResizeALot) {
    // Force grow
    size *= 2;
    if (size > _max_size) {
      // Too big, force shrink instead
      size /= 4;
    }
  } else {
    // Resize not needed
    return NULL;
  }

  // Update statistics
  _resize_count++;

  // Allocate the new table. The new table will be populated by workers
  // calling unlink_or_oops_do() and finally installed by finish_resize().
  return new G1StringDedupTable(size, _table->_hash_seed);
}

void G1StringDedupTable::finish_resize(G1StringDedupTable* resized_table) {
  assert(resized_table != NULL, "Invalid table");

  resized_table->_entries = _table->_entries;

  // Free old table
  delete _table;

  // Install new table
  _table = resized_table;
}

void G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id) {
  // The table is divided into partitions to allow lock-less parallel processing by
  // multiple worker threads. A worker thread first claims a partition, which ensures
  // exclusive access to that part of the table, then continues to process it. To allow
  // shrinking of the table in parallel we also need to make sure that the same worker
  // thread processes all partitions where entries will hash to the same destination
  // partition. Since the table size is always a power of two and we always shrink by
  // dividing the table in half, we know that for a given partition there is only one
  // other partition whoes entries will hash to the same destination partition. That
  // other partition is always the sibling partition in the second half of the table.
  // For example, if the table is divided into 8 partitions, the sibling of partition 0
  // is partition 4, the sibling of partition 1 is partition 5, etc.
  size_t table_half = _table->_size / 2;

  // Let each partition be one page worth of buckets
  size_t partition_size = MIN2(table_half, os::vm_page_size() / sizeof(G1StringDedupEntry*));
  assert(table_half % partition_size == 0, "Invalid partition size");

  // Number of entries removed during the scan
  uintx removed = 0;

  for (;;) {
    // Grab next partition to scan
    size_t partition_begin = cl->claim_table_partition(partition_size);
    size_t partition_end = partition_begin + partition_size;
    if (partition_begin >= table_half) {
      // End of table
      break;
    }

    // Scan the partition followed by the sibling partition in the second half of the table
    removed += unlink_or_oops_do(cl, partition_begin, partition_end, worker_id);
    removed += unlink_or_oops_do(cl, table_half + partition_begin, table_half + partition_end, worker_id);
  }

  // Delayed update avoid contention on the table lock
  if (removed > 0) {
    MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
    _table->_entries -= removed;
    _entries_removed += removed;
  }
}

uintx G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl,
                                            size_t partition_begin,
                                            size_t partition_end,
                                            uint worker_id) {
  uintx removed = 0;
  for (size_t bucket = partition_begin; bucket < partition_end; bucket++) {
    G1StringDedupEntry** entry = _table->bucket(bucket);
    while (*entry != NULL) {
      oop* p = (oop*)(*entry)->obj_addr();
      if (cl->is_alive(*p)) {
        cl->keep_alive(p);
        if (cl->is_resizing()) {
          // We are resizing the table, transfer entry to the new table
          _table->transfer(entry, cl->resized_table());
        } else {
          if (cl->is_rehashing()) {
            // We are rehashing the table, rehash the entry but keep it
            // in the table. We can't transfer entries into the new table
            // at this point since we don't have exclusive access to all
            // destination partitions. finish_rehash() will do a single
            // threaded transfer of all entries.
            typeArrayOop value = (typeArrayOop)*p;
            unsigned int hash = hash_code(value);
            (*entry)->set_hash(hash);
          }

          // Move to next entry
          entry = (*entry)->next_addr();
        }
      } else {
        // Not alive, remove entry from table
        _table->remove(entry, worker_id);
        removed++;
      }
    }
  }

  return removed;
}

G1StringDedupTable* G1StringDedupTable::prepare_rehash() {
  if (!_table->_rehash_needed && !StringDeduplicationRehashALot) {
    // Rehash not needed
    return NULL;
  }

  // Update statistics
  _rehash_count++;

  // Compute new hash seed
  _table->_hash_seed = AltHashing::compute_seed();

  // Allocate the new table, same size and hash seed
  return new G1StringDedupTable(_table->_size, _table->_hash_seed);
}

void G1StringDedupTable::finish_rehash(G1StringDedupTable* rehashed_table) {
  assert(rehashed_table != NULL, "Invalid table");

  // Move all newly rehashed entries into the correct buckets in the new table
  for (size_t bucket = 0; bucket < _table->_size; bucket++) {
    G1StringDedupEntry** entry = _table->bucket(bucket);
    while (*entry != NULL) {
      _table->transfer(entry, rehashed_table);
    }
  }

  rehashed_table->_entries = _table->_entries;

  // Free old table
  delete _table;

  // Install new table
  _table = rehashed_table;
}

void G1StringDedupTable::verify() {
  for (size_t bucket = 0; bucket < _table->_size; bucket++) {
    // Verify entries
    G1StringDedupEntry** entry = _table->bucket(bucket);
    while (*entry != NULL) {
      typeArrayOop value = (*entry)->obj();
      guarantee(value != NULL, "Object must not be NULL");
      guarantee(Universe::heap()->is_in_reserved(value), "Object must be on the heap");
      guarantee(!value->is_forwarded(), "Object must not be forwarded");
      guarantee(value->is_typeArray(), "Object must be a typeArrayOop");
      unsigned int hash = hash_code(value);
      guarantee((*entry)->hash() == hash, "Table entry has inorrect hash");
      guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index");
      entry = (*entry)->next_addr();
    }

    // Verify that we do not have entries with identical oops or identical arrays.
    // We only need to compare entries in the same bucket. If the same oop or an
    // identical array has been inserted more than once into different/incorrect
    // buckets the verification step above will catch that.
    G1StringDedupEntry** entry1 = _table->bucket(bucket);
    while (*entry1 != NULL) {
      typeArrayOop value1 = (*entry1)->obj();
      G1StringDedupEntry** entry2 = (*entry1)->next_addr();
      while (*entry2 != NULL) {
        typeArrayOop value2 = (*entry2)->obj();
        guarantee(!equals(value1, value2), "Table entries must not have identical arrays");
        entry2 = (*entry2)->next_addr();
      }
      entry1 = (*entry1)->next_addr();
    }
  }
}

void G1StringDedupTable::trim_entry_cache() {
  MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
  size_t max_cache_size = (size_t)(_table->_size * _max_cache_factor);
  _entry_cache->trim(max_cache_size);
}

void G1StringDedupTable::print_statistics(outputStream* st) {
  st->print_cr(
    "   [Table]\n"
    "      [Memory Usage: "G1_STRDEDUP_BYTES_FORMAT_NS"]\n"
    "      [Size: "SIZE_FORMAT", Min: "SIZE_FORMAT", Max: "SIZE_FORMAT"]\n"
    "      [Entries: "UINTX_FORMAT", Load: "G1_STRDEDUP_PERCENT_FORMAT_NS", Cached: " UINTX_FORMAT ", Added: "UINTX_FORMAT", Removed: "UINTX_FORMAT"]\n"
    "      [Resize Count: "UINTX_FORMAT", Shrink Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS"), Grow Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS")]\n"
    "      [Rehash Count: "UINTX_FORMAT", Rehash Threshold: "UINTX_FORMAT", Hash Seed: 0x%x]\n"
    "      [Age Threshold: "UINTX_FORMAT"]",
    G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)),
    _table->_size, _min_size, _max_size,
    _table->_entries, (double)_table->_entries / (double)_table->_size * 100.0, _entry_cache->size(), _entries_added, _entries_removed,
    _resize_count, _table->_shrink_threshold, _shrink_load_factor * 100.0, _table->_grow_threshold, _grow_load_factor * 100.0,
    _rehash_count, _rehash_threshold, _table->_hash_seed,
    StringDeduplicationAgeThreshold);
}