Mercurial > hg > graal-jvmci-8
diff src/share/vm/classfile/symbolTable.cpp @ 6162:e9140bf80b4a
7158800: Improve storage of symbol tables
Summary: Use an alternate version of hashing algorithm for symbol string tables and after a certain bucket size to improve performance
Reviewed-by: pbk, kamg, dlong, kvn, fparain
author | coleenp |
---|---|
date | Wed, 13 Jun 2012 19:52:59 -0400 |
parents | fc9d8850ab8b |
children | 246d977b51f2 |
line wrap: on
line diff
--- a/src/share/vm/classfile/symbolTable.cpp Mon Jun 11 13:10:14 2012 -0400 +++ b/src/share/vm/classfile/symbolTable.cpp Wed Jun 13 19:52:59 2012 -0400 @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "classfile/altHashing.hpp" #include "classfile/javaClasses.hpp" #include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" @@ -34,12 +35,15 @@ #include "oops/oop.inline2.hpp" #include "runtime/mutexLocker.hpp" #include "utilities/hashtable.inline.hpp" +#include "utilities/numberSeq.hpp" // -------------------------------------------------------------------------- SymbolTable* SymbolTable::_the_table = NULL; // Static arena for symbols that are not deallocated Arena* SymbolTable::_arena = NULL; +bool SymbolTable::_needs_rehashing = false; +jint SymbolTable::_seed = 0; Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) { // Don't allow symbols to be created which cannot fit in a Symbol*. @@ -121,12 +125,41 @@ } } +unsigned int SymbolTable::new_hash(Symbol* sym) { + ResourceMark rm; + // Use alternate hashing algorithm on this symbol. + return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length()); +} + +// Create a new table and using alternate hash code, populate the new table +// with the existing strings. Set flag to use the alternate hash code afterwards. +void SymbolTable::rehash_table() { + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump"); + // Create a new symbol table + SymbolTable* new_table = new SymbolTable(); + + // Initialize the global seed for hashing. + _seed = AltHashing::compute_seed(); + assert(seed() != 0, "shouldn't be zero"); + + the_table()->move_to(new_table); + + // Delete the table and buckets (entries are reused in new table). + delete _the_table; + // Don't check if we need rehashing until the table gets unbalanced again. + // Then rehash with a new global seed. + _needs_rehashing = false; + _the_table = new_table; +} // Lookup a symbol in a bucket. Symbol* SymbolTable::lookup(int index, const char* name, int len, unsigned int hash) { + int count = 0; for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) { + count++; // count all entries in this bucket, not just ones with same hash if (e->hash() == hash) { Symbol* sym = e->literal(); if (sym->equals(name, len)) { @@ -136,9 +169,21 @@ } } } + // If the bucket size is too deep check if this hash code is insufficient. + if (count >= BasicHashtable::rehash_count && !needs_rehashing()) { + _needs_rehashing = check_rehash_table(count); + } return NULL; } +// Pick hashing algorithm, but return value already given if not using a new +// hash algorithm. +unsigned int SymbolTable::hash_symbol(const char* s, int len, unsigned int hashValue) { + return use_alternate_hashcode() ? + AltHashing::murmur3_32(seed(), (const jbyte*)s, len) : + (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len)); +} + // We take care not to be blocking while holding the // SymbolTable_lock. Otherwise, the system might deadlock, since the @@ -287,13 +332,17 @@ } Symbol* SymbolTable::basic_add(int index, u1 *name, int len, - unsigned int hashValue, bool c_heap, TRAPS) { + unsigned int hashValue_arg, bool c_heap, TRAPS) { assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), "proposed name of symbol must be stable"); // Grab SymbolTable_lock first. MutexLocker ml(SymbolTable_lock, THREAD); + // Check if the symbol table has been rehashed, if so, need to recalculate + // the hash value. + unsigned int hashValue = hash_symbol((const char*)name, len, hashValue_arg); + // Since look-up was done lock-free, we need to check if another // thread beat us in the race to insert the symbol. Symbol* test = lookup(index, (char*)name, len, hashValue); @@ -332,10 +381,13 @@ MutexLocker ml(SymbolTable_lock, THREAD); for (int i=0; i<names_count; i++) { + // Check if the symbol table has been rehashed, if so, need to recalculate + // the hash value. + unsigned int hashValue = hash_symbol(names[i], lengths[i], hashValues[i]); // Since look-up was done lock-free, we need to check if another // thread beat us in the race to insert the symbol. - int index = hash_to_index(hashValues[i]); - Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]); + int index = hash_to_index(hashValue); + Symbol* test = lookup(index, names[i], lengths[i], hashValue); if (test != NULL) { // A race occurred and another thread introduced the symbol, this one // will be dropped and collected. Use test instead. @@ -347,7 +399,7 @@ bool c_heap = class_loader() != NULL; Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false)); assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be??? - HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym); + HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym); add_entry(index, entry); cp->symbol_at_put(cp_indices[i], sym); } @@ -370,6 +422,24 @@ } } +void SymbolTable::dump(outputStream* st) { + NumberSeq summary; + for (int i = 0; i < the_table()->table_size(); ++i) { + int count = 0; + for (HashtableEntry<Symbol*>* e = the_table()->bucket(i); + e != NULL; e = e->next()) { + count++; + } + summary.add((double)count); + } + st->print_cr("SymbolTable statistics:"); + st->print_cr("Number of buckets : %7d", summary.num()); + st->print_cr("Average bucket size : %7.0f", summary.avg()); + st->print_cr("Variance of bucket size : %7.0f", summary.variance()); + st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd()); + st->print_cr("Maximum bucket size : %7.0f", summary.maximum()); +} + //--------------------------------------------------------------------------- // Non-product code @@ -468,7 +538,6 @@ } } } - #endif // PRODUCT // -------------------------------------------------------------------------- @@ -514,21 +583,36 @@ // -------------------------------------------------------------------------- StringTable* StringTable::_the_table = NULL; +bool StringTable::_needs_rehashing = false; +jint StringTable::_seed = 0; + +// Pick hashing algorithm +unsigned int StringTable::hash_string(const jchar* s, int len, unsigned int hashValue) { + return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) : + (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len)); +} + oop StringTable::lookup(int index, jchar* name, int len, unsigned int hash) { + int count = 0; for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) { + count++; if (l->hash() == hash) { if (java_lang_String::equals(l->literal(), name, len)) { return l->literal(); } } } + // If the bucket size is too deep check if this hash code is insufficient. + if (count >= BasicHashtable::rehash_count && !needs_rehashing()) { + _needs_rehashing = check_rehash_table(count); + } return NULL; } oop StringTable::basic_add(int index, Handle string_or_null, jchar* name, - int len, unsigned int hashValue, TRAPS) { + int len, unsigned int hashValue_arg, TRAPS) { debug_only(StableMemoryChecker smc(name, len * sizeof(name[0]))); assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), "proposed name of symbol must be stable"); @@ -547,6 +631,10 @@ assert(java_lang_String::equals(string(), name, len), "string must be properly initialized"); + // Check if the symbol table has been rehashed, if so, need to recalculate + // the hash value before second lookup. + unsigned int hashValue = hash_string(name, len, hashValue_arg); + // Since look-up was done lock-free, we need to check if another // thread beat us in the race to insert the symbol. @@ -566,7 +654,7 @@ ResourceMark rm; int length; jchar* chars = symbol->as_unicode(length); - unsigned int hashValue = java_lang_String::hash_string(chars, length); + unsigned int hashValue = hash_string(chars, length); int index = the_table()->hash_to_index(hashValue); return the_table()->lookup(index, chars, length, hashValue); } @@ -574,7 +662,7 @@ oop StringTable::intern(Handle string_or_null, jchar* name, int len, TRAPS) { - unsigned int hashValue = java_lang_String::hash_string(name, len); + unsigned int hashValue = hash_string(name, len); int index = the_table()->hash_to_index(hashValue); oop string = the_table()->lookup(index, name, len, hashValue); @@ -675,3 +763,52 @@ } } } + +void StringTable::dump(outputStream* st) { + NumberSeq summary; + for (int i = 0; i < the_table()->table_size(); ++i) { + HashtableEntry<oop>* p = the_table()->bucket(i); + int count = 0; + for ( ; p != NULL; p = p->next()) { + count++; + } + summary.add((double)count); + } + st->print_cr("StringTable statistics:"); + st->print_cr("Number of buckets : %7d", summary.num()); + st->print_cr("Average bucket size : %7.0f", summary.avg()); + st->print_cr("Variance of bucket size : %7.0f", summary.variance()); + st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd()); + st->print_cr("Maximum bucket size : %7.0f", summary.maximum()); +} + + +unsigned int StringTable::new_hash(oop string) { + ResourceMark rm; + int length; + jchar* chars = java_lang_String::as_unicode_string(string, length); + // Use alternate hashing algorithm on the string + return AltHashing::murmur3_32(seed(), chars, length); +} + +// Create a new table and using alternate hash code, populate the new table +// with the existing strings. Set flag to use the alternate hash code afterwards. +void StringTable::rehash_table() { + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump"); + StringTable* new_table = new StringTable(); + + // Initialize new global seed for hashing. + _seed = AltHashing::compute_seed(); + assert(seed() != 0, "shouldn't be zero"); + + // Rehash the table + the_table()->move_to(new_table); + + // Delete the table and buckets (entries are reused in new table). + delete _the_table; + // Don't check if we need rehashing until the table gets unbalanced again. + // Then rehash with a new global seed. + _needs_rehashing = false; + _the_table = new_table; +}