Mercurial > hg > graal-jvmci-8
comparison src/share/vm/classfile/symbolTable.cpp @ 6162:e9140bf80b4a
7158800: Improve storage of symbol tables
Summary: Use an alternate version of hashing algorithm for symbol string tables and after a certain bucket size to improve performance
Reviewed-by: pbk, kamg, dlong, kvn, fparain
author | coleenp |
---|---|
date | Wed, 13 Jun 2012 19:52:59 -0400 |
parents | fc9d8850ab8b |
children | 246d977b51f2 |
comparison
equal
deleted
inserted
replaced
6129:4d399f013e5a | 6162:e9140bf80b4a |
---|---|
21 * questions. | 21 * questions. |
22 * | 22 * |
23 */ | 23 */ |
24 | 24 |
25 #include "precompiled.hpp" | 25 #include "precompiled.hpp" |
26 #include "classfile/altHashing.hpp" | |
26 #include "classfile/javaClasses.hpp" | 27 #include "classfile/javaClasses.hpp" |
27 #include "classfile/symbolTable.hpp" | 28 #include "classfile/symbolTable.hpp" |
28 #include "classfile/systemDictionary.hpp" | 29 #include "classfile/systemDictionary.hpp" |
29 #include "gc_interface/collectedHeap.inline.hpp" | 30 #include "gc_interface/collectedHeap.inline.hpp" |
30 #include "memory/allocation.inline.hpp" | 31 #include "memory/allocation.inline.hpp" |
32 #include "memory/gcLocker.inline.hpp" | 33 #include "memory/gcLocker.inline.hpp" |
33 #include "oops/oop.inline.hpp" | 34 #include "oops/oop.inline.hpp" |
34 #include "oops/oop.inline2.hpp" | 35 #include "oops/oop.inline2.hpp" |
35 #include "runtime/mutexLocker.hpp" | 36 #include "runtime/mutexLocker.hpp" |
36 #include "utilities/hashtable.inline.hpp" | 37 #include "utilities/hashtable.inline.hpp" |
38 #include "utilities/numberSeq.hpp" | |
37 | 39 |
38 // -------------------------------------------------------------------------- | 40 // -------------------------------------------------------------------------- |
39 | 41 |
40 SymbolTable* SymbolTable::_the_table = NULL; | 42 SymbolTable* SymbolTable::_the_table = NULL; |
41 // Static arena for symbols that are not deallocated | 43 // Static arena for symbols that are not deallocated |
42 Arena* SymbolTable::_arena = NULL; | 44 Arena* SymbolTable::_arena = NULL; |
45 bool SymbolTable::_needs_rehashing = false; | |
46 jint SymbolTable::_seed = 0; | |
43 | 47 |
44 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) { | 48 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) { |
45 // Don't allow symbols to be created which cannot fit in a Symbol*. | 49 // Don't allow symbols to be created which cannot fit in a Symbol*. |
46 if (len > Symbol::max_length()) { | 50 if (len > Symbol::max_length()) { |
47 THROW_MSG_0(vmSymbols::java_lang_InternalError(), | 51 THROW_MSG_0(vmSymbols::java_lang_InternalError(), |
119 gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", total, | 123 gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", total, |
120 (memory_total*HeapWordSize)/1024); | 124 (memory_total*HeapWordSize)/1024); |
121 } | 125 } |
122 } | 126 } |
123 | 127 |
128 unsigned int SymbolTable::new_hash(Symbol* sym) { | |
129 ResourceMark rm; | |
130 // Use alternate hashing algorithm on this symbol. | |
131 return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length()); | |
132 } | |
133 | |
134 // Create a new table and using alternate hash code, populate the new table | |
135 // with the existing strings. Set flag to use the alternate hash code afterwards. | |
136 void SymbolTable::rehash_table() { | |
137 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); | |
138 assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump"); | |
139 // Create a new symbol table | |
140 SymbolTable* new_table = new SymbolTable(); | |
141 | |
142 // Initialize the global seed for hashing. | |
143 _seed = AltHashing::compute_seed(); | |
144 assert(seed() != 0, "shouldn't be zero"); | |
145 | |
146 the_table()->move_to(new_table); | |
147 | |
148 // Delete the table and buckets (entries are reused in new table). | |
149 delete _the_table; | |
150 // Don't check if we need rehashing until the table gets unbalanced again. | |
151 // Then rehash with a new global seed. | |
152 _needs_rehashing = false; | |
153 _the_table = new_table; | |
154 } | |
124 | 155 |
125 // Lookup a symbol in a bucket. | 156 // Lookup a symbol in a bucket. |
126 | 157 |
127 Symbol* SymbolTable::lookup(int index, const char* name, | 158 Symbol* SymbolTable::lookup(int index, const char* name, |
128 int len, unsigned int hash) { | 159 int len, unsigned int hash) { |
160 int count = 0; | |
129 for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) { | 161 for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) { |
162 count++; // count all entries in this bucket, not just ones with same hash | |
130 if (e->hash() == hash) { | 163 if (e->hash() == hash) { |
131 Symbol* sym = e->literal(); | 164 Symbol* sym = e->literal(); |
132 if (sym->equals(name, len)) { | 165 if (sym->equals(name, len)) { |
133 // something is referencing this symbol now. | 166 // something is referencing this symbol now. |
134 sym->increment_refcount(); | 167 sym->increment_refcount(); |
135 return sym; | 168 return sym; |
136 } | 169 } |
137 } | 170 } |
138 } | 171 } |
172 // If the bucket size is too deep check if this hash code is insufficient. | |
173 if (count >= BasicHashtable::rehash_count && !needs_rehashing()) { | |
174 _needs_rehashing = check_rehash_table(count); | |
175 } | |
139 return NULL; | 176 return NULL; |
177 } | |
178 | |
179 // Pick hashing algorithm, but return value already given if not using a new | |
180 // hash algorithm. | |
181 unsigned int SymbolTable::hash_symbol(const char* s, int len, unsigned int hashValue) { | |
182 return use_alternate_hashcode() ? | |
183 AltHashing::murmur3_32(seed(), (const jbyte*)s, len) : | |
184 (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len)); | |
140 } | 185 } |
141 | 186 |
142 | 187 |
143 // We take care not to be blocking while holding the | 188 // We take care not to be blocking while holding the |
144 // SymbolTable_lock. Otherwise, the system might deadlock, since the | 189 // SymbolTable_lock. Otherwise, the system might deadlock, since the |
285 int index = table->hash_to_index(hash); | 330 int index = table->hash_to_index(hash); |
286 return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD); | 331 return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD); |
287 } | 332 } |
288 | 333 |
289 Symbol* SymbolTable::basic_add(int index, u1 *name, int len, | 334 Symbol* SymbolTable::basic_add(int index, u1 *name, int len, |
290 unsigned int hashValue, bool c_heap, TRAPS) { | 335 unsigned int hashValue_arg, bool c_heap, TRAPS) { |
291 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), | 336 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), |
292 "proposed name of symbol must be stable"); | 337 "proposed name of symbol must be stable"); |
293 | 338 |
294 // Grab SymbolTable_lock first. | 339 // Grab SymbolTable_lock first. |
295 MutexLocker ml(SymbolTable_lock, THREAD); | 340 MutexLocker ml(SymbolTable_lock, THREAD); |
341 | |
342 // Check if the symbol table has been rehashed, if so, need to recalculate | |
343 // the hash value. | |
344 unsigned int hashValue = hash_symbol((const char*)name, len, hashValue_arg); | |
296 | 345 |
297 // Since look-up was done lock-free, we need to check if another | 346 // Since look-up was done lock-free, we need to check if another |
298 // thread beat us in the race to insert the symbol. | 347 // thread beat us in the race to insert the symbol. |
299 Symbol* test = lookup(index, (char*)name, len, hashValue); | 348 Symbol* test = lookup(index, (char*)name, len, hashValue); |
300 if (test != NULL) { | 349 if (test != NULL) { |
330 | 379 |
331 // Hold SymbolTable_lock through the symbol creation | 380 // Hold SymbolTable_lock through the symbol creation |
332 MutexLocker ml(SymbolTable_lock, THREAD); | 381 MutexLocker ml(SymbolTable_lock, THREAD); |
333 | 382 |
334 for (int i=0; i<names_count; i++) { | 383 for (int i=0; i<names_count; i++) { |
384 // Check if the symbol table has been rehashed, if so, need to recalculate | |
385 // the hash value. | |
386 unsigned int hashValue = hash_symbol(names[i], lengths[i], hashValues[i]); | |
335 // Since look-up was done lock-free, we need to check if another | 387 // Since look-up was done lock-free, we need to check if another |
336 // thread beat us in the race to insert the symbol. | 388 // thread beat us in the race to insert the symbol. |
337 int index = hash_to_index(hashValues[i]); | 389 int index = hash_to_index(hashValue); |
338 Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]); | 390 Symbol* test = lookup(index, names[i], lengths[i], hashValue); |
339 if (test != NULL) { | 391 if (test != NULL) { |
340 // A race occurred and another thread introduced the symbol, this one | 392 // A race occurred and another thread introduced the symbol, this one |
341 // will be dropped and collected. Use test instead. | 393 // will be dropped and collected. Use test instead. |
342 cp->symbol_at_put(cp_indices[i], test); | 394 cp->symbol_at_put(cp_indices[i], test); |
343 assert(test->refcount() != 0, "lookup should have incremented the count"); | 395 assert(test->refcount() != 0, "lookup should have incremented the count"); |
345 // Create a new symbol. The null class loader is never unloaded so these | 397 // Create a new symbol. The null class loader is never unloaded so these |
346 // are allocated specially in a permanent arena. | 398 // are allocated specially in a permanent arena. |
347 bool c_heap = class_loader() != NULL; | 399 bool c_heap = class_loader() != NULL; |
348 Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false)); | 400 Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false)); |
349 assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be??? | 401 assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be??? |
350 HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym); | 402 HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym); |
351 add_entry(index, entry); | 403 add_entry(index, entry); |
352 cp->symbol_at_put(cp_indices[i], sym); | 404 cp->symbol_at_put(cp_indices[i], sym); |
353 } | 405 } |
354 } | 406 } |
355 return true; | 407 return true; |
366 guarantee(p->hash() == h, "broken hash in symbol table entry"); | 418 guarantee(p->hash() == h, "broken hash in symbol table entry"); |
367 guarantee(the_table()->hash_to_index(h) == i, | 419 guarantee(the_table()->hash_to_index(h) == i, |
368 "wrong index in symbol table"); | 420 "wrong index in symbol table"); |
369 } | 421 } |
370 } | 422 } |
423 } | |
424 | |
425 void SymbolTable::dump(outputStream* st) { | |
426 NumberSeq summary; | |
427 for (int i = 0; i < the_table()->table_size(); ++i) { | |
428 int count = 0; | |
429 for (HashtableEntry<Symbol*>* e = the_table()->bucket(i); | |
430 e != NULL; e = e->next()) { | |
431 count++; | |
432 } | |
433 summary.add((double)count); | |
434 } | |
435 st->print_cr("SymbolTable statistics:"); | |
436 st->print_cr("Number of buckets : %7d", summary.num()); | |
437 st->print_cr("Average bucket size : %7.0f", summary.avg()); | |
438 st->print_cr("Variance of bucket size : %7.0f", summary.variance()); | |
439 st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd()); | |
440 st->print_cr("Maximum bucket size : %7.0f", summary.maximum()); | |
371 } | 441 } |
372 | 442 |
373 | 443 |
374 //--------------------------------------------------------------------------- | 444 //--------------------------------------------------------------------------- |
375 // Non-product code | 445 // Non-product code |
466 } | 536 } |
467 tty->cr(); | 537 tty->cr(); |
468 } | 538 } |
469 } | 539 } |
470 } | 540 } |
471 | |
472 #endif // PRODUCT | 541 #endif // PRODUCT |
473 | 542 |
474 // -------------------------------------------------------------------------- | 543 // -------------------------------------------------------------------------- |
475 | 544 |
476 #ifdef ASSERT | 545 #ifdef ASSERT |
512 | 581 |
513 | 582 |
514 // -------------------------------------------------------------------------- | 583 // -------------------------------------------------------------------------- |
515 StringTable* StringTable::_the_table = NULL; | 584 StringTable* StringTable::_the_table = NULL; |
516 | 585 |
586 bool StringTable::_needs_rehashing = false; | |
587 jint StringTable::_seed = 0; | |
588 | |
589 // Pick hashing algorithm | |
590 unsigned int StringTable::hash_string(const jchar* s, int len, unsigned int hashValue) { | |
591 return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) : | |
592 (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len)); | |
593 } | |
594 | |
517 oop StringTable::lookup(int index, jchar* name, | 595 oop StringTable::lookup(int index, jchar* name, |
518 int len, unsigned int hash) { | 596 int len, unsigned int hash) { |
597 int count = 0; | |
519 for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) { | 598 for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) { |
599 count++; | |
520 if (l->hash() == hash) { | 600 if (l->hash() == hash) { |
521 if (java_lang_String::equals(l->literal(), name, len)) { | 601 if (java_lang_String::equals(l->literal(), name, len)) { |
522 return l->literal(); | 602 return l->literal(); |
523 } | 603 } |
524 } | 604 } |
525 } | 605 } |
606 // If the bucket size is too deep check if this hash code is insufficient. | |
607 if (count >= BasicHashtable::rehash_count && !needs_rehashing()) { | |
608 _needs_rehashing = check_rehash_table(count); | |
609 } | |
526 return NULL; | 610 return NULL; |
527 } | 611 } |
528 | 612 |
529 | 613 |
530 oop StringTable::basic_add(int index, Handle string_or_null, jchar* name, | 614 oop StringTable::basic_add(int index, Handle string_or_null, jchar* name, |
531 int len, unsigned int hashValue, TRAPS) { | 615 int len, unsigned int hashValue_arg, TRAPS) { |
532 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0]))); | 616 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0]))); |
533 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), | 617 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), |
534 "proposed name of symbol must be stable"); | 618 "proposed name of symbol must be stable"); |
535 | 619 |
536 Handle string; | 620 Handle string; |
545 MutexLocker ml(StringTable_lock, THREAD); | 629 MutexLocker ml(StringTable_lock, THREAD); |
546 | 630 |
547 assert(java_lang_String::equals(string(), name, len), | 631 assert(java_lang_String::equals(string(), name, len), |
548 "string must be properly initialized"); | 632 "string must be properly initialized"); |
549 | 633 |
634 // Check if the symbol table has been rehashed, if so, need to recalculate | |
635 // the hash value before second lookup. | |
636 unsigned int hashValue = hash_string(name, len, hashValue_arg); | |
637 | |
550 // Since look-up was done lock-free, we need to check if another | 638 // Since look-up was done lock-free, we need to check if another |
551 // thread beat us in the race to insert the symbol. | 639 // thread beat us in the race to insert the symbol. |
552 | 640 |
553 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int) | 641 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int) |
554 if (test != NULL) { | 642 if (test != NULL) { |
564 | 652 |
565 oop StringTable::lookup(Symbol* symbol) { | 653 oop StringTable::lookup(Symbol* symbol) { |
566 ResourceMark rm; | 654 ResourceMark rm; |
567 int length; | 655 int length; |
568 jchar* chars = symbol->as_unicode(length); | 656 jchar* chars = symbol->as_unicode(length); |
569 unsigned int hashValue = java_lang_String::hash_string(chars, length); | 657 unsigned int hashValue = hash_string(chars, length); |
570 int index = the_table()->hash_to_index(hashValue); | 658 int index = the_table()->hash_to_index(hashValue); |
571 return the_table()->lookup(index, chars, length, hashValue); | 659 return the_table()->lookup(index, chars, length, hashValue); |
572 } | 660 } |
573 | 661 |
574 | 662 |
575 oop StringTable::intern(Handle string_or_null, jchar* name, | 663 oop StringTable::intern(Handle string_or_null, jchar* name, |
576 int len, TRAPS) { | 664 int len, TRAPS) { |
577 unsigned int hashValue = java_lang_String::hash_string(name, len); | 665 unsigned int hashValue = hash_string(name, len); |
578 int index = the_table()->hash_to_index(hashValue); | 666 int index = the_table()->hash_to_index(hashValue); |
579 oop string = the_table()->lookup(index, name, len, hashValue); | 667 oop string = the_table()->lookup(index, name, len, hashValue); |
580 | 668 |
581 // Found | 669 // Found |
582 if (string != NULL) return string; | 670 if (string != NULL) return string; |
673 guarantee(the_table()->hash_to_index(h) == i, | 761 guarantee(the_table()->hash_to_index(h) == i, |
674 "wrong index in string table"); | 762 "wrong index in string table"); |
675 } | 763 } |
676 } | 764 } |
677 } | 765 } |
766 | |
767 void StringTable::dump(outputStream* st) { | |
768 NumberSeq summary; | |
769 for (int i = 0; i < the_table()->table_size(); ++i) { | |
770 HashtableEntry<oop>* p = the_table()->bucket(i); | |
771 int count = 0; | |
772 for ( ; p != NULL; p = p->next()) { | |
773 count++; | |
774 } | |
775 summary.add((double)count); | |
776 } | |
777 st->print_cr("StringTable statistics:"); | |
778 st->print_cr("Number of buckets : %7d", summary.num()); | |
779 st->print_cr("Average bucket size : %7.0f", summary.avg()); | |
780 st->print_cr("Variance of bucket size : %7.0f", summary.variance()); | |
781 st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd()); | |
782 st->print_cr("Maximum bucket size : %7.0f", summary.maximum()); | |
783 } | |
784 | |
785 | |
786 unsigned int StringTable::new_hash(oop string) { | |
787 ResourceMark rm; | |
788 int length; | |
789 jchar* chars = java_lang_String::as_unicode_string(string, length); | |
790 // Use alternate hashing algorithm on the string | |
791 return AltHashing::murmur3_32(seed(), chars, length); | |
792 } | |
793 | |
794 // Create a new table and using alternate hash code, populate the new table | |
795 // with the existing strings. Set flag to use the alternate hash code afterwards. | |
796 void StringTable::rehash_table() { | |
797 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); | |
798 assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump"); | |
799 StringTable* new_table = new StringTable(); | |
800 | |
801 // Initialize new global seed for hashing. | |
802 _seed = AltHashing::compute_seed(); | |
803 assert(seed() != 0, "shouldn't be zero"); | |
804 | |
805 // Rehash the table | |
806 the_table()->move_to(new_table); | |
807 | |
808 // Delete the table and buckets (entries are reused in new table). | |
809 delete _the_table; | |
810 // Don't check if we need rehashing until the table gets unbalanced again. | |
811 // Then rehash with a new global seed. | |
812 _needs_rehashing = false; | |
813 _the_table = new_table; | |
814 } |