# HG changeset patch # User tschatzl # Date 1390214827 -3600 # Node ID 5a32d2a3cc1ebd53ea7a646fab9e9f9503488651 # Parent 04f1d5d36714fd9ef442174abda0e5a0a33d8340 8027476: Improve performance of Stringtable unlink 8027455: Improve symbol table scan times during gc pauses Summary: Parallelize string table and symbol table scan during remark and full GC. Some additional statistics output if the experimental flag G1TraceStringSymbolTableScrubbing is set. Reviewed-by: mgerdin, coleenp, brutisso diff -r 04f1d5d36714 -r 5a32d2a3cc1e src/share/vm/classfile/symbolTable.cpp --- a/src/share/vm/classfile/symbolTable.cpp Tue Jan 07 13:31:33 2014 +0100 +++ b/src/share/vm/classfile/symbolTable.cpp Mon Jan 20 11:47:07 2014 +0100 @@ -38,6 +38,9 @@ // -------------------------------------------------------------------------- +// the number of buckets a thread claims +const int ClaimChunkSize = 32; + SymbolTable* SymbolTable::_the_table = NULL; // Static arena for symbols that are not deallocated Arena* SymbolTable::_arena = NULL; @@ -83,16 +86,12 @@ } } -int SymbolTable::symbols_removed = 0; -int SymbolTable::symbols_counted = 0; +int SymbolTable::_symbols_removed = 0; +int SymbolTable::_symbols_counted = 0; +volatile int SymbolTable::_parallel_claimed_idx = 0; -// Remove unreferenced symbols from the symbol table -// This is done late during GC. -void SymbolTable::unlink() { - int removed = 0; - int total = 0; - size_t memory_total = 0; - for (int i = 0; i < the_table()->table_size(); ++i) { +void SymbolTable::buckets_unlink(int start_idx, int end_idx, int* processed, int* removed, size_t* memory_total) { + for (int i = start_idx; i < end_idx; ++i) { HashtableEntry** p = the_table()->bucket_addr(i); HashtableEntry* entry = the_table()->bucket(i); while (entry != NULL) { @@ -104,14 +103,14 @@ break; } Symbol* s = entry->literal(); - memory_total += s->size(); - total++; + (*memory_total) += s->size(); + (*processed)++; assert(s != NULL, "just checking"); // If reference count is zero, remove. if (s->refcount() == 0) { assert(!entry->is_shared(), "shared entries should be kept live"); delete s; - removed++; + (*removed)++; *p = entry->next(); the_table()->free_entry(entry); } else { @@ -121,12 +120,45 @@ entry = (HashtableEntry*)HashtableEntry::make_ptr(*p); } } - symbols_removed += removed; - symbols_counted += total; +} + +// Remove unreferenced symbols from the symbol table +// This is done late during GC. +void SymbolTable::unlink(int* processed, int* removed) { + size_t memory_total = 0; + buckets_unlink(0, the_table()->table_size(), processed, removed, &memory_total); + _symbols_removed += *removed; + _symbols_counted += *processed; // Exclude printing for normal PrintGCDetails because people parse // this output. if (PrintGCDetails && Verbose && WizardMode) { - gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", total, + gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed, + (memory_total*HeapWordSize)/1024); + } +} + +void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) { + const int limit = the_table()->table_size(); + + size_t memory_total = 0; + + for (;;) { + // Grab next set of buckets to scan + int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize; + if (start_idx >= limit) { + // End of table + break; + } + + int end_idx = MIN2(limit, start_idx + ClaimChunkSize); + buckets_unlink(start_idx, end_idx, processed, removed, &memory_total); + } + Atomic::add(*processed, &_symbols_counted); + Atomic::add(*removed, &_symbols_removed); + // Exclude printing for normal PrintGCDetails because people parse + // this output. + if (PrintGCDetails && Verbose && WizardMode) { + gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed, (memory_total*HeapWordSize)/1024); } } @@ -494,11 +526,11 @@ tty->print_cr("Total number of symbols %5d", count); tty->print_cr("Total size in memory %5dK", (memory_total*HeapWordSize)/1024); - tty->print_cr("Total counted %5d", symbols_counted); - tty->print_cr("Total removed %5d", symbols_removed); - if (symbols_counted > 0) { + tty->print_cr("Total counted %5d", _symbols_counted); + tty->print_cr("Total removed %5d", _symbols_removed); + if (_symbols_counted > 0) { tty->print_cr("Percent removed %3.2f", - ((float)symbols_removed/(float)symbols_counted)* 100); + ((float)_symbols_removed/(float)_symbols_counted)* 100); } tty->print_cr("Reference counts %5d", Symbol::_total_count); tty->print_cr("Symbol arena size %5d used %5d", @@ -739,39 +771,38 @@ return result; } -void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f) { +void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) { + buckets_unlink_or_oops_do(is_alive, f, 0, the_table()->table_size(), processed, removed); +} + +void StringTable::possibly_parallel_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) { // Readers of the table are unlocked, so we should only be removing // entries at a safepoint. assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - for (int i = 0; i < the_table()->table_size(); ++i) { - HashtableEntry** p = the_table()->bucket_addr(i); - HashtableEntry* entry = the_table()->bucket(i); - while (entry != NULL) { - assert(!entry->is_shared(), "CDS not used for the StringTable"); + const int limit = the_table()->table_size(); - if (is_alive->do_object_b(entry->literal())) { - if (f != NULL) { - f->do_oop((oop*)entry->literal_addr()); - } - p = entry->next_addr(); - } else { - *p = entry->next(); - the_table()->free_entry(entry); - } - entry = *p; + for (;;) { + // Grab next set of buckets to scan + int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize; + if (start_idx >= limit) { + // End of table + break; } + + int end_idx = MIN2(limit, start_idx + ClaimChunkSize); + buckets_unlink_or_oops_do(is_alive, f, start_idx, end_idx, processed, removed); } } -void StringTable::buckets_do(OopClosure* f, int start_idx, int end_idx) { +void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) { const int limit = the_table()->table_size(); assert(0 <= start_idx && start_idx <= limit, - err_msg("start_idx (" INT32_FORMAT ") oob?", start_idx)); + err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx)); assert(0 <= end_idx && end_idx <= limit, - err_msg("end_idx (" INT32_FORMAT ") oob?", end_idx)); + err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx)); assert(start_idx <= end_idx, - err_msg("Ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT, + err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT, start_idx, end_idx)); for (int i = start_idx; i < end_idx; i += 1) { @@ -786,12 +817,44 @@ } } +void StringTable::buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, int* processed, int* removed) { + const int limit = the_table()->table_size(); + + assert(0 <= start_idx && start_idx <= limit, + err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx)); + assert(0 <= end_idx && end_idx <= limit, + err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx)); + assert(start_idx <= end_idx, + err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT, + start_idx, end_idx)); + + for (int i = start_idx; i < end_idx; ++i) { + HashtableEntry** p = the_table()->bucket_addr(i); + HashtableEntry* entry = the_table()->bucket(i); + while (entry != NULL) { + assert(!entry->is_shared(), "CDS not used for the StringTable"); + + if (is_alive->do_object_b(entry->literal())) { + if (f != NULL) { + f->do_oop((oop*)entry->literal_addr()); + } + p = entry->next_addr(); + } else { + *p = entry->next(); + the_table()->free_entry(entry); + (*removed)++; + } + (*processed)++; + entry = *p; + } + } +} + void StringTable::oops_do(OopClosure* f) { - buckets_do(f, 0, the_table()->table_size()); + buckets_oops_do(f, 0, the_table()->table_size()); } void StringTable::possibly_parallel_oops_do(OopClosure* f) { - const int ClaimChunkSize = 32; const int limit = the_table()->table_size(); for (;;) { @@ -803,7 +866,7 @@ } int end_idx = MIN2(limit, start_idx + ClaimChunkSize); - buckets_do(f, start_idx, end_idx); + buckets_oops_do(f, start_idx, end_idx); } } diff -r 04f1d5d36714 -r 5a32d2a3cc1e src/share/vm/classfile/symbolTable.hpp --- a/src/share/vm/classfile/symbolTable.hpp Tue Jan 07 13:31:33 2014 +0100 +++ b/src/share/vm/classfile/symbolTable.hpp Mon Jan 20 11:47:07 2014 +0100 @@ -86,8 +86,8 @@ static bool _needs_rehashing; // For statistics - static int symbols_removed; - static int symbols_counted; + static int _symbols_removed; + static int _symbols_counted; Symbol* allocate_symbol(const u1* name, int len, bool c_heap, TRAPS); // Assumes no characters larger than 0x7F @@ -121,6 +121,11 @@ static Arena* arena() { return _arena; } // called for statistics static void initialize_symbols(int arena_alloc_size = 0); + + static volatile int _parallel_claimed_idx; + + // Release any dead symbols + static void buckets_unlink(int start_idx, int end_idx, int* processed, int* removed, size_t* memory_total); public: enum { symbol_alloc_batch_size = 8, @@ -177,7 +182,14 @@ unsigned int* hashValues, TRAPS); // Release any dead symbols - static void unlink(); + static void unlink() { + int processed = 0; + int removed = 0; + unlink(&processed, &removed); + } + static void unlink(int* processed, int* removed); + // Release any dead symbols, possibly parallel version + static void possibly_parallel_unlink(int* processed, int* removed); // iterate over symbols static void symbols_do(SymbolClosure *cl); @@ -235,6 +247,9 @@ // Rehash the symbol table if it gets out of balance static void rehash_table(); static bool needs_rehashing() { return _needs_rehashing; } + // Parallel chunked scanning + static void clear_parallel_claimed_index() { _parallel_claimed_idx = 0; } + static int parallel_claimed_index() { return _parallel_claimed_idx; } }; class StringTable : public Hashtable { @@ -258,7 +273,10 @@ // Apply the give oop closure to the entries to the buckets // in the range [start_idx, end_idx). - static void buckets_do(OopClosure* f, int start_idx, int end_idx); + static void buckets_oops_do(OopClosure* f, int start_idx, int end_idx); + // Unlink or apply the give oop closure to the entries to the buckets + // in the range [start_idx, end_idx). + static void buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, int* processed, int* removed); StringTable() : Hashtable((int)StringTableSize, sizeof (HashtableEntry)) {} @@ -280,15 +298,28 @@ // GC support // Delete pointers to otherwise-unreachable objects. - static void unlink_or_oops_do(BoolObjectClosure* cl, OopClosure* f); + static void unlink_or_oops_do(BoolObjectClosure* cl, OopClosure* f) { + int processed = 0; + int removed = 0; + unlink_or_oops_do(cl, f, &processed, &removed); + } static void unlink(BoolObjectClosure* cl) { - unlink_or_oops_do(cl, NULL); + int processed = 0; + int removed = 0; + unlink_or_oops_do(cl, NULL, &processed, &removed); } - + static void unlink_or_oops_do(BoolObjectClosure* cl, OopClosure* f, int* processed, int* removed); + static void unlink(BoolObjectClosure* cl, int* processed, int* removed) { + unlink_or_oops_do(cl, NULL, processed, removed); + } // Serially invoke "f->do_oop" on the locations of all oops in the table. static void oops_do(OopClosure* f); - // Possibly parallel version of the above + // Possibly parallel versions of the above + static void possibly_parallel_unlink_or_oops_do(BoolObjectClosure* cl, OopClosure* f, int* processed, int* removed); + static void possibly_parallel_unlink(BoolObjectClosure* cl, int* processed, int* removed) { + possibly_parallel_unlink_or_oops_do(cl, NULL, processed, removed); + } static void possibly_parallel_oops_do(OopClosure* f); // Hashing algorithm, used as the hash value used by the @@ -349,5 +380,6 @@ // Parallel chunked scanning static void clear_parallel_claimed_index() { _parallel_claimed_idx = 0; } + static int parallel_claimed_index() { return _parallel_claimed_idx; } }; #endif // SHARE_VM_CLASSFILE_SYMBOLTABLE_HPP diff -r 04f1d5d36714 -r 5a32d2a3cc1e src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Jan 07 13:31:33 2014 +0100 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Mon Jan 20 11:47:07 2014 +0100 @@ -1619,7 +1619,6 @@ } }; - class G1ParVerifyFinalCountTask: public AbstractGangTask { protected: G1CollectedHeap* _g1h; @@ -2529,10 +2528,7 @@ assert(!rp->discovery_enabled(), "Post condition"); } - // Now clean up stale oops in StringTable - StringTable::unlink(&g1_is_alive); - // Clean up unreferenced symbols in symbol table. - SymbolTable::unlink(); + g1h->unlink_string_and_symbol_table(&g1_is_alive); } void ConcurrentMark::swapMarkBitMaps() { diff -r 04f1d5d36714 -r 5a32d2a3cc1e src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Jan 07 13:31:33 2014 +0100 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Jan 20 11:47:07 2014 +0100 @@ -5212,6 +5212,99 @@ SharedHeap::process_weak_roots(root_closure, &roots_in_blobs); } +class G1StringSymbolTableUnlinkTask : public AbstractGangTask { +private: + BoolObjectClosure* _is_alive; + int _initial_string_table_size; + int _initial_symbol_table_size; + + bool _process_strings; + int _strings_processed; + int _strings_removed; + + bool _process_symbols; + int _symbols_processed; + int _symbols_removed; +public: + G1StringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) : + AbstractGangTask("Par String/Symbol table unlink"), _is_alive(is_alive), + _process_strings(process_strings), _strings_processed(0), _strings_removed(0), + _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) { + + _initial_string_table_size = StringTable::the_table()->table_size(); + _initial_symbol_table_size = SymbolTable::the_table()->table_size(); + if (process_strings) { + StringTable::clear_parallel_claimed_index(); + } + if (process_symbols) { + SymbolTable::clear_parallel_claimed_index(); + } + } + + ~G1StringSymbolTableUnlinkTask() { + guarantee(!_process_strings || StringTable::parallel_claimed_index() >= _initial_string_table_size, + err_msg("claim value "INT32_FORMAT" after unlink less than initial string table size "INT32_FORMAT, + StringTable::parallel_claimed_index(), _initial_string_table_size)); + guarantee(!_process_strings || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size, + err_msg("claim value "INT32_FORMAT" after unlink less than initial symbol table size "INT32_FORMAT, + SymbolTable::parallel_claimed_index(), _initial_symbol_table_size)); + } + + void work(uint worker_id) { + if (G1CollectedHeap::use_parallel_gc_threads()) { + int strings_processed = 0; + int strings_removed = 0; + int symbols_processed = 0; + int symbols_removed = 0; + if (_process_strings) { + StringTable::possibly_parallel_unlink(_is_alive, &strings_processed, &strings_removed); + Atomic::add(strings_processed, &_strings_processed); + Atomic::add(strings_removed, &_strings_removed); + } + if (_process_symbols) { + SymbolTable::possibly_parallel_unlink(&symbols_processed, &symbols_removed); + Atomic::add(symbols_processed, &_symbols_processed); + Atomic::add(symbols_removed, &_symbols_removed); + } + } else { + if (_process_strings) { + StringTable::unlink(_is_alive, &_strings_processed, &_strings_removed); + } + if (_process_symbols) { + SymbolTable::unlink(&_symbols_processed, &_symbols_removed); + } + } + } + + size_t strings_processed() const { return (size_t)_strings_processed; } + size_t strings_removed() const { return (size_t)_strings_removed; } + + size_t symbols_processed() const { return (size_t)_symbols_processed; } + size_t symbols_removed() const { return (size_t)_symbols_removed; } +}; + +void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive, + bool process_strings, bool process_symbols) { + uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? + _g1h->workers()->active_workers() : 1); + + G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols); + if (G1CollectedHeap::use_parallel_gc_threads()) { + set_par_threads(n_workers); + workers()->run_task(&g1_unlink_task); + set_par_threads(0); + } else { + g1_unlink_task.work(0); + } + if (G1TraceStringSymbolTableScrubbing) { + gclog_or_tty->print_cr("Cleaned string and symbol table, " + "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, " + "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed", + g1_unlink_task.strings_processed(), g1_unlink_task.strings_removed(), + g1_unlink_task.symbols_processed(), g1_unlink_task.symbols_removed()); + } +} + // Weak Reference Processing support // An always "is_alive" closure that is used to preserve referents. diff -r 04f1d5d36714 -r 5a32d2a3cc1e src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Tue Jan 07 13:31:33 2014 +0100 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Jan 20 11:47:07 2014 +0100 @@ -1677,6 +1677,10 @@ // after a full GC void rebuild_strong_code_roots(); + // Delete entries for dead interned string and clean up unreferenced symbols + // in symbol table, possibly in parallel. + void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true); + // Verification // The following is just to alert the verification code diff -r 04f1d5d36714 -r 5a32d2a3cc1e src/share/vm/gc_implementation/g1/g1MarkSweep.cpp --- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Tue Jan 07 13:31:33 2014 +0100 +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Mon Jan 20 11:47:07 2014 +0100 @@ -163,11 +163,8 @@ // Prune dead klasses from subklass/sibling/implementor lists. Klass::clean_weak_klass_links(&GenMarkSweep::is_alive); - // Delete entries for dead interned strings. - StringTable::unlink(&GenMarkSweep::is_alive); - - // Clean up unreferenced symbols in symbol table. - SymbolTable::unlink(); + // Delete entries for dead interned string and clean up unreferenced symbols in symbol table. + G1CollectedHeap::heap()->unlink_string_and_symbol_table(&GenMarkSweep::is_alive); if (VerifyDuringGC) { HandleMark hm; // handle scope diff -r 04f1d5d36714 -r 5a32d2a3cc1e src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Jan 07 13:31:33 2014 +0100 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Mon Jan 20 11:47:07 2014 +0100 @@ -71,6 +71,9 @@ diagnostic(bool, G1TraceConcRefinement, false, \ "Trace G1 concurrent refinement") \ \ + experimental(bool, G1TraceStringSymbolTableScrubbing, false, \ + "Trace information string and symbol table scrubbing.") \ + \ product(double, G1ConcMarkStepDurationMillis, 10.0, \ "Target duration of individual concurrent marking steps " \ "in milliseconds.") \ diff -r 04f1d5d36714 -r 5a32d2a3cc1e test/gc/g1/TestStringSymbolTableStats.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gc/g1/TestStringSymbolTableStats.java Mon Jan 20 11:47:07 2014 +0100 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test TestStringSymbolTableStats.java + * @bug 8027476 8027455 + * @summary Ensure that the G1TraceStringSymbolTableScrubbing prints the expected message. + * @key gc + * @library /testlibrary + */ + +import com.oracle.java.testlibrary.ProcessTools; +import com.oracle.java.testlibrary.OutputAnalyzer; + +public class TestStringSymbolTableStats { + public static void main(String[] args) throws Exception { + + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC", + "-XX:+UnlockExperimentalVMOptions", + "-XX:+G1TraceStringSymbolTableScrubbing", + SystemGCTest.class.getName()); + + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + + System.out.println("Output:\n" + output.getOutput()); + + output.shouldContain("Cleaned string and symbol table"); + output.shouldHaveExitValue(0); + } + + static class SystemGCTest { + public static void main(String [] args) { + System.out.println("Calling System.gc()"); + System.gc(); + } + } +}