# HG changeset patch # User acorn # Date 1222906503 14400 # Node ID b7483806cc4960817546f4de8cb87bef3adb18f5 # Parent 99dd4bbd9eec0e9364f13f95e3d1ce8274b71f27# Parent ddfad9496151eb7f3dc8d9ee2a9b6e41cdfb5c81 Merge diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/asm/assembler.cpp --- a/src/share/vm/asm/assembler.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/asm/assembler.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -249,8 +249,6 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) { // Exception handler checks the nmethod's implicit null checks table // only when this method returns false. -#ifndef SPARC - // Sparc does not have based addressing if (UseCompressedOops) { // The first page after heap_base is unmapped and // the 'offset' is equal to [heap_base + offset] for @@ -261,7 +259,6 @@ offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1)); } } -#endif // SPARC return offset < 0 || os::vm_page_size() <= offset; } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/ci/ciMethodBlocks.cpp --- a/src/share/vm/ci/ciMethodBlocks.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/ci/ciMethodBlocks.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -49,7 +49,7 @@ // first half. Returns the range beginning at bci. ciBlock *ciMethodBlocks::split_block_at(int bci) { ciBlock *former_block = block_containing(bci); - ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci()); + ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci()); _blocks->append(new_block); assert(former_block != NULL, "must not be NULL"); new_block->set_limit_bci(bci); @@ -83,7 +83,7 @@ if (cb == NULL ) { // This is our first time visiting this bytecode. Create // a fresh block and assign it this starting point. 
- ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci); + ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci); _blocks->append(nb); _bci_to_block[bci] = nb; return nb; @@ -98,6 +98,11 @@ } } +ciBlock *ciMethodBlocks::make_dummy_block() { + ciBlock *dum = new(_arena) ciBlock(_method, -1, 0); + return dum; +} + void ciMethodBlocks::do_analysis() { ciBytecodeStream s(_method); ciBlock *cur_block = block_containing(0); @@ -253,7 +258,7 @@ Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord)); // create initial block covering the entire method - ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0); + ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0); _blocks->append(b); _bci_to_block[0] = b; @@ -334,7 +339,7 @@ #endif -ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) : +ciBlock::ciBlock(ciMethod *method, int index, int start_bci) : #ifndef PRODUCT _method(method), #endif diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/ci/ciMethodBlocks.hpp --- a/src/share/vm/ci/ciMethodBlocks.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/ci/ciMethodBlocks.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -48,6 +48,8 @@ int num_blocks() { return _num_blocks;} void clear_processed(); + ciBlock *make_dummy_block(); // a block not associated with a bci + #ifndef PRODUCT void dump(); #endif @@ -81,7 +83,7 @@ fall_through_bci = -1 }; - ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci); + ciBlock(ciMethod *method, int index, int start_bci); int start_bci() const { return _start_bci; } int limit_bci() const { return _limit_bci; } int control_bci() const { return _control_bci; } @@ -94,7 +96,6 @@ int ex_limit_bci() const { return _ex_limit_bci; } bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); } - // flag handling bool processed() const { return (_flags & Processed) != 0; } bool is_handler() const { return (_flags & Handler) != 0; } diff -r 
99dd4bbd9eec -r b7483806cc49 src/share/vm/ci/ciTypeFlow.cpp --- a/src/share/vm/ci/ciTypeFlow.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/ci/ciTypeFlow.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -338,8 +338,10 @@ } _trap_bci = -1; _trap_index = 0; + _def_locals.clear(); } + // ------------------------------------------------------------------ // ciTypeFlow::get_start_state // @@ -735,7 +737,7 @@ void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) { bool will_link; ciKlass* klass = str->get_klass(will_link); - if (!will_link) { + if (!will_link || str->is_unresolved_klass()) { trap(str, klass, str->get_klass_index()); } else { push_object(klass); @@ -1268,7 +1270,9 @@ } case Bytecodes::_iinc: { - check_int(local(str->get_index())); + int lnum = str->get_index(); + check_int(local(lnum)); + store_to_local(lnum); break; } case Bytecodes::_iload: load_local_int(str->get_index()); break; @@ -1506,6 +1510,46 @@ } #endif + +// ------------------------------------------------------------------ +// ciTypeFlow::SuccIter::next +// +void ciTypeFlow::SuccIter::next() { + int succ_ct = _pred->successors()->length(); + int next = _index + 1; + if (next < succ_ct) { + _index = next; + _succ = _pred->successors()->at(next); + return; + } + for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) { + // Do not compile any code for unloaded exception types. + // Following compiler passes are responsible for doing this also. 
+ ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i); + if (exception_klass->is_loaded()) { + _index = next; + _succ = _pred->exceptions()->at(i); + return; + } + next++; + } + _index = -1; + _succ = NULL; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::SuccIter::set_succ +// +void ciTypeFlow::SuccIter::set_succ(Block* succ) { + int succ_ct = _pred->successors()->length(); + if (_index < succ_ct) { + _pred->successors()->at_put(_index, succ); + } else { + int idx = _index - succ_ct; + _pred->exceptions()->at_put(idx, succ); + } +} + // ciTypeFlow::Block // // A basic block. @@ -1526,10 +1570,11 @@ _jsrs = new_jsrs; _next = NULL; _on_work_list = false; - _pre_order = -1; assert(!has_pre_order(), ""); - _private_copy = false; + _backedge_copy = false; + _exception_entry = false; _trap_bci = -1; _trap_index = 0; + df_init(); if (CITraceTypeFlow) { tty->print_cr(">> Created new block"); @@ -1541,55 +1586,13 @@ } // ------------------------------------------------------------------ -// ciTypeFlow::Block::clone_loop_head -// -ciTypeFlow::Block* -ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer, - int branch_bci, - ciTypeFlow::Block* target, - ciTypeFlow::JsrSet* jsrs) { - // Loop optimizations are not performed on Tier1 compiles. Do nothing. - if (analyzer->env()->comp_level() < CompLevel_full_optimization) { - return target; - } - - // The current block ends with a branch. - // - // If the target block appears to be the test-clause of a for loop, and - // it is not too large, and it has not yet been cloned, clone it. - // The pre-existing copy becomes the private clone used only by - // the initial iteration of the loop. (We know we are simulating - // the initial iteration right now, since we have never calculated - // successors before for this block.) 
- - if (branch_bci <= start() - && (target->limit() - target->start()) <= CICloneLoopTestLimit - && target->private_copy_count() == 0) { - // Setting the private_copy bit ensures that the target block cannot be - // reached by any other paths, such as fall-in from the loop body. - // The private copy will be accessible only on successor lists - // created up to this point. - target->set_private_copy(true); - if (CITraceTypeFlow) { - tty->print(">> Cloning a test-clause block "); - print_value_on(tty); - tty->cr(); - } - // If the target is the current block, then later on a new copy of the - // target block will be created when its bytecodes are reached by - // an alternate path. (This is the case for loops with the loop - // head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.) - // - // Otherwise, duplicate the target block now and use it immediately. - // (The case for loops with the loop head at the bci-wise top of the - // loop, as with 1.4.2 javac.) - // - // In either case, the new copy of the block will remain public. - if (target != this) { - target = analyzer->block_at(branch_bci, jsrs); - } - } - return target; +// ciTypeFlow::Block::df_init +void ciTypeFlow::Block::df_init() { + _pre_order = -1; assert(!has_pre_order(), ""); + _post_order = -1; assert(!has_post_order(), ""); + _loop = NULL; + _irreducible_entry = false; + _rpo_next = NULL; } // ------------------------------------------------------------------ @@ -1644,7 +1647,6 @@ case Bytecodes::_ifnull: case Bytecodes::_ifnonnull: // Our successors are the branch target and the next bci. 
branch_bci = str->get_dest(); - clone_loop_head(analyzer, branch_bci, this, jsrs); _successors = new (arena) GrowableArray(arena, 2, 0, NULL); assert(_successors->length() == IF_NOT_TAKEN, ""); @@ -1658,14 +1660,7 @@ _successors = new (arena) GrowableArray(arena, 1, 0, NULL); assert(_successors->length() == GOTO_TARGET, ""); - target = analyzer->block_at(branch_bci, jsrs); - // If the target block has not been visited yet, and looks like - // a two-way branch, attempt to clone it if it is a loop head. - if (target->_successors != NULL - && target->_successors->length() == (IF_TAKEN + 1)) { - target = clone_loop_head(analyzer, branch_bci, target, jsrs); - } - _successors->append(target); + _successors->append(analyzer->block_at(branch_bci, jsrs)); break; case Bytecodes::_jsr: @@ -1801,65 +1796,60 @@ } // ------------------------------------------------------------------ -// ciTypeFlow::Block::is_simpler_than -// -// A relation used to order our work list. We work on a block earlier -// if it has a smaller jsr stack or it occurs earlier in the program -// text. -// -// Note: maybe we should redo this functionality to make blocks -// which correspond to exceptions lower priority. 
-bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) { - if (other == NULL) { - return true; - } else { - int size1 = _jsrs->size(); - int size2 = other->_jsrs->size(); - if (size1 < size2) { - return true; - } else if (size2 < size1) { - return false; - } else { -#if 0 - if (size1 > 0) { - int r1 = _jsrs->record_at(0)->return_address(); - int r2 = _jsrs->record_at(0)->return_address(); - if (r1 < r2) { - return true; - } else if (r2 < r1) { - return false; - } else { - int e1 = _jsrs->record_at(0)->return_address(); - int e2 = _jsrs->record_at(0)->return_address(); - if (e1 < e2) { - return true; - } else if (e2 < e1) { - return false; - } - } - } -#endif - return (start() <= other->start()); - } - } +// ciTypeFlow::Block::set_backedge_copy +// Use this only to make a pre-existing public block into a backedge copy. +void ciTypeFlow::Block::set_backedge_copy(bool z) { + assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public"); + _backedge_copy = z; } // ------------------------------------------------------------------ -// ciTypeFlow::Block::set_private_copy -// Use this only to make a pre-existing public block into a private copy. -void ciTypeFlow::Block::set_private_copy(bool z) { - assert(z || (z == is_private_copy()), "cannot make a private copy public"); - _private_copy = z; +// ciTypeFlow::Block::is_clonable_exit +// +// At most 2 normal successors, one of which continues looping, +// and all exceptional successors must exit. 
+bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) { + int normal_cnt = 0; + int in_loop_cnt = 0; + for (SuccIter iter(this); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (iter.is_normal_ctrl()) { + if (++normal_cnt > 2) return false; + if (lp->contains(succ->loop())) { + if (++in_loop_cnt > 1) return false; + } + } else { + if (lp->contains(succ->loop())) return false; + } + } + return in_loop_cnt == 1; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Block::looping_succ +// +ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) { + assert(successors()->length() <= 2, "at most 2 normal successors"); + for (SuccIter iter(this); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (lp->contains(succ->loop())) { + return succ; + } + } + return NULL; } #ifndef PRODUCT // ------------------------------------------------------------------ // ciTypeFlow::Block::print_value_on void ciTypeFlow::Block::print_value_on(outputStream* st) const { - if (has_pre_order()) st->print("#%-2d ", pre_order()); + if (has_pre_order()) st->print("#%-2d ", pre_order()); + if (has_rpo()) st->print("rpo#%-2d ", rpo()); st->print("[%d - %d)", start(), limit()); + if (is_loop_head()) st->print(" lphd"); + if (is_irreducible_entry()) st->print(" irred"); if (_jsrs->size() > 0) { st->print("/"); _jsrs->print_on(st); } - if (is_private_copy()) st->print("/private_copy"); + if (is_backedge_copy()) st->print("/backedge_copy"); } // ------------------------------------------------------------------ @@ -1871,6 +1861,16 @@ st->print_cr(" ==================================================== "); st->print (" "); print_value_on(st); + st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr(); + if (loop() && loop()->parent() != NULL) { + st->print(" loops:"); + Loop* lp = loop(); + do { + st->print(" %d<-%d", lp->head()->pre_order(),lp->tail()->pre_order()); + 
if (lp->is_irreducible()) st->print("(ir)"); + lp = lp->parent(); + } while (lp->parent() != NULL); + } st->cr(); _state->print_on(st); if (_successors == NULL) { @@ -1907,6 +1907,21 @@ } #endif +#ifndef PRODUCT +// ------------------------------------------------------------------ +// ciTypeFlow::LocalSet::print_on +void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const { + st->print("{"); + for (int i = 0; i < max; i++) { + if (test(i)) st->print(" %d", i); + } + if (limit > max) { + st->print(" %d..%d ", max, limit); + } + st->print(" }"); +} +#endif + // ciTypeFlow // // This is a pass over the bytecodes which computes the following: @@ -1922,12 +1937,11 @@ _max_locals = method->max_locals(); _max_stack = method->max_stack(); _code_size = method->code_size(); + _has_irreducible_entry = false; _osr_bci = osr_bci; _failure_reason = NULL; assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument"); - _work_list = NULL; - _next_pre_order = 0; _ciblock_count = _methodBlocks->num_blocks(); _idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray*, _ciblock_count); @@ -1949,12 +1963,6 @@ _work_list = next_block->next(); next_block->set_next(NULL); next_block->set_on_work_list(false); - if (!next_block->has_pre_order()) { - // Assign "pre_order" as each new block is taken from the work list. - // This number may be used by following phases to order block visits. - assert(!have_block_count(), "must not have mapped blocks yet") - next_block->set_pre_order(_next_pre_order++); - } return next_block; } @@ -1962,30 +1970,37 @@ // ciTypeFlow::add_to_work_list // // Add a basic block to our work list. +// List is sorted by decreasing postorder sort (same as increasing RPO) void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) { assert(!block->is_on_work_list(), "must not already be on work list"); if (CITraceTypeFlow) { - tty->print(">> Adding block%s ", block->has_pre_order() ? 
" (again)" : ""); + tty->print(">> Adding block "); block->print_value_on(tty); tty->print_cr(" to the work list : "); } block->set_on_work_list(true); - if (block->is_simpler_than(_work_list)) { + + // decreasing post order sort + + Block* prev = NULL; + Block* current = _work_list; + int po = block->post_order(); + while (current != NULL) { + if (!current->has_post_order() || po > current->post_order()) + break; + prev = current; + current = current->next(); + } + if (prev == NULL) { block->set_next(_work_list); _work_list = block; } else { - Block *temp = _work_list; - while (!block->is_simpler_than(temp->next())) { - if (CITraceTypeFlow) { - tty->print("."); - } - temp = temp->next(); - } - block->set_next(temp->next()); - temp->set_next(block); + block->set_next(current); + prev->set_next(block); } + if (CITraceTypeFlow) { tty->cr(); } @@ -2008,7 +2023,7 @@ assert(ciblk->start_bci() == bci, "bad ciBlock boundaries"); Block* block = get_block_for(ciblk->index(), jsrs, option); - assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result"); + assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result"); if (CITraceTypeFlow) { if (block != NULL) { @@ -2072,8 +2087,9 @@ } if (block->meet_exception(exception_klass, state)) { - // Block was modified. Add it to the work list. - if (!block->is_on_work_list()) { + // Block was modified and has PO. Add it to the work list. + if (block->has_post_order() && + !block->is_on_work_list()) { add_to_work_list(block); } } @@ -2091,8 +2107,9 @@ for (int i = 0; i < len; i++) { Block* block = successors->at(i); if (block->meet(state)) { - // Block was modified. Add it to the work list. - if (!block->is_on_work_list()) { + // Block was modified and has PO. Add it to the work list. 
+ if (block->has_post_order() && + !block->is_on_work_list()) { add_to_work_list(block); } } @@ -2133,6 +2150,111 @@ return true; } +// ------------------------------------------------------------------ +// ciTypeFlow::clone_loop_heads +// +// Clone the loop heads +bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) { + bool rslt = false; + for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) { + lp = iter.current(); + Block* head = lp->head(); + if (lp == loop_tree_root() || + lp->is_irreducible() || + !head->is_clonable_exit(lp)) + continue; + + // check not already cloned + if (head->backedge_copy_count() != 0) + continue; + + // check _no_ shared head below us + Loop* ch; + for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling()); + if (ch != NULL) + continue; + + // Clone head + Block* new_head = head->looping_succ(lp); + Block* clone = clone_loop_head(lp, temp_vector, temp_set); + // Update lp's info + clone->set_loop(lp); + lp->set_head(new_head); + lp->set_tail(clone); + // And move original head into outer loop + head->set_loop(lp->parent()); + + rslt = true; + } + return rslt; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::clone_loop_head +// +// Clone lp's head and replace tail's successors with clone. 
+// +// | +// v +// head <-> body +// | +// v +// exit +// +// new_head +// +// | +// v +// head ----------\ +// | | +// | v +// | clone <-> body +// | | +// | /--/ +// | | +// v v +// exit +// +ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) { + Block* head = lp->head(); + Block* tail = lp->tail(); + if (CITraceTypeFlow) { + tty->print(">> Requesting clone of loop head "); head->print_value_on(tty); + tty->print(" for predecessor "); tail->print_value_on(tty); + tty->cr(); + } + Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy); + assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges"); + + assert(!clone->has_pre_order(), "just created"); + clone->set_next_pre_order(); + + // Insert clone after (orig) tail in reverse post order + clone->set_rpo_next(tail->rpo_next()); + tail->set_rpo_next(clone); + + // tail->head becomes tail->clone + for (SuccIter iter(tail); !iter.done(); iter.next()) { + if (iter.succ() == head) { + iter.set_succ(clone); + break; + } + } + flow_block(tail, temp_vector, temp_set); + if (head == tail) { + // For self-loops, clone->head becomes clone->clone + flow_block(clone, temp_vector, temp_set); + for (SuccIter iter(clone); !iter.done(); iter.next()) { + if (iter.succ() == head) { + iter.set_succ(clone); + break; + } + } + } + flow_block(clone, temp_vector, temp_set); + + return clone; +} // ------------------------------------------------------------------ // ciTypeFlow::flow_block @@ -2159,11 +2281,14 @@ // Grab the state from the current block. 
block->copy_state_into(state); + state->def_locals()->clear(); GrowableArray* exceptions = block->exceptions(); GrowableArray* exc_klasses = block->exc_klasses(); bool has_exceptions = exceptions->length() > 0; + bool exceptions_used = false; + ciBytecodeStream str(method()); str.reset_to_bci(start); Bytecodes::Code code; @@ -2172,6 +2297,7 @@ // Check for exceptional control flow from this point. if (has_exceptions && can_trap(str)) { flow_exceptions(exceptions, exc_klasses, state); + exceptions_used = true; } // Apply the effects of the current bytecode to our state. bool res = state->apply_one_bytecode(&str); @@ -2189,9 +2315,14 @@ block->print_on(tty); } + // Save set of locals defined in this block + block->def_locals()->add(state->def_locals()); + // Record (no) successors. block->successors(&str, state, jsrs); + assert(!has_exceptions || exceptions_used, "Not removing exceptions"); + // Discontinue interpretation of this Block. return; } @@ -2202,6 +2333,7 @@ // Check for exceptional control flow from this point. if (has_exceptions && can_trap(str)) { flow_exceptions(exceptions, exc_klasses, state); + exceptions_used = true; } // Fix the JsrSet to reflect effect of the bytecode. @@ -2218,11 +2350,306 @@ successors = block->successors(&str, NULL, NULL); } + // Save set of locals defined in this block + block->def_locals()->add(state->def_locals()); + + // Remove untaken exception paths + if (!exceptions_used) + exceptions->clear(); + // Pass our state to successors. flow_successors(successors, state); } // ------------------------------------------------------------------ +// ciTypeFlow::PostOrderLoops::next +// +// Advance to next loop tree using a postorder, left-to-right traversal. 
+void ciTypeFlow::PostorderLoops::next() { + assert(!done(), "must not be done."); + if (_current->sibling() != NULL) { + _current = _current->sibling(); + while (_current->child() != NULL) { + _current = _current->child(); + } + } else { + _current = _current->parent(); + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::PreOrderLoops::next +// +// Advance to next loop tree using a preorder, left-to-right traversal. +void ciTypeFlow::PreorderLoops::next() { + assert(!done(), "must not be done."); + if (_current->child() != NULL) { + _current = _current->child(); + } else if (_current->sibling() != NULL) { + _current = _current->sibling(); + } else { + while (_current != _root && _current->sibling() == NULL) { + _current = _current->parent(); + } + if (_current == _root) { + _current = NULL; + assert(done(), "must be done."); + } else { + assert(_current->sibling() != NULL, "must be more to do"); + _current = _current->sibling(); + } + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::sorted_merge +// +// Merge the branch lp into this branch, sorting on the loop head +// pre_orders. Returns the leaf of the merged branch. +// Child and sibling pointers will be setup later. +// Sort is (looking from leaf towards the root) +// descending on primary key: loop head's pre_order, and +// ascending on secondary key: loop tail's pre_order. 
+ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) { + Loop* leaf = this; + Loop* prev = NULL; + Loop* current = leaf; + while (lp != NULL) { + int lp_pre_order = lp->head()->pre_order(); + // Find insertion point for "lp" + while (current != NULL) { + if (current == lp) + return leaf; // Already in list + if (current->head()->pre_order() < lp_pre_order) + break; + if (current->head()->pre_order() == lp_pre_order && + current->tail()->pre_order() > lp->tail()->pre_order()) { + break; + } + prev = current; + current = current->parent(); + } + Loop* next_lp = lp->parent(); // Save future list of items to insert + // Insert lp before current + lp->set_parent(current); + if (prev != NULL) { + prev->set_parent(lp); + } else { + leaf = lp; + } + prev = lp; // Inserted item is new prev[ious] + lp = next_lp; // Next item to insert + } + return leaf; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::build_loop_tree +// +// Incrementally build loop tree. +void ciTypeFlow::build_loop_tree(Block* blk) { + assert(!blk->is_post_visited(), "precondition"); + Loop* innermost = NULL; // merge of loop tree branches over all successors + + for (SuccIter iter(blk); !iter.done(); iter.next()) { + Loop* lp = NULL; + Block* succ = iter.succ(); + if (!succ->is_post_visited()) { + // Found backedge since predecessor post visited, but successor is not + assert(succ->pre_order() <= blk->pre_order(), "should be backedge"); + + // Create a LoopNode to mark this loop. + lp = new (arena()) Loop(succ, blk); + if (succ->loop() == NULL) + succ->set_loop(lp); + // succ->loop will be updated to innermost loop on a later call, when blk==succ + + } else { // Nested loop + lp = succ->loop(); + + // If succ is loop head, find outer loop. + while (lp != NULL && lp->head() == succ) { + lp = lp->parent(); + } + if (lp == NULL) { + // Infinite loop, it's parent is the root + lp = loop_tree_root(); + } + } + + // Check for irreducible loop. 
+ // Successor has already been visited. If the successor's loop head + // has already been post-visited, then this is another entry into the loop. + while (lp->head()->is_post_visited() && lp != loop_tree_root()) { + _has_irreducible_entry = true; + lp->set_irreducible(succ); + if (!succ->is_on_work_list()) { + // Assume irreducible entries need more data flow + add_to_work_list(succ); + } + lp = lp->parent(); + assert(lp != NULL, "nested loop must have parent by now"); + } + + // Merge loop tree branch for all successors. + innermost = innermost == NULL ? lp : innermost->sorted_merge(lp); + + } // end loop + + if (innermost == NULL) { + assert(blk->successors()->length() == 0, "CFG exit"); + blk->set_loop(loop_tree_root()); + } else if (innermost->head() == blk) { + // If loop header, complete the tree pointers + if (blk->loop() != innermost) { +#if ASSERT + assert(blk->loop()->head() == innermost->head(), "same head"); + Loop* dl; + for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent()); + assert(dl == blk->loop(), "blk->loop() already in innermost list"); +#endif + blk->set_loop(innermost); + } + innermost->def_locals()->add(blk->def_locals()); + Loop* l = innermost; + Loop* p = l->parent(); + while (p && l->head() == blk) { + l->set_sibling(p->child()); // Put self on parents 'next child' + p->set_child(l); // Make self the first child of parent + p->def_locals()->add(l->def_locals()); + l = p; // Walk up the parent chain + p = l->parent(); + } + } else { + blk->set_loop(innermost); + innermost->def_locals()->add(blk->def_locals()); + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::contains +// +// Returns true if lp is nested loop. 
+bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const { + assert(lp != NULL, ""); + if (this == lp || head() == lp->head()) return true; + int depth1 = depth(); + int depth2 = lp->depth(); + if (depth1 > depth2) + return false; + while (depth1 < depth2) { + depth2--; + lp = lp->parent(); + } + return this == lp; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::depth +// +// Loop depth +int ciTypeFlow::Loop::depth() const { + int dp = 0; + for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent()) + dp++; + return dp; +} + +#ifndef PRODUCT +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::print +void ciTypeFlow::Loop::print(outputStream* st, int indent) const { + for (int i = 0; i < indent; i++) st->print(" "); + st->print("%d<-%d %s", + is_root() ? 0 : this->head()->pre_order(), + is_root() ? 0 : this->tail()->pre_order(), + is_irreducible()?" irr":""); + st->print(" defs: "); + def_locals()->print_on(st, _head->outer()->method()->max_locals()); + st->cr(); + for (Loop* ch = child(); ch != NULL; ch = ch->sibling()) + ch->print(st, indent+2); +} +#endif + +// ------------------------------------------------------------------ +// ciTypeFlow::df_flow_types +// +// Perform the depth first type flow analysis. Helper for flow_types. 
+void ciTypeFlow::df_flow_types(Block* start, + bool do_flow, + StateVector* temp_vector, + JsrSet* temp_set) { + int dft_len = 100; + GrowableArray stk(arena(), dft_len, 0, NULL); + + ciBlock* dummy = _methodBlocks->make_dummy_block(); + JsrSet* root_set = new JsrSet(NULL, 0); + Block* root_head = new (arena()) Block(this, dummy, root_set); + Block* root_tail = new (arena()) Block(this, dummy, root_set); + root_head->set_pre_order(0); + root_head->set_post_order(0); + root_tail->set_pre_order(max_jint); + root_tail->set_post_order(max_jint); + set_loop_tree_root(new (arena()) Loop(root_head, root_tail)); + + stk.push(start); + + _next_pre_order = 0; // initialize pre_order counter + _rpo_list = NULL; + int next_po = 0; // initialize post_order counter + + // Compute RPO and the control flow graph + int size; + while ((size = stk.length()) > 0) { + Block* blk = stk.top(); // Leave node on stack + if (!blk->is_visited()) { + // forward arc in graph + assert (!blk->has_pre_order(), ""); + blk->set_next_pre_order(); + + if (_next_pre_order >= MaxNodeLimit / 2) { + // Too many basic blocks. Bail out. + // This can happen when try/finally constructs are nested to depth N, + // and there is O(2**N) cloning of jsr bodies. See bug 4697245! + // "MaxNodeLimit / 2" is used because probably the parser will + // generate at least twice that many nodes and bail out. + record_failure("too many basic blocks"); + return; + } + if (do_flow) { + flow_block(blk, temp_vector, temp_set); + if (failing()) return; // Watch for bailouts. 
+ } + } else if (!blk->is_post_visited()) { + // cross or back arc + for (SuccIter iter(blk); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (!succ->is_visited()) { + stk.push(succ); + } + } + if (stk.length() == size) { + // There were no additional children, post visit node now + stk.pop(); // Remove node from stack + + build_loop_tree(blk); + blk->set_post_order(next_po++); // Assign post order + prepend_to_rpo_list(blk); + assert(blk->is_post_visited(), ""); + + if (blk->is_loop_head() && !blk->is_on_work_list()) { + // Assume loop heads need more data flow + add_to_work_list(blk); + } + } + } else { + stk.pop(); // Remove post-visited node from stack + } + } +} + +// ------------------------------------------------------------------ // ciTypeFlow::flow_types // // Perform the type flow analysis, creating and cloning Blocks as @@ -2233,91 +2660,93 @@ JsrSet* temp_set = new JsrSet(NULL, 16); // Create the method entry block. - Block* block = block_at(start_bci(), temp_set); - block->set_pre_order(_next_pre_order++); - assert(block->is_start(), "start block must have order #0"); + Block* start = block_at(start_bci(), temp_set); // Load the initial state into it. const StateVector* start_state = get_start_state(); if (failing()) return; - block->meet(start_state); - add_to_work_list(block); + start->meet(start_state); + + // Depth first visit + df_flow_types(start, true /*do flow*/, temp_vector, temp_set); - // Trickle away. - while (!work_list_empty()) { - Block* block = work_list_next(); - flow_block(block, temp_vector, temp_set); + if (failing()) return; + assert(_rpo_list == start, "must be start"); + // Any loops found? + if (loop_tree_root()->child() != NULL && + env()->comp_level() >= CompLevel_full_optimization) { + // Loop optimizations are not performed on Tier1 compiles. + + bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set); - // NodeCountCutoff is the number of nodes at which the parser - // will bail out. 
Probably if we already have lots of BBs, - // the parser will generate at least twice that many nodes and bail out. - // Therefore, this is a conservatively large limit at which to - // bail out in the pre-parse typeflow pass. - int block_limit = MaxNodeLimit / 2; + // If some loop heads were cloned, recompute postorder and loop tree + if (changed) { + loop_tree_root()->set_child(NULL); + for (Block* blk = _rpo_list; blk != NULL;) { + Block* next = blk->rpo_next(); + blk->df_init(); + blk = next; + } + df_flow_types(start, false /*no flow*/, temp_vector, temp_set); + } + } - if (_next_pre_order >= block_limit) { - // Too many basic blocks. Bail out. - // - // This can happen when try/finally constructs are nested to depth N, - // and there is O(2**N) cloning of jsr bodies. See bug 4697245! - record_failure("too many basic blocks"); - return; - } + if (CITraceTypeFlow) { + tty->print_cr("\nLoop tree"); + loop_tree_root()->print(); + } + + // Continue flow analysis until fixed point reached + + debug_only(int max_block = _next_pre_order;) - // Watch for bailouts. - if (failing()) return; + while (!work_list_empty()) { + Block* blk = work_list_next(); + assert (blk->has_post_order(), "post order assigned above"); + + flow_block(blk, temp_vector, temp_set); + + assert (max_block == _next_pre_order, "no new blocks"); + assert (!failing(), "no more bailouts"); } } // ------------------------------------------------------------------ // ciTypeFlow::map_blocks // -// Create the block map, which indexes blocks in pre_order. +// Create the block map, which indexes blocks in reverse post-order. 
void ciTypeFlow::map_blocks() { assert(_block_map == NULL, "single initialization"); - int pre_order_limit = _next_pre_order; - _block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit); - assert(pre_order_limit == block_count(), ""); - int po; - for (po = 0; po < pre_order_limit; po++) { - debug_only(_block_map[po] = NULL); + int block_ct = _next_pre_order; + _block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct); + assert(block_ct == block_count(), ""); + + Block* blk = _rpo_list; + for (int m = 0; m < block_ct; m++) { + int rpo = blk->rpo(); + assert(rpo == m, "should be sequential"); + _block_map[rpo] = blk; + blk = blk->rpo_next(); } - ciMethodBlocks *mblks = _methodBlocks; - ciBlock* current = NULL; - int limit_bci = code_size(); - for (int bci = 0; bci < limit_bci; bci++) { - ciBlock* ciblk = mblks->block_containing(bci); - if (ciblk != NULL && ciblk != current) { - current = ciblk; - int curidx = ciblk->index(); - int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length(); - for (int i = 0; i < block_count; i++) { - Block* block = _idx_to_blocklist[curidx]->at(i); - if (!block->has_pre_order()) continue; - int po = block->pre_order(); - assert(_block_map[po] == NULL, "unique ref to block"); - assert(0 <= po && po < pre_order_limit, ""); - _block_map[po] = block; - } - } - } - for (po = 0; po < pre_order_limit; po++) { - assert(_block_map[po] != NULL, "must not drop any blocks"); - Block* block = _block_map[po]; + assert(blk == NULL, "should be done"); + + for (int j = 0; j < block_ct; j++) { + assert(_block_map[j] != NULL, "must not drop any blocks"); + Block* block = _block_map[j]; // Remove dead blocks from successor lists: for (int e = 0; e <= 1; e++) { GrowableArray* l = e? 
block->exceptions(): block->successors(); - for (int i = 0; i < l->length(); i++) { - Block* s = l->at(i); - if (!s->has_pre_order()) { + for (int k = 0; k < l->length(); k++) { + Block* s = l->at(k); + if (!s->has_post_order()) { if (CITraceTypeFlow) { tty->print("Removing dead %s successor of #%d: ", (e? "exceptional": "normal"), block->pre_order()); s->print_value_on(tty); tty->cr(); } l->remove(s); - --i; + --k; } } } @@ -2329,7 +2758,7 @@ // // Find a block with this ciBlock which has a compatible JsrSet. // If no such block exists, create it, unless the option is no_create. -// If the option is create_private_copy, always create a fresh private copy. +// If the option is create_backedge_copy, always create a fresh backedge copy. ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) { Arena* a = arena(); GrowableArray* blocks = _idx_to_blocklist[ciBlockIndex]; @@ -2342,11 +2771,11 @@ _idx_to_blocklist[ciBlockIndex] = blocks; } - if (option != create_private_copy) { + if (option != create_backedge_copy) { int len = blocks->length(); for (int i = 0; i < len; i++) { Block* block = blocks->at(i); - if (!block->is_private_copy() && block->is_compatible_with(jsrs)) { + if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) { return block; } } @@ -2357,15 +2786,15 @@ // We did not find a compatible block. Create one. 
Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs); - if (option == create_private_copy) new_block->set_private_copy(true); + if (option == create_backedge_copy) new_block->set_backedge_copy(true); blocks->append(new_block); return new_block; } // ------------------------------------------------------------------ -// ciTypeFlow::private_copy_count +// ciTypeFlow::backedge_copy_count // -int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const { +int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const { GrowableArray* blocks = _idx_to_blocklist[ciBlockIndex]; if (blocks == NULL) { @@ -2376,7 +2805,7 @@ int len = blocks->length(); for (int i = 0; i < len; i++) { Block* block = blocks->at(i); - if (block->is_private_copy() && block->is_compatible_with(jsrs)) { + if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) { count++; } } @@ -2405,10 +2834,12 @@ if (failing()) { return; } + + map_blocks(); + if (CIPrintTypeFlow || CITraceTypeFlow) { - print_on(tty); + rpo_print_on(tty); } - map_blocks(); } // ------------------------------------------------------------------ @@ -2466,4 +2897,19 @@ st->print_cr("********************************************************"); st->cr(); } + +void ciTypeFlow::rpo_print_on(outputStream* st) const { + st->print_cr("********************************************************"); + st->print ("TypeFlow for "); + method()->name()->print_symbol_on(st); + int limit_bci = code_size(); + st->print_cr(" %d bytes", limit_bci); + for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) { + blk->print_on(st); + st->print_cr("--------------------------------------------------------"); + st->cr(); + } + st->print_cr("********************************************************"); + st->cr(); +} #endif diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/ci/ciTypeFlow.hpp --- a/src/share/vm/ci/ciTypeFlow.hpp Tue Sep 30 12:24:27 2008 -0400 +++ 
b/src/share/vm/ci/ciTypeFlow.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -34,11 +34,13 @@ int _max_locals; int _max_stack; int _code_size; + bool _has_irreducible_entry; const char* _failure_reason; public: class StateVector; + class Loop; class Block; // Build a type flow analyzer @@ -55,6 +57,7 @@ int max_stack() const { return _max_stack; } int max_cells() const { return _max_locals + _max_stack; } int code_size() const { return _code_size; } + bool has_irreducible_entry() const { return _has_irreducible_entry; } // Represents information about an "active" jsr call. This // class represents a call to the routine at some entry address @@ -125,6 +128,19 @@ void print_on(outputStream* st) const PRODUCT_RETURN; }; + class LocalSet VALUE_OBJ_CLASS_SPEC { + private: + enum Constants { max = 63 }; + uint64_t _bits; + public: + LocalSet() : _bits(0) {} + void add(uint32_t i) { if (i < (uint32_t)max) _bits |= (1LL << i); } + void add(LocalSet* ls) { _bits |= ls->_bits; } + bool test(uint32_t i) const { return i < (uint32_t)max ? 
(_bits>>i)&1U : true; } + void clear() { _bits = 0; } + void print_on(outputStream* st, int limit) const PRODUCT_RETURN; + }; + // Used as a combined index for locals and temps enum Cell { Cell_0, Cell_max = INT_MAX @@ -142,6 +158,8 @@ int _trap_bci; int _trap_index; + LocalSet _def_locals; // For entire block + static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer); public: @@ -181,6 +199,9 @@ int monitor_count() const { return _monitor_count; } void set_monitor_count(int mc) { _monitor_count = mc; } + LocalSet* def_locals() { return &_def_locals; } + const LocalSet* def_locals() const { return &_def_locals; } + static Cell start_cell() { return (Cell)0; } static Cell next_cell(Cell c) { return (Cell)(((int)c) + 1); } Cell limit_cell() const { @@ -250,6 +271,10 @@ return type->basic_type() == T_DOUBLE; } + void store_to_local(int lnum) { + _def_locals.add((uint) lnum); + } + void push_translate(ciType* type); void push_int() { @@ -358,6 +383,7 @@ "must be reference type or return address"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_double(int index) { @@ -376,6 +402,8 @@ overwrite_local_double_long(index); set_type_at(local(index), type); set_type_at(local(index+1), type2); + store_to_local(index); + store_to_local(index+1); } void load_local_float(int index) { @@ -388,6 +416,7 @@ assert(is_float(type), "must be float type"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_int(int index) { @@ -400,6 +429,7 @@ assert(is_int(type), "must be int type"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_long(int index) { @@ -418,6 +448,8 @@ overwrite_local_double_long(index); set_type_at(local(index), type); set_type_at(local(index+1), type2); + store_to_local(index); + store_to_local(index+1); } // Stop interpretation of this path with a trap. 
@@ -450,13 +482,31 @@ }; // Parameter for "find_block" calls: - // Describes the difference between a public and private copy. + // Describes the difference between a public and backedge copy. enum CreateOption { create_public_copy, - create_private_copy, + create_backedge_copy, no_create }; + // Successor iterator + class SuccIter : public StackObj { + private: + Block* _pred; + int _index; + Block* _succ; + public: + SuccIter() : _pred(NULL), _index(-1), _succ(NULL) {} + SuccIter(Block* pred) : _pred(pred), _index(-1), _succ(NULL) { next(); } + int index() { return _index; } + Block* pred() { return _pred; } // Return predecessor + bool done() { return _index < 0; } // Finished? + Block* succ() { return _succ; } // Return current successor + void next(); // Advance + void set_succ(Block* succ); // Update current successor + bool is_normal_ctrl() { return index() < _pred->successors()->length(); } + }; + // A basic block class Block : public ResourceObj { private: @@ -470,15 +520,24 @@ int _trap_bci; int _trap_index; - // A reasonable approximation to pre-order, provided.to the client. + // pre_order, assigned at first visit. Used as block ID and "visited" tag int _pre_order; - // Has this block been cloned for some special purpose? - bool _private_copy; + // A post-order, used to compute the reverse post order (RPO) provided to the client + int _post_order; // used to compute rpo + + // Has this block been cloned for a loop backedge? 
+ bool _backedge_copy; // A pointer used for our internal work list - Block* _next; - bool _on_work_list; + Block* _next; + bool _on_work_list; // on the work list + Block* _rpo_next; // Reverse post order list + + // Loop info + Loop* _loop; // nearest loop + bool _irreducible_entry; // entry to irreducible loop + bool _exception_entry; // entry to exception handler ciBlock* ciblock() const { return _ciblock; } StateVector* state() const { return _state; } @@ -504,10 +563,11 @@ int start() const { return _ciblock->start_bci(); } int limit() const { return _ciblock->limit_bci(); } int control() const { return _ciblock->control_bci(); } + JsrSet* jsrs() const { return _jsrs; } - bool is_private_copy() const { return _private_copy; } - void set_private_copy(bool z); - int private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); } + bool is_backedge_copy() const { return _backedge_copy; } + void set_backedge_copy(bool z); + int backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); } // access to entry state int stack_size() const { return _state->stack_size(); } @@ -515,6 +575,20 @@ ciType* local_type_at(int i) const { return _state->local_type_at(i); } ciType* stack_type_at(int i) const { return _state->stack_type_at(i); } + // Data flow on locals + bool is_invariant_local(uint v) const { + assert(is_loop_head(), "only loop heads"); + // Find outermost loop with same loop head + Loop* lp = loop(); + while (lp->parent() != NULL) { + if (lp->parent()->head() != lp->head()) break; + lp = lp->parent(); + } + return !lp->def_locals()->test(v); + } + LocalSet* def_locals() { return _state->def_locals(); } + const LocalSet* def_locals() const { return _state->def_locals(); } + // Get the successors for this Block. 
GrowableArray* successors(ciBytecodeStream* str, StateVector* state, @@ -524,13 +598,6 @@ return _successors; } - // Helper function for "successors" when making private copies of - // loop heads for C2. - Block * clone_loop_head(ciTypeFlow* analyzer, - int branch_bci, - Block* target, - JsrSet* jsrs); - // Get the exceptional successors for this Block. GrowableArray* exceptions() { if (_exceptions == NULL) { @@ -584,17 +651,126 @@ bool is_on_work_list() const { return _on_work_list; } bool has_pre_order() const { return _pre_order >= 0; } - void set_pre_order(int po) { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; } + void set_pre_order(int po) { assert(!has_pre_order(), ""); _pre_order = po; } int pre_order() const { assert(has_pre_order(), ""); return _pre_order; } + void set_next_pre_order() { set_pre_order(outer()->inc_next_pre_order()); } bool is_start() const { return _pre_order == outer()->start_block_num(); } - // A ranking used in determining order within the work list. 
- bool is_simpler_than(Block* other); + // Reverse post order + void df_init(); + bool has_post_order() const { return _post_order >= 0; } + void set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; } + void reset_post_order(int o){ _post_order = o; } + int post_order() const { assert(has_post_order(), ""); return _post_order; } + + bool has_rpo() const { return has_post_order() && outer()->have_block_count(); } + int rpo() const { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; } + void set_rpo_next(Block* b) { _rpo_next = b; } + Block* rpo_next() { return _rpo_next; } + + // Loops + Loop* loop() const { return _loop; } + void set_loop(Loop* lp) { _loop = lp; } + bool is_loop_head() const { return _loop && _loop->head() == this; } + void set_irreducible_entry(bool c) { _irreducible_entry = c; } + bool is_irreducible_entry() const { return _irreducible_entry; } + bool is_visited() const { return has_pre_order(); } + bool is_post_visited() const { return has_post_order(); } + bool is_clonable_exit(Loop* lp); + Block* looping_succ(Loop* lp); // Successor inside of loop + bool is_single_entry_loop_head() const { + if (!is_loop_head()) return false; + for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent()) + if (lp->is_irreducible()) return false; + return true; + } void print_value_on(outputStream* st) const PRODUCT_RETURN; void print_on(outputStream* st) const PRODUCT_RETURN; }; + // Loop + class Loop : public ResourceObj { + private: + Loop* _parent; + Loop* _sibling; // List of siblings, null terminated + Loop* _child; // Head of child list threaded thru sibling pointer + Block* _head; // Head of loop + Block* _tail; // Tail of loop + bool _irreducible; + LocalSet _def_locals; + + public: + Loop(Block* head, Block* tail) : + _head(head), _tail(tail), + _parent(NULL), _sibling(NULL), _child(NULL), + _irreducible(false), _def_locals() {} + + Loop* parent() const { return _parent; } + Loop* 
sibling() const { return _sibling; } + Loop* child() const { return _child; } + Block* head() const { return _head; } + Block* tail() const { return _tail; } + void set_parent(Loop* p) { _parent = p; } + void set_sibling(Loop* s) { _sibling = s; } + void set_child(Loop* c) { _child = c; } + void set_head(Block* hd) { _head = hd; } + void set_tail(Block* tl) { _tail = tl; } + + int depth() const; // nesting depth + + // Returns true if lp is a nested loop or us. + bool contains(Loop* lp) const; + bool contains(Block* blk) const { return contains(blk->loop()); } + + // Data flow on locals + LocalSet* def_locals() { return &_def_locals; } + const LocalSet* def_locals() const { return &_def_locals; } + + // Merge the branch lp into this branch, sorting on the loop head + // pre_orders. Returns the new branch. + Loop* sorted_merge(Loop* lp); + + // Mark non-single entry to loop + void set_irreducible(Block* entry) { + _irreducible = true; + entry->set_irreducible_entry(true); + } + bool is_irreducible() const { return _irreducible; } + + bool is_root() const { return _tail->pre_order() == max_jint; } + + void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN; + }; + + // Postorder iteration over the loop tree. + class PostorderLoops : public StackObj { + private: + Loop* _root; + Loop* _current; + public: + PostorderLoops(Loop* root) : _root(root), _current(root) { + while (_current->child() != NULL) { + _current = _current->child(); + } + } + bool done() { return _current == NULL; } // Finished iterating? + void next(); // Advance to next loop + Loop* current() { return _current; } // Return current loop. + }; + + // Preorder iteration over the loop tree. + class PreorderLoops : public StackObj { + private: + Loop* _root; + Loop* _current; + public: + PreorderLoops(Loop* root) : _root(root), _current(root) {} + bool done() { return _current == NULL; } // Finished iterating? 
+ void next(); // Advance to next loop + Loop* current() { return _current; } // Return current loop. + }; + // Standard indexes of successors, for various bytecodes. enum { FALL_THROUGH = 0, // normal control @@ -619,6 +795,12 @@ // Tells if a given instruction is able to generate an exception edge. bool can_trap(ciBytecodeStream& str); + // Clone the loop heads. Returns true if any cloning occurred. + bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set); + + // Clone lp's head and replace tail's successors with clone. + Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set); + public: // Return the block beginning at bci which has a JsrSet compatible // with jsrs. @@ -627,8 +809,8 @@ // block factory Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy); - // How many of the blocks have the private_copy bit set? - int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const; + // How many of the blocks have the backedge_copy bit set? + int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const; // Return an existing block containing bci which has a JsrSet compatible // with jsrs, or NULL if there is none. @@ -651,11 +833,18 @@ return _block_map[po]; } Block* start_block() const { return pre_order_at(start_block_num()); } int start_block_num() const { return 0; } + Block* rpo_at(int rpo) const { assert(0 <= rpo && rpo < block_count(), "out of bounds"); + return _block_map[rpo]; } + int next_pre_order() { return _next_pre_order; } + int inc_next_pre_order() { return _next_pre_order++; } private: // A work list used during flow analysis. Block* _work_list; + // List of blocks in reverse post order + Block* _rpo_list; + // Next Block::_pre_order. After mapping, doubles as block_count. int _next_pre_order; @@ -668,6 +857,15 @@ // Add a basic block to our work list. void add_to_work_list(Block* block); + // Prepend a basic block to rpo list. 
+ void prepend_to_rpo_list(Block* blk) { + blk->set_rpo_next(_rpo_list); + _rpo_list = blk; + } + + // Root of the loop tree + Loop* _loop_tree_root; + // State used for make_jsr_record int _jsr_count; GrowableArray* _jsr_records; @@ -677,6 +875,9 @@ // does not already exist. JsrRecord* make_jsr_record(int entry_address, int return_address); + void set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; } + Loop* loop_tree_root() { return _loop_tree_root; } + private: // Get the initial state for start_bci: const StateVector* get_start_state(); @@ -703,6 +904,15 @@ // necessary. void flow_types(); + // Perform the depth first type flow analysis. Helper for flow_types. + void df_flow_types(Block* start, + bool do_flow, + StateVector* temp_vector, + JsrSet* temp_set); + + // Incrementally build loop tree. + void build_loop_tree(Block* blk); + // Create the block map, which indexes blocks in pre_order. void map_blocks(); @@ -711,4 +921,6 @@ void do_flow(); void print_on(outputStream* st) const PRODUCT_RETURN; + + void rpo_print_on(outputStream* st) const PRODUCT_RETURN; }; diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/code/nmethod.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -1350,11 +1350,7 @@ return false; } } - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // Cannot do this test if verification of the UseParallelOldGC - // code using the PSMarkSweep code is being done. 
- assert(unloading_occurred, "Inconsistency in unloading"); - } + assert(unloading_occurred, "Inconsistency in unloading"); make_unloaded(is_alive, obj); return true; } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -210,10 +210,6 @@ PSScavenge::initialize(); if (UseParallelOldGC) { PSParallelCompact::post_initialize(); - if (VerifyParallelOldWithMarkSweep) { - // Will be used for verification of par old. - PSMarkSweep::initialize(); - } } else { PSMarkSweep::initialize(); } @@ -402,7 +398,7 @@ return result; } if (!is_tlab && - size >= (young_gen()->eden_space()->capacity_in_words() / 2)) { + size >= (young_gen()->eden_space()->capacity_in_words(Thread::current()) / 2)) { result = old_gen()->allocate(size, is_tlab); if (result != NULL) { return result; diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -146,7 +146,7 @@ { ParallelScavengeHeap* heap = PSParallelCompact::gc_heap(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); - ChunkTaskQueueSet* qset = ParCompactionManager::chunk_array(); + RegionTaskQueueSet* qset = ParCompactionManager::region_array(); ParallelTaskTerminator terminator(parallel_gc_threads, qset); GCTaskQueue* q = GCTaskQueue::create(); for(uint i=0; iis_gc_active(), "called outside gc"); - NOT_PRODUCT(TraceTime tm("StealChunkCompactionTask", + NOT_PRODUCT(TraceTime tm("StealRegionCompactionTask", PrintGCDetails && TraceParallelOldGCTasks, true, gclog_or_tty)); ParCompactionManager* cm = 
ParCompactionManager::gc_thread_compaction_manager(which); - // Has to drain stacks first because there may be chunks on + // Has to drain stacks first because there may be regions on // preloaded onto the stack and this thread may never have // done a draining task. Are the draining tasks needed? - cm->drain_chunk_stacks(); + cm->drain_region_stacks(); - size_t chunk_index = 0; + size_t region_index = 0; int random_seed = 17; // If we're the termination task, try 10 rounds of stealing before // setting the termination flag while(true) { - if (ParCompactionManager::steal(which, &random_seed, chunk_index)) { - PSParallelCompact::fill_and_update_chunk(cm, chunk_index); - cm->drain_chunk_stacks(); + if (ParCompactionManager::steal(which, &random_seed, region_index)) { + PSParallelCompact::fill_and_update_region(cm, region_index); + cm->drain_region_stacks(); } else { if (terminator()->offer_termination()) { break; @@ -249,11 +249,10 @@ UpdateDensePrefixTask::UpdateDensePrefixTask( PSParallelCompact::SpaceId space_id, - size_t chunk_index_start, - size_t chunk_index_end) : - _space_id(space_id), _chunk_index_start(chunk_index_start), - _chunk_index_end(chunk_index_end) -{} + size_t region_index_start, + size_t region_index_end) : + _space_id(space_id), _region_index_start(region_index_start), + _region_index_end(region_index_end) {} void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) { @@ -265,8 +264,8 @@ PSParallelCompact::update_and_deadwood_in_dense_prefix(cm, _space_id, - _chunk_index_start, - _chunk_index_end); + _region_index_start, + _region_index_end); } void DrainStacksCompactionTask::do_it(GCTaskManager* manager, uint which) { @@ -278,6 +277,6 @@ ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(which); - // Process any chunks already in the compaction managers stacks. - cm->drain_chunk_stacks(); + // Process any regions already in the compaction managers stacks. 
+ cm->drain_region_stacks(); } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -188,18 +188,18 @@ }; // -// StealChunkCompactionTask +// StealRegionCompactionTask // // This task is used to distribute work to idle threads. // -class StealChunkCompactionTask : public GCTask { +class StealRegionCompactionTask : public GCTask { private: ParallelTaskTerminator* const _terminator; public: - StealChunkCompactionTask(ParallelTaskTerminator* t); + StealRegionCompactionTask(ParallelTaskTerminator* t); - char* name() { return (char *)"steal-chunk-task"; } + char* name() { return (char *)"steal-region-task"; } ParallelTaskTerminator* terminator() { return _terminator; } virtual void do_it(GCTaskManager* manager, uint which); @@ -215,15 +215,15 @@ class UpdateDensePrefixTask : public GCTask { private: PSParallelCompact::SpaceId _space_id; - size_t _chunk_index_start; - size_t _chunk_index_end; + size_t _region_index_start; + size_t _region_index_end; public: char* name() { return (char *)"update-dense_prefix-task"; } UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id, - size_t chunk_index_start, - size_t chunk_index_end); + size_t region_index_start, + size_t region_index_end); virtual void do_it(GCTaskManager* manager, uint which); }; @@ -231,17 +231,17 @@ // // DrainStacksCompactionTask // -// This task processes chunks that have been added to the stacks of each +// This task processes regions that have been added to the stacks of each // compaction manager. // // Trying to use one draining thread does not work because there are no // guarantees about which task will be picked up by which thread. 
For example, -// if thread A gets all the preloaded chunks, thread A may not get a draining +// if thread A gets all the preloaded regions, thread A may not get a draining // task (they may all be done by other threads). // class DrainStacksCompactionTask : public GCTask { public: - char* name() { return (char *)"drain-chunk-task"; } + char* name() { return (char *)"drain-region-task"; } virtual void do_it(GCTaskManager* manager, uint which); }; diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -30,7 +30,7 @@ OopTaskQueueSet* ParCompactionManager::_stack_array = NULL; ObjectStartArray* ParCompactionManager::_start_array = NULL; ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL; -ChunkTaskQueueSet* ParCompactionManager::_chunk_array = NULL; +RegionTaskQueueSet* ParCompactionManager::_region_array = NULL; ParCompactionManager::ParCompactionManager() : _action(CopyAndUpdate) { @@ -46,13 +46,13 @@ // We want the overflow stack to be permanent _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray(10, true); -#ifdef USE_ChunkTaskQueueWithOverflow - chunk_stack()->initialize(); +#ifdef USE_RegionTaskQueueWithOverflow + region_stack()->initialize(); #else - chunk_stack()->initialize(); + region_stack()->initialize(); // We want the overflow stack to be permanent - _chunk_overflow_stack = + _region_overflow_stack = new (ResourceObj::C_HEAP) GrowableArray(10, true); #endif @@ -86,18 +86,18 @@ _stack_array = new OopTaskQueueSet(parallel_gc_threads); guarantee(_stack_array != NULL, "Count not initialize promotion manager"); - _chunk_array = new ChunkTaskQueueSet(parallel_gc_threads); - guarantee(_chunk_array != NULL, "Count not initialize promotion manager"); + _region_array = new 
RegionTaskQueueSet(parallel_gc_threads); + guarantee(_region_array != NULL, "Count not initialize promotion manager"); // Create and register the ParCompactionManager(s) for the worker threads. for(uint i=0; iregister_queue(i, _manager_array[i]->marking_stack()); -#ifdef USE_ChunkTaskQueueWithOverflow - chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()->task_queue()); +#ifdef USE_RegionTaskQueueWithOverflow + region_array()->register_queue(i, _manager_array[i]->region_stack()->task_queue()); #else - chunk_array()->register_queue(i, _manager_array[i]->chunk_stack()); + region_array()->register_queue(i, _manager_array[i]->region_stack()); #endif } @@ -153,31 +153,31 @@ return NULL; } -// Save chunk on a stack -void ParCompactionManager::save_for_processing(size_t chunk_index) { +// Save region on a stack +void ParCompactionManager::save_for_processing(size_t region_index) { #ifdef ASSERT const ParallelCompactData& sd = PSParallelCompact::summary_data(); - ParallelCompactData::ChunkData* const chunk_ptr = sd.chunk(chunk_index); - assert(chunk_ptr->claimed(), "must be claimed"); - assert(chunk_ptr->_pushed++ == 0, "should only be pushed once"); + ParallelCompactData::RegionData* const region_ptr = sd.region(region_index); + assert(region_ptr->claimed(), "must be claimed"); + assert(region_ptr->_pushed++ == 0, "should only be pushed once"); #endif - chunk_stack_push(chunk_index); + region_stack_push(region_index); } -void ParCompactionManager::chunk_stack_push(size_t chunk_index) { +void ParCompactionManager::region_stack_push(size_t region_index) { -#ifdef USE_ChunkTaskQueueWithOverflow - chunk_stack()->save(chunk_index); +#ifdef USE_RegionTaskQueueWithOverflow + region_stack()->save(region_index); #else - if(!chunk_stack()->push(chunk_index)) { - chunk_overflow_stack()->push(chunk_index); + if(!region_stack()->push(region_index)) { + region_overflow_stack()->push(region_index); } #endif } -bool ParCompactionManager::retrieve_for_processing(size_t& 
chunk_index) { -#ifdef USE_ChunkTaskQueueWithOverflow - return chunk_stack()->retrieve(chunk_index); +bool ParCompactionManager::retrieve_for_processing(size_t& region_index) { +#ifdef USE_RegionTaskQueueWithOverflow + return region_stack()->retrieve(region_index); #else // Should not be used in the parallel case ShouldNotReachHere(); @@ -230,14 +230,14 @@ assert(overflow_stack()->length() == 0, "Sanity"); } -void ParCompactionManager::drain_chunk_overflow_stack() { - size_t chunk_index = (size_t) -1; - while(chunk_stack()->retrieve_from_overflow(chunk_index)) { - PSParallelCompact::fill_and_update_chunk(this, chunk_index); +void ParCompactionManager::drain_region_overflow_stack() { + size_t region_index = (size_t) -1; + while(region_stack()->retrieve_from_overflow(region_index)) { + PSParallelCompact::fill_and_update_region(this, region_index); } } -void ParCompactionManager::drain_chunk_stacks() { +void ParCompactionManager::drain_region_stacks() { #ifdef ASSERT ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); @@ -249,42 +249,42 @@ #if 1 // def DO_PARALLEL - the serial code hasn't been updated do { -#ifdef USE_ChunkTaskQueueWithOverflow +#ifdef USE_RegionTaskQueueWithOverflow // Drain overflow stack first, so other threads can steal from // claimed stack while we work. 
- size_t chunk_index = (size_t) -1; - while(chunk_stack()->retrieve_from_overflow(chunk_index)) { - PSParallelCompact::fill_and_update_chunk(this, chunk_index); + size_t region_index = (size_t) -1; + while(region_stack()->retrieve_from_overflow(region_index)) { + PSParallelCompact::fill_and_update_region(this, region_index); } - while (chunk_stack()->retrieve_from_stealable_queue(chunk_index)) { - PSParallelCompact::fill_and_update_chunk(this, chunk_index); + while (region_stack()->retrieve_from_stealable_queue(region_index)) { + PSParallelCompact::fill_and_update_region(this, region_index); } - } while (!chunk_stack()->is_empty()); + } while (!region_stack()->is_empty()); #else // Drain overflow stack first, so other threads can steal from // claimed stack while we work. - while(!chunk_overflow_stack()->is_empty()) { - size_t chunk_index = chunk_overflow_stack()->pop(); - PSParallelCompact::fill_and_update_chunk(this, chunk_index); + while(!region_overflow_stack()->is_empty()) { + size_t region_index = region_overflow_stack()->pop(); + PSParallelCompact::fill_and_update_region(this, region_index); } - size_t chunk_index = -1; + size_t region_index = -1; // obj is a reference!!! - while (chunk_stack()->pop_local(chunk_index)) { + while (region_stack()->pop_local(region_index)) { // It would be nice to assert about the type of objects we might // pop, but they can come from anywhere, unfortunately. 
- PSParallelCompact::fill_and_update_chunk(this, chunk_index); + PSParallelCompact::fill_and_update_region(this, region_index); } - } while((chunk_stack()->size() != 0) || - (chunk_overflow_stack()->length() != 0)); + } while((region_stack()->size() != 0) || + (region_overflow_stack()->length() != 0)); #endif -#ifdef USE_ChunkTaskQueueWithOverflow - assert(chunk_stack()->is_empty(), "Sanity"); +#ifdef USE_RegionTaskQueueWithOverflow + assert(region_stack()->is_empty(), "Sanity"); #else - assert(chunk_stack()->size() == 0, "Sanity"); - assert(chunk_overflow_stack()->length() == 0, "Sanity"); + assert(region_stack()->size() == 0, "Sanity"); + assert(region_overflow_stack()->length() == 0, "Sanity"); #endif #else oop obj; diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -52,7 +52,7 @@ friend class ParallelTaskTerminator; friend class ParMarkBitMap; friend class PSParallelCompact; - friend class StealChunkCompactionTask; + friend class StealRegionCompactionTask; friend class UpdateAndFillClosure; friend class RefProcTaskExecutor; @@ -72,27 +72,27 @@ // ------------------------ End don't putback if not needed private: - static ParCompactionManager** _manager_array; - static OopTaskQueueSet* _stack_array; - static ObjectStartArray* _start_array; - static ChunkTaskQueueSet* _chunk_array; - static PSOldGen* _old_gen; + static ParCompactionManager** _manager_array; + static OopTaskQueueSet* _stack_array; + static ObjectStartArray* _start_array; + static RegionTaskQueueSet* _region_array; + static PSOldGen* _old_gen; - OopTaskQueue _marking_stack; - GrowableArray* _overflow_stack; + OopTaskQueue _marking_stack; + GrowableArray* _overflow_stack; // Is there a way to reuse the _marking_stack for the - 
// saving empty chunks? For now just create a different + // saving empty regions? For now just create a different // type of TaskQueue. -#ifdef USE_ChunkTaskQueueWithOverflow - ChunkTaskQueueWithOverflow _chunk_stack; +#ifdef USE_RegionTaskQueueWithOverflow + RegionTaskQueueWithOverflow _region_stack; #else - ChunkTaskQueue _chunk_stack; - GrowableArray* _chunk_overflow_stack; + RegionTaskQueue _region_stack; + GrowableArray* _region_overflow_stack; #endif #if 1 // does this happen enough to need a per thread stack? - GrowableArray* _revisit_klass_stack; + GrowableArray* _revisit_klass_stack; #endif static ParMarkBitMap* _mark_bitmap; @@ -100,21 +100,22 @@ static PSOldGen* old_gen() { return _old_gen; } static ObjectStartArray* start_array() { return _start_array; } - static OopTaskQueueSet* stack_array() { return _stack_array; } + static OopTaskQueueSet* stack_array() { return _stack_array; } static void initialize(ParMarkBitMap* mbm); protected: // Array of tasks. Needed by the ParallelTaskTerminator. - static ChunkTaskQueueSet* chunk_array() { return _chunk_array; } - - OopTaskQueue* marking_stack() { return &_marking_stack; } - GrowableArray* overflow_stack() { return _overflow_stack; } -#ifdef USE_ChunkTaskQueueWithOverflow - ChunkTaskQueueWithOverflow* chunk_stack() { return &_chunk_stack; } + static RegionTaskQueueSet* region_array() { return _region_array; } + OopTaskQueue* marking_stack() { return &_marking_stack; } + GrowableArray* overflow_stack() { return _overflow_stack; } +#ifdef USE_RegionTaskQueueWithOverflow + RegionTaskQueueWithOverflow* region_stack() { return &_region_stack; } #else - ChunkTaskQueue* chunk_stack() { return &_chunk_stack; } - GrowableArray* chunk_overflow_stack() { return _chunk_overflow_stack; } + RegionTaskQueue* region_stack() { return &_region_stack; } + GrowableArray* region_overflow_stack() { + return _region_overflow_stack; + } #endif // Pushes onto the marking stack. 
If the marking stack is full, @@ -123,9 +124,9 @@ // Do not implement an equivalent stack_pop. Deal with the // marking stack and overflow stack directly. - // Pushes onto the chunk stack. If the chunk stack is full, - // pushes onto the chunk overflow stack. - void chunk_stack_push(size_t chunk_index); + // Pushes onto the region stack. If the region stack is full, + // pushes onto the region overflow stack. + void region_stack_push(size_t region_index); public: Action action() { return _action; } @@ -160,10 +161,10 @@ // Get a oop for scanning. If returns null, no oop were found. oop retrieve_for_scanning(); - // Save chunk for later processing. Must not fail. - void save_for_processing(size_t chunk_index); - // Get a chunk for processing. If returns null, no chunk were found. - bool retrieve_for_processing(size_t& chunk_index); + // Save region for later processing. Must not fail. + void save_for_processing(size_t region_index); + // Get a region for processing. If returns null, no region were found. 
+ bool retrieve_for_processing(size_t& region_index); // Access function for compaction managers static ParCompactionManager* gc_thread_compaction_manager(int index); @@ -172,18 +173,18 @@ return stack_array()->steal(queue_num, seed, t); } - static bool steal(int queue_num, int* seed, ChunkTask& t) { - return chunk_array()->steal(queue_num, seed, t); + static bool steal(int queue_num, int* seed, RegionTask& t) { + return region_array()->steal(queue_num, seed, t); } // Process tasks remaining on any stack void drain_marking_stacks(OopClosure *blk); // Process tasks remaining on any stack - void drain_chunk_stacks(); + void drain_region_stacks(); // Process tasks remaining on any stack - void drain_chunk_overflow_stack(); + void drain_region_overflow_stack(); // Debugging support #ifdef ASSERT diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -35,9 +35,7 @@ _ref_processor = new ReferenceProcessor(mr, true, // atomic_discovery false); // mt_discovery - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - _counters = new CollectorCounters("PSMarkSweep", 1); - } + _counters = new CollectorCounters("PSMarkSweep", 1); } // This method contains all heap specific policy for invoking mark sweep. @@ -518,9 +516,6 @@ follow_stack(); // Process reference objects found during marking - - // Skipping the reference processing for VerifyParallelOldWithMarkSweep - // affects the marking (makes it different). 
{ ReferencePolicy *soft_ref_policy; if (clear_all_softrefs) { diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -152,20 +152,15 @@ oop(q)->forward_to(oop(compact_top)); assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark"); } else { - // Don't clear the mark since it's confuses parallel old - // verification. - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // if the object isn't moving we can just set the mark to the default - // mark and handle it specially later on. - oop(q)->init_mark(); - } + // if the object isn't moving we can just set the mark to the default + // mark and handle it specially later on. + oop(q)->init_mark(); assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL"); } // Update object start array - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - if (start_array) - start_array->allocate_block(compact_top); + if (start_array) { + start_array->allocate_block(compact_top); } VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), size)); @@ -219,19 +214,14 @@ assert(oop(q)->is_gc_marked(), "encoding the pointer should preserve the mark"); } else { // if the object isn't moving we can just set the mark to the default - // Don't clear the mark since it's confuses parallel old - // verification. - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // mark and handle it specially later on. - oop(q)->init_mark(); - } + // mark and handle it specially later on. 
+ oop(q)->init_mark(); assert(oop(q)->forwardee() == NULL, "should be forwarded to NULL"); } - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // Update object start array - if (start_array) - start_array->allocate_block(compact_top); + // Update object start array + if (start_array) { + start_array->allocate_block(compact_top); } VALIDATE_MARK_SWEEP_ONLY(MarkSweep::register_live_oop(oop(q), sz)); diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -152,9 +152,7 @@ assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); // Reset start array first. - debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {) start_array()->reset(); - debug_only(}) object_mark_sweep()->precompact(); diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -28,43 +28,31 @@ #include // All sizes are in HeapWords. 
-const size_t ParallelCompactData::Log2ChunkSize = 9; // 512 words -const size_t ParallelCompactData::ChunkSize = (size_t)1 << Log2ChunkSize; -const size_t ParallelCompactData::ChunkSizeBytes = ChunkSize << LogHeapWordSize; -const size_t ParallelCompactData::ChunkSizeOffsetMask = ChunkSize - 1; -const size_t ParallelCompactData::ChunkAddrOffsetMask = ChunkSizeBytes - 1; -const size_t ParallelCompactData::ChunkAddrMask = ~ChunkAddrOffsetMask; - -// 32-bit: 128 words covers 4 bitmap words -// 64-bit: 128 words covers 2 bitmap words -const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words -const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize; -const size_t ParallelCompactData::BlockOffsetMask = BlockSize - 1; -const size_t ParallelCompactData::BlockMask = ~BlockOffsetMask; - -const size_t ParallelCompactData::BlocksPerChunk = ChunkSize / BlockSize; - -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_shift = 27; - -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_mask = ~0U << dc_shift; - -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_one = 0x1U << dc_shift; - -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::los_mask = ~dc_mask; - -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_claimed = 0x8U << dc_shift; - -const ParallelCompactData::ChunkData::chunk_sz_t -ParallelCompactData::ChunkData::dc_completed = 0xcU << dc_shift; - -#ifdef ASSERT -short ParallelCompactData::BlockData::_cur_phase = 0; -#endif +const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words +const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize; +const size_t ParallelCompactData::RegionSizeBytes = + RegionSize << LogHeapWordSize; +const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1; +const size_t ParallelCompactData::RegionAddrOffsetMask = 
RegionSizeBytes - 1; +const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask; + +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_shift = 27; + +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift; + +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_one = 0x1U << dc_shift; + +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::los_mask = ~dc_mask; + +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_claimed = 0x8U << dc_shift; + +const ParallelCompactData::RegionData::region_sz_t +ParallelCompactData::RegionData::dc_completed = 0xcU << dc_shift; SpaceInfo PSParallelCompact::_space_info[PSParallelCompact::last_space_id]; bool PSParallelCompact::_print_phases = false; @@ -100,99 +88,12 @@ GrowableArray * PSParallelCompact::_last_gc_live_oops_size = NULL; #endif -// XXX beg - verification code; only works while we also mark in object headers -static void -verify_mark_bitmap(ParMarkBitMap& _mark_bitmap) -{ - ParallelScavengeHeap* heap = PSParallelCompact::gc_heap(); - - PSPermGen* perm_gen = heap->perm_gen(); - PSOldGen* old_gen = heap->old_gen(); - PSYoungGen* young_gen = heap->young_gen(); - - MutableSpace* perm_space = perm_gen->object_space(); - MutableSpace* old_space = old_gen->object_space(); - MutableSpace* eden_space = young_gen->eden_space(); - MutableSpace* from_space = young_gen->from_space(); - MutableSpace* to_space = young_gen->to_space(); - - // 'from_space' here is the survivor space at the lower address. 
- if (to_space->bottom() < from_space->bottom()) { - from_space = to_space; - to_space = young_gen->from_space(); - } - - HeapWord* boundaries[12]; - unsigned int bidx = 0; - const unsigned int bidx_max = sizeof(boundaries) / sizeof(boundaries[0]); - - boundaries[0] = perm_space->bottom(); - boundaries[1] = perm_space->top(); - boundaries[2] = old_space->bottom(); - boundaries[3] = old_space->top(); - boundaries[4] = eden_space->bottom(); - boundaries[5] = eden_space->top(); - boundaries[6] = from_space->bottom(); - boundaries[7] = from_space->top(); - boundaries[8] = to_space->bottom(); - boundaries[9] = to_space->top(); - boundaries[10] = to_space->end(); - boundaries[11] = to_space->end(); - - BitMap::idx_t beg_bit = 0; - BitMap::idx_t end_bit; - BitMap::idx_t tmp_bit; - const BitMap::idx_t last_bit = _mark_bitmap.size(); - do { - HeapWord* addr = _mark_bitmap.bit_to_addr(beg_bit); - if (_mark_bitmap.is_marked(beg_bit)) { - oop obj = (oop)addr; - assert(obj->is_gc_marked(), "obj header is not marked"); - end_bit = _mark_bitmap.find_obj_end(beg_bit, last_bit); - const size_t size = _mark_bitmap.obj_size(beg_bit, end_bit); - assert(size == (size_t)obj->size(), "end bit wrong?"); - beg_bit = _mark_bitmap.find_obj_beg(beg_bit + 1, last_bit); - assert(beg_bit > end_bit, "bit set in middle of an obj"); - } else { - if (addr >= boundaries[bidx] && addr < boundaries[bidx + 1]) { - // a dead object in the current space. - oop obj = (oop)addr; - end_bit = _mark_bitmap.addr_to_bit(addr + obj->size()); - assert(!obj->is_gc_marked(), "obj marked in header, not in bitmap"); - tmp_bit = beg_bit + 1; - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit); - assert(beg_bit == end_bit, "beg bit set in unmarked obj"); - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit); - assert(beg_bit == end_bit, "end bit set in unmarked obj"); - } else if (addr < boundaries[bidx + 2]) { - // addr is between top in the current space and bottom in the next. 
- end_bit = beg_bit + pointer_delta(boundaries[bidx + 2], addr); - tmp_bit = beg_bit; - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, end_bit); - assert(beg_bit == end_bit, "beg bit set above top"); - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, end_bit); - assert(beg_bit == end_bit, "end bit set above top"); - bidx += 2; - } else if (bidx < bidx_max - 2) { - bidx += 2; // ??? - } else { - tmp_bit = beg_bit; - beg_bit = _mark_bitmap.find_obj_beg(tmp_bit, last_bit); - assert(beg_bit == last_bit, "beg bit set outside heap"); - beg_bit = _mark_bitmap.find_obj_end(tmp_bit, last_bit); - assert(beg_bit == last_bit, "end bit set outside heap"); - } - } - } while (beg_bit < last_bit); -} -// XXX end - verification code; only works while we also mark in object headers - #ifndef PRODUCT const char* PSParallelCompact::space_names[] = { "perm", "old ", "eden", "from", "to " }; -void PSParallelCompact::print_chunk_ranges() +void PSParallelCompact::print_region_ranges() { tty->print_cr("space bottom top end new_top"); tty->print_cr("------ ---------- ---------- ---------- ----------"); @@ -203,31 +104,31 @@ SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) " ", id, space_names[id], - summary_data().addr_to_chunk_idx(space->bottom()), - summary_data().addr_to_chunk_idx(space->top()), - summary_data().addr_to_chunk_idx(space->end()), - summary_data().addr_to_chunk_idx(_space_info[id].new_top())); + summary_data().addr_to_region_idx(space->bottom()), + summary_data().addr_to_region_idx(space->top()), + summary_data().addr_to_region_idx(space->end()), + summary_data().addr_to_region_idx(_space_info[id].new_top())); } } void -print_generic_summary_chunk(size_t i, const ParallelCompactData::ChunkData* c) +print_generic_summary_region(size_t i, const ParallelCompactData::RegionData* c) { -#define CHUNK_IDX_FORMAT SIZE_FORMAT_W(7) -#define CHUNK_DATA_FORMAT SIZE_FORMAT_W(5) +#define REGION_IDX_FORMAT SIZE_FORMAT_W(7) +#define REGION_DATA_FORMAT 
SIZE_FORMAT_W(5) ParallelCompactData& sd = PSParallelCompact::summary_data(); - size_t dci = c->destination() ? sd.addr_to_chunk_idx(c->destination()) : 0; - tty->print_cr(CHUNK_IDX_FORMAT " " PTR_FORMAT " " - CHUNK_IDX_FORMAT " " PTR_FORMAT " " - CHUNK_DATA_FORMAT " " CHUNK_DATA_FORMAT " " - CHUNK_DATA_FORMAT " " CHUNK_IDX_FORMAT " %d", + size_t dci = c->destination() ? sd.addr_to_region_idx(c->destination()) : 0; + tty->print_cr(REGION_IDX_FORMAT " " PTR_FORMAT " " + REGION_IDX_FORMAT " " PTR_FORMAT " " + REGION_DATA_FORMAT " " REGION_DATA_FORMAT " " + REGION_DATA_FORMAT " " REGION_IDX_FORMAT " %d", i, c->data_location(), dci, c->destination(), c->partial_obj_size(), c->live_obj_size(), - c->data_size(), c->source_chunk(), c->destination_count()); - -#undef CHUNK_IDX_FORMAT -#undef CHUNK_DATA_FORMAT + c->data_size(), c->source_region(), c->destination_count()); + +#undef REGION_IDX_FORMAT +#undef REGION_DATA_FORMAT } void @@ -236,14 +137,14 @@ HeapWord* const end_addr) { size_t total_words = 0; - size_t i = summary_data.addr_to_chunk_idx(beg_addr); - const size_t last = summary_data.addr_to_chunk_idx(end_addr); + size_t i = summary_data.addr_to_region_idx(beg_addr); + const size_t last = summary_data.addr_to_region_idx(end_addr); HeapWord* pdest = 0; while (i <= last) { - ParallelCompactData::ChunkData* c = summary_data.chunk(i); + ParallelCompactData::RegionData* c = summary_data.region(i); if (c->data_size() != 0 || c->destination() != pdest) { - print_generic_summary_chunk(i, c); + print_generic_summary_region(i, c); total_words += c->data_size(); pdest = c->destination(); } @@ -265,16 +166,16 @@ } void -print_initial_summary_chunk(size_t i, - const ParallelCompactData::ChunkData* c, - bool newline = true) +print_initial_summary_region(size_t i, + const ParallelCompactData::RegionData* c, + bool newline = true) { tty->print(SIZE_FORMAT_W(5) " " PTR_FORMAT " " SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " " SIZE_FORMAT_W(5) " %d", i, 
c->destination(), c->partial_obj_size(), c->live_obj_size(), - c->data_size(), c->source_chunk(), c->destination_count()); + c->data_size(), c->source_region(), c->destination_count()); if (newline) tty->cr(); } @@ -285,47 +186,48 @@ return; } - const size_t chunk_size = ParallelCompactData::ChunkSize; - HeapWord* const top_aligned_up = summary_data.chunk_align_up(space->top()); - const size_t end_chunk = summary_data.addr_to_chunk_idx(top_aligned_up); - const ParallelCompactData::ChunkData* c = summary_data.chunk(end_chunk - 1); + const size_t region_size = ParallelCompactData::RegionSize; + typedef ParallelCompactData::RegionData RegionData; + HeapWord* const top_aligned_up = summary_data.region_align_up(space->top()); + const size_t end_region = summary_data.addr_to_region_idx(top_aligned_up); + const RegionData* c = summary_data.region(end_region - 1); HeapWord* end_addr = c->destination() + c->data_size(); const size_t live_in_space = pointer_delta(end_addr, space->bottom()); - // Print (and count) the full chunks at the beginning of the space. - size_t full_chunk_count = 0; - size_t i = summary_data.addr_to_chunk_idx(space->bottom()); - while (i < end_chunk && summary_data.chunk(i)->data_size() == chunk_size) { - print_initial_summary_chunk(i, summary_data.chunk(i)); - ++full_chunk_count; + // Print (and count) the full regions at the beginning of the space. 
+ size_t full_region_count = 0; + size_t i = summary_data.addr_to_region_idx(space->bottom()); + while (i < end_region && summary_data.region(i)->data_size() == region_size) { + print_initial_summary_region(i, summary_data.region(i)); + ++full_region_count; ++i; } - size_t live_to_right = live_in_space - full_chunk_count * chunk_size; + size_t live_to_right = live_in_space - full_region_count * region_size; double max_reclaimed_ratio = 0.0; - size_t max_reclaimed_ratio_chunk = 0; + size_t max_reclaimed_ratio_region = 0; size_t max_dead_to_right = 0; size_t max_live_to_right = 0; - // Print the 'reclaimed ratio' for chunks while there is something live in the - // chunk or to the right of it. The remaining chunks are empty (and + // Print the 'reclaimed ratio' for regions while there is something live in + // the region or to the right of it. The remaining regions are empty (and // uninteresting), and computing the ratio will result in division by 0. - while (i < end_chunk && live_to_right > 0) { - c = summary_data.chunk(i); - HeapWord* const chunk_addr = summary_data.chunk_to_addr(i); - const size_t used_to_right = pointer_delta(space->top(), chunk_addr); + while (i < end_region && live_to_right > 0) { + c = summary_data.region(i); + HeapWord* const region_addr = summary_data.region_to_addr(i); + const size_t used_to_right = pointer_delta(space->top(), region_addr); const size_t dead_to_right = used_to_right - live_to_right; const double reclaimed_ratio = double(dead_to_right) / live_to_right; if (reclaimed_ratio > max_reclaimed_ratio) { max_reclaimed_ratio = reclaimed_ratio; - max_reclaimed_ratio_chunk = i; + max_reclaimed_ratio_region = i; max_dead_to_right = dead_to_right; max_live_to_right = live_to_right; } - print_initial_summary_chunk(i, c, false); + print_initial_summary_region(i, c, false); tty->print_cr(" %12.10f " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10), reclaimed_ratio, dead_to_right, live_to_right); @@ -333,14 +235,14 @@ ++i; } - // Any remaining 
chunks are empty. Print one more if there is one. - if (i < end_chunk) { - print_initial_summary_chunk(i, summary_data.chunk(i)); + // Any remaining regions are empty. Print one more if there is one. + if (i < end_region) { + print_initial_summary_region(i, summary_data.region(i)); } tty->print_cr("max: " SIZE_FORMAT_W(4) " d2r=" SIZE_FORMAT_W(10) " " "l2r=" SIZE_FORMAT_W(10) " max_ratio=%14.12f", - max_reclaimed_ratio_chunk, max_dead_to_right, + max_reclaimed_ratio_region, max_dead_to_right, max_live_to_right, max_reclaimed_ratio); } @@ -372,13 +274,9 @@ { _region_start = 0; - _chunk_vspace = 0; - _chunk_data = 0; - _chunk_count = 0; - - _block_vspace = 0; - _block_data = 0; - _block_count = 0; + _region_vspace = 0; + _region_data = 0; + _region_count = 0; } bool ParallelCompactData::initialize(MemRegion covered_region) @@ -387,18 +285,12 @@ const size_t region_size = covered_region.word_size(); DEBUG_ONLY(_region_end = _region_start + region_size;) - assert(chunk_align_down(_region_start) == _region_start, + assert(region_align_down(_region_start) == _region_start, "region start not aligned"); - assert((region_size & ChunkSizeOffsetMask) == 0, - "region size not a multiple of ChunkSize"); - - bool result = initialize_chunk_data(region_size); - - // Initialize the block data if it will be used for updating pointers, or if - // this is a debug build. 
- if (!UseParallelOldGCChunkPointerCalc || trueInDebug) { - result = result && initialize_block_data(region_size); - } + assert((region_size & RegionSizeOffsetMask) == 0, + "region size not a multiple of RegionSize"); + + bool result = initialize_region_data(region_size); return result; } @@ -429,25 +321,13 @@ return 0; } -bool ParallelCompactData::initialize_chunk_data(size_t region_size) +bool ParallelCompactData::initialize_region_data(size_t region_size) { - const size_t count = (region_size + ChunkSizeOffsetMask) >> Log2ChunkSize; - _chunk_vspace = create_vspace(count, sizeof(ChunkData)); - if (_chunk_vspace != 0) { - _chunk_data = (ChunkData*)_chunk_vspace->reserved_low_addr(); - _chunk_count = count; - return true; - } - return false; -} - -bool ParallelCompactData::initialize_block_data(size_t region_size) -{ - const size_t count = (region_size + BlockOffsetMask) >> Log2BlockSize; - _block_vspace = create_vspace(count, sizeof(BlockData)); - if (_block_vspace != 0) { - _block_data = (BlockData*)_block_vspace->reserved_low_addr(); - _block_count = count; + const size_t count = (region_size + RegionSizeOffsetMask) >> Log2RegionSize; + _region_vspace = create_vspace(count, sizeof(RegionData)); + if (_region_vspace != 0) { + _region_data = (RegionData*)_region_vspace->reserved_low_addr(); + _region_count = count; return true; } return false; @@ -455,38 +335,27 @@ void ParallelCompactData::clear() { - if (_block_data) { - memset(_block_data, 0, _block_vspace->committed_size()); - } - memset(_chunk_data, 0, _chunk_vspace->committed_size()); + memset(_region_data, 0, _region_vspace->committed_size()); } -void ParallelCompactData::clear_range(size_t beg_chunk, size_t end_chunk) { - assert(beg_chunk <= _chunk_count, "beg_chunk out of range"); - assert(end_chunk <= _chunk_count, "end_chunk out of range"); - assert(ChunkSize % BlockSize == 0, "ChunkSize not a multiple of BlockSize"); - - const size_t chunk_cnt = end_chunk - beg_chunk; - - if (_block_data) { - const 
size_t blocks_per_chunk = ChunkSize / BlockSize; - const size_t beg_block = beg_chunk * blocks_per_chunk; - const size_t block_cnt = chunk_cnt * blocks_per_chunk; - memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData)); - } - memset(_chunk_data + beg_chunk, 0, chunk_cnt * sizeof(ChunkData)); +void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) { + assert(beg_region <= _region_count, "beg_region out of range"); + assert(end_region <= _region_count, "end_region out of range"); + + const size_t region_cnt = end_region - beg_region; + memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData)); } -HeapWord* ParallelCompactData::partial_obj_end(size_t chunk_idx) const +HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const { - const ChunkData* cur_cp = chunk(chunk_idx); - const ChunkData* const end_cp = chunk(chunk_count() - 1); - - HeapWord* result = chunk_to_addr(chunk_idx); + const RegionData* cur_cp = region(region_idx); + const RegionData* const end_cp = region(region_count() - 1); + + HeapWord* result = region_to_addr(region_idx); if (cur_cp < end_cp) { do { result += cur_cp->partial_obj_size(); - } while (cur_cp->partial_obj_size() == ChunkSize && ++cur_cp < end_cp); + } while (cur_cp->partial_obj_size() == RegionSize && ++cur_cp < end_cp); } return result; } @@ -494,56 +363,56 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) { const size_t obj_ofs = pointer_delta(addr, _region_start); - const size_t beg_chunk = obj_ofs >> Log2ChunkSize; - const size_t end_chunk = (obj_ofs + len - 1) >> Log2ChunkSize; + const size_t beg_region = obj_ofs >> Log2RegionSize; + const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize; DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);) DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);) - if (beg_chunk == end_chunk) { - // All in one chunk. - _chunk_data[beg_chunk].add_live_obj(len); + if (beg_region == end_region) { + // All in one region. 
+ _region_data[beg_region].add_live_obj(len); return; } - // First chunk. - const size_t beg_ofs = chunk_offset(addr); - _chunk_data[beg_chunk].add_live_obj(ChunkSize - beg_ofs); + // First region. + const size_t beg_ofs = region_offset(addr); + _region_data[beg_region].add_live_obj(RegionSize - beg_ofs); klassOop klass = ((oop)addr)->klass(); - // Middle chunks--completely spanned by this object. - for (size_t chunk = beg_chunk + 1; chunk < end_chunk; ++chunk) { - _chunk_data[chunk].set_partial_obj_size(ChunkSize); - _chunk_data[chunk].set_partial_obj_addr(addr); + // Middle regions--completely spanned by this object. + for (size_t region = beg_region + 1; region < end_region; ++region) { + _region_data[region].set_partial_obj_size(RegionSize); + _region_data[region].set_partial_obj_addr(addr); } - // Last chunk. - const size_t end_ofs = chunk_offset(addr + len - 1); - _chunk_data[end_chunk].set_partial_obj_size(end_ofs + 1); - _chunk_data[end_chunk].set_partial_obj_addr(addr); + // Last region. 
+ const size_t end_ofs = region_offset(addr + len - 1); + _region_data[end_region].set_partial_obj_size(end_ofs + 1); + _region_data[end_region].set_partial_obj_addr(addr); } void ParallelCompactData::summarize_dense_prefix(HeapWord* beg, HeapWord* end) { - assert(chunk_offset(beg) == 0, "not ChunkSize aligned"); - assert(chunk_offset(end) == 0, "not ChunkSize aligned"); - - size_t cur_chunk = addr_to_chunk_idx(beg); - const size_t end_chunk = addr_to_chunk_idx(end); + assert(region_offset(beg) == 0, "not RegionSize aligned"); + assert(region_offset(end) == 0, "not RegionSize aligned"); + + size_t cur_region = addr_to_region_idx(beg); + const size_t end_region = addr_to_region_idx(end); HeapWord* addr = beg; - while (cur_chunk < end_chunk) { - _chunk_data[cur_chunk].set_destination(addr); - _chunk_data[cur_chunk].set_destination_count(0); - _chunk_data[cur_chunk].set_source_chunk(cur_chunk); - _chunk_data[cur_chunk].set_data_location(addr); - - // Update live_obj_size so the chunk appears completely full. - size_t live_size = ChunkSize - _chunk_data[cur_chunk].partial_obj_size(); - _chunk_data[cur_chunk].set_live_obj_size(live_size); - - ++cur_chunk; - addr += ChunkSize; + while (cur_region < end_region) { + _region_data[cur_region].set_destination(addr); + _region_data[cur_region].set_destination_count(0); + _region_data[cur_region].set_source_region(cur_region); + _region_data[cur_region].set_data_location(addr); + + // Update live_obj_size so the region appears completely full. + size_t live_size = RegionSize - _region_data[cur_region].partial_obj_size(); + _region_data[cur_region].set_live_obj_size(live_size); + + ++cur_region; + addr += RegionSize; } } @@ -552,7 +421,7 @@ HeapWord** target_next, HeapWord** source_next) { // This is too strict. 
- // assert(chunk_offset(source_beg) == 0, "not ChunkSize aligned"); + // assert(region_offset(source_beg) == 0, "not RegionSize aligned"); if (TraceParallelOldGCSummaryPhase) { tty->print_cr("tb=" PTR_FORMAT " te=" PTR_FORMAT " " @@ -564,125 +433,93 @@ source_next != 0 ? *source_next : (HeapWord*) 0); } - size_t cur_chunk = addr_to_chunk_idx(source_beg); - const size_t end_chunk = addr_to_chunk_idx(chunk_align_up(source_end)); + size_t cur_region = addr_to_region_idx(source_beg); + const size_t end_region = addr_to_region_idx(region_align_up(source_end)); HeapWord *dest_addr = target_beg; - while (cur_chunk < end_chunk) { - size_t words = _chunk_data[cur_chunk].data_size(); + while (cur_region < end_region) { + size_t words = _region_data[cur_region].data_size(); #if 1 assert(pointer_delta(target_end, dest_addr) >= words, "source region does not fit into target region"); #else - // XXX - need some work on the corner cases here. If the chunk does not - // fit, then must either make sure any partial_obj from the chunk fits, or - // 'undo' the initial part of the partial_obj that is in the previous chunk. + // XXX - need some work on the corner cases here. If the region does not + // fit, then must either make sure any partial_obj from the region fits, or + // "undo" the initial part of the partial_obj that is in the previous + // region. if (dest_addr + words >= target_end) { // Let the caller know where to continue. *target_next = dest_addr; - *source_next = chunk_to_addr(cur_chunk); + *source_next = region_to_addr(cur_region); return false; } #endif // #if 1 - _chunk_data[cur_chunk].set_destination(dest_addr); - - // Set the destination_count for cur_chunk, and if necessary, update - // source_chunk for a destination chunk. The source_chunk field is updated - // if cur_chunk is the first (left-most) chunk to be copied to a destination - // chunk. 
+ _region_data[cur_region].set_destination(dest_addr); + + // Set the destination_count for cur_region, and if necessary, update + // source_region for a destination region. The source_region field is + // updated if cur_region is the first (left-most) region to be copied to a + // destination region. // - // The destination_count calculation is a bit subtle. A chunk that has data - // that compacts into itself does not count itself as a destination. This - // maintains the invariant that a zero count means the chunk is available - // and can be claimed and then filled. + // The destination_count calculation is a bit subtle. A region that has + // data that compacts into itself does not count itself as a destination. + // This maintains the invariant that a zero count means the region is + // available and can be claimed and then filled. if (words > 0) { HeapWord* const last_addr = dest_addr + words - 1; - const size_t dest_chunk_1 = addr_to_chunk_idx(dest_addr); - const size_t dest_chunk_2 = addr_to_chunk_idx(last_addr); + const size_t dest_region_1 = addr_to_region_idx(dest_addr); + const size_t dest_region_2 = addr_to_region_idx(last_addr); #if 0 - // Initially assume that the destination chunks will be the same and + // Initially assume that the destination regions will be the same and // adjust the value below if necessary. Under this assumption, if - // cur_chunk == dest_chunk_2, then cur_chunk will be compacted completely - // into itself. - uint destination_count = cur_chunk == dest_chunk_2 ? 0 : 1; - if (dest_chunk_1 != dest_chunk_2) { - // Destination chunks differ; adjust destination_count. + // cur_region == dest_region_2, then cur_region will be compacted + // completely into itself. + uint destination_count = cur_region == dest_region_2 ? 0 : 1; + if (dest_region_1 != dest_region_2) { + // Destination regions differ; adjust destination_count. destination_count += 1; - // Data from cur_chunk will be copied to the start of dest_chunk_2. 
- _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk); - } else if (chunk_offset(dest_addr) == 0) { - // Data from cur_chunk will be copied to the start of the destination - // chunk. - _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk); + // Data from cur_region will be copied to the start of dest_region_2. + _region_data[dest_region_2].set_source_region(cur_region); + } else if (region_offset(dest_addr) == 0) { + // Data from cur_region will be copied to the start of the destination + // region. + _region_data[dest_region_1].set_source_region(cur_region); } #else - // Initially assume that the destination chunks will be different and + // Initially assume that the destination regions will be different and // adjust the value below if necessary. Under this assumption, if - // cur_chunk == dest_chunk2, then cur_chunk will be compacted partially - // into dest_chunk_1 and partially into itself. - uint destination_count = cur_chunk == dest_chunk_2 ? 1 : 2; - if (dest_chunk_1 != dest_chunk_2) { - // Data from cur_chunk will be copied to the start of dest_chunk_2. - _chunk_data[dest_chunk_2].set_source_chunk(cur_chunk); + // cur_region == dest_region2, then cur_region will be compacted partially + // into dest_region_1 and partially into itself. + uint destination_count = cur_region == dest_region_2 ? 1 : 2; + if (dest_region_1 != dest_region_2) { + // Data from cur_region will be copied to the start of dest_region_2. + _region_data[dest_region_2].set_source_region(cur_region); } else { - // Destination chunks are the same; adjust destination_count. + // Destination regions are the same; adjust destination_count. destination_count -= 1; - if (chunk_offset(dest_addr) == 0) { - // Data from cur_chunk will be copied to the start of the destination - // chunk. - _chunk_data[dest_chunk_1].set_source_chunk(cur_chunk); + if (region_offset(dest_addr) == 0) { + // Data from cur_region will be copied to the start of the destination + // region. 
+ _region_data[dest_region_1].set_source_region(cur_region); } } #endif // #if 0 - _chunk_data[cur_chunk].set_destination_count(destination_count); - _chunk_data[cur_chunk].set_data_location(chunk_to_addr(cur_chunk)); + _region_data[cur_region].set_destination_count(destination_count); + _region_data[cur_region].set_data_location(region_to_addr(cur_region)); dest_addr += words; } - ++cur_chunk; + ++cur_region; } *target_next = dest_addr; return true; } -bool ParallelCompactData::partial_obj_ends_in_block(size_t block_index) { - HeapWord* block_addr = block_to_addr(block_index); - HeapWord* block_end_addr = block_addr + BlockSize; - size_t chunk_index = addr_to_chunk_idx(block_addr); - HeapWord* partial_obj_end_addr = partial_obj_end(chunk_index); - - // An object that ends at the end of the block, ends - // in the block (the last word of the object is to - // the left of the end). - if ((block_addr < partial_obj_end_addr) && - (partial_obj_end_addr <= block_end_addr)) { - return true; - } - - return false; -} - HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) { - HeapWord* result = NULL; - if (UseParallelOldGCChunkPointerCalc) { - result = chunk_calc_new_pointer(addr); - } else { - result = block_calc_new_pointer(addr); - } - return result; -} - -// This method is overly complicated (expensive) to be called -// for every reference. -// Try to restructure this so that a NULL is returned if -// the object is dead. But don't wast the cycles to explicitly check -// that it is dead since only live objects should be passed in. - -HeapWord* ParallelCompactData::chunk_calc_new_pointer(HeapWord* addr) { assert(addr != NULL, "Should detect NULL oop earlier"); assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap"); #ifdef ASSERT @@ -692,30 +529,30 @@ #endif assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked"); - // Chunk covering the object. 
- size_t chunk_index = addr_to_chunk_idx(addr); - const ChunkData* const chunk_ptr = chunk(chunk_index); - HeapWord* const chunk_addr = chunk_align_down(addr); - - assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object"); - assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check"); - - HeapWord* result = chunk_ptr->destination(); - - // If all the data in the chunk is live, then the new location of the object - // can be calculated from the destination of the chunk plus the offset of the - // object in the chunk. - if (chunk_ptr->data_size() == ChunkSize) { - result += pointer_delta(addr, chunk_addr); + // Region covering the object. + size_t region_index = addr_to_region_idx(addr); + const RegionData* const region_ptr = region(region_index); + HeapWord* const region_addr = region_align_down(addr); + + assert(addr < region_addr + RegionSize, "Region does not cover object"); + assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check"); + + HeapWord* result = region_ptr->destination(); + + // If all the data in the region is live, then the new location of the object + // can be calculated from the destination of the region plus the offset of the + // object in the region. 
+ if (region_ptr->data_size() == RegionSize) { + result += pointer_delta(addr, region_addr); return result; } // The new location of the object is - // chunk destination + - // size of the partial object extending onto the chunk + - // sizes of the live objects in the Chunk that are to the left of addr - const size_t partial_obj_size = chunk_ptr->partial_obj_size(); - HeapWord* const search_start = chunk_addr + partial_obj_size; + // region destination + + // size of the partial object extending onto the region + + // sizes of the live objects in the Region that are to the left of addr + const size_t partial_obj_size = region_ptr->partial_obj_size(); + HeapWord* const search_start = region_addr + partial_obj_size; const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr)); @@ -725,50 +562,6 @@ return result; } -HeapWord* ParallelCompactData::block_calc_new_pointer(HeapWord* addr) { - assert(addr != NULL, "Should detect NULL oop earlier"); - assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap"); -#ifdef ASSERT - if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) { - gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr); - } -#endif - assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked"); - - // Chunk covering the object. - size_t chunk_index = addr_to_chunk_idx(addr); - const ChunkData* const chunk_ptr = chunk(chunk_index); - HeapWord* const chunk_addr = chunk_align_down(addr); - - assert(addr < chunk_addr + ChunkSize, "Chunk does not cover object"); - assert(addr_to_chunk_ptr(chunk_addr) == chunk_ptr, "sanity check"); - - HeapWord* result = chunk_ptr->destination(); - - // If all the data in the chunk is live, then the new location of the object - // can be calculated from the destination of the chunk plus the offset of the - // object in the chunk. 
- if (chunk_ptr->data_size() == ChunkSize) { - result += pointer_delta(addr, chunk_addr); - return result; - } - - // The new location of the object is - // chunk destination + - // block offset + - // sizes of the live objects in the Block that are to the left of addr - const size_t block_offset = addr_to_block_ptr(addr)->offset(); - HeapWord* const search_start = chunk_addr + block_offset; - - const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); - size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr)); - - result += block_offset + live_to_left; - assert(result <= addr, "object cannot move to the right"); - assert(result == chunk_calc_new_pointer(addr), "Should match"); - return result; -} - klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) { klassOop updated_klass; if (PSParallelCompact::should_update_klass(old_klass)) { @@ -792,15 +585,14 @@ void ParallelCompactData::verify_clear() { - verify_clear(_chunk_vspace); - verify_clear(_block_vspace); + verify_clear(_region_vspace); } #endif // #ifdef ASSERT #ifdef NOT_PRODUCT -ParallelCompactData::ChunkData* debug_chunk(size_t chunk_index) { +ParallelCompactData::RegionData* debug_region(size_t region_index) { ParallelCompactData& sd = PSParallelCompact::summary_data(); - return sd.chunk(chunk_index); + return sd.region(region_index); } #endif @@ -953,10 +745,10 @@ const idx_t end_bit = BitMap::word_align_up(_mark_bitmap.addr_to_bit(top)); _mark_bitmap.clear_range(beg_bit, end_bit); - const size_t beg_chunk = _summary_data.addr_to_chunk_idx(bot); - const size_t end_chunk = - _summary_data.addr_to_chunk_idx(_summary_data.chunk_align_up(max_top)); - _summary_data.clear_range(beg_chunk, end_chunk); + const size_t beg_region = _summary_data.addr_to_region_idx(bot); + const size_t end_region = + _summary_data.addr_to_region_idx(_summary_data.region_align_up(max_top)); + _summary_data.clear_range(beg_region, end_region); } void PSParallelCompact::pre_compact(PreGCValues* 
pre_gc_values) @@ -1072,19 +864,19 @@ PSParallelCompact::compute_dense_prefix_via_density(const SpaceId id, bool maximum_compaction) { - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; const ParallelCompactData& sd = summary_data(); const MutableSpace* const space = _space_info[id].space(); - HeapWord* const top_aligned_up = sd.chunk_align_up(space->top()); - const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(space->bottom()); - const ChunkData* const end_cp = sd.addr_to_chunk_ptr(top_aligned_up); - - // Skip full chunks at the beginning of the space--they are necessarily part + HeapWord* const top_aligned_up = sd.region_align_up(space->top()); + const RegionData* const beg_cp = sd.addr_to_region_ptr(space->bottom()); + const RegionData* const end_cp = sd.addr_to_region_ptr(top_aligned_up); + + // Skip full regions at the beginning of the space--they are necessarily part // of the dense prefix. size_t full_count = 0; - const ChunkData* cp; - for (cp = beg_cp; cp < end_cp && cp->data_size() == chunk_size; ++cp) { + const RegionData* cp; + for (cp = beg_cp; cp < end_cp && cp->data_size() == region_size; ++cp) { ++full_count; } @@ -1093,7 +885,7 @@ const bool interval_ended = gcs_since_max > HeapMaximumCompactionInterval; if (maximum_compaction || cp == end_cp || interval_ended) { _maximum_compaction_gc_num = total_invocations(); - return sd.chunk_to_addr(cp); + return sd.region_to_addr(cp); } HeapWord* const new_top = _space_info[id].new_top(); @@ -1116,52 +908,53 @@ } // XXX - Use binary search? 
- HeapWord* dense_prefix = sd.chunk_to_addr(cp); - const ChunkData* full_cp = cp; - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(space->top() - 1); + HeapWord* dense_prefix = sd.region_to_addr(cp); + const RegionData* full_cp = cp; + const RegionData* const top_cp = sd.addr_to_region_ptr(space->top() - 1); while (cp < end_cp) { - HeapWord* chunk_destination = cp->destination(); - const size_t cur_deadwood = pointer_delta(dense_prefix, chunk_destination); + HeapWord* region_destination = cp->destination(); + const size_t cur_deadwood = pointer_delta(dense_prefix, region_destination); if (TraceParallelOldGCDensePrefix && Verbose) { tty->print_cr("c#=" SIZE_FORMAT_W(4) " dst=" PTR_FORMAT " " "dp=" SIZE_FORMAT_W(8) " " "cdw=" SIZE_FORMAT_W(8), - sd.chunk(cp), chunk_destination, + sd.region(cp), region_destination, dense_prefix, cur_deadwood); } if (cur_deadwood >= deadwood_goal) { - // Found the chunk that has the correct amount of deadwood to the left. - // This typically occurs after crossing a fairly sparse set of chunks, so - // iterate backwards over those sparse chunks, looking for the chunk that - // has the lowest density of live objects 'to the right.' - size_t space_to_left = sd.chunk(cp) * chunk_size; + // Found the region that has the correct amount of deadwood to the left. + // This typically occurs after crossing a fairly sparse set of regions, so + // iterate backwards over those sparse regions, looking for the region + // that has the lowest density of live objects 'to the right.' 
+ size_t space_to_left = sd.region(cp) * region_size; size_t live_to_left = space_to_left - cur_deadwood; size_t space_to_right = space_capacity - space_to_left; size_t live_to_right = space_live - live_to_left; double density_to_right = double(live_to_right) / space_to_right; while (cp > full_cp) { --cp; - const size_t prev_chunk_live_to_right = live_to_right - cp->data_size(); - const size_t prev_chunk_space_to_right = space_to_right + chunk_size; - double prev_chunk_density_to_right = - double(prev_chunk_live_to_right) / prev_chunk_space_to_right; - if (density_to_right <= prev_chunk_density_to_right) { + const size_t prev_region_live_to_right = live_to_right - + cp->data_size(); + const size_t prev_region_space_to_right = space_to_right + region_size; + double prev_region_density_to_right = + double(prev_region_live_to_right) / prev_region_space_to_right; + if (density_to_right <= prev_region_density_to_right) { return dense_prefix; } if (TraceParallelOldGCDensePrefix && Verbose) { tty->print_cr("backing up from c=" SIZE_FORMAT_W(4) " d2r=%10.8f " - "pc_d2r=%10.8f", sd.chunk(cp), density_to_right, - prev_chunk_density_to_right); + "pc_d2r=%10.8f", sd.region(cp), density_to_right, + prev_region_density_to_right); } - dense_prefix -= chunk_size; - live_to_right = prev_chunk_live_to_right; - space_to_right = prev_chunk_space_to_right; - density_to_right = prev_chunk_density_to_right; + dense_prefix -= region_size; + live_to_right = prev_region_live_to_right; + space_to_right = prev_region_space_to_right; + density_to_right = prev_region_density_to_right; } return dense_prefix; } - dense_prefix += chunk_size; + dense_prefix += region_size; ++cp; } @@ -1174,8 +967,8 @@ const bool maximum_compaction, HeapWord* const addr) { - const size_t chunk_idx = summary_data().addr_to_chunk_idx(addr); - ChunkData* const cp = summary_data().chunk(chunk_idx); + const size_t region_idx = summary_data().addr_to_region_idx(addr); + RegionData* const cp = 
summary_data().region(region_idx); const MutableSpace* const space = _space_info[id].space(); HeapWord* const new_top = _space_info[id].new_top(); @@ -1191,7 +984,7 @@ "d2l=" SIZE_FORMAT " d2l%%=%6.4f " "d2r=" SIZE_FORMAT " l2r=" SIZE_FORMAT " ratio=%10.8f", - algorithm, addr, chunk_idx, + algorithm, addr, region_idx, space_live, dead_to_left, dead_to_left_pct, dead_to_right, live_to_right, @@ -1253,52 +1046,52 @@ return MAX2(limit, 0.0); } -ParallelCompactData::ChunkData* -PSParallelCompact::first_dead_space_chunk(const ChunkData* beg, - const ChunkData* end) +ParallelCompactData::RegionData* +PSParallelCompact::first_dead_space_region(const RegionData* beg, + const RegionData* end) { - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; ParallelCompactData& sd = summary_data(); - size_t left = sd.chunk(beg); - size_t right = end > beg ? sd.chunk(end) - 1 : left; + size_t left = sd.region(beg); + size_t right = end > beg ? sd.region(end) - 1 : left; // Binary search. while (left < right) { // Equivalent to (left + right) / 2, but does not overflow. 
const size_t middle = left + (right - left) / 2; - ChunkData* const middle_ptr = sd.chunk(middle); + RegionData* const middle_ptr = sd.region(middle); HeapWord* const dest = middle_ptr->destination(); - HeapWord* const addr = sd.chunk_to_addr(middle); + HeapWord* const addr = sd.region_to_addr(middle); assert(dest != NULL, "sanity"); assert(dest <= addr, "must move left"); if (middle > left && dest < addr) { right = middle - 1; - } else if (middle < right && middle_ptr->data_size() == chunk_size) { + } else if (middle < right && middle_ptr->data_size() == region_size) { left = middle + 1; } else { return middle_ptr; } } - return sd.chunk(left); + return sd.region(left); } -ParallelCompactData::ChunkData* -PSParallelCompact::dead_wood_limit_chunk(const ChunkData* beg, - const ChunkData* end, - size_t dead_words) +ParallelCompactData::RegionData* +PSParallelCompact::dead_wood_limit_region(const RegionData* beg, + const RegionData* end, + size_t dead_words) { ParallelCompactData& sd = summary_data(); - size_t left = sd.chunk(beg); - size_t right = end > beg ? sd.chunk(end) - 1 : left; + size_t left = sd.region(beg); + size_t right = end > beg ? sd.region(end) - 1 : left; // Binary search. while (left < right) { // Equivalent to (left + right) / 2, but does not overflow. const size_t middle = left + (right - left) / 2; - ChunkData* const middle_ptr = sd.chunk(middle); + RegionData* const middle_ptr = sd.region(middle); HeapWord* const dest = middle_ptr->destination(); - HeapWord* const addr = sd.chunk_to_addr(middle); + HeapWord* const addr = sd.region_to_addr(middle); assert(dest != NULL, "sanity"); assert(dest <= addr, "must move left"); @@ -1311,13 +1104,13 @@ return middle_ptr; } } - return sd.chunk(left); + return sd.region(left); } // The result is valid during the summary phase, after the initial summarization // of each space into itself, and before final summarization. 
inline double -PSParallelCompact::reclaimed_ratio(const ChunkData* const cp, +PSParallelCompact::reclaimed_ratio(const RegionData* const cp, HeapWord* const bottom, HeapWord* const top, HeapWord* const new_top) @@ -1331,12 +1124,13 @@ assert(top >= new_top, "summary data problem?"); assert(new_top > bottom, "space is empty; should not be here"); assert(new_top >= cp->destination(), "sanity"); - assert(top >= sd.chunk_to_addr(cp), "sanity"); + assert(top >= sd.region_to_addr(cp), "sanity"); HeapWord* const destination = cp->destination(); const size_t dense_prefix_live = pointer_delta(destination, bottom); const size_t compacted_region_live = pointer_delta(new_top, destination); - const size_t compacted_region_used = pointer_delta(top, sd.chunk_to_addr(cp)); + const size_t compacted_region_used = pointer_delta(top, + sd.region_to_addr(cp)); const size_t reclaimable = compacted_region_used - compacted_region_live; const double divisor = dense_prefix_live + 1.25 * compacted_region_live; @@ -1344,39 +1138,40 @@ } // Return the address of the end of the dense prefix, a.k.a. the start of the -// compacted region. The address is always on a chunk boundary. +// compacted region. The address is always on a region boundary. // -// Completely full chunks at the left are skipped, since no compaction can occur -// in those chunks. Then the maximum amount of dead wood to allow is computed, -// based on the density (amount live / capacity) of the generation; the chunk -// with approximately that amount of dead space to the left is identified as the -// limit chunk. Chunks between the last completely full chunk and the limit -// chunk are scanned and the one that has the best (maximum) reclaimed_ratio() -// is selected. +// Completely full regions at the left are skipped, since no compaction can +// occur in those regions. 
Then the maximum amount of dead wood to allow is +// computed, based on the density (amount live / capacity) of the generation; +// the region with approximately that amount of dead space to the left is +// identified as the limit region. Regions between the last completely full +// region and the limit region are scanned and the one that has the best +// (maximum) reclaimed_ratio() is selected. HeapWord* PSParallelCompact::compute_dense_prefix(const SpaceId id, bool maximum_compaction) { - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; const ParallelCompactData& sd = summary_data(); const MutableSpace* const space = _space_info[id].space(); HeapWord* const top = space->top(); - HeapWord* const top_aligned_up = sd.chunk_align_up(top); + HeapWord* const top_aligned_up = sd.region_align_up(top); HeapWord* const new_top = _space_info[id].new_top(); - HeapWord* const new_top_aligned_up = sd.chunk_align_up(new_top); + HeapWord* const new_top_aligned_up = sd.region_align_up(new_top); HeapWord* const bottom = space->bottom(); - const ChunkData* const beg_cp = sd.addr_to_chunk_ptr(bottom); - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up); - const ChunkData* const new_top_cp = sd.addr_to_chunk_ptr(new_top_aligned_up); - - // Skip full chunks at the beginning of the space--they are necessarily part + const RegionData* const beg_cp = sd.addr_to_region_ptr(bottom); + const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up); + const RegionData* const new_top_cp = + sd.addr_to_region_ptr(new_top_aligned_up); + + // Skip full regions at the beginning of the space--they are necessarily part // of the dense prefix. 
- const ChunkData* const full_cp = first_dead_space_chunk(beg_cp, new_top_cp); - assert(full_cp->destination() == sd.chunk_to_addr(full_cp) || + const RegionData* const full_cp = first_dead_space_region(beg_cp, new_top_cp); + assert(full_cp->destination() == sd.region_to_addr(full_cp) || space->is_empty(), "no dead space allowed to the left"); - assert(full_cp->data_size() < chunk_size || full_cp == new_top_cp - 1, - "chunk must have dead space"); + assert(full_cp->data_size() < region_size || full_cp == new_top_cp - 1, + "region must have dead space"); // The gc number is saved whenever a maximum compaction is done, and used to // determine when the maximum compaction interval has expired. This avoids @@ -1387,7 +1182,7 @@ total_invocations() == HeapFirstMaximumCompactionCount; if (maximum_compaction || full_cp == top_cp || interval_ended) { _maximum_compaction_gc_num = total_invocations(); - return sd.chunk_to_addr(full_cp); + return sd.region_to_addr(full_cp); } const size_t space_live = pointer_delta(new_top, bottom); @@ -1413,15 +1208,15 @@ dead_wood_max, dead_wood_limit); } - // Locate the chunk with the desired amount of dead space to the left. - const ChunkData* const limit_cp = - dead_wood_limit_chunk(full_cp, top_cp, dead_wood_limit); - - // Scan from the first chunk with dead space to the limit chunk and find the + // Locate the region with the desired amount of dead space to the left. + const RegionData* const limit_cp = + dead_wood_limit_region(full_cp, top_cp, dead_wood_limit); + + // Scan from the first region with dead space to the limit region and find the // one with the best (largest) reclaimed ratio. 
double best_ratio = 0.0; - const ChunkData* best_cp = full_cp; - for (const ChunkData* cp = full_cp; cp < limit_cp; ++cp) { + const RegionData* best_cp = full_cp; + for (const RegionData* cp = full_cp; cp < limit_cp; ++cp) { double tmp_ratio = reclaimed_ratio(cp, bottom, top, new_top); if (tmp_ratio > best_ratio) { best_cp = cp; @@ -1430,18 +1225,18 @@ } #if 0 - // Something to consider: if the chunk with the best ratio is 'close to' the - // first chunk w/free space, choose the first chunk with free space - // ("first-free"). The first-free chunk is usually near the start of the + // Something to consider: if the region with the best ratio is 'close to' the + // first region w/free space, choose the first region with free space + // ("first-free"). The first-free region is usually near the start of the // heap, which means we are copying most of the heap already, so copy a bit // more to get complete compaction. - if (pointer_delta(best_cp, full_cp, sizeof(ChunkData)) < 4) { + if (pointer_delta(best_cp, full_cp, sizeof(RegionData)) < 4) { _maximum_compaction_gc_num = total_invocations(); best_cp = full_cp; } #endif // #if 0 - return sd.chunk_to_addr(best_cp); + return sd.region_to_addr(best_cp); } void PSParallelCompact::summarize_spaces_quick() @@ -1459,9 +1254,9 @@ void PSParallelCompact::fill_dense_prefix_end(SpaceId id) { HeapWord* const dense_prefix_end = dense_prefix(id); - const ChunkData* chunk = _summary_data.addr_to_chunk_ptr(dense_prefix_end); + const RegionData* region = _summary_data.addr_to_region_ptr(dense_prefix_end); const idx_t dense_prefix_bit = _mark_bitmap.addr_to_bit(dense_prefix_end); - if (dead_space_crosses_boundary(chunk, dense_prefix_bit)) { + if (dead_space_crosses_boundary(region, dense_prefix_bit)) { // Only enough dead space is filled so that any remaining dead space to the // left is larger than the minimum filler object. (The remainder is filled // during the copy/update phase.) 
@@ -1552,7 +1347,7 @@ fill_dense_prefix_end(id); } - // Compute the destination of each Chunk, and thus each object. + // Compute the destination of each Region, and thus each object. _summary_data.summarize_dense_prefix(space->bottom(), dense_prefix_end); _summary_data.summarize(dense_prefix_end, space->end(), dense_prefix_end, space->top(), @@ -1560,19 +1355,19 @@ } if (TraceParallelOldGCSummaryPhase) { - const size_t chunk_size = ParallelCompactData::ChunkSize; + const size_t region_size = ParallelCompactData::RegionSize; HeapWord* const dense_prefix_end = _space_info[id].dense_prefix(); - const size_t dp_chunk = _summary_data.addr_to_chunk_idx(dense_prefix_end); + const size_t dp_region = _summary_data.addr_to_region_idx(dense_prefix_end); const size_t dp_words = pointer_delta(dense_prefix_end, space->bottom()); HeapWord* const new_top = _space_info[id].new_top(); - const HeapWord* nt_aligned_up = _summary_data.chunk_align_up(new_top); + const HeapWord* nt_aligned_up = _summary_data.region_align_up(new_top); const size_t cr_words = pointer_delta(nt_aligned_up, dense_prefix_end); tty->print_cr("id=%d cap=" SIZE_FORMAT " dp=" PTR_FORMAT " " - "dp_chunk=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " " + "dp_region=" SIZE_FORMAT " " "dp_count=" SIZE_FORMAT " " "cr_count=" SIZE_FORMAT " " "nt=" PTR_FORMAT, id, space->capacity_in_words(), dense_prefix_end, - dp_chunk, dp_words / chunk_size, - cr_words / chunk_size, new_top); + dp_region, dp_words / region_size, + cr_words / region_size, new_top); } } @@ -1584,11 +1379,6 @@ // trace("2"); #ifdef ASSERT - if (VerifyParallelOldWithMarkSweep && - (PSParallelCompact::total_invocations() % - VerifyParallelOldWithMarkSweepInterval) == 0) { - verify_mark_bitmap(_mark_bitmap); - } if (TraceParallelOldGCMarkingPhase) { tty->print_cr("add_obj_count=" SIZE_FORMAT " " "add_obj_bytes=" SIZE_FORMAT, @@ -1605,7 +1395,7 @@ if (TraceParallelOldGCSummaryPhase) { tty->print_cr("summary_phase: after summarizing each space to self"); 
Universe::print(); - NOT_PRODUCT(print_chunk_ranges()); + NOT_PRODUCT(print_region_ranges()); if (Verbose) { NOT_PRODUCT(print_initial_summary_data(_summary_data, _space_info)); } @@ -1651,14 +1441,15 @@ space->bottom(), space->top(), new_top_addr); - // Clear the source_chunk field for each chunk in the space. + // Clear the source_region field for each region in the space. HeapWord* const new_top = _space_info[id].new_top(); - HeapWord* const clear_end = _summary_data.chunk_align_up(new_top); - ChunkData* beg_chunk = _summary_data.addr_to_chunk_ptr(space->bottom()); - ChunkData* end_chunk = _summary_data.addr_to_chunk_ptr(clear_end); - while (beg_chunk < end_chunk) { - beg_chunk->set_source_chunk(0); - ++beg_chunk; + HeapWord* const clear_end = _summary_data.region_align_up(new_top); + RegionData* beg_region = + _summary_data.addr_to_region_ptr(space->bottom()); + RegionData* end_region = _summary_data.addr_to_region_ptr(clear_end); + while (beg_region < end_region) { + beg_region->set_source_region(0); + ++beg_region; } // Reset the new_top value for the space. @@ -1666,243 +1457,16 @@ } } - // Fill in the block data after any changes to the chunks have - // been made. -#ifdef ASSERT - summarize_blocks(cm, perm_space_id); - summarize_blocks(cm, old_space_id); -#else - if (!UseParallelOldGCChunkPointerCalc) { - summarize_blocks(cm, perm_space_id); - summarize_blocks(cm, old_space_id); - } -#endif - if (TraceParallelOldGCSummaryPhase) { tty->print_cr("summary_phase: after final summarization"); Universe::print(); - NOT_PRODUCT(print_chunk_ranges()); + NOT_PRODUCT(print_region_ranges()); if (Verbose) { NOT_PRODUCT(print_generic_summary_data(_summary_data, _space_info)); } } } -// Fill in the BlockData. -// Iterate over the spaces and within each space iterate over -// the chunks and fill in the BlockData for each chunk. 
- -void PSParallelCompact::summarize_blocks(ParCompactionManager* cm, - SpaceId first_compaction_space_id) { -#if 0 - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(1);) - for (SpaceId cur_space_id = first_compaction_space_id; - cur_space_id != last_space_id; - cur_space_id = next_compaction_space_id(cur_space_id)) { - // Iterate over the chunks in the space - size_t start_chunk_index = - _summary_data.addr_to_chunk_idx(space(cur_space_id)->bottom()); - BitBlockUpdateClosure bbu(mark_bitmap(), - cm, - start_chunk_index); - // Iterate over blocks. - for (size_t chunk_index = start_chunk_index; - chunk_index < _summary_data.chunk_count() && - _summary_data.chunk_to_addr(chunk_index) < space(cur_space_id)->top(); - chunk_index++) { - - // Reset the closure for the new chunk. Note that the closure - // maintains some data that does not get reset for each chunk - // so a new instance of the closure is no appropriate. - bbu.reset_chunk(chunk_index); - - // Start the iteration with the first live object. This - // may return the end of the chunk. That is acceptable since - // it will properly limit the iterations. - ParMarkBitMap::idx_t left_offset = mark_bitmap()->addr_to_bit( - _summary_data.first_live_or_end_in_chunk(chunk_index)); - - // End the iteration at the end of the chunk. - HeapWord* chunk_addr = _summary_data.chunk_to_addr(chunk_index); - HeapWord* chunk_end = chunk_addr + ParallelCompactData::ChunkSize; - ParMarkBitMap::idx_t right_offset = - mark_bitmap()->addr_to_bit(chunk_end); - - // Blocks that have not objects starting in them can be - // skipped because their data will never be used. - if (left_offset < right_offset) { - - // Iterate through the objects in the chunk. - ParMarkBitMap::idx_t last_offset = - mark_bitmap()->pair_iterate(&bbu, left_offset, right_offset); - - // If last_offset is less than right_offset, then the iterations - // terminated while it was looking for an end bit. 
"last_offset" - // is then the offset for the last start bit. In this situation - // the "offset" field for the next block to the right (_cur_block + 1) - // will not have been update although there may be live data - // to the left of the chunk. - - size_t cur_block_plus_1 = bbu.cur_block() + 1; - HeapWord* cur_block_plus_1_addr = - _summary_data.block_to_addr(bbu.cur_block()) + - ParallelCompactData::BlockSize; - HeapWord* last_offset_addr = mark_bitmap()->bit_to_addr(last_offset); - #if 1 // This code works. The else doesn't but should. Why does it? - // The current block (cur_block()) has already been updated. - // The last block that may need to be updated is either the - // next block (current block + 1) or the block where the - // last object starts (which can be greater than the - // next block if there were no objects found in intervening - // blocks). - size_t last_block = - MAX2(bbu.cur_block() + 1, - _summary_data.addr_to_block_idx(last_offset_addr)); - #else - // The current block has already been updated. The only block - // that remains to be updated is the block where the last - // object in the chunk starts. - size_t last_block = _summary_data.addr_to_block_idx(last_offset_addr); - #endif - assert_bit_is_start(last_offset); - assert((last_block == _summary_data.block_count()) || - (_summary_data.block(last_block)->raw_offset() == 0), - "Should not have been set"); - // Is the last block still in the current chunk? If still - // in this chunk, update the last block (the counting that - // included the current block is meant for the offset of the last - // block). If not in this chunk, do nothing. Should not - // update a block in the next chunk. - if (ParallelCompactData::chunk_contains_block(bbu.chunk_index(), - last_block)) { - if (last_offset < right_offset) { - // The last object started in this chunk but ends beyond - // this chunk. Update the block for this last object. 
- assert(mark_bitmap()->is_marked(last_offset), "Should be marked"); - // No end bit was found. The closure takes care of - // the cases where - // an objects crosses over into the next block - // an objects starts and ends in the next block - // It does not handle the case where an object is - // the first object in a later block and extends - // past the end of the chunk (i.e., the closure - // only handles complete objects that are in the range - // it is given). That object is handed back here - // for any special consideration necessary. - // - // Is the first bit in the last block a start or end bit? - // - // If the partial object ends in the last block L, - // then the 1st bit in L may be an end bit. - // - // Else does the last object start in a block after the current - // block? A block AA will already have been updated if an - // object ends in the next block AA+1. An object found to end in - // the AA+1 is the trigger that updates AA. Objects are being - // counted in the current block for updaing a following - // block. An object may start in later block - // block but may extend beyond the last block in the chunk. - // Updates are only done when the end of an object has been - // found. If the last object (covered by block L) starts - // beyond the current block, then no object ends in L (otherwise - // L would be the current block). So the first bit in L is - // a start bit. - // - // Else the last objects start in the current block and ends - // beyond the chunk. The current block has already been - // updated and there is no later block (with an object - // starting in it) that needs to be updated. 
- // - if (_summary_data.partial_obj_ends_in_block(last_block)) { - _summary_data.block(last_block)->set_end_bit_offset( - bbu.live_data_left()); - } else if (last_offset_addr >= cur_block_plus_1_addr) { - // The start of the object is on a later block - // (to the right of the current block and there are no - // complete live objects to the left of this last object - // within the chunk. - // The first bit in the block is for the start of the - // last object. - _summary_data.block(last_block)->set_start_bit_offset( - bbu.live_data_left()); - } else { - // The start of the last object was found in - // the current chunk (which has already - // been updated). - assert(bbu.cur_block() == - _summary_data.addr_to_block_idx(last_offset_addr), - "Should be a block already processed"); - } -#ifdef ASSERT - // Is there enough block information to find this object? - // The destination of the chunk has not been set so the - // values returned by calc_new_pointer() and - // block_calc_new_pointer() will only be - // offsets. But they should agree. - HeapWord* moved_obj_with_chunks = - _summary_data.chunk_calc_new_pointer(last_offset_addr); - HeapWord* moved_obj_with_blocks = - _summary_data.calc_new_pointer(last_offset_addr); - assert(moved_obj_with_chunks == moved_obj_with_blocks, - "Block calculation is wrong"); -#endif - } else if (last_block < _summary_data.block_count()) { - // Iterations ended looking for a start bit (but - // did not run off the end of the block table). - _summary_data.block(last_block)->set_start_bit_offset( - bbu.live_data_left()); - } - } -#ifdef ASSERT - // Is there enough block information to find this object? 
- HeapWord* left_offset_addr = mark_bitmap()->bit_to_addr(left_offset); - HeapWord* moved_obj_with_chunks = - _summary_data.calc_new_pointer(left_offset_addr); - HeapWord* moved_obj_with_blocks = - _summary_data.calc_new_pointer(left_offset_addr); - assert(moved_obj_with_chunks == moved_obj_with_blocks, - "Block calculation is wrong"); -#endif - - // Is there another block after the end of this chunk? -#ifdef ASSERT - if (last_block < _summary_data.block_count()) { - // No object may have been found in a block. If that - // block is at the end of the chunk, the iteration will - // terminate without incrementing the current block so - // that the current block is not the last block in the - // chunk. That situation precludes asserting that the - // current block is the last block in the chunk. Assert - // the lesser condition that the current block does not - // exceed the chunk. - assert(_summary_data.block_to_addr(last_block) <= - (_summary_data.chunk_to_addr(chunk_index) + - ParallelCompactData::ChunkSize), - "Chunk and block inconsistency"); - assert(last_offset <= right_offset, "Iteration over ran end"); - } -#endif - } -#ifdef ASSERT - if (PrintGCDetails && Verbose) { - if (_summary_data.chunk(chunk_index)->partial_obj_size() == 1) { - size_t first_block = - chunk_index / ParallelCompactData::BlocksPerChunk; - gclog_or_tty->print_cr("first_block " PTR_FORMAT - " _offset " PTR_FORMAT - "_first_is_start_bit %d", - first_block, - _summary_data.block(first_block)->raw_offset(), - _summary_data.block(first_block)->first_is_start_bit()); - } - } -#endif - } - } - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(16);) -#endif // #if 0 -} - // This method should contain all heap-specific policy for invoking a full // collection. invoke_no_policy() will only attempt to compact the heap; it // will do nothing further. 
If we need to bail out for policy reasons, scavenge @@ -1937,18 +1501,9 @@ } } -bool ParallelCompactData::chunk_contains(size_t chunk_index, HeapWord* addr) { - size_t addr_chunk_index = addr_to_chunk_idx(addr); - return chunk_index == addr_chunk_index; -} - -bool ParallelCompactData::chunk_contains_block(size_t chunk_index, - size_t block_index) { - size_t first_block_in_chunk = chunk_index * BlocksPerChunk; - size_t last_block_in_chunk = (chunk_index + 1) * BlocksPerChunk - 1; - - return (first_block_in_chunk <= block_index) && - (block_index <= last_block_in_chunk); +bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) { + size_t addr_region_index = addr_to_region_idx(addr); + return region_index == addr_region_index; } // This method contains no policy. You should probably @@ -2038,39 +1593,9 @@ } #endif // #ifndef PRODUCT -#ifdef ASSERT - if (VerifyParallelOldWithMarkSweep && - (PSParallelCompact::total_invocations() % - VerifyParallelOldWithMarkSweepInterval) == 0) { - gclog_or_tty->print_cr("Verify marking with mark_sweep_phase1()"); - if (PrintGCDetails && Verbose) { - gclog_or_tty->print_cr("mark_sweep_phase1:"); - } - // Clear the discovered lists so that discovered objects - // don't look like they have been discovered twice. 
- ref_processor()->clear_discovered_references(); - - PSMarkSweep::allocate_stacks(); - MemRegion mr = Universe::heap()->reserved_region(); - PSMarkSweep::ref_processor()->enable_discovery(); - PSMarkSweep::mark_sweep_phase1(maximum_heap_compaction); - } -#endif - bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc; summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc); -#ifdef ASSERT - if (VerifyParallelOldWithMarkSweep && - (PSParallelCompact::total_invocations() % - VerifyParallelOldWithMarkSweepInterval) == 0) { - if (PrintGCDetails && Verbose) { - gclog_or_tty->print_cr("mark_sweep_phase2:"); - } - PSMarkSweep::mark_sweep_phase2(); - } -#endif - COMPILER2_PRESENT(assert(DerivedPointerTable::is_active(), "Sanity")); COMPILER2_PRESENT(DerivedPointerTable::set_active(false)); @@ -2078,28 +1603,6 @@ // needed by the compaction for filling holes in the dense prefix. adjust_roots(); -#ifdef ASSERT - if (VerifyParallelOldWithMarkSweep && - (PSParallelCompact::total_invocations() % - VerifyParallelOldWithMarkSweepInterval) == 0) { - // Do a separate verify phase so that the verify - // code can use the the forwarding pointers to - // check the new pointer calculation. The restore_marks() - // has to be done before the real compact. - vmthread_cm->set_action(ParCompactionManager::VerifyUpdate); - compact_perm(vmthread_cm); - compact_serial(vmthread_cm); - vmthread_cm->set_action(ParCompactionManager::ResetObjects); - compact_perm(vmthread_cm); - compact_serial(vmthread_cm); - vmthread_cm->set_action(ParCompactionManager::UpdateAndCopy); - - // For debugging only - PSMarkSweep::restore_marks(); - PSMarkSweep::deallocate_stacks(); - } -#endif - compaction_start.update(); // Does the perm gen always have to be done serially because // klasses are used in the update of an object? 
@@ -2349,7 +1852,7 @@ ParallelScavengeHeap* heap = gc_heap(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); - TaskQueueSetSuper* qset = ParCompactionManager::chunk_array(); + TaskQueueSetSuper* qset = ParCompactionManager::region_array(); ParallelTaskTerminator terminator(parallel_gc_threads, qset); PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); @@ -2487,8 +1990,9 @@ move_and_update(cm, perm_space_id); } -void PSParallelCompact::enqueue_chunk_draining_tasks(GCTaskQueue* q, - uint parallel_gc_threads) { +void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q, + uint parallel_gc_threads) +{ TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty); const unsigned int task_count = MAX2(parallel_gc_threads, 1U); @@ -2496,13 +2000,13 @@ q->enqueue(new DrainStacksCompactionTask()); } - // Find all chunks that are available (can be filled immediately) and + // Find all regions that are available (can be filled immediately) and // distribute them to the thread stacks. The iteration is done in reverse - // order (high to low) so the chunks will be removed in ascending order. + // order (high to low) so the regions will be removed in ascending order. const ParallelCompactData& sd = PSParallelCompact::summary_data(); - size_t fillable_chunks = 0; // A count for diagnostic purposes. + size_t fillable_regions = 0; // A count for diagnostic purposes. unsigned int which = 0; // The worker thread number. 
for (unsigned int id = to_space_id; id > perm_space_id; --id) { @@ -2510,25 +2014,26 @@ MutableSpace* const space = space_info->space(); HeapWord* const new_top = space_info->new_top(); - const size_t beg_chunk = sd.addr_to_chunk_idx(space_info->dense_prefix()); - const size_t end_chunk = sd.addr_to_chunk_idx(sd.chunk_align_up(new_top)); - assert(end_chunk > 0, "perm gen cannot be empty"); - - for (size_t cur = end_chunk - 1; cur >= beg_chunk; --cur) { - if (sd.chunk(cur)->claim_unsafe()) { + const size_t beg_region = sd.addr_to_region_idx(space_info->dense_prefix()); + const size_t end_region = + sd.addr_to_region_idx(sd.region_align_up(new_top)); + assert(end_region > 0, "perm gen cannot be empty"); + + for (size_t cur = end_region - 1; cur >= beg_region; --cur) { + if (sd.region(cur)->claim_unsafe()) { ParCompactionManager* cm = ParCompactionManager::manager_array(which); cm->save_for_processing(cur); if (TraceParallelOldGCCompactionPhase && Verbose) { - const size_t count_mod_8 = fillable_chunks & 7; + const size_t count_mod_8 = fillable_regions & 7; if (count_mod_8 == 0) gclog_or_tty->print("fillable: "); gclog_or_tty->print(" " SIZE_FORMAT_W(7), cur); if (count_mod_8 == 7) gclog_or_tty->cr(); } - NOT_PRODUCT(++fillable_chunks;) - - // Assign chunks to threads in round-robin fashion. + NOT_PRODUCT(++fillable_regions;) + + // Assign regions to threads in round-robin fashion. if (++which == task_count) { which = 0; } @@ -2537,8 +2042,8 @@ } if (TraceParallelOldGCCompactionPhase) { - if (Verbose && (fillable_chunks & 7) != 0) gclog_or_tty->cr(); - gclog_or_tty->print_cr("%u initially fillable chunks", fillable_chunks); + if (Verbose && (fillable_regions & 7) != 0) gclog_or_tty->cr(); + gclog_or_tty->print_cr("%u initially fillable regions", fillable_regions); } } @@ -2551,7 +2056,7 @@ ParallelCompactData& sd = PSParallelCompact::summary_data(); // Iterate over all the spaces adding tasks for updating - // chunks in the dense prefix. 
Assume that 1 gc thread + // regions in the dense prefix. Assume that 1 gc thread // will work on opening the gaps and the remaining gc threads // will work on the dense prefix. SpaceId space_id = old_space_id; @@ -2565,30 +2070,31 @@ continue; } - // The dense prefix is before this chunk. - size_t chunk_index_end_dense_prefix = - sd.addr_to_chunk_idx(dense_prefix_end); - ChunkData* const dense_prefix_cp = sd.chunk(chunk_index_end_dense_prefix); + // The dense prefix is before this region. + size_t region_index_end_dense_prefix = + sd.addr_to_region_idx(dense_prefix_end); + RegionData* const dense_prefix_cp = + sd.region(region_index_end_dense_prefix); assert(dense_prefix_end == space->end() || dense_prefix_cp->available() || dense_prefix_cp->claimed(), - "The chunk after the dense prefix should always be ready to fill"); - - size_t chunk_index_start = sd.addr_to_chunk_idx(space->bottom()); + "The region after the dense prefix should always be ready to fill"); + + size_t region_index_start = sd.addr_to_region_idx(space->bottom()); // Is there dense prefix work? - size_t total_dense_prefix_chunks = - chunk_index_end_dense_prefix - chunk_index_start; - // How many chunks of the dense prefix should be given to + size_t total_dense_prefix_regions = + region_index_end_dense_prefix - region_index_start; + // How many regions of the dense prefix should be given to // each thread? - if (total_dense_prefix_chunks > 0) { + if (total_dense_prefix_regions > 0) { uint tasks_for_dense_prefix = 1; if (UseParallelDensePrefixUpdate) { - if (total_dense_prefix_chunks <= + if (total_dense_prefix_regions <= (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) { // Don't over partition. This assumes that // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value - // so there are not many chunks to process. + // so there are not many regions to process. 
tasks_for_dense_prefix = parallel_gc_threads; } else { // Over partition @@ -2596,50 +2102,50 @@ PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING; } } - size_t chunks_per_thread = total_dense_prefix_chunks / + size_t regions_per_thread = total_dense_prefix_regions / tasks_for_dense_prefix; - // Give each thread at least 1 chunk. - if (chunks_per_thread == 0) { - chunks_per_thread = 1; + // Give each thread at least 1 region. + if (regions_per_thread == 0) { + regions_per_thread = 1; } for (uint k = 0; k < tasks_for_dense_prefix; k++) { - if (chunk_index_start >= chunk_index_end_dense_prefix) { + if (region_index_start >= region_index_end_dense_prefix) { break; } - // chunk_index_end is not processed - size_t chunk_index_end = MIN2(chunk_index_start + chunks_per_thread, - chunk_index_end_dense_prefix); + // region_index_end is not processed + size_t region_index_end = MIN2(region_index_start + regions_per_thread, + region_index_end_dense_prefix); q->enqueue(new UpdateDensePrefixTask( space_id, - chunk_index_start, - chunk_index_end)); - chunk_index_start = chunk_index_end; + region_index_start, + region_index_end)); + region_index_start = region_index_end; } } // This gets any part of the dense prefix that did not // fit evenly. 
- if (chunk_index_start < chunk_index_end_dense_prefix) { + if (region_index_start < region_index_end_dense_prefix) { q->enqueue(new UpdateDensePrefixTask( space_id, - chunk_index_start, - chunk_index_end_dense_prefix)); + region_index_start, + region_index_end_dense_prefix)); } space_id = next_compaction_space_id(space_id); } // End tasks for dense prefix } -void PSParallelCompact::enqueue_chunk_stealing_tasks( +void PSParallelCompact::enqueue_region_stealing_tasks( GCTaskQueue* q, ParallelTaskTerminator* terminator_ptr, uint parallel_gc_threads) { TraceTime tm("steal task setup", print_phases(), true, gclog_or_tty); - // Once a thread has drained it's stack, it should try to steal chunks from + // Once a thread has drained it's stack, it should try to steal regions from // other threads. if (parallel_gc_threads > 1) { for (uint j = 0; j < parallel_gc_threads; j++) { - q->enqueue(new StealChunkCompactionTask(terminator_ptr)); + q->enqueue(new StealRegionCompactionTask(terminator_ptr)); } } } @@ -2654,13 +2160,13 @@ PSOldGen* old_gen = heap->old_gen(); old_gen->start_array()->reset(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); - TaskQueueSetSuper* qset = ParCompactionManager::chunk_array(); + TaskQueueSetSuper* qset = ParCompactionManager::region_array(); ParallelTaskTerminator terminator(parallel_gc_threads, qset); GCTaskQueue* q = GCTaskQueue::create(); - enqueue_chunk_draining_tasks(q, parallel_gc_threads); + enqueue_region_draining_tasks(q, parallel_gc_threads); enqueue_dense_prefix_tasks(q, parallel_gc_threads); - enqueue_chunk_stealing_tasks(q, &terminator, parallel_gc_threads); + enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads); { TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty); @@ -2676,9 +2182,9 @@ WaitForBarrierGCTask::destroy(fin); #ifdef ASSERT - // Verify that all chunks have been processed before the deferred updates. + // Verify that all regions have been processed before the deferred updates. 
// Note that perm_space_id is skipped; this type of verification is not - // valid until the perm gen is compacted by chunks. + // valid until the perm gen is compacted by regions. for (unsigned int id = old_space_id; id < last_space_id; ++id) { verify_complete(SpaceId(id)); } @@ -2697,42 +2203,42 @@ #ifdef ASSERT void PSParallelCompact::verify_complete(SpaceId space_id) { - // All Chunks between space bottom() to new_top() should be marked as filled - // and all Chunks between new_top() and top() should be available (i.e., + // All Regions between space bottom() to new_top() should be marked as filled + // and all Regions between new_top() and top() should be available (i.e., // should have been emptied). ParallelCompactData& sd = summary_data(); SpaceInfo si = _space_info[space_id]; - HeapWord* new_top_addr = sd.chunk_align_up(si.new_top()); - HeapWord* old_top_addr = sd.chunk_align_up(si.space()->top()); - const size_t beg_chunk = sd.addr_to_chunk_idx(si.space()->bottom()); - const size_t new_top_chunk = sd.addr_to_chunk_idx(new_top_addr); - const size_t old_top_chunk = sd.addr_to_chunk_idx(old_top_addr); + HeapWord* new_top_addr = sd.region_align_up(si.new_top()); + HeapWord* old_top_addr = sd.region_align_up(si.space()->top()); + const size_t beg_region = sd.addr_to_region_idx(si.space()->bottom()); + const size_t new_top_region = sd.addr_to_region_idx(new_top_addr); + const size_t old_top_region = sd.addr_to_region_idx(old_top_addr); bool issued_a_warning = false; - size_t cur_chunk; - for (cur_chunk = beg_chunk; cur_chunk < new_top_chunk; ++cur_chunk) { - const ChunkData* const c = sd.chunk(cur_chunk); + size_t cur_region; + for (cur_region = beg_region; cur_region < new_top_region; ++cur_region) { + const RegionData* const c = sd.region(cur_region); if (!c->completed()) { - warning("chunk " SIZE_FORMAT " not filled: " + warning("region " SIZE_FORMAT " not filled: " "destination_count=" SIZE_FORMAT, - cur_chunk, c->destination_count()); + cur_region, 
c->destination_count()); issued_a_warning = true; } } - for (cur_chunk = new_top_chunk; cur_chunk < old_top_chunk; ++cur_chunk) { - const ChunkData* const c = sd.chunk(cur_chunk); + for (cur_region = new_top_region; cur_region < old_top_region; ++cur_region) { + const RegionData* const c = sd.region(cur_region); if (!c->available()) { - warning("chunk " SIZE_FORMAT " not empty: " + warning("region " SIZE_FORMAT " not empty: " "destination_count=" SIZE_FORMAT, - cur_chunk, c->destination_count()); + cur_region, c->destination_count()); issued_a_warning = true; } } if (issued_a_warning) { - print_chunk_ranges(); + print_region_ranges(); } } #endif // #ifdef ASSERT @@ -2933,46 +2439,47 @@ } #endif //VALIDATE_MARK_SWEEP -// Update interior oops in the ranges of chunks [beg_chunk, end_chunk). +// Update interior oops in the ranges of regions [beg_region, end_region). void PSParallelCompact::update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, SpaceId space_id, - size_t beg_chunk, - size_t end_chunk) { + size_t beg_region, + size_t end_region) { ParallelCompactData& sd = summary_data(); ParMarkBitMap* const mbm = mark_bitmap(); - HeapWord* beg_addr = sd.chunk_to_addr(beg_chunk); - HeapWord* const end_addr = sd.chunk_to_addr(end_chunk); - assert(beg_chunk <= end_chunk, "bad chunk range"); + HeapWord* beg_addr = sd.region_to_addr(beg_region); + HeapWord* const end_addr = sd.region_to_addr(end_region); + assert(beg_region <= end_region, "bad region range"); assert(end_addr <= dense_prefix(space_id), "not in the dense prefix"); #ifdef ASSERT - // Claim the chunks to avoid triggering an assert when they are marked as + // Claim the regions to avoid triggering an assert when they are marked as // filled. 
- for (size_t claim_chunk = beg_chunk; claim_chunk < end_chunk; ++claim_chunk) { - assert(sd.chunk(claim_chunk)->claim_unsafe(), "claim() failed"); + for (size_t claim_region = beg_region; claim_region < end_region; ++claim_region) { + assert(sd.region(claim_region)->claim_unsafe(), "claim() failed"); } #endif // #ifdef ASSERT if (beg_addr != space(space_id)->bottom()) { // Find the first live object or block of dead space that *starts* in this - // range of chunks. If a partial object crosses onto the chunk, skip it; it - // will be marked for 'deferred update' when the object head is processed. - // If dead space crosses onto the chunk, it is also skipped; it will be - // filled when the prior chunk is processed. If neither of those apply, the - // first word in the chunk is the start of a live object or dead space. + // range of regions. If a partial object crosses onto the region, skip it; + // it will be marked for 'deferred update' when the object head is + // processed. If dead space crosses onto the region, it is also skipped; it + // will be filled when the prior region is processed. If neither of those + // apply, the first word in the region is the start of a live object or dead + // space. assert(beg_addr > space(space_id)->bottom(), "sanity"); - const ChunkData* const cp = sd.chunk(beg_chunk); + const RegionData* const cp = sd.region(beg_region); if (cp->partial_obj_size() != 0) { - beg_addr = sd.partial_obj_end(beg_chunk); + beg_addr = sd.partial_obj_end(beg_region); } else if (dead_space_crosses_boundary(cp, mbm->addr_to_bit(beg_addr))) { beg_addr = mbm->find_obj_beg(beg_addr, end_addr); } } if (beg_addr < end_addr) { - // A live object or block of dead space starts in this range of Chunks. + // A live object or block of dead space starts in this range of Regions. HeapWord* const dense_prefix_end = dense_prefix(space_id); // Create closures and iterate. @@ -2986,10 +2493,10 @@ } } - // Mark the chunks as filled. 
- ChunkData* const beg_cp = sd.chunk(beg_chunk); - ChunkData* const end_cp = sd.chunk(end_chunk); - for (ChunkData* cp = beg_cp; cp < end_cp; ++cp) { + // Mark the regions as filled. + RegionData* const beg_cp = sd.region(beg_region); + RegionData* const end_cp = sd.region(end_region); + for (RegionData* cp = beg_cp; cp < end_cp; ++cp) { cp->set_completed(); } } @@ -3021,13 +2528,13 @@ const MutableSpace* const space = space_info->space(); assert(space_info->dense_prefix() >= space->bottom(), "dense_prefix not set"); HeapWord* const beg_addr = space_info->dense_prefix(); - HeapWord* const end_addr = sd.chunk_align_up(space_info->new_top()); - - const ChunkData* const beg_chunk = sd.addr_to_chunk_ptr(beg_addr); - const ChunkData* const end_chunk = sd.addr_to_chunk_ptr(end_addr); - const ChunkData* cur_chunk; - for (cur_chunk = beg_chunk; cur_chunk < end_chunk; ++cur_chunk) { - HeapWord* const addr = cur_chunk->deferred_obj_addr(); + HeapWord* const end_addr = sd.region_align_up(space_info->new_top()); + + const RegionData* const beg_region = sd.addr_to_region_ptr(beg_addr); + const RegionData* const end_region = sd.addr_to_region_ptr(end_addr); + const RegionData* cur_region; + for (cur_region = beg_region; cur_region < end_region; ++cur_region) { + HeapWord* const addr = cur_region->deferred_obj_addr(); if (addr != NULL) { if (start_array != NULL) { start_array->allocate_block(addr); @@ -3073,45 +2580,45 @@ HeapWord* PSParallelCompact::first_src_addr(HeapWord* const dest_addr, - size_t src_chunk_idx) + size_t src_region_idx) { ParMarkBitMap* const bitmap = mark_bitmap(); const ParallelCompactData& sd = summary_data(); - const size_t ChunkSize = ParallelCompactData::ChunkSize; - - assert(sd.is_chunk_aligned(dest_addr), "not aligned"); - - const ChunkData* const src_chunk_ptr = sd.chunk(src_chunk_idx); - const size_t partial_obj_size = src_chunk_ptr->partial_obj_size(); - HeapWord* const src_chunk_destination = src_chunk_ptr->destination(); - - assert(dest_addr >= 
src_chunk_destination, "wrong src chunk"); - assert(src_chunk_ptr->data_size() > 0, "src chunk cannot be empty"); - - HeapWord* const src_chunk_beg = sd.chunk_to_addr(src_chunk_idx); - HeapWord* const src_chunk_end = src_chunk_beg + ChunkSize; - - HeapWord* addr = src_chunk_beg; - if (dest_addr == src_chunk_destination) { - // Return the first live word in the source chunk. + const size_t RegionSize = ParallelCompactData::RegionSize; + + assert(sd.is_region_aligned(dest_addr), "not aligned"); + + const RegionData* const src_region_ptr = sd.region(src_region_idx); + const size_t partial_obj_size = src_region_ptr->partial_obj_size(); + HeapWord* const src_region_destination = src_region_ptr->destination(); + + assert(dest_addr >= src_region_destination, "wrong src region"); + assert(src_region_ptr->data_size() > 0, "src region cannot be empty"); + + HeapWord* const src_region_beg = sd.region_to_addr(src_region_idx); + HeapWord* const src_region_end = src_region_beg + RegionSize; + + HeapWord* addr = src_region_beg; + if (dest_addr == src_region_destination) { + // Return the first live word in the source region. if (partial_obj_size == 0) { - addr = bitmap->find_obj_beg(addr, src_chunk_end); - assert(addr < src_chunk_end, "no objects start in src chunk"); + addr = bitmap->find_obj_beg(addr, src_region_end); + assert(addr < src_region_end, "no objects start in src region"); } return addr; } // Must skip some live data. - size_t words_to_skip = dest_addr - src_chunk_destination; - assert(src_chunk_ptr->data_size() > words_to_skip, "wrong src chunk"); + size_t words_to_skip = dest_addr - src_region_destination; + assert(src_region_ptr->data_size() > words_to_skip, "wrong src region"); if (partial_obj_size >= words_to_skip) { // All the live words to skip are part of the partial object. addr += words_to_skip; if (partial_obj_size == words_to_skip) { // Find the first live word past the partial object. 
- addr = bitmap->find_obj_beg(addr, src_chunk_end); - assert(addr < src_chunk_end, "wrong src chunk"); + addr = bitmap->find_obj_beg(addr, src_region_end); + assert(addr < src_region_end, "wrong src region"); } return addr; } @@ -3122,63 +2629,64 @@ addr += partial_obj_size; } - // Skip over live words due to objects that start in the chunk. - addr = skip_live_words(addr, src_chunk_end, words_to_skip); - assert(addr < src_chunk_end, "wrong src chunk"); + // Skip over live words due to objects that start in the region. + addr = skip_live_words(addr, src_region_end, words_to_skip); + assert(addr < src_region_end, "wrong src region"); return addr; } void PSParallelCompact::decrement_destination_counts(ParCompactionManager* cm, - size_t beg_chunk, + size_t beg_region, HeapWord* end_addr) { ParallelCompactData& sd = summary_data(); - ChunkData* const beg = sd.chunk(beg_chunk); - HeapWord* const end_addr_aligned_up = sd.chunk_align_up(end_addr); - ChunkData* const end = sd.addr_to_chunk_ptr(end_addr_aligned_up); - size_t cur_idx = beg_chunk; - for (ChunkData* cur = beg; cur < end; ++cur, ++cur_idx) { - assert(cur->data_size() > 0, "chunk must have live data"); + RegionData* const beg = sd.region(beg_region); + HeapWord* const end_addr_aligned_up = sd.region_align_up(end_addr); + RegionData* const end = sd.addr_to_region_ptr(end_addr_aligned_up); + size_t cur_idx = beg_region; + for (RegionData* cur = beg; cur < end; ++cur, ++cur_idx) { + assert(cur->data_size() > 0, "region must have live data"); cur->decrement_destination_count(); - if (cur_idx <= cur->source_chunk() && cur->available() && cur->claim()) { + if (cur_idx <= cur->source_region() && cur->available() && cur->claim()) { cm->save_for_processing(cur_idx); } } } -size_t PSParallelCompact::next_src_chunk(MoveAndUpdateClosure& closure, - SpaceId& src_space_id, - HeapWord*& src_space_top, - HeapWord* end_addr) +size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure, + SpaceId& src_space_id, + 
HeapWord*& src_space_top, + HeapWord* end_addr) { - typedef ParallelCompactData::ChunkData ChunkData; + typedef ParallelCompactData::RegionData RegionData; ParallelCompactData& sd = PSParallelCompact::summary_data(); - const size_t chunk_size = ParallelCompactData::ChunkSize; - - size_t src_chunk_idx = 0; - - // Skip empty chunks (if any) up to the top of the space. - HeapWord* const src_aligned_up = sd.chunk_align_up(end_addr); - ChunkData* src_chunk_ptr = sd.addr_to_chunk_ptr(src_aligned_up); - HeapWord* const top_aligned_up = sd.chunk_align_up(src_space_top); - const ChunkData* const top_chunk_ptr = sd.addr_to_chunk_ptr(top_aligned_up); - while (src_chunk_ptr < top_chunk_ptr && src_chunk_ptr->data_size() == 0) { - ++src_chunk_ptr; + const size_t region_size = ParallelCompactData::RegionSize; + + size_t src_region_idx = 0; + + // Skip empty regions (if any) up to the top of the space. + HeapWord* const src_aligned_up = sd.region_align_up(end_addr); + RegionData* src_region_ptr = sd.addr_to_region_ptr(src_aligned_up); + HeapWord* const top_aligned_up = sd.region_align_up(src_space_top); + const RegionData* const top_region_ptr = + sd.addr_to_region_ptr(top_aligned_up); + while (src_region_ptr < top_region_ptr && src_region_ptr->data_size() == 0) { + ++src_region_ptr; } - if (src_chunk_ptr < top_chunk_ptr) { - // The next source chunk is in the current space. Update src_chunk_idx and - // the source address to match src_chunk_ptr. - src_chunk_idx = sd.chunk(src_chunk_ptr); - HeapWord* const src_chunk_addr = sd.chunk_to_addr(src_chunk_idx); - if (src_chunk_addr > closure.source()) { - closure.set_source(src_chunk_addr); + if (src_region_ptr < top_region_ptr) { + // The next source region is in the current space. Update src_region_idx + // and the source address to match src_region_ptr. 
+ src_region_idx = sd.region(src_region_ptr); + HeapWord* const src_region_addr = sd.region_to_addr(src_region_idx); + if (src_region_addr > closure.source()) { + closure.set_source(src_region_addr); } - return src_chunk_idx; + return src_region_idx; } - // Switch to a new source space and find the first non-empty chunk. + // Switch to a new source space and find the first non-empty region. unsigned int space_id = src_space_id + 1; assert(space_id < last_space_id, "not enough spaces"); @@ -3187,14 +2695,14 @@ do { MutableSpace* space = _space_info[space_id].space(); HeapWord* const bottom = space->bottom(); - const ChunkData* const bottom_cp = sd.addr_to_chunk_ptr(bottom); + const RegionData* const bottom_cp = sd.addr_to_region_ptr(bottom); // Iterate over the spaces that do not compact into themselves. if (bottom_cp->destination() != bottom) { - HeapWord* const top_aligned_up = sd.chunk_align_up(space->top()); - const ChunkData* const top_cp = sd.addr_to_chunk_ptr(top_aligned_up); - - for (const ChunkData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) { + HeapWord* const top_aligned_up = sd.region_align_up(space->top()); + const RegionData* const top_cp = sd.addr_to_region_ptr(top_aligned_up); + + for (const RegionData* src_cp = bottom_cp; src_cp < top_cp; ++src_cp) { if (src_cp->live_obj_size() > 0) { // Found it. 
assert(src_cp->destination() == destination, @@ -3204,9 +2712,9 @@ src_space_id = SpaceId(space_id); src_space_top = space->top(); - const size_t src_chunk_idx = sd.chunk(src_cp); - closure.set_source(sd.chunk_to_addr(src_chunk_idx)); - return src_chunk_idx; + const size_t src_region_idx = sd.region(src_cp); + closure.set_source(sd.region_to_addr(src_region_idx)); + return src_region_idx; } else { assert(src_cp->data_size() == 0, "sanity"); } @@ -3214,38 +2722,38 @@ } } while (++space_id < last_space_id); - assert(false, "no source chunk was found"); + assert(false, "no source region was found"); return 0; } -void PSParallelCompact::fill_chunk(ParCompactionManager* cm, size_t chunk_idx) +void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx) { typedef ParMarkBitMap::IterationStatus IterationStatus; - const size_t ChunkSize = ParallelCompactData::ChunkSize; + const size_t RegionSize = ParallelCompactData::RegionSize; ParMarkBitMap* const bitmap = mark_bitmap(); ParallelCompactData& sd = summary_data(); - ChunkData* const chunk_ptr = sd.chunk(chunk_idx); + RegionData* const region_ptr = sd.region(region_idx); // Get the items needed to construct the closure. - HeapWord* dest_addr = sd.chunk_to_addr(chunk_idx); + HeapWord* dest_addr = sd.region_to_addr(region_idx); SpaceId dest_space_id = space_id(dest_addr); ObjectStartArray* start_array = _space_info[dest_space_id].start_array(); HeapWord* new_top = _space_info[dest_space_id].new_top(); assert(dest_addr < new_top, "sanity"); - const size_t words = MIN2(pointer_delta(new_top, dest_addr), ChunkSize); - - // Get the source chunk and related info. - size_t src_chunk_idx = chunk_ptr->source_chunk(); - SpaceId src_space_id = space_id(sd.chunk_to_addr(src_chunk_idx)); + const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize); + + // Get the source region and related info. 
+ size_t src_region_idx = region_ptr->source_region(); + SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx)); HeapWord* src_space_top = _space_info[src_space_id].space()->top(); MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words); - closure.set_source(first_src_addr(dest_addr, src_chunk_idx)); - - // Adjust src_chunk_idx to prepare for decrementing destination counts (the - // destination count is not decremented when a chunk is copied to itself). - if (src_chunk_idx == chunk_idx) { - src_chunk_idx += 1; + closure.set_source(first_src_addr(dest_addr, src_region_idx)); + + // Adjust src_region_idx to prepare for decrementing destination counts (the + // destination count is not decremented when a region is copied to itself). + if (src_region_idx == region_idx) { + src_region_idx += 1; } if (bitmap->is_unmarked(closure.source())) { @@ -3255,32 +2763,33 @@ HeapWord* const old_src_addr = closure.source(); closure.copy_partial_obj(); if (closure.is_full()) { - decrement_destination_counts(cm, src_chunk_idx, closure.source()); - chunk_ptr->set_deferred_obj_addr(NULL); - chunk_ptr->set_completed(); + decrement_destination_counts(cm, src_region_idx, closure.source()); + region_ptr->set_deferred_obj_addr(NULL); + region_ptr->set_completed(); return; } - HeapWord* const end_addr = sd.chunk_align_down(closure.source()); - if (sd.chunk_align_down(old_src_addr) != end_addr) { - // The partial object was copied from more than one source chunk. - decrement_destination_counts(cm, src_chunk_idx, end_addr); - - // Move to the next source chunk, possibly switching spaces as well. All + HeapWord* const end_addr = sd.region_align_down(closure.source()); + if (sd.region_align_down(old_src_addr) != end_addr) { + // The partial object was copied from more than one source region. + decrement_destination_counts(cm, src_region_idx, end_addr); + + // Move to the next source region, possibly switching spaces as well. 
All // args except end_addr may be modified. - src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top, - end_addr); + src_region_idx = next_src_region(closure, src_space_id, src_space_top, + end_addr); } } do { HeapWord* const cur_addr = closure.source(); - HeapWord* const end_addr = MIN2(sd.chunk_align_up(cur_addr + 1), + HeapWord* const end_addr = MIN2(sd.region_align_up(cur_addr + 1), src_space_top); IterationStatus status = bitmap->iterate(&closure, cur_addr, end_addr); if (status == ParMarkBitMap::incomplete) { - // The last obj that starts in the source chunk does not end in the chunk. + // The last obj that starts in the source region does not end in the + // region. assert(closure.source() < end_addr, "sanity") HeapWord* const obj_beg = closure.source(); HeapWord* const range_end = MIN2(obj_beg + closure.words_remaining(), @@ -3299,28 +2808,28 @@ if (status == ParMarkBitMap::would_overflow) { // The last object did not fit. Note that interior oop updates were - // deferred, then copy enough of the object to fill the chunk. - chunk_ptr->set_deferred_obj_addr(closure.destination()); + // deferred, then copy enough of the object to fill the region. + region_ptr->set_deferred_obj_addr(closure.destination()); status = closure.copy_until_full(); // copies from closure.source() - decrement_destination_counts(cm, src_chunk_idx, closure.source()); - chunk_ptr->set_completed(); + decrement_destination_counts(cm, src_region_idx, closure.source()); + region_ptr->set_completed(); return; } if (status == ParMarkBitMap::full) { - decrement_destination_counts(cm, src_chunk_idx, closure.source()); - chunk_ptr->set_deferred_obj_addr(NULL); - chunk_ptr->set_completed(); + decrement_destination_counts(cm, src_region_idx, closure.source()); + region_ptr->set_deferred_obj_addr(NULL); + region_ptr->set_completed(); return; } - decrement_destination_counts(cm, src_chunk_idx, end_addr); - - // Move to the next source chunk, possibly switching spaces as well. 
All + decrement_destination_counts(cm, src_region_idx, end_addr); + + // Move to the next source region, possibly switching spaces as well. All // args except end_addr may be modified. - src_chunk_idx = next_src_chunk(closure, src_space_id, src_space_top, - end_addr); + src_region_idx = next_src_region(closure, src_space_id, src_space_top, + end_addr); } while (true); } @@ -3352,15 +2861,15 @@ } #endif - const size_t beg_chunk = sd.addr_to_chunk_idx(beg_addr); - const size_t dp_chunk = sd.addr_to_chunk_idx(dp_addr); - if (beg_chunk < dp_chunk) { - update_and_deadwood_in_dense_prefix(cm, space_id, beg_chunk, dp_chunk); + const size_t beg_region = sd.addr_to_region_idx(beg_addr); + const size_t dp_region = sd.addr_to_region_idx(dp_addr); + if (beg_region < dp_region) { + update_and_deadwood_in_dense_prefix(cm, space_id, beg_region, dp_region); } - // The destination of the first live object that starts in the chunk is one - // past the end of the partial object entering the chunk (if any). - HeapWord* const dest_addr = sd.partial_obj_end(dp_chunk); + // The destination of the first live object that starts in the region is one + // past the end of the partial object entering the region (if any). 
+ HeapWord* const dest_addr = sd.partial_obj_end(dp_region); HeapWord* const new_top = _space_info[space_id].new_top(); assert(new_top >= dest_addr, "bad new_top value"); const size_t words = pointer_delta(new_top, dest_addr); @@ -3469,172 +2978,6 @@ return ParMarkBitMap::incomplete; } -BitBlockUpdateClosure::BitBlockUpdateClosure(ParMarkBitMap* mbm, - ParCompactionManager* cm, - size_t chunk_index) : - ParMarkBitMapClosure(mbm, cm), - _live_data_left(0), - _cur_block(0) { - _chunk_start = - PSParallelCompact::summary_data().chunk_to_addr(chunk_index); - _chunk_end = - PSParallelCompact::summary_data().chunk_to_addr(chunk_index) + - ParallelCompactData::ChunkSize; - _chunk_index = chunk_index; - _cur_block = - PSParallelCompact::summary_data().addr_to_block_idx(_chunk_start); -} - -bool BitBlockUpdateClosure::chunk_contains_cur_block() { - return ParallelCompactData::chunk_contains_block(_chunk_index, _cur_block); -} - -void BitBlockUpdateClosure::reset_chunk(size_t chunk_index) { - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(7);) - ParallelCompactData& sd = PSParallelCompact::summary_data(); - _chunk_index = chunk_index; - _live_data_left = 0; - _chunk_start = sd.chunk_to_addr(chunk_index); - _chunk_end = sd.chunk_to_addr(chunk_index) + ParallelCompactData::ChunkSize; - - // The first block in this chunk - size_t first_block = sd.addr_to_block_idx(_chunk_start); - size_t partial_live_size = sd.chunk(chunk_index)->partial_obj_size(); - - // Set the offset to 0. By definition it should have that value - // but it may have been written while processing an earlier chunk. - if (partial_live_size == 0) { - // No live object extends onto the chunk. The first bit - // in the bit map for the first chunk must be a start bit. - // Although there may not be any marked bits, it is safe - // to set it as a start bit. 
- sd.block(first_block)->set_start_bit_offset(0); - sd.block(first_block)->set_first_is_start_bit(true); - } else if (sd.partial_obj_ends_in_block(first_block)) { - sd.block(first_block)->set_end_bit_offset(0); - sd.block(first_block)->set_first_is_start_bit(false); - } else { - // The partial object extends beyond the first block. - // There is no object starting in the first block - // so the offset and bit parity are not needed. - // Set the the bit parity to start bit so assertions - // work when not bit is found. - sd.block(first_block)->set_end_bit_offset(0); - sd.block(first_block)->set_first_is_start_bit(false); - } - _cur_block = first_block; -#ifdef ASSERT - if (sd.block(first_block)->first_is_start_bit()) { - assert(!sd.partial_obj_ends_in_block(first_block), - "Partial object cannot end in first block"); - } - - if (PrintGCDetails && Verbose) { - if (partial_live_size == 1) { - gclog_or_tty->print_cr("first_block " PTR_FORMAT - " _offset " PTR_FORMAT - " _first_is_start_bit %d", - first_block, - sd.block(first_block)->raw_offset(), - sd.block(first_block)->first_is_start_bit()); - } - } -#endif - DEBUG_ONLY(ParallelCompactData::BlockData::set_cur_phase(17);) -} - -// This method is called when a object has been found (both beginning -// and end of the object) in the range of iteration. This method is -// calculating the words of live data to the left of a block. That live -// data includes any object starting to the left of the block (i.e., -// the live-data-to-the-left of block AAA will include the full size -// of any object entering AAA). - -ParMarkBitMapClosure::IterationStatus -BitBlockUpdateClosure::do_addr(HeapWord* addr, size_t words) { - // add the size to the block data. 
- HeapWord* obj = addr; - ParallelCompactData& sd = PSParallelCompact::summary_data(); - - assert(bitmap()->obj_size(obj) == words, "bad size"); - assert(_chunk_start <= obj, "object is not in chunk"); - assert(obj + words <= _chunk_end, "object is not in chunk"); - - // Update the live data to the left - size_t prev_live_data_left = _live_data_left; - _live_data_left = _live_data_left + words; - - // Is this object in the current block. - size_t block_of_obj = sd.addr_to_block_idx(obj); - size_t block_of_obj_last = sd.addr_to_block_idx(obj + words - 1); - HeapWord* block_of_obj_last_addr = sd.block_to_addr(block_of_obj_last); - if (_cur_block < block_of_obj) { - - // - // No object crossed the block boundary and this object was found - // on the other side of the block boundary. Update the offset for - // the new block with the data size that does not include this object. - // - // The first bit in block_of_obj is a start bit except in the - // case where the partial object for the chunk extends into - // this block. - if (sd.partial_obj_ends_in_block(block_of_obj)) { - sd.block(block_of_obj)->set_end_bit_offset(prev_live_data_left); - } else { - sd.block(block_of_obj)->set_start_bit_offset(prev_live_data_left); - } - - // Does this object pass beyond the its block? - if (block_of_obj < block_of_obj_last) { - // Object crosses block boundary. Two blocks need to be udpated: - // the current block where the object started - // the block where the object ends - // - // The offset for blocks with no objects starting in them - // (e.g., blocks between _cur_block and block_of_obj_last) - // should not be needed. - // Note that block_of_obj_last may be in another chunk. If so, - // it should be overwritten later. This is a problem (writting - // into a block in a later chunk) for parallel execution. - assert(obj < block_of_obj_last_addr, - "Object should start in previous block"); - - // obj is crossing into block_of_obj_last so the first bit - // is and end bit. 
- sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left); - - _cur_block = block_of_obj_last; - } else { - // _first_is_start_bit has already been set correctly - // in the if-then-else above so don't reset it here. - _cur_block = block_of_obj; - } - } else { - // The current block only changes if the object extends beyound - // the block it starts in. - // - // The object starts in the current block. - // Does this object pass beyond the end of it? - if (block_of_obj < block_of_obj_last) { - // Object crosses block boundary. - // See note above on possible blocks between block_of_obj and - // block_of_obj_last - assert(obj < block_of_obj_last_addr, - "Object should start in previous block"); - - sd.block(block_of_obj_last)->set_end_bit_offset(_live_data_left); - - _cur_block = block_of_obj_last; - } - } - - // Return incomplete if there are more blocks to be done. - if (chunk_contains_cur_block()) { - return ParMarkBitMap::incomplete; - } - return ParMarkBitMap::complete; -} - // Verify the new location using the forwarding pointer // from MarkSweep::mark_sweep_phase2(). Set the mark_word // to the initial value. @@ -3707,12 +3050,3 @@ return last_space_id; } } - -// Here temporarily for debugging -#ifdef ASSERT - size_t ParallelCompactData::block_idx(BlockData* block) { - size_t index = pointer_delta(block, - PSParallelCompact::summary_data()._block_data, sizeof(BlockData)); - return index; - } -#endif diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -76,87 +76,80 @@ { public: // Sizes are in HeapWords, unless indicated otherwise. 
- static const size_t Log2ChunkSize; - static const size_t ChunkSize; - static const size_t ChunkSizeBytes; + static const size_t Log2RegionSize; + static const size_t RegionSize; + static const size_t RegionSizeBytes; - // Mask for the bits in a size_t to get an offset within a chunk. - static const size_t ChunkSizeOffsetMask; - // Mask for the bits in a pointer to get an offset within a chunk. - static const size_t ChunkAddrOffsetMask; - // Mask for the bits in a pointer to get the address of the start of a chunk. - static const size_t ChunkAddrMask; + // Mask for the bits in a size_t to get an offset within a region. + static const size_t RegionSizeOffsetMask; + // Mask for the bits in a pointer to get an offset within a region. + static const size_t RegionAddrOffsetMask; + // Mask for the bits in a pointer to get the address of the start of a region. + static const size_t RegionAddrMask; - static const size_t Log2BlockSize; - static const size_t BlockSize; - static const size_t BlockOffsetMask; - static const size_t BlockMask; - - static const size_t BlocksPerChunk; - - class ChunkData + class RegionData { public: - // Destination address of the chunk. + // Destination address of the region. HeapWord* destination() const { return _destination; } - // The first chunk containing data destined for this chunk. - size_t source_chunk() const { return _source_chunk; } + // The first region containing data destined for this region. + size_t source_region() const { return _source_region; } - // The object (if any) starting in this chunk and ending in a different - // chunk that could not be updated during the main (parallel) compaction + // The object (if any) starting in this region and ending in a different + // region that could not be updated during the main (parallel) compaction // phase. This is different from _partial_obj_addr, which is an object that - // extends onto a source chunk. However, the two uses do not overlap in + // extends onto a source region. 
However, the two uses do not overlap in // time, so the same field is used to save space. HeapWord* deferred_obj_addr() const { return _partial_obj_addr; } - // The starting address of the partial object extending onto the chunk. + // The starting address of the partial object extending onto the region. HeapWord* partial_obj_addr() const { return _partial_obj_addr; } - // Size of the partial object extending onto the chunk (words). + // Size of the partial object extending onto the region (words). size_t partial_obj_size() const { return _partial_obj_size; } - // Size of live data that lies within this chunk due to objects that start - // in this chunk (words). This does not include the partial object - // extending onto the chunk (if any), or the part of an object that extends - // onto the next chunk (if any). + // Size of live data that lies within this region due to objects that start + // in this region (words). This does not include the partial object + // extending onto the region (if any), or the part of an object that extends + // onto the next region (if any). size_t live_obj_size() const { return _dc_and_los & los_mask; } - // Total live data that lies within the chunk (words). + // Total live data that lies within the region (words). size_t data_size() const { return partial_obj_size() + live_obj_size(); } - // The destination_count is the number of other chunks to which data from - // this chunk will be copied. At the end of the summary phase, the valid + // The destination_count is the number of other regions to which data from + // this region will be copied. At the end of the summary phase, the valid // values of destination_count are // - // 0 - data from the chunk will be compacted completely into itself, or the - // chunk is empty. The chunk can be claimed and then filled. - // 1 - data from the chunk will be compacted into 1 other chunk; some - // data from the chunk may also be compacted into the chunk itself. 
- // 2 - data from the chunk will be copied to 2 other chunks. + // 0 - data from the region will be compacted completely into itself, or the + // region is empty. The region can be claimed and then filled. + // 1 - data from the region will be compacted into 1 other region; some + // data from the region may also be compacted into the region itself. + // 2 - data from the region will be copied to 2 other regions. // - // During compaction as chunks are emptied, the destination_count is + // During compaction as regions are emptied, the destination_count is // decremented (atomically) and when it reaches 0, it can be claimed and // then filled. // - // A chunk is claimed for processing by atomically changing the - // destination_count to the claimed value (dc_claimed). After a chunk has + // A region is claimed for processing by atomically changing the + // destination_count to the claimed value (dc_claimed). After a region has // been filled, the destination_count should be set to the completed value // (dc_completed). inline uint destination_count() const; inline uint destination_count_raw() const; - // The location of the java heap data that corresponds to this chunk. + // The location of the java heap data that corresponds to this region. inline HeapWord* data_location() const; - // The highest address referenced by objects in this chunk. + // The highest address referenced by objects in this region. inline HeapWord* highest_ref() const; - // Whether this chunk is available to be claimed, has been claimed, or has + // Whether this region is available to be claimed, has been claimed, or has // been completed. // - // Minor subtlety: claimed() returns true if the chunk is marked - // completed(), which is desirable since a chunk must be claimed before it + // Minor subtlety: claimed() returns true if the region is marked + // completed(), which is desirable since a region must be claimed before it // can be completed. 
bool available() const { return _dc_and_los < dc_one; } bool claimed() const { return _dc_and_los >= dc_claimed; } @@ -164,11 +157,11 @@ // These are not atomic. void set_destination(HeapWord* addr) { _destination = addr; } - void set_source_chunk(size_t chunk) { _source_chunk = chunk; } + void set_source_region(size_t region) { _source_region = region; } void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; } void set_partial_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; } void set_partial_obj_size(size_t words) { - _partial_obj_size = (chunk_sz_t) words; + _partial_obj_size = (region_sz_t) words; } inline void set_destination_count(uint count); @@ -184,129 +177,57 @@ inline bool claim(); private: - // The type used to represent object sizes within a chunk. - typedef uint chunk_sz_t; + // The type used to represent object sizes within a region. + typedef uint region_sz_t; // Constants for manipulating the _dc_and_los field, which holds both the // destination count and live obj size. The live obj size lives at the // least significant end so no masking is necessary when adding. - static const chunk_sz_t dc_shift; // Shift amount. - static const chunk_sz_t dc_mask; // Mask for destination count. - static const chunk_sz_t dc_one; // 1, shifted appropriately. - static const chunk_sz_t dc_claimed; // Chunk has been claimed. - static const chunk_sz_t dc_completed; // Chunk has been completed. - static const chunk_sz_t los_mask; // Mask for live obj size. + static const region_sz_t dc_shift; // Shift amount. + static const region_sz_t dc_mask; // Mask for destination count. + static const region_sz_t dc_one; // 1, shifted appropriately. + static const region_sz_t dc_claimed; // Region has been claimed. + static const region_sz_t dc_completed; // Region has been completed. + static const region_sz_t los_mask; // Mask for live obj size. 
- HeapWord* _destination; - size_t _source_chunk; - HeapWord* _partial_obj_addr; - chunk_sz_t _partial_obj_size; - chunk_sz_t volatile _dc_and_los; + HeapWord* _destination; + size_t _source_region; + HeapWord* _partial_obj_addr; + region_sz_t _partial_obj_size; + region_sz_t volatile _dc_and_los; #ifdef ASSERT // These enable optimizations that are only partially implemented. Use // debug builds to prevent the code fragments from breaking. - HeapWord* _data_location; - HeapWord* _highest_ref; + HeapWord* _data_location; + HeapWord* _highest_ref; #endif // #ifdef ASSERT #ifdef ASSERT public: - uint _pushed; // 0 until chunk is pushed onto a worker's stack + uint _pushed; // 0 until region is pushed onto a worker's stack private: #endif }; - // 'Blocks' allow shorter sections of the bitmap to be searched. Each Block - // holds an offset, which is the amount of live data in the Chunk to the left - // of the first live object in the Block. This amount of live data will - // include any object extending into the block. The first block in - // a chunk does not include any partial object extending into the - // the chunk. - // - // The offset also encodes the - // 'parity' of the first 1 bit in the Block: a positive offset means the - // first 1 bit marks the start of an object, a negative offset means the first - // 1 bit marks the end of an object. - class BlockData - { - public: - typedef short int blk_ofs_t; - - blk_ofs_t offset() const { return _offset >= 0 ? _offset : -_offset; } - blk_ofs_t raw_offset() const { return _offset; } - void set_first_is_start_bit(bool v) { _first_is_start_bit = v; } - -#if 0 - // The need for this method was anticipated but it is - // never actually used. Do not include it for now. If - // it is needed, consider the problem of what is passed - // as "v". To avoid warning errors the method set_start_bit_offset() - // was changed to take a size_t as the parameter and to do the - // check for the possible overflow. 
Doing the cast in these - // methods better limits the potential problems because of - // the size of the field to this class. - void set_raw_offset(blk_ofs_t v) { _offset = v; } -#endif - void set_start_bit_offset(size_t val) { - assert(val >= 0, "sanity"); - _offset = (blk_ofs_t) val; - assert(val == (size_t) _offset, "Value is too large"); - _first_is_start_bit = true; - } - void set_end_bit_offset(size_t val) { - assert(val >= 0, "sanity"); - _offset = (blk_ofs_t) val; - assert(val == (size_t) _offset, "Value is too large"); - _offset = - _offset; - _first_is_start_bit = false; - } - bool first_is_start_bit() { - assert(_set_phase > 0, "Not initialized"); - return _first_is_start_bit; - } - bool first_is_end_bit() { - assert(_set_phase > 0, "Not initialized"); - return !_first_is_start_bit; - } - - private: - blk_ofs_t _offset; - // This is temporary until the mark_bitmap is separated into - // a start bit array and an end bit array. - bool _first_is_start_bit; -#ifdef ASSERT - short _set_phase; - static short _cur_phase; - public: - static void set_cur_phase(short v) { _cur_phase = v; } -#endif - }; - public: ParallelCompactData(); bool initialize(MemRegion covered_region); - size_t chunk_count() const { return _chunk_count; } - - // Convert chunk indices to/from ChunkData pointers. - inline ChunkData* chunk(size_t chunk_idx) const; - inline size_t chunk(const ChunkData* const chunk_ptr) const; + size_t region_count() const { return _region_count; } - // Returns true if the given address is contained within the chunk - bool chunk_contains(size_t chunk_index, HeapWord* addr); + // Convert region indices to/from RegionData pointers. + inline RegionData* region(size_t region_idx) const; + inline size_t region(const RegionData* const region_ptr) const; - size_t block_count() const { return _block_count; } - inline BlockData* block(size_t n) const; - - // Returns true if the given block is in the given chunk. 
- static bool chunk_contains_block(size_t chunk_index, size_t block_index); + // Returns true if the given address is contained within the region + bool region_contains(size_t region_index, HeapWord* addr); void add_obj(HeapWord* addr, size_t len); void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); } - // Fill in the chunks covering [beg, end) so that no data moves; i.e., the - // destination of chunk n is simply the start of chunk n. The argument beg - // must be chunk-aligned; end need not be. + // Fill in the regions covering [beg, end) so that no data moves; i.e., the + // destination of region n is simply the start of region n. The argument beg + // must be region-aligned; end need not be. void summarize_dense_prefix(HeapWord* beg, HeapWord* end); bool summarize(HeapWord* target_beg, HeapWord* target_end, @@ -314,48 +235,33 @@ HeapWord** target_next, HeapWord** source_next = 0); void clear(); - void clear_range(size_t beg_chunk, size_t end_chunk); + void clear_range(size_t beg_region, size_t end_region); void clear_range(HeapWord* beg, HeapWord* end) { - clear_range(addr_to_chunk_idx(beg), addr_to_chunk_idx(end)); + clear_range(addr_to_region_idx(beg), addr_to_region_idx(end)); } - // Return the number of words between addr and the start of the chunk + // Return the number of words between addr and the start of the region // containing addr. - inline size_t chunk_offset(const HeapWord* addr) const; - - // Convert addresses to/from a chunk index or chunk pointer. 
- inline size_t addr_to_chunk_idx(const HeapWord* addr) const; - inline ChunkData* addr_to_chunk_ptr(const HeapWord* addr) const; - inline HeapWord* chunk_to_addr(size_t chunk) const; - inline HeapWord* chunk_to_addr(size_t chunk, size_t offset) const; - inline HeapWord* chunk_to_addr(const ChunkData* chunk) const; + inline size_t region_offset(const HeapWord* addr) const; - inline HeapWord* chunk_align_down(HeapWord* addr) const; - inline HeapWord* chunk_align_up(HeapWord* addr) const; - inline bool is_chunk_aligned(HeapWord* addr) const; + // Convert addresses to/from a region index or region pointer. + inline size_t addr_to_region_idx(const HeapWord* addr) const; + inline RegionData* addr_to_region_ptr(const HeapWord* addr) const; + inline HeapWord* region_to_addr(size_t region) const; + inline HeapWord* region_to_addr(size_t region, size_t offset) const; + inline HeapWord* region_to_addr(const RegionData* region) const; - // Analogous to chunk_offset() for blocks. - size_t block_offset(const HeapWord* addr) const; - size_t addr_to_block_idx(const HeapWord* addr) const; - size_t addr_to_block_idx(const oop obj) const { - return addr_to_block_idx((HeapWord*) obj); - } - inline BlockData* addr_to_block_ptr(const HeapWord* addr) const; - inline HeapWord* block_to_addr(size_t block) const; + inline HeapWord* region_align_down(HeapWord* addr) const; + inline HeapWord* region_align_up(HeapWord* addr) const; + inline bool is_region_aligned(HeapWord* addr) const; // Return the address one past the end of the partial object. - HeapWord* partial_obj_end(size_t chunk_idx) const; + HeapWord* partial_obj_end(size_t region_idx) const; // Return the new location of the object p after the // the compaction. HeapWord* calc_new_pointer(HeapWord* addr); - // Same as calc_new_pointer() using blocks. - HeapWord* block_calc_new_pointer(HeapWord* addr); - - // Same as calc_new_pointer() using chunks. 
- HeapWord* chunk_calc_new_pointer(HeapWord* addr); - HeapWord* calc_new_pointer(oop p) { return calc_new_pointer((HeapWord*) p); } @@ -363,22 +269,13 @@ // Return the updated address for the given klass klassOop calc_new_klass(klassOop); - // Given a block returns true if the partial object for the - // corresponding chunk ends in the block. Returns false, otherwise - // If there is no partial object, returns false. - bool partial_obj_ends_in_block(size_t block_index); - - // Returns the block index for the block - static size_t block_idx(BlockData* block); - #ifdef ASSERT void verify_clear(const PSVirtualSpace* vspace); void verify_clear(); #endif // #ifdef ASSERT private: - bool initialize_block_data(size_t region_size); - bool initialize_chunk_data(size_t region_size); + bool initialize_region_data(size_t region_size); PSVirtualSpace* create_vspace(size_t count, size_t element_size); private: @@ -387,74 +284,70 @@ HeapWord* _region_end; #endif // #ifdef ASSERT - PSVirtualSpace* _chunk_vspace; - ChunkData* _chunk_data; - size_t _chunk_count; - - PSVirtualSpace* _block_vspace; - BlockData* _block_data; - size_t _block_count; + PSVirtualSpace* _region_vspace; + RegionData* _region_data; + size_t _region_count; }; inline uint -ParallelCompactData::ChunkData::destination_count_raw() const +ParallelCompactData::RegionData::destination_count_raw() const { return _dc_and_los & dc_mask; } inline uint -ParallelCompactData::ChunkData::destination_count() const +ParallelCompactData::RegionData::destination_count() const { return destination_count_raw() >> dc_shift; } inline void -ParallelCompactData::ChunkData::set_destination_count(uint count) +ParallelCompactData::RegionData::set_destination_count(uint count) { assert(count <= (dc_completed >> dc_shift), "count too large"); - const chunk_sz_t live_sz = (chunk_sz_t) live_obj_size(); + const region_sz_t live_sz = (region_sz_t) live_obj_size(); _dc_and_los = (count << dc_shift) | live_sz; } -inline void 
ParallelCompactData::ChunkData::set_live_obj_size(size_t words) +inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words) { assert(words <= los_mask, "would overflow"); - _dc_and_los = destination_count_raw() | (chunk_sz_t)words; + _dc_and_los = destination_count_raw() | (region_sz_t)words; } -inline void ParallelCompactData::ChunkData::decrement_destination_count() +inline void ParallelCompactData::RegionData::decrement_destination_count() { assert(_dc_and_los < dc_claimed, "already claimed"); assert(_dc_and_los >= dc_one, "count would go negative"); Atomic::add((int)dc_mask, (volatile int*)&_dc_and_los); } -inline HeapWord* ParallelCompactData::ChunkData::data_location() const +inline HeapWord* ParallelCompactData::RegionData::data_location() const { DEBUG_ONLY(return _data_location;) NOT_DEBUG(return NULL;) } -inline HeapWord* ParallelCompactData::ChunkData::highest_ref() const +inline HeapWord* ParallelCompactData::RegionData::highest_ref() const { DEBUG_ONLY(return _highest_ref;) NOT_DEBUG(return NULL;) } -inline void ParallelCompactData::ChunkData::set_data_location(HeapWord* addr) +inline void ParallelCompactData::RegionData::set_data_location(HeapWord* addr) { DEBUG_ONLY(_data_location = addr;) } -inline void ParallelCompactData::ChunkData::set_completed() +inline void ParallelCompactData::RegionData::set_completed() { assert(claimed(), "must be claimed first"); - _dc_and_los = dc_completed | (chunk_sz_t) live_obj_size(); + _dc_and_los = dc_completed | (region_sz_t) live_obj_size(); } -// MT-unsafe claiming of a chunk. Should only be used during single threaded +// MT-unsafe claiming of a region. Should only be used during single threaded // execution. 
-inline bool ParallelCompactData::ChunkData::claim_unsafe() +inline bool ParallelCompactData::RegionData::claim_unsafe() { if (available()) { _dc_and_los |= dc_claimed; @@ -463,13 +356,13 @@ return false; } -inline void ParallelCompactData::ChunkData::add_live_obj(size_t words) +inline void ParallelCompactData::RegionData::add_live_obj(size_t words) { assert(words <= (size_t)los_mask - live_obj_size(), "overflow"); Atomic::add((int) words, (volatile int*) &_dc_and_los); } -inline void ParallelCompactData::ChunkData::set_highest_ref(HeapWord* addr) +inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr) { #ifdef ASSERT HeapWord* tmp = _highest_ref; @@ -479,7 +372,7 @@ #endif // #ifdef ASSERT } -inline bool ParallelCompactData::ChunkData::claim() +inline bool ParallelCompactData::RegionData::claim() { const int los = (int) live_obj_size(); const int old = Atomic::cmpxchg(dc_claimed | los, @@ -487,119 +380,85 @@ return old == los; } -inline ParallelCompactData::ChunkData* -ParallelCompactData::chunk(size_t chunk_idx) const +inline ParallelCompactData::RegionData* +ParallelCompactData::region(size_t region_idx) const { - assert(chunk_idx <= chunk_count(), "bad arg"); - return _chunk_data + chunk_idx; + assert(region_idx <= region_count(), "bad arg"); + return _region_data + region_idx; } inline size_t -ParallelCompactData::chunk(const ChunkData* const chunk_ptr) const +ParallelCompactData::region(const RegionData* const region_ptr) const { - assert(chunk_ptr >= _chunk_data, "bad arg"); - assert(chunk_ptr <= _chunk_data + chunk_count(), "bad arg"); - return pointer_delta(chunk_ptr, _chunk_data, sizeof(ChunkData)); -} - -inline ParallelCompactData::BlockData* -ParallelCompactData::block(size_t n) const { - assert(n < block_count(), "bad arg"); - return _block_data + n; + assert(region_ptr >= _region_data, "bad arg"); + assert(region_ptr <= _region_data + region_count(), "bad arg"); + return pointer_delta(region_ptr, _region_data, 
sizeof(RegionData)); } inline size_t -ParallelCompactData::chunk_offset(const HeapWord* addr) const +ParallelCompactData::region_offset(const HeapWord* addr) const { assert(addr >= _region_start, "bad addr"); assert(addr <= _region_end, "bad addr"); - return (size_t(addr) & ChunkAddrOffsetMask) >> LogHeapWordSize; + return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize; } inline size_t -ParallelCompactData::addr_to_chunk_idx(const HeapWord* addr) const +ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const { assert(addr >= _region_start, "bad addr"); assert(addr <= _region_end, "bad addr"); - return pointer_delta(addr, _region_start) >> Log2ChunkSize; -} - -inline ParallelCompactData::ChunkData* -ParallelCompactData::addr_to_chunk_ptr(const HeapWord* addr) const -{ - return chunk(addr_to_chunk_idx(addr)); + return pointer_delta(addr, _region_start) >> Log2RegionSize; } -inline HeapWord* -ParallelCompactData::chunk_to_addr(size_t chunk) const +inline ParallelCompactData::RegionData* +ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const { - assert(chunk <= _chunk_count, "chunk out of range"); - return _region_start + (chunk << Log2ChunkSize); -} - -inline HeapWord* -ParallelCompactData::chunk_to_addr(const ChunkData* chunk) const -{ - return chunk_to_addr(pointer_delta(chunk, _chunk_data, sizeof(ChunkData))); + return region(addr_to_region_idx(addr)); } inline HeapWord* -ParallelCompactData::chunk_to_addr(size_t chunk, size_t offset) const +ParallelCompactData::region_to_addr(size_t region) const { - assert(chunk <= _chunk_count, "chunk out of range"); - assert(offset < ChunkSize, "offset too big"); // This may be too strict. 
- return chunk_to_addr(chunk) + offset; + assert(region <= _region_count, "region out of range"); + return _region_start + (region << Log2RegionSize); +} + +inline HeapWord* +ParallelCompactData::region_to_addr(const RegionData* region) const +{ + return region_to_addr(pointer_delta(region, _region_data, + sizeof(RegionData))); } inline HeapWord* -ParallelCompactData::chunk_align_down(HeapWord* addr) const +ParallelCompactData::region_to_addr(size_t region, size_t offset) const { - assert(addr >= _region_start, "bad addr"); - assert(addr < _region_end + ChunkSize, "bad addr"); - return (HeapWord*)(size_t(addr) & ChunkAddrMask); + assert(region <= _region_count, "region out of range"); + assert(offset < RegionSize, "offset too big"); // This may be too strict. + return region_to_addr(region) + offset; } inline HeapWord* -ParallelCompactData::chunk_align_up(HeapWord* addr) const +ParallelCompactData::region_align_down(HeapWord* addr) const +{ + assert(addr >= _region_start, "bad addr"); + assert(addr < _region_end + RegionSize, "bad addr"); + return (HeapWord*)(size_t(addr) & RegionAddrMask); +} + +inline HeapWord* +ParallelCompactData::region_align_up(HeapWord* addr) const { assert(addr >= _region_start, "bad addr"); assert(addr <= _region_end, "bad addr"); - return chunk_align_down(addr + ChunkSizeOffsetMask); + return region_align_down(addr + RegionSizeOffsetMask); } inline bool -ParallelCompactData::is_chunk_aligned(HeapWord* addr) const -{ - return chunk_offset(addr) == 0; -} - -inline size_t -ParallelCompactData::block_offset(const HeapWord* addr) const -{ - assert(addr >= _region_start, "bad addr"); - assert(addr <= _region_end, "bad addr"); - return pointer_delta(addr, _region_start) & BlockOffsetMask; -} - -inline size_t -ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const +ParallelCompactData::is_region_aligned(HeapWord* addr) const { - assert(addr >= _region_start, "bad addr"); - assert(addr <= _region_end, "bad addr"); - return 
pointer_delta(addr, _region_start) >> Log2BlockSize; -} - -inline ParallelCompactData::BlockData* -ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const -{ - return block(addr_to_block_idx(addr)); -} - -inline HeapWord* -ParallelCompactData::block_to_addr(size_t block) const -{ - assert(block < _block_count, "block out of range"); - return _region_start + (block << Log2BlockSize); + return region_offset(addr) == 0; } // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the @@ -687,45 +546,15 @@ _words_remaining -= words; } -// Closure for updating the block data during the summary phase. -class BitBlockUpdateClosure: public ParMarkBitMapClosure { - // ParallelCompactData::BlockData::blk_ofs_t _live_data_left; - size_t _live_data_left; - size_t _cur_block; - HeapWord* _chunk_start; - HeapWord* _chunk_end; - size_t _chunk_index; - - public: - BitBlockUpdateClosure(ParMarkBitMap* mbm, - ParCompactionManager* cm, - size_t chunk_index); - - size_t cur_block() { return _cur_block; } - size_t chunk_index() { return _chunk_index; } - size_t live_data_left() { return _live_data_left; } - // Returns true the first bit in the current block (cur_block) is - // a start bit. - // Returns true if the current block is within the chunk for the closure; - bool chunk_contains_cur_block(); - - // Set the chunk index and related chunk values for - // a new chunk. - void reset_chunk(size_t chunk_index); - - virtual IterationStatus do_addr(HeapWord* addr, size_t words); -}; - -// The UseParallelOldGC collector is a stop-the-world garbage -// collector that does parts of the collection using parallel threads. -// The collection includes the tenured generation and the young -// generation. The permanent generation is collected at the same -// time as the other two generations but the permanent generation -// is collect by a single GC thread. 
The permanent generation is -// collected serially because of the requirement that during the -// processing of a klass AAA, any objects reference by AAA must -// already have been processed. This requirement is enforced by -// a left (lower address) to right (higher address) sliding compaction. +// The UseParallelOldGC collector is a stop-the-world garbage collector that +// does parts of the collection using parallel threads. The collection includes +// the tenured generation and the young generation. The permanent generation is +// collected at the same time as the other two generations but the permanent +// generation is collect by a single GC thread. The permanent generation is +// collected serially because of the requirement that during the processing of a +// klass AAA, any objects reference by AAA must already have been processed. +// This requirement is enforced by a left (lower address) to right (higher +// address) sliding compaction. // // There are four phases of the collection. // @@ -740,81 +569,75 @@ // - move the objects to their destination // - update some references and reinitialize some variables // -// These three phases are invoked in PSParallelCompact::invoke_no_policy(). -// The marking phase is implemented in PSParallelCompact::marking_phase() -// and does a complete marking of the heap. -// The summary phase is implemented in PSParallelCompact::summary_phase(). -// The move and update phase is implemented in PSParallelCompact::compact(). +// These three phases are invoked in PSParallelCompact::invoke_no_policy(). The +// marking phase is implemented in PSParallelCompact::marking_phase() and does a +// complete marking of the heap. The summary phase is implemented in +// PSParallelCompact::summary_phase(). The move and update phase is implemented +// in PSParallelCompact::compact(). // -// A space that is being collected is divided into chunks and with -// each chunk is associated an object of type ParallelCompactData. 
-// Each chunk is of a fixed size and typically will contain more than -// 1 object and may have parts of objects at the front and back of the -// chunk. +// A space that is being collected is divided into regions and with each region +// is associated an object of type ParallelCompactData. Each region is of a +// fixed size and typically will contain more than 1 object and may have parts +// of objects at the front and back of the region. // -// chunk -----+---------------------+---------- +// region -----+---------------------+---------- // objects covered [ AAA )[ BBB )[ CCC )[ DDD ) // -// The marking phase does a complete marking of all live objects in the -// heap. The marking also compiles the size of the data for -// all live objects covered by the chunk. This size includes the -// part of any live object spanning onto the chunk (part of AAA -// if it is live) from the front, all live objects contained in the chunk -// (BBB and/or CCC if they are live), and the part of any live objects -// covered by the chunk that extends off the chunk (part of DDD if it is -// live). The marking phase uses multiple GC threads and marking is -// done in a bit array of type ParMarkBitMap. The marking of the -// bit map is done atomically as is the accumulation of the size of the -// live objects covered by a chunk. +// The marking phase does a complete marking of all live objects in the heap. +// The marking also compiles the size of the data for all live objects covered +// by the region. This size includes the part of any live object spanning onto +// the region (part of AAA if it is live) from the front, all live objects +// contained in the region (BBB and/or CCC if they are live), and the part of +// any live objects covered by the region that extends off the region (part of +// DDD if it is live). The marking phase uses multiple GC threads and marking +// is done in a bit array of type ParMarkBitMap. 
The marking of the bit map is +// done atomically as is the accumulation of the size of the live objects +// covered by a region. // -// The summary phase calculates the total live data to the left of -// each chunk XXX. Based on that total and the bottom of the space, -// it can calculate the starting location of the live data in XXX. -// The summary phase calculates for each chunk XXX quantites such as +// The summary phase calculates the total live data to the left of each region +// XXX. Based on that total and the bottom of the space, it can calculate the +// starting location of the live data in XXX. The summary phase calculates for +// each region XXX quantites such as // -// - the amount of live data at the beginning of a chunk from an object -// entering the chunk. -// - the location of the first live data on the chunk -// - a count of the number of chunks receiving live data from XXX. +// - the amount of live data at the beginning of a region from an object +// entering the region. +// - the location of the first live data on the region +// - a count of the number of regions receiving live data from XXX. // // See ParallelCompactData for precise details. The summary phase also -// calculates the dense prefix for the compaction. The dense prefix -// is a portion at the beginning of the space that is not moved. The -// objects in the dense prefix do need to have their object references -// updated. See method summarize_dense_prefix(). +// calculates the dense prefix for the compaction. The dense prefix is a +// portion at the beginning of the space that is not moved. The objects in the +// dense prefix do need to have their object references updated. See method +// summarize_dense_prefix(). // // The summary phase is done using 1 GC thread. // -// The compaction phase moves objects to their new location and updates -// all references in the object. +// The compaction phase moves objects to their new location and updates all +// references in the object. 
// -// A current exception is that objects that cross a chunk boundary -// are moved but do not have their references updated. References are -// not updated because it cannot easily be determined if the klass -// pointer KKK for the object AAA has been updated. KKK likely resides -// in a chunk to the left of the chunk containing AAA. These AAA's -// have there references updated at the end in a clean up phase. -// See the method PSParallelCompact::update_deferred_objects(). An -// alternate strategy is being investigated for this deferral of updating. +// A current exception is that objects that cross a region boundary are moved +// but do not have their references updated. References are not updated because +// it cannot easily be determined if the klass pointer KKK for the object AAA +// has been updated. KKK likely resides in a region to the left of the region +// containing AAA. These AAA's have there references updated at the end in a +// clean up phase. See the method PSParallelCompact::update_deferred_objects(). +// An alternate strategy is being investigated for this deferral of updating. // -// Compaction is done on a chunk basis. A chunk that is ready to be -// filled is put on a ready list and GC threads take chunk off the list -// and fill them. A chunk is ready to be filled if it -// empty of live objects. Such a chunk may have been initially -// empty (only contained -// dead objects) or may have had all its live objects copied out already. -// A chunk that compacts into itself is also ready for filling. The -// ready list is initially filled with empty chunks and chunks compacting -// into themselves. There is always at least 1 chunk that can be put on -// the ready list. The chunks are atomically added and removed from -// the ready list. -// +// Compaction is done on a region basis. A region that is ready to be filled is +// put on a ready list and GC threads take region off the list and fill them. 
A +// region is ready to be filled if it empty of live objects. Such a region may +// have been initially empty (only contained dead objects) or may have had all +// its live objects copied out already. A region that compacts into itself is +// also ready for filling. The ready list is initially filled with empty +// regions and regions compacting into themselves. There is always at least 1 +// region that can be put on the ready list. The regions are atomically added +// and removed from the ready list. + class PSParallelCompact : AllStatic { public: // Convenient access to type names. typedef ParMarkBitMap::idx_t idx_t; - typedef ParallelCompactData::ChunkData ChunkData; - typedef ParallelCompactData::BlockData BlockData; + typedef ParallelCompactData::RegionData RegionData; typedef enum { perm_space_id, old_space_id, eden_space_id, @@ -977,26 +800,26 @@ // not reclaimed). static double dead_wood_limiter(double density, size_t min_percent); - // Find the first (left-most) chunk in the range [beg, end) that has at least + // Find the first (left-most) region in the range [beg, end) that has at least // dead_words of dead space to the left. The argument beg must be the first - // chunk in the space that is not completely live. - static ChunkData* dead_wood_limit_chunk(const ChunkData* beg, - const ChunkData* end, - size_t dead_words); + // region in the space that is not completely live. + static RegionData* dead_wood_limit_region(const RegionData* beg, + const RegionData* end, + size_t dead_words); - // Return a pointer to the first chunk in the range [beg, end) that is not + // Return a pointer to the first region in the range [beg, end) that is not // completely full. 
- static ChunkData* first_dead_space_chunk(const ChunkData* beg, - const ChunkData* end); + static RegionData* first_dead_space_region(const RegionData* beg, + const RegionData* end); // Return a value indicating the benefit or 'yield' if the compacted region // were to start (or equivalently if the dense prefix were to end) at the - // candidate chunk. Higher values are better. + // candidate region. Higher values are better. // // The value is based on the amount of space reclaimed vs. the costs of (a) // updating references in the dense prefix plus (b) copying objects and // updating references in the compacted region. - static inline double reclaimed_ratio(const ChunkData* const candidate, + static inline double reclaimed_ratio(const RegionData* const candidate, HeapWord* const bottom, HeapWord* const top, HeapWord* const new_top); @@ -1005,9 +828,9 @@ static HeapWord* compute_dense_prefix(const SpaceId id, bool maximum_compaction); - // Return true if dead space crosses onto the specified Chunk; bit must be the - // bit index corresponding to the first word of the Chunk. - static inline bool dead_space_crosses_boundary(const ChunkData* chunk, + // Return true if dead space crosses onto the specified Region; bit must be + // the bit index corresponding to the first word of the Region. + static inline bool dead_space_crosses_boundary(const RegionData* region, idx_t bit); // Summary phase utility routine to fill dead space (if any) at the dense @@ -1019,12 +842,6 @@ static void summarize_space(SpaceId id, bool maximum_compaction); static void summary_phase(ParCompactionManager* cm, bool maximum_compaction); - static bool block_first_offset(size_t block_index, idx_t* block_offset_ptr); - - // Fill in the BlockData - static void summarize_blocks(ParCompactionManager* cm, - SpaceId first_compaction_space_id); - // The space that is compacted after space_id. 
static SpaceId next_compaction_space_id(SpaceId space_id); @@ -1038,16 +855,16 @@ static void compact_perm(ParCompactionManager* cm); static void compact(); - // Add available chunks to the stack and draining tasks to the task queue. - static void enqueue_chunk_draining_tasks(GCTaskQueue* q, - uint parallel_gc_threads); + // Add available regions to the stack and draining tasks to the task queue. + static void enqueue_region_draining_tasks(GCTaskQueue* q, + uint parallel_gc_threads); // Add dense prefix update tasks to the task queue. static void enqueue_dense_prefix_tasks(GCTaskQueue* q, uint parallel_gc_threads); - // Add chunk stealing tasks to the task queue. - static void enqueue_chunk_stealing_tasks( + // Add region stealing tasks to the task queue. + static void enqueue_region_stealing_tasks( GCTaskQueue* q, ParallelTaskTerminator* terminator_ptr, uint parallel_gc_threads); @@ -1154,56 +971,56 @@ // Move and update the live objects in the specified space. static void move_and_update(ParCompactionManager* cm, SpaceId space_id); - // Process the end of the given chunk range in the dense prefix. + // Process the end of the given region range in the dense prefix. // This includes saving any object not updated. - static void dense_prefix_chunks_epilogue(ParCompactionManager* cm, - size_t chunk_start_index, - size_t chunk_end_index, - idx_t exiting_object_offset, - idx_t chunk_offset_start, - idx_t chunk_offset_end); + static void dense_prefix_regions_epilogue(ParCompactionManager* cm, + size_t region_start_index, + size_t region_end_index, + idx_t exiting_object_offset, + idx_t region_offset_start, + idx_t region_offset_end); - // Update a chunk in the dense prefix. For each live object - // in the chunk, update it's interior references. For each + // Update a region in the dense prefix. For each live object + // in the region, update it's interior references. For each // dead object, fill it with deadwood. 
Dead space at the end - // of a chunk range will be filled to the start of the next - // live object regardless of the chunk_index_end. None of the + // of a region range will be filled to the start of the next + // live object regardless of the region_index_end. None of the // objects in the dense prefix move and dead space is dead // (holds only dead objects that don't need any processing), so // dead space can be filled in any order. static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm, SpaceId space_id, - size_t chunk_index_start, - size_t chunk_index_end); + size_t region_index_start, + size_t region_index_end); // Return the address of the count + 1st live word in the range [beg, end). static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count); // Return the address of the word to be copied to dest_addr, which must be - // aligned to a chunk boundary. + // aligned to a region boundary. static HeapWord* first_src_addr(HeapWord* const dest_addr, - size_t src_chunk_idx); + size_t src_region_idx); - // Determine the next source chunk, set closure.source() to the start of the - // new chunk return the chunk index. Parameter end_addr is the address one + // Determine the next source region, set closure.source() to the start of the + // new region return the region index. Parameter end_addr is the address one // beyond the end of source range just processed. If necessary, switch to a // new source space and set src_space_id (in-out parameter) and src_space_top // (out parameter) accordingly. - static size_t next_src_chunk(MoveAndUpdateClosure& closure, - SpaceId& src_space_id, - HeapWord*& src_space_top, - HeapWord* end_addr); + static size_t next_src_region(MoveAndUpdateClosure& closure, + SpaceId& src_space_id, + HeapWord*& src_space_top, + HeapWord* end_addr); - // Decrement the destination count for each non-empty source chunk in the - // range [beg_chunk, chunk(chunk_align_up(end_addr))). 
+ // Decrement the destination count for each non-empty source region in the + // range [beg_region, region(region_align_up(end_addr))). static void decrement_destination_counts(ParCompactionManager* cm, - size_t beg_chunk, + size_t beg_region, HeapWord* end_addr); - // Fill a chunk, copying objects from one or more source chunks. - static void fill_chunk(ParCompactionManager* cm, size_t chunk_idx); - static void fill_and_update_chunk(ParCompactionManager* cm, size_t chunk) { - fill_chunk(cm, chunk); + // Fill a region, copying objects from one or more source regions. + static void fill_region(ParCompactionManager* cm, size_t region_idx); + static void fill_and_update_region(ParCompactionManager* cm, size_t region) { + fill_region(cm, region); } // Update the deferred objects in the space. @@ -1259,7 +1076,7 @@ #ifndef PRODUCT // Debugging support. static const char* space_names[last_space_id]; - static void print_chunk_ranges(); + static void print_region_ranges(); static void print_dense_prefix_stats(const char* const algorithm, const SpaceId id, const bool maximum_compaction, @@ -1267,7 +1084,7 @@ #endif // #ifndef PRODUCT #ifdef ASSERT - // Verify that all the chunks have been emptied. + // Verify that all the regions have been emptied. 
static void verify_complete(SpaceId space_id); #endif // #ifdef ASSERT }; @@ -1376,17 +1193,17 @@ } inline bool -PSParallelCompact::dead_space_crosses_boundary(const ChunkData* chunk, +PSParallelCompact::dead_space_crosses_boundary(const RegionData* region, idx_t bit) { - assert(bit > 0, "cannot call this for the first bit/chunk"); - assert(_summary_data.chunk_to_addr(chunk) == _mark_bitmap.bit_to_addr(bit), + assert(bit > 0, "cannot call this for the first bit/region"); + assert(_summary_data.region_to_addr(region) == _mark_bitmap.bit_to_addr(bit), "sanity check"); // Dead space crosses the boundary if (1) a partial object does not extend - // onto the chunk, (2) an object does not start at the beginning of the chunk, - // and (3) an object does not end at the end of the prior chunk. - return chunk->partial_obj_size() == 0 && + // onto the region, (2) an object does not start at the beginning of the + // region, and (3) an object does not end at the end of the prior region. + return region->partial_obj_size() == 0 && !_mark_bitmap.is_obj_beg(bit) && !_mark_bitmap.is_obj_end(bit - 1); } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -123,8 +123,6 @@ void PSPermGen::precompact() { // Reset start array first. 
- debug_only(if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) {) _start_array.reset(); - debug_only(}) object_mark_sweep()->precompact(); } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/shared/immutableSpace.hpp --- a/src/share/vm/gc_implementation/shared/immutableSpace.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/shared/immutableSpace.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -50,7 +50,8 @@ size_t capacity_in_bytes() const { return capacity_in_words() * HeapWordSize; } // Size computations. Sizes are in heapwords. - size_t capacity_in_words() const { return pointer_delta(end(), bottom()); } + size_t capacity_in_words() const { return pointer_delta(end(), bottom()); } + virtual size_t capacity_in_words(Thread*) const { return capacity_in_words(); } // Iteration. virtual void oop_iterate(OopClosure* cl); diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/shared/markSweep.inline.hpp --- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -23,13 +23,6 @@ */ inline void MarkSweep::mark_object(oop obj) { -#ifndef SERIALGC - if (UseParallelOldGC && VerifyParallelOldWithMarkSweep) { - assert(PSParallelCompact::mark_bitmap()->is_marked(obj), - "Should be marked in the marking bitmap"); - } -#endif // SERIALGC - // some marks may contain information we need to preserve so we store them away // and overwrite the mark. We'll restore it at the end of markSweep. 
markOop mark = obj->mark(); diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -181,6 +181,25 @@ return lgrp_spaces()->at(i)->space()->free_in_bytes(); } + +size_t MutableNUMASpace::capacity_in_words(Thread* thr) const { + guarantee(thr != NULL, "No thread"); + int lgrp_id = thr->lgrp_id(); + if (lgrp_id == -1) { + if (lgrp_spaces()->length() > 0) { + return capacity_in_words() / lgrp_spaces()->length(); + } else { + assert(false, "There should be at least one locality group"); + return 0; + } + } + int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals); + if (i == -1) { + return 0; + } + return lgrp_spaces()->at(i)->space()->capacity_in_words(); +} + // Check if the NUMA topology has changed. Add and remove spaces if needed. // The update can be forced by setting the force parameter equal to true. 
bool MutableNUMASpace::update_layout(bool force) { @@ -722,7 +741,8 @@ i = os::random() % lgrp_spaces()->length(); } - MutableSpace *s = lgrp_spaces()->at(i)->space(); + LGRPSpace* ls = lgrp_spaces()->at(i); + MutableSpace *s = ls->space(); HeapWord *p = s->allocate(size); if (p != NULL) { @@ -743,6 +763,9 @@ *(int*)i = 0; } } + if (p == NULL) { + ls->set_allocation_failed(); + } return p; } @@ -761,7 +784,8 @@ if (i == -1) { i = os::random() % lgrp_spaces()->length(); } - MutableSpace *s = lgrp_spaces()->at(i)->space(); + LGRPSpace *ls = lgrp_spaces()->at(i); + MutableSpace *s = ls->space(); HeapWord *p = s->cas_allocate(size); if (p != NULL) { size_t remainder = pointer_delta(s->end(), p + size); @@ -790,6 +814,9 @@ *(int*)i = 0; } } + if (p == NULL) { + ls->set_allocation_failed(); + } return p; } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -60,6 +60,7 @@ MutableSpace* _space; MemRegion _invalid_region; AdaptiveWeightedAverage *_alloc_rate; + bool _allocation_failed; struct SpaceStats { size_t _local_space, _remote_space, _unbiased_space, _uncommited_space; @@ -81,7 +82,7 @@ char* last_page_scanned() { return _last_page_scanned; } void set_last_page_scanned(char* p) { _last_page_scanned = p; } public: - LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) { + LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) { _space = new MutableSpace(); _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight); } @@ -103,8 +104,21 @@ return *(int*)lgrp_id_value == p->lgrp_id(); } + // Report a failed allocation. 
+ void set_allocation_failed() { _allocation_failed = true; } + void sample() { - alloc_rate()->sample(space()->used_in_bytes()); + // If there was a failed allocation make allocation rate equal + // to the size of the whole chunk. This ensures the progress of + // the adaptation process. + size_t alloc_rate_sample; + if (_allocation_failed) { + alloc_rate_sample = space()->capacity_in_bytes(); + _allocation_failed = false; + } else { + alloc_rate_sample = space()->used_in_bytes(); + } + alloc_rate()->sample(alloc_rate_sample); } MemRegion invalid_region() const { return _invalid_region; } @@ -190,6 +204,9 @@ virtual void ensure_parsability(); virtual size_t used_in_words() const; virtual size_t free_in_words() const; + + using MutableSpace::capacity_in_words; + virtual size_t capacity_in_words(Thread* thr) const; virtual size_t tlab_capacity(Thread* thr) const; virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/includeDB_compiler2 --- a/src/share/vm/includeDB_compiler2 Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/includeDB_compiler2 Wed Oct 01 20:15:03 2008 -0400 @@ -586,6 +586,7 @@ loopTransform.cpp addnode.hpp loopTransform.cpp allocation.inline.hpp loopTransform.cpp connode.hpp +loopTransform.cpp compileLog.hpp loopTransform.cpp divnode.hpp loopTransform.cpp loopnode.hpp loopTransform.cpp mulnode.hpp @@ -601,6 +602,7 @@ loopnode.cpp allocation.inline.hpp loopnode.cpp callnode.hpp loopnode.cpp ciMethodData.hpp +loopnode.cpp compileLog.hpp loopnode.cpp connode.hpp loopnode.cpp divnode.hpp loopnode.cpp loopnode.hpp diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/bytecodeInfo.cpp --- a/src/share/vm/opto/bytecodeInfo.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/bytecodeInfo.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -25,19 +25,6 @@ #include "incls/_precompiled.incl" #include "incls/_bytecodeInfo.cpp.incl" -// These variables are declared in parse1.cpp -extern int 
explicit_null_checks_inserted; -extern int explicit_null_checks_elided; -extern int explicit_null_checks_inserted_old; -extern int explicit_null_checks_elided_old; -extern int nodes_created_old; -extern int nodes_created; -extern int methods_parsed_old; -extern int methods_parsed; -extern int methods_seen; -extern int methods_seen_old; - - //============================================================================= //------------------------------InlineTree------------------------------------- InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio ) @@ -517,27 +504,3 @@ } return iltp; } - -// ---------------------------------------------------------------------------- -#ifndef PRODUCT - -static void per_method_stats() { - // Compute difference between this method's cumulative totals and old totals - int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old; - int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old; - - // Print differences - if( explicit_null_checks_cur ) - tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur); - if( elided_null_checks_cur ) - tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur); - - // Store the current cumulative totals - nodes_created_old = nodes_created; - methods_parsed_old = methods_parsed; - methods_seen_old = methods_seen; - explicit_null_checks_inserted_old = explicit_null_checks_inserted; - explicit_null_checks_elided_old = explicit_null_checks_elided; -} - -#endif diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/callnode.cpp --- a/src/share/vm/opto/callnode.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/callnode.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -1034,6 +1034,39 @@ //============================================================================= uint 
AllocateArrayNode::size_of() const { return sizeof(*this); } +// Retrieve the length from the AllocateArrayNode. Narrow the type with a +// CastII, if appropriate. If we are not allowed to create new nodes, and +// a CastII is appropriate, return NULL. +Node *AllocateArrayNode::make_ideal_length(const TypeOopPtr* oop_type, PhaseTransform *phase, bool allow_new_nodes) { + Node *length = in(AllocateNode::ALength); + assert(length != NULL, "length is not null"); + + const TypeInt* length_type = phase->find_int_type(length); + const TypeAryPtr* ary_type = oop_type->isa_aryptr(); + + if (ary_type != NULL && length_type != NULL) { + const TypeInt* narrow_length_type = ary_type->narrow_size_type(length_type); + if (narrow_length_type != length_type) { + // Assert one of: + // - the narrow_length is 0 + // - the narrow_length is not wider than length + assert(narrow_length_type == TypeInt::ZERO || + (narrow_length_type->_hi <= length_type->_hi && + narrow_length_type->_lo >= length_type->_lo), + "narrow type must be narrower than length type"); + + // Return NULL if new nodes are not allowed + if (!allow_new_nodes) return NULL; + // Create a cast which is control dependent on the initialization to + // propagate the fact that the array length must be positive. + length = new (phase->C, 2) CastIINode(length, narrow_length_type); + length->set_req(0, initialization()->proj_out(0)); + } + } + + return length; +} + //============================================================================= uint LockNode::size_of() const { return sizeof(*this); } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/callnode.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -755,6 +755,15 @@ virtual int Opcode() const; virtual uint size_of() const; // Size is bigger + // Dig the length operand out of a array allocation site. 
+ Node* Ideal_length() { + return in(AllocateNode::ALength); + } + + // Dig the length operand out of a array allocation site and narrow the + // type with a CastII, if necesssary + Node* make_ideal_length(const TypeOopPtr* ary_type, PhaseTransform *phase, bool can_create = true); + // Pattern-match a possible usage of AllocateArrayNode. // Return null if no allocation is recognized. static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) { @@ -762,12 +771,6 @@ return (allo == NULL || !allo->is_AllocateArray()) ? NULL : allo->as_AllocateArray(); } - - // Dig the length operand out of a (possible) array allocation site. - static Node* Ideal_length(Node* ptr, PhaseTransform* phase) { - AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase); - return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength); - } }; //------------------------------AbstractLockNode----------------------------------- diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/cfgnode.cpp --- a/src/share/vm/opto/cfgnode.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/cfgnode.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -1665,7 +1665,11 @@ // compress paths and change unreachable cycles to TOP // If not, we can update the input infinitely along a MergeMem cycle // Equivalent code is in MemNode::Ideal_common - Node *m = phase->transform(n); + Node *m = phase->transform(n); + if (outcnt() == 0) { // Above transform() may kill us! + progress = phase->C->top(); + break; + } // If tranformed to a MergeMem, get the desired slice // Otherwise the returned node represents memory for every slice Node *new_mem = (m->is_MergeMem()) ? @@ -1765,6 +1769,51 @@ } } +#ifdef _LP64 + // Push DecodeN down through phi. + // The rest of phi graph will transform by split EncodeP node though phis up. 
+ if (UseCompressedOops && can_reshape && progress == NULL) { + bool may_push = true; + bool has_decodeN = false; + Node* in_decodeN = NULL; + for (uint i=1; iis_DecodeN() && ii->bottom_type() == bottom_type()) { + has_decodeN = true; + in_decodeN = ii->in(1); + } else if (!ii->is_Phi()) { + may_push = false; + } + } + + if (has_decodeN && may_push) { + PhaseIterGVN *igvn = phase->is_IterGVN(); + // Note: in_decodeN is used only to define the type of new phi here. + PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN); + uint orig_cnt = req(); + for (uint i=1; iis_DecodeN()) { + assert(ii->bottom_type() == bottom_type(), "sanity"); + new_ii = ii->in(1); + } else { + assert(ii->is_Phi(), "sanity"); + if (ii->as_Phi() == this) { + new_ii = new_phi; + } else { + new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type()); + igvn->register_new_node_with_optimizer(new_ii); + } + } + new_phi->set_req(i, new_ii); + } + igvn->register_new_node_with_optimizer(new_phi, this); + progress = new (phase->C, 2) DecodeNNode(new_phi, bottom_type()); + } + } +#endif + return progress; // Return any progress } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/compile.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -467,6 +467,7 @@ } } set_print_assembly(print_opto_assembly); + set_parsed_irreducible_loop(false); #endif if (ProfileTraps) { @@ -550,6 +551,8 @@ rethrow_exceptions(kit.transfer_exceptions_into_jvms()); } + print_method("Before RemoveUseless"); + // Remove clutter produced by parsing. 
if (!failing()) { ResourceMark rm; @@ -615,8 +618,6 @@ if (failing()) return; NOT_PRODUCT( verify_graph_edges(); ) - print_method("Before Matching"); - #ifndef PRODUCT if (PrintIdeal) { ttyLocker ttyl; // keep the following output all in one block @@ -720,6 +721,7 @@ TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false); TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false); set_print_assembly(PrintFrameConverterAssembly); + set_parsed_irreducible_loop(false); #endif CompileWrapper cw(this); Init(/*AliasLevel=*/ 0); @@ -2073,6 +2075,44 @@ } #ifdef _LP64 + case Op_CastPP: + if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) { + Compile* C = Compile::current(); + Node* in1 = n->in(1); + const Type* t = n->bottom_type(); + Node* new_in1 = in1->clone(); + new_in1->as_DecodeN()->set_type(t); + + if (!Matcher::clone_shift_expressions) { + // + // x86, ARM and friends can handle 2 adds in addressing mode + // and Matcher can fold a DecodeN node into address by using + // a narrow oop directly and do implicit NULL check in address: + // + // [R12 + narrow_oop_reg<<3 + offset] + // NullCheck narrow_oop_reg + // + // On other platforms (Sparc) we have to keep new DecodeN node and + // use it to do implicit NULL check in address: + // + // decode_not_null narrow_oop_reg, base_reg + // [base_reg + offset] + // NullCheck base_reg + // + // Pin the new DecodeN node to non-null path on these patforms (Sparc) + // to keep the information to which NULL check the new DecodeN node + // corresponds to use it as value in implicit_null_check(). + // + new_in1->set_req(0, n->in(0)); + } + + n->subsume_by(new_in1); + if (in1->outcnt() == 0) { + in1->disconnect_inputs(NULL); + } + } + break; + case Op_CmpP: // Do this transformation here to preserve CmpPNode::sub() and // other TypePtr related Ideal optimizations (for example, ptr nullness). 
@@ -2092,24 +2132,44 @@ } else if (in2->Opcode() == Op_ConP) { const Type* t = in2->bottom_type(); if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) { - if (Matcher::clone_shift_expressions) { - // x86, ARM and friends can handle 2 adds in addressing mode. - // Decode a narrow oop and do implicit NULL check in address - // [R12 + narrow_oop_reg<<3 + offset] - new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); - } else { - // Don't replace CmpP(o ,null) if 'o' is used in AddP - // to generate implicit NULL check on Sparc where - // narrow oops can't be used in address. - uint i = 0; - for (; i < in1->outcnt(); i++) { - if (in1->raw_out(i)->is_AddP()) - break; - } - if (i >= in1->outcnt()) { - new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); - } - } + new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); + // + // This transformation together with CastPP transformation above + // will generated code for implicit NULL checks for compressed oops. + // + // The original code after Optimize() + // + // LoadN memory, narrow_oop_reg + // decode narrow_oop_reg, base_reg + // CmpP base_reg, NULL + // CastPP base_reg // NotNull + // Load [base_reg + offset], val_reg + // + // after these transformations will be + // + // LoadN memory, narrow_oop_reg + // CmpN narrow_oop_reg, NULL + // decode_not_null narrow_oop_reg, base_reg + // Load [base_reg + offset], val_reg + // + // and the uncommon path (== NULL) will use narrow_oop_reg directly + // since narrow oops can be used in debug info now (see the code in + // final_graph_reshaping_walk()). 
+ // + // At the end the code will be matched to + // on x86: + // + // Load_narrow_oop memory, narrow_oop_reg + // Load [R12 + narrow_oop_reg<<3 + offset], val_reg + // NullCheck narrow_oop_reg + // + // and on sparc: + // + // Load_narrow_oop memory, narrow_oop_reg + // decode_not_null narrow_oop_reg, base_reg + // Load [base_reg + offset], val_reg + // NullCheck base_reg + // } else if (t->isa_oopptr()) { new_in2 = ConNode::make(C, t->make_narrowoop()); } @@ -2126,6 +2186,49 @@ } } break; + + case Op_DecodeN: + assert(!n->in(1)->is_EncodeP(), "should be optimized out"); + break; + + case Op_EncodeP: { + Node* in1 = n->in(1); + if (in1->is_DecodeN()) { + n->subsume_by(in1->in(1)); + } else if (in1->Opcode() == Op_ConP) { + Compile* C = Compile::current(); + const Type* t = in1->bottom_type(); + if (t == TypePtr::NULL_PTR) { + n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR)); + } else if (t->isa_oopptr()) { + n->subsume_by(ConNode::make(C, t->make_narrowoop())); + } + } + if (in1->outcnt() == 0) { + in1->disconnect_inputs(NULL); + } + break; + } + + case Op_Phi: + if (n->as_Phi()->bottom_type()->isa_narrowoop()) { + // The EncodeP optimization may create Phi with the same edges + // for all paths. It is not handled well by Register Allocator. 
+ Node* unique_in = n->in(1); + assert(unique_in != NULL, ""); + uint cnt = n->req(); + for (uint i = 2; i < cnt; i++) { + Node* m = n->in(i); + assert(m != NULL, ""); + if (unique_in != m) + unique_in = NULL; + } + if (unique_in != NULL) { + n->subsume_by(unique_in); + } + } + break; + #endif case Op_ModI: diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/compile.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -160,6 +160,7 @@ bool _print_assembly; // True if we should dump assembly code for this compilation #ifndef PRODUCT bool _trace_opto_output; + bool _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing #endif // Compilation environment. @@ -319,6 +320,8 @@ } #ifndef PRODUCT bool trace_opto_output() const { return _trace_opto_output; } + bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; } + void set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; } #endif void begin_method() { diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/connode.cpp --- a/src/share/vm/opto/connode.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/connode.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -433,8 +433,8 @@ // If not converting int->oop, throw away cast after constant propagation Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { const Type *t = ccp->type(in(1)); - if (!t->isa_oop_ptr()) { - return NULL; // do not transform raw pointers + if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) { + return NULL; // do not transform raw pointers or narrow oops } return ConstraintCastNode::Ideal_DU_postCCP(ccp); } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/doCall.cpp --- a/src/share/vm/opto/doCall.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/doCall.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -795,7 +795,7 @@ ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass(); if (ikl->is_loaded() 
&& ikl->is_initialized() && !ikl->is_interface() && - (ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) { + (ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) { // ikl is a same or better type than the original actual_receiver, // e.g. static receiver from bytecodes. actual_receiver = ikl; diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/graphKit.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -587,7 +587,7 @@ #ifdef ASSERT _bci = kit->bci(); Parse* parser = kit->is_Parse(); - int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order(); + int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo(); _block = block; #endif } @@ -596,7 +596,7 @@ #ifdef ASSERT assert(kit->bci() == _bci, "bci must not shift"); Parse* parser = kit->is_Parse(); - int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order(); + int block = (parser == NULL || parser->block() == NULL) ? 
-1 : parser->block()->rpo(); assert(block == _block, "block must not shift"); #endif kit->set_map(_map); @@ -1049,10 +1049,19 @@ //-------------------------load_array_length----------------------------------- Node* GraphKit::load_array_length(Node* array) { // Special-case a fresh allocation to avoid building nodes: - Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn); - if (alen != NULL) return alen; - Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes()); - return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS)); + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(array, &_gvn); + Node *alen; + if (alloc == NULL) { + Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes()); + alen = _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS)); + } else { + alen = alloc->Ideal_length(); + Node* ccast = alloc->make_ideal_length(_gvn.type(array)->is_aryptr(), &_gvn); + if (ccast != alen) { + alen = _gvn.transform(ccast); + } + } + return alen; } //------------------------------do_null_check---------------------------------- @@ -2847,20 +2856,18 @@ assert(just_allocated_object(control()) == javaoop, "just allocated"); #ifdef ASSERT - { // Verify that the AllocateNode::Ideal_foo recognizers work: - Node* kn = alloc->in(AllocateNode::KlassNode); - Node* ln = alloc->in(AllocateNode::ALength); - assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn, - "Ideal_klass works"); - assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn, - "Ideal_klass works"); + { // Verify that the AllocateNode::Ideal_allocation recognizers work: + assert(AllocateNode::Ideal_allocation(rawoop, &_gvn) == alloc, + "Ideal_allocation works"); + assert(AllocateNode::Ideal_allocation(javaoop, &_gvn) == alloc, + "Ideal_allocation works"); if (alloc->is_AllocateArray()) { - assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln, - "Ideal_length works"); - 
assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln, - "Ideal_length works"); + assert(AllocateArrayNode::Ideal_array_allocation(rawoop, &_gvn) == alloc->as_AllocateArray(), + "Ideal_allocation works"); + assert(AllocateArrayNode::Ideal_array_allocation(javaoop, &_gvn) == alloc->as_AllocateArray(), + "Ideal_allocation works"); } else { - assert(ln->is_top(), "no length, please"); + assert(alloc->in(AllocateNode::ALength)->is_top(), "no length, please"); } } #endif //ASSERT @@ -3109,25 +3116,20 @@ // (This happens via a non-constant argument to inline_native_newArray.) // In any case, the value of klass_node provides the desired array type. const TypeInt* length_type = _gvn.find_int_type(length); - const TypeInt* narrow_length_type = NULL; const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type(); if (ary_type->isa_aryptr() && length_type != NULL) { // Try to get a better type than POS for the size ary_type = ary_type->is_aryptr()->cast_to_size(length_type); - narrow_length_type = ary_type->is_aryptr()->size(); - if (narrow_length_type == length_type) - narrow_length_type = NULL; } Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only); - // Cast length on remaining path to be positive: - if (narrow_length_type != NULL) { - Node* ccast = new (C, 2) CastIINode(length, narrow_length_type); - ccast->set_req(0, control()); - _gvn.set_type_bottom(ccast); - record_for_igvn(ccast); - if (map()->find_edge(length) >= 0) { + // Cast length on remaining path to be as narrow as possible + if (map()->find_edge(length) >= 0) { + Node* ccast = alloc->make_ideal_length(ary_type, &_gvn); + if (ccast != length) { + _gvn.set_type_bottom(ccast); + record_for_igvn(ccast); replace_in_map(length, ccast); } } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/ifg.cpp --- a/src/share/vm/opto/ifg.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/ifg.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -485,8 +485,9 @@ // Liveout things are 
presumed live for the whole block. We accumulate // 'area' accordingly. If they get killed in the block, we'll subtract // the unused part of the block from the area. - double cost = b->_freq * double(last_inst-last_phi); - assert( cost >= 0, "negative spill cost" ); + int inst_count = last_inst - last_phi; + double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count); + assert(!(cost < 0.0), "negative spill cost" ); IndexSetIterator elements(&liveout); uint lidx; while ((lidx = elements.next()) != 0) { @@ -590,7 +591,7 @@ } else { // Else it is live // A DEF also ends 'area' partway through the block. lrgs(r)._area -= cost; - assert( lrgs(r)._area >= 0, "negative spill area" ); + assert(!(lrgs(r)._area < 0.0), "negative spill area" ); // Insure high score for immediate-use spill copies so they get a color if( n->is_SpillCopy() @@ -703,8 +704,9 @@ } // End of if normal register-allocated value - cost -= b->_freq; // Area remaining in the block - if( cost < 0.0 ) cost = 0.0; // Cost goes negative in the Phi area + // Area remaining in the block + inst_count--; + cost = (inst_count <= 0) ? 
0.0 : b->_freq * double(inst_count); // Make all inputs live if( !n->is_Phi() ) { // Phi function uses come from prior block @@ -751,7 +753,7 @@ assert( pressure[0] == count_int_pressure (&liveout), "" ); assert( pressure[1] == count_float_pressure(&liveout), "" ); } - assert( lrg._area >= 0, "negative spill area" ); + assert(!(lrg._area < 0.0), "negative spill area" ); } } } // End of reverse pass over all instructions in block diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/loopTransform.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -1012,6 +1012,8 @@ if (!has_ctrl(old)) set_loop(nnn, loop); } + + loop->record_for_igvn(); } //------------------------------do_maximally_unroll---------------------------- diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/loopnode.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -1279,7 +1279,7 @@ // Visit all children, looking for Phis for (DUIterator i = cl->outs(); cl->has_out(i); i++) { Node *out = cl->out(i); - if (!out->is_Phi()) continue; // Looking for phis + if (!out->is_Phi() || out == phi) continue; // Looking for other phis PhiNode* phi2 = out->as_Phi(); Node *incr2 = phi2->in( LoopNode::LoopBackControl ); // Look for induction variables of the form: X += constant @@ -1388,6 +1388,37 @@ #endif +static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) { + if (loop == root) { + if (loop->_child != NULL) { + log->begin_head("loop_tree"); + log->end_head(); + if( loop->_child ) log_loop_tree(root, loop->_child, log); + log->tail("loop_tree"); + assert(loop->_next == NULL, "what?"); + } + } else { + Node* head = loop->_head; + log->begin_head("loop"); + log->print(" idx='%d' ", head->_idx); + if (loop->_irreducible) log->print("irreducible='1' "); + if (head->is_Loop()) { + if 
(head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' "); + if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' "); + } + if (head->is_CountedLoop()) { + CountedLoopNode* cl = head->as_CountedLoop(); + if (cl->is_pre_loop()) log->print("pre_loop='%d' ", cl->main_idx()); + if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx); + if (cl->is_post_loop()) log->print("post_loop='%d' ", cl->main_idx()); + } + log->end_head(); + if( loop->_child ) log_loop_tree(root, loop->_child, log); + log->tail("loop"); + if( loop->_next ) log_loop_tree(root, loop->_next, log); + } +} + //============================================================================= //------------------------------PhaseIdealLoop--------------------------------- // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to @@ -1624,10 +1655,13 @@ // Cleanup any modified bits _igvn.optimize(); - // Do not repeat loop optimizations if irreducible loops are present - // by claiming no-progress. 
- if( _has_irreducible_loops ) - C->clear_major_progress(); + // disable assert until issue with split_flow_path is resolved (6742111) + // assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(), + // "shouldn't introduce irreducible loops"); + + if (C->log() != NULL) { + log_loop_tree(_ltree_root, _ltree_root, C->log()); + } } #ifndef PRODUCT @@ -2732,11 +2766,7 @@ } void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const { - - // Indent by loop nesting depth - for( uint x = 0; x < loop->_nest; x++ ) - tty->print(" "); - tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx); + loop->dump_head(); // Now scan for CFG nodes in the same loop for( uint j=idx; j > 0; j-- ) { diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/loopnode.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -192,6 +192,8 @@ int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; } void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; } + int main_idx() const { return _main_idx; } + void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; } void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/loopopts.cpp --- a/src/share/vm/opto/loopopts.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/loopopts.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -2667,6 +2667,10 @@ // Fix this by adjusting to use the post-increment trip counter. 
Node *phi = cl->phi(); if( !phi ) return; // Dead infinite loop + + // Shape messed up, probably by iteration_split_impl + if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return; + bool progress = true; while (progress) { progress = false; diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/matcher.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -273,7 +273,7 @@ find_shared( C->root() ); find_shared( C->top() ); - C->print_method("Before Matching", 2); + C->print_method("Before Matching"); // Swap out to old-space; emptying new-space Arena *old = C->node_arena()->move_contents(C->old_arena()); @@ -840,7 +840,7 @@ _new2old_map.map(m->_idx, n); #endif if (m->in(0) != NULL) // m might be top - collect_null_checks(m); + collect_null_checks(m, n); } else { // Else just a regular 'ol guy m = n->clone(); // So just clone into new-space #ifdef ASSERT @@ -1478,12 +1478,19 @@ m = _mem_node; assert(m != NULL && m->is_Mem(), "expecting memory node"); } - if (m->adr_type() != mach->adr_type()) { + const Type* mach_at = mach->adr_type(); + // DecodeN node consumed by an address may have different type + // then its input. Don't compare types for such case. + if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() && + m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) { + mach_at = m->adr_type(); + } + if (m->adr_type() != mach_at) { m->dump(); tty->print_cr("mach:"); mach->dump(1); } - assert(m->adr_type() == mach->adr_type(), "matcher should not change adr type"); + assert(m->adr_type() == mach_at, "matcher should not change adr type"); } #endif } @@ -1995,7 +2002,7 @@ // it. Used by later implicit-null-check handling. Actually collects // either an IfTrue or IfFalse for the common NOT-null path, AND the ideal // value being tested. 
-void Matcher::collect_null_checks( Node *proj ) { +void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) { Node *iff = proj->in(0); if( iff->Opcode() == Op_If ) { // During matching If's have Bool & Cmp side-by-side @@ -2008,20 +2015,47 @@ if (ct == TypePtr::NULL_PTR || (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) { + bool push_it = false; if( proj->Opcode() == Op_IfTrue ) { extern int all_null_checks_found; all_null_checks_found++; if( b->_test._test == BoolTest::ne ) { - _null_check_tests.push(proj); - _null_check_tests.push(cmp->in(1)); + push_it = true; } } else { assert( proj->Opcode() == Op_IfFalse, "" ); if( b->_test._test == BoolTest::eq ) { - _null_check_tests.push(proj); - _null_check_tests.push(cmp->in(1)); + push_it = true; } } + if( push_it ) { + _null_check_tests.push(proj); + Node* val = cmp->in(1); +#ifdef _LP64 + if (UseCompressedOops && !Matcher::clone_shift_expressions && + val->bottom_type()->isa_narrowoop()) { + // + // Look for DecodeN node which should be pinned to orig_proj. + // On platforms (Sparc) which can not handle 2 adds + // in addressing mode we have to keep a DecodeN node and + // use it to do implicit NULL check in address. + // + // DecodeN node was pinned to non-null path (orig_proj) during + // CastPP transformation in final_graph_reshaping_impl(). + // + uint cnt = orig_proj->outcnt(); + for (uint i = 0; i < orig_proj->outcnt(); i++) { + Node* d = orig_proj->raw_out(i); + if (d->is_DecodeN() && d->in(1) == val) { + val = d; + val->set_req(0, NULL); // Unpin now. + break; + } + } + } +#endif + _null_check_tests.push(val); + } } } } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/matcher.hpp --- a/src/share/vm/opto/matcher.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/matcher.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -166,7 +166,7 @@ // List of IfFalse or IfTrue Nodes that indicate a taken null test. // List is valid in the post-matching space. 
Node_List _null_check_tests; - void collect_null_checks( Node *proj ); + void collect_null_checks( Node *proj, Node *orig_proj ); void validate_null_checks( ); Matcher( Node_List &proj_list ); diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/memnode.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -1887,6 +1887,38 @@ return tap->size(); } +//-------------------------------Ideal--------------------------------------- +// Feed through the length in AllocateArray(...length...)._length. +Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) { + Node* p = MemNode::Ideal_common(phase, can_reshape); + if (p) return (p == NodeSentinel) ? NULL : p; + + // Take apart the address into an oop and an offset. + // Return NULL if we cannot. + Node* adr = in(MemNode::Address); + intptr_t offset = 0; + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); + if (base == NULL) return NULL; + const TypeAryPtr* tary = phase->type(adr)->isa_aryptr(); + if (tary == NULL) return NULL; + + // We can fetch the length directly through an AllocateArrayNode. + // This works even if the length is not constant (clone or newArray). + if (offset == arrayOopDesc::length_offset_in_bytes()) { + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase); + if (alloc != NULL) { + Node* allocated_length = alloc->Ideal_length(); + Node* len = alloc->make_ideal_length(tary, phase); + if (allocated_length != len) { + // New CastII improves on this. + return len; + } + } + } + + return NULL; +} + //------------------------------Identity--------------------------------------- // Feed through the length in AllocateArray(...length...)._length. Node* LoadRangeNode::Identity( PhaseTransform *phase ) { @@ -1905,15 +1937,22 @@ // We can fetch the length directly through an AllocateArrayNode. // This works even if the length is not constant (clone or newArray).
if (offset == arrayOopDesc::length_offset_in_bytes()) { - Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase); - if (allocated_length != NULL) { - return allocated_length; + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase); + if (alloc != NULL) { + Node* allocated_length = alloc->Ideal_length(); + // Do not allow make_ideal_length to allocate a CastII node. + Node* len = alloc->make_ideal_length(tary, phase, false); + if (allocated_length == len) { + // Return allocated_length only if it would not be improved by a CastII. + return allocated_length; + } } } return this; } + //============================================================================= //---------------------------StoreNode::make----------------------------------- // Polymorphic factory method: diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/memnode.hpp --- a/src/share/vm/opto/memnode.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/memnode.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -241,6 +241,7 @@ virtual int Opcode() const; virtual const Type *Value( PhaseTransform *phase ) const; virtual Node *Identity( PhaseTransform *phase ); + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); }; //------------------------------LoadLNode-------------------------------------- diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/parse.hpp --- a/src/share/vm/opto/parse.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/parse.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -167,9 +167,19 @@ int start() const { return flow()->start(); } int limit() const { return flow()->limit(); } - int pre_order() const { return flow()->pre_order(); } + int rpo() const { return flow()->rpo(); } int start_sp() const { return flow()->stack_size(); } + bool is_loop_head() const { return flow()->is_loop_head(); } + bool is_SEL_head() const { return flow()->is_single_entry_loop_head(); } + bool is_SEL_backedge(Block* pred) const{ return is_SEL_head() && pred->rpo() 
>= rpo(); } + bool is_invariant_local(uint i) const { + const JVMState* jvms = start_map()->jvms(); + if (!jvms->is_loc(i)) return false; + return flow()->is_invariant_local(i - jvms->locoff()); + } + bool can_elide_SEL_phi(uint i) const { assert(is_SEL_head(),""); return is_invariant_local(i); } + const Type* peek(int off=0) const { return stack_type_at(start_sp() - (off+1)); } const Type* stack_type_at(int i) const; @@ -305,7 +315,7 @@ // entry_bci() -- see osr_bci, etc. ciTypeFlow* flow() const { return _flow; } - // blocks() -- see pre_order_at, start_block, etc. + // blocks() -- see rpo_at, start_block, etc. int block_count() const { return _block_count; } GraphKit& exits() { return _exits; } @@ -330,12 +340,12 @@ // Must this parse be aborted? bool failing() { return C->failing(); } - Block* pre_order_at(int po) { - assert(0 <= po && po < _block_count, "oob"); - return &_blocks[po]; + Block* rpo_at(int rpo) { + assert(0 <= rpo && rpo < _block_count, "oob"); + return &_blocks[rpo]; } Block* start_block() { - return pre_order_at(flow()->start_block()->pre_order()); + return rpo_at(flow()->start_block()->rpo()); } // Can return NULL if the flow pass did not complete a block. Block* successor_for_bci(int bci) { @@ -359,9 +369,6 @@ // Parse all the basic blocks. void do_all_blocks(); - // Helper for do_all_blocks; makes one pass in pre-order. - void visit_blocks(); - // Parse the current basic block void do_one_block(); diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/parse1.cpp --- a/src/share/vm/opto/parse1.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/parse1.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -29,17 +29,17 @@ // the most. Some of the non-static variables are needed in bytecodeInfo.cpp // and eventually should be encapsulated in a proper class (gri 8/18/98). 
-int nodes_created = 0; int nodes_created_old = 0; -int methods_parsed = 0; int methods_parsed_old = 0; -int methods_seen = 0; int methods_seen_old = 0; +int nodes_created = 0; +int methods_parsed = 0; +int methods_seen = 0; +int blocks_parsed = 0; +int blocks_seen = 0; -int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0; -int explicit_null_checks_elided = 0, explicit_null_checks_elided_old = 0; +int explicit_null_checks_inserted = 0; +int explicit_null_checks_elided = 0; int all_null_checks_found = 0, implicit_null_checks = 0; int implicit_null_throws = 0; -int parse_idx = 0; -size_t parse_arena = 0; int reclaim_idx = 0; int reclaim_in = 0; int reclaim_node = 0; @@ -61,6 +61,7 @@ tty->cr(); if (methods_seen != methods_parsed) tty->print_cr("Reasons for parse failures (NOT cumulative):"); + tty->print_cr("Blocks parsed: %d Blocks seen: %d", blocks_parsed, blocks_seen); if( explicit_null_checks_inserted ) tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found); @@ -373,6 +374,12 @@ C->record_method_not_compilable_all_tiers(_flow->failure_reason()); } +#ifndef PRODUCT + if (_flow->has_irreducible_entry()) { + C->set_parsed_irreducible_loop(true); + } +#endif + if (_expected_uses <= 0) { _prof_factor = 1; } else { @@ -556,118 +563,93 @@ set_map(entry_map); do_exits(); - // Collect a few more statistics. - parse_idx += C->unique(); - parse_arena += C->node_arena()->used(); - if (log) log->done("parse nodes='%d' memory='%d'", C->unique(), C->node_arena()->used()); } //---------------------------do_all_blocks------------------------------------- void Parse::do_all_blocks() { - _blocks_merged = 0; - _blocks_parsed = 0; + bool has_irreducible = flow()->has_irreducible_entry(); + + // Walk over all blocks in Reverse Post-Order. 
+ while (true) { + bool progress = false; + for (int rpo = 0; rpo < block_count(); rpo++) { + Block* block = rpo_at(rpo); + + if (block->is_parsed()) continue; - int old_blocks_merged = -1; - int old_blocks_parsed = -1; + if (!block->is_merged()) { + // Dead block, no state reaches this block + continue; + } - for (int tries = 0; ; tries++) { - visit_blocks(); - if (failing()) return; // Check for bailout + // Prepare to parse this block. + load_state_from(block); + + if (stopped()) { + // Block is dead. + continue; + } + + blocks_parsed++; - // No need for a work list. The outer loop is hardly ever repeated. - // The following loop traverses the blocks in a reasonable pre-order, - // as produced by the ciTypeFlow pass. + progress = true; + if (block->is_loop_head() || block->is_handler() || has_irreducible && !block->is_ready()) { + // Not all preds have been parsed. We must build phis everywhere. + // (Note that dead locals do not get phis built, ever.) + ensure_phis_everywhere(); + + // Leave behind an undisturbed copy of the map, for future merges. + set_map(clone_map()); + } - // This loop can be taken more than once if there are two entries to - // a loop (irreduceable CFG), and the edge which ciTypeFlow chose - // as the first predecessor to the loop goes dead in the parser, - // due to parse-time optimization. (Could happen with obfuscated code.) + if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) { + // In the absence of irreducible loops, the Region and Phis + // associated with a merge that doesn't involve a backedge can + // be simplified now since the RPO parsing order guarantees + // that any path which was supposed to reach here has already + // been parsed or must be dead.
+ Node* c = control(); + Node* result = _gvn.transform_no_reclaim(control()); + if (c != result && TraceOptoParse) { + tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx); + } + if (result != top()) { + record_for_igvn(result); + } + } - // Look for progress, or the lack of it: - if (_blocks_parsed == block_count()) { - // That's all, folks. - if (TraceOptoParse) { - tty->print_cr("All blocks parsed."); - } + // Parse the block. + do_one_block(); + + // Check for bailouts. + if (failing()) return; + } + + // with irreducible loops multiple passes might be necessary to parse everything + if (!has_irreducible || !progress) { break; } + } - // How much work was done this time around? - int new_blocks_merged = _blocks_merged - old_blocks_merged; - int new_blocks_parsed = _blocks_parsed - old_blocks_parsed; - if (new_blocks_merged == 0) { - if (TraceOptoParse) { - tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed); - } - // No new blocks have become parseable. Some blocks are just dead. - break; - } - assert(new_blocks_parsed > 0, "must make progress"); - assert(tries < block_count(), "the pre-order cannot be this bad!"); - - old_blocks_merged = _blocks_merged; - old_blocks_parsed = _blocks_parsed; - } + blocks_seen += block_count(); #ifndef PRODUCT // Make sure there are no half-processed blocks remaining. // Every remaining unprocessed block is dead and may be ignored now. 
- for (int po = 0; po < block_count(); po++) { - Block* block = pre_order_at(po); + for (int rpo = 0; rpo < block_count(); rpo++) { + Block* block = rpo_at(rpo); if (!block->is_parsed()) { if (TraceOptoParse) { - tty->print("Skipped dead block %d at bci:%d", po, block->start()); - assert(!block->is_merged(), "no half-processed blocks"); + tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start()); } + assert(!block->is_merged(), "no half-processed blocks"); } } #endif } -//---------------------------visit_blocks-------------------------------------- -void Parse::visit_blocks() { - // Walk over all blocks, parsing every one that has been reached (merged). - for (int po = 0; po < block_count(); po++) { - Block* block = pre_order_at(po); - - if (block->is_parsed()) { - // Do not parse twice. - continue; - } - - if (!block->is_merged()) { - // No state on this block. It had not yet been reached. - // Delay reaching it until later. - continue; - } - - // Prepare to parse this block. - load_state_from(block); - - if (stopped()) { - // Block is dead. - continue; - } - - if (!block->is_ready() || block->is_handler()) { - // Not all preds have been parsed. We must build phis everywhere. - // (Note that dead locals do not get phis built, ever.) - ensure_phis_everywhere(); - - // Leave behind an undisturbed copy of the map, for future merges. - set_map(clone_map()); - } - - // Ready or not, parse the block. - do_one_block(); - - // Check for bailouts. - if (failing()) return; - } -} - //-------------------------------build_exits---------------------------------- // Build normal and exceptional exit merge points. void Parse::build_exits() { @@ -1134,24 +1116,24 @@ _blocks = NEW_RESOURCE_ARRAY(Block, _block_count); Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count); - int po; + int rpo; // Initialize the structs. 
- for (po = 0; po < block_count(); po++) { - Block* block = pre_order_at(po); - block->init_node(this, po); + for (rpo = 0; rpo < block_count(); rpo++) { + Block* block = rpo_at(rpo); + block->init_node(this, rpo); } // Collect predecessor and successor information. - for (po = 0; po < block_count(); po++) { - Block* block = pre_order_at(po); + for (rpo = 0; rpo < block_count(); rpo++) { + Block* block = rpo_at(rpo); block->init_graph(this); } } //-------------------------------init_node------------------------------------- -void Parse::Block::init_node(Parse* outer, int po) { - _flow = outer->flow()->pre_order_at(po); +void Parse::Block::init_node(Parse* outer, int rpo) { + _flow = outer->flow()->rpo_at(rpo); _pred_count = 0; _preds_parsed = 0; _count = 0; @@ -1177,7 +1159,7 @@ int p = 0; for (int i = 0; i < ns+ne; i++) { ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns); - Block* block2 = outer->pre_order_at(tf2->pre_order()); + Block* block2 = outer->rpo_at(tf2->rpo()); _successors[i] = block2; // Accumulate pred info for the other block, too. @@ -1368,10 +1350,11 @@ int nt = b->all_successors(); tty->print("Parsing block #%d at bci [%d,%d), successors: ", - block()->pre_order(), block()->start(), block()->limit()); + block()->rpo(), block()->start(), block()->limit()); for (int i = 0; i < nt; i++) { - tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order()); + tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->rpo()); } + if (b->is_loop_head()) tty->print(" lphd"); tty->print_cr(""); } @@ -1501,7 +1484,7 @@ #ifndef PRODUCT Block* b = block(); int trap_bci = b->flow()->has_trap()? 
b->flow()->trap_bci(): -1; - tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci); + tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->rpo(), trap_bci); #endif ShouldNotReachHere(); } @@ -1509,7 +1492,7 @@ //--------------------------merge_common--------------------------------------- void Parse::merge_common(Parse::Block* target, int pnum) { if (TraceOptoParse) { - tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start()); + tty->print("Merging state at block #%d bci:%d", target->rpo(), target->start()); } // Zap extra stack slots to top @@ -1534,6 +1517,7 @@ // which must not be allowed into this block's map.) if (pnum > PhiNode::Input // Known multiple inputs. || target->is_handler() // These have unpredictable inputs. + || target->is_loop_head() // Known multiple inputs || control()->is_Region()) { // We must hide this guy. // Add a Region to start the new basic block. Phis will be added // later lazily. @@ -1575,15 +1559,21 @@ // Compute where to merge into // Merge incoming control path - r->set_req(pnum, newin->control()); + r->init_req(pnum, newin->control()); if (pnum == 1) { // Last merge for this Region? - _gvn.transform_no_reclaim(r); + if (!block()->flow()->is_irreducible_entry()) { + Node* result = _gvn.transform_no_reclaim(r); + if (r != result && TraceOptoParse) { + tty->print_cr("Block #%d replace %d with %d", block()->rpo(), r->_idx, result->_idx); + } + } record_for_igvn(r); } // Update all the non-control inputs to map: assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms"); + bool check_elide_phi = target->is_SEL_backedge(save_block); for (uint j = 1; j < newin->req(); j++) { Node* m = map()->in(j); // Current state of target. Node* n = newin->in(j); // Incoming change to target state. 
@@ -1603,7 +1593,11 @@ merge_memory_edges(n->as_MergeMem(), pnum, nophi); continue; default: // All normal stuff - if (phi == NULL) phi = ensure_phi(j, nophi); + if (phi == NULL) { + if (!check_elide_phi || !target->can_elide_SEL_phi(j)) { + phi = ensure_phi(j, nophi); + } + } break; } } @@ -1736,9 +1730,13 @@ uint nof_monitors = map()->jvms()->nof_monitors(); assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms"); + bool check_elide_phi = block()->is_SEL_head(); for (uint i = TypeFunc::Parms; i < monoff; i++) { - ensure_phi(i); + if (!check_elide_phi || !block()->can_elide_SEL_phi(i)) { + ensure_phi(i); + } } + // Even monitors need Phis, though they are well-structured. // This is true for OSR methods, and also for the rare cases where // a monitor object is the subject of a replace_in_map operation. diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/parse2.cpp --- a/src/share/vm/opto/parse2.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/parse2.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -100,16 +100,17 @@ // Do the range check if (GenerateRangeChecks && need_range_check) { - // Range is constant in array-oop, so we can use the original state of mem - Node* len = load_array_length(ary); Node* tst; if (sizetype->_hi <= 0) { - // If the greatest array bound is negative, we can conclude that we're + // The greatest array bound is negative, so we can conclude that we're // compiling unreachable code, but the unsigned compare trick used below // only works with non-negative lengths. Instead, hack "tst" to be zero so // the uncommon_trap path will always be taken. 
tst = _gvn.intcon(0); } else { + // Range is constant in array-oop, so we can use the original state of mem + Node* len = load_array_length(ary); + // Test length vs index (standard trick using unsigned compare) Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) ); BoolTest::mask btest = BoolTest::lt; @@ -137,9 +138,12 @@ // Check for always knowing you are throwing a range-check exception if (stopped()) return top(); - Node* ptr = array_element_address( ary, idx, type, sizetype); + Node* ptr = array_element_address(ary, idx, type, sizetype); if (result2 != NULL) *result2 = elemtype; + + assert(ptr != top(), "top should go hand-in-hand with stopped"); + return ptr; } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/type.cpp --- a/src/share/vm/opto/type.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/type.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -3157,17 +3157,18 @@ // Narrow the given size type to the index range for the given array base type. // Return NULL if the resulting int type becomes empty. 
-const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) { +const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size) const { jint hi = size->_hi; jint lo = size->_lo; jint min_lo = 0; - jint max_hi = max_array_length(elem); + jint max_hi = max_array_length(elem()->basic_type()); //if (index_not_size) --max_hi; // type of a valid array index, FTR bool chg = false; if (lo < min_lo) { lo = min_lo; chg = true; } if (hi > max_hi) { hi = max_hi; chg = true; } + // Negative length arrays will produce weird intermediate dead fast-path code if (lo > hi) - return NULL; + return TypeInt::ZERO; if (!chg) return size; return TypeInt::make(lo, hi, Type::WidenMin); @@ -3176,9 +3177,7 @@ //-------------------------------cast_to_size---------------------------------- const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const { assert(new_size != NULL, ""); - new_size = narrow_size_type(new_size, elem()->basic_type()); - if (new_size == NULL) // Negative length arrays will produce weird - new_size = TypeInt::ZERO; // intermediate dead fast-path goo + new_size = narrow_size_type(new_size); if (new_size == size()) return this; const TypeAry* new_ary = TypeAry::make(elem(), new_size); return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset, _instance_id); diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/opto/type.hpp --- a/src/share/vm/opto/type.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/opto/type.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -840,6 +840,7 @@ virtual const TypeOopPtr *cast_to_instance_id(int instance_id) const; virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const; + virtual const TypeInt* narrow_size_type(const TypeInt* size) const; virtual bool empty(void) const; // TRUE if type is vacuous virtual const TypePtr *add_offset( intptr_t offset ) const; @@ -865,7 +866,6 @@ } static const TypeAryPtr *_array_body_type[T_CONFLICT+1]; // sharpen the type of an int which is used as an array
size - static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem); #ifndef PRODUCT virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping #endif diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/prims/jvmtiEnvBase.cpp --- a/src/share/vm/prims/jvmtiEnvBase.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/prims/jvmtiEnvBase.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -121,7 +121,7 @@ JvmtiEventController::env_initialize((JvmtiEnv*)this); #ifdef JVMTI_TRACE - _jvmti_external.functions = strlen(TraceJVMTI)? &jvmtiTrace_Interface : &jvmti_Interface; + _jvmti_external.functions = TraceJVMTI != NULL ? &jvmtiTrace_Interface : &jvmti_Interface; #else _jvmti_external.functions = &jvmti_Interface; #endif diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/prims/jvmtiTrace.cpp --- a/src/share/vm/prims/jvmtiTrace.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/prims/jvmtiTrace.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -73,7 +73,7 @@ const char *very_end; const char *curr; - if (strlen(TraceJVMTI)) { + if (TraceJVMTI != NULL) { curr = TraceJVMTI; } else { curr = ""; // hack in fixed tracing here diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/runtime/globals.cpp --- a/src/share/vm/runtime/globals.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/runtime/globals.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -365,8 +365,11 @@ if (result == NULL) return false; if (!result->is_ccstr()) return false; ccstr old_value = result->get_ccstr(); - char* new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1); - strcpy(new_value, *value); + char* new_value = NULL; + if (*value != NULL) { + new_value = NEW_C_HEAP_ARRAY(char, strlen(*value)+1); + strcpy(new_value, *value); + } result->set_ccstr(new_value); if (result->origin == DEFAULT && old_value != NULL) { // Prior value is NOT heap allocated, but was a literal constant. 
diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/runtime/globals.hpp Wed Oct 01 20:15:03 2008 -0400 @@ -707,7 +707,7 @@ diagnostic(bool, PrintAssembly, false, \ "Print assembly code (using external disassembler.so)") \ \ - diagnostic(ccstr, PrintAssemblyOptions, false, \ + diagnostic(ccstr, PrintAssemblyOptions, NULL, \ "Options string passed to disassembler.so") \ \ diagnostic(bool, PrintNMethods, false, \ @@ -848,7 +848,7 @@ "Use LWP-based instead of libthread-based synchronization " \ "(SPARC only)") \ \ - product(ccstr, SyncKnobs, "", \ + product(ccstr, SyncKnobs, NULL, \ "(Unstable) Various monitor synchronization tunables") \ \ product(intx, EmitSync, 0, \ @@ -1032,7 +1032,7 @@ notproduct(bool, TraceJVMCalls, false, \ "Trace JVM calls") \ \ - product(ccstr, TraceJVMTI, "", \ + product(ccstr, TraceJVMTI, NULL, \ "Trace flags for JVMTI functions and events") \ \ /* This option can change an EMCP method into an obsolete method. 
*/ \ @@ -1157,10 +1157,6 @@ "In the Parallel Old garbage collector use parallel dense" \ " prefix update") \ \ - develop(bool, UseParallelOldGCChunkPointerCalc, true, \ - "In the Parallel Old garbage collector use chucks to calculate" \ - " new object locations") \ - \ product(uintx, HeapMaximumCompactionInterval, 20, \ "How often should we maximally compact the heap (not allowing " \ "any dead space)") \ @@ -1189,21 +1185,14 @@ product(uintx, ParallelCMSThreads, 0, \ "Max number of threads CMS will use for concurrent work") \ \ - develop(bool, VerifyParallelOldWithMarkSweep, false, \ - "Use the MarkSweep code to verify phases of Parallel Old") \ - \ - develop(uintx, VerifyParallelOldWithMarkSweepInterval, 1, \ - "Interval at which the MarkSweep code is used to verify " \ - "phases of Parallel Old") \ - \ develop(bool, ParallelOldMTUnsafeMarkBitMap, false, \ "Use the Parallel Old MT unsafe in marking the bitmap") \ \ develop(bool, ParallelOldMTUnsafeUpdateLiveData, false, \ "Use the Parallel Old MT unsafe in update of live size") \ \ - develop(bool, TraceChunkTasksQueuing, false, \ - "Trace the queuing of the chunk tasks") \ + develop(bool, TraceRegionTasksQueuing, false, \ + "Trace the queuing of the region tasks") \ \ product(uintx, ParallelMarkingThreads, 0, \ "Number of marking threads concurrent gc will use") \ diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/utilities/taskqueue.cpp --- a/src/share/vm/utilities/taskqueue.cpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/utilities/taskqueue.cpp Wed Oct 01 20:15:03 2008 -0400 @@ -109,72 +109,72 @@ } } -bool ChunkTaskQueueWithOverflow::is_empty() { - return (_chunk_queue.size() == 0) && +bool RegionTaskQueueWithOverflow::is_empty() { + return (_region_queue.size() == 0) && (_overflow_stack->length() == 0); } -bool ChunkTaskQueueWithOverflow::stealable_is_empty() { - return _chunk_queue.size() == 0; +bool RegionTaskQueueWithOverflow::stealable_is_empty() { + return _region_queue.size() == 0; } -bool 
ChunkTaskQueueWithOverflow::overflow_is_empty() { +bool RegionTaskQueueWithOverflow::overflow_is_empty() { return _overflow_stack->length() == 0; } -void ChunkTaskQueueWithOverflow::initialize() { - _chunk_queue.initialize(); +void RegionTaskQueueWithOverflow::initialize() { + _region_queue.initialize(); assert(_overflow_stack == 0, "Creating memory leak"); _overflow_stack = - new (ResourceObj::C_HEAP) GrowableArray(10, true); + new (ResourceObj::C_HEAP) GrowableArray(10, true); } -void ChunkTaskQueueWithOverflow::save(ChunkTask t) { - if (TraceChunkTasksQueuing && Verbose) { +void RegionTaskQueueWithOverflow::save(RegionTask t) { + if (TraceRegionTasksQueuing && Verbose) { gclog_or_tty->print_cr("CTQ: save " PTR_FORMAT, t); } - if(!_chunk_queue.push(t)) { + if(!_region_queue.push(t)) { _overflow_stack->push(t); } } -// Note that using this method will retrieve all chunks +// Note that using this method will retrieve all regions // that have been saved but that it will always check // the overflow stack. It may be more efficient to // check the stealable queue and the overflow stack // separately. 
-bool ChunkTaskQueueWithOverflow::retrieve(ChunkTask& chunk_task) { - bool result = retrieve_from_overflow(chunk_task); +bool RegionTaskQueueWithOverflow::retrieve(RegionTask& region_task) { + bool result = retrieve_from_overflow(region_task); if (!result) { - result = retrieve_from_stealable_queue(chunk_task); + result = retrieve_from_stealable_queue(region_task); } - if (TraceChunkTasksQueuing && Verbose && result) { + if (TraceRegionTasksQueuing && Verbose && result) { gclog_or_tty->print_cr(" CTQ: retrieve " PTR_FORMAT, result); } return result; } -bool ChunkTaskQueueWithOverflow::retrieve_from_stealable_queue( - ChunkTask& chunk_task) { - bool result = _chunk_queue.pop_local(chunk_task); - if (TraceChunkTasksQueuing && Verbose) { - gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task); +bool RegionTaskQueueWithOverflow::retrieve_from_stealable_queue( + RegionTask& region_task) { + bool result = _region_queue.pop_local(region_task); + if (TraceRegionTasksQueuing && Verbose) { + gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task); } return result; } -bool ChunkTaskQueueWithOverflow::retrieve_from_overflow( - ChunkTask& chunk_task) { +bool +RegionTaskQueueWithOverflow::retrieve_from_overflow(RegionTask& region_task) { bool result; if (!_overflow_stack->is_empty()) { - chunk_task = _overflow_stack->pop(); + region_task = _overflow_stack->pop(); result = true; } else { - chunk_task = (ChunkTask) NULL; + region_task = (RegionTask) NULL; result = false; } - if (TraceChunkTasksQueuing && Verbose) { - gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, chunk_task); + if (TraceRegionTasksQueuing && Verbose) { + gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task); } return result; } diff -r 99dd4bbd9eec -r b7483806cc49 src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Tue Sep 30 12:24:27 2008 -0400 +++ b/src/share/vm/utilities/taskqueue.hpp Wed Oct 01 20:15:03 2008 
-0400 @@ -557,32 +557,32 @@ typedef GenericTaskQueue OopStarTaskQueue; typedef GenericTaskQueueSet OopStarTaskQueueSet; -typedef size_t ChunkTask; // index for chunk -typedef GenericTaskQueue ChunkTaskQueue; -typedef GenericTaskQueueSet ChunkTaskQueueSet; +typedef size_t RegionTask; // index for region +typedef GenericTaskQueue RegionTaskQueue; +typedef GenericTaskQueueSet RegionTaskQueueSet; -class ChunkTaskQueueWithOverflow: public CHeapObj { +class RegionTaskQueueWithOverflow: public CHeapObj { protected: - ChunkTaskQueue _chunk_queue; - GrowableArray* _overflow_stack; + RegionTaskQueue _region_queue; + GrowableArray* _overflow_stack; public: - ChunkTaskQueueWithOverflow() : _overflow_stack(NULL) {} + RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {} // Initialize both stealable queue and overflow void initialize(); // Save first to stealable queue and then to overflow - void save(ChunkTask t); + void save(RegionTask t); // Retrieve first from overflow and then from stealable queue - bool retrieve(ChunkTask& chunk_index); + bool retrieve(RegionTask& region_index); // Retrieve from stealable queue - bool retrieve_from_stealable_queue(ChunkTask& chunk_index); + bool retrieve_from_stealable_queue(RegionTask& region_index); // Retrieve from overflow - bool retrieve_from_overflow(ChunkTask& chunk_index); + bool retrieve_from_overflow(RegionTask& region_index); bool is_empty(); bool stealable_is_empty(); bool overflow_is_empty(); - juint stealable_size() { return _chunk_queue.size(); } - ChunkTaskQueue* task_queue() { return &_chunk_queue; } + juint stealable_size() { return _region_queue.size(); } + RegionTaskQueue* task_queue() { return &_region_queue; } }; -#define USE_ChunkTaskQueueWithOverflow +#define USE_RegionTaskQueueWithOverflow diff -r 99dd4bbd9eec -r b7483806cc49 test/compiler/6711100/Test.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6711100/Test.java Wed Oct 01 20:15:03 2008 -0400 @@ -0,0 +1,53 @@ +/* + * Copyright 2008 Sun 
Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/* + * @test + * @bug 6711100 + * @summary 64bit fastdebug server vm crashes with assert(_base == Int,"Not an Int") + * @run main/othervm -Xcomp -XX:CompileOnly=Test. Test + */ + +public class Test { + + static byte b; + + // The server compiler chokes on compiling + // this method when f() is not inlined + public Test() { + b = (new byte[1])[(new byte[f()])[-1]]; + } + + protected static int f() { + return 1; + } + + public static void main(String[] args) { + try { + Test t = new Test(); + } catch (ArrayIndexOutOfBoundsException e) { + } + } +} + +