# HG changeset patch
# User kvn
# Date 1275008516 25200
# Node ID 2d127394260e3fce257cc27578cebff8cac75e3a
# Parent  de91a2f25c7e21b65265887528a70c6c0303242e
6916623: Align object to 16 bytes to use Compressed Oops with java heap up to 64Gb
Summary: Added new product ObjectAlignmentInBytes flag to control object alignment.
Reviewed-by: twisti, ysr, iveresov

diff -r de91a2f25c7e -r 2d127394260e agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java	Thu May 27 09:54:07 2010 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java	Thu May 27 18:01:56 2010 -0700
@@ -73,18 +73,11 @@
 
   public CompactibleFreeListSpace(Address addr) {
     super(addr);
-    if ( VM.getVM().isLP64() ) {
-      heapWordSize = 8;
-      IndexSetStart = 1;
-      IndexSetStride = 1;
-    }
-    else {
-      heapWordSize = 4;
-      IndexSetStart = 2;
-      IndexSetStride = 2;
-    }
-
-    IndexSetSize = 257;
+    VM vm = VM.getVM();
+    heapWordSize = vm.getHeapWordSize();
+    IndexSetStart = vm.getMinObjAlignmentInBytes() / heapWordSize;
+    IndexSetStride = IndexSetStart;
+    IndexSetSize = 257;
   }
 
   // Accessing block offset table
diff -r de91a2f25c7e -r 2d127394260e agent/src/share/classes/sun/jvm/hotspot/oops/Oop.java
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/Oop.java	Thu May 27 09:54:07 2010 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Oop.java	Thu May 27 18:01:56 2010 -0700
@@ -128,7 +128,7 @@
 
   // Align the object size.
   public static long alignObjectSize(long size) {
-    return VM.getVM().alignUp(size, VM.getVM().getMinObjAlignment());
+    return VM.getVM().alignUp(size, VM.getVM().getMinObjAlignmentInBytes());
   }
 
   // All vm's align longs, so pad out certain offsets.
diff -r de91a2f25c7e -r 2d127394260e agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu May 27 09:54:07 2010 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu May 27 18:01:56 2010 -0700
@@ -93,6 +93,7 @@
   /** alignment constants */
   private boolean isLP64;
   private int     bytesPerLong;
+  private int     objectAlignmentInBytes;
   private int     minObjAlignmentInBytes;
   private int     logMinObjAlignmentInBytes;
   private int     heapWordSize;
@@ -313,9 +314,15 @@
       isLP64 = debugger.getMachineDescription().isLP64();
     }
     bytesPerLong = db.lookupIntConstant("BytesPerLong").intValue();
-    minObjAlignmentInBytes = db.lookupIntConstant("MinObjAlignmentInBytes").intValue();
-    // minObjAlignment = db.lookupIntConstant("MinObjAlignment").intValue();
-    logMinObjAlignmentInBytes = db.lookupIntConstant("LogMinObjAlignmentInBytes").intValue();
+    minObjAlignmentInBytes = getObjectAlignmentInBytes();
+    if (minObjAlignmentInBytes == 8) {
+      logMinObjAlignmentInBytes = 3;
+    } else if (minObjAlignmentInBytes == 16) {
+      logMinObjAlignmentInBytes = 4;
+    } else {
+      throw new RuntimeException("Object alignment " + minObjAlignmentInBytes + " not yet supported");
+    }
+
     heapWordSize = db.lookupIntConstant("HeapWordSize").intValue();
     oopSize = db.lookupIntConstant("oopSize").intValue();
 
@@ -492,10 +499,6 @@
   }
 
   /** Get minimum object alignment in bytes. */
-  public int getMinObjAlignment() {
-    return minObjAlignmentInBytes;
-  }
-
   public int getMinObjAlignmentInBytes() {
     return minObjAlignmentInBytes;
   }
@@ -754,6 +757,14 @@
     return compressedOopsEnabled.booleanValue();
   }
 
+  public int getObjectAlignmentInBytes() {
+    if (objectAlignmentInBytes == 0) {
+      Flag flag = getCommandLineFlag("ObjectAlignmentInBytes");
+      objectAlignmentInBytes = (flag == null) ? 8 : (int)flag.getIntx();
+    }
+    return objectAlignmentInBytes;
+  }
+
   // returns null, if not available.
   public Flag[] getCommandLineFlags() {
     if (commandLineFlags == null) {
diff -r de91a2f25c7e -r 2d127394260e src/cpu/sparc/vm/copy_sparc.hpp
--- a/src/cpu/sparc/vm/copy_sparc.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/cpu/sparc/vm/copy_sparc.hpp	Thu May 27 18:01:56 2010 -0700
@@ -154,7 +154,7 @@
 }
 
 static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
-  assert(MinObjAlignmentInBytes == BytesPerLong, "need alternate implementation");
+  assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
 
   julong* to = (julong*)tohw;
   julong  v  = ((julong)value << 32) | value;
@@ -162,7 +162,7 @@
   // and be equal to 0 on 64-bit platform.
   size_t odd = count % (BytesPerLong / HeapWordSize) ;
 
-  size_t aligned_count = align_object_size(count - odd) / HeapWordsPerLong;
+  size_t aligned_count = align_object_offset(count - odd) / HeapWordsPerLong;
   julong* end = ((julong*)tohw) + aligned_count - 1;
   while (to <= end) {
     DEBUG_ONLY(count -= BytesPerLong / HeapWordSize ;)
diff -r de91a2f25c7e -r 2d127394260e src/cpu/sparc/vm/sparc.ad
--- a/src/cpu/sparc/vm/sparc.ad	Thu May 27 09:54:07 2010 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Thu May 27 18:01:56 2010 -0700
@@ -821,6 +821,10 @@
         !(n->ideal_Opcode()==Op_ConvI2D       && ld_op==Op_LoadF) &&
         !(n->ideal_Opcode()==Op_PrefetchRead  && ld_op==Op_LoadI) &&
         !(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) &&
+        !(n->ideal_Opcode()==Op_Load2I        && ld_op==Op_LoadD) &&
+        !(n->ideal_Opcode()==Op_Load4C        && ld_op==Op_LoadD) &&
+        !(n->ideal_Opcode()==Op_Load4S        && ld_op==Op_LoadD) &&
+        !(n->ideal_Opcode()==Op_Load8B        && ld_op==Op_LoadD) &&
         !(n->rule() == loadUB_rule)) {
       verify_oops_warning(n, n->ideal_Opcode(), ld_op);
     }
@@ -832,6 +836,9 @@
         !(n->ideal_Opcode()==Op_StoreI  && st_op==Op_StoreF) &&
         !(n->ideal_Opcode()==Op_StoreF  && st_op==Op_StoreI) &&
         !(n->ideal_Opcode()==Op_StoreL  && st_op==Op_StoreI) &&
+        !(n->ideal_Opcode()==Op_Store2I && st_op==Op_StoreD) &&
+        !(n->ideal_Opcode()==Op_Store4C && st_op==Op_StoreD) &&
+        !(n->ideal_Opcode()==Op_Store8B && st_op==Op_StoreD) &&
         !(n->ideal_Opcode()==Op_StoreD  && st_op==Op_StoreI && n->rule() == storeD0_rule)) {
       verify_oops_warning(n, n->ideal_Opcode(), st_op);
     }
diff -r de91a2f25c7e -r 2d127394260e src/cpu/x86/vm/assembler_x86.cpp
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu May 27 18:01:56 2010 -0700
@@ -8185,9 +8185,14 @@
   assert (Universe::heap() != NULL, "java heap should be initialized");
   movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
   if (Universe::narrow_oop_shift() != 0) {
-    assert(Address::times_8 == LogMinObjAlignmentInBytes &&
-           Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
-    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    if (LogMinObjAlignmentInBytes == Address::times_8) {
+      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    } else {
+      // OK to use shift since we don't need to preserve flags.
+      shlq(dst, LogMinObjAlignmentInBytes);
+      movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
+    }
   } else {
     movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
   }
@@ -8361,31 +8366,43 @@
 }
 
 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
+  // Note: it will change flags
   assert (UseCompressedOops, "should only be used for compressed headers");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   if (Universe::narrow_oop_shift() != 0) {
-    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
-            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
-    // Don't use Shift since it modifies flags.
-    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    shlq(r, LogMinObjAlignmentInBytes);
+    if (Universe::narrow_oop_base() != NULL) {
+      addq(r, r12_heapbase);
+    }
   } else {
     assert (Universe::narrow_oop_base() == NULL, "sanity");
   }
 }
 
 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+  // Note: it will change flags
   assert (UseCompressedOops, "should only be used for compressed headers");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   if (Universe::narrow_oop_shift() != 0) {
-    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
-            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
-    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    if (LogMinObjAlignmentInBytes == Address::times_8) {
+      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+    } else {
+      if (dst != src) {
+        movq(dst, src);
+      }
+      shlq(dst, LogMinObjAlignmentInBytes);
+      if (Universe::narrow_oop_base() != NULL) {
+        addq(dst, r12_heapbase);
+      }
+    }
   } else if (dst != src) {
     assert (Universe::narrow_oop_base() == NULL, "sanity");
     movq(dst, src);
diff -r de91a2f25c7e -r 2d127394260e src/cpu/x86/vm/x86_64.ad
--- a/src/cpu/x86/vm/x86_64.ad	Thu May 27 09:54:07 2010 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Thu May 27 18:01:56 2010 -0700
@@ -1851,29 +1851,24 @@
 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
   if (UseCompressedOops) {
-    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
+    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
     if (Universe::narrow_oop_shift() != 0) {
-      st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
-    }
-    st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
+      st->print_cr("\tdecode_heap_oop_not_null rscratch1, rscratch1");
+    }
+    st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
   } else {
-    st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
-                 "# Inline cache check", oopDesc::klass_offset_in_bytes());
+    st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
+                 "# Inline cache check");
   }
   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
-  st->print_cr("\tnop");
-  if (!OptoBreakpoint) {
-    st->print_cr("\tnop");
-  }
+  st->print_cr("\tnop\t# nops to align entry point");
 }
 #endif
 
 void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
 {
   MacroAssembler masm(&cbuf);
-#ifdef ASSERT
   uint code_size = cbuf.code_size();
-#endif
   if (UseCompressedOops) {
     masm.load_klass(rscratch1, j_rarg0);
     masm.cmpptr(rax, rscratch1);
@@ -1884,33 +1879,21 @@
   masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 
   /* WARNING these NOPs are critical so that verified entry point is properly
-     aligned for patching by NativeJump::patch_verified_entry() */
-  int nops_cnt = 1;
-  if (!OptoBreakpoint) {
+     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
+  int nops_cnt = 4 - ((cbuf.code_size() - code_size) & 0x3);
+  if (OptoBreakpoint) {
     // Leave space for int3
-    nops_cnt += 1;
+    nops_cnt -= 1;
   }
-  if (UseCompressedOops) {
-    // ??? divisible by 4 is aligned?
-    nops_cnt += 1;
-  }
-  masm.nop(nops_cnt);
-
-  assert(cbuf.code_size() - code_size == size(ra_),
-         "checking code size of inline cache node");
+  nops_cnt &= 0x3; // Do not add nops if code is aligned.
+  if (nops_cnt > 0)
+    masm.nop(nops_cnt);
 }
 
 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 {
-  if (UseCompressedOops) {
-    if (Universe::narrow_oop_shift() == 0) {
-      return OptoBreakpoint ? 15 : 16;
-    } else {
-      return OptoBreakpoint ? 19 : 20;
-    }
-  } else {
-    return OptoBreakpoint ? 11 : 12;
-  }
+  return MachNode::size(ra_); // too many variables; just compute it
+                              // the hard way
 }
 
@@ -5127,7 +5110,7 @@
 // Note: x86 architecture doesn't support "scale * index + offset" without a base
 // we can't free r12 even with Universe::narrow_oop_base() == NULL.
 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
-  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
+  predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8));
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP (DecodeN reg) off);
 
@@ -7742,10 +7725,11 @@
   ins_pipe(ialu_reg_long);
 %}
 
-instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
+instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
   predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
             n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
   match(Set dst (DecodeN src));
+  effect(KILL cr);
   format %{ "decode_heap_oop_not_null $dst,$src" %}
   ins_encode %{
     Register s = $src$$Register;
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu May 27 18:01:56 2010 -0700
@@ -32,6 +32,23 @@
 // highest ranked free list lock rank
 int CompactibleFreeListSpace::_lockRank = Mutex::leaf + 3;
 
+// Defaults are 0 so things will break badly if incorrectly initialized.
+int CompactibleFreeListSpace::IndexSetStart  = 0;
+int CompactibleFreeListSpace::IndexSetStride = 0;
+
+size_t MinChunkSize = 0;
+
+void CompactibleFreeListSpace::set_cms_values() {
+  // Set CMS global values
+  assert(MinChunkSize == 0, "already set");
+  #define numQuanta(x,y) ((x+y-1)/y)
+  MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment;
+
+  assert(IndexSetStart == 0 && IndexSetStride == 0, "already set");
+  IndexSetStart  = MinObjAlignment;
+  IndexSetStride = MinObjAlignment;
+}
+
 // Constructor
 CompactibleFreeListSpace::CompactibleFreeListSpace(BlockOffsetSharedArray* bs,
   MemRegion mr, bool use_adaptive_freelists,
@@ -302,7 +319,7 @@
 
 size_t CompactibleFreeListSpace::totalCountInIndexedFreeLists() const {
   size_t count = 0;
-  for (int i = MinChunkSize; i < IndexSetSize; i++) {
+  for (int i = (int)MinChunkSize; i < IndexSetSize; i++) {
     debug_only(
       ssize_t total_list_count = 0;
       for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu May 27 18:01:56 2010 -0700
@@ -91,10 +91,10 @@
   enum SomeConstants {
     SmallForLinearAlloc = 16,        // size < this then use _sLAB
     SmallForDictionary  = 257,       // size < this then use _indexedFreeList
-    IndexSetSize        = SmallForDictionary,  // keep this odd-sized
-    IndexSetStart       = MinObjAlignment,
-    IndexSetStride      = MinObjAlignment
+    IndexSetSize        = SmallForDictionary   // keep this odd-sized
   };
+  static int IndexSetStart;
+  static int IndexSetStride;
 
  private:
   enum FitStrategyOptions {
@@ -278,6 +278,9 @@
   HeapWord* nearLargestChunk() const { return _nearLargestChunk; }
   void      set_nearLargestChunk(HeapWord* v) { _nearLargestChunk = v; }
 
+  // Set CMS global values
+  static void set_cms_values();
+
   // Return the free chunk at the end of the space.  If no such
   // chunk exists, return NULL.
   FreeChunk* find_chunk_at_end();
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu May 27 18:01:56 2010 -0700
@@ -159,7 +159,7 @@
   CardTableRS* ct, bool use_adaptive_freelists,
   FreeBlockDictionary::DictionaryChoice dictionaryChoice) :
   CardGeneration(rs, initial_byte_size, level, ct),
-  _dilatation_factor(((double)MinChunkSize)/((double)(oopDesc::header_size()))),
+  _dilatation_factor(((double)MinChunkSize)/((double)(CollectedHeap::min_fill_size()))),
   _debug_collection_type(Concurrent_collection_type)
 {
   HeapWord* bottom = (HeapWord*) _virtual_space.low();
@@ -222,7 +222,7 @@
   // promoting generation, we'll instead just use the mimimum
   // object size (which today is a header's worth of space);
   // note that all arithmetic is in units of HeapWords.
-  assert(MinChunkSize >= oopDesc::header_size(), "just checking");
+  assert(MinChunkSize >= CollectedHeap::min_fill_size(), "just checking");
   assert(_dilatation_factor >= 1.0, "from previous assert");
 }
 
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Thu May 27 18:01:56 2010 -0700
@@ -133,9 +133,5 @@
   void print_on(outputStream* st);
 };
 
-// Alignment helpers etc.
-#define numQuanta(x,y) ((x+y-1)/y)
-enum AlignmentConstants {
-  MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment
-};
+extern size_t MinChunkSize;
 
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu May 27 18:01:56 2010 -0700
@@ -3644,7 +3644,7 @@
   do {
     free_words = r->free()/HeapWordSize;
     // If there's too little space, no one can allocate, so we're done.
-    if (free_words < (size_t)oopDesc::header_size()) return;
+    if (free_words < CollectedHeap::min_fill_size()) return;
     // Otherwise, try to claim it.
    block = r->par_allocate(free_words);
   } while (block == NULL);
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu May 27 18:01:56 2010 -0700
@@ -2523,14 +2523,14 @@
   }
   if (ParallelGCThreads > 0) {
     const size_t OverpartitionFactor = 4;
-    const size_t MinChunkSize = 8;
-    const size_t ChunkSize =
+    const size_t MinWorkUnit = 8;
+    const size_t WorkUnit =
       MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
-           MinChunkSize);
+           MinWorkUnit);
     _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(),
-                                                             ChunkSize);
+                                                             WorkUnit);
     ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser,
-                                            (int) ChunkSize);
+                                            (int) WorkUnit);
     _g1->workers()->run_task(&parKnownGarbageTask);
 
     assert(_g1->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu May 27 18:01:56 2010 -0700
@@ -711,6 +711,7 @@
   // object in the region.
   if (region_ptr->data_size() == RegionSize) {
     result += pointer_delta(addr, region_addr);
+    DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
     return result;
   }
 
@@ -1487,13 +1488,14 @@
     space->set_top_for_allocations();
   }
 
-  size_t obj_len = 8;
+  size_t min_size = CollectedHeap::min_fill_size();
+  size_t obj_len = min_size;
   while (b + obj_len <= t) {
     CollectedHeap::fill_with_object(b, obj_len);
     mark_bitmap()->mark_obj(b, obj_len);
     summary_data().add_obj(b, obj_len);
     b += obj_len;
-    obj_len = (obj_len & 0x18) + 8; // 8 16 24 32 8 16 24 32 ...
+    obj_len = (obj_len & (min_size*3)) + min_size; // 8 16 24 32 8 16 24 32 ...
   }
   if (b < t) {
     // The loop didn't completely fill to t (top); adjust top downward.
@@ -1680,11 +1682,13 @@
   // +-------+
 
   // Initially assume case a, c or e will apply.
-  size_t obj_len = (size_t)oopDesc::header_size();
+  size_t obj_len = CollectedHeap::min_fill_size();
   HeapWord* obj_beg = dense_prefix_end - obj_len;
 
 #ifdef  _LP64
-  if (_mark_bitmap.is_obj_end(dense_prefix_bit - 2)) {
+  if (MinObjAlignment > 1) { // object alignment > heap word size
+    // Cases a, c or e.
+  } else if (_mark_bitmap.is_obj_end(dense_prefix_bit - 2)) {
     // Case b above.
     obj_beg = dense_prefix_end - 1;
   } else if (!_mark_bitmap.is_obj_end(dense_prefix_bit - 3) &&
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu May 27 18:01:56 2010 -0700
@@ -1414,6 +1414,8 @@
 {
   assert(old_addr >= new_addr || space_id(old_addr) != space_id(new_addr),
          "must move left or to a different space");
+  assert(is_object_aligned((intptr_t)old_addr) && is_object_aligned((intptr_t)new_addr),
+         "checking alignment");
 }
 #endif // ASSERT
 
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Thu May 27 18:01:56 2010 -0700
@@ -761,7 +761,7 @@
 
   if (p != NULL) {
     size_t remainder = s->free_in_words();
-    if (remainder < (size_t)oopDesc::header_size() && remainder > 0) {
+    if (remainder < CollectedHeap::min_fill_size() && remainder > 0) {
       s->set_top(s->top() - size);
       p = NULL;
     }
@@ -803,7 +803,7 @@
   HeapWord *p = s->cas_allocate(size);
   if (p != NULL) {
     size_t remainder = pointer_delta(s->end(), p + size);
-    if (remainder < (size_t)oopDesc::header_size() && remainder > 0) {
+    if (remainder < CollectedHeap::min_fill_size() && remainder > 0) {
       if (s->cas_deallocate(p, size)) {
         // We were the last to allocate and created a fragment less than
         // a minimal object.
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/gc_interface/collectedHeap.cpp
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Thu May 27 18:01:56 2010 -0700
@@ -239,11 +239,11 @@
 }
 
 size_t CollectedHeap::filler_array_hdr_size() {
-  return size_t(arrayOopDesc::header_size(T_INT));
+  return size_t(align_object_offset(arrayOopDesc::header_size(T_INT))); // align to Long
 }
 
 size_t CollectedHeap::filler_array_min_size() {
-  return align_object_size(filler_array_hdr_size());
+  return align_object_size(filler_array_hdr_size()); // align to MinObjAlignment
 }
 
 size_t CollectedHeap::filler_array_max_size() {
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/memory/space.cpp
--- a/src/share/vm/memory/space.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/memory/space.cpp	Thu May 27 18:01:56 2010 -0700
@@ -861,9 +861,9 @@
   }
   size = align_object_size(size);
 
-  const size_t min_int_array_size = typeArrayOopDesc::header_size(T_INT);
-  if (size >= min_int_array_size) {
-    size_t length = (size - min_int_array_size) * (HeapWordSize / sizeof(jint));
+  const size_t array_header_size = typeArrayOopDesc::header_size(T_INT);
+  if (size >= (size_t)align_object_size(array_header_size)) {
+    size_t length = (size - array_header_size) * (HeapWordSize / sizeof(jint));
     // allocate uninitialized int array
     typeArrayOop t = (typeArrayOop) allocate(size);
     assert(t != NULL, "allocation should succeed");
@@ -871,7 +871,7 @@
     t->set_klass(Universe::intArrayKlassObj());
     t->set_length((int)length);
   } else {
-    assert((int) size == instanceOopDesc::header_size(),
+    assert(size == CollectedHeap::min_fill_size(),
            "size for smallest fake object doesn't match");
     instanceOop obj = (instanceOop) allocate(size);
     obj->set_mark(markOopDesc::prototype());
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/memory/threadLocalAllocBuffer.inline.hpp
--- a/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp	Thu May 27 18:01:56 2010 -0700
@@ -31,7 +31,7 @@
     // Skip mangling the space corresponding to the object header to
     // ensure that the returned space is not considered parsable by
    // any concurrent GC thread.
-    size_t hdr_size = CollectedHeap::min_fill_size();
+    size_t hdr_size = oopDesc::header_size();
     Copy::fill_to_words(obj + hdr_size, size - hdr_size, badHeapWordVal);
 #endif // ASSERT
   // This addition is safe because we know that top is
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/memory/universe.cpp
--- a/src/share/vm/memory/universe.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/memory/universe.cpp	Thu May 27 18:01:56 2010 -0700
@@ -748,7 +748,7 @@
 // 4Gb
 static const uint64_t NarrowOopHeapMax = (uint64_t(max_juint) + 1);
 // 32Gb
-static const uint64_t OopEncodingHeapMax = NarrowOopHeapMax << LogMinObjAlignmentInBytes;
+// OopEncodingHeapMax == NarrowOopHeapMax << LogMinObjAlignmentInBytes;
 
 char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
   size_t base = 0;
@@ -1261,7 +1261,7 @@
 
   // decide which low-order bits we require to be clear:
   size_t alignSize = MinObjAlignmentInBytes;
-  size_t min_object_size = oopDesc::header_size();
+  size_t min_object_size = CollectedHeap::min_fill_size();
 
   // make an inclusive limit:
   uintptr_t max = (uintptr_t)high_boundary - min_object_size*wordSize;
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/oops/arrayOop.hpp
--- a/src/share/vm/oops/arrayOop.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/oops/arrayOop.hpp	Thu May 27 18:01:56 2010 -0700
@@ -92,7 +92,7 @@
   static int header_size(BasicType type) {
     size_t typesize_in_bytes = header_size_in_bytes();
     return (int)(Universe::element_type_should_be_aligned(type)
-      ? align_object_size(typesize_in_bytes/HeapWordSize)
+      ? align_object_offset(typesize_in_bytes/HeapWordSize)
       : typesize_in_bytes/HeapWordSize);
   }
 
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/oops/oop.hpp
--- a/src/share/vm/oops/oop.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/oops/oop.hpp	Thu May 27 18:01:56 2010 -0700
@@ -149,10 +149,6 @@
   // Need this as public for garbage collection.
   template <class T> T* obj_field_addr(int offset) const;
 
-  // Oop encoding heap max
-  static const uint64_t OopEncodingHeapMax =
-    (uint64_t(max_juint) + 1) << LogMinObjAlignmentInBytes;
-
   static bool is_null(oop obj);
   static bool is_null(narrowOop obj);
 
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/oops/oop.inline.hpp
--- a/src/share/vm/oops/oop.inline.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/oops/oop.inline.hpp	Thu May 27 18:01:56 2010 -0700
@@ -146,8 +146,13 @@
 // offset from the heap base.  Saving the check for null can save instructions
 // in inner GC loops so these are separated.
 
+inline bool check_obj_alignment(oop obj) {
+  return (intptr_t)obj % MinObjAlignmentInBytes == 0;
+}
+
 inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
   assert(!is_null(v), "oop value can never be zero");
+  assert(check_obj_alignment(v), "Address not aligned");
   assert(Universe::heap()->is_in_reserved(v), "Address not in heap");
   address base = Universe::narrow_oop_base();
   int shift = Universe::narrow_oop_shift();
@@ -167,7 +172,9 @@
   assert(!is_null(v), "narrow oop value can never be zero");
   address base = Universe::narrow_oop_base();
   int shift = Universe::narrow_oop_shift();
-  return (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
+  oop result = (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
+  assert(check_obj_alignment(result), "Address not aligned");
+  return result;
 }
 
 inline oop oopDesc::decode_heap_oop(narrowOop v) {
@@ -522,10 +529,6 @@
   return mark()->has_bias_pattern();
 }
 
-inline bool check_obj_alignment(oop obj) {
-  return (intptr_t)obj % MinObjAlignmentInBytes == 0;
-}
-
 // used only for asserts
 inline bool oopDesc::is_oop(bool ignore_mark_word) const {
 
@@ -600,6 +603,8 @@
 
 // Used by scavengers
 inline void oopDesc::forward_to(oop p) {
+  assert(check_obj_alignment(p),
+         "forwarding to something not aligned");
   assert(Universe::heap()->is_in_reserved(p),
          "forwarding to something not in heap");
   markOop m = markOopDesc::encode_pointer_as_mark(p);
@@ -609,6 +614,8 @@
 
 // Used by parallel scavengers
 inline bool oopDesc::cas_forward_to(oop p, markOop compare) {
+  assert(check_obj_alignment(p),
+         "forwarding to something not aligned");
   assert(Universe::heap()->is_in_reserved(p),
          "forwarding to something not in heap");
   markOop m = markOopDesc::encode_pointer_as_mark(p);
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/runtime/arguments.cpp
--- a/src/share/vm/runtime/arguments.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Thu May 27 18:01:56 2010 -0700
@@ -1211,8 +1211,44 @@
 }
 #endif // KERNEL
 
+void set_object_alignment() {
+  // Object alignment.
+  assert(is_power_of_2(ObjectAlignmentInBytes), "ObjectAlignmentInBytes must be power of 2");
+  MinObjAlignmentInBytes     = ObjectAlignmentInBytes;
+  assert(MinObjAlignmentInBytes >= HeapWordsPerLong * HeapWordSize, "ObjectAlignmentInBytes value is too small");
+  MinObjAlignment            = MinObjAlignmentInBytes / HeapWordSize;
+  assert(MinObjAlignmentInBytes == MinObjAlignment * HeapWordSize, "ObjectAlignmentInBytes value is incorrect");
+  MinObjAlignmentInBytesMask = MinObjAlignmentInBytes - 1;
+
+  LogMinObjAlignmentInBytes = exact_log2(ObjectAlignmentInBytes);
+  LogMinObjAlignment        = LogMinObjAlignmentInBytes - LogHeapWordSize;
+
+  // Oop encoding heap max
+  OopEncodingHeapMax = (uint64_t(max_juint) + 1) << LogMinObjAlignmentInBytes;
+
+#ifndef KERNEL
+  // Set CMS global values
+  CompactibleFreeListSpace::set_cms_values();
+#endif // KERNEL
+}
+
+bool verify_object_alignment() {
+  // Object alignment.
+  if (!is_power_of_2(ObjectAlignmentInBytes)) {
+    jio_fprintf(defaultStream::error_stream(),
+                "error: ObjectAlignmentInBytes=%d must be power of 2", (int)ObjectAlignmentInBytes);
+    return false;
+  }
+  if ((int)ObjectAlignmentInBytes < BytesPerLong) {
+    jio_fprintf(defaultStream::error_stream(),
+                "error: ObjectAlignmentInBytes=%d must be greater or equal %d", (int)ObjectAlignmentInBytes, BytesPerLong);
+    return false;
+  }
+  return true;
+}
+
 inline uintx max_heap_for_compressed_oops() {
-  LP64_ONLY(return oopDesc::OopEncodingHeapMax - MaxPermSize - os::vm_page_size());
+  LP64_ONLY(return OopEncodingHeapMax - MaxPermSize - os::vm_page_size());
   NOT_LP64(ShouldNotReachHere(); return 0);
 }
 
@@ -1776,6 +1812,8 @@
   status = status && verify_interval(TLABWasteTargetPercent,
                                      1, 100, "TLABWasteTargetPercent");
 
+  status = status && verify_object_alignment();
+
   return status;
 }
 
@@ -2848,6 +2886,9 @@
   UseCompressedOops = false;
 #endif
 
+  // Set object alignment values.
+  set_object_alignment();
+
 #ifdef SERIALGC
   force_serial_gc();
 #endif // SERIALGC
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/runtime/globals.hpp	Thu May 27 18:01:56 2010 -0700
@@ -321,6 +321,9 @@
   diagnostic(bool, PrintCompressedOopsMode, false,                          \
           "Print compressed oops base address and encoding mode")           \
                                                                             \
+  lp64_product(intx, ObjectAlignmentInBytes, 8,                             \
+          "Default object alignment in bytes, 8 is minimum")                \
+                                                                            \
   /* UseMembar is theoretically a temp flag used for memory barrier         \
    * removal testing.  It was supposed to be removed before FCS but has     \
    * been re-added (see 6401008) */                                         \
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/runtime/vmStructs.cpp
--- a/src/share/vm/runtime/vmStructs.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/runtime/vmStructs.cpp	Thu May 27 18:01:56 2010 -0700
@@ -1328,14 +1328,6 @@
   declare_constant(LogBytesPerWord)                                        \
   declare_constant(BytesPerLong)                                           \
                                                                            \
-  /********************/                                                   \
-  /* Object alignment */                                                   \
-  /********************/                                                   \
-                                                                           \
-  declare_constant(MinObjAlignment)                                        \
-  declare_constant(MinObjAlignmentInBytes)                                 \
-  declare_constant(LogMinObjAlignmentInBytes)                              \
-                                                                           \
   /********************************************/                           \
   /* Generation and Space Hierarchy Constants */                           \
   /********************************************/                           \
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/utilities/copy.hpp
--- a/src/share/vm/utilities/copy.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/utilities/copy.hpp	Thu May 27 18:01:56 2010 -0700
@@ -51,7 +51,7 @@
 class Copy : AllStatic {
  public:
   // Block copy methods have four attributes.  We don't define all possibilities.
-  //   alignment: aligned according to minimum Java object alignment (MinObjAlignment)
+  //   alignment: aligned to BytesPerLong
   //   arrayof:   arraycopy operation with both operands aligned on the same
   //              boundary as the first element of an array of the copy unit.
   //              This is currently a HeapWord boundary on all platforms, except
@@ -70,7 +70,7 @@
   //   [ '_atomic' ]
   //
   // Except in the arrayof case, whatever the alignment is, we assume we can copy
-  // whole alignment units.  E.g., if MinObjAlignment is 2x word alignment, an odd
+  // whole alignment units.  E.g., if BytesPerLong is 2x word alignment, an odd
   // count may copy an extra word.  In the arrayof case, we are allowed to copy
   // only the number of copy units specified.
 
@@ -305,17 +305,17 @@
   }
 
  static void assert_params_aligned(HeapWord* from, HeapWord* to) {
#ifdef ASSERT
-    if (mask_bits((uintptr_t)from, MinObjAlignmentInBytes-1) != 0)
-      basic_fatal("not object aligned");
-    if (mask_bits((uintptr_t)to, MinObjAlignmentInBytes-1) != 0)
-      basic_fatal("not object aligned");
+    if (mask_bits((uintptr_t)from, BytesPerLong-1) != 0)
+      basic_fatal("not long aligned");
+    if (mask_bits((uintptr_t)to, BytesPerLong-1) != 0)
+      basic_fatal("not long aligned");
#endif
   }
 
   static void assert_params_aligned(HeapWord* to) {
#ifdef ASSERT
-    if (mask_bits((uintptr_t)to, MinObjAlignmentInBytes-1) != 0)
-      basic_fatal("not object aligned");
+    if (mask_bits((uintptr_t)to, BytesPerLong-1) != 0)
+      basic_fatal("not long aligned");
#endif
   }
 
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/utilities/globalDefinitions.cpp
--- a/src/share/vm/utilities/globalDefinitions.cpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/utilities/globalDefinitions.cpp	Thu May 27 18:01:56 2010 -0700
@@ -34,6 +34,18 @@
 int BytesPerHeapOop    = 0;
 int BitsPerHeapOop     = 0;
 
+// Object alignment, in units of HeapWords.
+// Defaults are -1 so things will break badly if incorrectly initialized.
+int MinObjAlignment            = -1;
+int MinObjAlignmentInBytes     = -1;
+int MinObjAlignmentInBytesMask = 0;
+
+int LogMinObjAlignment         = -1;
+int LogMinObjAlignmentInBytes  = -1;
+
+// Oop encoding heap max
+uint64_t OopEncodingHeapMax = 0;
+
 void basic_fatal(const char* msg) {
   fatal(msg);
 }
diff -r de91a2f25c7e -r 2d127394260e src/share/vm/utilities/globalDefinitions.hpp
--- a/src/share/vm/utilities/globalDefinitions.hpp	Thu May 27 09:54:07 2010 -0700
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Thu May 27 18:01:56 2010 -0700
@@ -73,6 +73,9 @@
 extern int BytesPerHeapOop;
 extern int BitsPerHeapOop;
 
+// Oop encoding heap max
+extern uint64_t OopEncodingHeapMax;
+
 const int BitsPerJavaInteger = 32;
 const int BitsPerJavaLong    = 64;
 const int BitsPerSize_t      = size_tSize * BitsPerByte;
@@ -292,12 +295,12 @@
 // Minimum is max(BytesPerLong, BytesPerDouble, BytesPerOop) / HeapWordSize, so jlong, jdouble and
 // reference fields can be naturally aligned.
 
-const int MinObjAlignment            = HeapWordsPerLong;
-const int MinObjAlignmentInBytes     = MinObjAlignment * HeapWordSize;
-const int MinObjAlignmentInBytesMask = MinObjAlignmentInBytes - 1;
+extern int MinObjAlignment;
+extern int MinObjAlignmentInBytes;
+extern int MinObjAlignmentInBytesMask;
 
-const int LogMinObjAlignment         = LogHeapWordsPerLong;
-const int LogMinObjAlignmentInBytes  = LogMinObjAlignment + LogHeapWordSize;
+extern int LogMinObjAlignment;
+extern int LogMinObjAlignmentInBytes;
 
 // Machine dependent stuff
 
@@ -332,18 +335,16 @@
   return align_size_up(size, MinObjAlignment);
 }
 
-// Pad out certain offsets to jlong alignment, in HeapWord units.
+inline bool is_object_aligned(intptr_t addr) {
+  return addr == align_object_size(addr);
+}
 
-#define align_object_offset_(offset) align_size_up_(offset, HeapWordsPerLong)
+// Pad out certain offsets to jlong alignment, in HeapWord units.
 
 inline intptr_t align_object_offset(intptr_t offset) {
   return align_size_up(offset, HeapWordsPerLong);
 }
 
-inline bool is_object_aligned(intptr_t offset) {
-  return offset == align_object_offset(offset);
-}
-
 //----------------------------------------------------------------------------------------------------
 // Utility macros for compilers
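
The 64Gb figure in the synopsis falls out of the encoding limit that
set_object_alignment() establishes in the arguments.cpp hunk above:
OopEncodingHeapMax = (2^32 narrow oops) << LogMinObjAlignmentInBytes, so 8-byte
alignment can address 32Gb and 16-byte alignment 64Gb. Below is a stand-alone
C++ sketch of that arithmetic, not HotSpot source; the names mirror the patch
and main() exists only for illustration:

  #include <stdint.h>
  #include <stdio.h>

  int main() {
    // 2^32 distinct narrow oops, as in universe.cpp (NarrowOopHeapMax).
    const uint64_t NarrowOopHeapMax = uint64_t(1) << 32;
    for (int alignment = 8; alignment <= 16; alignment *= 2) {
      // exact_log2(ObjectAlignmentInBytes), i.e. LogMinObjAlignmentInBytes.
      int log_alignment = (alignment == 8) ? 3 : 4;
      uint64_t OopEncodingHeapMax = NarrowOopHeapMax << log_alignment;
      printf("ObjectAlignmentInBytes=%-2d -> max heap for compressed oops = %lluGb\n",
             alignment, (unsigned long long)(OopEncodingHeapMax >> 30));
    }
    return 0;
  }

This prints 32Gb for the default alignment and 64Gb for 16-byte alignment,
matching the limits the patch encodes.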
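A quick way to exercise the new flag on a 64-bit build (the flag name and its
lp64_product default of 8 come from the globals.hpp hunk; the heap size here is
illustrative):

  java -XX:+UseCompressedOops -XX:ObjectAlignmentInBytes=16 -Xmx48g -version

The trade-off is extra padding per object at 16-byte alignment, so the larger
value only pays off when the heap would not fit under the default 32Gb
compressed-oops limit.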