# HG changeset patch
# User kvn
# Date 1270669167 25200
# Node ID 9e321dcfa5b7d156b9b6777c70451869e778bc31
# Parent  b9d85fcdf7437ee14993c53fd7a79d5b18e42634
6940726: Use BIS instruction for allocation prefetch on Sparc
Summary: Use BIS instruction for allocation prefetch on Sparc
Reviewed-by: twisti

diff -r b9d85fcdf743 -r 9e321dcfa5b7 src/cpu/sparc/vm/sparc.ad
--- a/src/cpu/sparc/vm/sparc.ad	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Wed Apr 07 12:39:27 2010 -0700
@@ -471,6 +471,9 @@
 source %{
 #define __ _masm.
 
+// Block initializing store
+#define ASI_BLK_INIT_QUAD_LDD_P 0xE2
+
 // tertiary op of a LoadP or StoreP encoding
 #define REGP_OP true
 
@@ -6147,6 +6150,7 @@
 %}
 
 instruct prefetchw( memory mem ) %{
+  predicate(AllocatePrefetchStyle != 3 );
   match( PrefetchWrite mem );
   ins_cost(MEMORY_REF_COST);
 
@@ -6156,6 +6160,23 @@
   ins_pipe(iload_mem);
 %}
 
+// Use BIS instruction to prefetch.
+instruct prefetchw_bis( memory mem ) %{
+  predicate(AllocatePrefetchStyle == 3);
+  match( PrefetchWrite mem );
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "STXA G0,$mem\t! // Block initializing store" %}
+  ins_encode %{
+     Register base = as_Register($mem$$base);
+     int disp = $mem$$disp;
+     if (disp != 0) {
+       __ add(base, AllocatePrefetchStepSize, base);
+     }
+     __ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P);
+  %}
+  ins_pipe(istore_mem_reg);
+%}
 
 //----------Store Instructions-------------------------------------------------
 // Store Byte
diff -r b9d85fcdf743 -r 9e321dcfa5b7 src/cpu/sparc/vm/vm_version_sparc.cpp
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Apr 07 12:39:27 2010 -0700
@@ -86,9 +86,19 @@
     if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) {
       FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
     }
-    if (is_niagara1_plus() && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
-      // Use smaller prefetch distance on N2
-      FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+    if (is_niagara1_plus()) {
+      if (AllocatePrefetchStyle > 0 && FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+        // Use BIS instruction for allocation prefetch.
+        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
+        if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+          // Use smaller prefetch distance on N2 with BIS
+          FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
+        }
+      }
+      if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+        // Use different prefetch distance without BIS
+        FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+      }
     }
 #endif
     if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
diff -r b9d85fcdf743 -r 9e321dcfa5b7 src/share/vm/memory/threadLocalAllocBuffer.hpp
--- a/src/share/vm/memory/threadLocalAllocBuffer.hpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp	Wed Apr 07 12:39:27 2010 -0700
@@ -111,7 +111,22 @@
 
   // Allocate size HeapWords. The memory is NOT initialized to zero.
   inline HeapWord* allocate(size_t size);
-  static size_t alignment_reserve()              { return align_object_size(typeArrayOopDesc::header_size(T_INT)); }
+
+  // Reserve space at the end of TLAB
+  static size_t end_reserve() {
+    int reserve_size = typeArrayOopDesc::header_size(T_INT);
+    if (AllocatePrefetchStyle == 3) {
+      // BIS is used to prefetch - we need a space for it.
+      // +1 for rounding up to next cache line +1 to be safe
+      int lines = AllocatePrefetchLines + 2;
+      int step_size = AllocatePrefetchStepSize;
+      int distance = AllocatePrefetchDistance;
+      int prefetch_end = (distance + step_size*lines)/(int)HeapWordSize;
+      reserve_size = MAX2(reserve_size, prefetch_end);
+    }
+    return reserve_size;
+  }
+  static size_t alignment_reserve()              { return align_object_size(end_reserve()); }
   static size_t alignment_reserve_in_bytes()     { return alignment_reserve() * HeapWordSize; }
 
   // Return tlab size or remaining space in eden such that the
diff -r b9d85fcdf743 -r 9e321dcfa5b7 src/share/vm/opto/macro.cpp
--- a/src/share/vm/opto/macro.cpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/share/vm/opto/macro.cpp	Wed Apr 07 12:39:27 2010 -0700
@@ -1487,11 +1487,11 @@
                                         Node*& contended_phi_rawmem,
                                         Node* old_eden_top, Node* new_eden_top,
                                         Node* length) {
+   enum { fall_in_path = 1, pf_path = 2 };
    if( UseTLAB && AllocatePrefetchStyle == 2 ) {
       // Generate prefetch allocation with watermark check.
       // As an allocation hits the watermark, we will prefetch starting
       // at a "distance" away from watermark.
-      enum { fall_in_path = 1, pf_path = 2 };
 
       Node *pf_region = new (C, 3) RegionNode(3);
       Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
@@ -1570,6 +1570,45 @@
       needgc_false = pf_region;
       contended_phi_rawmem = pf_phi_rawmem;
       i_o = pf_phi_abio;
+   } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
+      // Insert a prefetch for each allocation only on the fast-path
+      Node *pf_region = new (C, 3) RegionNode(3);
+      Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
+                                                TypeRawPtr::BOTTOM );
+
+      // Generate several prefetch instructions only for arrays.
+      uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
+      uint step_size = AllocatePrefetchStepSize;
+      uint distance = AllocatePrefetchDistance;
+
+      // Next cache address.
+      Node *cache_adr = new (C, 4) AddPNode(old_eden_top, old_eden_top,
+                                            _igvn.MakeConX(distance));
+      transform_later(cache_adr);
+      cache_adr = new (C, 2) CastP2XNode(needgc_false, cache_adr);
+      transform_later(cache_adr);
+      Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
+      cache_adr = new (C, 3) AndXNode(cache_adr, mask);
+      transform_later(cache_adr);
+      cache_adr = new (C, 2) CastX2PNode(cache_adr);
+      transform_later(cache_adr);
+
+      // Prefetch
+      Node *prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, cache_adr );
+      prefetch->set_req(0, needgc_false);
+      transform_later(prefetch);
+      contended_phi_rawmem = prefetch;
+      Node *prefetch_adr;
+      distance = step_size;
+      for ( uint i = 1; i < lines; i++ ) {
+        prefetch_adr = new (C, 4) AddPNode( cache_adr, cache_adr,
+                                            _igvn.MakeConX(distance) );
+        transform_later(prefetch_adr);
+        prefetch = new (C, 3) PrefetchWriteNode( contended_phi_rawmem, prefetch_adr );
+        transform_later(prefetch);
+        distance += step_size;
+        contended_phi_rawmem = prefetch;
+      }
    } else if( AllocatePrefetchStyle > 0 ) {
       // Insert a prefetch for each allocation only on the fast-path
       Node *prefetch_adr;
diff -r b9d85fcdf743 -r 9e321dcfa5b7 src/share/vm/opto/memnode.hpp
--- a/src/share/vm/opto/memnode.hpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/share/vm/opto/memnode.hpp	Wed Apr 07 12:39:27 2010 -0700
@@ -1244,5 +1244,5 @@
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return NotAMachineReg; }
   virtual uint match_edge(uint idx) const { return idx==2; }
-  virtual const Type *bottom_type() const { return Type::ABIO; }
+  virtual const Type *bottom_type() const { return ( AllocatePrefetchStyle == 3 ) ? Type::MEMORY : Type::ABIO; }
 };
diff -r b9d85fcdf743 -r 9e321dcfa5b7 src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Wed Apr 07 10:35:56 2010 -0700
+++ b/src/share/vm/runtime/globals.hpp	Wed Apr 07 12:39:27 2010 -0700
@@ -2708,7 +2708,8 @@
   product(intx,  AllocatePrefetchStyle, 1,                                  \
           "0 = no prefetch, "                                               \
           "1 = prefetch instructions for each allocation, "                 \
-          "2 = use TLAB watermark to gate allocation prefetch")             \
+          "2 = use TLAB watermark to gate allocation prefetch, "            \
+          "3 = use BIS instruction on Sparc for allocation prefetch")       \
                                                                             \
   product(intx,  AllocatePrefetchDistance, -1,                              \
           "Distance to prefetch ahead of allocation pointer")               \