comparison src/cpu/sparc/vm/sparc.ad @ 3892:baf763f388e6

7059037: Use BIS for zeroing on T4. Summary: Use BIS for zeroing newly allocated big (2 KB and larger) objects and arrays. Reviewed-by: never, twisti, ysr
author kvn
date Fri, 26 Aug 2011 08:52:22 -0700
parents bd87c0dcaba5
children a64d352d1118
comparison
equal deleted inserted replaced
3891:ac8738449b6f 3892:baf763f388e6
458 // definitions necessary in the rest of the architecture description 458 // definitions necessary in the rest of the architecture description
459 source_hpp %{ 459 source_hpp %{
460 // Must be visible to the DFA in dfa_sparc.cpp 460 // Must be visible to the DFA in dfa_sparc.cpp
461 extern bool can_branch_register( Node *bol, Node *cmp ); 461 extern bool can_branch_register( Node *bol, Node *cmp );
462 462
463 extern bool use_block_zeroing(Node* count);
464
463 // Macros to extract hi & lo halves from a long pair. 465 // Macros to extract hi & lo halves from a long pair.
464 // G0 is not part of any long pair, so assert on that. 466 // G0 is not part of any long pair, so assert on that.
465 // Prevents accidentally using G1 instead of G0. 467 // Prevents accidentally using G1 instead of G0.
466 #define LONG_HI_REG(x) (x) 468 #define LONG_HI_REG(x) (x)
467 #define LONG_LO_REG(x) (x) 469 #define LONG_LO_REG(x) (x)
517 if( !x->in(i)->is_Load() ) 519 if( !x->in(i)->is_Load() )
518 return false; 520 return false;
519 return true; 521 return true;
520 } 522 }
521 return false; 523 return false;
524 }
525
// Predicate helper for the ClearArray match rules in this file: returns true
// when the BIS-based zeroing rules should be used for the given length node
// instead of the plain STX loop. The rules pass n->in(2) as `count`.
// Always false when UseBlockZeroing is off.
526 bool use_block_zeroing(Node* count) {
527 // Use BIS for zeroing if count is not constant
528 // or it is >= BlockZeroingLowLimit.
// NOTE(review): BlockZeroingLowLimit is also the argument handed to
// find_intptr_t_con(); presumably that is the value returned for a
// non-constant count, which is what makes the ">=" test succeed in the
// "not constant" case described above -- confirm against Node.
529 return UseBlockZeroing && (count->find_intptr_t_con(BlockZeroingLowLimit) >= BlockZeroingLowLimit);
522 } 530 }
523 531
524 // **************************************************************************** 532 // ****************************************************************************
525 533
526 // REQUIRED FUNCTIONALITY 534 // REQUIRED FUNCTIONALITY
2806 FloatRegister Fsrc2 = $primary ? reg_to_SingleFloatRegister_object($src2$$reg) 2814 FloatRegister Fsrc2 = $primary ? reg_to_SingleFloatRegister_object($src2$$reg)
2807 : reg_to_DoubleFloatRegister_object($src2$$reg); 2815 : reg_to_DoubleFloatRegister_object($src2$$reg);
2808 2816
2809 // Convert condition code fcc0 into -1,0,1; unordered reports less-than (-1) 2817 // Convert condition code fcc0 into -1,0,1; unordered reports less-than (-1)
2810 __ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst); 2818 __ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst);
2811 %}
2812
// Encoding class that emits a backwards clearing loop: it copies cnt into
// temp, then repeatedly subtracts 8 from temp and stores G0 (zero) at
// [base + temp] with a 64-bit STX until temp goes negative.
2813 // Compiler ensures base is doubleword aligned and cnt is count of doublewords
// NOTE(review): the body treats cnt as a byte count (locals are named
// nof_bytes_*, the counter steps by 8 and is used directly as the store
// offset), which does not agree with "count of doublewords" above -- confirm
// which unit the matcher actually supplies.
2814 enc_class enc_Clear_Array(iRegX cnt, iRegP base, iRegX temp) %{
2815 MacroAssembler _masm(&cbuf);
2816 Register nof_bytes_arg = reg_to_register_object($cnt$$reg);
2817 Register nof_bytes_tmp = reg_to_register_object($temp$$reg);
2818 Register base_pointer_arg = reg_to_register_object($base$$reg);
2819
2820 Label loop;
// Work on a copy so the cnt register itself is left unmodified.
2821 __ mov(nof_bytes_arg, nof_bytes_tmp);
2822
2823 // Loop and clear, walking backwards through the array.
2824 // nof_bytes_tmp (if >0) is always the number of bytes to zero
2825 __ bind(loop);
// deccc sets the condition codes that the following branch tests.
2826 __ deccc(nof_bytes_tmp, 8);
// NOTE(review): the second br() argument (true) is presumably the annul
// flag, so the delay-slot STX below would only execute while the branch is
// taken, i.e. while the decremented offset is still >= 0 -- confirm.
2827 __ br(Assembler::greaterEqual, true, Assembler::pt, loop);
2828 __ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp);
2829 // %%%% this mini-loop must not cross a cache boundary!
2830 %} 2819 %}
2831 2820
2832 2821
2833 enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{ 2822 enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{
2834 Label Ldone, Lloop; 2823 Label Ldone, Lloop;
10255 format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $box" %} 10244 format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $box" %}
10256 ins_encode( Fast_Unlock(object, box, scratch, scratch2) ); 10245 ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
10257 ins_pipe(long_memory_op); 10246 ins_pipe(long_memory_op);
10258 %} 10247 %}
10259 10248
10260 // Count and Base registers are fixed because the allocator cannot 10249 // The encodings are generic.
10261 // kill unknown registers. The encodings are generic.
// Match rule for ClearArray that zeroes memory with a simple STX loop.
// In the new revision it is selected only when use_block_zeroing() rejects
// the length node (see the predicate), and the loop is emitted inline in
// ins_encode instead of through the enc_Clear_Array encoding class.
// temp is a scratch register (TEMP) and the condition codes are clobbered
// (KILL ccr) because the loop counts down with a cc-setting subtract.
10262 instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{ 10250 instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{
10251 predicate(!use_block_zeroing(n->in(2)) );
10263 match(Set dummy (ClearArray cnt base)); 10252 match(Set dummy (ClearArray cnt base));
10264 effect(TEMP temp, KILL ccr); 10253 effect(TEMP temp, KILL ccr);
10265 ins_cost(300); 10254 ins_cost(300);
10266 format %{ "MOV $cnt,$temp\n" 10255 format %{ "MOV $cnt,$temp\n"
10267 "loop: SUBcc $temp,8,$temp\t! Count down a dword of bytes\n" 10256 "loop: SUBcc $temp,8,$temp\t! Count down a dword of bytes\n"
10268 " BRge loop\t\t! Clearing loop\n" 10257 " BRge loop\t\t! Clearing loop\n"
10269 " STX G0,[$base+$temp]\t! delay slot" %} 10258 " STX G0,[$base+$temp]\t! delay slot" %}
10270 ins_encode( enc_Clear_Array(cnt, base, temp) ); 10259
10260 ins_encode %{
10261 // Compiler ensures base is doubleword aligned and cnt is count of doublewords
// NOTE(review): the code below treats cnt as a byte count (it is decremented
// by 8 and used directly as the store offset), which does not agree with
// "count of doublewords" above -- confirm which unit the matcher supplies.
10262 Register nof_bytes_arg = $cnt$$Register;
10263 Register nof_bytes_tmp = $temp$$Register;
10264 Register base_pointer_arg = $base$$Register;
10265
10266 Label loop;
// Work on a copy so the cnt register itself is left unmodified.
10267 __ mov(nof_bytes_arg, nof_bytes_tmp);
10268
10269 // Loop and clear, walking backwards through the array.
10270 // nof_bytes_tmp (if >0) is always the number of bytes to zero
10271 __ bind(loop);
// deccc sets the condition codes that the following branch tests.
10272 __ deccc(nof_bytes_tmp, 8);
// NOTE(review): the second br() argument (true) is presumably the annul
// flag, so the delay-slot STX would only execute while the branch is taken,
// i.e. while the decremented offset is still >= 0 -- confirm.
10273 __ br(Assembler::greaterEqual, true, Assembler::pt, loop);
10274 __ delayed()-> stx(G0, base_pointer_arg, nof_bytes_tmp);
10275 // %%%% this mini-loop must not cross a cache boundary!
10276 %}
10277 ins_pipe(long_memory_op);
10278 %}
10279
// Match rule for ClearArray that zeroes memory through
// MacroAssembler::bis_zeroing() without a scratch register (G0 is passed in
// the temp position). cnt and base are pinned to G1/O0 and are destroyed
// (USE_KILL); the condition codes are clobbered as well.
//
// The predicate additionally requires BlockZeroingLowLimit to fit a simm13
// immediate. Without that test this rule and clear_array_bis_2 (same match,
// same ins_cost) both accepted the non-simm13 case, so the matcher could pick
// this temp-less variant even though a scratch register had been requested
// for exactly that case by clear_array_bis_2.
10280 instruct clear_array_bis(g1RegX cnt, o0RegP base, Universe dummy, flagsReg ccr) %{
10281 predicate(use_block_zeroing(n->in(2)) && Assembler::is_simm13((int)BlockZeroingLowLimit));
10282 match(Set dummy (ClearArray cnt base));
10283 effect(USE_KILL cnt, USE_KILL base, KILL ccr);
10284 ins_cost(300);
10285 format %{ "CLEAR [$base, $cnt]\t! ClearArray" %}
10286
10287 ins_encode %{
10288
10289 assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
10290 Register to = $base$$Register;
10291 Register count = $cnt$$Register;
10292
10293 Label Ldone;
10294 __ nop(); // Separate short branches
10295 // Use BIS for zeroing (temp is not used).
10296 __ bis_zeroing(to, count, G0, Ldone);
// Ldone is bound immediately after the helper so any exit branch it emits
// lands on the instruction following the zeroing sequence.
10297 __ bind(Ldone);
10298
10299 %}
10300 ins_pipe(long_memory_op);
10301 %}
10302
// Match rule for ClearArray that zeroes memory through
// MacroAssembler::bis_zeroing() and supplies a scratch register (tmp).
// Selected when use_block_zeroing() accepts the length node and
// BlockZeroingLowLimit does not fit a simm13 immediate; clear_array_bis is
// the sibling rule that passes G0 instead of a scratch register.
// cnt and base are pinned to G1/O0 and are destroyed (USE_KILL); tmp is a
// TEMP and the condition codes are clobbered.
10303 instruct clear_array_bis_2(g1RegX cnt, o0RegP base, iRegX tmp, Universe dummy, flagsReg ccr) %{
10304 predicate(use_block_zeroing(n->in(2)) && !Assembler::is_simm13((int)BlockZeroingLowLimit));
10305 match(Set dummy (ClearArray cnt base));
10306 effect(TEMP tmp, USE_KILL cnt, USE_KILL base, KILL ccr);
10307 ins_cost(300);
10308 format %{ "CLEAR [$base, $cnt]\t! ClearArray" %}
10309
10310 ins_encode %{
10311
10312 assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
10313 Register to = $base$$Register;
10314 Register count = $cnt$$Register;
10315 Register temp = $tmp$$Register;
10316
10317 Label Ldone;
10318 __ nop(); // Separate short branches
10319 // Use BIS for zeroing
// NOTE(review): temp is presumably where bis_zeroing materializes the
// size limit that cannot be encoded as an immediate -- confirm in the
// MacroAssembler implementation.
10320 __ bis_zeroing(to, count, temp, Ldone);
// Ldone is bound immediately after the helper call.
10321 __ bind(Ldone);
10322
10323 %}
10271 ins_pipe(long_memory_op); 10324 ins_pipe(long_memory_op);
10272 %} 10325 %}
10273 10326
10274 instruct string_compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result, 10327 instruct string_compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
10275 o7RegI tmp, flagsReg ccr) %{ 10328 o7RegI tmp, flagsReg ccr) %{