comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 1364:0dc88ad3244e

6940677: Use 64 bytes chunk copy for arraycopy on Sparc
Summary: For large arrays we should use 64 bytes chunks copy.
Reviewed-by: twisti
author kvn
date Tue, 06 Apr 2010 15:18:10 -0700
parents 3cf667df43ef
children 6476042f815c
comparison
equal deleted inserted replaced
1363:ed4f78aa9282 1364:0dc88ad3244e
1998 // Arguments: 1998 // Arguments:
1999 // from: O0 1999 // from: O0
2000 // to: O1 2000 // to: O1
2001 // count: O2 treated as signed 2001 // count: O2 treated as signed
2002 // 2002 //
2003 // count -= 2;
2004 // if ( count >= 0 ) { // >= 2 elements
2005 // if ( count >= 6 ) { // >= 8 elements
2006 // count -= 6; // original count - 8
2007 // do {
2008 // copy_8_elements;
2009 // count -= 8;
2010 // } while ( count >= 0 );
2011 // count += 6;
2012 // }
2013 // if ( count >= 0 ) { // >= 2 elements
2014 // do {
2015 // copy_2_elements;
2016 // } while ( (count=count-2) >= 0 );
2017 // }
2018 // }
2019 // count += 2;
2020 // if ( count != 0 ) { // 1 element left
2021 // copy_1_element;
2022 // }
2023 //
2003 void generate_disjoint_long_copy_core(bool aligned) { 2024 void generate_disjoint_long_copy_core(bool aligned) {
2004 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; 2025 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
2005 const Register from = O0; // source array address 2026 const Register from = O0; // source array address
2006 const Register to = O1; // destination array address 2027 const Register to = O1; // destination array address
2007 const Register count = O2; // elements count 2028 const Register count = O2; // elements count
2010 2031
2011 __ deccc(count, 2); 2032 __ deccc(count, 2);
2012 __ mov(G0, offset0); // offset from start of arrays (0) 2033 __ mov(G0, offset0); // offset from start of arrays (0)
2013 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); 2034 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
2014 __ delayed()->add(offset0, 8, offset8); 2035 __ delayed()->add(offset0, 8, offset8);
2036
2037 // Copy by 64 bytes chunks
2038 Label L_copy_64_bytes;
2039 const Register from64 = O3; // source address
2040 const Register to64 = G3; // destination address
2041 __ subcc(count, 6, O3);
2042 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
2043 __ delayed()->mov(to, to64);
2044 // Now we can use O4(offset0), O5(offset8) as temps
2045 __ mov(O3, count);
2046 __ mov(from, from64);
2047
2048 __ align(16);
2049 __ BIND(L_copy_64_bytes);
2050 for( int off = 0; off < 64; off += 16 ) {
2051 __ ldx(from64, off+0, O4);
2052 __ ldx(from64, off+8, O5);
2053 __ stx(O4, to64, off+0);
2054 __ stx(O5, to64, off+8);
2055 }
2056 __ deccc(count, 8);
2057 __ inc(from64, 64);
2058 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes);
2059 __ delayed()->inc(to64, 64);
2060
2061 // Restore O4(offset0), O5(offset8)
2062 __ sub(from64, from, offset0);
2063 __ inccc(count, 6);
2064 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
2065 __ delayed()->add(offset0, 8, offset8);
2066
2067 // Copy by 16 bytes chunks
2015 __ align(16); 2068 __ align(16);
2016 __ BIND(L_copy_16_bytes); 2069 __ BIND(L_copy_16_bytes);
2017 __ ldx(from, offset0, O3); 2070 __ ldx(from, offset0, O3);
2018 __ ldx(from, offset8, G3); 2071 __ ldx(from, offset8, G3);
2019 __ deccc(count, 2); 2072 __ deccc(count, 2);
2021 __ inc(offset0, 16); 2074 __ inc(offset0, 16);
2022 __ stx(G3, to, offset8); 2075 __ stx(G3, to, offset8);
2023 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); 2076 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
2024 __ delayed()->inc(offset8, 16); 2077 __ delayed()->inc(offset8, 16);
2025 2078
2079 // Copy last 8 bytes
2026 __ BIND(L_copy_8_bytes); 2080 __ BIND(L_copy_8_bytes);
2027 __ inccc(count, 2); 2081 __ inccc(count, 2);
2028 __ brx(Assembler::zero, true, Assembler::pn, L_exit ); 2082 __ brx(Assembler::zero, true, Assembler::pn, L_exit );
2029 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs 2083 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
2030 __ ldx(from, offset0, O3); 2084 __ ldx(from, offset0, O3);