Mercurial > hg > truffle
comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 1364:0dc88ad3244e
6940677: Use 64 bytes chunk copy for arraycopy on Sparc
Summary: For large arrays we should copy in 64-byte chunks.
Reviewed-by: twisti
author | kvn |
---|---|
date | Tue, 06 Apr 2010 15:18:10 -0700 |
parents | 3cf667df43ef |
children | 6476042f815c |
comparison
equal
deleted
inserted
replaced
1363:ed4f78aa9282 | 1364:0dc88ad3244e |
---|---|
1998 // Arguments: | 1998 // Arguments: |
1999 // from: O0 | 1999 // from: O0 |
2000 // to: O1 | 2000 // to: O1 |
2001 // count: O2 treated as signed | 2001 // count: O2 treated as signed |
2002 // | 2002 // |
2003 // count -= 2; | |
2004 // if ( count >= 0 ) { // >= 2 elements | |
2005 // if ( count >= 6) { // >= 8 elements | |
2006 // count -= 6; // original count - 8 | |
2007 // do { | |
2008 // copy_8_elements; | |
2009 // count -= 8; | |
2010 // } while ( count >= 0 ); | |
2011 // count += 6; | |
2012 // } | |
2013 // if ( count >= 0 ) { // >= 2 elements | |
2014 // do { | |
2015 // copy_2_elements; | |
2016 // } while ( (count=count-2) >= 0 ); | |
2017 // } | |
2018 // } | |
2019 // count += 2; | |
2020 // if ( count != 0 ) { // 1 element left | |
2021 // copy_1_element; | |
2022 // } | |
2023 // | |
2003 void generate_disjoint_long_copy_core(bool aligned) { | 2024 void generate_disjoint_long_copy_core(bool aligned) { |
2004 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; | 2025 Label L_copy_8_bytes, L_copy_16_bytes, L_exit; |
2005 const Register from = O0; // source array address | 2026 const Register from = O0; // source array address |
2006 const Register to = O1; // destination array address | 2027 const Register to = O1; // destination array address |
2007 const Register count = O2; // elements count | 2028 const Register count = O2; // elements count |
2010 | 2031 |
2011 __ deccc(count, 2); | 2032 __ deccc(count, 2); |
2012 __ mov(G0, offset0); // offset from start of arrays (0) | 2033 __ mov(G0, offset0); // offset from start of arrays (0) |
2013 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); | 2034 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); |
2014 __ delayed()->add(offset0, 8, offset8); | 2035 __ delayed()->add(offset0, 8, offset8); |
2036 | |
2037 // Copy by 64 bytes chunks | |
2038 Label L_copy_64_bytes; | |
2039 const Register from64 = O3; // source address | |
2040 const Register to64 = G3; // destination address | |
2041 __ subcc(count, 6, O3); | |
2042 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes ); | |
2043 __ delayed()->mov(to, to64); | |
2044 // Now we can use O4(offset0), O5(offset8) as temps | |
2045 __ mov(O3, count); | |
2046 __ mov(from, from64); | |
2047 | |
2048 __ align(16); | |
2049 __ BIND(L_copy_64_bytes); | |
2050 for( int off = 0; off < 64; off += 16 ) { | |
2051 __ ldx(from64, off+0, O4); | |
2052 __ ldx(from64, off+8, O5); | |
2053 __ stx(O4, to64, off+0); | |
2054 __ stx(O5, to64, off+8); | |
2055 } | |
2056 __ deccc(count, 8); | |
2057 __ inc(from64, 64); | |
2058 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes); | |
2059 __ delayed()->inc(to64, 64); | |
2060 | |
2061 // Restore O4(offset0), O5(offset8) | |
2062 __ sub(from64, from, offset0); | |
2063 __ inccc(count, 6); | |
2064 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); | |
2065 __ delayed()->add(offset0, 8, offset8); | |
2066 | |
2067 // Copy by 16 bytes chunks | |
2015 __ align(16); | 2068 __ align(16); |
2016 __ BIND(L_copy_16_bytes); | 2069 __ BIND(L_copy_16_bytes); |
2017 __ ldx(from, offset0, O3); | 2070 __ ldx(from, offset0, O3); |
2018 __ ldx(from, offset8, G3); | 2071 __ ldx(from, offset8, G3); |
2019 __ deccc(count, 2); | 2072 __ deccc(count, 2); |
2021 __ inc(offset0, 16); | 2074 __ inc(offset0, 16); |
2022 __ stx(G3, to, offset8); | 2075 __ stx(G3, to, offset8); |
2023 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); | 2076 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes); |
2024 __ delayed()->inc(offset8, 16); | 2077 __ delayed()->inc(offset8, 16); |
2025 | 2078 |
2079 // Copy last 8 bytes | |
2026 __ BIND(L_copy_8_bytes); | 2080 __ BIND(L_copy_8_bytes); |
2027 __ inccc(count, 2); | 2081 __ inccc(count, 2); |
2028 __ brx(Assembler::zero, true, Assembler::pn, L_exit ); | 2082 __ brx(Assembler::zero, true, Assembler::pn, L_exit ); |
2029 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs | 2083 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs |
2030 __ ldx(from, offset0, O3); | 2084 __ ldx(from, offset0, O3); |