comparison src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp @ 2446:13bc79b5c9c8

7033154: Improve C1 arraycopy performance Summary: better static analysis. Take advantage of array copy stubs. Reviewed-by: never
author roland
date Sun, 03 Apr 2011 12:00:54 +0200
parents 1b4e6a5d98e0
children bb22629531fa
comparison
equal deleted inserted replaced
2445:08eb13460b3a 2446:13bc79b5c9c8
2063 2063
2064 // always do stub if no type information is available. it's ok if 2064 // always do stub if no type information is available. it's ok if
2065 // the known type isn't loaded since the code sanity checks 2065 // the known type isn't loaded since the code sanity checks
2066 // in debug mode and the type isn't required when we know the exact type 2066 // in debug mode and the type isn't required when we know the exact type
2067 // also check that the type is an array type. 2067 // also check that the type is an array type.
2068 // We also, for now, always call the stub if the barrier set requires a 2068 if (op->expected_type() == NULL) {
2069 // write_ref_pre barrier (which the stub does, but none of the optimized
2070 // cases currently does).
2071 if (op->expected_type() == NULL ||
2072 Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) {
2073 __ mov(src, O0); 2069 __ mov(src, O0);
2074 __ mov(src_pos, O1); 2070 __ mov(src_pos, O1);
2075 __ mov(dst, O2); 2071 __ mov(dst, O2);
2076 __ mov(dst_pos, O3); 2072 __ mov(dst_pos, O3);
2077 __ mov(length, O4); 2073 __ mov(length, O4);
2078 __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::arraycopy)); 2074 address copyfunc_addr = StubRoutines::generic_arraycopy();
2079 2075
2080 __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry()); 2076 if (copyfunc_addr == NULL) { // Use C version if stub was not generated
2081 __ delayed()->nop(); 2077 __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::arraycopy));
2078 } else {
2079 #ifndef PRODUCT
2080 if (PrintC1Statistics) {
2081 address counter = (address)&Runtime1::_generic_arraycopystub_cnt;
2082 __ inc_counter(counter, G1, G3);
2083 }
2084 #endif
2085 __ call_VM_leaf(tmp, copyfunc_addr);
2086 }
2087
2088 if (copyfunc_addr != NULL) {
2089 __ xor3(O0, -1, tmp);
2090 __ sub(length, tmp, length);
2091 __ add(src_pos, tmp, src_pos);
2092 __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry());
2093 __ delayed()->add(dst_pos, tmp, dst_pos);
2094 } else {
2095 __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry());
2096 __ delayed()->nop();
2097 }
2082 __ bind(*stub->continuation()); 2098 __ bind(*stub->continuation());
2083 return; 2099 return;
2084 } 2100 }
2085 2101
2086 assert(default_type != NULL && default_type->is_array_klass(), "must be true at this point"); 2102 assert(default_type != NULL && default_type->is_array_klass(), "must be true at this point");
2133 __ cmp(tmp2, tmp); 2149 __ cmp(tmp2, tmp);
2134 __ br(Assembler::carrySet, false, Assembler::pn, *stub->entry()); 2150 __ br(Assembler::carrySet, false, Assembler::pn, *stub->entry());
2135 __ delayed()->nop(); 2151 __ delayed()->nop();
2136 } 2152 }
2137 2153
2154 #ifndef _LP64
2155 __ sra(dst_pos, 0, dst_pos); //higher 32bits must be null
2156 __ sra(src_pos, 0, src_pos); //higher 32bits must be null
2157 #endif
2158
2159 int shift = shift_amount(basic_type);
2160
2138 if (flags & LIR_OpArrayCopy::type_check) { 2161 if (flags & LIR_OpArrayCopy::type_check) {
2139 if (UseCompressedOops) { 2162 // We don't know the array types are compatible
2140 // We don't need decode because we just need to compare 2163 if (basic_type != T_OBJECT) {
2141 __ lduw(src, oopDesc::klass_offset_in_bytes(), tmp); 2164 // Simple test for basic type arrays
2142 __ lduw(dst, oopDesc::klass_offset_in_bytes(), tmp2); 2165 if (UseCompressedOops) {
2143 __ cmp(tmp, tmp2); 2166 // We don't need decode because we just need to compare
2144 __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry()); 2167 __ lduw(src, oopDesc::klass_offset_in_bytes(), tmp);
2168 __ lduw(dst, oopDesc::klass_offset_in_bytes(), tmp2);
2169 __ cmp(tmp, tmp2);
2170 __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry());
2171 } else {
2172 __ ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp);
2173 __ ld_ptr(dst, oopDesc::klass_offset_in_bytes(), tmp2);
2174 __ cmp(tmp, tmp2);
2175 __ brx(Assembler::notEqual, false, Assembler::pt, *stub->entry());
2176 }
2177 __ delayed()->nop();
2145 } else { 2178 } else {
2146 __ ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp); 2179 // For object arrays, if src is a sub class of dst then we can
2147 __ ld_ptr(dst, oopDesc::klass_offset_in_bytes(), tmp2); 2180 // safely do the copy.
2148 __ cmp(tmp, tmp2); 2181 address copyfunc_addr = StubRoutines::checkcast_arraycopy();
2149 __ brx(Assembler::notEqual, false, Assembler::pt, *stub->entry()); 2182
2150 } 2183 Label cont, slow;
2151 __ delayed()->nop(); 2184 assert_different_registers(tmp, tmp2, G3, G1);
2185
2186 __ load_klass(src, G3);
2187 __ load_klass(dst, G1);
2188
2189 __ check_klass_subtype_fast_path(G3, G1, tmp, tmp2, &cont, copyfunc_addr == NULL ? stub->entry() : &slow, NULL);
2190
2191 __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
2192 __ delayed()->nop();
2193
2194 __ cmp(G3, 0);
2195 if (copyfunc_addr != NULL) { // use stub if available
2196 // src is not a sub class of dst so we have to do a
2197 // per-element check.
2198 __ br(Assembler::notEqual, false, Assembler::pt, cont);
2199 __ delayed()->nop();
2200
2201 __ bind(slow);
2202
2203 int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
2204 if ((flags & mask) != mask) {
2205 // Check that both of them are object arrays.
2206 assert(flags & mask, "one of the two should be known to be an object array");
2207
2208 if (!(flags & LIR_OpArrayCopy::src_objarray)) {
2209 __ load_klass(src, tmp);
2210 } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
2211 __ load_klass(dst, tmp);
2212 }
2213 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
2214 Klass::layout_helper_offset_in_bytes();
2215
2216 __ lduw(tmp, lh_offset, tmp2);
2217
2218 jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2219 __ set(objArray_lh, tmp);
2220 __ cmp(tmp, tmp2);
2221 __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry());
2222 __ delayed()->nop();
2223 }
2224
2225 Register src_ptr = O0;
2226 Register dst_ptr = O1;
2227 Register len = O2;
2228 Register chk_off = O3;
2229 Register super_k = O4;
2230
2231 __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr);
2232 if (shift == 0) {
2233 __ add(src_ptr, src_pos, src_ptr);
2234 } else {
2235 __ sll(src_pos, shift, tmp);
2236 __ add(src_ptr, tmp, src_ptr);
2237 }
2238
2239 __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr);
2240 if (shift == 0) {
2241 __ add(dst_ptr, dst_pos, dst_ptr);
2242 } else {
2243 __ sll(dst_pos, shift, tmp);
2244 __ add(dst_ptr, tmp, dst_ptr);
2245 }
2246 LP64_ONLY( __ sra(length, 0, length)); //higher 32bits must be null
2247 __ mov(length, len);
2248 __ load_klass(dst, tmp);
2249
2250 int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
2251 objArrayKlass::element_klass_offset_in_bytes());
2252 __ ld_ptr(tmp, ek_offset, super_k);
2253
2254 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
2255 Klass::super_check_offset_offset_in_bytes());
2256 __ lduw(super_k, sco_offset, chk_off);
2257
2258 __ call_VM_leaf(tmp, copyfunc_addr);
2259
2260 #ifndef PRODUCT
2261 if (PrintC1Statistics) {
2262 Label failed;
2263 __ br_notnull(O0, false, Assembler::pn, failed);
2264 __ delayed()->nop();
2265 __ inc_counter((address)&Runtime1::_arraycopy_checkcast_cnt, G1, G3);
2266 __ bind(failed);
2267 }
2268 #endif
2269
2270 __ br_null(O0, false, Assembler::pt, *stub->continuation());
2271 __ delayed()->xor3(O0, -1, tmp);
2272
2273 #ifndef PRODUCT
2274 if (PrintC1Statistics) {
2275 __ inc_counter((address)&Runtime1::_arraycopy_checkcast_attempt_cnt, G1, G3);
2276 }
2277 #endif
2278
2279 __ sub(length, tmp, length);
2280 __ add(src_pos, tmp, src_pos);
2281 __ br(Assembler::always, false, Assembler::pt, *stub->entry());
2282 __ delayed()->add(dst_pos, tmp, dst_pos);
2283
2284 __ bind(cont);
2285 } else {
2286 __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
2287 __ delayed()->nop();
2288 __ bind(cont);
2289 }
2290 }
2152 } 2291 }
2153 2292
2154 #ifdef ASSERT 2293 #ifdef ASSERT
2155 if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { 2294 if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
2156 // Sanity check the known type with the incoming class. For the 2295 // Sanity check the known type with the incoming class. For the
2205 __ stop("incorrect type information in arraycopy"); 2344 __ stop("incorrect type information in arraycopy");
2206 __ bind(known_ok); 2345 __ bind(known_ok);
2207 } 2346 }
2208 #endif 2347 #endif
2209 2348
2210 int shift = shift_amount(basic_type); 2349 #ifndef PRODUCT
2350 if (PrintC1Statistics) {
2351 address counter = Runtime1::arraycopy_count_address(basic_type);
2352 __ inc_counter(counter, G1, G3);
2353 }
2354 #endif
2211 2355
2212 Register src_ptr = O0; 2356 Register src_ptr = O0;
2213 Register dst_ptr = O1; 2357 Register dst_ptr = O1;
2214 Register len = O2; 2358 Register len = O2;
2215 2359
2216 __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr); 2360 __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr);
2217 LP64_ONLY(__ sra(src_pos, 0, src_pos);) //higher 32bits must be null
2218 if (shift == 0) { 2361 if (shift == 0) {
2219 __ add(src_ptr, src_pos, src_ptr); 2362 __ add(src_ptr, src_pos, src_ptr);
2220 } else { 2363 } else {
2221 __ sll(src_pos, shift, tmp); 2364 __ sll(src_pos, shift, tmp);
2222 __ add(src_ptr, tmp, src_ptr); 2365 __ add(src_ptr, tmp, src_ptr);
2223 } 2366 }
2224 2367
2225 __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr); 2368 __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr);
2226 LP64_ONLY(__ sra(dst_pos, 0, dst_pos);) //higher 32bits must be null
2227 if (shift == 0) { 2369 if (shift == 0) {
2228 __ add(dst_ptr, dst_pos, dst_ptr); 2370 __ add(dst_ptr, dst_pos, dst_ptr);
2229 } else { 2371 } else {
2230 __ sll(dst_pos, shift, tmp); 2372 __ sll(dst_pos, shift, tmp);
2231 __ add(dst_ptr, tmp, dst_ptr); 2373 __ add(dst_ptr, tmp, dst_ptr);
2232 } 2374 }
2233 2375
2234 if (basic_type != T_OBJECT) { 2376 bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
2235 if (shift == 0) { 2377 bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
2236 __ mov(length, len); 2378 const char *name;
2237 } else { 2379 address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
2238 __ sll(length, shift, len); 2380
2239 } 2381 // arraycopy stubs take a length in number of elements, so don't scale it.
2240 __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::primitive_arraycopy)); 2382 __ mov(length, len);
2241 } else { 2383 __ call_VM_leaf(tmp, entry);
2242 // oop_arraycopy takes a length in number of elements, so don't scale it.
2243 __ mov(length, len);
2244 __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::oop_arraycopy));
2245 }
2246 2384
2247 __ bind(*stub->continuation()); 2385 __ bind(*stub->continuation());
2248 } 2386 }
2249 2387
2250 2388