Mercurial > hg > graal-jvmci-8
comparison src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp @ 2446:13bc79b5c9c8
7033154: Improve C1 arraycopy performance
Summary: better static analysis. Take advantage of array copy stubs.
Reviewed-by: never
author | roland |
---|---|
date | Sun, 03 Apr 2011 12:00:54 +0200 |
parents | 1b4e6a5d98e0 |
children | bb22629531fa |
comparison
equal
deleted
inserted
replaced
2445:08eb13460b3a | 2446:13bc79b5c9c8 |
---|---|
2063 | 2063 |
2064 // always do stub if no type information is available. it's ok if | 2064 // always do stub if no type information is available. it's ok if |
2065 // the known type isn't loaded since the code sanity checks | 2065 // the known type isn't loaded since the code sanity checks |
2066 // in debug mode and the type isn't required when we know the exact type | 2066 // in debug mode and the type isn't required when we know the exact type |
2067 // also check that the type is an array type. | 2067 // also check that the type is an array type. |
2068 // We also, for now, always call the stub if the barrier set requires a | 2068 if (op->expected_type() == NULL) { |
2069 // write_ref_pre barrier (which the stub does, but none of the optimized | |
2070 // cases currently does). | |
2071 if (op->expected_type() == NULL || | |
2072 Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) { | |
2073 __ mov(src, O0); | 2069 __ mov(src, O0); |
2074 __ mov(src_pos, O1); | 2070 __ mov(src_pos, O1); |
2075 __ mov(dst, O2); | 2071 __ mov(dst, O2); |
2076 __ mov(dst_pos, O3); | 2072 __ mov(dst_pos, O3); |
2077 __ mov(length, O4); | 2073 __ mov(length, O4); |
2078 __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::arraycopy)); | 2074 address copyfunc_addr = StubRoutines::generic_arraycopy(); |
2079 | 2075 |
2080 __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry()); | 2076 if (copyfunc_addr == NULL) { // Use C version if stub was not generated |
2081 __ delayed()->nop(); | 2077 __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::arraycopy)); |
2078 } else { | |
2079 #ifndef PRODUCT | |
2080 if (PrintC1Statistics) { | |
2081 address counter = (address)&Runtime1::_generic_arraycopystub_cnt; | |
2082 __ inc_counter(counter, G1, G3); | |
2083 } | |
2084 #endif | |
2085 __ call_VM_leaf(tmp, copyfunc_addr); | |
2086 } | |
2087 | |
2088 if (copyfunc_addr != NULL) { | |
2089 __ xor3(O0, -1, tmp); | |
2090 __ sub(length, tmp, length); | |
2091 __ add(src_pos, tmp, src_pos); | |
2092 __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry()); | |
2093 __ delayed()->add(dst_pos, tmp, dst_pos); | |
2094 } else { | |
2095 __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry()); | |
2096 __ delayed()->nop(); | |
2097 } | |
2082 __ bind(*stub->continuation()); | 2098 __ bind(*stub->continuation()); |
2083 return; | 2099 return; |
2084 } | 2100 } |
2085 | 2101 |
2086 assert(default_type != NULL && default_type->is_array_klass(), "must be true at this point"); | 2102 assert(default_type != NULL && default_type->is_array_klass(), "must be true at this point"); |
2133 __ cmp(tmp2, tmp); | 2149 __ cmp(tmp2, tmp); |
2134 __ br(Assembler::carrySet, false, Assembler::pn, *stub->entry()); | 2150 __ br(Assembler::carrySet, false, Assembler::pn, *stub->entry()); |
2135 __ delayed()->nop(); | 2151 __ delayed()->nop(); |
2136 } | 2152 } |
2137 | 2153 |
2154 #ifndef _LP64 | |
2155 __ sra(dst_pos, 0, dst_pos); //higher 32bits must be null | |
2156 __ sra(src_pos, 0, src_pos); //higher 32bits must be null | |
2157 #endif | |
2158 | |
2159 int shift = shift_amount(basic_type); | |
2160 | |
2138 if (flags & LIR_OpArrayCopy::type_check) { | 2161 if (flags & LIR_OpArrayCopy::type_check) { |
2139 if (UseCompressedOops) { | 2162 // We don't know the array types are compatible |
2140 // We don't need decode because we just need to compare | 2163 if (basic_type != T_OBJECT) { |
2141 __ lduw(src, oopDesc::klass_offset_in_bytes(), tmp); | 2164 // Simple test for basic type arrays |
2142 __ lduw(dst, oopDesc::klass_offset_in_bytes(), tmp2); | 2165 if (UseCompressedOops) { |
2143 __ cmp(tmp, tmp2); | 2166 // We don't need decode because we just need to compare |
2144 __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry()); | 2167 __ lduw(src, oopDesc::klass_offset_in_bytes(), tmp); |
2168 __ lduw(dst, oopDesc::klass_offset_in_bytes(), tmp2); | |
2169 __ cmp(tmp, tmp2); | |
2170 __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry()); | |
2171 } else { | |
2172 __ ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp); | |
2173 __ ld_ptr(dst, oopDesc::klass_offset_in_bytes(), tmp2); | |
2174 __ cmp(tmp, tmp2); | |
2175 __ brx(Assembler::notEqual, false, Assembler::pt, *stub->entry()); | |
2176 } | |
2177 __ delayed()->nop(); | |
2145 } else { | 2178 } else { |
2146 __ ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp); | 2179 // For object arrays, if src is a sub class of dst then we can |
2147 __ ld_ptr(dst, oopDesc::klass_offset_in_bytes(), tmp2); | 2180 // safely do the copy. |
2148 __ cmp(tmp, tmp2); | 2181 address copyfunc_addr = StubRoutines::checkcast_arraycopy(); |
2149 __ brx(Assembler::notEqual, false, Assembler::pt, *stub->entry()); | 2182 |
2150 } | 2183 Label cont, slow; |
2151 __ delayed()->nop(); | 2184 assert_different_registers(tmp, tmp2, G3, G1); |
2185 | |
2186 __ load_klass(src, G3); | |
2187 __ load_klass(dst, G1); | |
2188 | |
2189 __ check_klass_subtype_fast_path(G3, G1, tmp, tmp2, &cont, copyfunc_addr == NULL ? stub->entry() : &slow, NULL); | |
2190 | |
2191 __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); | |
2192 __ delayed()->nop(); | |
2193 | |
2194 __ cmp(G3, 0); | |
2195 if (copyfunc_addr != NULL) { // use stub if available | |
2196 // src is not a sub class of dst so we have to do a | |
2197 // per-element check. | |
2198 __ br(Assembler::notEqual, false, Assembler::pt, cont); | |
2199 __ delayed()->nop(); | |
2200 | |
2201 __ bind(slow); | |
2202 | |
2203 int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; | |
2204 if ((flags & mask) != mask) { | |
2205 // Check that both of them are object arrays. | |
2206 assert(flags & mask, "one of the two should be known to be an object array"); | |
2207 | |
2208 if (!(flags & LIR_OpArrayCopy::src_objarray)) { | |
2209 __ load_klass(src, tmp); | |
2210 } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { | |
2211 __ load_klass(dst, tmp); | |
2212 } | |
2213 int lh_offset = klassOopDesc::header_size() * HeapWordSize + | |
2214 Klass::layout_helper_offset_in_bytes(); | |
2215 | |
2216 __ lduw(tmp, lh_offset, tmp2); | |
2217 | |
2218 jint objArray_lh = Klass::array_layout_helper(T_OBJECT); | |
2219 __ set(objArray_lh, tmp); | |
2220 __ cmp(tmp, tmp2); | |
2221 __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry()); | |
2222 __ delayed()->nop(); | |
2223 } | |
2224 | |
2225 Register src_ptr = O0; | |
2226 Register dst_ptr = O1; | |
2227 Register len = O2; | |
2228 Register chk_off = O3; | |
2229 Register super_k = O4; | |
2230 | |
2231 __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr); | |
2232 if (shift == 0) { | |
2233 __ add(src_ptr, src_pos, src_ptr); | |
2234 } else { | |
2235 __ sll(src_pos, shift, tmp); | |
2236 __ add(src_ptr, tmp, src_ptr); | |
2237 } | |
2238 | |
2239 __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr); | |
2240 if (shift == 0) { | |
2241 __ add(dst_ptr, dst_pos, dst_ptr); | |
2242 } else { | |
2243 __ sll(dst_pos, shift, tmp); | |
2244 __ add(dst_ptr, tmp, dst_ptr); | |
2245 } | |
2246 LP64_ONLY( __ sra(length, 0, length)); //higher 32bits must be null | |
2247 __ mov(length, len); | |
2248 __ load_klass(dst, tmp); | |
2249 | |
2250 int ek_offset = (klassOopDesc::header_size() * HeapWordSize + | |
2251 objArrayKlass::element_klass_offset_in_bytes()); | |
2252 __ ld_ptr(tmp, ek_offset, super_k); | |
2253 | |
2254 int sco_offset = (klassOopDesc::header_size() * HeapWordSize + | |
2255 Klass::super_check_offset_offset_in_bytes()); | |
2256 __ lduw(super_k, sco_offset, chk_off); | |
2257 | |
2258 __ call_VM_leaf(tmp, copyfunc_addr); | |
2259 | |
2260 #ifndef PRODUCT | |
2261 if (PrintC1Statistics) { | |
2262 Label failed; | |
2263 __ br_notnull(O0, false, Assembler::pn, failed); | |
2264 __ delayed()->nop(); | |
2265 __ inc_counter((address)&Runtime1::_arraycopy_checkcast_cnt, G1, G3); | |
2266 __ bind(failed); | |
2267 } | |
2268 #endif | |
2269 | |
2270 __ br_null(O0, false, Assembler::pt, *stub->continuation()); | |
2271 __ delayed()->xor3(O0, -1, tmp); | |
2272 | |
2273 #ifndef PRODUCT | |
2274 if (PrintC1Statistics) { | |
2275 __ inc_counter((address)&Runtime1::_arraycopy_checkcast_attempt_cnt, G1, G3); | |
2276 } | |
2277 #endif | |
2278 | |
2279 __ sub(length, tmp, length); | |
2280 __ add(src_pos, tmp, src_pos); | |
2281 __ br(Assembler::always, false, Assembler::pt, *stub->entry()); | |
2282 __ delayed()->add(dst_pos, tmp, dst_pos); | |
2283 | |
2284 __ bind(cont); | |
2285 } else { | |
2286 __ br(Assembler::equal, false, Assembler::pn, *stub->entry()); | |
2287 __ delayed()->nop(); | |
2288 __ bind(cont); | |
2289 } | |
2290 } | |
2152 } | 2291 } |
2153 | 2292 |
2154 #ifdef ASSERT | 2293 #ifdef ASSERT |
2155 if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { | 2294 if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { |
2156 // Sanity check the known type with the incoming class. For the | 2295 // Sanity check the known type with the incoming class. For the |
2205 __ stop("incorrect type information in arraycopy"); | 2344 __ stop("incorrect type information in arraycopy"); |
2206 __ bind(known_ok); | 2345 __ bind(known_ok); |
2207 } | 2346 } |
2208 #endif | 2347 #endif |
2209 | 2348 |
2210 int shift = shift_amount(basic_type); | 2349 #ifndef PRODUCT |
2350 if (PrintC1Statistics) { | |
2351 address counter = Runtime1::arraycopy_count_address(basic_type); | |
2352 __ inc_counter(counter, G1, G3); | |
2353 } | |
2354 #endif | |
2211 | 2355 |
2212 Register src_ptr = O0; | 2356 Register src_ptr = O0; |
2213 Register dst_ptr = O1; | 2357 Register dst_ptr = O1; |
2214 Register len = O2; | 2358 Register len = O2; |
2215 | 2359 |
2216 __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr); | 2360 __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr); |
2217 LP64_ONLY(__ sra(src_pos, 0, src_pos);) //higher 32bits must be null | |
2218 if (shift == 0) { | 2361 if (shift == 0) { |
2219 __ add(src_ptr, src_pos, src_ptr); | 2362 __ add(src_ptr, src_pos, src_ptr); |
2220 } else { | 2363 } else { |
2221 __ sll(src_pos, shift, tmp); | 2364 __ sll(src_pos, shift, tmp); |
2222 __ add(src_ptr, tmp, src_ptr); | 2365 __ add(src_ptr, tmp, src_ptr); |
2223 } | 2366 } |
2224 | 2367 |
2225 __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr); | 2368 __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr); |
2226 LP64_ONLY(__ sra(dst_pos, 0, dst_pos);) //higher 32bits must be null | |
2227 if (shift == 0) { | 2369 if (shift == 0) { |
2228 __ add(dst_ptr, dst_pos, dst_ptr); | 2370 __ add(dst_ptr, dst_pos, dst_ptr); |
2229 } else { | 2371 } else { |
2230 __ sll(dst_pos, shift, tmp); | 2372 __ sll(dst_pos, shift, tmp); |
2231 __ add(dst_ptr, tmp, dst_ptr); | 2373 __ add(dst_ptr, tmp, dst_ptr); |
2232 } | 2374 } |
2233 | 2375 |
2234 if (basic_type != T_OBJECT) { | 2376 bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; |
2235 if (shift == 0) { | 2377 bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; |
2236 __ mov(length, len); | 2378 const char *name; |
2237 } else { | 2379 address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); |
2238 __ sll(length, shift, len); | 2380 |
2239 } | 2381 // arraycopy stubs take a length in number of elements, so don't scale it. |
2240 __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::primitive_arraycopy)); | 2382 __ mov(length, len); |
2241 } else { | 2383 __ call_VM_leaf(tmp, entry); |
2242 // oop_arraycopy takes a length in number of elements, so don't scale it. | |
2243 __ mov(length, len); | |
2244 __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::oop_arraycopy)); | |
2245 } | |
2246 | 2384 |
2247 __ bind(*stub->continuation()); | 2385 __ bind(*stub->continuation()); |
2248 } | 2386 } |
2249 | 2387 |
2250 | 2388 |