Mercurial > hg > truffle
comparison src/cpu/x86/vm/c1_LIRAssembler_x86.cpp @ 2446:13bc79b5c9c8
7033154: Improve C1 arraycopy performance
Summary: better static analysis. Take advantage of array copy stubs.
Reviewed-by: never
author | roland |
---|---|
date | Sun, 03 Apr 2011 12:00:54 +0200 |
parents | 09f96c3ff1ad |
children | 15c9a0e16269 |
comparison
equal
deleted
inserted
replaced
2445:08eb13460b3a | 2446:13bc79b5c9c8 |
---|---|
3100 CodeStub* stub = op->stub(); | 3100 CodeStub* stub = op->stub(); |
3101 int flags = op->flags(); | 3101 int flags = op->flags(); |
3102 BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; | 3102 BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; |
3103 if (basic_type == T_ARRAY) basic_type = T_OBJECT; | 3103 if (basic_type == T_ARRAY) basic_type = T_OBJECT; |
3104 | 3104 |
3105 // if we don't know anything or it's an object array, just go through the generic arraycopy | 3105 // if we don't know anything, just go through the generic arraycopy |
3106 if (default_type == NULL) { | 3106 if (default_type == NULL) { |
3107 Label done; | 3107 Label done; |
3108 // save outgoing arguments on stack in case call to System.arraycopy is needed | 3108 // save outgoing arguments on stack in case call to System.arraycopy is needed |
3109 // HACK ALERT. This code used to push the parameters in a hardwired fashion | 3109 // HACK ALERT. This code used to push the parameters in a hardwired fashion |
3110 // for interpreter calling conventions. Now we have to do it in new style conventions. | 3110 // for interpreter calling conventions. Now we have to do it in new style conventions. |
3121 // these are just temporary placements until we need to reload | 3121 // these are just temporary placements until we need to reload |
3122 store_parameter(src_pos, 3); | 3122 store_parameter(src_pos, 3); |
3123 store_parameter(src, 4); | 3123 store_parameter(src, 4); |
3124 NOT_LP64(assert(src == rcx && src_pos == rdx, "mismatch in calling convention");) | 3124 NOT_LP64(assert(src == rcx && src_pos == rdx, "mismatch in calling convention");) |
3125 | 3125 |
3126 address entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy); | 3126 address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy); |
3127 | |
3128 address copyfunc_addr = StubRoutines::generic_arraycopy(); | |
3127 | 3129 |
3128 // pass arguments: may push as this is not a safepoint; SP must be fixed at each safepoint | 3130 // pass arguments: may push as this is not a safepoint; SP must be fixed at each safepoint |
3129 #ifdef _LP64 | 3131 #ifdef _LP64 |
3130 // The arguments are in java calling convention so we can trivially shift them to C | 3132 // The arguments are in java calling convention so we can trivially shift them to C |
3131 // convention | 3133 // convention |
3139 __ mov(c_rarg3, j_rarg3); | 3141 __ mov(c_rarg3, j_rarg3); |
3140 #ifdef _WIN64 | 3142 #ifdef _WIN64 |
3141 // Allocate abi space for args but be sure to keep stack aligned | 3143 // Allocate abi space for args but be sure to keep stack aligned |
3142 __ subptr(rsp, 6*wordSize); | 3144 __ subptr(rsp, 6*wordSize); |
3143 store_parameter(j_rarg4, 4); | 3145 store_parameter(j_rarg4, 4); |
3144 __ call(RuntimeAddress(entry)); | 3146 if (copyfunc_addr == NULL) { // Use C version if stub was not generated |
3147 __ call(RuntimeAddress(C_entry)); | |
3148 } else { | |
3149 #ifndef PRODUCT | |
3150 if (PrintC1Statistics) { | |
3151 __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); | |
3152 } | |
3153 #endif | |
3154 __ call(RuntimeAddress(copyfunc_addr)); | |
3155 } | |
3145 __ addptr(rsp, 6*wordSize); | 3156 __ addptr(rsp, 6*wordSize); |
3146 #else | 3157 #else |
3147 __ mov(c_rarg4, j_rarg4); | 3158 __ mov(c_rarg4, j_rarg4); |
3148 __ call(RuntimeAddress(entry)); | 3159 if (copyfunc_addr == NULL) { // Use C version if stub was not generated |
3160 __ call(RuntimeAddress(C_entry)); | |
3161 } else { | |
3162 #ifndef PRODUCT | |
3163 if (PrintC1Statistics) { | |
3164 __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); | |
3165 } | |
3166 #endif | |
3167 __ call(RuntimeAddress(copyfunc_addr)); | |
3168 } | |
3149 #endif // _WIN64 | 3169 #endif // _WIN64 |
3150 #else | 3170 #else |
3151 __ push(length); | 3171 __ push(length); |
3152 __ push(dst_pos); | 3172 __ push(dst_pos); |
3153 __ push(dst); | 3173 __ push(dst); |
3154 __ push(src_pos); | 3174 __ push(src_pos); |
3155 __ push(src); | 3175 __ push(src); |
3156 __ call_VM_leaf(entry, 5); // removes pushed parameter from the stack | 3176 |
3177 if (copyfunc_addr == NULL) { // Use C version if stub was not generated | |
3178 __ call_VM_leaf(C_entry, 5); // removes pushed parameter from the stack | |
3179 } else { | |
3180 #ifndef PRODUCT | |
3181 if (PrintC1Statistics) { | |
3182 __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); | |
3183 } | |
3184 #endif | |
3185 __ call_VM_leaf(copyfunc_addr, 5); // removes pushed parameter from the stack | |
3186 } | |
3157 | 3187 |
3158 #endif // _LP64 | 3188 #endif // _LP64 |
3159 | 3189 |
3160 __ cmpl(rax, 0); | 3190 __ cmpl(rax, 0); |
3161 __ jcc(Assembler::equal, *stub->continuation()); | 3191 __ jcc(Assembler::equal, *stub->continuation()); |
3192 | |
3193 if (copyfunc_addr != NULL) { | |
3194 __ mov(tmp, rax); | |
3195 __ xorl(tmp, -1); | |
3196 } | |
3162 | 3197 |
3163 // Reload values from the stack so they are where the stub | 3198 // Reload values from the stack so they are where the stub |
3164 // expects them. | 3199 // expects them. |
3165 __ movptr (dst, Address(rsp, 0*BytesPerWord)); | 3200 __ movptr (dst, Address(rsp, 0*BytesPerWord)); |
3166 __ movptr (dst_pos, Address(rsp, 1*BytesPerWord)); | 3201 __ movptr (dst_pos, Address(rsp, 1*BytesPerWord)); |
3167 __ movptr (length, Address(rsp, 2*BytesPerWord)); | 3202 __ movptr (length, Address(rsp, 2*BytesPerWord)); |
3168 __ movptr (src_pos, Address(rsp, 3*BytesPerWord)); | 3203 __ movptr (src_pos, Address(rsp, 3*BytesPerWord)); |
3169 __ movptr (src, Address(rsp, 4*BytesPerWord)); | 3204 __ movptr (src, Address(rsp, 4*BytesPerWord)); |
3205 | |
3206 if (copyfunc_addr != NULL) { | |
3207 __ subl(length, tmp); | |
3208 __ addl(src_pos, tmp); | |
3209 __ addl(dst_pos, tmp); | |
3210 } | |
3170 __ jmp(*stub->entry()); | 3211 __ jmp(*stub->entry()); |
3171 | 3212 |
3172 __ bind(*stub->continuation()); | 3213 __ bind(*stub->continuation()); |
3173 return; | 3214 return; |
3174 } | 3215 } |
3224 } | 3265 } |
3225 if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { | 3266 if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { |
3226 __ testl(dst_pos, dst_pos); | 3267 __ testl(dst_pos, dst_pos); |
3227 __ jcc(Assembler::less, *stub->entry()); | 3268 __ jcc(Assembler::less, *stub->entry()); |
3228 } | 3269 } |
3229 if (flags & LIR_OpArrayCopy::length_positive_check) { | |
3230 __ testl(length, length); | |
3231 __ jcc(Assembler::less, *stub->entry()); | |
3232 } | |
3233 | 3270 |
3234 if (flags & LIR_OpArrayCopy::src_range_check) { | 3271 if (flags & LIR_OpArrayCopy::src_range_check) { |
3235 __ lea(tmp, Address(src_pos, length, Address::times_1, 0)); | 3272 __ lea(tmp, Address(src_pos, length, Address::times_1, 0)); |
3236 __ cmpl(tmp, src_length_addr); | 3273 __ cmpl(tmp, src_length_addr); |
3237 __ jcc(Assembler::above, *stub->entry()); | 3274 __ jcc(Assembler::above, *stub->entry()); |
3240 __ lea(tmp, Address(dst_pos, length, Address::times_1, 0)); | 3277 __ lea(tmp, Address(dst_pos, length, Address::times_1, 0)); |
3241 __ cmpl(tmp, dst_length_addr); | 3278 __ cmpl(tmp, dst_length_addr); |
3242 __ jcc(Assembler::above, *stub->entry()); | 3279 __ jcc(Assembler::above, *stub->entry()); |
3243 } | 3280 } |
3244 | 3281 |
3282 if (flags & LIR_OpArrayCopy::length_positive_check) { | |
3283 __ testl(length, length); | |
3284 __ jcc(Assembler::less, *stub->entry()); | |
3285 __ jcc(Assembler::zero, *stub->continuation()); | |
3286 } | |
3287 | |
3288 #ifdef _LP64 | |
3289 __ movl2ptr(src_pos, src_pos); //higher 32bits must be null | |
3290 __ movl2ptr(dst_pos, dst_pos); //higher 32bits must be null | |
3291 #endif | |
3292 | |
3245 if (flags & LIR_OpArrayCopy::type_check) { | 3293 if (flags & LIR_OpArrayCopy::type_check) { |
3246 if (UseCompressedOops) { | 3294 // We don't know the array types are compatible |
3247 __ movl(tmp, src_klass_addr); | 3295 if (basic_type != T_OBJECT) { |
3248 __ cmpl(tmp, dst_klass_addr); | 3296 // Simple test for basic type arrays |
3249 } else { | 3297 if (UseCompressedOops) { |
3250 __ movptr(tmp, src_klass_addr); | 3298 __ movl(tmp, src_klass_addr); |
3251 __ cmpptr(tmp, dst_klass_addr); | 3299 __ cmpl(tmp, dst_klass_addr); |
3252 } | 3300 } else { |
3253 __ jcc(Assembler::notEqual, *stub->entry()); | 3301 __ movptr(tmp, src_klass_addr); |
3302 __ cmpptr(tmp, dst_klass_addr); | |
3303 } | |
3304 __ jcc(Assembler::notEqual, *stub->entry()); | |
3305 } else { | |
3306 // For object arrays, if src is a sub class of dst then we can | |
3307 // safely do the copy. | |
3308 Label cont, slow; | |
3309 | |
3310 __ push(src); | |
3311 __ push(dst); | |
3312 | |
3313 __ load_klass(src, src); | |
3314 __ load_klass(dst, dst); | |
3315 | |
3316 __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); | |
3317 | |
3318 __ push(src); | |
3319 __ push(dst); | |
3320 __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); | |
3321 __ pop(dst); | |
3322 __ pop(src); | |
3323 | |
3324 __ cmpl(src, 0); | |
3325 __ jcc(Assembler::notEqual, cont); | |
3326 | |
3327 __ bind(slow); | |
3328 __ pop(dst); | |
3329 __ pop(src); | |
3330 | |
3331 address copyfunc_addr = StubRoutines::checkcast_arraycopy(); | |
3332 if (copyfunc_addr != NULL) { // use stub if available | |
3333 // src is not a sub class of dst so we have to do a | |
3334 // per-element check. | |
3335 | |
3336 int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; | |
3337 if ((flags & mask) != mask) { | |
3338 // Check that both of them are object arrays. | |
3339 assert(flags & mask, "one of the two should be known to be an object array"); | |
3340 | |
3341 if (!(flags & LIR_OpArrayCopy::src_objarray)) { | |
3342 __ load_klass(tmp, src); | |
3343 } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { | |
3344 __ load_klass(tmp, dst); | |
3345 } | |
3346 int lh_offset = klassOopDesc::header_size() * HeapWordSize + | |
3347 Klass::layout_helper_offset_in_bytes(); | |
3348 Address klass_lh_addr(tmp, lh_offset); | |
3349 jint objArray_lh = Klass::array_layout_helper(T_OBJECT); | |
3350 __ cmpl(klass_lh_addr, objArray_lh); | |
3351 __ jcc(Assembler::notEqual, *stub->entry()); | |
3352 } | |
3353 | |
3354 #ifndef _LP64 | |
3355 // save caller save registers | |
3356 store_parameter(rax, 2); | |
3357 store_parameter(rcx, 1); | |
3358 store_parameter(rdx, 0); | |
3359 | |
3360 __ movptr(tmp, dst_klass_addr); | |
3361 __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); | |
3362 __ push(tmp); | |
3363 __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); | |
3364 __ push(tmp); | |
3365 __ push(length); | |
3366 __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); | |
3367 __ push(tmp); | |
3368 __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); | |
3369 __ push(tmp); | |
3370 | |
3371 __ call_VM_leaf(copyfunc_addr, 5); | |
3372 #else | |
3373 __ movl2ptr(length, length); //higher 32bits must be null | |
3374 | |
3375 // save caller save registers: copy them to callee save registers | |
3376 __ mov(rbx, rdx); | |
3377 __ mov(r13, r8); | |
3378 __ mov(r14, r9); | |
3379 #ifndef _WIN64 | |
3380 store_parameter(rsi, 1); | |
3381 store_parameter(rcx, 0); | |
3382 // on WIN64 other incoming parameters are in rdi and rsi, which are saved | |
3383 // across the call | |
3384 #endif | |
3385 | |
3386 __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); | |
3387 assert_different_registers(c_rarg0, dst, dst_pos, length); | |
3388 __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); | |
3389 assert_different_registers(c_rarg1, dst, length); | |
3390 | |
3391 __ mov(c_rarg2, length); | |
3392 assert_different_registers(c_rarg2, dst); | |
3393 | |
3394 #ifdef _WIN64 | |
3395 // Allocate abi space for args but be sure to keep stack aligned | |
3396 __ subptr(rsp, 6*wordSize); | |
3397 __ load_klass(c_rarg3, dst); | |
3398 __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); | |
3399 store_parameter(c_rarg3, 4); | |
3400 __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); | |
3401 __ call(RuntimeAddress(copyfunc_addr)); | |
3402 __ addptr(rsp, 6*wordSize); | |
3403 #else | |
3404 __ load_klass(c_rarg4, dst); | |
3405 __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); | |
3406 __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); | |
3407 __ call(RuntimeAddress(copyfunc_addr)); | |
3408 #endif | |
3409 | |
3410 #endif | |
3411 | |
3412 #ifndef PRODUCT | |
3413 if (PrintC1Statistics) { | |
3414 Label failed; | |
3415 __ testl(rax, rax); | |
3416 __ jcc(Assembler::notZero, failed); | |
3417 __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); | |
3418 __ bind(failed); | |
3419 } | |
3420 #endif | |
3421 | |
3422 __ testl(rax, rax); | |
3423 __ jcc(Assembler::zero, *stub->continuation()); | |
3424 | |
3425 #ifndef PRODUCT | |
3426 if (PrintC1Statistics) { | |
3427 __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); | |
3428 } | |
3429 #endif | |
3430 | |
3431 __ mov(tmp, rax); | |
3432 | |
3433 __ xorl(tmp, -1); | |
3434 | |
3435 #ifndef _LP64 | |
3436 // restore caller save registers | |
3437 assert_different_registers(tmp, rdx, rcx, rax); // result of stub will be lost | |
3438 __ movptr(rdx, Address(rsp, 0*BytesPerWord)); | |
3439 __ movptr(rcx, Address(rsp, 1*BytesPerWord)); | |
3440 __ movptr(rax, Address(rsp, 2*BytesPerWord)); | |
3441 #else | |
3442 // restore caller save registers | |
3443 __ mov(rdx, rbx); | |
3444 __ mov(r8, r13); | |
3445 __ mov(r9, r14); | |
3446 #ifndef _WIN64 | |
3447 assert_different_registers(tmp, rdx, r8, r9, rcx, rsi); // result of stub will be lost | |
3448 __ movptr(rcx, Address(rsp, 0*BytesPerWord)); | |
3449 __ movptr(rsi, Address(rsp, 1*BytesPerWord)); | |
3450 #else | |
3451 assert_different_registers(tmp, rdx, r8, r9); // result of stub will be lost | |
3452 #endif | |
3453 #endif | |
3454 | |
3455 __ subl(length, tmp); | |
3456 __ addl(src_pos, tmp); | |
3457 __ addl(dst_pos, tmp); | |
3458 } | |
3459 | |
3460 __ jmp(*stub->entry()); | |
3461 | |
3462 __ bind(cont); | |
3463 __ pop(dst); | |
3464 __ pop(src); | |
3465 } | |
3254 } | 3466 } |
3255 | 3467 |
3256 #ifdef ASSERT | 3468 #ifdef ASSERT |
3257 if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { | 3469 if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { |
3258 // Sanity check the known type with the incoming class. For the | 3470 // Sanity check the known type with the incoming class. For the |
3289 __ stop("incorrect type information in arraycopy"); | 3501 __ stop("incorrect type information in arraycopy"); |
3290 __ bind(known_ok); | 3502 __ bind(known_ok); |
3291 } | 3503 } |
3292 #endif | 3504 #endif |
3293 | 3505 |
3294 if (shift_amount > 0 && basic_type != T_OBJECT) { | 3506 #ifndef PRODUCT |
3295 __ shlptr(length, shift_amount); | 3507 if (PrintC1Statistics) { |
3296 } | 3508 __ incrementl(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); |
3509 } | |
3510 #endif | |
3297 | 3511 |
3298 #ifdef _LP64 | 3512 #ifdef _LP64 |
3299 assert_different_registers(c_rarg0, dst, dst_pos, length); | 3513 assert_different_registers(c_rarg0, dst, dst_pos, length); |
3300 __ movl2ptr(src_pos, src_pos); //higher 32bits must be null | |
3301 __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); | 3514 __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); |
3302 assert_different_registers(c_rarg1, length); | 3515 assert_different_registers(c_rarg1, length); |
3303 __ movl2ptr(dst_pos, dst_pos); //higher 32bits must be null | |
3304 __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); | 3516 __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); |
3305 __ mov(c_rarg2, length); | 3517 __ mov(c_rarg2, length); |
3306 | 3518 |
3307 #else | 3519 #else |
3308 __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); | 3520 __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); |
3309 store_parameter(tmp, 0); | 3521 store_parameter(tmp, 0); |
3310 __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); | 3522 __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); |
3311 store_parameter(tmp, 1); | 3523 store_parameter(tmp, 1); |
3312 store_parameter(length, 2); | 3524 store_parameter(length, 2); |
3313 #endif // _LP64 | 3525 #endif // _LP64 |
3314 if (basic_type == T_OBJECT) { | 3526 |
3315 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::oop_arraycopy), 0); | 3527 bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; |
3316 } else { | 3528 bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; |
3317 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::primitive_arraycopy), 0); | 3529 const char *name; |
3318 } | 3530 address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); |
3531 __ call_VM_leaf(entry, 0); | |
3319 | 3532 |
3320 __ bind(*stub->continuation()); | 3533 __ bind(*stub->continuation()); |
3321 } | 3534 } |
3322 | 3535 |
3323 | 3536 |