comparison src/cpu/x86/vm/c1_LIRAssembler_x86.cpp @ 2446:13bc79b5c9c8

7033154: Improve C1 arraycopy performance
Summary: better static analysis. Take advantage of array copy stubs.
Reviewed-by: never
author roland
date Sun, 03 Apr 2011 12:00:54 +0200
parents 09f96c3ff1ad
children 15c9a0e16269
comparison
equal deleted inserted replaced
2445:08eb13460b3a 2446:13bc79b5c9c8
3100 CodeStub* stub = op->stub(); 3100 CodeStub* stub = op->stub();
3101 int flags = op->flags(); 3101 int flags = op->flags();
3102 BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; 3102 BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
3103 if (basic_type == T_ARRAY) basic_type = T_OBJECT; 3103 if (basic_type == T_ARRAY) basic_type = T_OBJECT;
3104 3104
3105 // if we don't know anything or it's an object array, just go through the generic arraycopy 3105 // if we don't know anything, just go through the generic arraycopy
3106 if (default_type == NULL) { 3106 if (default_type == NULL) {
3107 Label done; 3107 Label done;
3108 // save outgoing arguments on stack in case call to System.arraycopy is needed 3108 // save outgoing arguments on stack in case call to System.arraycopy is needed
3109 // HACK ALERT. This code used to push the parameters in a hardwired fashion 3109 // HACK ALERT. This code used to push the parameters in a hardwired fashion
3110 // for interpreter calling conventions. Now we have to do it in new style conventions. 3110 // for interpreter calling conventions. Now we have to do it in new style conventions.
3121 // these are just temporary placements until we need to reload 3121 // these are just temporary placements until we need to reload
3122 store_parameter(src_pos, 3); 3122 store_parameter(src_pos, 3);
3123 store_parameter(src, 4); 3123 store_parameter(src, 4);
3124 NOT_LP64(assert(src == rcx && src_pos == rdx, "mismatch in calling convention");) 3124 NOT_LP64(assert(src == rcx && src_pos == rdx, "mismatch in calling convention");)
3125 3125
3126 address entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy); 3126 address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
3127
3128 address copyfunc_addr = StubRoutines::generic_arraycopy();
3127 3129
3128 // pass arguments: may push as this is not a safepoint; SP must be fixed at each safepoint 3130 // pass arguments: may push as this is not a safepoint; SP must be fixed at each safepoint
3129 #ifdef _LP64 3131 #ifdef _LP64
3130 // The arguments are in java calling convention so we can trivially shift them to C 3132 // The arguments are in java calling convention so we can trivially shift them to C
3131 // convention 3133 // convention
3139 __ mov(c_rarg3, j_rarg3); 3141 __ mov(c_rarg3, j_rarg3);
3140 #ifdef _WIN64 3142 #ifdef _WIN64
3141 // Allocate abi space for args but be sure to keep stack aligned 3143 // Allocate abi space for args but be sure to keep stack aligned
3142 __ subptr(rsp, 6*wordSize); 3144 __ subptr(rsp, 6*wordSize);
3143 store_parameter(j_rarg4, 4); 3145 store_parameter(j_rarg4, 4);
3144 __ call(RuntimeAddress(entry)); 3146 if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3147 __ call(RuntimeAddress(C_entry));
3148 } else {
3149 #ifndef PRODUCT
3150 if (PrintC1Statistics) {
3151 __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3152 }
3153 #endif
3154 __ call(RuntimeAddress(copyfunc_addr));
3155 }
3145 __ addptr(rsp, 6*wordSize); 3156 __ addptr(rsp, 6*wordSize);
3146 #else 3157 #else
3147 __ mov(c_rarg4, j_rarg4); 3158 __ mov(c_rarg4, j_rarg4);
3148 __ call(RuntimeAddress(entry)); 3159 if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3160 __ call(RuntimeAddress(C_entry));
3161 } else {
3162 #ifndef PRODUCT
3163 if (PrintC1Statistics) {
3164 __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3165 }
3166 #endif
3167 __ call(RuntimeAddress(copyfunc_addr));
3168 }
3149 #endif // _WIN64 3169 #endif // _WIN64
3150 #else 3170 #else
3151 __ push(length); 3171 __ push(length);
3152 __ push(dst_pos); 3172 __ push(dst_pos);
3153 __ push(dst); 3173 __ push(dst);
3154 __ push(src_pos); 3174 __ push(src_pos);
3155 __ push(src); 3175 __ push(src);
3156 __ call_VM_leaf(entry, 5); // removes pushed parameter from the stack 3176
3177 if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3178 __ call_VM_leaf(C_entry, 5); // removes pushed parameter from the stack
3179 } else {
3180 #ifndef PRODUCT
3181 if (PrintC1Statistics) {
3182 __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3183 }
3184 #endif
3185 __ call_VM_leaf(copyfunc_addr, 5); // removes pushed parameter from the stack
3186 }
3157 3187
3158 #endif // _LP64 3188 #endif // _LP64
3159 3189
3160 __ cmpl(rax, 0); 3190 __ cmpl(rax, 0);
3161 __ jcc(Assembler::equal, *stub->continuation()); 3191 __ jcc(Assembler::equal, *stub->continuation());
3192
3193 if (copyfunc_addr != NULL) {
3194 __ mov(tmp, rax);
3195 __ xorl(tmp, -1);
3196 }
3162 3197
3163 // Reload values from the stack so they are where the stub 3198 // Reload values from the stack so they are where the stub
3164 // expects them. 3199 // expects them.
3165 __ movptr (dst, Address(rsp, 0*BytesPerWord)); 3200 __ movptr (dst, Address(rsp, 0*BytesPerWord));
3166 __ movptr (dst_pos, Address(rsp, 1*BytesPerWord)); 3201 __ movptr (dst_pos, Address(rsp, 1*BytesPerWord));
3167 __ movptr (length, Address(rsp, 2*BytesPerWord)); 3202 __ movptr (length, Address(rsp, 2*BytesPerWord));
3168 __ movptr (src_pos, Address(rsp, 3*BytesPerWord)); 3203 __ movptr (src_pos, Address(rsp, 3*BytesPerWord));
3169 __ movptr (src, Address(rsp, 4*BytesPerWord)); 3204 __ movptr (src, Address(rsp, 4*BytesPerWord));
3205
3206 if (copyfunc_addr != NULL) {
3207 __ subl(length, tmp);
3208 __ addl(src_pos, tmp);
3209 __ addl(dst_pos, tmp);
3210 }
3170 __ jmp(*stub->entry()); 3211 __ jmp(*stub->entry());
3171 3212
3172 __ bind(*stub->continuation()); 3213 __ bind(*stub->continuation());
3173 return; 3214 return;
3174 } 3215 }
3224 } 3265 }
3225 if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { 3266 if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
3226 __ testl(dst_pos, dst_pos); 3267 __ testl(dst_pos, dst_pos);
3227 __ jcc(Assembler::less, *stub->entry()); 3268 __ jcc(Assembler::less, *stub->entry());
3228 } 3269 }
3229 if (flags & LIR_OpArrayCopy::length_positive_check) {
3230 __ testl(length, length);
3231 __ jcc(Assembler::less, *stub->entry());
3232 }
3233 3270
3234 if (flags & LIR_OpArrayCopy::src_range_check) { 3271 if (flags & LIR_OpArrayCopy::src_range_check) {
3235 __ lea(tmp, Address(src_pos, length, Address::times_1, 0)); 3272 __ lea(tmp, Address(src_pos, length, Address::times_1, 0));
3236 __ cmpl(tmp, src_length_addr); 3273 __ cmpl(tmp, src_length_addr);
3237 __ jcc(Assembler::above, *stub->entry()); 3274 __ jcc(Assembler::above, *stub->entry());
3240 __ lea(tmp, Address(dst_pos, length, Address::times_1, 0)); 3277 __ lea(tmp, Address(dst_pos, length, Address::times_1, 0));
3241 __ cmpl(tmp, dst_length_addr); 3278 __ cmpl(tmp, dst_length_addr);
3242 __ jcc(Assembler::above, *stub->entry()); 3279 __ jcc(Assembler::above, *stub->entry());
3243 } 3280 }
3244 3281
3282 if (flags & LIR_OpArrayCopy::length_positive_check) {
3283 __ testl(length, length);
3284 __ jcc(Assembler::less, *stub->entry());
3285 __ jcc(Assembler::zero, *stub->continuation());
3286 }
3287
3288 #ifdef _LP64
3289 __ movl2ptr(src_pos, src_pos); //higher 32bits must be null
3290 __ movl2ptr(dst_pos, dst_pos); //higher 32bits must be null
3291 #endif
3292
3245 if (flags & LIR_OpArrayCopy::type_check) { 3293 if (flags & LIR_OpArrayCopy::type_check) {
3246 if (UseCompressedOops) { 3294 // We don't know the array types are compatible
3247 __ movl(tmp, src_klass_addr); 3295 if (basic_type != T_OBJECT) {
3248 __ cmpl(tmp, dst_klass_addr); 3296 // Simple test for basic type arrays
3249 } else { 3297 if (UseCompressedOops) {
3250 __ movptr(tmp, src_klass_addr); 3298 __ movl(tmp, src_klass_addr);
3251 __ cmpptr(tmp, dst_klass_addr); 3299 __ cmpl(tmp, dst_klass_addr);
3252 } 3300 } else {
3253 __ jcc(Assembler::notEqual, *stub->entry()); 3301 __ movptr(tmp, src_klass_addr);
3302 __ cmpptr(tmp, dst_klass_addr);
3303 }
3304 __ jcc(Assembler::notEqual, *stub->entry());
3305 } else {
3306 // For object arrays, if src is a sub class of dst then we can
3307 // safely do the copy.
3308 Label cont, slow;
3309
3310 __ push(src);
3311 __ push(dst);
3312
3313 __ load_klass(src, src);
3314 __ load_klass(dst, dst);
3315
3316 __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
3317
3318 __ push(src);
3319 __ push(dst);
3320 __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
3321 __ pop(dst);
3322 __ pop(src);
3323
3324 __ cmpl(src, 0);
3325 __ jcc(Assembler::notEqual, cont);
3326
3327 __ bind(slow);
3328 __ pop(dst);
3329 __ pop(src);
3330
3331 address copyfunc_addr = StubRoutines::checkcast_arraycopy();
3332 if (copyfunc_addr != NULL) { // use stub if available
3333 // src is not a sub class of dst so we have to do a
3334 // per-element check.
3335
3336 int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
3337 if ((flags & mask) != mask) {
3338 // Check that both of them are object arrays.
3339 assert(flags & mask, "one of the two should be known to be an object array");
3340
3341 if (!(flags & LIR_OpArrayCopy::src_objarray)) {
3342 __ load_klass(tmp, src);
3343 } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
3344 __ load_klass(tmp, dst);
3345 }
3346 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
3347 Klass::layout_helper_offset_in_bytes();
3348 Address klass_lh_addr(tmp, lh_offset);
3349 jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
3350 __ cmpl(klass_lh_addr, objArray_lh);
3351 __ jcc(Assembler::notEqual, *stub->entry());
3352 }
3353
3354 #ifndef _LP64
3355 // save caller save registers
3356 store_parameter(rax, 2);
3357 store_parameter(rcx, 1);
3358 store_parameter(rdx, 0);
3359
3360 __ movptr(tmp, dst_klass_addr);
3361 __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
3362 __ push(tmp);
3363 __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
3364 __ push(tmp);
3365 __ push(length);
3366 __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3367 __ push(tmp);
3368 __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3369 __ push(tmp);
3370
3371 __ call_VM_leaf(copyfunc_addr, 5);
3372 #else
3373 __ movl2ptr(length, length); //higher 32bits must be null
3374
3375 // save caller save registers: copy them to callee save registers
3376 __ mov(rbx, rdx);
3377 __ mov(r13, r8);
3378 __ mov(r14, r9);
3379 #ifndef _WIN64
3380 store_parameter(rsi, 1);
3381 store_parameter(rcx, 0);
3382 // on WIN64 other incoming parameters are in rdi and rsi saved
3383 // across the call
3384 #endif
3385
3386 __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3387 assert_different_registers(c_rarg0, dst, dst_pos, length);
3388 __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3389 assert_different_registers(c_rarg1, dst, length);
3390
3391 __ mov(c_rarg2, length);
3392 assert_different_registers(c_rarg2, dst);
3393
3394 #ifdef _WIN64
3395 // Allocate abi space for args but be sure to keep stack aligned
3396 __ subptr(rsp, 6*wordSize);
3397 __ load_klass(c_rarg3, dst);
3398 __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
3399 store_parameter(c_rarg3, 4);
3400 __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
3401 __ call(RuntimeAddress(copyfunc_addr));
3402 __ addptr(rsp, 6*wordSize);
3403 #else
3404 __ load_klass(c_rarg4, dst);
3405 __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
3406 __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
3407 __ call(RuntimeAddress(copyfunc_addr));
3408 #endif
3409
3410 #endif
3411
3412 #ifndef PRODUCT
3413 if (PrintC1Statistics) {
3414 Label failed;
3415 __ testl(rax, rax);
3416 __ jcc(Assembler::notZero, failed);
3417 __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt));
3418 __ bind(failed);
3419 }
3420 #endif
3421
3422 __ testl(rax, rax);
3423 __ jcc(Assembler::zero, *stub->continuation());
3424
3425 #ifndef PRODUCT
3426 if (PrintC1Statistics) {
3427 __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt));
3428 }
3429 #endif
3430
3431 __ mov(tmp, rax);
3432
3433 __ xorl(tmp, -1);
3434
3435 #ifndef _LP64
3436 // restore caller save registers
3437 assert_different_registers(tmp, rdx, rcx, rax); // result of stub will be lost
3438 __ movptr(rdx, Address(rsp, 0*BytesPerWord));
3439 __ movptr(rcx, Address(rsp, 1*BytesPerWord));
3440 __ movptr(rax, Address(rsp, 2*BytesPerWord));
3441 #else
3442 // restore caller save registers
3443 __ mov(rdx, rbx);
3444 __ mov(r8, r13);
3445 __ mov(r9, r14);
3446 #ifndef _WIN64
3447 assert_different_registers(tmp, rdx, r8, r9, rcx, rsi); // result of stub will be lost
3448 __ movptr(rcx, Address(rsp, 0*BytesPerWord));
3449 __ movptr(rsi, Address(rsp, 1*BytesPerWord));
3450 #else
3451 assert_different_registers(tmp, rdx, r8, r9); // result of stub will be lost
3452 #endif
3453 #endif
3454
3455 __ subl(length, tmp);
3456 __ addl(src_pos, tmp);
3457 __ addl(dst_pos, tmp);
3458 }
3459
3460 __ jmp(*stub->entry());
3461
3462 __ bind(cont);
3463 __ pop(dst);
3464 __ pop(src);
3465 }
3254 } 3466 }
3255 3467
3256 #ifdef ASSERT 3468 #ifdef ASSERT
3257 if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { 3469 if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
3258 // Sanity check the known type with the incoming class. For the 3470 // Sanity check the known type with the incoming class. For the
3289 __ stop("incorrect type information in arraycopy"); 3501 __ stop("incorrect type information in arraycopy");
3290 __ bind(known_ok); 3502 __ bind(known_ok);
3291 } 3503 }
3292 #endif 3504 #endif
3293 3505
3294 if (shift_amount > 0 && basic_type != T_OBJECT) { 3506 #ifndef PRODUCT
3295 __ shlptr(length, shift_amount); 3507 if (PrintC1Statistics) {
3296 } 3508 __ incrementl(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)));
3509 }
3510 #endif
3297 3511
3298 #ifdef _LP64 3512 #ifdef _LP64
3299 assert_different_registers(c_rarg0, dst, dst_pos, length); 3513 assert_different_registers(c_rarg0, dst, dst_pos, length);
3300 __ movl2ptr(src_pos, src_pos); //higher 32bits must be null
3301 __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); 3514 __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3302 assert_different_registers(c_rarg1, length); 3515 assert_different_registers(c_rarg1, length);
3303 __ movl2ptr(dst_pos, dst_pos); //higher 32bits must be null
3304 __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); 3516 __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3305 __ mov(c_rarg2, length); 3517 __ mov(c_rarg2, length);
3306 3518
3307 #else 3519 #else
3308 __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); 3520 __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3309 store_parameter(tmp, 0); 3521 store_parameter(tmp, 0);
3310 __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); 3522 __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3311 store_parameter(tmp, 1); 3523 store_parameter(tmp, 1);
3312 store_parameter(length, 2); 3524 store_parameter(length, 2);
3313 #endif // _LP64 3525 #endif // _LP64
3314 if (basic_type == T_OBJECT) { 3526
3315 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::oop_arraycopy), 0); 3527 bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
3316 } else { 3528 bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
3317 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Runtime1::primitive_arraycopy), 0); 3529 const char *name;
3318 } 3530 address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
3531 __ call_VM_leaf(entry, 0);
3319 3532
3320 __ bind(*stub->continuation()); 3533 __ bind(*stub->continuation());
3321 } 3534 }
3322 3535
3323 3536