comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 14909:4ca6dc0799b6

Backout jdk9 merge
author Gilles Duboscq <duboscq@ssw.jku.at>
date Tue, 01 Apr 2014 13:57:07 +0200
parents d8041d695d19
children 4062efea018b
comparison
equal deleted inserted replaced
14908:8db6e76cb658 14909:4ca6dc0799b6
1 /* 1 /*
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 * 4 *
5 * This code is free software; you can redistribute it and/or modify it 5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as 6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
3302 if (UseBlockZeroing) { 3302 if (UseBlockZeroing) {
3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); 3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words");
3304 } 3304 }
3305 } 3305 }
3306 3306
// Emits the "aesencryptBlock" stub, which encrypts one 16-byte block with an
// already expanded AES key using the SPARC AES round instructions.
//
// Register interface of the generated code (as declared below):
//   O0 (from) - source byte array
//   O1 (to)   - destination byte array
//   O2 (key)  - expanded key array (an int array, see the T_INT offset below)
//
// O4 and floating-point registers in the range F0-F60 are used as scratch.
// Returns the entry address of the generated stub.
3307 address generate_aescrypt_encryptBlock() {
3308 __ align(CodeEntryAlignment);
3309 StubCodeMark mark(this, "StubRoutines", "aesencryptBlock");
3310 Label L_doLast128bit, L_storeOutput;
3311 address start = __ pc();
3312 Register from = O0; // source byte array
3313 Register to = O1; // destination byte array
3314 Register key = O2; // expanded key array
3315 const Register keylen = O4; //reg for storing expanded key array length
3316
3317 // read expanded key length
// The displacement is (length field offset - offset of element 0), i.e. it
// steps back from the array data to the array header; this presumes `key`
// holds the address of element 0 - TODO confirm against the caller.
3318 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
3319
3320 // load input into F54-F56; F30-F31 used as temp
// The input is read with four single-word loads and assembled into two
// doubles. F30/F31 are free here because the key load below (which also
// targets F30) has not happened yet.
3321 __ ldf(FloatRegisterImpl::S, from, 0, F30);
3322 __ ldf(FloatRegisterImpl::S, from, 4, F31);
3323 __ fmov(FloatRegisterImpl::D, F30, F54);
3324 __ ldf(FloatRegisterImpl::S, from, 8, F30);
3325 __ ldf(FloatRegisterImpl::S, from, 12, F31);
3326 __ fmov(FloatRegisterImpl::D, F30, F56);
3327
3328 // load expanded key
// Key bytes 0..159 go into the double registers F0, F2, ..., F38
// (register number i receives the 8 bytes at offset i*4).
3329 for ( int i = 0; i <= 38; i += 2 ) {
3330 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
3331 }
3332
3333 // perform cipher transformation
// Initial step: xor the input with the first 16 key bytes (F0:F2).
3334 __ fxor(FloatRegisterImpl::D, F0, F54, F54);
3335 __ fxor(FloatRegisterImpl::D, F2, F56, F56);
3336 // rounds 1 through 8
// Each iteration emits two rounds, ping-ponging the state between
// F54:F56 and F58:F60.
3337 for ( int i = 4; i <= 28; i += 8 ) {
3338 __ aes_eround01(as_FloatRegister(i), F54, F56, F58);
3339 __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60);
3340 __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54);
3341 __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56);
3342 }
3343 __ aes_eround01(F36, F54, F56, F58); //round 9
3344 __ aes_eround23(F38, F54, F56, F60);
3345
3346 // 128-bit original key size
// An expanded key of 44 ints needs only the final round from here.
3347 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit);
3348
// Longer keys: fetch key bytes 160..207 into F40..F50.
3349 for ( int i = 40; i <= 50; i += 2 ) {
3350 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) );
3351 }
3352 __ aes_eround01(F40, F58, F60, F54); //round 10
3353 __ aes_eround23(F42, F58, F60, F56);
3354 __ aes_eround01(F44, F54, F56, F58); //round 11
3355 __ aes_eround23(F46, F54, F56, F60);
3356
3357 // 192-bit original key size
// For 52 ints the final-round key is already in F48:F50, so go straight
// to the shared last round.
3358 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput);
3359
// Remaining case (256-bit key per the comments in this function): two more
// rounds. F46, F48 and F50 are reloaded with key bytes 216..239 so that the
// shared last round below again finds its key in F48:F50.
3360 __ ldf(FloatRegisterImpl::D, key, 208, F52);
3361 __ aes_eround01(F48, F58, F60, F54); //round 12
3362 __ aes_eround23(F50, F58, F60, F56);
3363 __ ldf(FloatRegisterImpl::D, key, 216, F46);
3364 __ ldf(FloatRegisterImpl::D, key, 224, F48);
3365 __ ldf(FloatRegisterImpl::D, key, 232, F50);
3366 __ aes_eround01(F52, F54, F56, F58); //round 13
3367 __ aes_eround23(F46, F54, F56, F60);
3368 __ br(Assembler::always, false, Assembler::pt, L_storeOutput);
3369 __ delayed()->nop();
3370
3371 __ BIND(L_doLast128bit);
// 128-bit path: put key bytes 160..175 into F48:F50, the registers the
// shared last round expects.
3372 __ ldf(FloatRegisterImpl::D, key, 160, F48);
3373 __ ldf(FloatRegisterImpl::D, key, 168, F50);
3374
3375 __ BIND(L_storeOutput);
3376 // perform last round of encryption common for all key sizes
3377 __ aes_eround01_l(F48, F58, F60, F54); //last round
3378 __ aes_eround23_l(F50, F58, F60, F56);
3379
3380 // store output into the destination array, F0-F1 used as temp
// The result is written with four single-word stores; the last store sits
// in the delay slot of the return.
3381 __ fmov(FloatRegisterImpl::D, F54, F0);
3382 __ stf(FloatRegisterImpl::S, F0, to, 0);
3383 __ stf(FloatRegisterImpl::S, F1, to, 4);
3384 __ fmov(FloatRegisterImpl::D, F56, F0);
3385 __ stf(FloatRegisterImpl::S, F0, to, 8);
3386 __ retl();
3387 __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
3388
3389 return start;
3390 }
3391
// Emits the "aesdecryptBlock" stub, which decrypts one 16-byte block.
//
// Register interface of the generated code (as declared below):
//   O0 (from)         - source byte array
//   O1 (to)           - destination byte array
//   O2 (key)          - expanded key array; only its length is read here
//   O3 (original_key) - original (unexpanded) key array
//
// The round keys are not taken from `key`: they are re-derived from
// `original_key` with the aes_kexpand* instructions (see the in-line comments
// about SunJCE compatibility). O4 and floating-point registers in the range
// F0-F58 are used as scratch. Returns the entry address of the generated stub.
3392 address generate_aescrypt_decryptBlock() {
3393 __ align(CodeEntryAlignment);
3394 StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock");
3395 address start = __ pc();
3396 Label L_expand192bit, L_expand256bit, L_common_transform;
3397 Register from = O0; // source byte array
3398 Register to = O1; // destination byte array
3399 Register key = O2; // expanded key array
3400 Register original_key = O3; // original key array only required during decryption
3401 const Register keylen = O4; // reg for storing expanded key array length
3402
3403 // read expanded key array length
// The displacement is (length field offset - offset of element 0); this
// presumes `key` holds the address of element 0 - TODO confirm against caller.
3404 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
3405
3406 // load input into F52-F54; F30,F31 used as temp
// Four single-word loads assembled into two doubles. F30/F31 are still free
// at this point; key expansion below overwrites them later.
3407 __ ldf(FloatRegisterImpl::S, from, 0, F30);
3408 __ ldf(FloatRegisterImpl::S, from, 4, F31);
3409 __ fmov(FloatRegisterImpl::D, F30, F52);
3410 __ ldf(FloatRegisterImpl::S, from, 8, F30);
3411 __ ldf(FloatRegisterImpl::S, from, 12, F31);
3412 __ fmov(FloatRegisterImpl::D, F30, F54);
3413
3414 // load original key from SunJCE expanded decryption key
// First 16 bytes of the original key into F0..F3 (single-word loads).
3415 for ( int i = 0; i <= 3; i++ ) {
3416 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
3417 }
3418
3419 // 256-bit original key size
3420 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
3421
3422 // 192-bit original key size
3423 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
3424
3425 // 128-bit original key size
3426 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
// Ten expansion steps (third operand 0..9) fill F4..F42 from F0:F2.
3427 for ( int i = 0; i <= 36; i += 4 ) {
3428 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
3429 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
3430 }
3431
3432 // perform 128-bit key specific inverse cipher transformation
// xor the input with the last expanded key pair (F40:F42); the remaining
// rounds are done by the common code.
3433 __ fxor(FloatRegisterImpl::D, F42, F54, F54);
3434 __ fxor(FloatRegisterImpl::D, F40, F52, F52);
3435 __ br(Assembler::always, false, Assembler::pt, L_common_transform);
3436 __ delayed()->nop();
3437
3438 __ BIND(L_expand192bit);
3439
3440 // start loading rest of the 192-bit key
3441 __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
3442 __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
3443
3444 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
// The loop fills F6..F46; the two instructions after it produce the final
// pair F48:F50.
3445 for ( int i = 0; i <= 36; i += 6 ) {
3446 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
3447 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
3448 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
3449 }
3450 __ aes_kexpand1(F42, F46, 7, F48);
3451 __ aes_kexpand2(F44, F48, F50);
3452
3453 // perform 192-bit key specific inverse cipher transformation
// xor with the last key pair (F48:F50), then the two extra rounds that use
// F46..F40, leaving the state in F52:F54 for the common code.
3454 __ fxor(FloatRegisterImpl::D, F50, F54, F54);
3455 __ fxor(FloatRegisterImpl::D, F48, F52, F52);
3456 __ aes_dround23(F46, F52, F54, F58);
3457 __ aes_dround01(F44, F52, F54, F56);
3458 __ aes_dround23(F42, F56, F58, F54);
3459 __ aes_dround01(F40, F56, F58, F52);
3460 __ br(Assembler::always, false, Assembler::pt, L_common_transform);
3461 __ delayed()->nop();
3462
3463 __ BIND(L_expand256bit);
3464
3465 // load rest of the 256-bit key
3466 for ( int i = 4; i <= 7; i++ ) {
3467 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
3468 }
3469
3470 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
// The loop fills F8..F54 (its last iteration writes F52 and F54, destroying
// the input loaded at the top); the two instructions after it produce the
// final pair F56:F58.
3471 for ( int i = 0; i <= 40; i += 8 ) {
3472 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
3473 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
3474 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
3475 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
3476 }
3477 __ aes_kexpand1(F48, F54, 6, F56);
3478 __ aes_kexpand2(F50, F56, F58);
3479
// Move the four highest key registers out of the way, in reverse order:
// F58->F0, F56->F2, F54->F4, F52->F6. This frees F52..F58 for the state and
// its temporaries.
3480 for ( int i = 0; i <= 6; i += 2 ) {
3481 __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
3482 }
3483
3484 // load input into F52-F54
// Reload of the input, needed because key expansion overwrote F52/F54.
// NOTE(review): these are doubleword loads, whereas the entry code reads the
// same bytes with single-word loads - confirm `from` is always 8-byte aligned
// on this path.
3485 __ ldf(FloatRegisterImpl::D, from, 0, F52);
3486 __ ldf(FloatRegisterImpl::D, from, 8, F54);
3487
3488 // perform 256-bit key specific inverse cipher transformation
// F0..F6 hold the relocated copies of F58..F52 at this point.
3489 __ fxor(FloatRegisterImpl::D, F0, F54, F54);
3490 __ fxor(FloatRegisterImpl::D, F2, F52, F52);
3491 __ aes_dround23(F4, F52, F54, F58);
3492 __ aes_dround01(F6, F52, F54, F56);
3493 __ aes_dround23(F50, F56, F58, F54);
3494 __ aes_dround01(F48, F56, F58, F52);
3495 __ aes_dround23(F46, F52, F54, F58);
3496 __ aes_dround01(F44, F52, F54, F56);
3497 __ aes_dround23(F42, F56, F58, F54);
3498 __ aes_dround01(F40, F56, F58, F52);
3499
// Restore F0..F7 from the original key: the fmov loop above overwrote
// F0..F6, and the common code below reads keys all the way down to F0.
3500 for ( int i = 0; i <= 7; i++ ) {
3501 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
3502 }
3503
3504 // perform inverse cipher transformations common for all key sizes
3505 __ BIND(L_common_transform);
// Ten rounds using keys F38 down to F0, two per iteration; the final
// iteration (i == 6) uses the "_l" (last round) instruction forms.
3506 for ( int i = 38; i >= 6; i -= 8 ) {
3507 __ aes_dround23(as_FloatRegister(i), F52, F54, F58);
3508 __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56);
3509 if ( i != 6) {
3510 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54);
3511 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52);
3512 } else {
3513 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54);
3514 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52);
3515 }
3516 }
3517
3518 // store output to destination array, F0-F1 used as temp
// Four single-word stores; the last one sits in the delay slot of the return.
3519 __ fmov(FloatRegisterImpl::D, F52, F0);
3520 __ stf(FloatRegisterImpl::S, F0, to, 0);
3521 __ stf(FloatRegisterImpl::S, F1, to, 4);
3522 __ fmov(FloatRegisterImpl::D, F54, F0);
3523 __ stf(FloatRegisterImpl::S, F0, to, 8);
3524 __ retl();
3525 __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12);
3526
3527 return start;
3528 }
3529
3530 address generate_cipherBlockChaining_encryptAESCrypt() {
3531 __ align(CodeEntryAlignment);
3532 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
3533 Label L_cbcenc128, L_cbcenc192, L_cbcenc256;
3534 address start = __ pc();
3535 Register from = O0; // source byte array
3536 Register to = O1; // destination byte array
3537 Register key = O2; // expanded key array
3538 Register rvec = O3; // init vector
3539 const Register len_reg = O4; // cipher length
3540 const Register keylen = O5; // reg for storing expanded key array length
3541
3542 // save cipher len to return in the end
3543 __ mov(len_reg, L1);
3544
3545 // read expanded key length
3546 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
3547
3548 // load init vector
3549 __ ldf(FloatRegisterImpl::D, rvec, 0, F60);
3550 __ ldf(FloatRegisterImpl::D, rvec, 8, F62);
3551 __ ldx(key,0,G1);
3552 __ ldx(key,8,G2);
3553
3554 // start loading expanded key
3555 for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) {
3556 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
3557 }
3558
3559 // 128-bit original key size
3560 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128);
3561
3562 for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) {
3563 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
3564 }
3565
3566 // 192-bit original key size
3567 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192);
3568
3569 for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) {
3570 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
3571 }
3572
3573 // 256-bit original key size
3574 __ br(Assembler::always, false, Assembler::pt, L_cbcenc256);
3575 __ delayed()->nop();
3576
3577 __ align(OptoLoopAlignment);
3578 __ BIND(L_cbcenc128);
3579 __ ldx(from,0,G3);
3580 __ ldx(from,8,G4);
3581 __ xor3(G1,G3,G3);
3582 __ xor3(G2,G4,G4);
3583 __ movxtod(G3,F56);
3584 __ movxtod(G4,F58);
3585 __ fxor(FloatRegisterImpl::D, F60, F56, F60);
3586 __ fxor(FloatRegisterImpl::D, F62, F58, F62);
3587
3588 // TEN_EROUNDS
3589 for ( int i = 0; i <= 32; i += 8 ) {
3590 __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
3591 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
3592 if (i != 32 ) {
3593 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
3594 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
3595 } else {
3596 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
3597 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
3598 }
3599 }
3600
3601 __ stf(FloatRegisterImpl::D, F60, to, 0);
3602 __ stf(FloatRegisterImpl::D, F62, to, 8);
3603 __ add(from, 16, from);
3604 __ add(to, 16, to);
3605 __ subcc(len_reg, 16, len_reg);
3606 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
3607 __ delayed()->nop();
3608 __ stf(FloatRegisterImpl::D, F60, rvec, 0);
3609 __ stf(FloatRegisterImpl::D, F62, rvec, 8);
3610 __ retl();
3611 __ delayed()->mov(L1, O0);
3612
3613 __ align(OptoLoopAlignment);
3614 __ BIND(L_cbcenc192);
3615 __ ldx(from,0,G3);
3616 __ ldx(from,8,G4);
3617 __ xor3(G1,G3,G3);
3618 __ xor3(G2,G4,G4);
3619 __ movxtod(G3,F56);
3620 __ movxtod(G4,F58);
3621 __ fxor(FloatRegisterImpl::D, F60, F56, F60);
3622 __ fxor(FloatRegisterImpl::D, F62, F58, F62);
3623
3624 // TWELEVE_EROUNDS
3625 for ( int i = 0; i <= 40; i += 8 ) {
3626 __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
3627 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
3628 if (i != 40 ) {
3629 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
3630 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
3631 } else {
3632 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
3633 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
3634 }
3635 }
3636
3637 __ stf(FloatRegisterImpl::D, F60, to, 0);
3638 __ stf(FloatRegisterImpl::D, F62, to, 8);
3639 __ add(from, 16, from);
3640 __ subcc(len_reg, 16, len_reg);
3641 __ add(to, 16, to);
3642 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
3643 __ delayed()->nop();
3644 __ stf(FloatRegisterImpl::D, F60, rvec, 0);
3645 __ stf(FloatRegisterImpl::D, F62, rvec, 8);
3646 __ retl();
3647 __ delayed()->mov(L1, O0);
3648
3649 __ align(OptoLoopAlignment);
3650 __ BIND(L_cbcenc256);
3651 __ ldx(from,0,G3);
3652 __ ldx(from,8,G4);
3653 __ xor3(G1,G3,G3);
3654 __ xor3(G2,G4,G4);
3655 __ movxtod(G3,F56);
3656 __ movxtod(G4,F58);
3657 __ fxor(FloatRegisterImpl::D, F60, F56, F60);
3658 __ fxor(FloatRegisterImpl::D, F62, F58, F62);
3659
3660 // FOURTEEN_EROUNDS
3661 for ( int i = 0; i <= 48; i += 8 ) {
3662 __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
3663 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
3664 if (i != 48 ) {
3665 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
3666 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
3667 } else {
3668 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
3669 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
3670 }
3671 }
3672
3673 __ stf(FloatRegisterImpl::D, F60, to, 0);
3674 __ stf(FloatRegisterImpl::D, F62, to, 8);
3675 __ add(from, 16, from);
3676 __ subcc(len_reg, 16, len_reg);
3677 __ add(to, 16, to);
3678 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
3679 __ delayed()->nop();
3680 __ stf(FloatRegisterImpl::D, F60, rvec, 0);
3681 __ stf(FloatRegisterImpl::D, F62, rvec, 8);
3682 __ retl();
3683 __ delayed()->mov(L1, O0);
3684
3685 return start;
3686 }
3687
3688 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
3689 __ align(CodeEntryAlignment);
3690 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
3691 Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
3692 Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
3693 address start = __ pc();
3694 Register from = I0; // source byte array
3695 Register to = I1; // destination byte array
3696 Register key = I2; // expanded key array
3697 Register rvec = I3; // init vector
3698 const Register len_reg = I4; // cipher length
3699 const Register original_key = I5; // original key array only required during decryption
3700 const Register keylen = L6; // reg for storing expanded key array length
3701
3702 // save cipher len before save_frame, to return in the end
3703 __ mov(O4, L0);
3704 __ save_frame(0); //args are read from I* registers since we save the frame in the beginning
3705
3706 // load original key from SunJCE expanded decryption key
3707 for ( int i = 0; i <= 3; i++ ) {
3708 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
3709 }
3710
3711 // load initial vector
3712 __ ldx(rvec,0,L0);
3713 __ ldx(rvec,8,L1);
3714
3715 // read expanded key array length
3716 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
3717
3718 // 256-bit original key size
3719 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
3720
3721 // 192-bit original key size
3722 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
3723
3724 // 128-bit original key size
3725 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
3726 for ( int i = 0; i <= 36; i += 4 ) {
3727 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
3728 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
3729 }
3730
3731 // load expanded key[last-1] and key[last] elements
3732 __ movdtox(F40,L2);
3733 __ movdtox(F42,L3);
3734
3735 __ and3(len_reg, 16, L4);
3736 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128);
3737 __ delayed()->nop();
3738
3739 __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
3740 __ delayed()->nop();
3741
3742 __ BIND(L_expand192bit);
3743 // load rest of the 192-bit key
3744 __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
3745 __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
3746
3747 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
3748 for ( int i = 0; i <= 36; i += 6 ) {
3749 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
3750 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
3751 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
3752 }
3753 __ aes_kexpand1(F42, F46, 7, F48);
3754 __ aes_kexpand2(F44, F48, F50);
3755
3756 // load expanded key[last-1] and key[last] elements
3757 __ movdtox(F48,L2);
3758 __ movdtox(F50,L3);
3759
3760 __ and3(len_reg, 16, L4);
3761 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192);
3762 __ delayed()->nop();
3763
3764 __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start);
3765 __ delayed()->nop();
3766
3767 __ BIND(L_expand256bit);
3768 // load rest of the 256-bit key
3769 for ( int i = 4; i <= 7; i++ ) {
3770 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
3771 }
3772
3773 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
3774 for ( int i = 0; i <= 40; i += 8 ) {
3775 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
3776 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
3777 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
3778 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
3779 }
3780 __ aes_kexpand1(F48, F54, 6, F56);
3781 __ aes_kexpand2(F50, F56, F58);
3782
3783 // load expanded key[last-1] and key[last] elements
3784 __ movdtox(F56,L2);
3785 __ movdtox(F58,L3);
3786
3787 __ and3(len_reg, 16, L4);
3788 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256);
3789 __ delayed()->nop();
3790
3791 __ BIND(L_dec_first_block_start);
3792 __ ldx(from,0,L4);
3793 __ ldx(from,8,L5);
3794 __ xor3(L2,L4,G1);
3795 __ movxtod(G1,F60);
3796 __ xor3(L3,L5,G1);
3797 __ movxtod(G1,F62);
3798
3799 // 128-bit original key size
3800 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128);
3801
3802 // 192-bit original key size
3803 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192);
3804
3805 __ aes_dround23(F54, F60, F62, F58);
3806 __ aes_dround01(F52, F60, F62, F56);
3807 __ aes_dround23(F50, F56, F58, F62);
3808 __ aes_dround01(F48, F56, F58, F60);
3809
3810 __ BIND(L_dec_first_block192);
3811 __ aes_dround23(F46, F60, F62, F58);
3812 __ aes_dround01(F44, F60, F62, F56);
3813 __ aes_dround23(F42, F56, F58, F62);
3814 __ aes_dround01(F40, F56, F58, F60);
3815
3816 __ BIND(L_dec_first_block128);
3817 for ( int i = 38; i >= 6; i -= 8 ) {
3818 __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
3819 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
3820 if ( i != 6) {
3821 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
3822 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
3823 } else {
3824 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
3825 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
3826 }
3827 }
3828
3829 __ movxtod(L0,F56);
3830 __ movxtod(L1,F58);
3831 __ mov(L4,L0);
3832 __ mov(L5,L1);
3833 __ fxor(FloatRegisterImpl::D, F56, F60, F60);
3834 __ fxor(FloatRegisterImpl::D, F58, F62, F62);
3835
3836 __ stf(FloatRegisterImpl::D, F60, to, 0);
3837 __ stf(FloatRegisterImpl::D, F62, to, 8);
3838
3839 __ add(from, 16, from);
3840 __ add(to, 16, to);
3841 __ subcc(len_reg, 16, len_reg);
3842 __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end);
3843 __ delayed()->nop();
3844
3845 // 256-bit original key size
3846 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256);
3847
3848 // 192-bit original key size
3849 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192);
3850
3851 __ align(OptoLoopAlignment);
3852 __ BIND(L_dec_next2_blocks128);
3853 __ nop();
3854
3855 // F40:F42 used for first 16-bytes
3856 __ ldx(from,0,G4);
3857 __ ldx(from,8,G5);
3858 __ xor3(L2,G4,G1);
3859 __ movxtod(G1,F40);
3860 __ xor3(L3,G5,G1);
3861 __ movxtod(G1,F42);
3862
3863 // F60:F62 used for next 16-bytes
3864 __ ldx(from,16,L4);
3865 __ ldx(from,24,L5);
3866 __ xor3(L2,L4,G1);
3867 __ movxtod(G1,F60);
3868 __ xor3(L3,L5,G1);
3869 __ movxtod(G1,F62);
3870
3871 for ( int i = 38; i >= 6; i -= 8 ) {
3872 __ aes_dround23(as_FloatRegister(i), F40, F42, F44);
3873 __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46);
3874 __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
3875 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
3876 if (i != 6 ) {
3877 __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42);
3878 __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40);
3879 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
3880 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
3881 } else {
3882 __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42);
3883 __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40);
3884 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
3885 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
3886 }
3887 }
3888
3889 __ movxtod(L0,F46);
3890 __ movxtod(L1,F44);
3891 __ fxor(FloatRegisterImpl::D, F46, F40, F40);
3892 __ fxor(FloatRegisterImpl::D, F44, F42, F42);
3893
3894 __ stf(FloatRegisterImpl::D, F40, to, 0);
3895 __ stf(FloatRegisterImpl::D, F42, to, 8);
3896
3897 __ movxtod(G4,F56);
3898 __ movxtod(G5,F58);
3899 __ mov(L4,L0);
3900 __ mov(L5,L1);
3901 __ fxor(FloatRegisterImpl::D, F56, F60, F60);
3902 __ fxor(FloatRegisterImpl::D, F58, F62, F62);
3903
3904 __ stf(FloatRegisterImpl::D, F60, to, 16);
3905 __ stf(FloatRegisterImpl::D, F62, to, 24);
3906
3907 __ add(from, 32, from);
3908 __ add(to, 32, to);
3909 __ subcc(len_reg, 32, len_reg);
3910 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
3911 __ delayed()->nop();
3912 __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
3913 __ delayed()->nop();
3914
3915 __ align(OptoLoopAlignment);
3916 __ BIND(L_dec_next2_blocks192);
3917 __ nop();
3918
3919 // F48:F50 used for first 16-bytes
3920 __ ldx(from,0,G4);
3921 __ ldx(from,8,G5);
3922 __ xor3(L2,G4,G1);
3923 __ movxtod(G1,F48);
3924 __ xor3(L3,G5,G1);
3925 __ movxtod(G1,F50);
3926
3927 // F60:F62 used for next 16-bytes
3928 __ ldx(from,16,L4);
3929 __ ldx(from,24,L5);
3930 __ xor3(L2,L4,G1);
3931 __ movxtod(G1,F60);
3932 __ xor3(L3,L5,G1);
3933 __ movxtod(G1,F62);
3934
3935 for ( int i = 46; i >= 6; i -= 8 ) {
3936 __ aes_dround23(as_FloatRegister(i), F48, F50, F52);
3937 __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54);
3938 __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
3939 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
3940 if (i != 6 ) {
3941 __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50);
3942 __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48);
3943 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
3944 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
3945 } else {
3946 __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50);
3947 __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48);
3948 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
3949 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
3950 }
3951 }
3952
3953 __ movxtod(L0,F54);
3954 __ movxtod(L1,F52);
3955 __ fxor(FloatRegisterImpl::D, F54, F48, F48);
3956 __ fxor(FloatRegisterImpl::D, F52, F50, F50);
3957
3958 __ stf(FloatRegisterImpl::D, F48, to, 0);
3959 __ stf(FloatRegisterImpl::D, F50, to, 8);
3960
3961 __ movxtod(G4,F56);
3962 __ movxtod(G5,F58);
3963 __ mov(L4,L0);
3964 __ mov(L5,L1);
3965 __ fxor(FloatRegisterImpl::D, F56, F60, F60);
3966 __ fxor(FloatRegisterImpl::D, F58, F62, F62);
3967
3968 __ stf(FloatRegisterImpl::D, F60, to, 16);
3969 __ stf(FloatRegisterImpl::D, F62, to, 24);
3970
3971 __ add(from, 32, from);
3972 __ add(to, 32, to);
3973 __ subcc(len_reg, 32, len_reg);
3974 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
3975 __ delayed()->nop();
3976 __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end);
3977 __ delayed()->nop();
3978
3979 __ align(OptoLoopAlignment);
3980 __ BIND(L_dec_next2_blocks256);
3981 __ nop();
3982
3983 // F0:F2 used for first 16-bytes
3984 __ ldx(from,0,G4);
3985 __ ldx(from,8,G5);
3986 __ xor3(L2,G4,G1);
3987 __ movxtod(G1,F0);
3988 __ xor3(L3,G5,G1);
3989 __ movxtod(G1,F2);
3990
3991 // F60:F62 used for next 16-bytes
3992 __ ldx(from,16,L4);
3993 __ ldx(from,24,L5);
3994 __ xor3(L2,L4,G1);
3995 __ movxtod(G1,F60);
3996 __ xor3(L3,L5,G1);
3997 __ movxtod(G1,F62);
3998
3999 __ aes_dround23(F54, F0, F2, F4);
4000 __ aes_dround01(F52, F0, F2, F6);
4001 __ aes_dround23(F54, F60, F62, F58);
4002 __ aes_dround01(F52, F60, F62, F56);
4003 __ aes_dround23(F50, F6, F4, F2);
4004 __ aes_dround01(F48, F6, F4, F0);
4005 __ aes_dround23(F50, F56, F58, F62);
4006 __ aes_dround01(F48, F56, F58, F60);
4007 // save F48:F54 in temp registers
4008 __ movdtox(F54,G2);
4009 __ movdtox(F52,G3);
4010 __ movdtox(F50,G6);
4011 __ movdtox(F48,G1);
4012 for ( int i = 46; i >= 14; i -= 8 ) {
4013 __ aes_dround23(as_FloatRegister(i), F0, F2, F4);
4014 __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6);
4015 __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
4016 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
4017 __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2);
4018 __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0);
4019 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
4020 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
4021 }
4022 // init F48:F54 with F0:F6 values (original key)
4023 __ ldf(FloatRegisterImpl::D, original_key, 0, F48);
4024 __ ldf(FloatRegisterImpl::D, original_key, 8, F50);
4025 __ ldf(FloatRegisterImpl::D, original_key, 16, F52);
4026 __ ldf(FloatRegisterImpl::D, original_key, 24, F54);
4027 __ aes_dround23(F54, F0, F2, F4);
4028 __ aes_dround01(F52, F0, F2, F6);
4029 __ aes_dround23(F54, F60, F62, F58);
4030 __ aes_dround01(F52, F60, F62, F56);
4031 __ aes_dround23_l(F50, F6, F4, F2);
4032 __ aes_dround01_l(F48, F6, F4, F0);
4033 __ aes_dround23_l(F50, F56, F58, F62);
4034 __ aes_dround01_l(F48, F56, F58, F60);
4035 // re-init F48:F54 with their original values
4036 __ movxtod(G2,F54);
4037 __ movxtod(G3,F52);
4038 __ movxtod(G6,F50);
4039 __ movxtod(G1,F48);
4040
4041 __ movxtod(L0,F6);
4042 __ movxtod(L1,F4);
4043 __ fxor(FloatRegisterImpl::D, F6, F0, F0);
4044 __ fxor(FloatRegisterImpl::D, F4, F2, F2);
4045
4046 __ stf(FloatRegisterImpl::D, F0, to, 0);
4047 __ stf(FloatRegisterImpl::D, F2, to, 8);
4048
4049 __ movxtod(G4,F56);
4050 __ movxtod(G5,F58);
4051 __ mov(L4,L0);
4052 __ mov(L5,L1);
4053 __ fxor(FloatRegisterImpl::D, F56, F60, F60);
4054 __ fxor(FloatRegisterImpl::D, F58, F62, F62);
4055
4056 __ stf(FloatRegisterImpl::D, F60, to, 16);
4057 __ stf(FloatRegisterImpl::D, F62, to, 24);
4058
4059 __ add(from, 32, from);
4060 __ add(to, 32, to);
4061 __ subcc(len_reg, 32, len_reg);
4062 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256);
4063 __ delayed()->nop();
4064
4065 __ BIND(L_cbcdec_end);
4066 __ stx(L0, rvec, 0);
4067 __ stx(L1, rvec, 8);
4068 __ restore();
4069 __ mov(L0, O0);
4070 __ retl();
4071 __ delayed()->nop();
4072
4073 return start;
4074 }
4075
4076 void generate_initial() { 3307 void generate_initial() {
4077 // Generates all stubs and initializes the entry points 3308 // Generates all stubs and initializes the entry points
4078 3309
4079 //------------------------------------------------------------------------------------------------------------------------ 3310 //------------------------------------------------------------------------------------------------------------------------
4080 // entry points that exist in all platforms 3311 // entry points that exist in all platforms
4136 &StubRoutines::_safefetch32_fault_pc, 3367 &StubRoutines::_safefetch32_fault_pc,
4137 &StubRoutines::_safefetch32_continuation_pc); 3368 &StubRoutines::_safefetch32_continuation_pc);
4138 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, 3369 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
4139 &StubRoutines::_safefetchN_fault_pc, 3370 &StubRoutines::_safefetchN_fault_pc,
4140 &StubRoutines::_safefetchN_continuation_pc); 3371 &StubRoutines::_safefetchN_continuation_pc);
4141
4142 // generate AES intrinsics code
4143 if (UseAESIntrinsics) {
4144 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
4145 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
4146 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
4147 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
4148 }
4149 } 3372 }
4150 3373
4151 3374
4152 public: 3375 public:
4153 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { 3376 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {