Mercurial > hg > truffle
comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 14518:d8041d695d19
Merged with jdk9/dev/hotspot changeset 3812c088b945
author | twisti |
---|---|
date | Tue, 11 Mar 2014 18:45:59 -0700 |
parents | cefad50507d8 00f5eff62d18 |
children | 4ca6dc0799b6 |
comparison
equal
deleted
inserted
replaced
14141:f97c5ec83832 | 14518:d8041d695d19 |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. | 2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | 4 * |
5 * This code is free software; you can redistribute it and/or modify it | 5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as | 6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
3302 if (UseBlockZeroing) { | 3302 if (UseBlockZeroing) { |
3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); | 3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); |
3304 } | 3304 } |
3305 } | 3305 } |
3306 | 3306 |
3307 address generate_aescrypt_encryptBlock() { | |
3308 __ align(CodeEntryAlignment); | |
3309 StubCodeMark mark(this, "StubRoutines", "aesencryptBlock"); | |
3310 Label L_doLast128bit, L_storeOutput; | |
3311 address start = __ pc(); | |
3312 Register from = O0; // source byte array | |
3313 Register to = O1; // destination byte array | |
3314 Register key = O2; // expanded key array | |
3315 const Register keylen = O4; //reg for storing expanded key array length | |
3316 | |
3317 // read expanded key length | |
3318 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); | |
3319 | |
3320 // load input into F54-F56; F30-F31 used as temp | |
3321 __ ldf(FloatRegisterImpl::S, from, 0, F30); | |
3322 __ ldf(FloatRegisterImpl::S, from, 4, F31); | |
3323 __ fmov(FloatRegisterImpl::D, F30, F54); | |
3324 __ ldf(FloatRegisterImpl::S, from, 8, F30); | |
3325 __ ldf(FloatRegisterImpl::S, from, 12, F31); | |
3326 __ fmov(FloatRegisterImpl::D, F30, F56); | |
3327 | |
3328 // load expanded key | |
3329 for ( int i = 0; i <= 38; i += 2 ) { | |
3330 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i)); | |
3331 } | |
3332 | |
3333 // perform cipher transformation | |
3334 __ fxor(FloatRegisterImpl::D, F0, F54, F54); | |
3335 __ fxor(FloatRegisterImpl::D, F2, F56, F56); | |
3336 // rounds 1 through 8 | |
3337 for ( int i = 4; i <= 28; i += 8 ) { | |
3338 __ aes_eround01(as_FloatRegister(i), F54, F56, F58); | |
3339 __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60); | |
3340 __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54); | |
3341 __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56); | |
3342 } | |
3343 __ aes_eround01(F36, F54, F56, F58); //round 9 | |
3344 __ aes_eround23(F38, F54, F56, F60); | |
3345 | |
3346 // 128-bit original key size | |
3347 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit); | |
3348 | |
3349 for ( int i = 40; i <= 50; i += 2 ) { | |
3350 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) ); | |
3351 } | |
3352 __ aes_eround01(F40, F58, F60, F54); //round 10 | |
3353 __ aes_eround23(F42, F58, F60, F56); | |
3354 __ aes_eround01(F44, F54, F56, F58); //round 11 | |
3355 __ aes_eround23(F46, F54, F56, F60); | |
3356 | |
3357 // 192-bit original key size | |
3358 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput); | |
3359 | |
3360 __ ldf(FloatRegisterImpl::D, key, 208, F52); | |
3361 __ aes_eround01(F48, F58, F60, F54); //round 12 | |
3362 __ aes_eround23(F50, F58, F60, F56); | |
3363 __ ldf(FloatRegisterImpl::D, key, 216, F46); | |
3364 __ ldf(FloatRegisterImpl::D, key, 224, F48); | |
3365 __ ldf(FloatRegisterImpl::D, key, 232, F50); | |
3366 __ aes_eround01(F52, F54, F56, F58); //round 13 | |
3367 __ aes_eround23(F46, F54, F56, F60); | |
3368 __ br(Assembler::always, false, Assembler::pt, L_storeOutput); | |
3369 __ delayed()->nop(); | |
3370 | |
3371 __ BIND(L_doLast128bit); | |
3372 __ ldf(FloatRegisterImpl::D, key, 160, F48); | |
3373 __ ldf(FloatRegisterImpl::D, key, 168, F50); | |
3374 | |
3375 __ BIND(L_storeOutput); | |
3376 // perform last round of encryption common for all key sizes | |
3377 __ aes_eround01_l(F48, F58, F60, F54); //last round | |
3378 __ aes_eround23_l(F50, F58, F60, F56); | |
3379 | |
3380 // store output into the destination array, F0-F1 used as temp | |
3381 __ fmov(FloatRegisterImpl::D, F54, F0); | |
3382 __ stf(FloatRegisterImpl::S, F0, to, 0); | |
3383 __ stf(FloatRegisterImpl::S, F1, to, 4); | |
3384 __ fmov(FloatRegisterImpl::D, F56, F0); | |
3385 __ stf(FloatRegisterImpl::S, F0, to, 8); | |
3386 __ retl(); | |
3387 __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12); | |
3388 | |
3389 return start; | |
3390 } | |
3391 | |
3392 address generate_aescrypt_decryptBlock() { | |
3393 __ align(CodeEntryAlignment); | |
3394 StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock"); | |
3395 address start = __ pc(); | |
3396 Label L_expand192bit, L_expand256bit, L_common_transform; | |
3397 Register from = O0; // source byte array | |
3398 Register to = O1; // destination byte array | |
3399 Register key = O2; // expanded key array | |
3400 Register original_key = O3; // original key array only required during decryption | |
3401 const Register keylen = O4; // reg for storing expanded key array length | |
3402 | |
3403 // read expanded key array length | |
3404 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); | |
3405 | |
3406 // load input into F52-F54; F30,F31 used as temp | |
3407 __ ldf(FloatRegisterImpl::S, from, 0, F30); | |
3408 __ ldf(FloatRegisterImpl::S, from, 4, F31); | |
3409 __ fmov(FloatRegisterImpl::D, F30, F52); | |
3410 __ ldf(FloatRegisterImpl::S, from, 8, F30); | |
3411 __ ldf(FloatRegisterImpl::S, from, 12, F31); | |
3412 __ fmov(FloatRegisterImpl::D, F30, F54); | |
3413 | |
3414 // load original key from SunJCE expanded decryption key | |
3415 for ( int i = 0; i <= 3; i++ ) { | |
3416 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
3417 } | |
3418 | |
3419 // 256-bit original key size | |
3420 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); | |
3421 | |
3422 // 192-bit original key size | |
3423 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); | |
3424 | |
3425 // 128-bit original key size | |
3426 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3427 for ( int i = 0; i <= 36; i += 4 ) { | |
3428 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); | |
3429 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); | |
3430 } | |
3431 | |
3432 // perform 128-bit key specific inverse cipher transformation | |
3433 __ fxor(FloatRegisterImpl::D, F42, F54, F54); | |
3434 __ fxor(FloatRegisterImpl::D, F40, F52, F52); | |
3435 __ br(Assembler::always, false, Assembler::pt, L_common_transform); | |
3436 __ delayed()->nop(); | |
3437 | |
3438 __ BIND(L_expand192bit); | |
3439 | |
3440 // start loading rest of the 192-bit key | |
3441 __ ldf(FloatRegisterImpl::S, original_key, 16, F4); | |
3442 __ ldf(FloatRegisterImpl::S, original_key, 20, F5); | |
3443 | |
3444 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3445 for ( int i = 0; i <= 36; i += 6 ) { | |
3446 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); | |
3447 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); | |
3448 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); | |
3449 } | |
3450 __ aes_kexpand1(F42, F46, 7, F48); | |
3451 __ aes_kexpand2(F44, F48, F50); | |
3452 | |
3453 // perform 192-bit key specific inverse cipher transformation | |
3454 __ fxor(FloatRegisterImpl::D, F50, F54, F54); | |
3455 __ fxor(FloatRegisterImpl::D, F48, F52, F52); | |
3456 __ aes_dround23(F46, F52, F54, F58); | |
3457 __ aes_dround01(F44, F52, F54, F56); | |
3458 __ aes_dround23(F42, F56, F58, F54); | |
3459 __ aes_dround01(F40, F56, F58, F52); | |
3460 __ br(Assembler::always, false, Assembler::pt, L_common_transform); | |
3461 __ delayed()->nop(); | |
3462 | |
3463 __ BIND(L_expand256bit); | |
3464 | |
3465 // load rest of the 256-bit key | |
3466 for ( int i = 4; i <= 7; i++ ) { | |
3467 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
3468 } | |
3469 | |
3470 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3471 for ( int i = 0; i <= 40; i += 8 ) { | |
3472 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); | |
3473 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); | |
3474 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); | |
3475 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); | |
3476 } | |
3477 __ aes_kexpand1(F48, F54, 6, F56); | |
3478 __ aes_kexpand2(F50, F56, F58); | |
3479 | |
3480 for ( int i = 0; i <= 6; i += 2 ) { | |
3481 __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i)); | |
3482 } | |
3483 | |
3484 // load input into F52-F54 | |
3485 __ ldf(FloatRegisterImpl::D, from, 0, F52); | |
3486 __ ldf(FloatRegisterImpl::D, from, 8, F54); | |
3487 | |
3488 // perform 256-bit key specific inverse cipher transformation | |
3489 __ fxor(FloatRegisterImpl::D, F0, F54, F54); | |
3490 __ fxor(FloatRegisterImpl::D, F2, F52, F52); | |
3491 __ aes_dround23(F4, F52, F54, F58); | |
3492 __ aes_dround01(F6, F52, F54, F56); | |
3493 __ aes_dround23(F50, F56, F58, F54); | |
3494 __ aes_dround01(F48, F56, F58, F52); | |
3495 __ aes_dround23(F46, F52, F54, F58); | |
3496 __ aes_dround01(F44, F52, F54, F56); | |
3497 __ aes_dround23(F42, F56, F58, F54); | |
3498 __ aes_dround01(F40, F56, F58, F52); | |
3499 | |
3500 for ( int i = 0; i <= 7; i++ ) { | |
3501 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
3502 } | |
3503 | |
3504 // perform inverse cipher transformations common for all key sizes | |
3505 __ BIND(L_common_transform); | |
3506 for ( int i = 38; i >= 6; i -= 8 ) { | |
3507 __ aes_dround23(as_FloatRegister(i), F52, F54, F58); | |
3508 __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56); | |
3509 if ( i != 6) { | |
3510 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54); | |
3511 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52); | |
3512 } else { | |
3513 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54); | |
3514 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52); | |
3515 } | |
3516 } | |
3517 | |
3518 // store output to destination array, F0-F1 used as temp | |
3519 __ fmov(FloatRegisterImpl::D, F52, F0); | |
3520 __ stf(FloatRegisterImpl::S, F0, to, 0); | |
3521 __ stf(FloatRegisterImpl::S, F1, to, 4); | |
3522 __ fmov(FloatRegisterImpl::D, F54, F0); | |
3523 __ stf(FloatRegisterImpl::S, F0, to, 8); | |
3524 __ retl(); | |
3525 __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12); | |
3526 | |
3527 return start; | |
3528 } | |
3529 | |
3530 address generate_cipherBlockChaining_encryptAESCrypt() { | |
3531 __ align(CodeEntryAlignment); | |
3532 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); | |
3533 Label L_cbcenc128, L_cbcenc192, L_cbcenc256; | |
3534 address start = __ pc(); | |
3535 Register from = O0; // source byte array | |
3536 Register to = O1; // destination byte array | |
3537 Register key = O2; // expanded key array | |
3538 Register rvec = O3; // init vector | |
3539 const Register len_reg = O4; // cipher length | |
3540 const Register keylen = O5; // reg for storing expanded key array length | |
3541 | |
3542 // save cipher len to return in the end | |
3543 __ mov(len_reg, L1); | |
3544 | |
3545 // read expanded key length | |
3546 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); | |
3547 | |
3548 // load init vector | |
3549 __ ldf(FloatRegisterImpl::D, rvec, 0, F60); | |
3550 __ ldf(FloatRegisterImpl::D, rvec, 8, F62); | |
3551 __ ldx(key,0,G1); | |
3552 __ ldx(key,8,G2); | |
3553 | |
3554 // start loading expanded key | |
3555 for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) { | |
3556 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); | |
3557 } | |
3558 | |
3559 // 128-bit original key size | |
3560 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128); | |
3561 | |
3562 for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) { | |
3563 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); | |
3564 } | |
3565 | |
3566 // 192-bit original key size | |
3567 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192); | |
3568 | |
3569 for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) { | |
3570 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); | |
3571 } | |
3572 | |
3573 // 256-bit original key size | |
3574 __ br(Assembler::always, false, Assembler::pt, L_cbcenc256); | |
3575 __ delayed()->nop(); | |
3576 | |
3577 __ align(OptoLoopAlignment); | |
3578 __ BIND(L_cbcenc128); | |
3579 __ ldx(from,0,G3); | |
3580 __ ldx(from,8,G4); | |
3581 __ xor3(G1,G3,G3); | |
3582 __ xor3(G2,G4,G4); | |
3583 __ movxtod(G3,F56); | |
3584 __ movxtod(G4,F58); | |
3585 __ fxor(FloatRegisterImpl::D, F60, F56, F60); | |
3586 __ fxor(FloatRegisterImpl::D, F62, F58, F62); | |
3587 | |
3588 // TEN_EROUNDS | |
3589 for ( int i = 0; i <= 32; i += 8 ) { | |
3590 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); | |
3591 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); | |
3592 if (i != 32 ) { | |
3593 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); | |
3594 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); | |
3595 } else { | |
3596 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); | |
3597 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); | |
3598 } | |
3599 } | |
3600 | |
3601 __ stf(FloatRegisterImpl::D, F60, to, 0); | |
3602 __ stf(FloatRegisterImpl::D, F62, to, 8); | |
3603 __ add(from, 16, from); | |
3604 __ add(to, 16, to); | |
3605 __ subcc(len_reg, 16, len_reg); | |
3606 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128); | |
3607 __ delayed()->nop(); | |
3608 __ stf(FloatRegisterImpl::D, F60, rvec, 0); | |
3609 __ stf(FloatRegisterImpl::D, F62, rvec, 8); | |
3610 __ retl(); | |
3611 __ delayed()->mov(L1, O0); | |
3612 | |
3613 __ align(OptoLoopAlignment); | |
3614 __ BIND(L_cbcenc192); | |
3615 __ ldx(from,0,G3); | |
3616 __ ldx(from,8,G4); | |
3617 __ xor3(G1,G3,G3); | |
3618 __ xor3(G2,G4,G4); | |
3619 __ movxtod(G3,F56); | |
3620 __ movxtod(G4,F58); | |
3621 __ fxor(FloatRegisterImpl::D, F60, F56, F60); | |
3622 __ fxor(FloatRegisterImpl::D, F62, F58, F62); | |
3623 | |
3624 // TWELEVE_EROUNDS | |
3625 for ( int i = 0; i <= 40; i += 8 ) { | |
3626 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); | |
3627 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); | |
3628 if (i != 40 ) { | |
3629 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); | |
3630 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); | |
3631 } else { | |
3632 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); | |
3633 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); | |
3634 } | |
3635 } | |
3636 | |
3637 __ stf(FloatRegisterImpl::D, F60, to, 0); | |
3638 __ stf(FloatRegisterImpl::D, F62, to, 8); | |
3639 __ add(from, 16, from); | |
3640 __ subcc(len_reg, 16, len_reg); | |
3641 __ add(to, 16, to); | |
3642 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192); | |
3643 __ delayed()->nop(); | |
3644 __ stf(FloatRegisterImpl::D, F60, rvec, 0); | |
3645 __ stf(FloatRegisterImpl::D, F62, rvec, 8); | |
3646 __ retl(); | |
3647 __ delayed()->mov(L1, O0); | |
3648 | |
3649 __ align(OptoLoopAlignment); | |
3650 __ BIND(L_cbcenc256); | |
3651 __ ldx(from,0,G3); | |
3652 __ ldx(from,8,G4); | |
3653 __ xor3(G1,G3,G3); | |
3654 __ xor3(G2,G4,G4); | |
3655 __ movxtod(G3,F56); | |
3656 __ movxtod(G4,F58); | |
3657 __ fxor(FloatRegisterImpl::D, F60, F56, F60); | |
3658 __ fxor(FloatRegisterImpl::D, F62, F58, F62); | |
3659 | |
3660 // FOURTEEN_EROUNDS | |
3661 for ( int i = 0; i <= 48; i += 8 ) { | |
3662 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); | |
3663 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); | |
3664 if (i != 48 ) { | |
3665 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); | |
3666 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); | |
3667 } else { | |
3668 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); | |
3669 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); | |
3670 } | |
3671 } | |
3672 | |
3673 __ stf(FloatRegisterImpl::D, F60, to, 0); | |
3674 __ stf(FloatRegisterImpl::D, F62, to, 8); | |
3675 __ add(from, 16, from); | |
3676 __ subcc(len_reg, 16, len_reg); | |
3677 __ add(to, 16, to); | |
3678 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256); | |
3679 __ delayed()->nop(); | |
3680 __ stf(FloatRegisterImpl::D, F60, rvec, 0); | |
3681 __ stf(FloatRegisterImpl::D, F62, rvec, 8); | |
3682 __ retl(); | |
3683 __ delayed()->mov(L1, O0); | |
3684 | |
3685 return start; | |
3686 } | |
3687 | |
3688 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { | |
3689 __ align(CodeEntryAlignment); | |
3690 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); | |
3691 Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start; | |
3692 Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256; | |
3693 address start = __ pc(); | |
3694 Register from = I0; // source byte array | |
3695 Register to = I1; // destination byte array | |
3696 Register key = I2; // expanded key array | |
3697 Register rvec = I3; // init vector | |
3698 const Register len_reg = I4; // cipher length | |
3699 const Register original_key = I5; // original key array only required during decryption | |
3700 const Register keylen = L6; // reg for storing expanded key array length | |
3701 | |
3702 // save cipher len before save_frame, to return in the end | |
3703 __ mov(O4, L0); | |
3704 __ save_frame(0); //args are read from I* registers since we save the frame in the beginning | |
3705 | |
3706 // load original key from SunJCE expanded decryption key | |
3707 for ( int i = 0; i <= 3; i++ ) { | |
3708 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
3709 } | |
3710 | |
3711 // load initial vector | |
3712 __ ldx(rvec,0,L0); | |
3713 __ ldx(rvec,8,L1); | |
3714 | |
3715 // read expanded key array length | |
3716 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); | |
3717 | |
3718 // 256-bit original key size | |
3719 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); | |
3720 | |
3721 // 192-bit original key size | |
3722 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); | |
3723 | |
3724 // 128-bit original key size | |
3725 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3726 for ( int i = 0; i <= 36; i += 4 ) { | |
3727 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); | |
3728 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); | |
3729 } | |
3730 | |
3731 // load expanded key[last-1] and key[last] elements | |
3732 __ movdtox(F40,L2); | |
3733 __ movdtox(F42,L3); | |
3734 | |
3735 __ and3(len_reg, 16, L4); | |
3736 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128); | |
3737 __ delayed()->nop(); | |
3738 | |
3739 __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start); | |
3740 __ delayed()->nop(); | |
3741 | |
3742 __ BIND(L_expand192bit); | |
3743 // load rest of the 192-bit key | |
3744 __ ldf(FloatRegisterImpl::S, original_key, 16, F4); | |
3745 __ ldf(FloatRegisterImpl::S, original_key, 20, F5); | |
3746 | |
3747 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3748 for ( int i = 0; i <= 36; i += 6 ) { | |
3749 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); | |
3750 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); | |
3751 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); | |
3752 } | |
3753 __ aes_kexpand1(F42, F46, 7, F48); | |
3754 __ aes_kexpand2(F44, F48, F50); | |
3755 | |
3756 // load expanded key[last-1] and key[last] elements | |
3757 __ movdtox(F48,L2); | |
3758 __ movdtox(F50,L3); | |
3759 | |
3760 __ and3(len_reg, 16, L4); | |
3761 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192); | |
3762 __ delayed()->nop(); | |
3763 | |
3764 __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start); | |
3765 __ delayed()->nop(); | |
3766 | |
3767 __ BIND(L_expand256bit); | |
3768 // load rest of the 256-bit key | |
3769 for ( int i = 4; i <= 7; i++ ) { | |
3770 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
3771 } | |
3772 | |
3773 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3774 for ( int i = 0; i <= 40; i += 8 ) { | |
3775 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); | |
3776 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); | |
3777 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); | |
3778 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); | |
3779 } | |
3780 __ aes_kexpand1(F48, F54, 6, F56); | |
3781 __ aes_kexpand2(F50, F56, F58); | |
3782 | |
3783 // load expanded key[last-1] and key[last] elements | |
3784 __ movdtox(F56,L2); | |
3785 __ movdtox(F58,L3); | |
3786 | |
3787 __ and3(len_reg, 16, L4); | |
3788 __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256); | |
3789 __ delayed()->nop(); | |
3790 | |
3791 __ BIND(L_dec_first_block_start); | |
3792 __ ldx(from,0,L4); | |
3793 __ ldx(from,8,L5); | |
3794 __ xor3(L2,L4,G1); | |
3795 __ movxtod(G1,F60); | |
3796 __ xor3(L3,L5,G1); | |
3797 __ movxtod(G1,F62); | |
3798 | |
3799 // 128-bit original key size | |
3800 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128); | |
3801 | |
3802 // 192-bit original key size | |
3803 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192); | |
3804 | |
3805 __ aes_dround23(F54, F60, F62, F58); | |
3806 __ aes_dround01(F52, F60, F62, F56); | |
3807 __ aes_dround23(F50, F56, F58, F62); | |
3808 __ aes_dround01(F48, F56, F58, F60); | |
3809 | |
3810 __ BIND(L_dec_first_block192); | |
3811 __ aes_dround23(F46, F60, F62, F58); | |
3812 __ aes_dround01(F44, F60, F62, F56); | |
3813 __ aes_dround23(F42, F56, F58, F62); | |
3814 __ aes_dround01(F40, F56, F58, F60); | |
3815 | |
3816 __ BIND(L_dec_first_block128); | |
3817 for ( int i = 38; i >= 6; i -= 8 ) { | |
3818 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); | |
3819 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); | |
3820 if ( i != 6) { | |
3821 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); | |
3822 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); | |
3823 } else { | |
3824 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); | |
3825 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); | |
3826 } | |
3827 } | |
3828 | |
3829 __ movxtod(L0,F56); | |
3830 __ movxtod(L1,F58); | |
3831 __ mov(L4,L0); | |
3832 __ mov(L5,L1); | |
3833 __ fxor(FloatRegisterImpl::D, F56, F60, F60); | |
3834 __ fxor(FloatRegisterImpl::D, F58, F62, F62); | |
3835 | |
3836 __ stf(FloatRegisterImpl::D, F60, to, 0); | |
3837 __ stf(FloatRegisterImpl::D, F62, to, 8); | |
3838 | |
3839 __ add(from, 16, from); | |
3840 __ add(to, 16, to); | |
3841 __ subcc(len_reg, 16, len_reg); | |
3842 __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end); | |
3843 __ delayed()->nop(); | |
3844 | |
3845 // 256-bit original key size | |
3846 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256); | |
3847 | |
3848 // 192-bit original key size | |
3849 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192); | |
3850 | |
3851 __ align(OptoLoopAlignment); | |
3852 __ BIND(L_dec_next2_blocks128); | |
3853 __ nop(); | |
3854 | |
3855 // F40:F42 used for first 16-bytes | |
3856 __ ldx(from,0,G4); | |
3857 __ ldx(from,8,G5); | |
3858 __ xor3(L2,G4,G1); | |
3859 __ movxtod(G1,F40); | |
3860 __ xor3(L3,G5,G1); | |
3861 __ movxtod(G1,F42); | |
3862 | |
3863 // F60:F62 used for next 16-bytes | |
3864 __ ldx(from,16,L4); | |
3865 __ ldx(from,24,L5); | |
3866 __ xor3(L2,L4,G1); | |
3867 __ movxtod(G1,F60); | |
3868 __ xor3(L3,L5,G1); | |
3869 __ movxtod(G1,F62); | |
3870 | |
3871 for ( int i = 38; i >= 6; i -= 8 ) { | |
3872 __ aes_dround23(as_FloatRegister(i), F40, F42, F44); | |
3873 __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46); | |
3874 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); | |
3875 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); | |
3876 if (i != 6 ) { | |
3877 __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42); | |
3878 __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40); | |
3879 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); | |
3880 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); | |
3881 } else { | |
3882 __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42); | |
3883 __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40); | |
3884 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); | |
3885 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); | |
3886 } | |
3887 } | |
3888 | |
3889 __ movxtod(L0,F46); | |
3890 __ movxtod(L1,F44); | |
3891 __ fxor(FloatRegisterImpl::D, F46, F40, F40); | |
3892 __ fxor(FloatRegisterImpl::D, F44, F42, F42); | |
3893 | |
3894 __ stf(FloatRegisterImpl::D, F40, to, 0); | |
3895 __ stf(FloatRegisterImpl::D, F42, to, 8); | |
3896 | |
3897 __ movxtod(G4,F56); | |
3898 __ movxtod(G5,F58); | |
3899 __ mov(L4,L0); | |
3900 __ mov(L5,L1); | |
3901 __ fxor(FloatRegisterImpl::D, F56, F60, F60); | |
3902 __ fxor(FloatRegisterImpl::D, F58, F62, F62); | |
3903 | |
3904 __ stf(FloatRegisterImpl::D, F60, to, 16); | |
3905 __ stf(FloatRegisterImpl::D, F62, to, 24); | |
3906 | |
3907 __ add(from, 32, from); | |
3908 __ add(to, 32, to); | |
3909 __ subcc(len_reg, 32, len_reg); | |
3910 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128); | |
3911 __ delayed()->nop(); | |
3912 __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end); | |
3913 __ delayed()->nop(); | |
3914 | |
3915 __ align(OptoLoopAlignment); | |
3916 __ BIND(L_dec_next2_blocks192); | |
3917 __ nop(); | |
3918 | |
3919 // F48:F50 used for first 16-bytes | |
3920 __ ldx(from,0,G4); | |
3921 __ ldx(from,8,G5); | |
3922 __ xor3(L2,G4,G1); | |
3923 __ movxtod(G1,F48); | |
3924 __ xor3(L3,G5,G1); | |
3925 __ movxtod(G1,F50); | |
3926 | |
3927 // F60:F62 used for next 16-bytes | |
3928 __ ldx(from,16,L4); | |
3929 __ ldx(from,24,L5); | |
3930 __ xor3(L2,L4,G1); | |
3931 __ movxtod(G1,F60); | |
3932 __ xor3(L3,L5,G1); | |
3933 __ movxtod(G1,F62); | |
3934 | |
3935 for ( int i = 46; i >= 6; i -= 8 ) { | |
3936 __ aes_dround23(as_FloatRegister(i), F48, F50, F52); | |
3937 __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54); | |
3938 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); | |
3939 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); | |
3940 if (i != 6 ) { | |
3941 __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50); | |
3942 __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48); | |
3943 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); | |
3944 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); | |
3945 } else { | |
3946 __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50); | |
3947 __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48); | |
3948 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); | |
3949 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); | |
3950 } | |
3951 } | |
3952 | |
3953 __ movxtod(L0,F54); | |
3954 __ movxtod(L1,F52); | |
3955 __ fxor(FloatRegisterImpl::D, F54, F48, F48); | |
3956 __ fxor(FloatRegisterImpl::D, F52, F50, F50); | |
3957 | |
3958 __ stf(FloatRegisterImpl::D, F48, to, 0); | |
3959 __ stf(FloatRegisterImpl::D, F50, to, 8); | |
3960 | |
3961 __ movxtod(G4,F56); | |
3962 __ movxtod(G5,F58); | |
3963 __ mov(L4,L0); | |
3964 __ mov(L5,L1); | |
3965 __ fxor(FloatRegisterImpl::D, F56, F60, F60); | |
3966 __ fxor(FloatRegisterImpl::D, F58, F62, F62); | |
3967 | |
3968 __ stf(FloatRegisterImpl::D, F60, to, 16); | |
3969 __ stf(FloatRegisterImpl::D, F62, to, 24); | |
3970 | |
3971 __ add(from, 32, from); | |
3972 __ add(to, 32, to); | |
3973 __ subcc(len_reg, 32, len_reg); | |
3974 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192); | |
3975 __ delayed()->nop(); | |
3976 __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end); | |
3977 __ delayed()->nop(); | |
3978 | |
3979 __ align(OptoLoopAlignment); | |
3980 __ BIND(L_dec_next2_blocks256); | |
3981 __ nop(); | |
3982 | |
3983 // F0:F2 used for first 16-bytes | |
3984 __ ldx(from,0,G4); | |
3985 __ ldx(from,8,G5); | |
3986 __ xor3(L2,G4,G1); | |
3987 __ movxtod(G1,F0); | |
3988 __ xor3(L3,G5,G1); | |
3989 __ movxtod(G1,F2); | |
3990 | |
3991 // F60:F62 used for next 16-bytes | |
3992 __ ldx(from,16,L4); | |
3993 __ ldx(from,24,L5); | |
3994 __ xor3(L2,L4,G1); | |
3995 __ movxtod(G1,F60); | |
3996 __ xor3(L3,L5,G1); | |
3997 __ movxtod(G1,F62); | |
3998 | |
3999 __ aes_dround23(F54, F0, F2, F4); | |
4000 __ aes_dround01(F52, F0, F2, F6); | |
4001 __ aes_dround23(F54, F60, F62, F58); | |
4002 __ aes_dround01(F52, F60, F62, F56); | |
4003 __ aes_dround23(F50, F6, F4, F2); | |
4004 __ aes_dround01(F48, F6, F4, F0); | |
4005 __ aes_dround23(F50, F56, F58, F62); | |
4006 __ aes_dround01(F48, F56, F58, F60); | |
4007 // save F48:F54 in temp registers | |
4008 __ movdtox(F54,G2); | |
4009 __ movdtox(F52,G3); | |
4010 __ movdtox(F50,G6); | |
4011 __ movdtox(F48,G1); | |
4012 for ( int i = 46; i >= 14; i -= 8 ) { | |
4013 __ aes_dround23(as_FloatRegister(i), F0, F2, F4); | |
4014 __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6); | |
4015 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); | |
4016 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); | |
4017 __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2); | |
4018 __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0); | |
4019 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); | |
4020 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); | |
4021 } | |
4022 // init F48:F54 with F0:F6 values (original key) | |
4023 __ ldf(FloatRegisterImpl::D, original_key, 0, F48); | |
4024 __ ldf(FloatRegisterImpl::D, original_key, 8, F50); | |
4025 __ ldf(FloatRegisterImpl::D, original_key, 16, F52); | |
4026 __ ldf(FloatRegisterImpl::D, original_key, 24, F54); | |
4027 __ aes_dround23(F54, F0, F2, F4); | |
4028 __ aes_dround01(F52, F0, F2, F6); | |
4029 __ aes_dround23(F54, F60, F62, F58); | |
4030 __ aes_dround01(F52, F60, F62, F56); | |
4031 __ aes_dround23_l(F50, F6, F4, F2); | |
4032 __ aes_dround01_l(F48, F6, F4, F0); | |
4033 __ aes_dround23_l(F50, F56, F58, F62); | |
4034 __ aes_dround01_l(F48, F56, F58, F60); | |
4035 // re-init F48:F54 with their original values | |
4036 __ movxtod(G2,F54); | |
4037 __ movxtod(G3,F52); | |
4038 __ movxtod(G6,F50); | |
4039 __ movxtod(G1,F48); | |
4040 | |
4041 __ movxtod(L0,F6); | |
4042 __ movxtod(L1,F4); | |
4043 __ fxor(FloatRegisterImpl::D, F6, F0, F0); | |
4044 __ fxor(FloatRegisterImpl::D, F4, F2, F2); | |
4045 | |
4046 __ stf(FloatRegisterImpl::D, F0, to, 0); | |
4047 __ stf(FloatRegisterImpl::D, F2, to, 8); | |
4048 | |
4049 __ movxtod(G4,F56); | |
4050 __ movxtod(G5,F58); | |
4051 __ mov(L4,L0); | |
4052 __ mov(L5,L1); | |
4053 __ fxor(FloatRegisterImpl::D, F56, F60, F60); | |
4054 __ fxor(FloatRegisterImpl::D, F58, F62, F62); | |
4055 | |
4056 __ stf(FloatRegisterImpl::D, F60, to, 16); | |
4057 __ stf(FloatRegisterImpl::D, F62, to, 24); | |
4058 | |
4059 __ add(from, 32, from); | |
4060 __ add(to, 32, to); | |
4061 __ subcc(len_reg, 32, len_reg); | |
4062 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256); | |
4063 __ delayed()->nop(); | |
4064 | |
4065 __ BIND(L_cbcdec_end); | |
4066 __ stx(L0, rvec, 0); | |
4067 __ stx(L1, rvec, 8); | |
4068 __ restore(); | |
4069 __ mov(L0, O0); | |
4070 __ retl(); | |
4071 __ delayed()->nop(); | |
4072 | |
4073 return start; | |
4074 } | |
4075 | |
3307 void generate_initial() { | 4076 void generate_initial() { |
3308 // Generates all stubs and initializes the entry points | 4077 // Generates all stubs and initializes the entry points |
3309 | 4078 |
3310 //------------------------------------------------------------------------------------------------------------------------ | 4079 //------------------------------------------------------------------------------------------------------------------------ |
3311 // entry points that exist in all platforms | 4080 // entry points that exist in all platforms |
3367 &StubRoutines::_safefetch32_fault_pc, | 4136 &StubRoutines::_safefetch32_fault_pc, |
3368 &StubRoutines::_safefetch32_continuation_pc); | 4137 &StubRoutines::_safefetch32_continuation_pc); |
3369 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, | 4138 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, |
3370 &StubRoutines::_safefetchN_fault_pc, | 4139 &StubRoutines::_safefetchN_fault_pc, |
3371 &StubRoutines::_safefetchN_continuation_pc); | 4140 &StubRoutines::_safefetchN_continuation_pc); |
4141 | |
4142 // generate AES intrinsics code | |
4143 if (UseAESIntrinsics) { | |
4144 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); | |
4145 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); | |
4146 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); | |
4147 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); | |
4148 } | |
3372 } | 4149 } |
3373 | 4150 |
3374 | 4151 |
3375 public: | 4152 public: |
3376 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { | 4153 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { |