comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 18041:52b4284cb496

Merge with jdk8u20-b26
author Gilles Duboscq <duboscq@ssw.jku.at>
date Wed, 15 Oct 2014 16:02:50 +0200
parents 89152779163c 0342d80559e0
children 7848fc12602b
comparison
equal deleted inserted replaced
17606:45d7b2c7029d 18041:52b4284cb496
1 /* 1 /*
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 * 4 *
5 * This code is free software; you can redistribute it and/or modify it 5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as 6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
81 81
82 class StubGenerator: public StubCodeGenerator { 82 class StubGenerator: public StubCodeGenerator {
83 private: 83 private:
84 84
85 #ifdef PRODUCT 85 #ifdef PRODUCT
86 #define inc_counter_np(a,b,c) (0) 86 #define inc_counter_np(a,b,c)
87 #else 87 #else
88 #define inc_counter_np(counter, t1, t2) \ 88 #define inc_counter_np(counter, t1, t2) \
89 BLOCK_COMMENT("inc_counter " #counter); \ 89 BLOCK_COMMENT("inc_counter " #counter); \
90 __ inc_counter(&counter, t1, t2); 90 __ inc_counter(&counter, t1, t2);
91 #endif 91 #endif
1053 // 1053 //
1054 typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec, 1054 typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec,
1055 Label& L_loop, bool use_prefetch, bool use_bis); 1055 Label& L_loop, bool use_prefetch, bool use_bis);
1056 1056
1057 void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size, 1057 void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size,
1058 int iter_size, CopyLoopFunc copy_loop_func) { 1058 int iter_size, StubGenerator::CopyLoopFunc copy_loop_func) {
1059 Label L_copy; 1059 Label L_copy;
1060 1060
1061 assert(log2_elem_size <= 3, "the following code should be changed"); 1061 assert(log2_elem_size <= 3, "the following code should be changed");
1062 int count_dec = 16>>log2_elem_size; 1062 int count_dec = 16>>log2_elem_size;
1063 1063
1204 __ andn(from, 7, from); // Align address 1204 __ andn(from, 7, from); // Align address
1205 __ ldx(from, 0, O3); 1205 __ ldx(from, 0, O3);
1206 __ inc(from, 8); 1206 __ inc(from, 8);
1207 __ sllx(O3, left_shift, O3); 1207 __ sllx(O3, left_shift, O3);
1208 1208
1209 disjoint_copy_core(from, to, count, log2_elem_size, 16, copy_16_bytes_shift_loop); 1209 disjoint_copy_core(from, to, count, log2_elem_size, 16, &StubGenerator::copy_16_bytes_shift_loop);
1210 1210
1211 __ inccc(count, count_dec>>1 ); // + 8 bytes 1211 __ inccc(count, count_dec>>1 ); // + 8 bytes
1212 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); 1212 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
1213 __ delayed()->inc(count, count_dec>>1); // restore 'count' 1213 __ delayed()->inc(count, count_dec>>1); // restore 'count'
1214 1214
2083 2083
2084 // copy with shift 4 elements (16 bytes) at a time 2084 // copy with shift 4 elements (16 bytes) at a time
2085 __ dec(count, 4); // The cmp at the beginning guaranty count >= 4 2085 __ dec(count, 4); // The cmp at the beginning guaranty count >= 4
2086 __ sllx(O3, 32, O3); 2086 __ sllx(O3, 32, O3);
2087 2087
2088 disjoint_copy_core(from, to, count, 2, 16, copy_16_bytes_loop); 2088 disjoint_copy_core(from, to, count, 2, 16, &StubGenerator::copy_16_bytes_loop);
2089 2089
2090 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); 2090 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
2091 __ delayed()->inc(count, 4); // restore 'count' 2091 __ delayed()->inc(count, 4); // restore 'count'
2092 2092
2093 __ BIND(L_aligned_copy); 2093 __ BIND(L_aligned_copy);
2364 // Now we can use O4(offset0), O5(offset8) as temps 2364 // Now we can use O4(offset0), O5(offset8) as temps
2365 __ mov(O3, count); 2365 __ mov(O3, count);
2366 // count >= 0 (original count - 8) 2366 // count >= 0 (original count - 8)
2367 __ mov(from, from64); 2367 __ mov(from, from64);
2368 2368
2369 disjoint_copy_core(from64, to64, count, 3, 64, copy_64_bytes_loop); 2369 disjoint_copy_core(from64, to64, count, 3, 64, &StubGenerator::copy_64_bytes_loop);
2370 2370
2371 // Restore O4(offset0), O5(offset8) 2371 // Restore O4(offset0), O5(offset8)
2372 __ sub(from64, from, offset0); 2372 __ sub(from64, from, offset0);
2373 __ inccc(count, 6); // restore count 2373 __ inccc(count, 6); // restore count
2374 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); 2374 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
3302 if (UseBlockZeroing) { 3302 if (UseBlockZeroing) {
3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); 3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words");
3304 } 3304 }
3305 } 3305 }
3306 3306
// Generates the stub for single-block AES encryption using the SPARC
// crypto instructions (aes_eround01/23 and their *_l last-round forms).
// Handles arbitrary byte alignment of both source and destination arrays
// via alignaddr/faligndata (loads) and edge8n/stpartialf (stores).
// In:  O0 = from (source byte array), O1 = to (destination byte array),
//      O2 = key (expanded round-key int array)
3307 address generate_aescrypt_encryptBlock() {
3308 // required since we read expanded key 'int' array starting first element without alignment considerations
3309 assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
3310 "the following code assumes that first element of an int array is aligned to 8 bytes");
3311 __ align(CodeEntryAlignment);
3312 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
3313 Label L_load_misaligned_input, L_load_expanded_key, L_doLast128bit, L_storeOutput, L_store_misaligned_output;
3314 address start = __ pc();
3315 Register from = O0; // source byte array
3316 Register to = O1; // destination byte array
3317 Register key = O2; // expanded key array
3318 const Register keylen = O4; //reg for storing expanded key array length
3319
3320 // read expanded key length (44/52/60 ints for 128/192/256-bit keys, see checks below)
3321 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
3322
3323 // Method to address arbitrary alignment for load instructions:
3324 // Check last 3 bits of 'from' address to see if it is aligned to 8-byte boundary
3325 // If zero/aligned then continue with double FP load instructions
3326 // If not zero/mis-aligned then alignaddr will set GSR.align with number of bytes to skip during faligndata
3327 // alignaddr will also convert arbitrary aligned 'from' address to nearest 8-byte aligned address
3328 // load 3 * 8-byte components (to read 16 bytes input) in 3 different FP regs starting at this aligned address
3329 // faligndata will then extract (based on GSR.align value) the appropriate 8 bytes from the 2 source regs
3330
3331 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
3332 __ andcc(from, 7, G0);
3333 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input);
3334 __ delayed()->alignaddr(from, G0, from);
3335
3336 // aligned case: load input into F54-F56
3337 __ ldf(FloatRegisterImpl::D, from, 0, F54);
3338 __ ldf(FloatRegisterImpl::D, from, 8, F56);
3339 __ ba_short(L_load_expanded_key);
3340
3341 __ BIND(L_load_misaligned_input);
3342 __ ldf(FloatRegisterImpl::D, from, 0, F54);
3343 __ ldf(FloatRegisterImpl::D, from, 8, F56);
3344 __ ldf(FloatRegisterImpl::D, from, 16, F58);
3345 __ faligndata(F54, F56, F54);
3346 __ faligndata(F56, F58, F56);
3347
3348 __ BIND(L_load_expanded_key);
3349 // Since we load expanded key buffers starting first element, 8-byte alignment is guaranteed
// load the first 160 bytes (40 ints) of the expanded key into F0..F38
3350 for ( int i = 0; i <= 38; i += 2 ) {
3351 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i));
3352 }
3353
3354 // perform cipher transformation
// initial whitening: XOR the 16-byte input with the first 16 bytes of key material
3355 __ fxor(FloatRegisterImpl::D, F0, F54, F54);
3356 __ fxor(FloatRegisterImpl::D, F2, F56, F56);
3357 // rounds 1 through 8
3358 for ( int i = 4; i <= 28; i += 8 ) {
3359 __ aes_eround01(as_FloatRegister(i), F54, F56, F58);
3360 __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60);
3361 __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54);
3362 __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56);
3363 }
3364 __ aes_eround01(F36, F54, F56, F58); //round 9
3365 __ aes_eround23(F38, F54, F56, F60);
3366
3367 // 128-bit original key size
3368 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit);
3369
// 192/256-bit keys need more round-key material: load ints 40..51 into F40..F50
3370 for ( int i = 40; i <= 50; i += 2 ) {
3371 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) );
3372 }
3373 __ aes_eround01(F40, F58, F60, F54); //round 10
3374 __ aes_eround23(F42, F58, F60, F56);
3375 __ aes_eround01(F44, F54, F56, F58); //round 11
3376 __ aes_eround23(F46, F54, F56, F60);
3377
3378 // 192-bit original key size
3379 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput);
3380
// 256-bit key: two extra rounds; F46-F50 are reloaded with the final round keys
3381 __ ldf(FloatRegisterImpl::D, key, 208, F52);
3382 __ aes_eround01(F48, F58, F60, F54); //round 12
3383 __ aes_eround23(F50, F58, F60, F56);
3384 __ ldf(FloatRegisterImpl::D, key, 216, F46);
3385 __ ldf(FloatRegisterImpl::D, key, 224, F48);
3386 __ ldf(FloatRegisterImpl::D, key, 232, F50);
3387 __ aes_eround01(F52, F54, F56, F58); //round 13
3388 __ aes_eround23(F46, F54, F56, F60);
3389 __ ba_short(L_storeOutput);
3390
3391 __ BIND(L_doLast128bit);
// 128-bit key: load the last-round keys (ints 40..43) into F48/F50
3392 __ ldf(FloatRegisterImpl::D, key, 160, F48);
3393 __ ldf(FloatRegisterImpl::D, key, 168, F50);
3394
3395 __ BIND(L_storeOutput);
3396 // perform last round of encryption common for all key sizes
3397 __ aes_eround01_l(F48, F58, F60, F54); //last round
3398 __ aes_eround23_l(F50, F58, F60, F56);
3399
3400 // Method to address arbitrary alignment for store instructions:
3401 // Check last 3 bits of 'dest' address to see if it is aligned to 8-byte boundary
3402 // If zero/aligned then continue with double FP store instructions
3403 // If not zero/mis-aligned then edge8n will generate edge mask in result reg (O3 in below case)
3404 // Example: If dest address is 0x07 and nearest 8-byte aligned address is 0x00 then edge mask will be 00000001
3405 // Compute (8-n) where n is # of bytes skipped by partial store(stpartialf) inst from edge mask, n=7 in this case
3406 // We get the value of n from the andcc that checks 'dest' alignment. n is available in O5 in below case.
3407 // Set GSR.align to (8-n) using alignaddr
3408 // Circular byte shift store values by n places so that the original bytes are at correct position for stpartialf
3409 // Set the arbitrarily aligned 'dest' address to nearest 8-byte aligned address
3410 // Store (partial) the original first (8-n) bytes starting at the original 'dest' address
3411 // Negate the edge mask so that the subsequent stpartialf can store the original (8-n-1)th through 8th bytes at appropriate address
3412 // We need to execute this process for both the 8-byte result values
3413
3414 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
3415 __ andcc(to, 7, O5);
3416 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output);
3417 __ delayed()->edge8n(to, G0, O3);
3418
3419 // aligned case: store output into the destination array
3420 __ stf(FloatRegisterImpl::D, F54, to, 0);
3421 __ retl();
3422 __ delayed()->stf(FloatRegisterImpl::D, F56, to, 8);
3423
3424 __ BIND(L_store_misaligned_output);
3425 __ add(to, 8, O4);
3426 __ mov(8, O2);
3427 __ sub(O2, O5, O2);
3428 __ alignaddr(O2, G0, O2);
3429 __ faligndata(F54, F54, F54);
3430 __ faligndata(F56, F56, F56);
3431 __ and3(to, -8, to);
3432 __ and3(O4, -8, O4);
3433 __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY);
3434 __ stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY);
3435 __ add(to, 8, to);
3436 __ add(O4, 8, O4);
3437 __ orn(G0, O3, O3); // O3 = ~O3 (negated edge mask for the trailing bytes)
3438 __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY);
3439 __ retl();
3440 __ delayed()->stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY);
3441
3442 return start;
3443 }
3444
// Generates the stub for single-block AES decryption. The SunJCE expanded
// decryption key layout is not compatible with the SPARC crypto instructions
// (see comments below), so this stub re-expands the key from the original
// key material passed in O3 (128/192/256-bit variants) before running the
// inverse rounds with aes_dround01/23.
// In:  O0 = from, O1 = to, O2 = expanded key array, O3 = original key array
3445 address generate_aescrypt_decryptBlock() {
3446 assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
3447 "the following code assumes that first element of an int array is aligned to 8 bytes");
3448 // required since we read original key 'byte' array as well in the decryption stubs
3449 assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
3450 "the following code assumes that first element of a byte array is aligned to 8 bytes");
3451 __ align(CodeEntryAlignment);
3452 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
3453 address start = __ pc();
3454 Label L_load_misaligned_input, L_load_original_key, L_expand192bit, L_expand256bit, L_reload_misaligned_input;
3455 Label L_256bit_transform, L_common_transform, L_store_misaligned_output;
3456 Register from = O0; // source byte array
3457 Register to = O1; // destination byte array
3458 Register key = O2; // expanded key array
3459 Register original_key = O3; // original key array only required during decryption
3460 const Register keylen = O4; // reg for storing expanded key array length
3461
3462 // read expanded key array length (44/52/60 ints for 128/192/256-bit keys)
3463 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
3464
3465 // save 'from' since we may need to recheck alignment in case of 256-bit decryption
3466 __ mov(from, G1);
3467
3468 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
3469 __ andcc(from, 7, G0);
3470 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input);
3471 __ delayed()->alignaddr(from, G0, from);
3472
3473 // aligned case: load input into F52-F54
3474 __ ldf(FloatRegisterImpl::D, from, 0, F52);
3475 __ ldf(FloatRegisterImpl::D, from, 8, F54);
3476 __ ba_short(L_load_original_key);
3477
3478 __ BIND(L_load_misaligned_input);
// misaligned case: read 24 bytes and extract the 16 input bytes via faligndata
3479 __ ldf(FloatRegisterImpl::D, from, 0, F52);
3480 __ ldf(FloatRegisterImpl::D, from, 8, F54);
3481 __ ldf(FloatRegisterImpl::D, from, 16, F56);
3482 __ faligndata(F52, F54, F52);
3483 __ faligndata(F54, F56, F54);
3484
3485 __ BIND(L_load_original_key);
3486 // load original key from SunJCE expanded decryption key
3487 // Since we load original key buffer starting first element, 8-byte alignment is guaranteed
3488 for ( int i = 0; i <= 3; i++ ) {
3489 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
3490 }
3491
3492 // 256-bit original key size
3493 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
3494
3495 // 192-bit original key size
3496 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
3497
3498 // 128-bit original key size
3499 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
3500 for ( int i = 0; i <= 36; i += 4 ) {
3501 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
3502 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
3503 }
3504
3505 // perform 128-bit key specific inverse cipher transformation
3506 __ fxor(FloatRegisterImpl::D, F42, F54, F54);
3507 __ fxor(FloatRegisterImpl::D, F40, F52, F52);
3508 __ ba_short(L_common_transform);
3509
3510 __ BIND(L_expand192bit);
3511
3512 // start loading rest of the 192-bit key
3513 __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
3514 __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
3515
3516 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
3517 for ( int i = 0; i <= 36; i += 6 ) {
3518 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
3519 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
3520 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
3521 }
3522 __ aes_kexpand1(F42, F46, 7, F48);
3523 __ aes_kexpand2(F44, F48, F50);
3524
3525 // perform 192-bit key specific inverse cipher transformation
// initial whitening plus the first two inverse rounds, then fall into the common tail
3526 __ fxor(FloatRegisterImpl::D, F50, F54, F54);
3527 __ fxor(FloatRegisterImpl::D, F48, F52, F52);
3528 __ aes_dround23(F46, F52, F54, F58);
3529 __ aes_dround01(F44, F52, F54, F56);
3530 __ aes_dround23(F42, F56, F58, F54);
3531 __ aes_dround01(F40, F56, F58, F52);
3532 __ ba_short(L_common_transform);
3533
3534 __ BIND(L_expand256bit);
3535
3536 // load rest of the 256-bit key
3537 for ( int i = 4; i <= 7; i++ ) {
3538 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
3539 }
3540
3541 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
3542 for ( int i = 0; i <= 40; i += 8 ) {
3543 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
3544 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
3545 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
3546 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
3547 }
3548 __ aes_kexpand1(F48, F54, 6, F56);
3549 __ aes_kexpand2(F50, F56, F58);
3550
// copy the top expanded-key doubles down: F58->F0, F56->F2, F54->F4, F52->F6
3551 for ( int i = 0; i <= 6; i += 2 ) {
3552 __ fsrc2(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i));
3553 }
3554
3555 // reload original 'from' address (input was clobbered by key expansion above)
3556 __ mov(G1, from);
3557
3558 // re-check 8-byte alignment
3559 __ andcc(from, 7, G0);
3560 __ br(Assembler::notZero, true, Assembler::pn, L_reload_misaligned_input);
3561 __ delayed()->alignaddr(from, G0, from);
3562
3563 // aligned case: load input into F52-F54
3564 __ ldf(FloatRegisterImpl::D, from, 0, F52);
3565 __ ldf(FloatRegisterImpl::D, from, 8, F54);
3566 __ ba_short(L_256bit_transform);
3567
3568 __ BIND(L_reload_misaligned_input);
3569 __ ldf(FloatRegisterImpl::D, from, 0, F52);
3570 __ ldf(FloatRegisterImpl::D, from, 8, F54);
3571 __ ldf(FloatRegisterImpl::D, from, 16, F56);
3572 __ faligndata(F52, F54, F52);
3573 __ faligndata(F54, F56, F54);
3574
3575 // perform 256-bit key specific inverse cipher transformation
3576 __ BIND(L_256bit_transform);
3577 __ fxor(FloatRegisterImpl::D, F0, F54, F54);
3578 __ fxor(FloatRegisterImpl::D, F2, F52, F52);
3579 __ aes_dround23(F4, F52, F54, F58);
3580 __ aes_dround01(F6, F52, F54, F56);
3581 __ aes_dround23(F50, F56, F58, F54);
3582 __ aes_dround01(F48, F56, F58, F52);
3583 __ aes_dround23(F46, F52, F54, F58);
3584 __ aes_dround01(F44, F52, F54, F56);
3585 __ aes_dround23(F42, F56, F58, F54);
3586 __ aes_dround01(F40, F56, F58, F52);
3587
// re-expand the low rounds: reload the first 8 original key words into F0..F7
3588 for ( int i = 0; i <= 7; i++ ) {
3589 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
3590 }
3591
3592 // perform inverse cipher transformations common for all key sizes
3593 __ BIND(L_common_transform);
3594 for ( int i = 38; i >= 6; i -= 8 ) {
3595 __ aes_dround23(as_FloatRegister(i), F52, F54, F58);
3596 __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56);
3597 if ( i != 6) {
3598 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54);
3599 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52);
3600 } else {
// last round uses the *_l forms
3601 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54);
3602 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52);
3603 }
3604 }
3605
3606 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
3607 __ andcc(to, 7, O5);
3608 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output);
3609 __ delayed()->edge8n(to, G0, O3);
3610
3611 // aligned case: store output into the destination array
3612 __ stf(FloatRegisterImpl::D, F52, to, 0);
3613 __ retl();
3614 __ delayed()->stf(FloatRegisterImpl::D, F54, to, 8);
3615
3616 __ BIND(L_store_misaligned_output);
// misaligned store: circular-shift each 8-byte value and use partial stores
// (same scheme as documented in the encrypt stub)
3617 __ add(to, 8, O4);
3618 __ mov(8, O2);
3619 __ sub(O2, O5, O2);
3620 __ alignaddr(O2, G0, O2);
3621 __ faligndata(F52, F52, F52);
3622 __ faligndata(F54, F54, F54);
3623 __ and3(to, -8, to);
3624 __ and3(O4, -8, O4);
3625 __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY);
3626 __ stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY);
3627 __ add(to, 8, to);
3628 __ add(O4, 8, O4);
3629 __ orn(G0, O3, O3); // O3 = ~O3 (negated edge mask for the trailing bytes)
3630 __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY);
3631 __ retl();
3632 __ delayed()->stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY);
3633
3634 return start;
3635 }
3636
// Generates the CBC-mode AES encryption stub. The input is processed in
// 16-byte blocks by one of three size-specific loops (128/192/256-bit keys);
// each plaintext block is XOR-ed with the previous ciphertext (kept live in
// F60/F62, seeded from rvec) before the AES rounds. The cipher length is
// saved in L0 on entry and returned in I0.
// In:  I0 = from, I1 = to, I2 = expanded key, I3 = init vector, I4 = length
3637 address generate_cipherBlockChaining_encryptAESCrypt() {
3638 assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
3639 "the following code assumes that first element of an int array is aligned to 8 bytes");
3640 assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
3641 "the following code assumes that first element of a byte array is aligned to 8 bytes");
3642 __ align(CodeEntryAlignment);
3643 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
3644 Label L_cbcenc128, L_load_misaligned_input_128bit, L_128bit_transform, L_store_misaligned_output_128bit;
3645 Label L_check_loop_end_128bit, L_cbcenc192, L_load_misaligned_input_192bit, L_192bit_transform;
3646 Label L_store_misaligned_output_192bit, L_check_loop_end_192bit, L_cbcenc256, L_load_misaligned_input_256bit;
3647 Label L_256bit_transform, L_store_misaligned_output_256bit, L_check_loop_end_256bit;
3648 address start = __ pc();
3649 Register from = I0; // source byte array
3650 Register to = I1; // destination byte array
3651 Register key = I2; // expanded key array
3652 Register rvec = I3; // init vector
3653 const Register len_reg = I4; // cipher length
3654 const Register keylen = I5; // reg for storing expanded key array length
3655
3656 __ save_frame(0);
3657 // save cipher len to return in the end
3658 __ mov(len_reg, L0);
3659
3660 // read expanded key length
3661 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
3662
3663 // load initial vector, 8-byte alignment is guaranteed
3664 __ ldf(FloatRegisterImpl::D, rvec, 0, F60);
3665 __ ldf(FloatRegisterImpl::D, rvec, 8, F62);
3666 // load key, 8-byte alignment is guaranteed
// G1/G5 hold the first 16 key bytes for the integer-side whitening XOR below
3667 __ ldx(key,0,G1);
3668 __ ldx(key,8,G5);
3669
3670 // start loading expanded key, 8-byte alignment is guaranteed
3671 for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) {
3672 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
3673 }
3674
3675 // 128-bit original key size
3676 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128);
3677
3678 for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) {
3679 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
3680 }
3681
3682 // 192-bit original key size
3683 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192);
3684
3685 for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) {
3686 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i));
3687 }
3688
3689 // 256-bit original key size
3690 __ ba_short(L_cbcenc256);
3691
3692 __ align(OptoLoopAlignment);
3693 __ BIND(L_cbcenc128);
3694 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
3695 __ andcc(from, 7, G0);
3696 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_128bit);
3697 __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
3698
3699 // aligned case: load input into G3 and G4
3700 __ ldx(from,0,G3);
3701 __ ldx(from,8,G4);
3702 __ ba_short(L_128bit_transform);
3703
3704 __ BIND(L_load_misaligned_input_128bit);
3705 // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption
3706 __ alignaddr(from, G0, from);
3707 __ ldf(FloatRegisterImpl::D, from, 0, F48);
3708 __ ldf(FloatRegisterImpl::D, from, 8, F50);
3709 __ ldf(FloatRegisterImpl::D, from, 16, F52);
3710 __ faligndata(F48, F50, F48);
3711 __ faligndata(F50, F52, F50);
3712 __ movdtox(F48, G3);
3713 __ movdtox(F50, G4);
3714 __ mov(L1, from);
3715
3716 __ BIND(L_128bit_transform);
// whitening XOR on the integer side, then CBC-chain with previous ciphertext (F60/F62)
3717 __ xor3(G1,G3,G3);
3718 __ xor3(G5,G4,G4);
3719 __ movxtod(G3,F56);
3720 __ movxtod(G4,F58);
3721 __ fxor(FloatRegisterImpl::D, F60, F56, F60);
3722 __ fxor(FloatRegisterImpl::D, F62, F58, F62);
3723
3724 // TEN_EROUNDS
3725 for ( int i = 0; i <= 32; i += 8 ) {
3726 __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
3727 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
3728 if (i != 32 ) {
3729 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
3730 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
3731 } else {
3732 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
3733 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
3734 }
3735 }
3736
3737 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
3738 __ andcc(to, 7, L1);
3739 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_128bit);
3740 __ delayed()->edge8n(to, G0, L2);
3741
3742 // aligned case: store output into the destination array
3743 __ stf(FloatRegisterImpl::D, F60, to, 0);
3744 __ stf(FloatRegisterImpl::D, F62, to, 8);
3745 __ ba_short(L_check_loop_end_128bit);
3746
3747 __ BIND(L_store_misaligned_output_128bit);
3748 __ add(to, 8, L3);
3749 __ mov(8, L4);
3750 __ sub(L4, L1, L4);
3751 __ alignaddr(L4, G0, L4);
3752 // save cipher text before circular right shift
3753 // as it needs to be stored as iv for next block (see code before next retl)
3754 __ movdtox(F60, L6);
3755 __ movdtox(F62, L7);
3756 __ faligndata(F60, F60, F60);
3757 __ faligndata(F62, F62, F62);
3758 __ mov(to, L5);
3759 __ and3(to, -8, to);
3760 __ and3(L3, -8, L3);
3761 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
3762 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
3763 __ add(to, 8, to);
3764 __ add(L3, 8, L3);
3765 __ orn(G0, L2, L2); // L2 = ~L2 (negated edge mask for the trailing bytes)
3766 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
3767 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
3768 __ mov(L5, to);
3769 __ movxtod(L6, F60);
3770 __ movxtod(L7, F62);
3771
3772 __ BIND(L_check_loop_end_128bit);
3773 __ add(from, 16, from);
3774 __ add(to, 16, to);
3775 __ subcc(len_reg, 16, len_reg);
3776 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128);
3777 __ delayed()->nop();
3778 // re-init initial vector for next block, 8-byte alignment is guaranteed
3779 __ stf(FloatRegisterImpl::D, F60, rvec, 0);
3780 __ stf(FloatRegisterImpl::D, F62, rvec, 8);
3781 __ mov(L0, I0);
3782 __ ret();
3783 __ delayed()->restore();
3784
3785 __ align(OptoLoopAlignment);
3786 __ BIND(L_cbcenc192);
3787 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
3788 __ andcc(from, 7, G0);
3789 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_192bit);
3790 __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
3791
3792 // aligned case: load input into G3 and G4
3793 __ ldx(from,0,G3);
3794 __ ldx(from,8,G4);
3795 __ ba_short(L_192bit_transform);
3796
3797 __ BIND(L_load_misaligned_input_192bit);
3798 // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption
3799 __ alignaddr(from, G0, from);
3800 __ ldf(FloatRegisterImpl::D, from, 0, F48);
3801 __ ldf(FloatRegisterImpl::D, from, 8, F50);
3802 __ ldf(FloatRegisterImpl::D, from, 16, F52);
3803 __ faligndata(F48, F50, F48);
3804 __ faligndata(F50, F52, F50);
3805 __ movdtox(F48, G3);
3806 __ movdtox(F50, G4);
3807 __ mov(L1, from);
3808
3809 __ BIND(L_192bit_transform);
3810 __ xor3(G1,G3,G3);
3811 __ xor3(G5,G4,G4);
3812 __ movxtod(G3,F56);
3813 __ movxtod(G4,F58);
3814 __ fxor(FloatRegisterImpl::D, F60, F56, F60);
3815 __ fxor(FloatRegisterImpl::D, F62, F58, F62);
3816
3817 // TWELVE_EROUNDS
3818 for ( int i = 0; i <= 40; i += 8 ) {
3819 __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
3820 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
3821 if (i != 40 ) {
3822 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
3823 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
3824 } else {
3825 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
3826 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
3827 }
3828 }
3829
3830 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
3831 __ andcc(to, 7, L1);
3832 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_192bit);
3833 __ delayed()->edge8n(to, G0, L2);
3834
3835 // aligned case: store output into the destination array
3836 __ stf(FloatRegisterImpl::D, F60, to, 0);
3837 __ stf(FloatRegisterImpl::D, F62, to, 8);
3838 __ ba_short(L_check_loop_end_192bit);
3839
3840 __ BIND(L_store_misaligned_output_192bit);
3841 __ add(to, 8, L3);
3842 __ mov(8, L4);
3843 __ sub(L4, L1, L4);
3844 __ alignaddr(L4, G0, L4);
// save cipher text (iv for next block) before the circular shift
3845 __ movdtox(F60, L6);
3846 __ movdtox(F62, L7);
3847 __ faligndata(F60, F60, F60);
3848 __ faligndata(F62, F62, F62);
3849 __ mov(to, L5);
3850 __ and3(to, -8, to);
3851 __ and3(L3, -8, L3);
3852 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
3853 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
3854 __ add(to, 8, to);
3855 __ add(L3, 8, L3);
3856 __ orn(G0, L2, L2); // L2 = ~L2 (negated edge mask for the trailing bytes)
3857 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
3858 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
3859 __ mov(L5, to);
3860 __ movxtod(L6, F60);
3861 __ movxtod(L7, F62);
3862
3863 __ BIND(L_check_loop_end_192bit);
3864 __ add(from, 16, from);
3865 __ subcc(len_reg, 16, len_reg);
3866 __ add(to, 16, to);
3867 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192);
3868 __ delayed()->nop();
3869 // re-init initial vector for next block, 8-byte alignment is guaranteed
3870 __ stf(FloatRegisterImpl::D, F60, rvec, 0);
3871 __ stf(FloatRegisterImpl::D, F62, rvec, 8);
3872 __ mov(L0, I0);
3873 __ ret();
3874 __ delayed()->restore();
3875
3876 __ align(OptoLoopAlignment);
3877 __ BIND(L_cbcenc256);
3878 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
3879 __ andcc(from, 7, G0);
3880 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_256bit);
3881 __ delayed()->mov(from, L1); // save original 'from' address before alignaddr
3882
3883 // aligned case: load input into G3 and G4
3884 __ ldx(from,0,G3);
3885 __ ldx(from,8,G4);
3886 __ ba_short(L_256bit_transform);
3887
3888 __ BIND(L_load_misaligned_input_256bit);
3889 // cannot clobber F48, F50 and F52. F56, F58 can be used though
3890 __ alignaddr(from, G0, from);
3891 __ movdtox(F60, L2); // save F60 before overwriting
3892 __ ldf(FloatRegisterImpl::D, from, 0, F56);
3893 __ ldf(FloatRegisterImpl::D, from, 8, F58);
3894 __ ldf(FloatRegisterImpl::D, from, 16, F60);
3895 __ faligndata(F56, F58, F56);
3896 __ faligndata(F58, F60, F58);
3897 __ movdtox(F56, G3);
3898 __ movdtox(F58, G4);
3899 __ mov(L1, from);
3900 __ movxtod(L2, F60);
3901
3902 __ BIND(L_256bit_transform);
3903 __ xor3(G1,G3,G3);
3904 __ xor3(G5,G4,G4);
3905 __ movxtod(G3,F56);
3906 __ movxtod(G4,F58);
3907 __ fxor(FloatRegisterImpl::D, F60, F56, F60);
3908 __ fxor(FloatRegisterImpl::D, F62, F58, F62);
3909
3910 // FOURTEEN_EROUNDS
3911 for ( int i = 0; i <= 48; i += 8 ) {
3912 __ aes_eround01(as_FloatRegister(i), F60, F62, F56);
3913 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58);
3914 if (i != 48 ) {
3915 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60);
3916 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62);
3917 } else {
3918 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60);
3919 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62);
3920 }
3921 }
3922
3923 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
3924 __ andcc(to, 7, L1);
3925 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_256bit);
3926 __ delayed()->edge8n(to, G0, L2);
3927
3928 // aligned case: store output into the destination array
3929 __ stf(FloatRegisterImpl::D, F60, to, 0);
3930 __ stf(FloatRegisterImpl::D, F62, to, 8);
3931 __ ba_short(L_check_loop_end_256bit);
3932
3933 __ BIND(L_store_misaligned_output_256bit);
3934 __ add(to, 8, L3);
3935 __ mov(8, L4);
3936 __ sub(L4, L1, L4);
3937 __ alignaddr(L4, G0, L4);
// save cipher text (iv for next block) before the circular shift
3938 __ movdtox(F60, L6);
3939 __ movdtox(F62, L7);
3940 __ faligndata(F60, F60, F60);
3941 __ faligndata(F62, F62, F62);
3942 __ mov(to, L5);
3943 __ and3(to, -8, to);
3944 __ and3(L3, -8, L3);
3945 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
3946 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
3947 __ add(to, 8, to);
3948 __ add(L3, 8, L3);
3949 __ orn(G0, L2, L2); // L2 = ~L2 (negated edge mask for the trailing bytes)
3950 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY);
3951 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY);
3952 __ mov(L5, to);
3953 __ movxtod(L6, F60);
3954 __ movxtod(L7, F62);
3955
3956 __ BIND(L_check_loop_end_256bit);
3957 __ add(from, 16, from);
3958 __ subcc(len_reg, 16, len_reg);
3959 __ add(to, 16, to);
3960 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256);
3961 __ delayed()->nop();
3962 // re-init initial vector for next block, 8-byte alignment is guaranteed
3963 __ stf(FloatRegisterImpl::D, F60, rvec, 0);
3964 __ stf(FloatRegisterImpl::D, F62, rvec, 8);
3965 __ mov(L0, I0);
3966 __ ret();
3967 __ delayed()->restore();
3968
3969 return start;
3970 }
3971
3972 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
// Stub for CBC-mode AES decryption (fast path for the SunJCE provider).
// Arguments, read from the I-registers after save_frame():
//   I0 from         - source byte array (ciphertext)
//   I1 to           - destination byte array (plaintext)
//   I2 key          - SunJCE expanded key int[]; only its array length is read here
//   I3 rvec         - 16-byte initialization vector (8-byte aligned, see asserts below)
//   I4 len_reg      - cipher length in bytes; assumed a multiple of 16 — TODO confirm against caller
//   I5 original_key - original AES key bytes; the key schedule is recomputed below because
//                     the SunJCE decryption key schedule is not compatible with the SPARC
//                     AES instructions
// Returns the original cipher length in the caller's O0 (saved in L7, restored to I0 before ret).
// "Parallel": CBC decryption of distinct blocks is independent (each plaintext block only
// needs the previous *ciphertext* block), so the main loops below decrypt two 16-byte
// blocks per iteration to overlap the AES round instruction latencies.
3973 assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0,
3974 "the following code assumes that first element of an int array is aligned to 8 bytes");
3975 assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0,
3976 "the following code assumes that first element of a byte array is aligned to 8 bytes");
3977 __ align(CodeEntryAlignment);
3978 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
3979 Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start;
3980 Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256;
3981 Label L_load_misaligned_input_first_block, L_transform_first_block, L_load_misaligned_next2_blocks128, L_transform_next2_blocks128;
3982 Label L_load_misaligned_next2_blocks192, L_transform_next2_blocks192, L_load_misaligned_next2_blocks256, L_transform_next2_blocks256;
3983 Label L_store_misaligned_output_first_block, L_check_decrypt_end, L_store_misaligned_output_next2_blocks128;
3984 Label L_check_decrypt_loop_end128, L_store_misaligned_output_next2_blocks192, L_check_decrypt_loop_end192;
3985 Label L_store_misaligned_output_next2_blocks256, L_check_decrypt_loop_end256;
3986 address start = __ pc();
3987 Register from = I0; // source byte array
3988 Register to = I1; // destination byte array
3989 Register key = I2; // expanded key array
3990 Register rvec = I3; // init vector
3991 const Register len_reg = I4; // cipher length
3992 const Register original_key = I5; // original key array only required during decryption
3993 const Register keylen = L6; // reg for storing expanded key array length
3994
3995 __ save_frame(0); //args are read from I* registers since we save the frame in the beginning
3996 // save cipher len to return in the end
3997 __ mov(len_reg, L7);
3998
3999 // load original key from SunJCE expanded decryption key
4000 // Since we load original key buffer starting first element, 8-byte alignment is guaranteed
4001 for ( int i = 0; i <= 3; i++ ) {
4002 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
4003 }
4004
4005 // load initial vector, 8-byte alignment is guaranteed
4006 __ ldx(rvec,0,L0);
4007 __ ldx(rvec,8,L1);
4008
4009 // read expanded key array length
// keylen holds the expanded-key int[] length: 44, 52 or 60 ints for
// 128-, 192- and 256-bit AES keys respectively (compared against below).
4010 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0);
4011
4012 // 256-bit original key size
4013 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit);
4014
4015 // 192-bit original key size
4016 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit);
4017
4018 // 128-bit original key size
4019 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
// Expanded 128-bit schedule ends up in F0..F42 (even double registers).
4020 for ( int i = 0; i <= 36; i += 4 ) {
4021 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4));
4022 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6));
4023 }
4024
4025 // load expanded key[last-1] and key[last] elements
// L2:L3 cache the final two round-key doublewords: every ciphertext block is
// XORed with them (AddRoundKey of the inverse cipher) before the dround chain.
4026 __ movdtox(F40,L2);
4027 __ movdtox(F42,L3);
4028
// If len is an even number of 16-byte blocks (bit 4 clear), go straight to the
// two-blocks-per-iteration loop; otherwise decrypt one block first so the
// remaining length becomes a multiple of 32.
4029 __ and3(len_reg, 16, L4);
4030 __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks128);
// NOTE(review): br_null_short is a short-branch form that handles its own delay
// slot; this nop looks like padding only — the 256-bit path after L_expand256bit
// omits it. Confirm before removing.
4031 __ nop();
4032
4033 __ ba_short(L_dec_first_block_start);
4034
4035 __ BIND(L_expand192bit);
4036 // load rest of the 192-bit key
4037 __ ldf(FloatRegisterImpl::S, original_key, 16, F4);
4038 __ ldf(FloatRegisterImpl::S, original_key, 20, F5);
4039
4040 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
// 192-bit schedule: three doublewords produced per iteration, ending in F0..F50.
4041 for ( int i = 0; i <= 36; i += 6 ) {
4042 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6));
4043 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8));
4044 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10));
4045 }
4046 __ aes_kexpand1(F42, F46, 7, F48);
4047 __ aes_kexpand2(F44, F48, F50);
4048
4049 // load expanded key[last-1] and key[last] elements
4050 __ movdtox(F48,L2);
4051 __ movdtox(F50,L3);
4052
4053 __ and3(len_reg, 16, L4);
4054 __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks192);
4055 __ nop();
4056
4057 __ ba_short(L_dec_first_block_start);
4058
4059 __ BIND(L_expand256bit);
4060 // load rest of the 256-bit key
4061 for ( int i = 4; i <= 7; i++ ) {
4062 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i));
4063 }
4064
4065 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions
// 256-bit schedule: four doublewords per iteration, ending in F0..F58 — note this
// occupies nearly the whole FP file, which forces the register juggling in the
// 256-bit loop below.
4066 for ( int i = 0; i <= 40; i += 8 ) {
4067 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8));
4068 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10));
4069 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12));
4070 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14));
4071 }
4072 __ aes_kexpand1(F48, F54, 6, F56);
4073 __ aes_kexpand2(F50, F56, F58);
4074
4075 // load expanded key[last-1] and key[last] elements
4076 __ movdtox(F56,L2);
4077 __ movdtox(F58,L3);
4078
4079 __ and3(len_reg, 16, L4);
4080 __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks256);
4081
// Single-block path: used once when the block count is odd, then control falls
// into one of the two-block loops for the remainder.
4082 __ BIND(L_dec_first_block_start);
4083 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
4084 __ andcc(from, 7, G0);
4085 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_first_block);
4086 __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
4087
4088 // aligned case: load input into L4 and L5
4089 __ ldx(from,0,L4);
4090 __ ldx(from,8,L5);
4091 __ ba_short(L_transform_first_block);
4092
4093 __ BIND(L_load_misaligned_input_first_block);
// Misaligned source (SPARC VIS1 idiom): alignaddr rounds 'from' down to an
// 8-byte boundary and latches the byte offset in %gsr; each faligndata then
// extracts 8 contiguous source bytes spanning two adjacent doublewords. Three
// loads are needed to cover 16 unaligned bytes.
4094 __ alignaddr(from, G0, from);
4095 // F58, F60, F62 can be clobbered
4096 __ ldf(FloatRegisterImpl::D, from, 0, F58);
4097 __ ldf(FloatRegisterImpl::D, from, 8, F60);
4098 __ ldf(FloatRegisterImpl::D, from, 16, F62);
4099 __ faligndata(F58, F60, F58);
4100 __ faligndata(F60, F62, F60);
4101 __ movdtox(F58, L4);
4102 __ movdtox(F60, L5);
4103 __ mov(G1, from);
4104
4105 __ BIND(L_transform_first_block);
// Initial AddRoundKey: XOR ciphertext with the last two expanded-key words
// (cached in L2:L3), then move into F60:F62 for the dround chain.
4106 __ xor3(L2,L4,G1);
4107 __ movxtod(G1,F60);
4108 __ xor3(L3,L5,G1);
4109 __ movxtod(G1,F62);
4110
// Round count depends on key size: 256-bit keys run the two extra round pairs
// below, 192-bit keys skip the first pair, 128-bit keys skip both.
4111 // 128-bit original key size
4112 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128);
4113
4114 // 192-bit original key size
4115 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192);
4116
4117 __ aes_dround23(F54, F60, F62, F58);
4118 __ aes_dround01(F52, F60, F62, F56);
4119 __ aes_dround23(F50, F56, F58, F62);
4120 __ aes_dround01(F48, F56, F58, F60);
4121
4122 __ BIND(L_dec_first_block192);
4123 __ aes_dround23(F46, F60, F62, F58);
4124 __ aes_dround01(F44, F60, F62, F56);
4125 __ aes_dround23(F42, F56, F58, F62);
4126 __ aes_dround01(F40, F56, F58, F60);
4127
4128 __ BIND(L_dec_first_block128);
// Remaining rounds: walk the expanded key backwards two rounds per iteration;
// the final pair uses the '_l' last-round instruction variants.
4129 for ( int i = 38; i >= 6; i -= 8 ) {
4130 __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
4131 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
4132 if ( i != 6) {
4133 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
4134 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
4135 } else {
4136 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
4137 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
4138 }
4139 }
4140
// CBC chaining: XOR with IV/previous ciphertext (L0:L1), then roll the saved
// ciphertext of this block (L4:L5) forward as the next block's chaining value.
4141 __ movxtod(L0,F56);
4142 __ movxtod(L1,F58);
4143 __ mov(L4,L0);
4144 __ mov(L5,L1);
4145 __ fxor(FloatRegisterImpl::D, F56, F60, F60);
4146 __ fxor(FloatRegisterImpl::D, F58, F62, F62);
4147
4148 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
4149 __ andcc(to, 7, G1);
4150 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_first_block);
4151 __ delayed()->edge8n(to, G0, G2); // G2 = partial-store byte mask for the unaligned start
4152
4153 // aligned case: store output into the destination array
4154 __ stf(FloatRegisterImpl::D, F60, to, 0);
4155 __ stf(FloatRegisterImpl::D, F62, to, 8);
4156 __ ba_short(L_check_decrypt_end);
4157
4158 __ BIND(L_store_misaligned_output_first_block);
// Misaligned destination: byte-rotate each result doubleword by (8 - to%8)
// via alignaddr/faligndata(reg,reg), then store the head bytes with a partial
// store (stpartialf) under the edge8n mask in G2, and the tail bytes into the
// following doubleword under the inverted mask (orn).
4159 __ add(to, 8, G3);
4160 __ mov(8, G4);
4161 __ sub(G4, G1, G4);
4162 __ alignaddr(G4, G0, G4);
4163 __ faligndata(F60, F60, F60);
4164 __ faligndata(F62, F62, F62);
4165 __ mov(to, G1);
4166 __ and3(to, -8, to);
4167 __ and3(G3, -8, G3);
4168 __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY);
4169 __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY);
4170 __ add(to, 8, to);
4171 __ add(G3, 8, G3);
4172 __ orn(G0, G2, G2);
4173 __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY);
4174 __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY);
4175 __ mov(G1, to);
4176
4177 __ BIND(L_check_decrypt_end);
4178 __ add(from, 16, from);
4179 __ add(to, 16, to);
4180 __ subcc(len_reg, 16, len_reg);
4181 __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end);
4182 __ delayed()->nop();
4183
// More data remains (now a multiple of 32 bytes): dispatch to the matching
// two-blocks-per-iteration loop by key size.
4184 // 256-bit original key size
4185 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256);
4186
4187 // 192-bit original key size
4188 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192);
4189
// ---- 128-bit key: decrypt 2 blocks (32 bytes) per iteration ----
4190 __ align(OptoLoopAlignment);
4191 __ BIND(L_dec_next2_blocks128);
4192 __ nop();
4193
4194 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
4195 __ andcc(from, 7, G0);
4196 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks128);
4197 __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
4198
4199 // aligned case: load input into G4, G5, L4 and L5
4200 __ ldx(from,0,G4);
4201 __ ldx(from,8,G5);
4202 __ ldx(from,16,L4);
4203 __ ldx(from,24,L5);
4204 __ ba_short(L_transform_next2_blocks128);
4205
4206 __ BIND(L_load_misaligned_next2_blocks128);
// Same VIS alignaddr/faligndata idiom as above, extended to 32 unaligned bytes
// (five doubleword loads, four faligndata merges).
4207 __ alignaddr(from, G0, from);
4208 // F40, F42, F58, F60, F62 can be clobbered
4209 __ ldf(FloatRegisterImpl::D, from, 0, F40);
4210 __ ldf(FloatRegisterImpl::D, from, 8, F42);
4211 __ ldf(FloatRegisterImpl::D, from, 16, F60);
4212 __ ldf(FloatRegisterImpl::D, from, 24, F62);
4213 __ ldf(FloatRegisterImpl::D, from, 32, F58);
4214 __ faligndata(F40, F42, F40);
4215 __ faligndata(F42, F60, F42);
4216 __ faligndata(F60, F62, F60);
4217 __ faligndata(F62, F58, F62);
4218 __ movdtox(F40, G4);
4219 __ movdtox(F42, G5);
4220 __ movdtox(F60, L4);
4221 __ movdtox(F62, L5);
4222 __ mov(G1, from);
4223
4224 __ BIND(L_transform_next2_blocks128);
4225 // F40:F42 used for first 16-bytes
4226 __ xor3(L2,G4,G1);
4227 __ movxtod(G1,F40);
4228 __ xor3(L3,G5,G1);
4229 __ movxtod(G1,F42);
4230
4231 // F60:F62 used for next 16-bytes
4232 __ xor3(L2,L4,G1);
4233 __ movxtod(G1,F60);
4234 __ xor3(L3,L5,G1);
4235 __ movxtod(G1,F62);
4236
// Interleave the round instructions of the two independent blocks so their
// latencies overlap; '_l' variants perform the final round.
4237 for ( int i = 38; i >= 6; i -= 8 ) {
4238 __ aes_dround23(as_FloatRegister(i), F40, F42, F44);
4239 __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46);
4240 __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
4241 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
4242 if (i != 6 ) {
4243 __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42);
4244 __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40);
4245 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
4246 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
4247 } else {
4248 __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42);
4249 __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40);
4250 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
4251 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
4252 }
4253 }
4254
// CBC chaining for block 1 uses L0:L1 (IV or previous ciphertext) ...
4255 __ movxtod(L0,F46);
4256 __ movxtod(L1,F44);
4257 __ fxor(FloatRegisterImpl::D, F46, F40, F40);
4258 __ fxor(FloatRegisterImpl::D, F44, F42, F42);
4259
// ... and block 2 uses block 1's ciphertext (G4:G5); block 2's ciphertext
// (L4:L5) becomes the chaining value for the next iteration.
4260 __ movxtod(G4,F56);
4261 __ movxtod(G5,F58);
4262 __ mov(L4,L0);
4263 __ mov(L5,L1);
4264 __ fxor(FloatRegisterImpl::D, F56, F60, F60);
4265 __ fxor(FloatRegisterImpl::D, F58, F62, F62);
4266
4267 // For mis-aligned store of 32 bytes of result we can do:
4268 // Circular right-shift all 4 FP registers so that 'head' and 'tail'
4269 // parts that need to be stored starting at mis-aligned address are in a FP reg
4270 // the other 3 FP regs can thus be stored using regular store
4271 // we then use the edge + partial-store mechanism to store the 'head' and 'tail' parts
4272
4273 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
4274 __ andcc(to, 7, G1);
4275 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks128);
4276 __ delayed()->edge8n(to, G0, G2);
4277
4278 // aligned case: store output into the destination array
4279 __ stf(FloatRegisterImpl::D, F40, to, 0);
4280 __ stf(FloatRegisterImpl::D, F42, to, 8);
4281 __ stf(FloatRegisterImpl::D, F60, to, 16);
4282 __ stf(FloatRegisterImpl::D, F62, to, 24);
4283 __ ba_short(L_check_decrypt_loop_end128);
4284
4285 __ BIND(L_store_misaligned_output_next2_blocks128);
4286 __ mov(8, G4);
4287 __ sub(G4, G1, G4);
4288 __ alignaddr(G4, G0, G4);
4289 __ faligndata(F40, F42, F56); // F56 can be clobbered
4290 __ faligndata(F42, F60, F42);
4291 __ faligndata(F60, F62, F60);
4292 __ faligndata(F62, F40, F40);
4293 __ mov(to, G1);
4294 __ and3(to, -8, to);
4295 __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY);
4296 __ stf(FloatRegisterImpl::D, F56, to, 8);
4297 __ stf(FloatRegisterImpl::D, F42, to, 16);
4298 __ stf(FloatRegisterImpl::D, F60, to, 24);
4299 __ add(to, 32, to);
4300 __ orn(G0, G2, G2);
4301 __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY);
4302 __ mov(G1, to);
4303
4304 __ BIND(L_check_decrypt_loop_end128);
4305 __ add(from, 32, from);
4306 __ add(to, 32, to);
4307 __ subcc(len_reg, 32, len_reg);
4308 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128);
4309 __ delayed()->nop();
4310 __ ba_short(L_cbcdec_end);
4311
// ---- 192-bit key: decrypt 2 blocks (32 bytes) per iteration ----
// Structurally identical to the 128-bit loop; only the scratch FP registers
// and the round count (i starts at 46) differ.
4312 __ align(OptoLoopAlignment);
4313 __ BIND(L_dec_next2_blocks192);
4314 __ nop();
4315
4316 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
4317 __ andcc(from, 7, G0);
4318 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks192);
4319 __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
4320
4321 // aligned case: load input into G4, G5, L4 and L5
4322 __ ldx(from,0,G4);
4323 __ ldx(from,8,G5);
4324 __ ldx(from,16,L4);
4325 __ ldx(from,24,L5);
4326 __ ba_short(L_transform_next2_blocks192);
4327
4328 __ BIND(L_load_misaligned_next2_blocks192);
4329 __ alignaddr(from, G0, from);
4330 // F48, F50, F52, F60, F62 can be clobbered
4331 __ ldf(FloatRegisterImpl::D, from, 0, F48);
4332 __ ldf(FloatRegisterImpl::D, from, 8, F50);
4333 __ ldf(FloatRegisterImpl::D, from, 16, F60);
4334 __ ldf(FloatRegisterImpl::D, from, 24, F62);
4335 __ ldf(FloatRegisterImpl::D, from, 32, F52);
4336 __ faligndata(F48, F50, F48);
4337 __ faligndata(F50, F60, F50);
4338 __ faligndata(F60, F62, F60);
4339 __ faligndata(F62, F52, F62);
4340 __ movdtox(F48, G4);
4341 __ movdtox(F50, G5);
4342 __ movdtox(F60, L4);
4343 __ movdtox(F62, L5);
4344 __ mov(G1, from);
4345
4346 __ BIND(L_transform_next2_blocks192);
4347 // F48:F50 used for first 16-bytes
4348 __ xor3(L2,G4,G1);
4349 __ movxtod(G1,F48);
4350 __ xor3(L3,G5,G1);
4351 __ movxtod(G1,F50);
4352
4353 // F60:F62 used for next 16-bytes
4354 __ xor3(L2,L4,G1);
4355 __ movxtod(G1,F60);
4356 __ xor3(L3,L5,G1);
4357 __ movxtod(G1,F62);
4358
4359 for ( int i = 46; i >= 6; i -= 8 ) {
4360 __ aes_dround23(as_FloatRegister(i), F48, F50, F52);
4361 __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54);
4362 __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
4363 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
4364 if (i != 6 ) {
4365 __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50);
4366 __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48);
4367 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
4368 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
4369 } else {
4370 __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50);
4371 __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48);
4372 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62);
4373 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60);
4374 }
4375 }
4376
4377 __ movxtod(L0,F54);
4378 __ movxtod(L1,F52);
4379 __ fxor(FloatRegisterImpl::D, F54, F48, F48);
4380 __ fxor(FloatRegisterImpl::D, F52, F50, F50);
4381
4382 __ movxtod(G4,F56);
4383 __ movxtod(G5,F58);
4384 __ mov(L4,L0);
4385 __ mov(L5,L1);
4386 __ fxor(FloatRegisterImpl::D, F56, F60, F60);
4387 __ fxor(FloatRegisterImpl::D, F58, F62, F62);
4388
4389 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
4390 __ andcc(to, 7, G1);
4391 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks192);
4392 __ delayed()->edge8n(to, G0, G2);
4393
4394 // aligned case: store output into the destination array
4395 __ stf(FloatRegisterImpl::D, F48, to, 0);
4396 __ stf(FloatRegisterImpl::D, F50, to, 8);
4397 __ stf(FloatRegisterImpl::D, F60, to, 16);
4398 __ stf(FloatRegisterImpl::D, F62, to, 24);
4399 __ ba_short(L_check_decrypt_loop_end192);
4400
4401 __ BIND(L_store_misaligned_output_next2_blocks192);
4402 __ mov(8, G4);
4403 __ sub(G4, G1, G4);
4404 __ alignaddr(G4, G0, G4);
4405 __ faligndata(F48, F50, F56); // F56 can be clobbered
4406 __ faligndata(F50, F60, F50);
4407 __ faligndata(F60, F62, F60);
4408 __ faligndata(F62, F48, F48);
4409 __ mov(to, G1);
4410 __ and3(to, -8, to);
4411 __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY);
4412 __ stf(FloatRegisterImpl::D, F56, to, 8);
4413 __ stf(FloatRegisterImpl::D, F50, to, 16);
4414 __ stf(FloatRegisterImpl::D, F60, to, 24);
4415 __ add(to, 32, to);
4416 __ orn(G0, G2, G2);
4417 __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY);
4418 __ mov(G1, to);
4419
4420 __ BIND(L_check_decrypt_loop_end192);
4421 __ add(from, 32, from);
4422 __ add(to, 32, to);
4423 __ subcc(len_reg, 32, len_reg);
4424 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192);
4425 __ delayed()->nop();
4426 __ ba_short(L_cbcdec_end);
4427
// ---- 256-bit key: decrypt 2 blocks (32 bytes) per iteration ----
// The 256-bit schedule fills F0..F58, so this loop must temporarily park
// F48..F54 in integer registers while the first round keys are replayed from
// the original key material (see comments inline).
4428 __ align(OptoLoopAlignment);
4429 __ BIND(L_dec_next2_blocks256);
4430 __ nop();
4431
4432 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero
4433 __ andcc(from, 7, G0);
4434 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks256);
4435 __ delayed()->mov(from, G1); // save original 'from' address before alignaddr
4436
4437 // aligned case: load input into G4, G5, L4 and L5
4438 __ ldx(from,0,G4);
4439 __ ldx(from,8,G5);
4440 __ ldx(from,16,L4);
4441 __ ldx(from,24,L5);
4442 __ ba_short(L_transform_next2_blocks256);
4443
4444 __ BIND(L_load_misaligned_next2_blocks256);
4445 __ alignaddr(from, G0, from);
4446 // F0, F2, F4, F60, F62 can be clobbered
4447 __ ldf(FloatRegisterImpl::D, from, 0, F0);
4448 __ ldf(FloatRegisterImpl::D, from, 8, F2);
4449 __ ldf(FloatRegisterImpl::D, from, 16, F60);
4450 __ ldf(FloatRegisterImpl::D, from, 24, F62);
4451 __ ldf(FloatRegisterImpl::D, from, 32, F4);
4452 __ faligndata(F0, F2, F0);
4453 __ faligndata(F2, F60, F2);
4454 __ faligndata(F60, F62, F60);
4455 __ faligndata(F62, F4, F62);
4456 __ movdtox(F0, G4);
4457 __ movdtox(F2, G5);
4458 __ movdtox(F60, L4);
4459 __ movdtox(F62, L5);
4460 __ mov(G1, from);
4461
4462 __ BIND(L_transform_next2_blocks256);
4463 // F0:F2 used for first 16-bytes
4464 __ xor3(L2,G4,G1);
4465 __ movxtod(G1,F0);
4466 __ xor3(L3,G5,G1);
4467 __ movxtod(G1,F2);
4468
4469 // F60:F62 used for next 16-bytes
4470 __ xor3(L2,L4,G1);
4471 __ movxtod(G1,F60);
4472 __ xor3(L3,L5,G1);
4473 __ movxtod(G1,F62);
4474
// First two round pairs use F48..F54 directly, before those registers are
// repurposed below.
4475 __ aes_dround23(F54, F0, F2, F4);
4476 __ aes_dround01(F52, F0, F2, F6);
4477 __ aes_dround23(F54, F60, F62, F58);
4478 __ aes_dround01(F52, F60, F62, F56);
4479 __ aes_dround23(F50, F6, F4, F2);
4480 __ aes_dround01(F48, F6, F4, F0);
4481 __ aes_dround23(F50, F56, F58, F62);
4482 __ aes_dround01(F48, F56, F58, F60);
4483 // save F48:F54 in temp registers
// These four round-key doublewords must survive while F48..F54 are reloaded
// with the original key for the final rounds below.
4484 __ movdtox(F54,G2);
4485 __ movdtox(F52,G3);
4486 __ movdtox(F50,G6);
4487 __ movdtox(F48,G1);
4488 for ( int i = 46; i >= 14; i -= 8 ) {
4489 __ aes_dround23(as_FloatRegister(i), F0, F2, F4);
4490 __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6);
4491 __ aes_dround23(as_FloatRegister(i), F60, F62, F58);
4492 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56);
4493 __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2);
4494 __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0);
4495 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62);
4496 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60);
4497 }
4498 // init F48:F54 with F0:F6 values (original key)
// The first four expanded-key doublewords equal the raw key words, which are
// still available in the original_key array, so reload them from memory
// (F0..F6 themselves hold live cipher state at this point).
4499 __ ldf(FloatRegisterImpl::D, original_key, 0, F48);
4500 __ ldf(FloatRegisterImpl::D, original_key, 8, F50);
4501 __ ldf(FloatRegisterImpl::D, original_key, 16, F52);
4502 __ ldf(FloatRegisterImpl::D, original_key, 24, F54);
4503 __ aes_dround23(F54, F0, F2, F4);
4504 __ aes_dround01(F52, F0, F2, F6);
4505 __ aes_dround23(F54, F60, F62, F58);
4506 __ aes_dround01(F52, F60, F62, F56);
4507 __ aes_dround23_l(F50, F6, F4, F2);
4508 __ aes_dround01_l(F48, F6, F4, F0);
4509 __ aes_dround23_l(F50, F56, F58, F62);
4510 __ aes_dround01_l(F48, F56, F58, F60);
4511 // re-init F48:F54 with their original values
4512 __ movxtod(G2,F54);
4513 __ movxtod(G3,F52);
4514 __ movxtod(G6,F50);
4515 __ movxtod(G1,F48);
4516
4517 __ movxtod(L0,F6);
4518 __ movxtod(L1,F4);
4519 __ fxor(FloatRegisterImpl::D, F6, F0, F0);
4520 __ fxor(FloatRegisterImpl::D, F4, F2, F2);
4521
4522 __ movxtod(G4,F56);
4523 __ movxtod(G5,F58);
4524 __ mov(L4,L0);
4525 __ mov(L5,L1);
4526 __ fxor(FloatRegisterImpl::D, F56, F60, F60);
4527 __ fxor(FloatRegisterImpl::D, F58, F62, F62);
4528
4529 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero
4530 __ andcc(to, 7, G1);
4531 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks256);
4532 __ delayed()->edge8n(to, G0, G2);
4533
4534 // aligned case: store output into the destination array
4535 __ stf(FloatRegisterImpl::D, F0, to, 0);
4536 __ stf(FloatRegisterImpl::D, F2, to, 8);
4537 __ stf(FloatRegisterImpl::D, F60, to, 16);
4538 __ stf(FloatRegisterImpl::D, F62, to, 24);
4539 __ ba_short(L_check_decrypt_loop_end256);
4540
4541 __ BIND(L_store_misaligned_output_next2_blocks256);
4542 __ mov(8, G4);
4543 __ sub(G4, G1, G4);
4544 __ alignaddr(G4, G0, G4);
4545 __ faligndata(F0, F2, F56); // F56 can be clobbered
4546 __ faligndata(F2, F60, F2);
4547 __ faligndata(F60, F62, F60);
4548 __ faligndata(F62, F0, F0);
4549 __ mov(to, G1);
4550 __ and3(to, -8, to);
4551 __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY);
4552 __ stf(FloatRegisterImpl::D, F56, to, 8);
4553 __ stf(FloatRegisterImpl::D, F2, to, 16);
4554 __ stf(FloatRegisterImpl::D, F60, to, 24);
4555 __ add(to, 32, to);
4556 __ orn(G0, G2, G2);
4557 __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY);
4558 __ mov(G1, to);
4559
4560 __ BIND(L_check_decrypt_loop_end256);
4561 __ add(from, 32, from);
4562 __ add(to, 32, to);
4563 __ subcc(len_reg, 32, len_reg);
4564 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256);
4565 __ delayed()->nop();
4566
4567 __ BIND(L_cbcdec_end);
4568 // re-init intial vector for next block, 8-byte alignment is guaranteed
// L0:L1 hold the last ciphertext block processed; writing it back to rvec lets
// a subsequent call continue the CBC chain.
4569 __ stx(L0, rvec, 0);
4570 __ stx(L1, rvec, 8);
// return the saved cipher length (L7) in the caller's O0
4571 __ mov(L7, I0);
4572 __ ret();
4573 __ delayed()->restore();
4574
4575 return start;
4576 }
4577
3307 void generate_initial() { 4578 void generate_initial() {
3308 // Generates all stubs and initializes the entry points 4579 // Generates all stubs and initializes the entry points
3309 4580
3310 //------------------------------------------------------------------------------------------------------------------------ 4581 //------------------------------------------------------------------------------------------------------------------------
3311 // entry points that exist in all platforms 4582 // entry points that exist in all platforms
3366 &StubRoutines::_safefetch32_fault_pc, 4637 &StubRoutines::_safefetch32_fault_pc,
3367 &StubRoutines::_safefetch32_continuation_pc); 4638 &StubRoutines::_safefetch32_continuation_pc);
3368 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, 4639 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3369 &StubRoutines::_safefetchN_fault_pc, 4640 &StubRoutines::_safefetchN_fault_pc,
3370 &StubRoutines::_safefetchN_continuation_pc); 4641 &StubRoutines::_safefetchN_continuation_pc);
4642
4643 // generate AES intrinsics code
4644 if (UseAESIntrinsics) {
4645 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
4646 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
4647 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
4648 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
4649 }
3371 } 4650 }
3372 4651
3373 4652
3374 public: 4653 public:
3375 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { 4654 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {