Mercurial > hg > truffle
comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 18041:52b4284cb496
Merge with jdk8u20-b26
author | Gilles Duboscq <duboscq@ssw.jku.at> |
---|---|
date | Wed, 15 Oct 2014 16:02:50 +0200 |
parents | 89152779163c 0342d80559e0 |
children | 7848fc12602b |
comparison
equal
deleted
inserted
replaced
17606:45d7b2c7029d | 18041:52b4284cb496 |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. | 2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | 4 * |
5 * This code is free software; you can redistribute it and/or modify it | 5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as | 6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
81 | 81 |
82 class StubGenerator: public StubCodeGenerator { | 82 class StubGenerator: public StubCodeGenerator { |
83 private: | 83 private: |
84 | 84 |
85 #ifdef PRODUCT | 85 #ifdef PRODUCT |
86 #define inc_counter_np(a,b,c) (0) | 86 #define inc_counter_np(a,b,c) |
87 #else | 87 #else |
88 #define inc_counter_np(counter, t1, t2) \ | 88 #define inc_counter_np(counter, t1, t2) \ |
89 BLOCK_COMMENT("inc_counter " #counter); \ | 89 BLOCK_COMMENT("inc_counter " #counter); \ |
90 __ inc_counter(&counter, t1, t2); | 90 __ inc_counter(&counter, t1, t2); |
91 #endif | 91 #endif |
1053 // | 1053 // |
1054 typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec, | 1054 typedef void (StubGenerator::*CopyLoopFunc)(Register from, Register to, Register count, int count_dec, |
1055 Label& L_loop, bool use_prefetch, bool use_bis); | 1055 Label& L_loop, bool use_prefetch, bool use_bis); |
1056 | 1056 |
1057 void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size, | 1057 void disjoint_copy_core(Register from, Register to, Register count, int log2_elem_size, |
1058 int iter_size, CopyLoopFunc copy_loop_func) { | 1058 int iter_size, StubGenerator::CopyLoopFunc copy_loop_func) { |
1059 Label L_copy; | 1059 Label L_copy; |
1060 | 1060 |
1061 assert(log2_elem_size <= 3, "the following code should be changed"); | 1061 assert(log2_elem_size <= 3, "the following code should be changed"); |
1062 int count_dec = 16>>log2_elem_size; | 1062 int count_dec = 16>>log2_elem_size; |
1063 | 1063 |
1204 __ andn(from, 7, from); // Align address | 1204 __ andn(from, 7, from); // Align address |
1205 __ ldx(from, 0, O3); | 1205 __ ldx(from, 0, O3); |
1206 __ inc(from, 8); | 1206 __ inc(from, 8); |
1207 __ sllx(O3, left_shift, O3); | 1207 __ sllx(O3, left_shift, O3); |
1208 | 1208 |
1209 disjoint_copy_core(from, to, count, log2_elem_size, 16, copy_16_bytes_shift_loop); | 1209 disjoint_copy_core(from, to, count, log2_elem_size, 16, &StubGenerator::copy_16_bytes_shift_loop); |
1210 | 1210 |
1211 __ inccc(count, count_dec>>1 ); // + 8 bytes | 1211 __ inccc(count, count_dec>>1 ); // + 8 bytes |
1212 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); | 1212 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes); |
1213 __ delayed()->inc(count, count_dec>>1); // restore 'count' | 1213 __ delayed()->inc(count, count_dec>>1); // restore 'count' |
1214 | 1214 |
2083 | 2083 |
2084 // copy with shift 4 elements (16 bytes) at a time | 2084 // copy with shift 4 elements (16 bytes) at a time |
2085 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4 | 2085 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4 |
2086 __ sllx(O3, 32, O3); | 2086 __ sllx(O3, 32, O3); |
2087 | 2087 |
2088 disjoint_copy_core(from, to, count, 2, 16, copy_16_bytes_loop); | 2088 disjoint_copy_core(from, to, count, 2, 16, &StubGenerator::copy_16_bytes_loop); |
2089 | 2089 |
2090 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); | 2090 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes); |
2091 __ delayed()->inc(count, 4); // restore 'count' | 2091 __ delayed()->inc(count, 4); // restore 'count' |
2092 | 2092 |
2093 __ BIND(L_aligned_copy); | 2093 __ BIND(L_aligned_copy); |
2364 // Now we can use O4(offset0), O5(offset8) as temps | 2364 // Now we can use O4(offset0), O5(offset8) as temps |
2365 __ mov(O3, count); | 2365 __ mov(O3, count); |
2366 // count >= 0 (original count - 8) | 2366 // count >= 0 (original count - 8) |
2367 __ mov(from, from64); | 2367 __ mov(from, from64); |
2368 | 2368 |
2369 disjoint_copy_core(from64, to64, count, 3, 64, copy_64_bytes_loop); | 2369 disjoint_copy_core(from64, to64, count, 3, 64, &StubGenerator::copy_64_bytes_loop); |
2370 | 2370 |
2371 // Restore O4(offset0), O5(offset8) | 2371 // Restore O4(offset0), O5(offset8) |
2372 __ sub(from64, from, offset0); | 2372 __ sub(from64, from, offset0); |
2373 __ inccc(count, 6); // restore count | 2373 __ inccc(count, 6); // restore count |
2374 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); | 2374 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes ); |
3302 if (UseBlockZeroing) { | 3302 if (UseBlockZeroing) { |
3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); | 3303 StubRoutines::_zero_aligned_words = generate_zero_aligned_words("zero_aligned_words"); |
3304 } | 3304 } |
3305 } | 3305 } |
3306 | 3306 |
3307 address generate_aescrypt_encryptBlock() { | |
3308 // required since we read expanded key 'int' array starting first element without alignment considerations | |
3309 assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, | |
3310 "the following code assumes that first element of an int array is aligned to 8 bytes"); | |
3311 __ align(CodeEntryAlignment); | |
3312 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); | |
3313 Label L_load_misaligned_input, L_load_expanded_key, L_doLast128bit, L_storeOutput, L_store_misaligned_output; | |
3314 address start = __ pc(); | |
3315 Register from = O0; // source byte array | |
3316 Register to = O1; // destination byte array | |
3317 Register key = O2; // expanded key array | |
3318 const Register keylen = O4; //reg for storing expanded key array length | |
3319 | |
3320 // read expanded key length | |
3321 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); | |
3322 | |
3323 // Method to address arbitrary alignment for load instructions: | |
3324 // Check last 3 bits of 'from' address to see if it is aligned to 8-byte boundary | |
3325 // If zero/aligned then continue with double FP load instructions | |
3326 // If not zero/mis-aligned then alignaddr will set GSR.align with number of bytes to skip during faligndata | |
3327 // alignaddr will also convert arbitrary aligned 'from' address to nearest 8-byte aligned address | |
3328 // load 3 * 8-byte components (to read 16 bytes input) in 3 different FP regs starting at this aligned address | |
3329 // faligndata will then extract (based on GSR.align value) the appropriate 8 bytes from the 2 source regs | |
3330 | |
3331 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
3332 __ andcc(from, 7, G0); | |
3333 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input); | |
3334 __ delayed()->alignaddr(from, G0, from); | |
3335 | |
3336 // aligned case: load input into F54-F56 | |
3337 __ ldf(FloatRegisterImpl::D, from, 0, F54); | |
3338 __ ldf(FloatRegisterImpl::D, from, 8, F56); | |
3339 __ ba_short(L_load_expanded_key); | |
3340 | |
3341 __ BIND(L_load_misaligned_input); | |
3342 __ ldf(FloatRegisterImpl::D, from, 0, F54); | |
3343 __ ldf(FloatRegisterImpl::D, from, 8, F56); | |
3344 __ ldf(FloatRegisterImpl::D, from, 16, F58); | |
3345 __ faligndata(F54, F56, F54); | |
3346 __ faligndata(F56, F58, F56); | |
3347 | |
3348 __ BIND(L_load_expanded_key); | |
3349 // Since we load expanded key buffers starting first element, 8-byte alignment is guaranteed | |
3350 for ( int i = 0; i <= 38; i += 2 ) { | |
3351 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i)); | |
3352 } | |
3353 | |
3354 // perform cipher transformation | |
3355 __ fxor(FloatRegisterImpl::D, F0, F54, F54); | |
3356 __ fxor(FloatRegisterImpl::D, F2, F56, F56); | |
3357 // rounds 1 through 8 | |
3358 for ( int i = 4; i <= 28; i += 8 ) { | |
3359 __ aes_eround01(as_FloatRegister(i), F54, F56, F58); | |
3360 __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60); | |
3361 __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54); | |
3362 __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56); | |
3363 } | |
3364 __ aes_eround01(F36, F54, F56, F58); //round 9 | |
3365 __ aes_eround23(F38, F54, F56, F60); | |
3366 | |
3367 // 128-bit original key size | |
3368 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit); | |
3369 | |
3370 for ( int i = 40; i <= 50; i += 2 ) { | |
3371 __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) ); | |
3372 } | |
3373 __ aes_eround01(F40, F58, F60, F54); //round 10 | |
3374 __ aes_eround23(F42, F58, F60, F56); | |
3375 __ aes_eround01(F44, F54, F56, F58); //round 11 | |
3376 __ aes_eround23(F46, F54, F56, F60); | |
3377 | |
3378 // 192-bit original key size | |
3379 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput); | |
3380 | |
3381 __ ldf(FloatRegisterImpl::D, key, 208, F52); | |
3382 __ aes_eround01(F48, F58, F60, F54); //round 12 | |
3383 __ aes_eround23(F50, F58, F60, F56); | |
3384 __ ldf(FloatRegisterImpl::D, key, 216, F46); | |
3385 __ ldf(FloatRegisterImpl::D, key, 224, F48); | |
3386 __ ldf(FloatRegisterImpl::D, key, 232, F50); | |
3387 __ aes_eround01(F52, F54, F56, F58); //round 13 | |
3388 __ aes_eround23(F46, F54, F56, F60); | |
3389 __ ba_short(L_storeOutput); | |
3390 | |
3391 __ BIND(L_doLast128bit); | |
3392 __ ldf(FloatRegisterImpl::D, key, 160, F48); | |
3393 __ ldf(FloatRegisterImpl::D, key, 168, F50); | |
3394 | |
3395 __ BIND(L_storeOutput); | |
3396 // perform last round of encryption common for all key sizes | |
3397 __ aes_eround01_l(F48, F58, F60, F54); //last round | |
3398 __ aes_eround23_l(F50, F58, F60, F56); | |
3399 | |
3400 // Method to address arbitrary alignment for store instructions: | |
3401 // Check last 3 bits of 'dest' address to see if it is aligned to 8-byte boundary | |
3402 // If zero/aligned then continue with double FP store instructions | |
3403 // If not zero/mis-aligned then edge8n will generate edge mask in result reg (O3 in below case) | |
3404 // Example: If dest address is 0x07 and nearest 8-byte aligned address is 0x00 then edge mask will be 00000001 | |
3405 // Compute (8-n) where n is # of bytes skipped by partial store(stpartialf) inst from edge mask, n=7 in this case | |
3406 // We get the value of n from the andcc that checks 'dest' alignment. n is available in O5 in below case. | |
3407 // Set GSR.align to (8-n) using alignaddr | |
3408 // Circular byte shift store values by n places so that the original bytes are at correct position for stpartialf | |
3409 // Set the arbitrarily aligned 'dest' address to nearest 8-byte aligned address | |
3410 // Store (partial) the original first (8-n) bytes starting at the original 'dest' address | |
3411 // Negate the edge mask so that the subsequent stpartialf can store the original (8-n-1)th through 8th bytes at appropriate address | |
3412 // We need to execute this process for both the 8-byte result values | |
3413 | |
3414 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
3415 __ andcc(to, 7, O5); | |
3416 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output); | |
3417 __ delayed()->edge8n(to, G0, O3); | |
3418 | |
3419 // aligned case: store output into the destination array | |
3420 __ stf(FloatRegisterImpl::D, F54, to, 0); | |
3421 __ retl(); | |
3422 __ delayed()->stf(FloatRegisterImpl::D, F56, to, 8); | |
3423 | |
3424 __ BIND(L_store_misaligned_output); | |
3425 __ add(to, 8, O4); | |
3426 __ mov(8, O2); | |
3427 __ sub(O2, O5, O2); | |
3428 __ alignaddr(O2, G0, O2); | |
3429 __ faligndata(F54, F54, F54); | |
3430 __ faligndata(F56, F56, F56); | |
3431 __ and3(to, -8, to); | |
3432 __ and3(O4, -8, O4); | |
3433 __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY); | |
3434 __ stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY); | |
3435 __ add(to, 8, to); | |
3436 __ add(O4, 8, O4); | |
3437 __ orn(G0, O3, O3); | |
3438 __ stpartialf(to, O3, F54, Assembler::ASI_PST8_PRIMARY); | |
3439 __ retl(); | |
3440 __ delayed()->stpartialf(O4, O3, F56, Assembler::ASI_PST8_PRIMARY); | |
3441 | |
3442 return start; | |
3443 } | |
3444 | |
3445 address generate_aescrypt_decryptBlock() { | |
3446 assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, | |
3447 "the following code assumes that first element of an int array is aligned to 8 bytes"); | |
3448 // required since we read original key 'byte' array as well in the decryption stubs | |
3449 assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, | |
3450 "the following code assumes that first element of a byte array is aligned to 8 bytes"); | |
3451 __ align(CodeEntryAlignment); | |
3452 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); | |
3453 address start = __ pc(); | |
3454 Label L_load_misaligned_input, L_load_original_key, L_expand192bit, L_expand256bit, L_reload_misaligned_input; | |
3455 Label L_256bit_transform, L_common_transform, L_store_misaligned_output; | |
3456 Register from = O0; // source byte array | |
3457 Register to = O1; // destination byte array | |
3458 Register key = O2; // expanded key array | |
3459 Register original_key = O3; // original key array only required during decryption | |
3460 const Register keylen = O4; // reg for storing expanded key array length | |
3461 | |
3462 // read expanded key array length | |
3463 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); | |
3464 | |
3465 // save 'from' since we may need to recheck alignment in case of 256-bit decryption | |
3466 __ mov(from, G1); | |
3467 | |
3468 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
3469 __ andcc(from, 7, G0); | |
3470 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input); | |
3471 __ delayed()->alignaddr(from, G0, from); | |
3472 | |
3473 // aligned case: load input into F52-F54 | |
3474 __ ldf(FloatRegisterImpl::D, from, 0, F52); | |
3475 __ ldf(FloatRegisterImpl::D, from, 8, F54); | |
3476 __ ba_short(L_load_original_key); | |
3477 | |
3478 __ BIND(L_load_misaligned_input); | |
3479 __ ldf(FloatRegisterImpl::D, from, 0, F52); | |
3480 __ ldf(FloatRegisterImpl::D, from, 8, F54); | |
3481 __ ldf(FloatRegisterImpl::D, from, 16, F56); | |
3482 __ faligndata(F52, F54, F52); | |
3483 __ faligndata(F54, F56, F54); | |
3484 | |
3485 __ BIND(L_load_original_key); | |
3486 // load original key from SunJCE expanded decryption key | |
3487 // Since we load original key buffer starting first element, 8-byte alignment is guaranteed | |
3488 for ( int i = 0; i <= 3; i++ ) { | |
3489 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
3490 } | |
3491 | |
3492 // 256-bit original key size | |
3493 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); | |
3494 | |
3495 // 192-bit original key size | |
3496 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); | |
3497 | |
3498 // 128-bit original key size | |
3499 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3500 for ( int i = 0; i <= 36; i += 4 ) { | |
3501 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); | |
3502 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); | |
3503 } | |
3504 | |
3505 // perform 128-bit key specific inverse cipher transformation | |
3506 __ fxor(FloatRegisterImpl::D, F42, F54, F54); | |
3507 __ fxor(FloatRegisterImpl::D, F40, F52, F52); | |
3508 __ ba_short(L_common_transform); | |
3509 | |
3510 __ BIND(L_expand192bit); | |
3511 | |
3512 // start loading rest of the 192-bit key | |
3513 __ ldf(FloatRegisterImpl::S, original_key, 16, F4); | |
3514 __ ldf(FloatRegisterImpl::S, original_key, 20, F5); | |
3515 | |
3516 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3517 for ( int i = 0; i <= 36; i += 6 ) { | |
3518 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); | |
3519 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); | |
3520 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); | |
3521 } | |
3522 __ aes_kexpand1(F42, F46, 7, F48); | |
3523 __ aes_kexpand2(F44, F48, F50); | |
3524 | |
3525 // perform 192-bit key specific inverse cipher transformation | |
3526 __ fxor(FloatRegisterImpl::D, F50, F54, F54); | |
3527 __ fxor(FloatRegisterImpl::D, F48, F52, F52); | |
3528 __ aes_dround23(F46, F52, F54, F58); | |
3529 __ aes_dround01(F44, F52, F54, F56); | |
3530 __ aes_dround23(F42, F56, F58, F54); | |
3531 __ aes_dround01(F40, F56, F58, F52); | |
3532 __ ba_short(L_common_transform); | |
3533 | |
3534 __ BIND(L_expand256bit); | |
3535 | |
3536 // load rest of the 256-bit key | |
3537 for ( int i = 4; i <= 7; i++ ) { | |
3538 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
3539 } | |
3540 | |
3541 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
3542 for ( int i = 0; i <= 40; i += 8 ) { | |
3543 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); | |
3544 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); | |
3545 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); | |
3546 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); | |
3547 } | |
3548 __ aes_kexpand1(F48, F54, 6, F56); | |
3549 __ aes_kexpand2(F50, F56, F58); | |
3550 | |
3551 for ( int i = 0; i <= 6; i += 2 ) { | |
3552 __ fsrc2(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i)); | |
3553 } | |
3554 | |
3555 // reload original 'from' address | |
3556 __ mov(G1, from); | |
3557 | |
3558 // re-check 8-byte alignment | |
3559 __ andcc(from, 7, G0); | |
3560 __ br(Assembler::notZero, true, Assembler::pn, L_reload_misaligned_input); | |
3561 __ delayed()->alignaddr(from, G0, from); | |
3562 | |
3563 // aligned case: load input into F52-F54 | |
3564 __ ldf(FloatRegisterImpl::D, from, 0, F52); | |
3565 __ ldf(FloatRegisterImpl::D, from, 8, F54); | |
3566 __ ba_short(L_256bit_transform); | |
3567 | |
3568 __ BIND(L_reload_misaligned_input); | |
3569 __ ldf(FloatRegisterImpl::D, from, 0, F52); | |
3570 __ ldf(FloatRegisterImpl::D, from, 8, F54); | |
3571 __ ldf(FloatRegisterImpl::D, from, 16, F56); | |
3572 __ faligndata(F52, F54, F52); | |
3573 __ faligndata(F54, F56, F54); | |
3574 | |
3575 // perform 256-bit key specific inverse cipher transformation | |
3576 __ BIND(L_256bit_transform); | |
3577 __ fxor(FloatRegisterImpl::D, F0, F54, F54); | |
3578 __ fxor(FloatRegisterImpl::D, F2, F52, F52); | |
3579 __ aes_dround23(F4, F52, F54, F58); | |
3580 __ aes_dround01(F6, F52, F54, F56); | |
3581 __ aes_dround23(F50, F56, F58, F54); | |
3582 __ aes_dround01(F48, F56, F58, F52); | |
3583 __ aes_dround23(F46, F52, F54, F58); | |
3584 __ aes_dround01(F44, F52, F54, F56); | |
3585 __ aes_dround23(F42, F56, F58, F54); | |
3586 __ aes_dround01(F40, F56, F58, F52); | |
3587 | |
3588 for ( int i = 0; i <= 7; i++ ) { | |
3589 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
3590 } | |
3591 | |
3592 // perform inverse cipher transformations common for all key sizes | |
3593 __ BIND(L_common_transform); | |
3594 for ( int i = 38; i >= 6; i -= 8 ) { | |
3595 __ aes_dround23(as_FloatRegister(i), F52, F54, F58); | |
3596 __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56); | |
3597 if ( i != 6) { | |
3598 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54); | |
3599 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52); | |
3600 } else { | |
3601 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54); | |
3602 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52); | |
3603 } | |
3604 } | |
3605 | |
3606 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
3607 __ andcc(to, 7, O5); | |
3608 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output); | |
3609 __ delayed()->edge8n(to, G0, O3); | |
3610 | |
3611 // aligned case: store output into the destination array | |
3612 __ stf(FloatRegisterImpl::D, F52, to, 0); | |
3613 __ retl(); | |
3614 __ delayed()->stf(FloatRegisterImpl::D, F54, to, 8); | |
3615 | |
3616 __ BIND(L_store_misaligned_output); | |
3617 __ add(to, 8, O4); | |
3618 __ mov(8, O2); | |
3619 __ sub(O2, O5, O2); | |
3620 __ alignaddr(O2, G0, O2); | |
3621 __ faligndata(F52, F52, F52); | |
3622 __ faligndata(F54, F54, F54); | |
3623 __ and3(to, -8, to); | |
3624 __ and3(O4, -8, O4); | |
3625 __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY); | |
3626 __ stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY); | |
3627 __ add(to, 8, to); | |
3628 __ add(O4, 8, O4); | |
3629 __ orn(G0, O3, O3); | |
3630 __ stpartialf(to, O3, F52, Assembler::ASI_PST8_PRIMARY); | |
3631 __ retl(); | |
3632 __ delayed()->stpartialf(O4, O3, F54, Assembler::ASI_PST8_PRIMARY); | |
3633 | |
3634 return start; | |
3635 } | |
3636 | |
3637 address generate_cipherBlockChaining_encryptAESCrypt() { | |
3638 assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, | |
3639 "the following code assumes that first element of an int array is aligned to 8 bytes"); | |
3640 assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, | |
3641 "the following code assumes that first element of a byte array is aligned to 8 bytes"); | |
3642 __ align(CodeEntryAlignment); | |
3643 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); | |
3644 Label L_cbcenc128, L_load_misaligned_input_128bit, L_128bit_transform, L_store_misaligned_output_128bit; | |
3645 Label L_check_loop_end_128bit, L_cbcenc192, L_load_misaligned_input_192bit, L_192bit_transform; | |
3646 Label L_store_misaligned_output_192bit, L_check_loop_end_192bit, L_cbcenc256, L_load_misaligned_input_256bit; | |
3647 Label L_256bit_transform, L_store_misaligned_output_256bit, L_check_loop_end_256bit; | |
3648 address start = __ pc(); | |
3649 Register from = I0; // source byte array | |
3650 Register to = I1; // destination byte array | |
3651 Register key = I2; // expanded key array | |
3652 Register rvec = I3; // init vector | |
3653 const Register len_reg = I4; // cipher length | |
3654 const Register keylen = I5; // reg for storing expanded key array length | |
3655 | |
3656 __ save_frame(0); | |
3657 // save cipher len to return in the end | |
3658 __ mov(len_reg, L0); | |
3659 | |
3660 // read expanded key length | |
3661 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); | |
3662 | |
3663 // load initial vector, 8-byte alignment is guaranteed | |
3664 __ ldf(FloatRegisterImpl::D, rvec, 0, F60); | |
3665 __ ldf(FloatRegisterImpl::D, rvec, 8, F62); | |
3666 // load key, 8-byte alignment is guaranteed | |
3667 __ ldx(key,0,G1); | |
3668 __ ldx(key,8,G5); | |
3669 | |
3670 // start loading expanded key, 8-byte alignment is guaranteed | |
3671 for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) { | |
3672 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); | |
3673 } | |
3674 | |
3675 // 128-bit original key size | |
3676 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128); | |
3677 | |
3678 for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) { | |
3679 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); | |
3680 } | |
3681 | |
3682 // 192-bit original key size | |
3683 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192); | |
3684 | |
3685 for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) { | |
3686 __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); | |
3687 } | |
3688 | |
3689 // 256-bit original key size | |
3690 __ ba_short(L_cbcenc256); | |
3691 | |
3692 __ align(OptoLoopAlignment); | |
3693 __ BIND(L_cbcenc128); | |
3694 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
3695 __ andcc(from, 7, G0); | |
3696 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_128bit); | |
3697 __ delayed()->mov(from, L1); // save original 'from' address before alignaddr | |
3698 | |
3699 // aligned case: load input into G3 and G4 | |
3700 __ ldx(from,0,G3); | |
3701 __ ldx(from,8,G4); | |
3702 __ ba_short(L_128bit_transform); | |
3703 | |
3704 __ BIND(L_load_misaligned_input_128bit); | |
3705 // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption | |
3706 __ alignaddr(from, G0, from); | |
3707 __ ldf(FloatRegisterImpl::D, from, 0, F48); | |
3708 __ ldf(FloatRegisterImpl::D, from, 8, F50); | |
3709 __ ldf(FloatRegisterImpl::D, from, 16, F52); | |
3710 __ faligndata(F48, F50, F48); | |
3711 __ faligndata(F50, F52, F50); | |
3712 __ movdtox(F48, G3); | |
3713 __ movdtox(F50, G4); | |
3714 __ mov(L1, from); | |
3715 | |
3716 __ BIND(L_128bit_transform); | |
3717 __ xor3(G1,G3,G3); | |
3718 __ xor3(G5,G4,G4); | |
3719 __ movxtod(G3,F56); | |
3720 __ movxtod(G4,F58); | |
3721 __ fxor(FloatRegisterImpl::D, F60, F56, F60); | |
3722 __ fxor(FloatRegisterImpl::D, F62, F58, F62); | |
3723 | |
3724 // TEN_EROUNDS | |
3725 for ( int i = 0; i <= 32; i += 8 ) { | |
3726 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); | |
3727 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); | |
3728 if (i != 32 ) { | |
3729 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); | |
3730 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); | |
3731 } else { | |
3732 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); | |
3733 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); | |
3734 } | |
3735 } | |
3736 | |
3737 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
3738 __ andcc(to, 7, L1); | |
3739 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_128bit); | |
3740 __ delayed()->edge8n(to, G0, L2); | |
3741 | |
3742 // aligned case: store output into the destination array | |
3743 __ stf(FloatRegisterImpl::D, F60, to, 0); | |
3744 __ stf(FloatRegisterImpl::D, F62, to, 8); | |
3745 __ ba_short(L_check_loop_end_128bit); | |
3746 | |
3747 __ BIND(L_store_misaligned_output_128bit); | |
3748 __ add(to, 8, L3); | |
3749 __ mov(8, L4); | |
3750 __ sub(L4, L1, L4); | |
3751 __ alignaddr(L4, G0, L4); | |
3752 // save cipher text before circular right shift | |
3753 // as it needs to be stored as iv for next block (see code before next retl) | |
3754 __ movdtox(F60, L6); | |
3755 __ movdtox(F62, L7); | |
3756 __ faligndata(F60, F60, F60); | |
3757 __ faligndata(F62, F62, F62); | |
3758 __ mov(to, L5); | |
3759 __ and3(to, -8, to); | |
3760 __ and3(L3, -8, L3); | |
3761 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); | |
3762 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); | |
3763 __ add(to, 8, to); | |
3764 __ add(L3, 8, L3); | |
3765 __ orn(G0, L2, L2); | |
3766 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); | |
3767 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); | |
3768 __ mov(L5, to); | |
3769 __ movxtod(L6, F60); | |
3770 __ movxtod(L7, F62); | |
3771 | |
3772 __ BIND(L_check_loop_end_128bit); | |
3773 __ add(from, 16, from); | |
3774 __ add(to, 16, to); | |
3775 __ subcc(len_reg, 16, len_reg); | |
3776 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128); | |
3777 __ delayed()->nop(); | |
3778 // re-init initial vector for next block, 8-byte alignment is guaranteed | |
3779 __ stf(FloatRegisterImpl::D, F60, rvec, 0); | |
3780 __ stf(FloatRegisterImpl::D, F62, rvec, 8); | |
3781 __ mov(L0, I0); | |
3782 __ ret(); | |
3783 __ delayed()->restore(); | |
3784 | |
3785 __ align(OptoLoopAlignment); | |
3786 __ BIND(L_cbcenc192); | |
3787 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
3788 __ andcc(from, 7, G0); | |
3789 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_192bit); | |
3790 __ delayed()->mov(from, L1); // save original 'from' address before alignaddr | |
3791 | |
3792 // aligned case: load input into G3 and G4 | |
3793 __ ldx(from,0,G3); | |
3794 __ ldx(from,8,G4); | |
3795 __ ba_short(L_192bit_transform); | |
3796 | |
3797 __ BIND(L_load_misaligned_input_192bit); | |
3798 // can clobber F48, F50 and F52 as they are not used in 128 and 192-bit key encryption | |
3799 __ alignaddr(from, G0, from); | |
3800 __ ldf(FloatRegisterImpl::D, from, 0, F48); | |
3801 __ ldf(FloatRegisterImpl::D, from, 8, F50); | |
3802 __ ldf(FloatRegisterImpl::D, from, 16, F52); | |
3803 __ faligndata(F48, F50, F48); | |
3804 __ faligndata(F50, F52, F50); | |
3805 __ movdtox(F48, G3); | |
3806 __ movdtox(F50, G4); | |
3807 __ mov(L1, from); | |
3808 | |
3809 __ BIND(L_192bit_transform); | |
3810 __ xor3(G1,G3,G3); | |
3811 __ xor3(G5,G4,G4); | |
3812 __ movxtod(G3,F56); | |
3813 __ movxtod(G4,F58); | |
3814 __ fxor(FloatRegisterImpl::D, F60, F56, F60); | |
3815 __ fxor(FloatRegisterImpl::D, F62, F58, F62); | |
3816 | |
3817 // TWELEVE_EROUNDS | |
3818 for ( int i = 0; i <= 40; i += 8 ) { | |
3819 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); | |
3820 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); | |
3821 if (i != 40 ) { | |
3822 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); | |
3823 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); | |
3824 } else { | |
3825 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); | |
3826 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); | |
3827 } | |
3828 } | |
3829 | |
3830 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
3831 __ andcc(to, 7, L1); | |
3832 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_192bit); | |
3833 __ delayed()->edge8n(to, G0, L2); | |
3834 | |
3835 // aligned case: store output into the destination array | |
3836 __ stf(FloatRegisterImpl::D, F60, to, 0); | |
3837 __ stf(FloatRegisterImpl::D, F62, to, 8); | |
3838 __ ba_short(L_check_loop_end_192bit); | |
3839 | |
3840 __ BIND(L_store_misaligned_output_192bit); | |
3841 __ add(to, 8, L3); | |
3842 __ mov(8, L4); | |
3843 __ sub(L4, L1, L4); | |
3844 __ alignaddr(L4, G0, L4); | |
3845 __ movdtox(F60, L6); | |
3846 __ movdtox(F62, L7); | |
3847 __ faligndata(F60, F60, F60); | |
3848 __ faligndata(F62, F62, F62); | |
3849 __ mov(to, L5); | |
3850 __ and3(to, -8, to); | |
3851 __ and3(L3, -8, L3); | |
3852 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); | |
3853 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); | |
3854 __ add(to, 8, to); | |
3855 __ add(L3, 8, L3); | |
3856 __ orn(G0, L2, L2); | |
3857 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); | |
3858 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); | |
3859 __ mov(L5, to); | |
3860 __ movxtod(L6, F60); | |
3861 __ movxtod(L7, F62); | |
3862 | |
3863 __ BIND(L_check_loop_end_192bit); | |
3864 __ add(from, 16, from); | |
3865 __ subcc(len_reg, 16, len_reg); | |
3866 __ add(to, 16, to); | |
3867 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192); | |
3868 __ delayed()->nop(); | |
3869 // re-init initial vector for next block, 8-byte alignment is guaranteed | |
3870 __ stf(FloatRegisterImpl::D, F60, rvec, 0); | |
3871 __ stf(FloatRegisterImpl::D, F62, rvec, 8); | |
3872 __ mov(L0, I0); | |
3873 __ ret(); | |
3874 __ delayed()->restore(); | |
3875 | |
3876 __ align(OptoLoopAlignment); | |
3877 __ BIND(L_cbcenc256); | |
3878 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
3879 __ andcc(from, 7, G0); | |
3880 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_256bit); | |
3881 __ delayed()->mov(from, L1); // save original 'from' address before alignaddr | |
3882 | |
3883 // aligned case: load input into G3 and G4 | |
3884 __ ldx(from,0,G3); | |
3885 __ ldx(from,8,G4); | |
3886 __ ba_short(L_256bit_transform); | |
3887 | |
3888 __ BIND(L_load_misaligned_input_256bit); | |
3889 // cannot clobber F48, F50 and F52. F56, F58 can be used though | |
3890 __ alignaddr(from, G0, from); | |
3891 __ movdtox(F60, L2); // save F60 before overwriting | |
3892 __ ldf(FloatRegisterImpl::D, from, 0, F56); | |
3893 __ ldf(FloatRegisterImpl::D, from, 8, F58); | |
3894 __ ldf(FloatRegisterImpl::D, from, 16, F60); | |
3895 __ faligndata(F56, F58, F56); | |
3896 __ faligndata(F58, F60, F58); | |
3897 __ movdtox(F56, G3); | |
3898 __ movdtox(F58, G4); | |
3899 __ mov(L1, from); | |
3900 __ movxtod(L2, F60); | |
3901 | |
3902 __ BIND(L_256bit_transform); | |
3903 __ xor3(G1,G3,G3); | |
3904 __ xor3(G5,G4,G4); | |
3905 __ movxtod(G3,F56); | |
3906 __ movxtod(G4,F58); | |
3907 __ fxor(FloatRegisterImpl::D, F60, F56, F60); | |
3908 __ fxor(FloatRegisterImpl::D, F62, F58, F62); | |
3909 | |
3910 // FOURTEEN_EROUNDS | |
3911 for ( int i = 0; i <= 48; i += 8 ) { | |
3912 __ aes_eround01(as_FloatRegister(i), F60, F62, F56); | |
3913 __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); | |
3914 if (i != 48 ) { | |
3915 __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); | |
3916 __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); | |
3917 } else { | |
3918 __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); | |
3919 __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); | |
3920 } | |
3921 } | |
3922 | |
3923 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
3924 __ andcc(to, 7, L1); | |
3925 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_256bit); | |
3926 __ delayed()->edge8n(to, G0, L2); | |
3927 | |
3928 // aligned case: store output into the destination array | |
3929 __ stf(FloatRegisterImpl::D, F60, to, 0); | |
3930 __ stf(FloatRegisterImpl::D, F62, to, 8); | |
3931 __ ba_short(L_check_loop_end_256bit); | |
3932 | |
3933 __ BIND(L_store_misaligned_output_256bit); | |
3934 __ add(to, 8, L3); | |
3935 __ mov(8, L4); | |
3936 __ sub(L4, L1, L4); | |
3937 __ alignaddr(L4, G0, L4); | |
3938 __ movdtox(F60, L6); | |
3939 __ movdtox(F62, L7); | |
3940 __ faligndata(F60, F60, F60); | |
3941 __ faligndata(F62, F62, F62); | |
3942 __ mov(to, L5); | |
3943 __ and3(to, -8, to); | |
3944 __ and3(L3, -8, L3); | |
3945 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); | |
3946 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); | |
3947 __ add(to, 8, to); | |
3948 __ add(L3, 8, L3); | |
3949 __ orn(G0, L2, L2); | |
3950 __ stpartialf(to, L2, F60, Assembler::ASI_PST8_PRIMARY); | |
3951 __ stpartialf(L3, L2, F62, Assembler::ASI_PST8_PRIMARY); | |
3952 __ mov(L5, to); | |
3953 __ movxtod(L6, F60); | |
3954 __ movxtod(L7, F62); | |
3955 | |
3956 __ BIND(L_check_loop_end_256bit); | |
3957 __ add(from, 16, from); | |
3958 __ subcc(len_reg, 16, len_reg); | |
3959 __ add(to, 16, to); | |
3960 __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256); | |
3961 __ delayed()->nop(); | |
3962 // re-init initial vector for next block, 8-byte alignment is guaranteed | |
3963 __ stf(FloatRegisterImpl::D, F60, rvec, 0); | |
3964 __ stf(FloatRegisterImpl::D, F62, rvec, 8); | |
3965 __ mov(L0, I0); | |
3966 __ ret(); | |
3967 __ delayed()->restore(); | |
3968 | |
3969 return start; | |
3970 } | |
3971 | |
3972 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { | |
3973 assert((arrayOopDesc::base_offset_in_bytes(T_INT) & 7) == 0, | |
3974 "the following code assumes that first element of an int array is aligned to 8 bytes"); | |
3975 assert((arrayOopDesc::base_offset_in_bytes(T_BYTE) & 7) == 0, | |
3976 "the following code assumes that first element of a byte array is aligned to 8 bytes"); | |
3977 __ align(CodeEntryAlignment); | |
3978 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); | |
3979 Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start; | |
3980 Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256; | |
3981 Label L_load_misaligned_input_first_block, L_transform_first_block, L_load_misaligned_next2_blocks128, L_transform_next2_blocks128; | |
3982 Label L_load_misaligned_next2_blocks192, L_transform_next2_blocks192, L_load_misaligned_next2_blocks256, L_transform_next2_blocks256; | |
3983 Label L_store_misaligned_output_first_block, L_check_decrypt_end, L_store_misaligned_output_next2_blocks128; | |
3984 Label L_check_decrypt_loop_end128, L_store_misaligned_output_next2_blocks192, L_check_decrypt_loop_end192; | |
3985 Label L_store_misaligned_output_next2_blocks256, L_check_decrypt_loop_end256; | |
3986 address start = __ pc(); | |
3987 Register from = I0; // source byte array | |
3988 Register to = I1; // destination byte array | |
3989 Register key = I2; // expanded key array | |
3990 Register rvec = I3; // init vector | |
3991 const Register len_reg = I4; // cipher length | |
3992 const Register original_key = I5; // original key array only required during decryption | |
3993 const Register keylen = L6; // reg for storing expanded key array length | |
3994 | |
3995 __ save_frame(0); //args are read from I* registers since we save the frame in the beginning | |
3996 // save cipher len to return in the end | |
3997 __ mov(len_reg, L7); | |
3998 | |
3999 // load original key from SunJCE expanded decryption key | |
4000 // Since we load original key buffer starting first element, 8-byte alignment is guaranteed | |
4001 for ( int i = 0; i <= 3; i++ ) { | |
4002 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
4003 } | |
4004 | |
4005 // load initial vector, 8-byte alignment is guaranteed | |
4006 __ ldx(rvec,0,L0); | |
4007 __ ldx(rvec,8,L1); | |
4008 | |
4009 // read expanded key array length | |
4010 __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); | |
4011 | |
4012 // 256-bit original key size | |
4013 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); | |
4014 | |
4015 // 192-bit original key size | |
4016 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); | |
4017 | |
4018 // 128-bit original key size | |
4019 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
4020 for ( int i = 0; i <= 36; i += 4 ) { | |
4021 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); | |
4022 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); | |
4023 } | |
4024 | |
4025 // load expanded key[last-1] and key[last] elements | |
4026 __ movdtox(F40,L2); | |
4027 __ movdtox(F42,L3); | |
4028 | |
4029 __ and3(len_reg, 16, L4); | |
4030 __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks128); | |
4031 __ nop(); | |
4032 | |
4033 __ ba_short(L_dec_first_block_start); | |
4034 | |
4035 __ BIND(L_expand192bit); | |
4036 // load rest of the 192-bit key | |
4037 __ ldf(FloatRegisterImpl::S, original_key, 16, F4); | |
4038 __ ldf(FloatRegisterImpl::S, original_key, 20, F5); | |
4039 | |
4040 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
4041 for ( int i = 0; i <= 36; i += 6 ) { | |
4042 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); | |
4043 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); | |
4044 __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); | |
4045 } | |
4046 __ aes_kexpand1(F42, F46, 7, F48); | |
4047 __ aes_kexpand2(F44, F48, F50); | |
4048 | |
4049 // load expanded key[last-1] and key[last] elements | |
4050 __ movdtox(F48,L2); | |
4051 __ movdtox(F50,L3); | |
4052 | |
4053 __ and3(len_reg, 16, L4); | |
4054 __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks192); | |
4055 __ nop(); | |
4056 | |
4057 __ ba_short(L_dec_first_block_start); | |
4058 | |
4059 __ BIND(L_expand256bit); | |
4060 // load rest of the 256-bit key | |
4061 for ( int i = 4; i <= 7; i++ ) { | |
4062 __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); | |
4063 } | |
4064 | |
4065 // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions | |
4066 for ( int i = 0; i <= 40; i += 8 ) { | |
4067 __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); | |
4068 __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); | |
4069 __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); | |
4070 __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); | |
4071 } | |
4072 __ aes_kexpand1(F48, F54, 6, F56); | |
4073 __ aes_kexpand2(F50, F56, F58); | |
4074 | |
4075 // load expanded key[last-1] and key[last] elements | |
4076 __ movdtox(F56,L2); | |
4077 __ movdtox(F58,L3); | |
4078 | |
4079 __ and3(len_reg, 16, L4); | |
4080 __ br_null_short(L4, Assembler::pt, L_dec_next2_blocks256); | |
4081 | |
4082 __ BIND(L_dec_first_block_start); | |
4083 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
4084 __ andcc(from, 7, G0); | |
4085 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_input_first_block); | |
4086 __ delayed()->mov(from, G1); // save original 'from' address before alignaddr | |
4087 | |
4088 // aligned case: load input into L4 and L5 | |
4089 __ ldx(from,0,L4); | |
4090 __ ldx(from,8,L5); | |
4091 __ ba_short(L_transform_first_block); | |
4092 | |
4093 __ BIND(L_load_misaligned_input_first_block); | |
4094 __ alignaddr(from, G0, from); | |
4095 // F58, F60, F62 can be clobbered | |
4096 __ ldf(FloatRegisterImpl::D, from, 0, F58); | |
4097 __ ldf(FloatRegisterImpl::D, from, 8, F60); | |
4098 __ ldf(FloatRegisterImpl::D, from, 16, F62); | |
4099 __ faligndata(F58, F60, F58); | |
4100 __ faligndata(F60, F62, F60); | |
4101 __ movdtox(F58, L4); | |
4102 __ movdtox(F60, L5); | |
4103 __ mov(G1, from); | |
4104 | |
4105 __ BIND(L_transform_first_block); | |
4106 __ xor3(L2,L4,G1); | |
4107 __ movxtod(G1,F60); | |
4108 __ xor3(L3,L5,G1); | |
4109 __ movxtod(G1,F62); | |
4110 | |
4111 // 128-bit original key size | |
4112 __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128); | |
4113 | |
4114 // 192-bit original key size | |
4115 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192); | |
4116 | |
4117 __ aes_dround23(F54, F60, F62, F58); | |
4118 __ aes_dround01(F52, F60, F62, F56); | |
4119 __ aes_dround23(F50, F56, F58, F62); | |
4120 __ aes_dround01(F48, F56, F58, F60); | |
4121 | |
4122 __ BIND(L_dec_first_block192); | |
4123 __ aes_dround23(F46, F60, F62, F58); | |
4124 __ aes_dround01(F44, F60, F62, F56); | |
4125 __ aes_dround23(F42, F56, F58, F62); | |
4126 __ aes_dround01(F40, F56, F58, F60); | |
4127 | |
4128 __ BIND(L_dec_first_block128); | |
4129 for ( int i = 38; i >= 6; i -= 8 ) { | |
4130 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); | |
4131 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); | |
4132 if ( i != 6) { | |
4133 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); | |
4134 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); | |
4135 } else { | |
4136 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); | |
4137 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); | |
4138 } | |
4139 } | |
4140 | |
4141 __ movxtod(L0,F56); | |
4142 __ movxtod(L1,F58); | |
4143 __ mov(L4,L0); | |
4144 __ mov(L5,L1); | |
4145 __ fxor(FloatRegisterImpl::D, F56, F60, F60); | |
4146 __ fxor(FloatRegisterImpl::D, F58, F62, F62); | |
4147 | |
4148 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
4149 __ andcc(to, 7, G1); | |
4150 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_first_block); | |
4151 __ delayed()->edge8n(to, G0, G2); | |
4152 | |
4153 // aligned case: store output into the destination array | |
4154 __ stf(FloatRegisterImpl::D, F60, to, 0); | |
4155 __ stf(FloatRegisterImpl::D, F62, to, 8); | |
4156 __ ba_short(L_check_decrypt_end); | |
4157 | |
4158 __ BIND(L_store_misaligned_output_first_block); | |
4159 __ add(to, 8, G3); | |
4160 __ mov(8, G4); | |
4161 __ sub(G4, G1, G4); | |
4162 __ alignaddr(G4, G0, G4); | |
4163 __ faligndata(F60, F60, F60); | |
4164 __ faligndata(F62, F62, F62); | |
4165 __ mov(to, G1); | |
4166 __ and3(to, -8, to); | |
4167 __ and3(G3, -8, G3); | |
4168 __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY); | |
4169 __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY); | |
4170 __ add(to, 8, to); | |
4171 __ add(G3, 8, G3); | |
4172 __ orn(G0, G2, G2); | |
4173 __ stpartialf(to, G2, F60, Assembler::ASI_PST8_PRIMARY); | |
4174 __ stpartialf(G3, G2, F62, Assembler::ASI_PST8_PRIMARY); | |
4175 __ mov(G1, to); | |
4176 | |
4177 __ BIND(L_check_decrypt_end); | |
4178 __ add(from, 16, from); | |
4179 __ add(to, 16, to); | |
4180 __ subcc(len_reg, 16, len_reg); | |
4181 __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end); | |
4182 __ delayed()->nop(); | |
4183 | |
4184 // 256-bit original key size | |
4185 __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256); | |
4186 | |
4187 // 192-bit original key size | |
4188 __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192); | |
4189 | |
4190 __ align(OptoLoopAlignment); | |
4191 __ BIND(L_dec_next2_blocks128); | |
4192 __ nop(); | |
4193 | |
4194 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
4195 __ andcc(from, 7, G0); | |
4196 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks128); | |
4197 __ delayed()->mov(from, G1); // save original 'from' address before alignaddr | |
4198 | |
4199 // aligned case: load input into G4, G5, L4 and L5 | |
4200 __ ldx(from,0,G4); | |
4201 __ ldx(from,8,G5); | |
4202 __ ldx(from,16,L4); | |
4203 __ ldx(from,24,L5); | |
4204 __ ba_short(L_transform_next2_blocks128); | |
4205 | |
4206 __ BIND(L_load_misaligned_next2_blocks128); | |
4207 __ alignaddr(from, G0, from); | |
4208 // F40, F42, F58, F60, F62 can be clobbered | |
4209 __ ldf(FloatRegisterImpl::D, from, 0, F40); | |
4210 __ ldf(FloatRegisterImpl::D, from, 8, F42); | |
4211 __ ldf(FloatRegisterImpl::D, from, 16, F60); | |
4212 __ ldf(FloatRegisterImpl::D, from, 24, F62); | |
4213 __ ldf(FloatRegisterImpl::D, from, 32, F58); | |
4214 __ faligndata(F40, F42, F40); | |
4215 __ faligndata(F42, F60, F42); | |
4216 __ faligndata(F60, F62, F60); | |
4217 __ faligndata(F62, F58, F62); | |
4218 __ movdtox(F40, G4); | |
4219 __ movdtox(F42, G5); | |
4220 __ movdtox(F60, L4); | |
4221 __ movdtox(F62, L5); | |
4222 __ mov(G1, from); | |
4223 | |
4224 __ BIND(L_transform_next2_blocks128); | |
4225 // F40:F42 used for first 16-bytes | |
4226 __ xor3(L2,G4,G1); | |
4227 __ movxtod(G1,F40); | |
4228 __ xor3(L3,G5,G1); | |
4229 __ movxtod(G1,F42); | |
4230 | |
4231 // F60:F62 used for next 16-bytes | |
4232 __ xor3(L2,L4,G1); | |
4233 __ movxtod(G1,F60); | |
4234 __ xor3(L3,L5,G1); | |
4235 __ movxtod(G1,F62); | |
4236 | |
4237 for ( int i = 38; i >= 6; i -= 8 ) { | |
4238 __ aes_dround23(as_FloatRegister(i), F40, F42, F44); | |
4239 __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46); | |
4240 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); | |
4241 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); | |
4242 if (i != 6 ) { | |
4243 __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42); | |
4244 __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40); | |
4245 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); | |
4246 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); | |
4247 } else { | |
4248 __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42); | |
4249 __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40); | |
4250 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); | |
4251 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); | |
4252 } | |
4253 } | |
4254 | |
4255 __ movxtod(L0,F46); | |
4256 __ movxtod(L1,F44); | |
4257 __ fxor(FloatRegisterImpl::D, F46, F40, F40); | |
4258 __ fxor(FloatRegisterImpl::D, F44, F42, F42); | |
4259 | |
4260 __ movxtod(G4,F56); | |
4261 __ movxtod(G5,F58); | |
4262 __ mov(L4,L0); | |
4263 __ mov(L5,L1); | |
4264 __ fxor(FloatRegisterImpl::D, F56, F60, F60); | |
4265 __ fxor(FloatRegisterImpl::D, F58, F62, F62); | |
4266 | |
4267 // For mis-aligned store of 32 bytes of result we can do: | |
4268 // Circular right-shift all 4 FP registers so that 'head' and 'tail' | |
4269 // parts that need to be stored starting at mis-aligned address are in a FP reg | |
4270 // the other 3 FP regs can thus be stored using regular store | |
4271 // we then use the edge + partial-store mechanism to store the 'head' and 'tail' parts | |
4272 | |
4273 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
4274 __ andcc(to, 7, G1); | |
4275 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks128); | |
4276 __ delayed()->edge8n(to, G0, G2); | |
4277 | |
4278 // aligned case: store output into the destination array | |
4279 __ stf(FloatRegisterImpl::D, F40, to, 0); | |
4280 __ stf(FloatRegisterImpl::D, F42, to, 8); | |
4281 __ stf(FloatRegisterImpl::D, F60, to, 16); | |
4282 __ stf(FloatRegisterImpl::D, F62, to, 24); | |
4283 __ ba_short(L_check_decrypt_loop_end128); | |
4284 | |
4285 __ BIND(L_store_misaligned_output_next2_blocks128); | |
4286 __ mov(8, G4); | |
4287 __ sub(G4, G1, G4); | |
4288 __ alignaddr(G4, G0, G4); | |
4289 __ faligndata(F40, F42, F56); // F56 can be clobbered | |
4290 __ faligndata(F42, F60, F42); | |
4291 __ faligndata(F60, F62, F60); | |
4292 __ faligndata(F62, F40, F40); | |
4293 __ mov(to, G1); | |
4294 __ and3(to, -8, to); | |
4295 __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY); | |
4296 __ stf(FloatRegisterImpl::D, F56, to, 8); | |
4297 __ stf(FloatRegisterImpl::D, F42, to, 16); | |
4298 __ stf(FloatRegisterImpl::D, F60, to, 24); | |
4299 __ add(to, 32, to); | |
4300 __ orn(G0, G2, G2); | |
4301 __ stpartialf(to, G2, F40, Assembler::ASI_PST8_PRIMARY); | |
4302 __ mov(G1, to); | |
4303 | |
4304 __ BIND(L_check_decrypt_loop_end128); | |
4305 __ add(from, 32, from); | |
4306 __ add(to, 32, to); | |
4307 __ subcc(len_reg, 32, len_reg); | |
4308 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128); | |
4309 __ delayed()->nop(); | |
4310 __ ba_short(L_cbcdec_end); | |
4311 | |
4312 __ align(OptoLoopAlignment); | |
4313 __ BIND(L_dec_next2_blocks192); | |
4314 __ nop(); | |
4315 | |
4316 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
4317 __ andcc(from, 7, G0); | |
4318 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks192); | |
4319 __ delayed()->mov(from, G1); // save original 'from' address before alignaddr | |
4320 | |
4321 // aligned case: load input into G4, G5, L4 and L5 | |
4322 __ ldx(from,0,G4); | |
4323 __ ldx(from,8,G5); | |
4324 __ ldx(from,16,L4); | |
4325 __ ldx(from,24,L5); | |
4326 __ ba_short(L_transform_next2_blocks192); | |
4327 | |
4328 __ BIND(L_load_misaligned_next2_blocks192); | |
4329 __ alignaddr(from, G0, from); | |
4330 // F48, F50, F52, F60, F62 can be clobbered | |
4331 __ ldf(FloatRegisterImpl::D, from, 0, F48); | |
4332 __ ldf(FloatRegisterImpl::D, from, 8, F50); | |
4333 __ ldf(FloatRegisterImpl::D, from, 16, F60); | |
4334 __ ldf(FloatRegisterImpl::D, from, 24, F62); | |
4335 __ ldf(FloatRegisterImpl::D, from, 32, F52); | |
4336 __ faligndata(F48, F50, F48); | |
4337 __ faligndata(F50, F60, F50); | |
4338 __ faligndata(F60, F62, F60); | |
4339 __ faligndata(F62, F52, F62); | |
4340 __ movdtox(F48, G4); | |
4341 __ movdtox(F50, G5); | |
4342 __ movdtox(F60, L4); | |
4343 __ movdtox(F62, L5); | |
4344 __ mov(G1, from); | |
4345 | |
4346 __ BIND(L_transform_next2_blocks192); | |
4347 // F48:F50 used for first 16-bytes | |
4348 __ xor3(L2,G4,G1); | |
4349 __ movxtod(G1,F48); | |
4350 __ xor3(L3,G5,G1); | |
4351 __ movxtod(G1,F50); | |
4352 | |
4353 // F60:F62 used for next 16-bytes | |
4354 __ xor3(L2,L4,G1); | |
4355 __ movxtod(G1,F60); | |
4356 __ xor3(L3,L5,G1); | |
4357 __ movxtod(G1,F62); | |
4358 | |
4359 for ( int i = 46; i >= 6; i -= 8 ) { | |
4360 __ aes_dround23(as_FloatRegister(i), F48, F50, F52); | |
4361 __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54); | |
4362 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); | |
4363 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); | |
4364 if (i != 6 ) { | |
4365 __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50); | |
4366 __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48); | |
4367 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); | |
4368 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); | |
4369 } else { | |
4370 __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50); | |
4371 __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48); | |
4372 __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); | |
4373 __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); | |
4374 } | |
4375 } | |
4376 | |
4377 __ movxtod(L0,F54); | |
4378 __ movxtod(L1,F52); | |
4379 __ fxor(FloatRegisterImpl::D, F54, F48, F48); | |
4380 __ fxor(FloatRegisterImpl::D, F52, F50, F50); | |
4381 | |
4382 __ movxtod(G4,F56); | |
4383 __ movxtod(G5,F58); | |
4384 __ mov(L4,L0); | |
4385 __ mov(L5,L1); | |
4386 __ fxor(FloatRegisterImpl::D, F56, F60, F60); | |
4387 __ fxor(FloatRegisterImpl::D, F58, F62, F62); | |
4388 | |
4389 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
4390 __ andcc(to, 7, G1); | |
4391 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks192); | |
4392 __ delayed()->edge8n(to, G0, G2); | |
4393 | |
4394 // aligned case: store output into the destination array | |
4395 __ stf(FloatRegisterImpl::D, F48, to, 0); | |
4396 __ stf(FloatRegisterImpl::D, F50, to, 8); | |
4397 __ stf(FloatRegisterImpl::D, F60, to, 16); | |
4398 __ stf(FloatRegisterImpl::D, F62, to, 24); | |
4399 __ ba_short(L_check_decrypt_loop_end192); | |
4400 | |
4401 __ BIND(L_store_misaligned_output_next2_blocks192); | |
4402 __ mov(8, G4); | |
4403 __ sub(G4, G1, G4); | |
4404 __ alignaddr(G4, G0, G4); | |
4405 __ faligndata(F48, F50, F56); // F56 can be clobbered | |
4406 __ faligndata(F50, F60, F50); | |
4407 __ faligndata(F60, F62, F60); | |
4408 __ faligndata(F62, F48, F48); | |
4409 __ mov(to, G1); | |
4410 __ and3(to, -8, to); | |
4411 __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY); | |
4412 __ stf(FloatRegisterImpl::D, F56, to, 8); | |
4413 __ stf(FloatRegisterImpl::D, F50, to, 16); | |
4414 __ stf(FloatRegisterImpl::D, F60, to, 24); | |
4415 __ add(to, 32, to); | |
4416 __ orn(G0, G2, G2); | |
4417 __ stpartialf(to, G2, F48, Assembler::ASI_PST8_PRIMARY); | |
4418 __ mov(G1, to); | |
4419 | |
4420 __ BIND(L_check_decrypt_loop_end192); | |
4421 __ add(from, 32, from); | |
4422 __ add(to, 32, to); | |
4423 __ subcc(len_reg, 32, len_reg); | |
4424 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192); | |
4425 __ delayed()->nop(); | |
4426 __ ba_short(L_cbcdec_end); | |
4427 | |
4428 __ align(OptoLoopAlignment); | |
4429 __ BIND(L_dec_next2_blocks256); | |
4430 __ nop(); | |
4431 | |
4432 // check for 8-byte alignment since source byte array may have an arbitrary alignment if offset mod 8 is non-zero | |
4433 __ andcc(from, 7, G0); | |
4434 __ br(Assembler::notZero, true, Assembler::pn, L_load_misaligned_next2_blocks256); | |
4435 __ delayed()->mov(from, G1); // save original 'from' address before alignaddr | |
4436 | |
4437 // aligned case: load input into G4, G5, L4 and L5 | |
4438 __ ldx(from,0,G4); | |
4439 __ ldx(from,8,G5); | |
4440 __ ldx(from,16,L4); | |
4441 __ ldx(from,24,L5); | |
4442 __ ba_short(L_transform_next2_blocks256); | |
4443 | |
4444 __ BIND(L_load_misaligned_next2_blocks256); | |
4445 __ alignaddr(from, G0, from); | |
4446 // F0, F2, F4, F60, F62 can be clobbered | |
4447 __ ldf(FloatRegisterImpl::D, from, 0, F0); | |
4448 __ ldf(FloatRegisterImpl::D, from, 8, F2); | |
4449 __ ldf(FloatRegisterImpl::D, from, 16, F60); | |
4450 __ ldf(FloatRegisterImpl::D, from, 24, F62); | |
4451 __ ldf(FloatRegisterImpl::D, from, 32, F4); | |
4452 __ faligndata(F0, F2, F0); | |
4453 __ faligndata(F2, F60, F2); | |
4454 __ faligndata(F60, F62, F60); | |
4455 __ faligndata(F62, F4, F62); | |
4456 __ movdtox(F0, G4); | |
4457 __ movdtox(F2, G5); | |
4458 __ movdtox(F60, L4); | |
4459 __ movdtox(F62, L5); | |
4460 __ mov(G1, from); | |
4461 | |
4462 __ BIND(L_transform_next2_blocks256); | |
4463 // F0:F2 used for first 16-bytes | |
4464 __ xor3(L2,G4,G1); | |
4465 __ movxtod(G1,F0); | |
4466 __ xor3(L3,G5,G1); | |
4467 __ movxtod(G1,F2); | |
4468 | |
4469 // F60:F62 used for next 16-bytes | |
4470 __ xor3(L2,L4,G1); | |
4471 __ movxtod(G1,F60); | |
4472 __ xor3(L3,L5,G1); | |
4473 __ movxtod(G1,F62); | |
4474 | |
4475 __ aes_dround23(F54, F0, F2, F4); | |
4476 __ aes_dround01(F52, F0, F2, F6); | |
4477 __ aes_dround23(F54, F60, F62, F58); | |
4478 __ aes_dround01(F52, F60, F62, F56); | |
4479 __ aes_dround23(F50, F6, F4, F2); | |
4480 __ aes_dround01(F48, F6, F4, F0); | |
4481 __ aes_dround23(F50, F56, F58, F62); | |
4482 __ aes_dround01(F48, F56, F58, F60); | |
4483 // save F48:F54 in temp registers | |
4484 __ movdtox(F54,G2); | |
4485 __ movdtox(F52,G3); | |
4486 __ movdtox(F50,G6); | |
4487 __ movdtox(F48,G1); | |
4488 for ( int i = 46; i >= 14; i -= 8 ) { | |
4489 __ aes_dround23(as_FloatRegister(i), F0, F2, F4); | |
4490 __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6); | |
4491 __ aes_dround23(as_FloatRegister(i), F60, F62, F58); | |
4492 __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); | |
4493 __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2); | |
4494 __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0); | |
4495 __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); | |
4496 __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); | |
4497 } | |
4498 // init F48:F54 with F0:F6 values (original key) | |
4499 __ ldf(FloatRegisterImpl::D, original_key, 0, F48); | |
4500 __ ldf(FloatRegisterImpl::D, original_key, 8, F50); | |
4501 __ ldf(FloatRegisterImpl::D, original_key, 16, F52); | |
4502 __ ldf(FloatRegisterImpl::D, original_key, 24, F54); | |
4503 __ aes_dround23(F54, F0, F2, F4); | |
4504 __ aes_dround01(F52, F0, F2, F6); | |
4505 __ aes_dround23(F54, F60, F62, F58); | |
4506 __ aes_dround01(F52, F60, F62, F56); | |
4507 __ aes_dround23_l(F50, F6, F4, F2); | |
4508 __ aes_dround01_l(F48, F6, F4, F0); | |
4509 __ aes_dround23_l(F50, F56, F58, F62); | |
4510 __ aes_dround01_l(F48, F56, F58, F60); | |
4511 // re-init F48:F54 with their original values | |
4512 __ movxtod(G2,F54); | |
4513 __ movxtod(G3,F52); | |
4514 __ movxtod(G6,F50); | |
4515 __ movxtod(G1,F48); | |
4516 | |
4517 __ movxtod(L0,F6); | |
4518 __ movxtod(L1,F4); | |
4519 __ fxor(FloatRegisterImpl::D, F6, F0, F0); | |
4520 __ fxor(FloatRegisterImpl::D, F4, F2, F2); | |
4521 | |
4522 __ movxtod(G4,F56); | |
4523 __ movxtod(G5,F58); | |
4524 __ mov(L4,L0); | |
4525 __ mov(L5,L1); | |
4526 __ fxor(FloatRegisterImpl::D, F56, F60, F60); | |
4527 __ fxor(FloatRegisterImpl::D, F58, F62, F62); | |
4528 | |
4529 // check for 8-byte alignment since dest byte array may have arbitrary alignment if offset mod 8 is non-zero | |
4530 __ andcc(to, 7, G1); | |
4531 __ br(Assembler::notZero, true, Assembler::pn, L_store_misaligned_output_next2_blocks256); | |
4532 __ delayed()->edge8n(to, G0, G2); | |
4533 | |
4534 // aligned case: store output into the destination array | |
4535 __ stf(FloatRegisterImpl::D, F0, to, 0); | |
4536 __ stf(FloatRegisterImpl::D, F2, to, 8); | |
4537 __ stf(FloatRegisterImpl::D, F60, to, 16); | |
4538 __ stf(FloatRegisterImpl::D, F62, to, 24); | |
4539 __ ba_short(L_check_decrypt_loop_end256); | |
4540 | |
4541 __ BIND(L_store_misaligned_output_next2_blocks256); | |
4542 __ mov(8, G4); | |
4543 __ sub(G4, G1, G4); | |
4544 __ alignaddr(G4, G0, G4); | |
4545 __ faligndata(F0, F2, F56); // F56 can be clobbered | |
4546 __ faligndata(F2, F60, F2); | |
4547 __ faligndata(F60, F62, F60); | |
4548 __ faligndata(F62, F0, F0); | |
4549 __ mov(to, G1); | |
4550 __ and3(to, -8, to); | |
4551 __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY); | |
4552 __ stf(FloatRegisterImpl::D, F56, to, 8); | |
4553 __ stf(FloatRegisterImpl::D, F2, to, 16); | |
4554 __ stf(FloatRegisterImpl::D, F60, to, 24); | |
4555 __ add(to, 32, to); | |
4556 __ orn(G0, G2, G2); | |
4557 __ stpartialf(to, G2, F0, Assembler::ASI_PST8_PRIMARY); | |
4558 __ mov(G1, to); | |
4559 | |
4560 __ BIND(L_check_decrypt_loop_end256); | |
4561 __ add(from, 32, from); | |
4562 __ add(to, 32, to); | |
4563 __ subcc(len_reg, 32, len_reg); | |
4564 __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256); | |
4565 __ delayed()->nop(); | |
4566 | |
4567 __ BIND(L_cbcdec_end); | |
4568 // re-init intial vector for next block, 8-byte alignment is guaranteed | |
4569 __ stx(L0, rvec, 0); | |
4570 __ stx(L1, rvec, 8); | |
4571 __ mov(L7, I0); | |
4572 __ ret(); | |
4573 __ delayed()->restore(); | |
4574 | |
4575 return start; | |
4576 } | |
4577 | |
3307 void generate_initial() { | 4578 void generate_initial() { |
3308 // Generates all stubs and initializes the entry points | 4579 // Generates all stubs and initializes the entry points |
3309 | 4580 |
3310 //------------------------------------------------------------------------------------------------------------------------ | 4581 //------------------------------------------------------------------------------------------------------------------------ |
3311 // entry points that exist in all platforms | 4582 // entry points that exist in all platforms |
3366 &StubRoutines::_safefetch32_fault_pc, | 4637 &StubRoutines::_safefetch32_fault_pc, |
3367 &StubRoutines::_safefetch32_continuation_pc); | 4638 &StubRoutines::_safefetch32_continuation_pc); |
3368 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, | 4639 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, |
3369 &StubRoutines::_safefetchN_fault_pc, | 4640 &StubRoutines::_safefetchN_fault_pc, |
3370 &StubRoutines::_safefetchN_continuation_pc); | 4641 &StubRoutines::_safefetchN_continuation_pc); |
4642 | |
4643 // generate AES intrinsics code | |
4644 if (UseAESIntrinsics) { | |
4645 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); | |
4646 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); | |
4647 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); | |
4648 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); | |
4649 } | |
3371 } | 4650 } |
3372 | 4651 |
3373 | 4652 |
3374 public: | 4653 public: |
3375 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { | 4654 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { |