Mercurial > hg > graal-compiler
comparison src/cpu/x86/vm/stubGenerator_x86_32.cpp @ 7427:2c7f594145dc
8004835: Improve AES intrinsics on x86
Summary: Enable AES intrinsics on non-AVX cpus, group together aes instructions in crypto stubs.
Reviewed-by: roland, twisti
author | kvn |
---|---|
date | Wed, 19 Dec 2012 15:40:35 -0800 |
parents | d2f8c38e543d |
children | e2e6bf86682c |
comparison legend: equal, deleted, inserted, replaced
7426:65c8342f726a | 7427:2c7f594145dc |
---|---|
2172 // c_rarg0 - source byte array address | 2172 // c_rarg0 - source byte array address |
2173 // c_rarg1 - destination byte array address | 2173 // c_rarg1 - destination byte array address |
2174 // c_rarg2 - K (key) in little endian int array | 2174 // c_rarg2 - K (key) in little endian int array |
2175 // | 2175 // |
2176 address generate_aescrypt_encryptBlock() { | 2176 address generate_aescrypt_encryptBlock() { |
2177 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); | 2177 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2178 __ align(CodeEntryAlignment); | 2178 __ align(CodeEntryAlignment); |
2179 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); | 2179 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); |
2180 Label L_doLast; | 2180 Label L_doLast; |
2181 address start = __ pc(); | 2181 address start = __ pc(); |
2182 | 2182 |
2183 const Register from = rsi; // source array address | 2183 const Register from = rdx; // source array address |
2184 const Register to = rdx; // destination array address | 2184 const Register to = rdx; // destination array address |
2185 const Register key = rcx; // key array address | 2185 const Register key = rcx; // key array address |
2186 const Register keylen = rax; | 2186 const Register keylen = rax; |
2187 const Address from_param(rbp, 8+0); | 2187 const Address from_param(rbp, 8+0); |
2188 const Address to_param (rbp, 8+4); | 2188 const Address to_param (rbp, 8+4); |
2189 const Address key_param (rbp, 8+8); | 2189 const Address key_param (rbp, 8+8); |
2190 | 2190 |
2191 const XMMRegister xmm_result = xmm0; | 2191 const XMMRegister xmm_result = xmm0; |
2192 const XMMRegister xmm_temp = xmm1; | 2192 const XMMRegister xmm_key_shuf_mask = xmm1; |
2193 const XMMRegister xmm_key_shuf_mask = xmm2; | 2193 const XMMRegister xmm_temp1 = xmm2; |
2194 | 2194 const XMMRegister xmm_temp2 = xmm3; |
2195 __ enter(); // required for proper stackwalking of RuntimeStub frame | 2195 const XMMRegister xmm_temp3 = xmm4; |
2196 __ push(rsi); | 2196 const XMMRegister xmm_temp4 = xmm5; |
2197 __ movptr(from , from_param); | 2197 |
2198 __ movptr(to , to_param); | 2198 __ enter(); // required for proper stackwalking of RuntimeStub frame |
2199 __ movptr(key , key_param); | 2199 __ movptr(from, from_param); |
2200 | 2200 __ movptr(key, key_param); |
2201 | |
2202 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} | |
2201 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | 2203 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2202 // keylen = # of 32-bit words, convert to 128-bit words | |
2203 __ shrl(keylen, 2); | |
2204 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more | |
2205 | 2204 |
2206 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | 2205 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
2207 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input | 2206 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input |
2207 __ movptr(to, to_param); | |
2208 | 2208 |
2209 // For encryption, the java expanded key ordering is just what we need | 2209 // For encryption, the java expanded key ordering is just what we need |
2210 | 2210 |
2211 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); | 2211 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); |
2212 __ pxor(xmm_result, xmm_temp); | 2212 __ pxor(xmm_result, xmm_temp1); |
2213 for (int offset = 0x10; offset <= 0x90; offset += 0x10) { | 2213 |
2214 aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); | 2214 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2215 } | 2215 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
2216 load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); | 2216 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
2217 __ cmpl(keylen, 0); | 2217 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
2218 __ jcc(Assembler::equal, L_doLast); | 2218 |
2219 __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys | 2219 __ aesenc(xmm_result, xmm_temp1); |
2220 aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); | 2220 __ aesenc(xmm_result, xmm_temp2); |
2221 load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); | 2221 __ aesenc(xmm_result, xmm_temp3); |
2222 __ subl(keylen, 2); | 2222 __ aesenc(xmm_result, xmm_temp4); |
2223 __ jcc(Assembler::equal, L_doLast); | 2223 |
2224 __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys | 2224 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
2225 aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); | 2225 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
2226 load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); | 2226 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
2227 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); | |
2228 | |
2229 __ aesenc(xmm_result, xmm_temp1); | |
2230 __ aesenc(xmm_result, xmm_temp2); | |
2231 __ aesenc(xmm_result, xmm_temp3); | |
2232 __ aesenc(xmm_result, xmm_temp4); | |
2233 | |
2234 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); | |
2235 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); | |
2236 | |
2237 __ cmpl(keylen, 44); | |
2238 __ jccb(Assembler::equal, L_doLast); | |
2239 | |
2240 __ aesenc(xmm_result, xmm_temp1); | |
2241 __ aesenc(xmm_result, xmm_temp2); | |
2242 | |
2243 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); | |
2244 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); | |
2245 | |
2246 __ cmpl(keylen, 52); | |
2247 __ jccb(Assembler::equal, L_doLast); | |
2248 | |
2249 __ aesenc(xmm_result, xmm_temp1); | |
2250 __ aesenc(xmm_result, xmm_temp2); | |
2251 | |
2252 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); | |
2253 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); | |
2227 | 2254 |
2228 __ BIND(L_doLast); | 2255 __ BIND(L_doLast); |
2229 __ aesenclast(xmm_result, xmm_temp); | 2256 __ aesenc(xmm_result, xmm_temp1); |
2257 __ aesenclast(xmm_result, xmm_temp2); | |
2230 __ movdqu(Address(to, 0), xmm_result); // store the result | 2258 __ movdqu(Address(to, 0), xmm_result); // store the result |
2231 __ xorptr(rax, rax); // return 0 | 2259 __ xorptr(rax, rax); // return 0 |
2232 __ pop(rsi); | |
2233 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2260 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2234 __ ret(0); | 2261 __ ret(0); |
2235 | 2262 |
2236 return start; | 2263 return start; |
2237 } | 2264 } |
2243 // c_rarg0 - source byte array address | 2270 // c_rarg0 - source byte array address |
2244 // c_rarg1 - destination byte array address | 2271 // c_rarg1 - destination byte array address |
2245 // c_rarg2 - K (key) in little endian int array | 2272 // c_rarg2 - K (key) in little endian int array |
2246 // | 2273 // |
2247 address generate_aescrypt_decryptBlock() { | 2274 address generate_aescrypt_decryptBlock() { |
2248 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); | 2275 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2249 __ align(CodeEntryAlignment); | 2276 __ align(CodeEntryAlignment); |
2250 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); | 2277 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); |
2251 Label L_doLast; | 2278 Label L_doLast; |
2252 address start = __ pc(); | 2279 address start = __ pc(); |
2253 | 2280 |
2254 const Register from = rsi; // source array address | 2281 const Register from = rdx; // source array address |
2255 const Register to = rdx; // destination array address | 2282 const Register to = rdx; // destination array address |
2256 const Register key = rcx; // key array address | 2283 const Register key = rcx; // key array address |
2257 const Register keylen = rax; | 2284 const Register keylen = rax; |
2258 const Address from_param(rbp, 8+0); | 2285 const Address from_param(rbp, 8+0); |
2259 const Address to_param (rbp, 8+4); | 2286 const Address to_param (rbp, 8+4); |
2260 const Address key_param (rbp, 8+8); | 2287 const Address key_param (rbp, 8+8); |
2261 | 2288 |
2262 const XMMRegister xmm_result = xmm0; | 2289 const XMMRegister xmm_result = xmm0; |
2263 const XMMRegister xmm_temp = xmm1; | 2290 const XMMRegister xmm_key_shuf_mask = xmm1; |
2264 const XMMRegister xmm_key_shuf_mask = xmm2; | 2291 const XMMRegister xmm_temp1 = xmm2; |
2292 const XMMRegister xmm_temp2 = xmm3; | |
2293 const XMMRegister xmm_temp3 = xmm4; | |
2294 const XMMRegister xmm_temp4 = xmm5; | |
2265 | 2295 |
2266 __ enter(); // required for proper stackwalking of RuntimeStub frame | 2296 __ enter(); // required for proper stackwalking of RuntimeStub frame |
2267 __ push(rsi); | 2297 __ movptr(from, from_param); |
2268 __ movptr(from , from_param); | 2298 __ movptr(key, key_param); |
2269 __ movptr(to , to_param); | 2299 |
2270 __ movptr(key , key_param); | 2300 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
2271 | |
2272 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | 2301 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2273 // keylen = # of 32-bit words, convert to 128-bit words | |
2274 __ shrl(keylen, 2); | |
2275 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more | |
2276 | 2302 |
2277 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | 2303 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
2278 __ movdqu(xmm_result, Address(from, 0)); | 2304 __ movdqu(xmm_result, Address(from, 0)); |
2305 __ movptr(to, to_param); | |
2279 | 2306 |
2280 // for decryption java expanded key ordering is rotated one position from what we want | 2307 // for decryption java expanded key ordering is rotated one position from what we want |
2281 // so we start from 0x10 here and hit 0x00 last | 2308 // so we start from 0x10 here and hit 0x00 last |
2282 // we don't know if the key is aligned, hence not using load-execute form | 2309 // we don't know if the key is aligned, hence not using load-execute form |
2283 load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); | 2310 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2284 __ pxor (xmm_result, xmm_temp); | 2311 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
2285 for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { | 2312 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
2286 aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); | 2313 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
2287 } | 2314 |
2288 __ cmpl(keylen, 0); | 2315 __ pxor (xmm_result, xmm_temp1); |
2289 __ jcc(Assembler::equal, L_doLast); | 2316 __ aesdec(xmm_result, xmm_temp2); |
2290 // only in 192 and 256 bit keys | 2317 __ aesdec(xmm_result, xmm_temp3); |
2291 aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); | 2318 __ aesdec(xmm_result, xmm_temp4); |
2292 aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); | 2319 |
2293 __ subl(keylen, 2); | 2320 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
2294 __ jcc(Assembler::equal, L_doLast); | 2321 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
2295 // only in 256 bit keys | 2322 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
2296 aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); | 2323 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
2297 aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); | 2324 |
2325 __ aesdec(xmm_result, xmm_temp1); | |
2326 __ aesdec(xmm_result, xmm_temp2); | |
2327 __ aesdec(xmm_result, xmm_temp3); | |
2328 __ aesdec(xmm_result, xmm_temp4); | |
2329 | |
2330 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); | |
2331 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); | |
2332 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); | |
2333 | |
2334 __ cmpl(keylen, 44); | |
2335 __ jccb(Assembler::equal, L_doLast); | |
2336 | |
2337 __ aesdec(xmm_result, xmm_temp1); | |
2338 __ aesdec(xmm_result, xmm_temp2); | |
2339 | |
2340 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); | |
2341 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); | |
2342 | |
2343 __ cmpl(keylen, 52); | |
2344 __ jccb(Assembler::equal, L_doLast); | |
2345 | |
2346 __ aesdec(xmm_result, xmm_temp1); | |
2347 __ aesdec(xmm_result, xmm_temp2); | |
2348 | |
2349 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); | |
2350 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); | |
2298 | 2351 |
2299 __ BIND(L_doLast); | 2352 __ BIND(L_doLast); |
2353 __ aesdec(xmm_result, xmm_temp1); | |
2354 __ aesdec(xmm_result, xmm_temp2); | |
2355 | |
2300 // for decryption the aesdeclast operation is always on key+0x00 | 2356 // for decryption the aesdeclast operation is always on key+0x00 |
2301 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); | 2357 __ aesdeclast(xmm_result, xmm_temp3); |
2302 __ aesdeclast(xmm_result, xmm_temp); | |
2303 | |
2304 __ movdqu(Address(to, 0), xmm_result); // store the result | 2358 __ movdqu(Address(to, 0), xmm_result); // store the result |
2305 | |
2306 __ xorptr(rax, rax); // return 0 | 2359 __ xorptr(rax, rax); // return 0 |
2307 __ pop(rsi); | |
2308 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2360 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2309 __ ret(0); | 2361 __ ret(0); |
2310 | 2362 |
2311 return start; | 2363 return start; |
2312 } | 2364 } |
2338 // c_rarg2 - K (key) in little endian int array | 2390 // c_rarg2 - K (key) in little endian int array |
2339 // c_rarg3 - r vector byte array address | 2391 // c_rarg3 - r vector byte array address |
2340 // c_rarg4 - input length | 2392 // c_rarg4 - input length |
2341 // | 2393 // |
2342 address generate_cipherBlockChaining_encryptAESCrypt() { | 2394 address generate_cipherBlockChaining_encryptAESCrypt() { |
2343 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); | 2395 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2344 __ align(CodeEntryAlignment); | 2396 __ align(CodeEntryAlignment); |
2345 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); | 2397 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); |
2346 address start = __ pc(); | 2398 address start = __ pc(); |
2347 | 2399 |
2348 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; | 2400 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; |
2391 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | 2443 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2392 __ cmpl(rax, 44); | 2444 __ cmpl(rax, 44); |
2393 __ jcc(Assembler::notEqual, L_key_192_256); | 2445 __ jcc(Assembler::notEqual, L_key_192_256); |
2394 | 2446 |
2395 // 128 bit code follows here | 2447 // 128 bit code follows here |
2396 __ movptr(pos, 0); | 2448 __ movl(pos, 0); |
2397 __ align(OptoLoopAlignment); | 2449 __ align(OptoLoopAlignment); |
2398 __ BIND(L_loopTop_128); | 2450 __ BIND(L_loopTop_128); |
2399 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input | 2451 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2400 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | 2452 __ pxor (xmm_result, xmm_temp); // xor with the current r vector |
2401 | 2453 |
2421 handleSOERegisters(false /*restoring*/); | 2473 handleSOERegisters(false /*restoring*/); |
2422 __ movl(rax, 0); // return 0 (why?) | 2474 __ movl(rax, 0); // return 0 (why?) |
2423 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2475 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2424 __ ret(0); | 2476 __ ret(0); |
2425 | 2477 |
2426 __ BIND(L_key_192_256); | 2478 __ BIND(L_key_192_256); |
2427 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) | 2479 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
2428 __ cmpl(rax, 52); | 2480 __ cmpl(rax, 52); |
2429 __ jcc(Assembler::notEqual, L_key_256); | 2481 __ jcc(Assembler::notEqual, L_key_256); |
2430 | 2482 |
2431 // 192-bit code follows here (could be changed to use more xmm registers) | 2483 // 192-bit code follows here (could be changed to use more xmm registers) |
2432 __ movptr(pos, 0); | 2484 __ movl(pos, 0); |
2433 __ align(OptoLoopAlignment); | 2485 __ align(OptoLoopAlignment); |
2434 __ BIND(L_loopTop_192); | 2486 __ BIND(L_loopTop_192); |
2435 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input | 2487 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2436 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | 2488 __ pxor (xmm_result, xmm_temp); // xor with the current r vector |
2437 | 2489 |
2438 __ pxor (xmm_result, xmm_key0); // do the aes rounds | 2490 __ pxor (xmm_result, xmm_key0); // do the aes rounds |
2439 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2491 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
2450 __ addptr(pos, AESBlockSize); | 2502 __ addptr(pos, AESBlockSize); |
2451 __ subptr(len_reg, AESBlockSize); | 2503 __ subptr(len_reg, AESBlockSize); |
2452 __ jcc(Assembler::notEqual, L_loopTop_192); | 2504 __ jcc(Assembler::notEqual, L_loopTop_192); |
2453 __ jmp(L_exit); | 2505 __ jmp(L_exit); |
2454 | 2506 |
2455 __ BIND(L_key_256); | 2507 __ BIND(L_key_256); |
2456 // 256-bit code follows here (could be changed to use more xmm registers) | 2508 // 256-bit code follows here (could be changed to use more xmm registers) |
2457 __ movptr(pos, 0); | 2509 __ movl(pos, 0); |
2458 __ align(OptoLoopAlignment); | 2510 __ align(OptoLoopAlignment); |
2459 __ BIND(L_loopTop_256); | 2511 __ BIND(L_loopTop_256); |
2460 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input | 2512 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2461 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | 2513 __ pxor (xmm_result, xmm_temp); // xor with the current r vector |
2462 | 2514 |
2463 __ pxor (xmm_result, xmm_key0); // do the aes rounds | 2515 __ pxor (xmm_result, xmm_key0); // do the aes rounds |
2464 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2516 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
2493 // c_rarg3 - r vector byte array address | 2545 // c_rarg3 - r vector byte array address |
2494 // c_rarg4 - input length | 2546 // c_rarg4 - input length |
2495 // | 2547 // |
2496 | 2548 |
2497 address generate_cipherBlockChaining_decryptAESCrypt() { | 2549 address generate_cipherBlockChaining_decryptAESCrypt() { |
2498 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); | 2550 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2499 __ align(CodeEntryAlignment); | 2551 __ align(CodeEntryAlignment); |
2500 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); | 2552 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); |
2501 address start = __ pc(); | 2553 address start = __ pc(); |
2502 | 2554 |
2503 Label L_exit, L_key_192_256, L_key_256; | 2555 Label L_exit, L_key_192_256, L_key_256; |
2554 __ cmpl(rax, 44); | 2606 __ cmpl(rax, 44); |
2555 __ jcc(Assembler::notEqual, L_key_192_256); | 2607 __ jcc(Assembler::notEqual, L_key_192_256); |
2556 | 2608 |
2557 | 2609 |
2558 // 128-bit code follows here, parallelized | 2610 // 128-bit code follows here, parallelized |
2559 __ movptr(pos, 0); | 2611 __ movl(pos, 0); |
2560 __ align(OptoLoopAlignment); | 2612 __ align(OptoLoopAlignment); |
2561 __ BIND(L_singleBlock_loopTop_128); | 2613 __ BIND(L_singleBlock_loopTop_128); |
2562 __ cmpptr(len_reg, 0); // any blocks left?? | 2614 __ cmpptr(len_reg, 0); // any blocks left?? |
2563 __ jcc(Assembler::equal, L_exit); | 2615 __ jcc(Assembler::equal, L_exit); |
2564 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | 2616 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
2565 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | 2617 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
2566 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2618 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
2595 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) | 2647 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
2596 __ cmpl(rax, 52); | 2648 __ cmpl(rax, 52); |
2597 __ jcc(Assembler::notEqual, L_key_256); | 2649 __ jcc(Assembler::notEqual, L_key_256); |
2598 | 2650 |
2599 // 192-bit code follows here (could be optimized to use parallelism) | 2651 // 192-bit code follows here (could be optimized to use parallelism) |
2600 __ movptr(pos, 0); | 2652 __ movl(pos, 0); |
2601 __ align(OptoLoopAlignment); | 2653 __ align(OptoLoopAlignment); |
2602 __ BIND(L_singleBlock_loopTop_192); | 2654 __ BIND(L_singleBlock_loopTop_192); |
2603 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | 2655 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
2604 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | 2656 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
2605 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2657 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
2620 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); | 2672 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); |
2621 __ jmp(L_exit); | 2673 __ jmp(L_exit); |
2622 | 2674 |
2623 __ BIND(L_key_256); | 2675 __ BIND(L_key_256); |
2624 // 256-bit code follows here (could be optimized to use parallelism) | 2676 // 256-bit code follows here (could be optimized to use parallelism) |
2625 __ movptr(pos, 0); | 2677 __ movl(pos, 0); |
2626 __ align(OptoLoopAlignment); | 2678 __ align(OptoLoopAlignment); |
2627 __ BIND(L_singleBlock_loopTop_256); | 2679 __ BIND(L_singleBlock_loopTop_256); |
2628 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | 2680 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
2629 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | 2681 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
2630 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2682 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |