comparison src/cpu/x86/vm/stubGenerator_x86_32.cpp @ 7482:989155e2d07a

Merge with hs25-b15.
author Thomas Wuerthinger <thomas.wuerthinger@oracle.com>
date Wed, 16 Jan 2013 01:34:24 +0100
parents 291ffc492eb6 e2e6bf86682c
children b9a918201d47
comparison
equal deleted inserted replaced
7381:6761a8f854a4 7482:989155e2d07a
794 // Copy 64-byte chunks 794 // Copy 64-byte chunks
795 __ jmpb(L_copy_64_bytes); 795 __ jmpb(L_copy_64_bytes);
796 __ align(OptoLoopAlignment); 796 __ align(OptoLoopAlignment);
797 __ BIND(L_copy_64_bytes_loop); 797 __ BIND(L_copy_64_bytes_loop);
798 798
799 if(UseUnalignedLoadStores) { 799 if (UseUnalignedLoadStores) {
800 __ movdqu(xmm0, Address(from, 0)); 800 if (UseAVX >= 2) {
801 __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); 801 __ vmovdqu(xmm0, Address(from, 0));
802 __ movdqu(xmm1, Address(from, 16)); 802 __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
803 __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); 803 __ vmovdqu(xmm1, Address(from, 32));
804 __ movdqu(xmm2, Address(from, 32)); 804 __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1);
805 __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); 805 } else {
806 __ movdqu(xmm3, Address(from, 48)); 806 __ movdqu(xmm0, Address(from, 0));
807 __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); 807 __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0);
808 808 __ movdqu(xmm1, Address(from, 16));
809 __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1);
810 __ movdqu(xmm2, Address(from, 32));
811 __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2);
812 __ movdqu(xmm3, Address(from, 48));
813 __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3);
814 }
809 } else { 815 } else {
810 __ movq(xmm0, Address(from, 0)); 816 __ movq(xmm0, Address(from, 0));
811 __ movq(Address(from, to_from, Address::times_1, 0), xmm0); 817 __ movq(Address(from, to_from, Address::times_1, 0), xmm0);
812 __ movq(xmm1, Address(from, 8)); 818 __ movq(xmm1, Address(from, 8));
813 __ movq(Address(from, to_from, Address::times_1, 8), xmm1); 819 __ movq(Address(from, to_from, Address::times_1, 8), xmm1);
2172 // c_rarg0 - source byte array address 2178 // c_rarg0 - source byte array address
2173 // c_rarg1 - destination byte array address 2179 // c_rarg1 - destination byte array address
2174 // c_rarg2 - K (key) in little endian int array 2180 // c_rarg2 - K (key) in little endian int array
2175 // 2181 //
2176 address generate_aescrypt_encryptBlock() { 2182 address generate_aescrypt_encryptBlock() {
2177 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); 2183 assert(UseAES, "need AES instructions and misaligned SSE support");
2178 __ align(CodeEntryAlignment); 2184 __ align(CodeEntryAlignment);
2179 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); 2185 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
2180 Label L_doLast; 2186 Label L_doLast;
2181 address start = __ pc(); 2187 address start = __ pc();
2182 2188
2183 const Register from = rsi; // source array address 2189 const Register from = rdx; // source array address
2184 const Register to = rdx; // destination array address 2190 const Register to = rdx; // destination array address
2185 const Register key = rcx; // key array address 2191 const Register key = rcx; // key array address
2186 const Register keylen = rax; 2192 const Register keylen = rax;
2187 const Address from_param(rbp, 8+0); 2193 const Address from_param(rbp, 8+0);
2188 const Address to_param (rbp, 8+4); 2194 const Address to_param (rbp, 8+4);
2189 const Address key_param (rbp, 8+8); 2195 const Address key_param (rbp, 8+8);
2190 2196
2191 const XMMRegister xmm_result = xmm0; 2197 const XMMRegister xmm_result = xmm0;
2192 const XMMRegister xmm_temp = xmm1; 2198 const XMMRegister xmm_key_shuf_mask = xmm1;
2193 const XMMRegister xmm_key_shuf_mask = xmm2; 2199 const XMMRegister xmm_temp1 = xmm2;
2194 2200 const XMMRegister xmm_temp2 = xmm3;
2195 __ enter(); // required for proper stackwalking of RuntimeStub frame 2201 const XMMRegister xmm_temp3 = xmm4;
2196 __ push(rsi); 2202 const XMMRegister xmm_temp4 = xmm5;
2197 __ movptr(from , from_param); 2203
2198 __ movptr(to , to_param); 2204 __ enter(); // required for proper stackwalking of RuntimeStub frame
2199 __ movptr(key , key_param); 2205 __ movptr(from, from_param);
2200 2206 __ movptr(key, key_param);
2207
2208 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
2201 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 2209 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2202 // keylen = # of 32-bit words, convert to 128-bit words
2203 __ shrl(keylen, 2);
2204 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
2205 2210
2206 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); 2211 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2207 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input 2212 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
2213 __ movptr(to, to_param);
2208 2214
2209 // For encryption, the java expanded key ordering is just what we need 2215 // For encryption, the java expanded key ordering is just what we need
2210 2216
2211 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); 2217 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
2212 __ pxor(xmm_result, xmm_temp); 2218 __ pxor(xmm_result, xmm_temp1);
2213 for (int offset = 0x10; offset <= 0x90; offset += 0x10) { 2219
2214 aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); 2220 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
2215 } 2221 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
2216 load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); 2222 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
2217 __ cmpl(keylen, 0); 2223 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
2218 __ jcc(Assembler::equal, L_doLast); 2224
2219 __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys 2225 __ aesenc(xmm_result, xmm_temp1);
2220 aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); 2226 __ aesenc(xmm_result, xmm_temp2);
2221 load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); 2227 __ aesenc(xmm_result, xmm_temp3);
2222 __ subl(keylen, 2); 2228 __ aesenc(xmm_result, xmm_temp4);
2223 __ jcc(Assembler::equal, L_doLast); 2229
2224 __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys 2230 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
2225 aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); 2231 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
2226 load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); 2232 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
2233 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
2234
2235 __ aesenc(xmm_result, xmm_temp1);
2236 __ aesenc(xmm_result, xmm_temp2);
2237 __ aesenc(xmm_result, xmm_temp3);
2238 __ aesenc(xmm_result, xmm_temp4);
2239
2240 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
2241 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
2242
2243 __ cmpl(keylen, 44);
2244 __ jccb(Assembler::equal, L_doLast);
2245
2246 __ aesenc(xmm_result, xmm_temp1);
2247 __ aesenc(xmm_result, xmm_temp2);
2248
2249 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
2250 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
2251
2252 __ cmpl(keylen, 52);
2253 __ jccb(Assembler::equal, L_doLast);
2254
2255 __ aesenc(xmm_result, xmm_temp1);
2256 __ aesenc(xmm_result, xmm_temp2);
2257
2258 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
2259 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
2227 2260
2228 __ BIND(L_doLast); 2261 __ BIND(L_doLast);
2229 __ aesenclast(xmm_result, xmm_temp); 2262 __ aesenc(xmm_result, xmm_temp1);
2263 __ aesenclast(xmm_result, xmm_temp2);
2230 __ movdqu(Address(to, 0), xmm_result); // store the result 2264 __ movdqu(Address(to, 0), xmm_result); // store the result
2231 __ xorptr(rax, rax); // return 0 2265 __ xorptr(rax, rax); // return 0
2232 __ pop(rsi);
2233 __ leave(); // required for proper stackwalking of RuntimeStub frame 2266 __ leave(); // required for proper stackwalking of RuntimeStub frame
2234 __ ret(0); 2267 __ ret(0);
2235 2268
2236 return start; 2269 return start;
2237 } 2270 }
2243 // c_rarg0 - source byte array address 2276 // c_rarg0 - source byte array address
2244 // c_rarg1 - destination byte array address 2277 // c_rarg1 - destination byte array address
2245 // c_rarg2 - K (key) in little endian int array 2278 // c_rarg2 - K (key) in little endian int array
2246 // 2279 //
2247 address generate_aescrypt_decryptBlock() { 2280 address generate_aescrypt_decryptBlock() {
2248 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); 2281 assert(UseAES, "need AES instructions and misaligned SSE support");
2249 __ align(CodeEntryAlignment); 2282 __ align(CodeEntryAlignment);
2250 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); 2283 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2251 Label L_doLast; 2284 Label L_doLast;
2252 address start = __ pc(); 2285 address start = __ pc();
2253 2286
2254 const Register from = rsi; // source array address 2287 const Register from = rdx; // source array address
2255 const Register to = rdx; // destination array address 2288 const Register to = rdx; // destination array address
2256 const Register key = rcx; // key array address 2289 const Register key = rcx; // key array address
2257 const Register keylen = rax; 2290 const Register keylen = rax;
2258 const Address from_param(rbp, 8+0); 2291 const Address from_param(rbp, 8+0);
2259 const Address to_param (rbp, 8+4); 2292 const Address to_param (rbp, 8+4);
2260 const Address key_param (rbp, 8+8); 2293 const Address key_param (rbp, 8+8);
2261 2294
2262 const XMMRegister xmm_result = xmm0; 2295 const XMMRegister xmm_result = xmm0;
2263 const XMMRegister xmm_temp = xmm1; 2296 const XMMRegister xmm_key_shuf_mask = xmm1;
2264 const XMMRegister xmm_key_shuf_mask = xmm2; 2297 const XMMRegister xmm_temp1 = xmm2;
2298 const XMMRegister xmm_temp2 = xmm3;
2299 const XMMRegister xmm_temp3 = xmm4;
2300 const XMMRegister xmm_temp4 = xmm5;
2265 2301
2266 __ enter(); // required for proper stackwalking of RuntimeStub frame 2302 __ enter(); // required for proper stackwalking of RuntimeStub frame
2267 __ push(rsi); 2303 __ movptr(from, from_param);
2268 __ movptr(from , from_param); 2304 __ movptr(key, key_param);
2269 __ movptr(to , to_param); 2305
2270 __ movptr(key , key_param); 2306 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
2271
2272 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 2307 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2273 // keylen = # of 32-bit words, convert to 128-bit words
2274 __ shrl(keylen, 2);
2275 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
2276 2308
2277 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); 2309 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2278 __ movdqu(xmm_result, Address(from, 0)); 2310 __ movdqu(xmm_result, Address(from, 0));
2311 __ movptr(to, to_param);
2279 2312
2280 // for decryption java expanded key ordering is rotated one position from what we want 2313 // for decryption java expanded key ordering is rotated one position from what we want
2281 // so we start from 0x10 here and hit 0x00 last 2314 // so we start from 0x10 here and hit 0x00 last
2282 // we don't know if the key is aligned, hence not using load-execute form 2315 // we don't know if the key is aligned, hence not using load-execute form
2283 load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); 2316 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
2284 __ pxor (xmm_result, xmm_temp); 2317 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
2285 for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { 2318 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
2286 aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); 2319 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
2287 } 2320
2288 __ cmpl(keylen, 0); 2321 __ pxor (xmm_result, xmm_temp1);
2289 __ jcc(Assembler::equal, L_doLast); 2322 __ aesdec(xmm_result, xmm_temp2);
2290 // only in 192 and 256 bit keys 2323 __ aesdec(xmm_result, xmm_temp3);
2291 aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); 2324 __ aesdec(xmm_result, xmm_temp4);
2292 aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); 2325
2293 __ subl(keylen, 2); 2326 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
2294 __ jcc(Assembler::equal, L_doLast); 2327 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
2295 // only in 256 bit keys 2328 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
2296 aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); 2329 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
2297 aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); 2330
2331 __ aesdec(xmm_result, xmm_temp1);
2332 __ aesdec(xmm_result, xmm_temp2);
2333 __ aesdec(xmm_result, xmm_temp3);
2334 __ aesdec(xmm_result, xmm_temp4);
2335
2336 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
2337 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
2338 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
2339
2340 __ cmpl(keylen, 44);
2341 __ jccb(Assembler::equal, L_doLast);
2342
2343 __ aesdec(xmm_result, xmm_temp1);
2344 __ aesdec(xmm_result, xmm_temp2);
2345
2346 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
2347 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
2348
2349 __ cmpl(keylen, 52);
2350 __ jccb(Assembler::equal, L_doLast);
2351
2352 __ aesdec(xmm_result, xmm_temp1);
2353 __ aesdec(xmm_result, xmm_temp2);
2354
2355 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
2356 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
2298 2357
2299 __ BIND(L_doLast); 2358 __ BIND(L_doLast);
2359 __ aesdec(xmm_result, xmm_temp1);
2360 __ aesdec(xmm_result, xmm_temp2);
2361
2300 // for decryption the aesdeclast operation is always on key+0x00 2362 // for decryption the aesdeclast operation is always on key+0x00
2301 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); 2363 __ aesdeclast(xmm_result, xmm_temp3);
2302 __ aesdeclast(xmm_result, xmm_temp);
2303
2304 __ movdqu(Address(to, 0), xmm_result); // store the result 2364 __ movdqu(Address(to, 0), xmm_result); // store the result
2305
2306 __ xorptr(rax, rax); // return 0 2365 __ xorptr(rax, rax); // return 0
2307 __ pop(rsi);
2308 __ leave(); // required for proper stackwalking of RuntimeStub frame 2366 __ leave(); // required for proper stackwalking of RuntimeStub frame
2309 __ ret(0); 2367 __ ret(0);
2310 2368
2311 return start; 2369 return start;
2312 } 2370 }
2338 // c_rarg2 - K (key) in little endian int array 2396 // c_rarg2 - K (key) in little endian int array
2339 // c_rarg3 - r vector byte array address 2397 // c_rarg3 - r vector byte array address
2340 // c_rarg4 - input length 2398 // c_rarg4 - input length
2341 // 2399 //
2342 address generate_cipherBlockChaining_encryptAESCrypt() { 2400 address generate_cipherBlockChaining_encryptAESCrypt() {
2343 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); 2401 assert(UseAES, "need AES instructions and misaligned SSE support");
2344 __ align(CodeEntryAlignment); 2402 __ align(CodeEntryAlignment);
2345 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); 2403 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
2346 address start = __ pc(); 2404 address start = __ pc();
2347 2405
2348 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; 2406 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
2391 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 2449 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2392 __ cmpl(rax, 44); 2450 __ cmpl(rax, 44);
2393 __ jcc(Assembler::notEqual, L_key_192_256); 2451 __ jcc(Assembler::notEqual, L_key_192_256);
2394 2452
2395 // 128 bit code follows here 2453 // 128 bit code follows here
2396 __ movptr(pos, 0); 2454 __ movl(pos, 0);
2397 __ align(OptoLoopAlignment); 2455 __ align(OptoLoopAlignment);
2398 __ BIND(L_loopTop_128); 2456 __ BIND(L_loopTop_128);
2399 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input 2457 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
2400 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 2458 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2401 2459
2421 handleSOERegisters(false /*restoring*/); 2479 handleSOERegisters(false /*restoring*/);
2422 __ movl(rax, 0); // return 0 (why?) 2480 __ movl(rax, 0); // return 0 (why?)
2423 __ leave(); // required for proper stackwalking of RuntimeStub frame 2481 __ leave(); // required for proper stackwalking of RuntimeStub frame
2424 __ ret(0); 2482 __ ret(0);
2425 2483
2426 __ BIND(L_key_192_256); 2484 __ BIND(L_key_192_256);
2427 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) 2485 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
2428 __ cmpl(rax, 52); 2486 __ cmpl(rax, 52);
2429 __ jcc(Assembler::notEqual, L_key_256); 2487 __ jcc(Assembler::notEqual, L_key_256);
2430 2488
2431 // 192-bit code follows here (could be changed to use more xmm registers) 2489 // 192-bit code follows here (could be changed to use more xmm registers)
2432 __ movptr(pos, 0); 2490 __ movl(pos, 0);
2433 __ align(OptoLoopAlignment); 2491 __ align(OptoLoopAlignment);
2434 __ BIND(L_loopTop_192); 2492 __ BIND(L_loopTop_192);
2435 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input 2493 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
2436 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 2494 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2437 2495
2438 __ pxor (xmm_result, xmm_key0); // do the aes rounds 2496 __ pxor (xmm_result, xmm_key0); // do the aes rounds
2439 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2497 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2450 __ addptr(pos, AESBlockSize); 2508 __ addptr(pos, AESBlockSize);
2451 __ subptr(len_reg, AESBlockSize); 2509 __ subptr(len_reg, AESBlockSize);
2452 __ jcc(Assembler::notEqual, L_loopTop_192); 2510 __ jcc(Assembler::notEqual, L_loopTop_192);
2453 __ jmp(L_exit); 2511 __ jmp(L_exit);
2454 2512
2455 __ BIND(L_key_256); 2513 __ BIND(L_key_256);
2456 // 256-bit code follows here (could be changed to use more xmm registers) 2514 // 256-bit code follows here (could be changed to use more xmm registers)
2457 __ movptr(pos, 0); 2515 __ movl(pos, 0);
2458 __ align(OptoLoopAlignment); 2516 __ align(OptoLoopAlignment);
2459 __ BIND(L_loopTop_256); 2517 __ BIND(L_loopTop_256);
2460 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input 2518 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
2461 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 2519 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2462 2520
2463 __ pxor (xmm_result, xmm_key0); // do the aes rounds 2521 __ pxor (xmm_result, xmm_key0); // do the aes rounds
2464 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2522 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2493 // c_rarg3 - r vector byte array address 2551 // c_rarg3 - r vector byte array address
2494 // c_rarg4 - input length 2552 // c_rarg4 - input length
2495 // 2553 //
2496 2554
2497 address generate_cipherBlockChaining_decryptAESCrypt() { 2555 address generate_cipherBlockChaining_decryptAESCrypt() {
2498 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); 2556 assert(UseAES, "need AES instructions and misaligned SSE support");
2499 __ align(CodeEntryAlignment); 2557 __ align(CodeEntryAlignment);
2500 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); 2558 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
2501 address start = __ pc(); 2559 address start = __ pc();
2502 2560
2503 Label L_exit, L_key_192_256, L_key_256; 2561 Label L_exit, L_key_192_256, L_key_256;
2554 __ cmpl(rax, 44); 2612 __ cmpl(rax, 44);
2555 __ jcc(Assembler::notEqual, L_key_192_256); 2613 __ jcc(Assembler::notEqual, L_key_192_256);
2556 2614
2557 2615
2558 // 128-bit code follows here, parallelized 2616 // 128-bit code follows here, parallelized
2559 __ movptr(pos, 0); 2617 __ movl(pos, 0);
2560 __ align(OptoLoopAlignment); 2618 __ align(OptoLoopAlignment);
2561 __ BIND(L_singleBlock_loopTop_128); 2619 __ BIND(L_singleBlock_loopTop_128);
2562 __ cmpptr(len_reg, 0); // any blocks left?? 2620 __ cmpptr(len_reg, 0); // any blocks left??
2563 __ jcc(Assembler::equal, L_exit); 2621 __ jcc(Assembler::equal, L_exit);
2564 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 2622 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
2565 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds 2623 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
2566 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2624 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2595 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) 2653 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
2596 __ cmpl(rax, 52); 2654 __ cmpl(rax, 52);
2597 __ jcc(Assembler::notEqual, L_key_256); 2655 __ jcc(Assembler::notEqual, L_key_256);
2598 2656
2599 // 192-bit code follows here (could be optimized to use parallelism) 2657 // 192-bit code follows here (could be optimized to use parallelism)
2600 __ movptr(pos, 0); 2658 __ movl(pos, 0);
2601 __ align(OptoLoopAlignment); 2659 __ align(OptoLoopAlignment);
2602 __ BIND(L_singleBlock_loopTop_192); 2660 __ BIND(L_singleBlock_loopTop_192);
2603 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 2661 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
2604 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds 2662 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
2605 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2663 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2620 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); 2678 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
2621 __ jmp(L_exit); 2679 __ jmp(L_exit);
2622 2680
2623 __ BIND(L_key_256); 2681 __ BIND(L_key_256);
2624 // 256-bit code follows here (could be optimized to use parallelism) 2682 // 256-bit code follows here (could be optimized to use parallelism)
2625 __ movptr(pos, 0); 2683 __ movl(pos, 0);
2626 __ align(OptoLoopAlignment); 2684 __ align(OptoLoopAlignment);
2627 __ BIND(L_singleBlock_loopTop_256); 2685 __ BIND(L_singleBlock_loopTop_256);
2628 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 2686 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
2629 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds 2687 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
2630 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2688 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {