Mercurial > hg > graal-compiler
comparison src/cpu/x86/vm/stubGenerator_x86_32.cpp @ 7482:989155e2d07a
Merge with hs25-b15.
author | Thomas Wuerthinger <thomas.wuerthinger@oracle.com> |
---|---|
date | Wed, 16 Jan 2013 01:34:24 +0100 |
parents | 291ffc492eb6 e2e6bf86682c |
children | b9a918201d47 |
comparison
equal
deleted
inserted
replaced
7381:6761a8f854a4 | 7482:989155e2d07a |
---|---|
794 // Copy 64-byte chunks | 794 // Copy 64-byte chunks |
795 __ jmpb(L_copy_64_bytes); | 795 __ jmpb(L_copy_64_bytes); |
796 __ align(OptoLoopAlignment); | 796 __ align(OptoLoopAlignment); |
797 __ BIND(L_copy_64_bytes_loop); | 797 __ BIND(L_copy_64_bytes_loop); |
798 | 798 |
799 if(UseUnalignedLoadStores) { | 799 if (UseUnalignedLoadStores) { |
800 __ movdqu(xmm0, Address(from, 0)); | 800 if (UseAVX >= 2) { |
801 __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); | 801 __ vmovdqu(xmm0, Address(from, 0)); |
802 __ movdqu(xmm1, Address(from, 16)); | 802 __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0); |
803 __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); | 803 __ vmovdqu(xmm1, Address(from, 32)); |
804 __ movdqu(xmm2, Address(from, 32)); | 804 __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1); |
805 __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); | 805 } else { |
806 __ movdqu(xmm3, Address(from, 48)); | 806 __ movdqu(xmm0, Address(from, 0)); |
807 __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); | 807 __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); |
808 | 808 __ movdqu(xmm1, Address(from, 16)); |
| | 809 __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); |
| | 810 __ movdqu(xmm2, Address(from, 32)); |
| | 811 __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); |
| | 812 __ movdqu(xmm3, Address(from, 48)); |
| | 813 __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); |
| | 814 } |
809 } else { | 815 } else { |
810 __ movq(xmm0, Address(from, 0)); | 816 __ movq(xmm0, Address(from, 0)); |
811 __ movq(Address(from, to_from, Address::times_1, 0), xmm0); | 817 __ movq(Address(from, to_from, Address::times_1, 0), xmm0); |
812 __ movq(xmm1, Address(from, 8)); | 818 __ movq(xmm1, Address(from, 8)); |
813 __ movq(Address(from, to_from, Address::times_1, 8), xmm1); | 819 __ movq(Address(from, to_from, Address::times_1, 8), xmm1); |
2172 // c_rarg0 - source byte array address | 2178 // c_rarg0 - source byte array address |
2173 // c_rarg1 - destination byte array address | 2179 // c_rarg1 - destination byte array address |
2174 // c_rarg2 - K (key) in little endian int array | 2180 // c_rarg2 - K (key) in little endian int array |
2175 // | 2181 // |
2176 address generate_aescrypt_encryptBlock() { | 2182 address generate_aescrypt_encryptBlock() { |
2177 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); | 2183 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2178 __ align(CodeEntryAlignment); | 2184 __ align(CodeEntryAlignment); |
2179 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); | 2185 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); |
2180 Label L_doLast; | 2186 Label L_doLast; |
2181 address start = __ pc(); | 2187 address start = __ pc(); |
2182 | 2188 |
2183 const Register from = rsi; // source array address | 2189 const Register from = rdx; // source array address |
2184 const Register to = rdx; // destination array address | 2190 const Register to = rdx; // destination array address |
2185 const Register key = rcx; // key array address | 2191 const Register key = rcx; // key array address |
2186 const Register keylen = rax; | 2192 const Register keylen = rax; |
2187 const Address from_param(rbp, 8+0); | 2193 const Address from_param(rbp, 8+0); |
2188 const Address to_param (rbp, 8+4); | 2194 const Address to_param (rbp, 8+4); |
2189 const Address key_param (rbp, 8+8); | 2195 const Address key_param (rbp, 8+8); |
2190 | 2196 |
2191 const XMMRegister xmm_result = xmm0; | 2197 const XMMRegister xmm_result = xmm0; |
2192 const XMMRegister xmm_temp = xmm1; | 2198 const XMMRegister xmm_key_shuf_mask = xmm1; |
2193 const XMMRegister xmm_key_shuf_mask = xmm2; | 2199 const XMMRegister xmm_temp1 = xmm2; |
2194 | 2200 const XMMRegister xmm_temp2 = xmm3; |
2195 __ enter(); // required for proper stackwalking of RuntimeStub frame | 2201 const XMMRegister xmm_temp3 = xmm4; |
2196 __ push(rsi); | 2202 const XMMRegister xmm_temp4 = xmm5; |
2197 __ movptr(from , from_param); | 2203 |
2198 __ movptr(to , to_param); | 2204 __ enter(); // required for proper stackwalking of RuntimeStub frame |
2199 __ movptr(key , key_param); | 2205 __ movptr(from, from_param); |
2200 | 2206 __ movptr(key, key_param); |
| | 2207 |
| | 2208 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
2201 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | 2209 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2202 // keylen = # of 32-bit words, convert to 128-bit words | |
2203 __ shrl(keylen, 2); | |
2204 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more | |
2205 | 2210 |
2206 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | 2211 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
2207 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input | 2212 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input |
| | 2213 __ movptr(to, to_param); |
2208 | 2214 |
2209 // For encryption, the java expanded key ordering is just what we need | 2215 // For encryption, the java expanded key ordering is just what we need |
2210 | 2216 |
2211 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); | 2217 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); |
2212 __ pxor(xmm_result, xmm_temp); | 2218 __ pxor(xmm_result, xmm_temp1); |
2213 for (int offset = 0x10; offset <= 0x90; offset += 0x10) { | 2219 |
2214 aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); | 2220 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2215 } | 2221 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
2216 load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask); | 2222 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
2217 __ cmpl(keylen, 0); | 2223 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
2218 __ jcc(Assembler::equal, L_doLast); | 2224 |
2219 __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys | 2225 __ aesenc(xmm_result, xmm_temp1); |
2220 aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); | 2226 __ aesenc(xmm_result, xmm_temp2); |
2221 load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask); | 2227 __ aesenc(xmm_result, xmm_temp3); |
2222 __ subl(keylen, 2); | 2228 __ aesenc(xmm_result, xmm_temp4); |
2223 __ jcc(Assembler::equal, L_doLast); | 2229 |
2224 __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys | 2230 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
2225 aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); | 2231 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
2226 load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask); | 2232 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
| | 2233 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
| | 2234 |
| | 2235 __ aesenc(xmm_result, xmm_temp1); |
| | 2236 __ aesenc(xmm_result, xmm_temp2); |
| | 2237 __ aesenc(xmm_result, xmm_temp3); |
| | 2238 __ aesenc(xmm_result, xmm_temp4); |
| | 2239 |
| | 2240 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); |
| | 2241 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); |
| | 2242 |
| | 2243 __ cmpl(keylen, 44); |
| | 2244 __ jccb(Assembler::equal, L_doLast); |
| | 2245 |
| | 2246 __ aesenc(xmm_result, xmm_temp1); |
| | 2247 __ aesenc(xmm_result, xmm_temp2); |
| | 2248 |
| | 2249 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); |
| | 2250 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); |
| | 2251 |
| | 2252 __ cmpl(keylen, 52); |
| | 2253 __ jccb(Assembler::equal, L_doLast); |
| | 2254 |
| | 2255 __ aesenc(xmm_result, xmm_temp1); |
| | 2256 __ aesenc(xmm_result, xmm_temp2); |
| | 2257 |
| | 2258 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); |
| | 2259 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); |
2227 | 2260 |
2228 __ BIND(L_doLast); | 2261 __ BIND(L_doLast); |
2229 __ aesenclast(xmm_result, xmm_temp); | 2262 __ aesenc(xmm_result, xmm_temp1); |
| | 2263 __ aesenclast(xmm_result, xmm_temp2); |
2230 __ movdqu(Address(to, 0), xmm_result); // store the result | 2264 __ movdqu(Address(to, 0), xmm_result); // store the result |
2231 __ xorptr(rax, rax); // return 0 | 2265 __ xorptr(rax, rax); // return 0 |
2232 __ pop(rsi); | |
2233 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2266 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2234 __ ret(0); | 2267 __ ret(0); |
2235 | 2268 |
2236 return start; | 2269 return start; |
2237 } | 2270 } |
2243 // c_rarg0 - source byte array address | 2276 // c_rarg0 - source byte array address |
2244 // c_rarg1 - destination byte array address | 2277 // c_rarg1 - destination byte array address |
2245 // c_rarg2 - K (key) in little endian int array | 2278 // c_rarg2 - K (key) in little endian int array |
2246 // | 2279 // |
2247 address generate_aescrypt_decryptBlock() { | 2280 address generate_aescrypt_decryptBlock() { |
2248 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); | 2281 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2249 __ align(CodeEntryAlignment); | 2282 __ align(CodeEntryAlignment); |
2250 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); | 2283 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); |
2251 Label L_doLast; | 2284 Label L_doLast; |
2252 address start = __ pc(); | 2285 address start = __ pc(); |
2253 | 2286 |
2254 const Register from = rsi; // source array address | 2287 const Register from = rdx; // source array address |
2255 const Register to = rdx; // destination array address | 2288 const Register to = rdx; // destination array address |
2256 const Register key = rcx; // key array address | 2289 const Register key = rcx; // key array address |
2257 const Register keylen = rax; | 2290 const Register keylen = rax; |
2258 const Address from_param(rbp, 8+0); | 2291 const Address from_param(rbp, 8+0); |
2259 const Address to_param (rbp, 8+4); | 2292 const Address to_param (rbp, 8+4); |
2260 const Address key_param (rbp, 8+8); | 2293 const Address key_param (rbp, 8+8); |
2261 | 2294 |
2262 const XMMRegister xmm_result = xmm0; | 2295 const XMMRegister xmm_result = xmm0; |
2263 const XMMRegister xmm_temp = xmm1; | 2296 const XMMRegister xmm_key_shuf_mask = xmm1; |
2264 const XMMRegister xmm_key_shuf_mask = xmm2; | 2297 const XMMRegister xmm_temp1 = xmm2; |
| | 2298 const XMMRegister xmm_temp2 = xmm3; |
| | 2299 const XMMRegister xmm_temp3 = xmm4; |
| | 2300 const XMMRegister xmm_temp4 = xmm5; |
2265 | 2301 |
2266 __ enter(); // required for proper stackwalking of RuntimeStub frame | 2302 __ enter(); // required for proper stackwalking of RuntimeStub frame |
2267 __ push(rsi); | 2303 __ movptr(from, from_param); |
2268 __ movptr(from , from_param); | 2304 __ movptr(key, key_param); |
2269 __ movptr(to , to_param); | 2305 |
2270 __ movptr(key , key_param); | 2306 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} |
2271 | |
2272 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | 2307 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2273 // keylen = # of 32-bit words, convert to 128-bit words | |
2274 __ shrl(keylen, 2); | |
2275 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more | |
2276 | 2308 |
2277 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | 2309 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
2278 __ movdqu(xmm_result, Address(from, 0)); | 2310 __ movdqu(xmm_result, Address(from, 0)); |
| | 2311 __ movptr(to, to_param); |
2279 | 2312 |
2280 // for decryption java expanded key ordering is rotated one position from what we want | 2313 // for decryption java expanded key ordering is rotated one position from what we want |
2281 // so we start from 0x10 here and hit 0x00 last | 2314 // so we start from 0x10 here and hit 0x00 last |
2282 // we don't know if the key is aligned, hence not using load-execute form | 2315 // we don't know if the key is aligned, hence not using load-execute form |
2283 load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask); | 2316 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2284 __ pxor (xmm_result, xmm_temp); | 2317 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); |
2285 for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { | 2318 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); |
2286 aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask); | 2319 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); |
2287 } | 2320 |
2288 __ cmpl(keylen, 0); | 2321 __ pxor (xmm_result, xmm_temp1); |
2289 __ jcc(Assembler::equal, L_doLast); | 2322 __ aesdec(xmm_result, xmm_temp2); |
2290 // only in 192 and 256 bit keys | 2323 __ aesdec(xmm_result, xmm_temp3); |
2291 aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask); | 2324 __ aesdec(xmm_result, xmm_temp4); |
2292 aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask); | 2325 |
2293 __ subl(keylen, 2); | 2326 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); |
2294 __ jcc(Assembler::equal, L_doLast); | 2327 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); |
2295 // only in 256 bit keys | 2328 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); |
2296 aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask); | 2329 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); |
2297 aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask); | 2330 |
| | 2331 __ aesdec(xmm_result, xmm_temp1); |
| | 2332 __ aesdec(xmm_result, xmm_temp2); |
| | 2333 __ aesdec(xmm_result, xmm_temp3); |
| | 2334 __ aesdec(xmm_result, xmm_temp4); |
| | 2335 |
| | 2336 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); |
| | 2337 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); |
| | 2338 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); |
| | 2339 |
| | 2340 __ cmpl(keylen, 44); |
| | 2341 __ jccb(Assembler::equal, L_doLast); |
| | 2342 |
| | 2343 __ aesdec(xmm_result, xmm_temp1); |
| | 2344 __ aesdec(xmm_result, xmm_temp2); |
| | 2345 |
| | 2346 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); |
| | 2347 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); |
| | 2348 |
| | 2349 __ cmpl(keylen, 52); |
| | 2350 __ jccb(Assembler::equal, L_doLast); |
| | 2351 |
| | 2352 __ aesdec(xmm_result, xmm_temp1); |
| | 2353 __ aesdec(xmm_result, xmm_temp2); |
| | 2354 |
| | 2355 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); |
| | 2356 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); |
2298 | 2357 |
2299 __ BIND(L_doLast); | 2358 __ BIND(L_doLast); |
| | 2359 __ aesdec(xmm_result, xmm_temp1); |
| | 2360 __ aesdec(xmm_result, xmm_temp2); |
| | 2361 |
2300 // for decryption the aesdeclast operation is always on key+0x00 | 2362 // for decryption the aesdeclast operation is always on key+0x00 |
2301 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask); | 2363 __ aesdeclast(xmm_result, xmm_temp3); |
2302 __ aesdeclast(xmm_result, xmm_temp); | |
2303 | |
2304 __ movdqu(Address(to, 0), xmm_result); // store the result | 2364 __ movdqu(Address(to, 0), xmm_result); // store the result |
2305 | |
2306 __ xorptr(rax, rax); // return 0 | 2365 __ xorptr(rax, rax); // return 0 |
2307 __ pop(rsi); | |
2308 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2366 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2309 __ ret(0); | 2367 __ ret(0); |
2310 | 2368 |
2311 return start; | 2369 return start; |
2312 } | 2370 } |
2338 // c_rarg2 - K (key) in little endian int array | 2396 // c_rarg2 - K (key) in little endian int array |
2339 // c_rarg3 - r vector byte array address | 2397 // c_rarg3 - r vector byte array address |
2340 // c_rarg4 - input length | 2398 // c_rarg4 - input length |
2341 // | 2399 // |
2342 address generate_cipherBlockChaining_encryptAESCrypt() { | 2400 address generate_cipherBlockChaining_encryptAESCrypt() { |
2343 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); | 2401 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2344 __ align(CodeEntryAlignment); | 2402 __ align(CodeEntryAlignment); |
2345 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); | 2403 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); |
2346 address start = __ pc(); | 2404 address start = __ pc(); |
2347 | 2405 |
2348 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; | 2406 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; |
2391 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | 2449 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2392 __ cmpl(rax, 44); | 2450 __ cmpl(rax, 44); |
2393 __ jcc(Assembler::notEqual, L_key_192_256); | 2451 __ jcc(Assembler::notEqual, L_key_192_256); |
2394 | 2452 |
2395 // 128 bit code follows here | 2453 // 128 bit code follows here |
2396 __ movptr(pos, 0); | 2454 __ movl(pos, 0); |
2397 __ align(OptoLoopAlignment); | 2455 __ align(OptoLoopAlignment); |
2398 __ BIND(L_loopTop_128); | 2456 __ BIND(L_loopTop_128); |
2399 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input | 2457 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2400 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | 2458 __ pxor (xmm_result, xmm_temp); // xor with the current r vector |
2401 | 2459 |
2421 handleSOERegisters(false /*restoring*/); | 2479 handleSOERegisters(false /*restoring*/); |
2422 __ movl(rax, 0); // return 0 (why?) | 2480 __ movl(rax, 0); // return 0 (why?) |
2423 __ leave(); // required for proper stackwalking of RuntimeStub frame | 2481 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2424 __ ret(0); | 2482 __ ret(0); |
2425 | 2483 |
2426 __ BIND(L_key_192_256); | 2484 __ BIND(L_key_192_256); |
2427 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) | 2485 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
2428 __ cmpl(rax, 52); | 2486 __ cmpl(rax, 52); |
2429 __ jcc(Assembler::notEqual, L_key_256); | 2487 __ jcc(Assembler::notEqual, L_key_256); |
2430 | 2488 |
2431 // 192-bit code follows here (could be changed to use more xmm registers) | 2489 // 192-bit code follows here (could be changed to use more xmm registers) |
2432 __ movptr(pos, 0); | 2490 __ movl(pos, 0); |
2433 __ align(OptoLoopAlignment); | 2491 __ align(OptoLoopAlignment); |
2434 __ BIND(L_loopTop_192); | 2492 __ BIND(L_loopTop_192); |
2435 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input | 2493 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2436 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | 2494 __ pxor (xmm_result, xmm_temp); // xor with the current r vector |
2437 | 2495 |
2438 __ pxor (xmm_result, xmm_key0); // do the aes rounds | 2496 __ pxor (xmm_result, xmm_key0); // do the aes rounds |
2439 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2497 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
2450 __ addptr(pos, AESBlockSize); | 2508 __ addptr(pos, AESBlockSize); |
2451 __ subptr(len_reg, AESBlockSize); | 2509 __ subptr(len_reg, AESBlockSize); |
2452 __ jcc(Assembler::notEqual, L_loopTop_192); | 2510 __ jcc(Assembler::notEqual, L_loopTop_192); |
2453 __ jmp(L_exit); | 2511 __ jmp(L_exit); |
2454 | 2512 |
2455 __ BIND(L_key_256); | 2513 __ BIND(L_key_256); |
2456 // 256-bit code follows here (could be changed to use more xmm registers) | 2514 // 256-bit code follows here (could be changed to use more xmm registers) |
2457 __ movptr(pos, 0); | 2515 __ movl(pos, 0); |
2458 __ align(OptoLoopAlignment); | 2516 __ align(OptoLoopAlignment); |
2459 __ BIND(L_loopTop_256); | 2517 __ BIND(L_loopTop_256); |
2460 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input | 2518 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2461 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | 2519 __ pxor (xmm_result, xmm_temp); // xor with the current r vector |
2462 | 2520 |
2463 __ pxor (xmm_result, xmm_key0); // do the aes rounds | 2521 __ pxor (xmm_result, xmm_key0); // do the aes rounds |
2464 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2522 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
2493 // c_rarg3 - r vector byte array address | 2551 // c_rarg3 - r vector byte array address |
2494 // c_rarg4 - input length | 2552 // c_rarg4 - input length |
2495 // | 2553 // |
2496 | 2554 |
2497 address generate_cipherBlockChaining_decryptAESCrypt() { | 2555 address generate_cipherBlockChaining_decryptAESCrypt() { |
2498 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support"); | 2556 assert(UseAES, "need AES instructions and misaligned SSE support"); |
2499 __ align(CodeEntryAlignment); | 2557 __ align(CodeEntryAlignment); |
2500 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); | 2558 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); |
2501 address start = __ pc(); | 2559 address start = __ pc(); |
2502 | 2560 |
2503 Label L_exit, L_key_192_256, L_key_256; | 2561 Label L_exit, L_key_192_256, L_key_256; |
2554 __ cmpl(rax, 44); | 2612 __ cmpl(rax, 44); |
2555 __ jcc(Assembler::notEqual, L_key_192_256); | 2613 __ jcc(Assembler::notEqual, L_key_192_256); |
2556 | 2614 |
2557 | 2615 |
2558 // 128-bit code follows here, parallelized | 2616 // 128-bit code follows here, parallelized |
2559 __ movptr(pos, 0); | 2617 __ movl(pos, 0); |
2560 __ align(OptoLoopAlignment); | 2618 __ align(OptoLoopAlignment); |
2561 __ BIND(L_singleBlock_loopTop_128); | 2619 __ BIND(L_singleBlock_loopTop_128); |
2562 __ cmpptr(len_reg, 0); // any blocks left?? | 2620 __ cmpptr(len_reg, 0); // any blocks left?? |
2563 __ jcc(Assembler::equal, L_exit); | 2621 __ jcc(Assembler::equal, L_exit); |
2564 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | 2622 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
2565 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | 2623 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
2566 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2624 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
2595 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) | 2653 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) |
2596 __ cmpl(rax, 52); | 2654 __ cmpl(rax, 52); |
2597 __ jcc(Assembler::notEqual, L_key_256); | 2655 __ jcc(Assembler::notEqual, L_key_256); |
2598 | 2656 |
2599 // 192-bit code follows here (could be optimized to use parallelism) | 2657 // 192-bit code follows here (could be optimized to use parallelism) |
2600 __ movptr(pos, 0); | 2658 __ movl(pos, 0); |
2601 __ align(OptoLoopAlignment); | 2659 __ align(OptoLoopAlignment); |
2602 __ BIND(L_singleBlock_loopTop_192); | 2660 __ BIND(L_singleBlock_loopTop_192); |
2603 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | 2661 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
2604 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | 2662 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
2605 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2663 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |
2620 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); | 2678 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); |
2621 __ jmp(L_exit); | 2679 __ jmp(L_exit); |
2622 | 2680 |
2623 __ BIND(L_key_256); | 2681 __ BIND(L_key_256); |
2624 // 256-bit code follows here (could be optimized to use parallelism) | 2682 // 256-bit code follows here (could be optimized to use parallelism) |
2625 __ movptr(pos, 0); | 2683 __ movl(pos, 0); |
2626 __ align(OptoLoopAlignment); | 2684 __ align(OptoLoopAlignment); |
2627 __ BIND(L_singleBlock_loopTop_256); | 2685 __ BIND(L_singleBlock_loopTop_256); |
2628 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | 2686 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input |
2629 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | 2687 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds |
2630 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | 2688 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { |