Mercurial > hg > graal-compiler
comparison src/cpu/x86/vm/stubGenerator_x86_32.cpp @ 6948:e522a00b91aa
Merge with http://hg.openjdk.java.net/hsx/hsx25/hotspot/ after NPG - C++ build works
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Mon, 12 Nov 2012 23:14:12 +0100 |
parents | 957c266d8bc5 a3ecd773a7b9 |
children | 291ffc492eb6 |
comparison
equal
deleted
inserted
replaced
6711:ae13cc658b80 | 6948:e522a00b91aa |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved. | 2 * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | 4 * |
5 * This code is free software; you can redistribute it and/or modify it | 5 * This code is free software; you can redistribute it and/or modify it |
6 * under the terms of the GNU General Public License version 2 only, as | 6 * under the terms of the GNU General Public License version 2 only, as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
26 #include "asm/assembler.hpp" | 26 #include "asm/assembler.hpp" |
27 #include "assembler_x86.inline.hpp" | 27 #include "assembler_x86.inline.hpp" |
28 #include "interpreter/interpreter.hpp" | 28 #include "interpreter/interpreter.hpp" |
29 #include "nativeInst_x86.hpp" | 29 #include "nativeInst_x86.hpp" |
30 #include "oops/instanceOop.hpp" | 30 #include "oops/instanceOop.hpp" |
31 #include "oops/methodOop.hpp" | 31 #include "oops/method.hpp" |
32 #include "oops/objArrayKlass.hpp" | 32 #include "oops/objArrayKlass.hpp" |
33 #include "oops/oop.inline.hpp" | 33 #include "oops/oop.inline.hpp" |
34 #include "prims/methodHandles.hpp" | 34 #include "prims/methodHandles.hpp" |
35 #include "runtime/frame.inline.hpp" | 35 #include "runtime/frame.inline.hpp" |
36 #include "runtime/handles.inline.hpp" | 36 #include "runtime/handles.inline.hpp" |
232 __ decrement(rcx); | 232 __ decrement(rcx); |
233 __ jcc(Assembler::notZero, loop); | 233 __ jcc(Assembler::notZero, loop); |
234 | 234 |
235 // call Java function | 235 // call Java function |
236 __ BIND(parameters_done); | 236 __ BIND(parameters_done); |
237 __ movptr(rbx, method); // get methodOop | 237 __ movptr(rbx, method); // get Method* |
238 __ movptr(rax, entry_point); // get entry_point | 238 __ movptr(rax, entry_point); // get entry_point |
239 __ mov(rsi, rsp); // set sender sp | 239 __ mov(rsi, rsp); // set sender sp |
240 BLOCK_COMMENT("call Java function"); | 240 BLOCK_COMMENT("call Java function"); |
241 __ call(rax); | 241 __ call(rax); |
242 | 242 |
680 __ mov(rdx, rax); | 680 __ mov(rdx, rax); |
681 __ andptr(rdx, oop_mask); | 681 __ andptr(rdx, oop_mask); |
682 __ cmpptr(rdx, oop_bits); | 682 __ cmpptr(rdx, oop_bits); |
683 __ jcc(Assembler::notZero, error); | 683 __ jcc(Assembler::notZero, error); |
684 | 684 |
685 // make sure klass is 'reasonable' | 685 // make sure klass is 'reasonable', which is not zero. |
686 __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass | 686 __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass |
687 __ testptr(rax, rax); | 687 __ testptr(rax, rax); |
688 __ jcc(Assembler::zero, error); // if klass is NULL it is broken | 688 __ jcc(Assembler::zero, error); // if klass is NULL it is broken |
689 | 689 // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers |
690 // Check if the klass is in the right area of memory | |
691 const int klass_mask = Universe::verify_klass_mask(); | |
692 const int klass_bits = Universe::verify_klass_bits(); | |
693 __ mov(rdx, rax); | |
694 __ andptr(rdx, klass_mask); | |
695 __ cmpptr(rdx, klass_bits); | |
696 __ jcc(Assembler::notZero, error); | |
697 | |
698 // make sure klass' klass is 'reasonable' | |
699 __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass' klass | |
700 __ testptr(rax, rax); | |
701 __ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken | |
702 | |
703 __ mov(rdx, rax); | |
704 __ andptr(rdx, klass_mask); | |
705 __ cmpptr(rdx, klass_bits); | |
706 __ jcc(Assembler::notZero, error); // if klass not in right area | |
707 // of memory it is broken too. | |
708 | 690 |
709 // return if everything seems ok | 691 // return if everything seems ok |
710 __ bind(exit); | 692 __ bind(exit); |
711 __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back | 693 __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back |
712 __ pop(rdx); // restore rdx | 694 __ pop(rdx); // restore rdx |
1817 #endif | 1799 #endif |
1818 | 1800 |
1819 assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); | 1801 assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); |
1820 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); | 1802 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
1821 | 1803 |
1822 // typeArrayKlass | 1804 // TypeArrayKlass |
1823 // | 1805 // |
1824 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); | 1806 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); |
1825 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); | 1807 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); |
1826 // | 1808 // |
1827 const Register rsi_offset = rsi; // array offset | 1809 const Register rsi_offset = rsi; // array offset |
1880 __ pop(rdi); | 1862 __ pop(rdi); |
1881 __ pop(rsi); | 1863 __ pop(rsi); |
1882 __ leave(); // required for proper stackwalking of RuntimeStub frame | 1864 __ leave(); // required for proper stackwalking of RuntimeStub frame |
1883 __ ret(0); | 1865 __ ret(0); |
1884 | 1866 |
1885 // objArrayKlass | 1867 // ObjArrayKlass |
1886 __ BIND(L_objArray); | 1868 __ BIND(L_objArray); |
1887 // live at this point: rcx_src_klass, src[_pos], dst[_pos] | 1869 // live at this point: rcx_src_klass, src[_pos], dst[_pos] |
1888 | 1870 |
1889 Label L_plain_copy, L_checkcast_copy; | 1871 Label L_plain_copy, L_checkcast_copy; |
1890 // test array classes for subtyping | 1872 // test array classes for subtyping |
1910 | 1892 |
1911 __ BIND(L_checkcast_copy); | 1893 __ BIND(L_checkcast_copy); |
1912 // live at this point: rcx_src_klass, dst[_pos], src[_pos] | 1894 // live at this point: rcx_src_klass, dst[_pos], src[_pos] |
1913 { | 1895 { |
1914 // Handy offsets: | 1896 // Handy offsets: |
1915 int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); | 1897 int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); |
1916 int sco_offset = in_bytes(Klass::super_check_offset_offset()); | 1898 int sco_offset = in_bytes(Klass::super_check_offset_offset()); |
1917 | 1899 |
1918 Register rsi_dst_klass = rsi; | 1900 Register rsi_dst_klass = rsi; |
1919 Register rdi_temp = rdi; | 1901 Register rdi_temp = rdi; |
1920 assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); | 1902 assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); |
2152 __ fld_d(Address(rsp, 4)); | 2134 __ fld_d(Address(rsp, 4)); |
2153 __ pow_with_fallback(0); | 2135 __ pow_with_fallback(0); |
2154 __ ret(0); | 2136 __ ret(0); |
2155 } | 2137 } |
2156 } | 2138 } |
2139 | |
2140 // AES intrinsic stubs | |
2141 enum {AESBlockSize = 16}; | |
2142 | |
2143 address generate_key_shuffle_mask() { | |
2144 __ align(16); | |
2145 StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); | |
2146 address start = __ pc(); | |
2147 __ emit_data(0x00010203, relocInfo::none, 0 ); | |
2148 __ emit_data(0x04050607, relocInfo::none, 0 ); | |
2149 __ emit_data(0x08090a0b, relocInfo::none, 0 ); | |
2150 __ emit_data(0x0c0d0e0f, relocInfo::none, 0 ); | |
2151 return start; | |
2152 } | |
2153 | |
2154 // Utility routine for loading a 128-bit key word in little endian format | |
2155 // can optionally specify that the shuffle mask is already in an xmmregister | |
2156 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { | |
2157 __ movdqu(xmmdst, Address(key, offset)); | |
2158 if (xmm_shuf_mask != NULL) { | |
2159 __ pshufb(xmmdst, xmm_shuf_mask); | |
2160 } else { | |
2161 __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | |
2162 } | |
2163 } | |
2164 | |
// Emit one (middle) AES encryption round: loads the 128-bit round key at
// key+offset into xmmtmp (clobbering it) via load_key, then applies aesenc
// to xmmdst with that key.
// can optionally specify that the shuffle mask is already in an xmmregister
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  load_key(xmmtmp, key, offset, xmm_shuf_mask);
  __ aesenc(xmmdst, xmmtmp);
}
2171 | |
// Emit one (middle) AES decryption round: loads the 128-bit round key at
// key+offset into xmmtmp (clobbering it) via load_key, then applies aesdec
// to xmmdst with that key.
// can optionally specify that the shuffle mask is already in an xmmregister
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  load_key(xmmtmp, key, offset, xmm_shuf_mask);
  __ aesdec(xmmdst, xmmtmp);
}
2178 | |
2179 | |
// Generates the single-block AES encryption stub (AESCrypt.implEncryptBlock).
// Encrypts exactly one 16-byte block; handles 128/192/256-bit expanded keys
// by branching on the key array length.
//
// Arguments:
//
// Inputs:
//   c_rarg0 - source byte array address
//   c_rarg1 - destination byte array address
//   c_rarg2 - K (key) in little endian int array
//
address generate_aescrypt_encryptBlock() {
  assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
  Label L_doLast;
  address start = __ pc();

  const Register from        = rsi;      // source array address
  const Register to          = rdx;      // destination array address
  const Register key         = rcx;      // key array address
  const Register keylen      = rax;
  // After enter() (push rbp; mov rbp,rsp), the incoming stack args sit above
  // the saved rbp (rbp+0) and return address (rbp+4), i.e. starting at rbp+8.
  const Address  from_param(rbp, 8+0);
  const Address  to_param  (rbp, 8+4);
  const Address  key_param (rbp, 8+8);

  const XMMRegister xmm_result        = xmm0;
  const XMMRegister xmm_temp          = xmm1;
  const XMMRegister xmm_key_shuf_mask = xmm2;

  __ enter();   // required for proper stackwalking of RuntimeStub frame
  __ push(rsi); // rsi is callee-saved on x86-32; preserve it while we use it as 'from'
  __ movptr(from , from_param);
  __ movptr(to   , to_param);
  __ movptr(key  , key_param);

  // Load the Java int[] length of the expanded key (44/52/60 ints for
  // 128/192/256-bit keys respectively).
  __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  // keylen = # of 32-bit words, convert to 128-bit words
  __ shrl(keylen, 2);
  // After this, keylen is 0 / 2 / 4 for 128 / 192 / 256-bit keys.
  __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more

  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input

  // For encryption, the java expanded key ordering is just what we need

  // Initial whitening with round key 0, then nine full rounds (0x10..0x90).
  load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
  __ pxor(xmm_result, xmm_temp);
  for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
    aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
  }
  // Key 0xa0 is the last round for 128-bit keys, a middle round otherwise.
  load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
  __ cmpl(keylen, 0);
  __ jcc(Assembler::equal, L_doLast);   // 128-bit key: 0xa0 goes to aesenclast
  __ aesenc(xmm_result, xmm_temp);                        // only in 192 and 256 bit keys
  aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
  load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
  __ subl(keylen, 2);
  __ jcc(Assembler::equal, L_doLast);   // 192-bit key: 0xc0 goes to aesenclast
  __ aesenc(xmm_result, xmm_temp);                        // only in 256 bit keys
  aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
  load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);       // 256-bit key: 0xe0 is last

  __ BIND(L_doLast);
  __ aesenclast(xmm_result, xmm_temp);
  __ movdqu(Address(to, 0), xmm_result);        // store the result
  __ xorptr(rax, rax); // return 0
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
2249 | |
2250 | |
// Generates the single-block AES decryption stub (AESCrypt.implDecryptBlock).
// Decrypts exactly one 16-byte block; handles 128/192/256-bit expanded keys
// by branching on the key array length.
//
// Arguments:
//
// Inputs:
//   c_rarg0 - source byte array address
//   c_rarg1 - destination byte array address
//   c_rarg2 - K (key) in little endian int array
//
address generate_aescrypt_decryptBlock() {
  assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
  Label L_doLast;
  address start = __ pc();

  const Register from        = rsi;      // source array address
  const Register to          = rdx;      // destination array address
  const Register key         = rcx;      // key array address
  const Register keylen      = rax;
  // Incoming stack args start at rbp+8 (above saved rbp and return address).
  const Address  from_param(rbp, 8+0);
  const Address  to_param  (rbp, 8+4);
  const Address  key_param (rbp, 8+8);

  const XMMRegister xmm_result        = xmm0;
  const XMMRegister xmm_temp          = xmm1;
  const XMMRegister xmm_key_shuf_mask = xmm2;

  __ enter();   // required for proper stackwalking of RuntimeStub frame
  __ push(rsi); // preserve callee-saved rsi, used below as 'from'
  __ movptr(from , from_param);
  __ movptr(to   , to_param);
  __ movptr(key  , key_param);

  // Expanded key length in ints: 44 / 52 / 60 for 128 / 192 / 256-bit keys.
  __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  // keylen = # of 32-bit words, convert to 128-bit words
  __ shrl(keylen, 2);
  // After this, keylen is 0 / 2 / 4 for 128 / 192 / 256-bit keys.
  __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more

  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  __ movdqu(xmm_result, Address(from, 0));

  // for decryption java expanded key ordering is rotated one position from what we want
  // so we start from 0x10 here and hit 0x00 last
  // we don't know if the key is aligned, hence not using load-execute form
  load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
  __ pxor  (xmm_result, xmm_temp);
  // Nine full decryption rounds with keys 0x20..0xa0.
  for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
    aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
  }
  __ cmpl(keylen, 0);
  __ jcc(Assembler::equal, L_doLast);   // 128-bit key: straight to the last round
  // only in 192 and 256 bit keys
  aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
  aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
  __ subl(keylen, 2);
  __ jcc(Assembler::equal, L_doLast);   // 192-bit key done with middle rounds
  // only in 256 bit keys
  aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
  aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);

  __ BIND(L_doLast);
  // for decryption the aesdeclast operation is always on key+0x00
  load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
  __ aesdeclast(xmm_result, xmm_temp);

  __ movdqu(Address(to, 0), xmm_result);  // store the result

  __ xorptr(rax, rax); // return 0
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
2324 | |
// Save (saving == true) or restore (saving == false) the x86-32 callee-saved
// ("Save On Entry") registers rbx, rsi and rdi in rbp-relative slots just
// below the saved rbp of the stub's frame.
// The restore path intentionally does not re-adjust rsp: stubs using this
// helper finish with __ leave(), which resets rsp from rbp.
// NOTE(review): four words are reserved although only three are stored —
// presumably to keep the stack 16-byte aligned; confirm.
void handleSOERegisters(bool saving) {
  const int saveFrameSizeInBytes = 4 * wordSize;
  const Address saved_rbx     (rbp, -3 * wordSize);
  const Address saved_rsi     (rbp, -2 * wordSize);
  const Address saved_rdi     (rbp, -1 * wordSize);

  if (saving) {
    __ subptr(rsp, saveFrameSizeInBytes);
    __ movptr(saved_rsi, rsi);
    __ movptr(saved_rdi, rdi);
    __ movptr(saved_rbx, rbx);
  } else {
    // restoring
    __ movptr(rsi, saved_rsi);
    __ movptr(rdi, saved_rdi);
    __ movptr(rbx, saved_rbx);
  }
}
2343 | |
// Generates the CBC-mode AES encryption stub. Processes len bytes (a multiple
// of the 16-byte block size) sequentially — CBC encryption is inherently
// serial since each block's input is xored with the previous cipher block.
// The first six round keys are cached in xmm2..xmm7; the rest are loaded from
// memory each iteration. Separate loops handle 128/192/256-bit keys.
//
// Arguments:
//
// Inputs:
//   c_rarg0 - source byte array address
//   c_rarg1 - destination byte array address
//   c_rarg2 - K (key) in little endian int array
//   c_rarg3 - r vector byte array address
//   c_rarg4 - input length
//
address generate_cipherBlockChaining_encryptAESCrypt() {
  assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
  address start = __ pc();

  Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
  const Register from        = rsi;      // source array address
  const Register to          = rdx;      // destination array address
  const Register key         = rcx;      // key array address
  const Register rvec        = rdi;      // r byte array initialized from initvector array address
                                         // and left with the results of the last encryption block
  const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
  const Register pos         = rax;

  // xmm register assignments for the loops below
  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_temp   = xmm1;
  // first 6 keys preloaded into xmm2-xmm7
  const int XMM_REG_NUM_KEY_FIRST = 2;
  const int XMM_REG_NUM_KEY_LAST  = 7;
  const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  handleSOERegisters(true /*saving*/);

  // load registers from incoming parameters (stack args start at rbp+8,
  // above the saved rbp and return address)
  const Address  from_param(rbp, 8+0);
  const Address  to_param  (rbp, 8+4);
  const Address  key_param (rbp, 8+8);
  const Address  rvec_param (rbp, 8+12);
  const Address  len_param  (rbp, 8+16);
  __ movptr(from , from_param);
  __ movptr(to   , to_param);
  __ movptr(key  , key_param);
  __ movptr(rvec , rvec_param);
  __ movptr(len_reg , len_param);

  // xmm1 doubles as the shuffle-mask register here; it is free until the
  // loops below use it as xmm_temp.
  const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  // load up xmm regs 2 thru 7 with keys 0-5
  for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
    offset += 0x10;
  }

  __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec

  // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
  __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  __ cmpl(rax, 44);
  __ jcc(Assembler::notEqual, L_key_192_256);

  // 128 bit code follows here
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_loopTop_128);
  __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
  __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

  __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesenc(xmm_result, as_XMMRegister(rnum));
  }
  // remaining middle-round keys are not register-cached; load each from memory
  for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
    aes_enc_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0xa0);   // last round key for 128-bit keys
  __ aesenclast(xmm_result, xmm_temp);

  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual, L_loopTop_128);

  __ BIND(L_exit);
  __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object

  handleSOERegisters(false /*restoring*/);
  __ movl(rax, 0);                             // return 0 (why?)
  __ leave();                                  // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  __ BIND(L_key_192_256);
  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
  __ cmpl(rax, 52);
  __ jcc(Assembler::notEqual, L_key_256);

  // 192-bit code follows here (could be changed to use more xmm registers)
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_loopTop_192);
  __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
  __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

  __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesenc(xmm_result, as_XMMRegister(rnum));
  }
  // 192-bit keys have two extra middle rounds, hence up to offset 0xb0
  for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
    aes_enc_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0xc0);   // last round key for 192-bit keys
  __ aesenclast(xmm_result, xmm_temp);

  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual, L_loopTop_192);
  __ jmp(L_exit);

  __ BIND(L_key_256);
  // 256-bit code follows here (could be changed to use more xmm registers)
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_loopTop_256);
  __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
  __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

  __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesenc(xmm_result, as_XMMRegister(rnum));
  }
  // 256-bit keys have four extra middle rounds, hence up to offset 0xd0
  for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
    aes_enc_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0xe0);   // last round key for 256-bit keys
  __ aesenclast(xmm_result, xmm_temp);

  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual, L_loopTop_256);
  __ jmp(L_exit);

  return start;
}
2493 | |
2494 | |
// CBC AES Decryption.
// In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
//
// Decrypts len bytes (a multiple of the 16-byte block size) one block at a
// time, xoring each decrypted block with the previous cipher block (the r
// vector for the first block). Round keys 0x10..0x60 are cached in
// xmm2..xmm7; the rest are loaded from memory per iteration. Separate loops
// handle 128/192/256-bit keys.
//
// Arguments:
//
// Inputs:
//   c_rarg0 - source byte array address
//   c_rarg1 - destination byte array address
//   c_rarg2 - K (key) in little endian int array
//   c_rarg3 - r vector byte array address
//   c_rarg4 - input length
//

address generate_cipherBlockChaining_decryptAESCrypt() {
  assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
  address start = __ pc();

  Label L_exit, L_key_192_256, L_key_256;
  Label L_singleBlock_loopTop_128;
  Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
  const Register from        = rsi;      // source array address
  const Register to          = rdx;      // destination array address
  const Register key         = rcx;      // key array address
  const Register rvec        = rdi;      // r byte array initialized from initvector array address
                                         // and left with the results of the last encryption block
  const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
  const Register pos         = rax;

  // xmm register assignments for the loops below
  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_temp   = xmm1;
  // first 6 keys preloaded into xmm2-xmm7
  const int XMM_REG_NUM_KEY_FIRST = 2;
  const int XMM_REG_NUM_KEY_LAST  = 7;
  const int FIRST_NON_REG_KEY_offset = 0x70;   // first key not cached in an xmm register
  const XMMRegister xmm_key_first   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  handleSOERegisters(true /*saving*/);

  // load registers from incoming parameters (stack args start at rbp+8,
  // above the saved rbp and return address)
  const Address  from_param(rbp, 8+0);
  const Address  to_param  (rbp, 8+4);
  const Address  key_param (rbp, 8+8);
  const Address  rvec_param (rbp, 8+12);
  const Address  len_param  (rbp, 8+16);
  __ movptr(from , from_param);
  __ movptr(to   , to_param);
  __ movptr(key  , key_param);
  __ movptr(rvec , rvec_param);
  __ movptr(len_reg , len_param);

  // the java expanded key ordering is rotated one position from what we want
  // so we start from 0x10 here and hit 0x00 last
  const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  // load up xmm regs 2 thru 7 with the first 6 keys (offsets 0x10..0x60)
  for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
    offset += 0x10;
  }

  // inside here, use the rvec register to point to previous block cipher
  // with which we xor at the end of each newly decrypted block
  const Register  prev_block_cipher_ptr = rvec;

  // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
  __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  __ cmpl(rax, 44);   // 44 ints == 128-bit expanded key
  __ jcc(Assembler::notEqual, L_key_192_256);


  // 128-bit code follows here, parallelized
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_128);
  __ cmpptr(len_reg, 0);           // any blocks left??
  __ jcc(Assembler::equal, L_exit);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                                   // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);      // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jmp(L_singleBlock_loopTop_128);


  __ BIND(L_exit);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ movptr(rvec , rvec_param);                                     // restore this since used in loop
  __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
  handleSOERegisters(false /*restoring*/);
  __ movl(rax, 0);                                                  // return 0 (why?)
  __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
  __ ret(0);


  __ BIND(L_key_192_256);
  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
  __ cmpl(rax, 52);
  __ jcc(Assembler::notEqual, L_key_256);

  // 192-bit code follows here (could be optimized to use parallelism)
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_192);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                                   // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);      // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
  __ jmp(L_exit);

  __ BIND(L_key_256);
  // 256-bit code follows here (could be optimized to use parallelism)
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_256);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                                   // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);      // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
  __ jmp(L_exit);

  return start;
}
2661 | |
2157 | 2662 |
2158 public: | 2663 public: |
2159 // Information about frame layout at time of blocking runtime call. | 2664 // Information about frame layout at time of blocking runtime call. |
2160 // Note that we only have to preserve callee-saved registers since | 2665 // Note that we only have to preserve callee-saved registers since |
2161 // the compilers are responsible for supplying a continuation point | 2666 // the compilers are responsible for supplying a continuation point |
2348 | 2853 |
2349 // arraycopy stubs used by compilers | 2854 // arraycopy stubs used by compilers |
2350 generate_arraycopy_stubs(); | 2855 generate_arraycopy_stubs(); |
2351 | 2856 |
2352 generate_math_stubs(); | 2857 generate_math_stubs(); |
2858 | |
2859 // don't bother generating these AES intrinsic stubs unless global flag is set | |
2860 if (UseAESIntrinsics) { | |
2861 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others | |
2862 | |
2863 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); | |
2864 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); | |
2865 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); | |
2866 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); | |
2867 } | |
2353 } | 2868 } |
2354 | 2869 |
2355 | 2870 |
2356 public: | 2871 public: |
2357 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { | 2872 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { |