comparison src/cpu/x86/vm/stubGenerator_x86_32.cpp @ 6948:e522a00b91aa

Merge with http://hg.openjdk.java.net/hsx/hsx25/hotspot/ after NPG - C++ build works
author Doug Simon <doug.simon@oracle.com>
date Mon, 12 Nov 2012 23:14:12 +0100
parents 957c266d8bc5 a3ecd773a7b9
children 291ffc492eb6
comparison
equal deleted inserted replaced
6711:ae13cc658b80 6948:e522a00b91aa
1 /* 1 /*
2 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved. 2 * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 * 4 *
5 * This code is free software; you can redistribute it and/or modify it 5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as 6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
26 #include "asm/assembler.hpp" 26 #include "asm/assembler.hpp"
27 #include "assembler_x86.inline.hpp" 27 #include "assembler_x86.inline.hpp"
28 #include "interpreter/interpreter.hpp" 28 #include "interpreter/interpreter.hpp"
29 #include "nativeInst_x86.hpp" 29 #include "nativeInst_x86.hpp"
30 #include "oops/instanceOop.hpp" 30 #include "oops/instanceOop.hpp"
31 #include "oops/methodOop.hpp" 31 #include "oops/method.hpp"
32 #include "oops/objArrayKlass.hpp" 32 #include "oops/objArrayKlass.hpp"
33 #include "oops/oop.inline.hpp" 33 #include "oops/oop.inline.hpp"
34 #include "prims/methodHandles.hpp" 34 #include "prims/methodHandles.hpp"
35 #include "runtime/frame.inline.hpp" 35 #include "runtime/frame.inline.hpp"
36 #include "runtime/handles.inline.hpp" 36 #include "runtime/handles.inline.hpp"
232 __ decrement(rcx); 232 __ decrement(rcx);
233 __ jcc(Assembler::notZero, loop); 233 __ jcc(Assembler::notZero, loop);
234 234
235 // call Java function 235 // call Java function
236 __ BIND(parameters_done); 236 __ BIND(parameters_done);
237 __ movptr(rbx, method); // get methodOop 237 __ movptr(rbx, method); // get Method*
238 __ movptr(rax, entry_point); // get entry_point 238 __ movptr(rax, entry_point); // get entry_point
239 __ mov(rsi, rsp); // set sender sp 239 __ mov(rsi, rsp); // set sender sp
240 BLOCK_COMMENT("call Java function"); 240 BLOCK_COMMENT("call Java function");
241 __ call(rax); 241 __ call(rax);
242 242
680 __ mov(rdx, rax); 680 __ mov(rdx, rax);
681 __ andptr(rdx, oop_mask); 681 __ andptr(rdx, oop_mask);
682 __ cmpptr(rdx, oop_bits); 682 __ cmpptr(rdx, oop_bits);
683 __ jcc(Assembler::notZero, error); 683 __ jcc(Assembler::notZero, error);
684 684
685 // make sure klass is 'reasonable' 685 // make sure klass is 'reasonable', which is not zero.
686 __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass 686 __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
687 __ testptr(rax, rax); 687 __ testptr(rax, rax);
688 __ jcc(Assembler::zero, error); // if klass is NULL it is broken 688 __ jcc(Assembler::zero, error); // if klass is NULL it is broken
689 689 // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers
690 // Check if the klass is in the right area of memory
691 const int klass_mask = Universe::verify_klass_mask();
692 const int klass_bits = Universe::verify_klass_bits();
693 __ mov(rdx, rax);
694 __ andptr(rdx, klass_mask);
695 __ cmpptr(rdx, klass_bits);
696 __ jcc(Assembler::notZero, error);
697
698 // make sure klass' klass is 'reasonable'
699 __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass' klass
700 __ testptr(rax, rax);
701 __ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken
702
703 __ mov(rdx, rax);
704 __ andptr(rdx, klass_mask);
705 __ cmpptr(rdx, klass_bits);
706 __ jcc(Assembler::notZero, error); // if klass not in right area
707 // of memory it is broken too.
708 690
709 // return if everything seems ok 691 // return if everything seems ok
710 __ bind(exit); 692 __ bind(exit);
711 __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back 693 __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back
712 __ pop(rdx); // restore rdx 694 __ pop(rdx); // restore rdx
1817 #endif 1799 #endif
1818 1800
1819 assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); 1801 assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh);
1820 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); 1802 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed);
1821 1803
1822 // typeArrayKlass 1804 // TypeArrayKlass
1823 // 1805 //
1824 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); 1806 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
1825 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); 1807 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
1826 // 1808 //
1827 const Register rsi_offset = rsi; // array offset 1809 const Register rsi_offset = rsi; // array offset
1880 __ pop(rdi); 1862 __ pop(rdi);
1881 __ pop(rsi); 1863 __ pop(rsi);
1882 __ leave(); // required for proper stackwalking of RuntimeStub frame 1864 __ leave(); // required for proper stackwalking of RuntimeStub frame
1883 __ ret(0); 1865 __ ret(0);
1884 1866
1885 // objArrayKlass 1867 // ObjArrayKlass
1886 __ BIND(L_objArray); 1868 __ BIND(L_objArray);
1887 // live at this point: rcx_src_klass, src[_pos], dst[_pos] 1869 // live at this point: rcx_src_klass, src[_pos], dst[_pos]
1888 1870
1889 Label L_plain_copy, L_checkcast_copy; 1871 Label L_plain_copy, L_checkcast_copy;
1890 // test array classes for subtyping 1872 // test array classes for subtyping
1910 1892
1911 __ BIND(L_checkcast_copy); 1893 __ BIND(L_checkcast_copy);
1912 // live at this point: rcx_src_klass, dst[_pos], src[_pos] 1894 // live at this point: rcx_src_klass, dst[_pos], src[_pos]
1913 { 1895 {
1914 // Handy offsets: 1896 // Handy offsets:
1915 int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); 1897 int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
1916 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 1898 int sco_offset = in_bytes(Klass::super_check_offset_offset());
1917 1899
1918 Register rsi_dst_klass = rsi; 1900 Register rsi_dst_klass = rsi;
1919 Register rdi_temp = rdi; 1901 Register rdi_temp = rdi;
1920 assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); 1902 assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos");
2152 __ fld_d(Address(rsp, 4)); 2134 __ fld_d(Address(rsp, 4));
2153 __ pow_with_fallback(0); 2135 __ pow_with_fallback(0);
2154 __ ret(0); 2136 __ ret(0);
2155 } 2137 }
2156 } 2138 }
2139
2140 // AES intrinsic stubs
2141 enum {AESBlockSize = 16};
2142
2143 address generate_key_shuffle_mask() { // emits the 16-byte shuffle mask that load_key() passes to pshufb and returns its address
2144 __ align(16); // align the mask data to 16 bytes
2145 StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
2146 address start = __ pc(); // the mask starts at the current code position
2147 __ emit_data(0x00010203, relocInfo::none, 0 ); // four 32-bit data words, 16 bytes in total
2148 __ emit_data(0x04050607, relocInfo::none, 0 );
2149 __ emit_data(0x08090a0b, relocInfo::none, 0 );
2150 __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
2151 return start;
2152 }
2153
2154 // Utility routine for loading a 128-bit key word in little endian format: loads 16 bytes from key+offset into xmmdst, then byte-shuffles them with the key shuffle mask.
2155 // The caller can optionally pass the shuffle mask in an xmm register; when xmm_shuf_mask is NULL the mask is read from StubRoutines::x86::key_shuffle_mask_addr().
2156 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
2157 __ movdqu(xmmdst, Address(key, offset)); // unaligned 16-byte load of the key word
2158 if (xmm_shuf_mask != NULL) {
2159 __ pshufb(xmmdst, xmm_shuf_mask); // mask already held in a register
2160 } else {
2161 __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // mask taken from the generated key_shuffle_mask data
2162 }
2163 }
2164
2165 // aesenc using specified key+offset: loads the key word into xmmtmp via load_key(), then applies aesenc to xmmdst with it (xmmtmp is overwritten)
2166 // can optionally specify that the shuffle mask is already in an xmmregister (forwarded unchanged to load_key)
2167 void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
2168 load_key(xmmtmp, key, offset, xmm_shuf_mask);
2169 __ aesenc(xmmdst, xmmtmp);
2170 }
2171
2172 // aesdec using specified key+offset: loads the key word into xmmtmp via load_key(), then applies aesdec to xmmdst with it (xmmtmp is overwritten)
2173 // can optionally specify that the shuffle mask is already in an xmmregister (forwarded unchanged to load_key)
2174 void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
2175 load_key(xmmtmp, key, offset, xmm_shuf_mask);
2176 __ aesdec(xmmdst, xmmtmp);
2177 }
2178
2179
2180 // Generates the stub that AES-encrypts one 16-byte block.
2181 //
2182 // Inputs (this 32-bit stub reads them from the stack, relative to rbp after enter()):
2183 // [rbp + 8] - source byte array address
2184 // [rbp + 12] - destination byte array address
2185 // [rbp + 16] - K (key) in little endian int array
2186 // Returns the entry address of the generated code; the generated stub returns 0 in rax.
2187 address generate_aescrypt_encryptBlock() {
2188 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
2189 __ align(CodeEntryAlignment);
2190 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
2191 Label L_doLast;
2192 address start = __ pc();
2193
2194 const Register from = rsi; // source array address
2195 const Register to = rdx; // destination array address
2196 const Register key = rcx; // key array address
2197 const Register keylen = rax; // number of extra 128-bit key words beyond the minimum of 11 (see below)
2198 const Address from_param(rbp, 8+0);
2199 const Address to_param (rbp, 8+4);
2200 const Address key_param (rbp, 8+8);
2201
2202 const XMMRegister xmm_result = xmm0;
2203 const XMMRegister xmm_temp = xmm1;
2204 const XMMRegister xmm_key_shuf_mask = xmm2;
2205
2206 __ enter(); // required for proper stackwalking of RuntimeStub frame
2207 __ push(rsi); // rsi is used as 'from' below; popped again before returning
2208 __ movptr(from , from_param);
2209 __ movptr(to , to_param);
2210 __ movptr(key , key_param);
2211
2212 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // array length; presumably key points at the first int element, so the length field is addressed relative to it
2213 // keylen = # of 32-bit words, convert to 128-bit words
2214 __ shrl(keylen, 2);
2215 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
2216
2217 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // keep the shuffle mask in a register for all load_key calls
2218 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
2219
2220 // For encryption, the java expanded key ordering is just what we need
2221
2222 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
2223 __ pxor(xmm_result, xmm_temp); // xor the input with the key word at offset 0x00
2224 for (int offset = 0x10; offset <= 0x90; offset += 0x10) { // nine aesenc rounds emitted for every key size
2225 aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
2226 }
2227 load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
2228 __ cmpl(keylen, 0); // exactly 11 key words: 0xa0 is the final key word
2229 __ jcc(Assembler::equal, L_doLast);
2230 __ aesenc(xmm_result, xmm_temp); // only in 192 and 256 bit keys
2231 aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
2232 load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
2233 __ subl(keylen, 2); // two extra key words: 0xc0 is the final key word
2234 __ jcc(Assembler::equal, L_doLast);
2235 __ aesenc(xmm_result, xmm_temp); // only in 256 bit keys
2236 aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
2237 load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
2238
2239 __ BIND(L_doLast); // xmm_temp holds the final key word for whichever key size was detected
2240 __ aesenclast(xmm_result, xmm_temp);
2241 __ movdqu(Address(to, 0), xmm_result); // store the result
2242 __ xorptr(rax, rax); // return 0
2243 __ pop(rsi);
2244 __ leave(); // required for proper stackwalking of RuntimeStub frame
2245 __ ret(0);
2246
2247 return start;
2248 }
2249
2250
2251 // Generates the stub that AES-decrypts one 16-byte block.
2252 //
2253 // Inputs (this 32-bit stub reads them from the stack, relative to rbp after enter()):
2254 // [rbp + 8] - source byte array address
2255 // [rbp + 12] - destination byte array address
2256 // [rbp + 16] - K (key) in little endian int array
2257 // Returns the entry address of the generated code; the generated stub returns 0 in rax.
2258 address generate_aescrypt_decryptBlock() {
2259 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
2260 __ align(CodeEntryAlignment);
2261 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2262 Label L_doLast;
2263 address start = __ pc();
2264
2265 const Register from = rsi; // source array address
2266 const Register to = rdx; // destination array address
2267 const Register key = rcx; // key array address
2268 const Register keylen = rax; // number of extra 128-bit key words beyond the minimum of 11 (see below)
2269 const Address from_param(rbp, 8+0);
2270 const Address to_param (rbp, 8+4);
2271 const Address key_param (rbp, 8+8);
2272
2273 const XMMRegister xmm_result = xmm0;
2274 const XMMRegister xmm_temp = xmm1;
2275 const XMMRegister xmm_key_shuf_mask = xmm2;
2276
2277 __ enter(); // required for proper stackwalking of RuntimeStub frame
2278 __ push(rsi); // rsi is used as 'from' below; popped again before returning
2279 __ movptr(from , from_param);
2280 __ movptr(to , to_param);
2281 __ movptr(key , key_param);
2282
2283 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); // array length; presumably key points at the first int element, so the length field is addressed relative to it
2284 // keylen = # of 32-bit words, convert to 128-bit words
2285 __ shrl(keylen, 2);
2286 __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
2287
2288 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // keep the shuffle mask in a register for all load_key calls
2289 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
2290
2291 // for decryption java expanded key ordering is rotated one position from what we want
2292 // so we start from 0x10 here and hit 0x00 last
2293 // we don't know if the key is aligned, hence not using load-execute form
2294 load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
2295 __ pxor (xmm_result, xmm_temp); // xor the input with the key word at offset 0x10
2296 for (int offset = 0x20; offset <= 0xa0; offset += 0x10) { // nine aesdec rounds emitted for every key size
2297 aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
2298 }
2299 __ cmpl(keylen, 0); // exactly 11 key words: no further rounds before the last one
2300 __ jcc(Assembler::equal, L_doLast);
2301 // only in 192 and 256 bit keys
2302 aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
2303 aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
2304 __ subl(keylen, 2); // two extra key words: no further rounds before the last one
2305 __ jcc(Assembler::equal, L_doLast);
2306 // only in 256 bit keys
2307 aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
2308 aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
2309
2310 __ BIND(L_doLast);
2311 // for decryption the aesdeclast operation is always on key+0x00
2312 load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
2313 __ aesdeclast(xmm_result, xmm_temp);
2314
2315 __ movdqu(Address(to, 0), xmm_result); // store the result
2316
2317 __ xorptr(rax, rax); // return 0
2318 __ pop(rsi);
2319 __ leave(); // required for proper stackwalking of RuntimeStub frame
2320 __ ret(0);
2321
2322 return start;
2323 }
2324
2325 void handleSOERegisters(bool saving) { // emits code that saves (saving == true) or reloads (saving == false) rsi, rdi and rbx in rbp-relative stack slots
2326 const int saveFrameSizeInBytes = 4 * wordSize; // NOTE(review): four words are reserved but only three slots are used below - confirm whether the extra word is intentional
2327 const Address saved_rbx (rbp, -3 * wordSize);
2328 const Address saved_rsi (rbp, -2 * wordSize);
2329 const Address saved_rdi (rbp, -1 * wordSize);
2330
2331 if (saving) {
2332 __ subptr(rsp, saveFrameSizeInBytes); // reserve the save area below rbp; the slots are addressed off rbp, so the callers in this file call enter() first
2333 __ movptr(saved_rsi, rsi);
2334 __ movptr(saved_rdi, rdi);
2335 __ movptr(saved_rbx, rbx);
2336 } else {
2337 // restoring: only reloads the registers; rsp is not adjusted here (the callers in this file follow with leave())
2338 __ movptr(rsi, saved_rsi);
2339 __ movptr(rdi, saved_rdi);
2340 __ movptr(rbx, saved_rbx);
2341 }
2342 }
2343
2344 // Generates the stub for AES encryption in CBC mode: each 16-byte input block is xored with r (the previous result) and then encrypted.
2345 //
2346 // Inputs (this 32-bit stub reads them from the stack, relative to rbp after enter()):
2347 // [rbp + 8] - source byte array address
2348 // [rbp + 12] - destination byte array address
2349 // [rbp + 16] - K (key) in little endian int array
2350 // [rbp + 20] - r vector byte array address
2351 // [rbp + 24] - input length in bytes (must be multiple of blocksize 16)
2352 // Returns the entry address of the generated code; the generated stub returns 0 in rax.
2353 address generate_cipherBlockChaining_encryptAESCrypt() {
2354 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
2355 __ align(CodeEntryAlignment);
2356 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
2357 address start = __ pc();
2358
2359 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
2360 const Register from = rsi; // source array address
2361 const Register to = rdx; // destination array address
2362 const Register key = rcx; // key array address
2363 const Register rvec = rdi; // r byte array initialized from initvector array address
2364 // and left with the results of the last encryption block
2365 const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
2366 const Register pos = rax; // byte offset of the current block; rax also briefly holds the key array length below
2367
2368 // xmm register assignments for the loops below
2369 const XMMRegister xmm_result = xmm0;
2370 const XMMRegister xmm_temp = xmm1;
2371 // first 6 keys preloaded into xmm2-xmm7
2372 const int XMM_REG_NUM_KEY_FIRST = 2;
2373 const int XMM_REG_NUM_KEY_LAST = 7;
2374 const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
2375
2376 __ enter(); // required for proper stackwalking of RuntimeStub frame
2377 handleSOERegisters(true /*saving*/);
2378
2379 // load registers from incoming parameters
2380 const Address from_param(rbp, 8+0);
2381 const Address to_param (rbp, 8+4);
2382 const Address key_param (rbp, 8+8);
2383 const Address rvec_param (rbp, 8+12);
2384 const Address len_param (rbp, 8+16);
2385 __ movptr(from , from_param);
2386 __ movptr(to , to_param);
2387 __ movptr(key , key_param);
2388 __ movptr(rvec , rvec_param);
2389 __ movptr(len_reg , len_param);
2390
2391 const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
2392 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2393 // load up xmm regs 2 thru 7 with keys 0-5
2394 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2395 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
2396 offset += 0x10;
2397 }
2398
2399 __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
2400
2401 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (44=128, 52=192, or 60=256))
2402 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2403 __ cmpl(rax, 44);
2404 __ jcc(Assembler::notEqual, L_key_192_256);
2405
2406 // 128 bit code follows here
2407 __ movptr(pos, 0); // pos is rax, so the key length loaded above is overwritten from here on
2408 __ align(OptoLoopAlignment);
2409 __ BIND(L_loopTop_128); // length is tested only at the bottom of the loop, so at least one block is processed; NOTE(review): presumably callers never pass a zero length - confirm
2410 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
2411 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2412
2413 __ pxor (xmm_result, xmm_key0); // do the aes rounds
2414 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { // rounds using the keys preloaded in xmm3-xmm7
2415 __ aesenc(xmm_result, as_XMMRegister(rnum));
2416 }
2417 for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { // remaining keys are reloaded from memory each iteration; no mask register is passed because xmm_temp (which held the mask) is reused
2418 aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2419 }
2420 load_key(xmm_temp, key, 0xa0); // last key word for the 128-bit path
2421 __ aesenclast(xmm_result, xmm_temp);
2422
2423 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2424 // no need to store r to memory until we exit
2425 __ addptr(pos, AESBlockSize);
2426 __ subptr(len_reg, AESBlockSize);
2427 __ jcc(Assembler::notEqual, L_loopTop_128); // loop until the remaining length reaches zero
2428
2429 __ BIND(L_exit); // shared exit for all three key sizes
2430 __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
2431
2432 handleSOERegisters(false /*restoring*/);
2433 __ movl(rax, 0); // return 0 (why?)
2434 __ leave(); // required for proper stackwalking of RuntimeStub frame
2435 __ ret(0);
2436
2437 __ BIND(L_key_192_256);
2438 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
2439 __ cmpl(rax, 52);
2440 __ jcc(Assembler::notEqual, L_key_256);
2441
2442 // 192-bit code follows here (could be changed to use more xmm registers)
2443 __ movptr(pos, 0);
2444 __ align(OptoLoopAlignment);
2445 __ BIND(L_loopTop_192); // same structure as the 128-bit loop, with keys up to offset 0xc0
2446 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
2447 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2448
2449 __ pxor (xmm_result, xmm_key0); // do the aes rounds
2450 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2451 __ aesenc(xmm_result, as_XMMRegister(rnum));
2452 }
2453 for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
2454 aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2455 }
2456 load_key(xmm_temp, key, 0xc0); // last key word for the 192-bit path
2457 __ aesenclast(xmm_result, xmm_temp);
2458
2459 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2460 // no need to store r to memory until we exit
2461 __ addptr(pos, AESBlockSize);
2462 __ subptr(len_reg, AESBlockSize);
2463 __ jcc(Assembler::notEqual, L_loopTop_192);
2464 __ jmp(L_exit);
2465
2466 __ BIND(L_key_256); // reached for any key length other than 44 or 52 ints
2467 // 256-bit code follows here (could be changed to use more xmm registers)
2468 __ movptr(pos, 0);
2469 __ align(OptoLoopAlignment);
2470 __ BIND(L_loopTop_256); // same structure as the 128-bit loop, with keys up to offset 0xe0
2471 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
2472 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2473
2474 __ pxor (xmm_result, xmm_key0); // do the aes rounds
2475 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2476 __ aesenc(xmm_result, as_XMMRegister(rnum));
2477 }
2478 for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
2479 aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2480 }
2481 load_key(xmm_temp, key, 0xe0); // last key word for the 256-bit path
2482 __ aesenclast(xmm_result, xmm_temp);
2483
2484 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2485 // no need to store r to memory until we exit
2486 __ addptr(pos, AESBlockSize);
2487 __ subptr(len_reg, AESBlockSize);
2488 __ jcc(Assembler::notEqual, L_loopTop_256);
2489 __ jmp(L_exit);
2490
2491 return start;
2492 }
2493
2494
2495 // CBC AES Decryption: each 16-byte cipher block is decrypted and then xored with the previous cipher block (initially r).
2496 // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
2497 //
2498 // Generates the stub and returns its entry address; the generated stub returns 0 in rax.
2499 //
2500 // Inputs (this 32-bit stub reads them from the stack, relative to rbp after enter()):
2501 // [rbp + 8] - source byte array address
2502 // [rbp + 12] - destination byte array address
2503 // [rbp + 16] - K (key) in little endian int array
2504 // [rbp + 20] - r vector byte array address
2505 // [rbp + 24] - input length in bytes (must be multiple of blocksize 16)
2506 //
2507
2508 address generate_cipherBlockChaining_decryptAESCrypt() {
2509 assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
2510 __ align(CodeEntryAlignment);
2511 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
2512 address start = __ pc();
2513
2514 Label L_exit, L_key_192_256, L_key_256;
2515 Label L_singleBlock_loopTop_128;
2516 Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
2517 const Register from = rsi; // source array address
2518 const Register to = rdx; // destination array address
2519 const Register key = rcx; // key array address
2520 const Register rvec = rdi; // r byte array initialized from initvector array address
2521 // and left with the last cipher input block on exit (see L_exit)
2522 const Register len_reg = rbx; // src len (must be multiple of blocksize 16)
2523 const Register pos = rax; // byte offset of the current block; rax also briefly holds the key array length below
2524
2525 // xmm register assignments for the loops below
2526 const XMMRegister xmm_result = xmm0;
2527 const XMMRegister xmm_temp = xmm1;
2528 // first 6 keys preloaded into xmm2-xmm7
2529 const int XMM_REG_NUM_KEY_FIRST = 2;
2530 const int XMM_REG_NUM_KEY_LAST = 7;
2531 const int FIRST_NON_REG_KEY_offset = 0x70; // first key offset that is not preloaded into a register (0x10..0x60 are)
2532 const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
2533
2534 __ enter(); // required for proper stackwalking of RuntimeStub frame
2535 handleSOERegisters(true /*saving*/);
2536
2537 // load registers from incoming parameters
2538 const Address from_param(rbp, 8+0);
2539 const Address to_param (rbp, 8+4);
2540 const Address key_param (rbp, 8+8);
2541 const Address rvec_param (rbp, 8+12);
2542 const Address len_param (rbp, 8+16);
2543 __ movptr(from , from_param);
2544 __ movptr(to , to_param);
2545 __ movptr(key , key_param);
2546 __ movptr(rvec , rvec_param);
2547 __ movptr(len_reg , len_param);
2548
2549 // the java expanded key ordering is rotated one position from what we want
2550 // so we start from 0x10 here and hit 0x00 last
2551 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
2552 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2553 // load up xmm regs 2 thru 7 with the first 6 keys (key offsets 0x10 thru 0x60)
2554 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2555 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
2556 offset += 0x10;
2557 }
2558
2559 // inside here, use the rvec register to point to previous block cipher
2560 // with which we xor at the end of each newly decrypted block
2561 const Register prev_block_cipher_ptr = rvec; // NOTE(review): after the first block this points into 'from'; if 'to' aliases 'from' the previous cipher block would already be overwritten - confirm callers never pass the same buffer
2562
2563 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (44=128, 52=192, or 60=256))
2564 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2565 __ cmpl(rax, 44);
2566 __ jcc(Assembler::notEqual, L_key_192_256);
2567
2568
2569 // 128-bit code follows here (one block per iteration, not parallelized)
2570 __ movptr(pos, 0); // pos is rax, so the key length loaded above is overwritten from here on
2571 __ align(OptoLoopAlignment);
2572 __ BIND(L_singleBlock_loopTop_128);
2573 __ cmpptr(len_reg, 0); // any blocks left??
2574 __ jcc(Assembler::equal, L_exit); // this loop tests the length at the top, unlike the 192/256-bit loops below
2575 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
2576 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
2577 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { // rounds using the keys preloaded in xmm3-xmm7
2578 __ aesdec(xmm_result, as_XMMRegister(rnum));
2579 }
2580 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0
2581 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2582 }
2583 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
2584 __ aesdeclast(xmm_result, xmm_temp);
2585 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); // previous cipher block (r on the first iteration)
2586 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2587 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2588 // no need to store r to memory until we exit
2589 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
2590 __ addptr(pos, AESBlockSize);
2591 __ subptr(len_reg, AESBlockSize);
2592 __ jmp(L_singleBlock_loopTop_128);
2593
2594
2595 __ BIND(L_exit); // shared exit for all three key sizes
2596 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); // last cipher block processed (or the unchanged r if no block was processed)
2597 __ movptr(rvec , rvec_param); // restore this since used in loop
2598 __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object
2599 handleSOERegisters(false /*restoring*/);
2600 __ movl(rax, 0); // return 0 (why?)
2601 __ leave(); // required for proper stackwalking of RuntimeStub frame
2602 __ ret(0);
2603
2604
2605 __ BIND(L_key_192_256);
2606 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
2607 __ cmpl(rax, 52);
2608 __ jcc(Assembler::notEqual, L_key_256);
2609
2610 // 192-bit code follows here (could be optimized to use parallelism)
2611 __ movptr(pos, 0);
2612 __ align(OptoLoopAlignment);
2613 __ BIND(L_singleBlock_loopTop_192); // length is tested only at the bottom of this loop, so at least one block is processed; NOTE(review): inconsistent with the 128-bit loop - confirm a zero length cannot reach here
2614 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
2615 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
2616 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2617 __ aesdec(xmm_result, as_XMMRegister(rnum));
2618 }
2619 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0
2620 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2621 }
2622 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
2623 __ aesdeclast(xmm_result, xmm_temp);
2624 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); // previous cipher block (r on the first iteration)
2625 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2626 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2627 // no need to store r to memory until we exit
2628 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
2629 __ addptr(pos, AESBlockSize);
2630 __ subptr(len_reg, AESBlockSize);
2631 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
2632 __ jmp(L_exit);
2633
2634 __ BIND(L_key_256); // reached for any key length other than 44 or 52 ints
2635 // 256-bit code follows here (could be optimized to use parallelism)
2636 __ movptr(pos, 0);
2637 __ align(OptoLoopAlignment);
2638 __ BIND(L_singleBlock_loopTop_256); // length is tested only at the bottom of this loop, same as the 192-bit loop
2639 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
2640 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds
2641 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2642 __ aesdec(xmm_result, as_XMMRegister(rnum));
2643 }
2644 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0
2645 aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2646 }
2647 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0
2648 __ aesdeclast(xmm_result, xmm_temp);
2649 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); // previous cipher block (r on the first iteration)
2650 __ pxor (xmm_result, xmm_temp); // xor with the current r vector
2651 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
2652 // no need to store r to memory until we exit
2653 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr
2654 __ addptr(pos, AESBlockSize);
2655 __ subptr(len_reg, AESBlockSize);
2656 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
2657 __ jmp(L_exit);
2658
2659 return start;
2660 }
2661
2157 2662
2158 public: 2663 public:
2159 // Information about frame layout at time of blocking runtime call. 2664 // Information about frame layout at time of blocking runtime call.
2160 // Note that we only have to preserve callee-saved registers since 2665 // Note that we only have to preserve callee-saved registers since
2161 // the compilers are responsible for supplying a continuation point 2666 // the compilers are responsible for supplying a continuation point
2348 2853
2349 // arraycopy stubs used by compilers 2854 // arraycopy stubs used by compilers
2350 generate_arraycopy_stubs(); 2855 generate_arraycopy_stubs();
2351 2856
2352 generate_math_stubs(); 2857 generate_math_stubs();
2858
2859 // don't bother generating these AES intrinsic stubs unless global flag is set
2860 if (UseAESIntrinsics) {
2861 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
2862
2863 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
2864 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
2865 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
2866 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
2867 }
2353 } 2868 }
2354 2869
2355 2870
2356 public: 2871 public:
2357 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { 2872 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {