# HG changeset patch # User morris # Date 1407882576 0 # Node ID f5b4600d736811408b582149d99e65f2e9c3ad3f # Parent 46bbe04d1cadc20ca01002d1437b9ebdec5f13d7# Parent b20a35eae4425be2b04b7d33c1a3db577cc0f93a Merge diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -123,6 +123,7 @@ fpop2_op3 = 0x35, impdep1_op3 = 0x36, aes3_op3 = 0x36, + sha_op3 = 0x36, alignaddr_op3 = 0x36, faligndata_op3 = 0x36, flog3_op3 = 0x36, @@ -223,7 +224,11 @@ mwtos_opf = 0x119, aes_kexpand0_opf = 0x130, - aes_kexpand2_opf = 0x131 + aes_kexpand2_opf = 0x131, + + sha1_opf = 0x141, + sha256_opf = 0x142, + sha512_opf = 0x143 }; enum op5s { @@ -595,6 +600,11 @@ // AES crypto instructions supported only on certain processors static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); } + // SHA crypto instructions supported only on certain processors + static void sha1_only() { assert( VM_Version::has_sha1(), "This instruction only works on SPARC with SHA1"); } + static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); } + static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); } + // instruction only in VIS1 static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); } @@ -1179,7 +1189,6 @@ u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); } inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); } - // VIS1 instructions void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); } @@ -1203,6 +1212,12 @@ void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); } void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); } + // Crypto SHA instructions + + void sha1() { sha1_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); } + void sha256() { sha256_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); } + void sha512() { sha512_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); } + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { #ifdef CHECK_DELAY diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -4575,6 +4575,219 @@ return start; } + address generate_sha1_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_sha1_loop, L_sha1_unaligned_input, L_sha1_unaligned_input_loop; + int i; + + Register buf = O0; // byte[] source+offset + Register state = O1; // int[] SHA.state + Register ofs = O2; // int offset + Register limit = O3; // int limit + + // load state into F0-F4 + for (i = 0; i < 5; i++) { + __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i)); + } + + __ andcc(buf, 7, G0); + __ br(Assembler::notZero, false, Assembler::pn, L_sha1_unaligned_input); + __ delayed()->nop(); + + __ BIND(L_sha1_loop); + // load buf into F8-F22 + for (i = 0; i < 8; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8)); + } + __ sha1(); + if (multi_block) { + __ add(ofs, 64, ofs); + __ add(buf, 64, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F4 into state and return + for (i = 0; i < 4; i++) { + __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10); + + __ BIND(L_sha1_unaligned_input); + __ alignaddr(buf, G0, buf); + + __ BIND(L_sha1_unaligned_input_loop); + // load buf into F8-F22 + for (i = 0; i < 9; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8)); + } + for (i = 0; i < 8; i++) { + __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8)); + } + __ sha1(); + if (multi_block) { + __ add(ofs, 64, ofs); + __ add(buf, 64, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_unaligned_input_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F4 into state and return + for (i = 0; i < 4; i++) { + __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10); + + return start; + } + + address generate_sha256_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_sha256_loop, L_sha256_unaligned_input, L_sha256_unaligned_input_loop; + int i; + + Register buf = O0; // byte[] source+offset + Register state = O1; // int[] SHA2.state + Register ofs = O2; // int offset + Register limit = O3; // int limit + + // load state into F0-F7 + for (i = 0; i < 8; i++) { + __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i)); + } + + __ andcc(buf, 7, G0); + __ br(Assembler::notZero, false, Assembler::pn, L_sha256_unaligned_input); + __ delayed()->nop(); + + __ BIND(L_sha256_loop); + // load buf into F8-F22 + for (i = 0; i < 8; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8)); + } + __ sha256(); + if (multi_block) { + __ add(ofs, 64, ofs); + __ add(buf, 64, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F7 into state and return + for (i = 0; i < 7; i++) { + __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c); + + __ BIND(L_sha256_unaligned_input); + __ alignaddr(buf, G0, buf); + + __ BIND(L_sha256_unaligned_input_loop); + // load buf into F8-F22 + for (i = 0; i < 9; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8)); + } + for (i = 0; i < 8; i++) { + __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8)); + } + __ sha256(); + if (multi_block) { + __ add(ofs, 64, ofs); + __ add(buf, 64, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_unaligned_input_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F7 into state and return + for (i = 0; i < 7; i++) { + __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c); + + return start; + } + + address generate_sha512_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label L_sha512_loop, L_sha512_unaligned_input, L_sha512_unaligned_input_loop; + int i; + + Register buf = O0; // byte[] source+offset + Register state = O1; // long[] SHA5.state + Register ofs = O2; // int offset + Register limit = O3; // int limit + + // load state into F0-F14 + for (i = 0; i < 8; i++) { + __ ldf(FloatRegisterImpl::D, state, i*8, as_FloatRegister(i*2)); + } + + __ andcc(buf, 7, G0); + __ br(Assembler::notZero, false, Assembler::pn, L_sha512_unaligned_input); + __ delayed()->nop(); + + __ BIND(L_sha512_loop); + // load buf into F16-F46 + for (i = 0; i < 16; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16)); + } + __ sha512(); + if (multi_block) { + __ add(ofs, 128, ofs); + __ add(buf, 128, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F14 into state and return + for (i = 0; i < 7; i++) { + __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38); + + __ BIND(L_sha512_unaligned_input); + __ alignaddr(buf, G0, buf); + + __ BIND(L_sha512_unaligned_input_loop); + // load buf into F16-F46 + for (i = 0; i < 17; i++) { + __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16)); + } + for (i = 0; i < 16; i++) { + __ faligndata(as_FloatRegister(i*2 + 16), as_FloatRegister(i*2 + 18), as_FloatRegister(i*2 + 16)); + } + __ sha512(); + if (multi_block) { + __ add(ofs, 128, ofs); + __ add(buf, 128, buf); + __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_unaligned_input_loop); + __ mov(ofs, O0); // to be returned + } + + // store F0-F14 into state and return + for (i = 0; i < 7; i++) { + __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8); + } + __ retl(); + __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38); + + return start; + } + void generate_initial() { // Generates all stubs and initializes the entry points @@ -4647,6 +4860,20 @@ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); } + + // generate SHA1/SHA256/SHA512 intrinsics code + if (UseSHA1Intrinsics) { + StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); + StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); + } + if (UseSHA256Intrinsics) { + StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); + StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); + } + if (UseSHA512Intrinsics) { + StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); + StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); + } } diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/sparc/vm/stubRoutines_sparc.hpp --- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -41,7 +41,7 @@ enum /* platform_dependent_constants */ { // %%%%%%%% May be able to shrink this a lot code_size1 = 20000, // simply increase if too small (assembler will crash if too small) - code_size2 = 22000 // simply increase if too small (assembler will crash if too small) + code_size2 = 23000 // simply increase if too small (assembler will crash if too small) }; class Sparc { diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/sparc/vm/vm_version_sparc.cpp --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -234,7 +234,7 @@ assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); char buf[512]; - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")), (has_hardware_popc() ? ", popc" : ""), (has_vis1() ? ", vis1" : ""), @@ -243,6 +243,9 @@ (has_blk_init() ? ", blk_init" : ""), (has_cbcond() ? ", cbcond" : ""), (has_aes() ? ", aes" : ""), + (has_sha1() ? ", sha1" : ""), + (has_sha256() ? ", sha256" : ""), + (has_sha512() ? ", sha512" : ""), (is_ultra3() ? ", ultra3" : ""), (is_sun4v() ? ", sun4v" : ""), (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")), @@ -301,6 +304,58 @@ } } + // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times + if (has_sha1() || has_sha256() || has_sha512()) { + if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions + if (FLAG_IS_DEFAULT(UseSHA)) { + FLAG_SET_DEFAULT(UseSHA, true); + } + } else { + if (UseSHA) { + warning("SPARC SHA intrinsics require VIS1 instruction support. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseSHA, false); + } + } + } else if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (!UseSHA) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } else { + if (has_sha1()) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { + warning("SHA1 instruction is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + if (has_sha256()) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { + warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + + if (has_sha512()) { + if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); + } + } else if (UseSHA512Intrinsics) { + warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA, false); + } + } + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size; diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/sparc/vm/vm_version_sparc.hpp --- a/src/cpu/sparc/vm/vm_version_sparc.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -50,7 +50,10 @@ T_family = 16, T1_model = 17, sparc5_instructions = 18, - aes_instructions = 19 + aes_instructions = 19, + sha1_instruction = 20, + sha256_instruction = 21, + sha512_instruction = 22 }; enum Feature_Flag_Set { @@ -77,6 +80,9 @@ T1_model_m = 1 << T1_model, sparc5_instructions_m = 1 << sparc5_instructions, aes_instructions_m = 1 << aes_instructions, + sha1_instruction_m = 1 << sha1_instruction, + sha256_instruction_m = 1 << sha256_instruction, + sha512_instruction_m = 1 << sha512_instruction, generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m, generic_v9_m = generic_v8_m | v9_instructions_m, @@ -129,6 +135,9 @@ static bool has_cbcond() { return (_features & cbcond_instructions_m) != 0; } static bool has_sparc5_instr() { return (_features & sparc5_instructions_m) != 0; } static bool has_aes() { return (_features & aes_instructions_m) != 0; } + static bool has_sha1() { return (_features & sha1_instruction_m) != 0; } + static bool has_sha256() { return (_features & sha256_instruction_m) != 0; } + static bool has_sha512() { return (_features & sha512_instruction_m) != 0; } static bool supports_compare_and_exchange() { return has_v9(); } diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/x86/vm/assembler_x86.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -3854,6 +3854,15 @@ } // Carry-Less Multiplication Quadword +void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { + assert(VM_Version::supports_clmul(), ""); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); + emit_int8(0x44); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8((unsigned char)mask); +} + +// Carry-Less Multiplication Quadword void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) { assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), ""); bool vector256 = false; diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/x86/vm/assembler_x86.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -1837,6 +1837,7 @@ void vpbroadcastd(XMMRegister dst, XMMRegister src); // Carry-Less Multiplication Quadword + void pclmulqdq(XMMRegister dst, XMMRegister src, int mask); void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask); // AVX instruction which is used to clear upper 128 bits of YMM registers and diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -7316,17 +7316,34 @@ * Fold 128-bit data chunk */ void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) { - vpclmulhdq(xtmp, xK, xcrc); // [123:64] - vpclmulldq(xcrc, xK, xcrc); // [63:0] - vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */); - pxor(xcrc, xtmp); + if (UseAVX > 0) { + vpclmulhdq(xtmp, xK, xcrc); // [123:64] + vpclmulldq(xcrc, xK, xcrc); // [63:0] + vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */); + pxor(xcrc, xtmp); + } else { + movdqa(xtmp, xcrc); + pclmulhdq(xtmp, xK); // [123:64] + pclmulldq(xcrc, xK); // [63:0] + pxor(xcrc, xtmp); + movdqu(xtmp, Address(buf, offset)); + pxor(xcrc, xtmp); + } } void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) { - vpclmulhdq(xtmp, xK, xcrc); - vpclmulldq(xcrc, xK, xcrc); - pxor(xcrc, xbuf); - pxor(xcrc, xtmp); + if (UseAVX > 0) { + vpclmulhdq(xtmp, xK, xcrc); + vpclmulldq(xcrc, xK, xcrc); + pxor(xcrc, xbuf); + pxor(xcrc, xtmp); + } else { + movdqa(xtmp, xcrc); + pclmulhdq(xtmp, xK); + pclmulldq(xcrc, xK); + pxor(xcrc, xbuf); + pxor(xcrc, xtmp); + } } /** @@ -7444,9 +7461,17 @@ // Fold 128 bits in xmm1 down into 32 bits in crc register. BIND(L_fold_128b); movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr())); - vpclmulqdq(xmm2, xmm0, xmm1, 0x1); - vpand(xmm3, xmm0, xmm2, false /* vector256 */); - vpclmulqdq(xmm0, xmm0, xmm3, 0x1); + if (UseAVX > 0) { + vpclmulqdq(xmm2, xmm0, xmm1, 0x1); + vpand(xmm3, xmm0, xmm2, false /* vector256 */); + vpclmulqdq(xmm0, xmm0, xmm3, 0x1); + } else { + movdqa(xmm2, xmm0); + pclmulqdq(xmm2, xmm1, 0x1); + movdqa(xmm3, xmm0); + pand(xmm3, xmm2); + pclmulqdq(xmm0, xmm3, 0x1); + } psrldq(xmm1, 8); psrldq(xmm2, 4); pxor(xmm0, xmm1); diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/x86/vm/macroAssembler_x86.hpp --- a/src/cpu/x86/vm/macroAssembler_x86.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -966,6 +966,16 @@ void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } void mulss(XMMRegister dst, AddressLiteral src); + // Carry-Less Multiplication Quadword + void pclmulldq(XMMRegister dst, XMMRegister src) { + // 0x00 - multiply lower 64 bits [0:63] + Assembler::pclmulqdq(dst, src, 0x00); + } + void pclmulhdq(XMMRegister dst, XMMRegister src) { + // 0x11 - multiply upper 64 bits [64:127] + Assembler::pclmulqdq(dst, src, 0x11); + } + void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, AddressLiteral src); diff -r 46bbe04d1cad -r f5b4600d7368 src/cpu/x86/vm/vm_version_x86.cpp --- a/src/cpu/x86/vm/vm_version_x86.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -568,7 +568,7 @@ FLAG_SET_DEFAULT(UseCLMUL, false); } - if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) { + if (UseCLMUL && (UseSSE > 2)) { if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { UseCRC32Intrinsics = true; } @@ -590,6 +590,17 @@ FLAG_SET_DEFAULT(UseAESIntrinsics, false); } + if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { + warning("SHA intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + // Adjust RTM (Restricted Transactional Memory) flags if (!supports_rtm() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag @@ -803,6 +814,21 @@ } } } + if ((cpu_family() == 0x06) && + ((extended_cpu_model() == 0x36) || // Centerton + (extended_cpu_model() == 0x37) || // Silvermont + (extended_cpu_model() == 0x4D))) { +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(OptoScheduling)) { + OptoScheduling = true; + } +#endif + if (supports_sse4_2()) { // Silvermont + if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { + UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus + } + } + } } // Use count leading zeros count instruction if available. @@ -890,23 +916,25 @@ AllocatePrefetchDistance = allocate_prefetch_distance(); AllocatePrefetchStyle = allocate_prefetch_style(); - if( is_intel() && cpu_family() == 6 && supports_sse3() ) { - if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core + if (is_intel() && cpu_family() == 6 && supports_sse3()) { + if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core #ifdef _LP64 AllocatePrefetchDistance = 384; #else AllocatePrefetchDistance = 320; #endif } - if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus + if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus AllocatePrefetchDistance = 192; AllocatePrefetchLines = 4; + } #ifdef COMPILER2 - if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) { + if (supports_sse4_2()) { + if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { FLAG_SET_DEFAULT(UseFPUForSpilling, true); } + } #endif - } } assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); diff -r 46bbe04d1cad -r f5b4600d7368 src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -137,6 +137,21 @@ #endif if (av & AV_SPARC_AES) features |= aes_instructions_m; +#ifndef AV_SPARC_SHA1 +#define AV_SPARC_SHA1 0x00400000 /* sha1 instruction supported */ +#endif + if (av & AV_SPARC_SHA1) features |= sha1_instruction_m; + +#ifndef AV_SPARC_SHA256 +#define AV_SPARC_SHA256 0x00800000 /* sha256 instruction supported */ +#endif + if (av & AV_SPARC_SHA256) features |= sha256_instruction_m; + +#ifndef AV_SPARC_SHA512 +#define AV_SPARC_SHA512 0x01000000 /* sha512 instruction supported */ +#endif + if (av & AV_SPARC_SHA512) features |= sha512_instruction_m; + } else { // getisax(2) failed, use the old legacy code. #ifndef PRODUCT diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/classfile/vmSymbols.hpp --- a/src/share/vm/classfile/vmSymbols.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/classfile/vmSymbols.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -789,6 +789,26 @@ do_name( decrypt_name, "decrypt") \ do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)I") \ \ + /* support for sun.security.provider.SHA */ \ + do_class(sun_security_provider_sha, "sun/security/provider/SHA") \ + do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R) \ + do_name( implCompress_name, "implCompress") \ + do_signature(implCompress_signature, "([BI)V") \ + \ + /* support for sun.security.provider.SHA2 */ \ + do_class(sun_security_provider_sha2, "sun/security/provider/SHA2") \ + do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R) \ + \ + /* support for sun.security.provider.SHA5 */ \ + do_class(sun_security_provider_sha5, "sun/security/provider/SHA5") \ + do_intrinsic(_sha5_implCompress, sun_security_provider_sha5, implCompress_name, implCompress_signature, F_R) \ + \ + /* support for sun.security.provider.DigestBase */ \ + do_class(sun_security_provider_digestbase, "sun/security/provider/DigestBase") \ + do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, implCompressMB_signature, F_R) \ + do_name( implCompressMB_name, "implCompressMultiBlock") \ + do_signature(implCompressMB_signature, "([BII)I") \ + \ /* support for java.util.zip */ \ do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \ do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \ diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/callGenerator.cpp --- a/src/share/vm/opto/callGenerator.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/callGenerator.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -710,7 +710,15 @@ Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO); iophi->set_req(2, slow_map->i_o()); kit.set_i_o(gvn.transform(iophi)); + // Merge memory kit.merge_memory(slow_map->merged_memory(), region, 2); + // Transform new memory Phis. + for (MergeMemStream mms(kit.merged_memory()); mms.next_non_empty();) { + Node* phi = mms.memory(); + if (phi->is_Phi() && phi->in(0) == region) { + mms.set_memory(gvn.transform(phi)); + } + } uint tos = kit.jvms()->stkoff() + kit.sp(); uint limit = slow_map->req(); for (uint i = TypeFunc::Parms; i < limit; i++) { @@ -864,15 +872,15 @@ } -//------------------------PredictedIntrinsicGenerator------------------------------ -// Internal class which handles all predicted Intrinsic calls. -class PredictedIntrinsicGenerator : public CallGenerator { +//------------------------PredicatedIntrinsicGenerator------------------------------ +// Internal class which handles all predicated Intrinsic calls. +class PredicatedIntrinsicGenerator : public CallGenerator { CallGenerator* _intrinsic; CallGenerator* _cg; public: - PredictedIntrinsicGenerator(CallGenerator* intrinsic, - CallGenerator* cg) + PredicatedIntrinsicGenerator(CallGenerator* intrinsic, + CallGenerator* cg) : CallGenerator(cg->method()) { _intrinsic = intrinsic; @@ -887,103 +895,182 @@ }; -CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic, - CallGenerator* cg) { - return new PredictedIntrinsicGenerator(intrinsic, cg); +CallGenerator* CallGenerator::for_predicated_intrinsic(CallGenerator* intrinsic, + CallGenerator* cg) { + return new PredicatedIntrinsicGenerator(intrinsic, cg); } -JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) { +JVMState* PredicatedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) { + // The code we want to generate here is: + // if (receiver == NULL) + // uncommon_Trap + // if (predicate(0)) + // do_intrinsic(0) + // else + // if (predicate(1)) + // do_intrinsic(1) + // ... + // else + // do_java_comp + GraphKit kit(jvms); PhaseGVN& gvn = kit.gvn(); CompileLog* log = kit.C->log(); if (log != NULL) { - log->elem("predicted_intrinsic bci='%d' method='%d'", + log->elem("predicated_intrinsic bci='%d' method='%d'", jvms->bci(), log->identify(method())); } - Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms()); - if (kit.failing()) - return NULL; // might happen because of NodeCountInliningCutoff - - SafePointNode* slow_map = NULL; - JVMState* slow_jvms; - if (slow_ctl != NULL) { - PreserveJVMState pjvms(&kit); - kit.set_control(slow_ctl); - if (!kit.stopped()) { - slow_jvms = _cg->generate(kit.sync_jvms(), parent_parser); - if (kit.failing()) - return NULL; // might happen because of NodeCountInliningCutoff - assert(slow_jvms != NULL, "must be"); - kit.add_exception_states_from(slow_jvms); - kit.set_map(slow_jvms->map()); - if (!kit.stopped()) - slow_map = kit.stop(); + if (!method()->is_static()) { + // We need an explicit receiver null_check before checking its type in predicate. + // We share a map with the caller, so his JVMS gets adjusted. + Node* receiver = kit.null_check_receiver_before_call(method()); + if (kit.stopped()) { + return kit.transfer_exceptions_into_jvms(); } } - if (kit.stopped()) { - // Predicate is always false. - kit.set_jvms(slow_jvms); + int n_predicates = _intrinsic->predicates_count(); + assert(n_predicates > 0, "sanity"); + + JVMState** result_jvms = NEW_RESOURCE_ARRAY(JVMState*, (n_predicates+1)); + + // Region for normal compilation code if intrinsic failed. + Node* slow_region = new (kit.C) RegionNode(1); + + int results = 0; + for (int predicate = 0; (predicate < n_predicates) && !kit.stopped(); predicate++) { +#ifdef ASSERT + JVMState* old_jvms = kit.jvms(); + SafePointNode* old_map = kit.map(); + Node* old_io = old_map->i_o(); + Node* old_mem = old_map->memory(); + Node* old_exc = old_map->next_exception(); +#endif + Node* else_ctrl = _intrinsic->generate_predicate(kit.sync_jvms(), predicate); +#ifdef ASSERT + // Assert(no_new_memory && no_new_io && no_new_exceptions) after generate_predicate. + assert(old_jvms == kit.jvms(), "generate_predicate should not change jvm state"); + SafePointNode* new_map = kit.map(); + assert(old_io == new_map->i_o(), "generate_predicate should not change i_o"); + assert(old_mem == new_map->memory(), "generate_predicate should not change memory"); + assert(old_exc == new_map->next_exception(), "generate_predicate should not add exceptions"); +#endif + if (!kit.stopped()) { + PreserveJVMState pjvms(&kit); + // Generate intrinsic code: + JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser); + if (new_jvms == NULL) { + // Intrinsic failed, use normal compilation path for this predicate. + slow_region->add_req(kit.control()); + } else { + kit.add_exception_states_from(new_jvms); + kit.set_jvms(new_jvms); + if (!kit.stopped()) { + result_jvms[results++] = kit.jvms(); + } + } + } + if (else_ctrl == NULL) { + else_ctrl = kit.C->top(); + } + kit.set_control(else_ctrl); + } + if (!kit.stopped()) { + // Final 'else' after predicates. + slow_region->add_req(kit.control()); + } + if (slow_region->req() > 1) { + PreserveJVMState pjvms(&kit); + // Generate normal compilation code: + kit.set_control(gvn.transform(slow_region)); + JVMState* new_jvms = _cg->generate(kit.sync_jvms(), parent_parser); + if (kit.failing()) + return NULL; // might happen because of NodeCountInliningCutoff + assert(new_jvms != NULL, "must be"); + kit.add_exception_states_from(new_jvms); + kit.set_jvms(new_jvms); + if (!kit.stopped()) { + result_jvms[results++] = kit.jvms(); + } + } + + if (results == 0) { + // All paths ended in uncommon traps. + (void) kit.stop(); return kit.transfer_exceptions_into_jvms(); } - // Generate intrinsic code: - JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser); - if (new_jvms == NULL) { - // Intrinsic failed, so use slow code or make a direct call. - if (slow_map == NULL) { - CallGenerator* cg = CallGenerator::for_direct_call(method()); - new_jvms = cg->generate(kit.sync_jvms(), parent_parser); - } else { - kit.set_jvms(slow_jvms); - return kit.transfer_exceptions_into_jvms(); - } - } - kit.add_exception_states_from(new_jvms); - kit.set_jvms(new_jvms); - - // Need to merge slow and fast? - if (slow_map == NULL) { - // The fast path is the only path remaining. + if (results == 1) { // Only one path + kit.set_jvms(result_jvms[0]); return kit.transfer_exceptions_into_jvms(); } - if (kit.stopped()) { - // Intrinsic method threw an exception, so it's just the slow path after all. - kit.set_jvms(slow_jvms); - return kit.transfer_exceptions_into_jvms(); + // Merge all paths. + kit.C->set_has_split_ifs(true); // Has chance for split-if optimization + RegionNode* region = new (kit.C) RegionNode(results + 1); + Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO); + for (int i = 0; i < results; i++) { + JVMState* jvms = result_jvms[i]; + int path = i + 1; + SafePointNode* map = jvms->map(); + region->init_req(path, map->control()); + iophi->set_req(path, map->i_o()); + if (i == 0) { + kit.set_jvms(jvms); + } else { + kit.merge_memory(map->merged_memory(), region, path); + } + } + kit.set_control(gvn.transform(region)); + kit.set_i_o(gvn.transform(iophi)); + // Transform new memory Phis. + for (MergeMemStream mms(kit.merged_memory()); mms.next_non_empty();) { + Node* phi = mms.memory(); + if (phi->is_Phi() && phi->in(0) == region) { + mms.set_memory(gvn.transform(phi)); + } } - // Finish the diamond. - kit.C->set_has_split_ifs(true); // Has chance for split-if optimization - RegionNode* region = new (kit.C) RegionNode(3); - region->init_req(1, kit.control()); - region->init_req(2, slow_map->control()); - kit.set_control(gvn.transform(region)); - Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO); - iophi->set_req(2, slow_map->i_o()); - kit.set_i_o(gvn.transform(iophi)); - kit.merge_memory(slow_map->merged_memory(), region, 2); + // Merge debug info. + Node** ins = NEW_RESOURCE_ARRAY(Node*, results); uint tos = kit.jvms()->stkoff() + kit.sp(); - uint limit = slow_map->req(); + Node* map = kit.map(); + uint limit = map->req(); for (uint i = TypeFunc::Parms; i < limit; i++) { // Skip unused stack slots; fast forward to monoff(); if (i == tos) { i = kit.jvms()->monoff(); if( i >= limit ) break; } - Node* m = kit.map()->in(i); - Node* n = slow_map->in(i); - if (m != n) { - const Type* t = gvn.type(m)->meet_speculative(gvn.type(n)); - Node* phi = PhiNode::make(region, m, t); - phi->set_req(2, n); - kit.map()->set_req(i, gvn.transform(phi)); + Node* n = map->in(i); + ins[0] = n; + const Type* t = gvn.type(n); + bool needs_phi = false; + for (int j = 1; j < results; j++) { + JVMState* jvms = result_jvms[j]; + Node* jmap = jvms->map(); + Node* m = NULL; + if (jmap->req() > i) { + m = jmap->in(i); + if (m != n) { + needs_phi = true; + t = t->meet_speculative(gvn.type(m)); + } + } + ins[j] = m; + } + if (needs_phi) { + Node* phi = PhiNode::make(region, n, t); + for (int j = 1; j < results; j++) { + phi->set_req(j + 1, ins[j]); + } + map->set_req(i, gvn.transform(phi)); } } + return kit.transfer_exceptions_into_jvms(); } diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/callGenerator.hpp --- a/src/share/vm/opto/callGenerator.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/callGenerator.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -63,8 +63,9 @@ virtual bool is_virtual() const { return false; } // is_deferred: The decision whether to inline or not is deferred. virtual bool is_deferred() const { return false; } - // is_predicted: Uses an explicit check against a predicted type. - virtual bool is_predicted() const { return false; } + // is_predicated: Uses an explicit check (predicate). + virtual bool is_predicated() const { return false; } + virtual int predicates_count() const { return 0; } // is_trap: Does not return to the caller. (E.g., uncommon trap.) virtual bool is_trap() const { return false; } // does_virtual_dispatch: Should try inlining as normal method first. @@ -157,9 +158,9 @@ // Registry for intrinsics: static CallGenerator* for_intrinsic(ciMethod* m); static void register_intrinsic(ciMethod* m, CallGenerator* cg); - static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic, - CallGenerator* cg); - virtual Node* generate_predicate(JVMState* jvms) { return NULL; }; + static CallGenerator* for_predicated_intrinsic(CallGenerator* intrinsic, + CallGenerator* cg); + virtual Node* generate_predicate(JVMState* jvms, int predicate) { return NULL; }; virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); } diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/doCall.cpp --- a/src/share/vm/opto/doCall.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/doCall.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -115,12 +115,12 @@ if (allow_inline && allow_intrinsics) { CallGenerator* cg = find_intrinsic(callee, call_does_dispatch); if (cg != NULL) { - if (cg->is_predicted()) { + if (cg->is_predicated()) { // Code without intrinsic but, hopefully, inlined. CallGenerator* inline_cg = this->call_generator(callee, vtable_index, call_does_dispatch, jvms, allow_inline, prof_factor, speculative_receiver_type, false); if (inline_cg != NULL) { - cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg); + cg = CallGenerator::for_predicated_intrinsic(cg, inline_cg); } } diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/escape.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -938,7 +938,13 @@ strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 || - strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0) + strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 || + strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 || + strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 || + strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 || + strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 || + strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 || + strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0) ))) { call->dump(); fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name)); diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/graphKit.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -2435,23 +2435,24 @@ Node* new_slice = mms.memory2(); if (old_slice != new_slice) { PhiNode* phi; - if (new_slice->is_Phi() && new_slice->as_Phi()->region() == region) { - phi = new_slice->as_Phi(); - #ifdef ASSERT - if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region) - old_slice = old_slice->in(new_path); - // Caller is responsible for ensuring that any pre-existing - // phis are already aware of old memory. - int old_path = (new_path > 1) ? 1 : 2; // choose old_path != new_path - assert(phi->in(old_path) == old_slice, "pre-existing phis OK"); - #endif - mms.set_memory(phi); + if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region) { + if (mms.is_empty()) { + // clone base memory Phi's inputs for this memory slice + assert(old_slice == mms.base_memory(), "sanity"); + phi = PhiNode::make(region, NULL, Type::MEMORY, mms.adr_type(C)); + _gvn.set_type(phi, Type::MEMORY); + for (uint i = 1; i < phi->req(); i++) { + phi->init_req(i, old_slice->in(i)); + } + } else { + phi = old_slice->as_Phi(); // Phi was generated already + } } else { phi = PhiNode::make(region, old_slice, Type::MEMORY, mms.adr_type(C)); _gvn.set_type(phi, Type::MEMORY); - phi->set_req(new_path, new_slice); - mms.set_memory(_gvn.transform(phi)); // assume it is complete } + phi->set_req(new_path, new_slice); + mms.set_memory(phi); } } } diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/lcm.cpp --- a/src/share/vm/opto/lcm.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/lcm.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -484,7 +484,9 @@ iop == Op_CreateEx || // Create-exception must start block iop == Op_CheckCastPP ) { - worklist.map(i,worklist.pop()); + // select the node n + // remove n from worklist and retain the order of remaining nodes + worklist.remove((uint)i); return n; } @@ -570,7 +572,9 @@ assert(idx >= 0, "index should be set"); Node *n = worklist[(uint)idx]; // Get the winner - worklist.map((uint)idx, worklist.pop()); // Compress worklist + // select the node n + // remove n from worklist and retain the order of remaining nodes + worklist.remove((uint)idx); return n; } diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/library_call.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -46,25 +46,28 @@ public: private: bool _is_virtual; - bool _is_predicted; bool _does_virtual_dispatch; + int8_t _predicates_count; // Intrinsic is predicated by several conditions + int8_t _last_predicate; // Last generated predicate vmIntrinsics::ID _intrinsic_id; public: - LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, bool does_virtual_dispatch, vmIntrinsics::ID id) + LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id) : InlineCallGenerator(m), _is_virtual(is_virtual), - _is_predicted(is_predicted), _does_virtual_dispatch(does_virtual_dispatch), + _predicates_count((int8_t)predicates_count), + _last_predicate((int8_t)-1), _intrinsic_id(id) { } virtual bool is_intrinsic() const { return true; } virtual bool is_virtual() const { return _is_virtual; } - virtual bool is_predicted() const { return _is_predicted; } + virtual bool is_predicated() const { return _predicates_count > 0; } + virtual int predicates_count() const { return _predicates_count; } virtual bool does_virtual_dispatch() const { return _does_virtual_dispatch; } virtual JVMState* generate(JVMState* jvms, Parse* parent_parser); - virtual Node* generate_predicate(JVMState* jvms); + virtual Node* generate_predicate(JVMState* jvms, int predicate); vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; } }; @@ -107,8 +110,8 @@ vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); } ciMethod* callee() const { return _intrinsic->method(); } - bool try_to_inline(); - Node* try_to_predicate(); + bool try_to_inline(int predicate); + Node* try_to_predicate(int predicate); void push_result() { // Push the result onto the stack. @@ -307,6 +310,14 @@ Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting); Node* get_key_start_from_aescrypt_object(Node* aescrypt_object); Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object); + bool inline_sha_implCompress(vmIntrinsics::ID id); + bool inline_digestBase_implCompressMB(int predicate); + bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA, + bool long_state, address stubAddr, const char *stubName, + Node* src_start, Node* ofs, Node* limit); + Node* get_state_from_sha_object(Node *sha_object); + Node* get_state_from_sha5_object(Node *sha_object); + Node* inline_digestBase_implCompressMB_predicate(int predicate); bool inline_encodeISOArray(); bool inline_updateCRC32(); bool inline_updateBytesCRC32(); @@ -367,7 +378,7 @@ } } - bool is_predicted = false; + int predicates = 0; bool does_virtual_dispatch = false; switch (id) { @@ -508,7 +519,24 @@ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: if (!UseAESIntrinsics) return NULL; // these two require the predicated logic - is_predicted = true; + predicates = 1; + break; + + case vmIntrinsics::_sha_implCompress: + if (!UseSHA1Intrinsics) return NULL; + break; + + case vmIntrinsics::_sha2_implCompress: + if (!UseSHA256Intrinsics) return NULL; + break; + + case vmIntrinsics::_sha5_implCompress: + if (!UseSHA512Intrinsics) return NULL; + break; + + case vmIntrinsics::_digestBase_implCompressMB: + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL; + predicates = 3; break; case vmIntrinsics::_updateCRC32: @@ -577,7 +605,7 @@ if (!InlineUnsafeOps) return NULL; } - return new LibraryIntrinsic(m, is_virtual, is_predicted, does_virtual_dispatch, (vmIntrinsics::ID) id); + return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id); } //----------------------register_library_intrinsics----------------------- @@ -601,7 +629,7 @@ const int bci = kit.bci(); // Try to inline the intrinsic. - if (kit.try_to_inline()) { + if (kit.try_to_inline(_last_predicate)) { if (C->print_intrinsics() || C->print_inlining()) { C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); } @@ -634,12 +662,13 @@ return NULL; } -Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) { +Node* LibraryIntrinsic::generate_predicate(JVMState* jvms, int predicate) { LibraryCallKit kit(jvms, this); Compile* C = kit.C; int nodes = C->unique(); + _last_predicate = predicate; #ifndef PRODUCT - assert(is_predicted(), "sanity"); + assert(is_predicated() && predicate < predicates_count(), "sanity"); if ((C->print_intrinsics() || C->print_inlining()) && Verbose) { char buf[1000]; const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf)); @@ -649,10 +678,10 @@ ciMethod* callee = kit.callee(); const int bci = kit.bci(); - Node* slow_ctl = kit.try_to_predicate(); + Node* slow_ctl = kit.try_to_predicate(predicate); if (!kit.failing()) { if (C->print_intrinsics() || C->print_inlining()) { - C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); + C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual, predicate)" : "(intrinsic, predicate)"); } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked); if (C->log()) { @@ -681,7 +710,7 @@ return NULL; } -bool LibraryCallKit::try_to_inline() { +bool LibraryCallKit::try_to_inline(int predicate) { // Handle symbolic names for otherwise undistinguished boolean switches: const bool is_store = true; const bool is_native_ptr = true; @@ -875,6 +904,14 @@ case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: return inline_cipherBlockChaining_AESCrypt(intrinsic_id()); + case vmIntrinsics::_sha_implCompress: + case vmIntrinsics::_sha2_implCompress: + case vmIntrinsics::_sha5_implCompress: + return inline_sha_implCompress(intrinsic_id()); + + case vmIntrinsics::_digestBase_implCompressMB: + return inline_digestBase_implCompressMB(predicate); + case vmIntrinsics::_encodeISOArray: return inline_encodeISOArray(); @@ -898,7 +935,7 @@ } } -Node* LibraryCallKit::try_to_predicate() { +Node* LibraryCallKit::try_to_predicate(int predicate) { if (!jvms()->has_method()) { // Root JVMState has a null method. assert(map()->memory()->Opcode() == Op_Parm, ""); @@ -912,6 +949,8 @@ return inline_cipherBlockChaining_AESCrypt_predicate(false); case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: return inline_cipherBlockChaining_AESCrypt_predicate(true); + case vmIntrinsics::_digestBase_implCompressMB: + return inline_digestBase_implCompressMB_predicate(predicate); default: // If you get here, it may be that someone has added a new intrinsic @@ -5866,7 +5905,12 @@ BasicType bt = field->layout_type(); // Build the resultant type of the load - const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass()); + const Type *type; + if (bt == T_OBJECT) { + type = TypeOopPtr::make_from_klass(field_klass->as_klass()); + } else { + type = Type::get_const_basic_type(bt); + } // Build the load. Node* loadedField = make_load(NULL, adr, type, bt, adr_type, MemNode::unordered, is_vol); @@ -5996,7 +6040,7 @@ assert(tinst != NULL, "CBC obj is null"); assert(tinst->klass()->is_loaded(), "CBC obj is not loaded"); ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt")); - if (!klass_AESCrypt->is_loaded()) return false; + assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded"); ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass(); const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt); @@ -6071,11 +6115,8 @@ // note cipher==plain is more conservative than the original java code but that's OK // Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) { - // First, check receiver for NULL since it is virtual method. + // The receiver was checked for NULL already. Node* objCBC = argument(0); - objCBC = null_check(objCBC); - - if (stopped()) return NULL; // Always NULL // Load embeddedCipher field of CipherBlockChaining object. Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false); @@ -6122,3 +6163,258 @@ record_for_igvn(region); return _gvn.transform(region); } + +//------------------------------inline_sha_implCompress----------------------- +// +// Calculate SHA (i.e., SHA-1) for single-block byte[] array. +// void com.sun.security.provider.SHA.implCompress(byte[] buf, int ofs) +// +// Calculate SHA2 (i.e., SHA-244 or SHA-256) for single-block byte[] array. +// void com.sun.security.provider.SHA2.implCompress(byte[] buf, int ofs) +// +// Calculate SHA5 (i.e., SHA-384 or SHA-512) for single-block byte[] array. +// void com.sun.security.provider.SHA5.implCompress(byte[] buf, int ofs) +// +bool LibraryCallKit::inline_sha_implCompress(vmIntrinsics::ID id) { + assert(callee()->signature()->size() == 2, "sha_implCompress has 2 parameters"); + + Node* sha_obj = argument(0); + Node* src = argument(1); // type oop + Node* ofs = argument(2); // type int + + const Type* src_type = src->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + if (top_src == NULL || top_src->klass() == NULL) { + // failed array check + return false; + } + // Figure out the size and type of the elements we will be copying. + BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (src_elem != T_BYTE) { + return false; + } + // 'src_start' points to src array + offset + Node* src_start = array_element_address(src, ofs, src_elem); + Node* state = NULL; + address stubAddr; + const char *stubName; + + switch(id) { + case vmIntrinsics::_sha_implCompress: + assert(UseSHA1Intrinsics, "need SHA1 instruction support"); + state = get_state_from_sha_object(sha_obj); + stubAddr = StubRoutines::sha1_implCompress(); + stubName = "sha1_implCompress"; + break; + case vmIntrinsics::_sha2_implCompress: + assert(UseSHA256Intrinsics, "need SHA256 instruction support"); + state = get_state_from_sha_object(sha_obj); + stubAddr = StubRoutines::sha256_implCompress(); + stubName = "sha256_implCompress"; + break; + case vmIntrinsics::_sha5_implCompress: + assert(UseSHA512Intrinsics, "need SHA512 instruction support"); + state = get_state_from_sha5_object(sha_obj); + stubAddr = StubRoutines::sha512_implCompress(); + stubName = "sha512_implCompress"; + break; + default: + fatal_unexpected_iid(id); + return false; + } + if (state == NULL) return false; + + // Call the stub. + Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::sha_implCompress_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, state); + + return true; +} + +//------------------------------inline_digestBase_implCompressMB----------------------- +// +// Calculate SHA/SHA2/SHA5 for multi-block byte[] array. +// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) +// +bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) { + assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics, + "need SHA1/SHA256/SHA512 instruction support"); + assert((uint)predicate < 3, "sanity"); + assert(callee()->signature()->size() == 3, "digestBase_implCompressMB has 3 parameters"); + + Node* digestBase_obj = argument(0); // The receiver was checked for NULL already. + Node* src = argument(1); // byte[] array + Node* ofs = argument(2); // type int + Node* limit = argument(3); // type int + + const Type* src_type = src->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + if (top_src == NULL || top_src->klass() == NULL) { + // failed array check + return false; + } + // Figure out the size and type of the elements we will be copying. + BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (src_elem != T_BYTE) { + return false; + } + // 'src_start' points to src array + offset + Node* src_start = array_element_address(src, ofs, src_elem); + + const char* klass_SHA_name = NULL; + const char* stub_name = NULL; + address stub_addr = NULL; + bool long_state = false; + + switch (predicate) { + case 0: + if (UseSHA1Intrinsics) { + klass_SHA_name = "sun/security/provider/SHA"; + stub_name = "sha1_implCompressMB"; + stub_addr = StubRoutines::sha1_implCompressMB(); + } + break; + case 1: + if (UseSHA256Intrinsics) { + klass_SHA_name = "sun/security/provider/SHA2"; + stub_name = "sha256_implCompressMB"; + stub_addr = StubRoutines::sha256_implCompressMB(); + } + break; + case 2: + if (UseSHA512Intrinsics) { + klass_SHA_name = "sun/security/provider/SHA5"; + stub_name = "sha512_implCompressMB"; + stub_addr = StubRoutines::sha512_implCompressMB(); + long_state = true; + } + break; + default: + fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate)); + } + if (klass_SHA_name != NULL) { + // get DigestBase klass to lookup for SHA klass + const TypeInstPtr* tinst = _gvn.type(digestBase_obj)->isa_instptr(); + assert(tinst != NULL, "digestBase_obj is not instance???"); + assert(tinst->klass()->is_loaded(), "DigestBase is not loaded"); + + ciKlass* klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name)); + assert(klass_SHA->is_loaded(), "predicate checks that this class is loaded"); + ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass(); + return inline_sha_implCompressMB(digestBase_obj, instklass_SHA, long_state, stub_addr, stub_name, src_start, ofs, limit); + } + return false; +} +//------------------------------inline_sha_implCompressMB----------------------- +bool LibraryCallKit::inline_sha_implCompressMB(Node* digestBase_obj, ciInstanceKlass* instklass_SHA, + bool long_state, address stubAddr, const char *stubName, + Node* src_start, Node* ofs, Node* limit) { + const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_SHA); + const TypeOopPtr* xtype = aklass->as_instance_type(); + Node* sha_obj = new (C) CheckCastPPNode(control(), digestBase_obj, xtype); + sha_obj = _gvn.transform(sha_obj); + + Node* state; + if (long_state) { + state = get_state_from_sha5_object(sha_obj); + } else { + state = get_state_from_sha_object(sha_obj); + } + if (state == NULL) return false; + + // Call the stub. + Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::digestBase_implCompressMB_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, state, ofs, limit); + // return ofs (int) + Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms)); + set_result(result); + + return true; +} + +//------------------------------get_state_from_sha_object----------------------- +Node * LibraryCallKit::get_state_from_sha_object(Node *sha_object) { + Node* sha_state = load_field_from_object(sha_object, "state", "[I", /*is_exact*/ false); + assert (sha_state != NULL, "wrong version of sun.security.provider.SHA/SHA2"); + if (sha_state == NULL) return (Node *) NULL; + + // now have the array, need to get the start address of the state array + Node* state = array_element_address(sha_state, intcon(0), T_INT); + return state; +} + +//------------------------------get_state_from_sha5_object----------------------- +Node * LibraryCallKit::get_state_from_sha5_object(Node *sha_object) { + Node* sha_state = load_field_from_object(sha_object, "state", "[J", /*is_exact*/ false); + assert (sha_state != NULL, "wrong version of sun.security.provider.SHA5"); + if (sha_state == NULL) return (Node *) NULL; + + // now have the array, need to get the start address of the state array + Node* state = array_element_address(sha_state, intcon(0), T_LONG); + return state; +} + +//----------------------------inline_digestBase_implCompressMB_predicate---------------------------- +// Return node representing slow path of predicate check. +// the pseudo code we want to emulate with this predicate is: +// if (digestBaseObj instanceof SHA/SHA2/SHA5) do_intrinsic, else do_javapath +// +Node* LibraryCallKit::inline_digestBase_implCompressMB_predicate(int predicate) { + assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics, + "need SHA1/SHA256/SHA512 instruction support"); + assert((uint)predicate < 3, "sanity"); + + // The receiver was checked for NULL already. + Node* digestBaseObj = argument(0); + + // get DigestBase klass for instanceOf check + const TypeInstPtr* tinst = _gvn.type(digestBaseObj)->isa_instptr(); + assert(tinst != NULL, "digestBaseObj is null"); + assert(tinst->klass()->is_loaded(), "DigestBase is not loaded"); + + const char* klass_SHA_name = NULL; + switch (predicate) { + case 0: + if (UseSHA1Intrinsics) { + // we want to do an instanceof comparison against the SHA class + klass_SHA_name = "sun/security/provider/SHA"; + } + break; + case 1: + if (UseSHA256Intrinsics) { + // we want to do an instanceof comparison against the SHA2 class + klass_SHA_name = "sun/security/provider/SHA2"; + } + break; + case 2: + if (UseSHA512Intrinsics) { + // we want to do an instanceof comparison against the SHA5 class + klass_SHA_name = "sun/security/provider/SHA5"; + } + break; + default: + fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate)); + } + + ciKlass* klass_SHA = NULL; + if (klass_SHA_name != NULL) { + klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name)); + } + if ((klass_SHA == NULL) || !klass_SHA->is_loaded()) { + // if none of SHA/SHA2/SHA5 is loaded, we never take the intrinsic fast path + Node* ctrl = control(); + set_control(top()); // no intrinsic path + return ctrl; + } + ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass(); + + Node* instofSHA = gen_instanceof(digestBaseObj, makecon(TypeKlassPtr::make(instklass_SHA))); + Node* cmp_instof = _gvn.transform(new (C) CmpINode(instofSHA, intcon(1))); + Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne)); + Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN); + + return instof_false; // even if it is NULL +} diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/runtime.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -898,6 +898,50 @@ return TypeFunc::make(domain, range); } +/* + * void implCompress(byte[] buf, int ofs) + */ +const TypeFunc* OptoRuntime::sha_implCompress_Type() { + // create input type (domain) + int num_args = 2; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // buf + fields[argp++] = TypePtr::NOTNULL; // state + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // no result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = NULL; // void + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + +/* + * int implCompressMultiBlock(byte[] b, int ofs, int limit) + */ +const TypeFunc* OptoRuntime::digestBase_implCompressMB_Type() { + // create input type (domain) + int num_args = 4; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // buf + fields[argp++] = TypePtr::NOTNULL; // state + fields[argp++] = TypeInt::INT; // ofs + fields[argp++] = TypeInt::INT; // limit + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // returning ofs (int) + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypeInt::INT; // ofs + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields); + return TypeFunc::make(domain, range); +} + //------------- Interpreter state access for on stack replacement const TypeFunc* OptoRuntime::osr_end_Type() { // create input type (domain) diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/runtime.hpp --- a/src/share/vm/opto/runtime.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/runtime.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -300,6 +300,9 @@ static const TypeFunc* aescrypt_block_Type(); static const TypeFunc* cipherBlockChaining_aescrypt_Type(); + static const TypeFunc* sha_implCompress_Type(); + static const TypeFunc* digestBase_implCompressMB_Type(); + static const TypeFunc* updateBytesCRC32_Type(); // leaf on stack replacement interpreter accessor types diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/opto/superword.cpp --- a/src/share/vm/opto/superword.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/opto/superword.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -1374,6 +1374,20 @@ if (n->is_Load()) { Node* ctl = n->in(MemNode::Control); Node* mem = first->in(MemNode::Memory); + SWPointer p1(n->as_Mem(), this); + // Identify the memory dependency for the new loadVector node by + // walking up through memory chain. + // This is done to give flexibility to the new loadVector node so that + // it can move above independent storeVector nodes. + while (mem->is_StoreVector()) { + SWPointer p2(mem->as_Mem(), this); + int cmp = p1.cmp(p2); + if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) { + mem = mem->in(MemNode::Memory); + } else { + break; // dependent memory + } + } Node* adr = low_adr->in(MemNode::Address); const TypePtr* atyp = n->adr_type(); vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n)); diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/runtime/globals.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -597,6 +597,9 @@ product(bool, UseAES, false, \ "Control whether AES instructions can be used on x86/x64") \ \ + product(bool, UseSHA, false, \ + "Control whether SHA instructions can be used on SPARC") \ + \ product(uintx, LargePageSizeInBytes, 0, \ "Large page size (0 to let VM choose the page size)") \ \ @@ -703,6 +706,15 @@ product(bool, UseAESIntrinsics, false, \ "Use intrinsics for AES versions of crypto") \ \ + product(bool, UseSHA1Intrinsics, false, \ + "Use intrinsics for SHA-1 crypto hash function") \ + \ + product(bool, UseSHA256Intrinsics, false, \ + "Use intrinsics for SHA-224 and SHA-256 crypto hash functions") \ + \ + product(bool, UseSHA512Intrinsics, false, \ + "Use intrinsics for SHA-384 and SHA-512 crypto hash functions") \ + \ product(bool, UseCRC32Intrinsics, false, \ "use intrinsics for java.util.zip.CRC32") \ \ diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/runtime/stubRoutines.cpp --- a/src/share/vm/runtime/stubRoutines.cpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/runtime/stubRoutines.cpp Tue Aug 12 22:29:36 2014 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -125,6 +125,13 @@ address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL; address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL; +address StubRoutines::_sha1_implCompress = NULL; +address StubRoutines::_sha1_implCompressMB = NULL; +address StubRoutines::_sha256_implCompress = NULL; +address StubRoutines::_sha256_implCompressMB = NULL; +address StubRoutines::_sha512_implCompress = NULL; +address StubRoutines::_sha512_implCompressMB = NULL; + address StubRoutines::_updateBytesCRC32 = NULL; address StubRoutines::_crc_table_adr = NULL; diff -r 46bbe04d1cad -r f5b4600d7368 src/share/vm/runtime/stubRoutines.hpp --- a/src/share/vm/runtime/stubRoutines.hpp Fri Apr 11 13:52:51 2014 +0200 +++ b/src/share/vm/runtime/stubRoutines.hpp Tue Aug 12 22:29:36 2014 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -207,6 +207,13 @@ static address _cipherBlockChaining_encryptAESCrypt; static address _cipherBlockChaining_decryptAESCrypt; + static address _sha1_implCompress; + static address _sha1_implCompressMB; + static address _sha256_implCompress; + static address _sha256_implCompressMB; + static address _sha512_implCompress; + static address _sha512_implCompressMB; + static address _updateBytesCRC32; static address _crc_table_adr; @@ -356,6 +363,13 @@ static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; } static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; } + static address sha1_implCompress() { return _sha1_implCompress; } + static address sha1_implCompressMB() { return _sha1_implCompressMB; } + static address sha256_implCompress() { return _sha256_implCompress; } + static address sha256_implCompressMB() { return _sha256_implCompressMB; } + static address sha512_implCompress() { return _sha512_implCompress; } + static address sha512_implCompressMB() { return _sha512_implCompressMB; } + static address updateBytesCRC32() { return _updateBytesCRC32; } static address crc_table_addr() { return _crc_table_adr; } diff -r 46bbe04d1cad -r f5b4600d7368 test/compiler/intrinsics/sha/TestSHA.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/sha/TestSHA.java Tue Aug 12 22:29:36 2014 +0000 @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8035968 + * @summary C2 support for SHA on SPARC + * + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 TestSHA + * + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 -Doffset=1 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 -Doffset=1 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Doffset=1 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 -Doffset=1 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 -Doffset=1 TestSHA + * + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 -Dalgorithm2=SHA-256 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 -Dalgorithm2=SHA-512 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Dalgorithm2=SHA-512 TestSHA + * + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1 -Dalgorithm2=MD5 TestSHA + * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=MD5 -Dalgorithm2=SHA-1 TestSHA + */ + +import java.security.MessageDigest; +import java.util.Arrays; + +public class TestSHA { + private static final int HASH_LEN = 64; /* up to 512-bit */ + private static final int ALIGN = 8; /* for different data alignments */ + + public static void main(String[] args) throws Exception { + String provider = System.getProperty("provider", "SUN"); + String algorithm = System.getProperty("algorithm", "SHA-1"); + String algorithm2 = System.getProperty("algorithm2", ""); + int msgSize = Integer.getInteger("msgSize", 1024); + int offset = Integer.getInteger("offset", 0) % ALIGN; + int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 100000); + int warmupIters = (args.length > 1 ? Integer.valueOf(args[1]) : 20000); + + testSHA(provider, algorithm, msgSize, offset, iters, warmupIters); + + if (algorithm2.equals("") == false) { + testSHA(provider, algorithm2, msgSize, offset, iters, warmupIters); + } + } + + static void testSHA(String provider, String algorithm, int msgSize, + int offset, int iters, int warmupIters) throws Exception { + System.out.println("provider = " + provider); + System.out.println("algorithm = " + algorithm); + System.out.println("msgSize = " + msgSize + " bytes"); + System.out.println("offset = " + offset); + System.out.println("iters = " + iters); + + byte[] expectedHash = new byte[HASH_LEN]; + byte[] hash = new byte[HASH_LEN]; + byte[] data = new byte[msgSize + offset]; + for (int i = 0; i < (msgSize + offset); i++) { + data[i] = (byte)(i & 0xff); + } + + try { + MessageDigest sha = MessageDigest.getInstance(algorithm, provider); + + /* do once, which doesn't use intrinsics */ + sha.reset(); + sha.update(data, offset, msgSize); + expectedHash = sha.digest(); + + /* warm up */ + for (int i = 0; i < warmupIters; i++) { + sha.reset(); + sha.update(data, offset, msgSize); + hash = sha.digest(); + } + + /* check result */ + if (Arrays.equals(hash, expectedHash) == false) { + System.out.println("TestSHA Error: "); + showArray(expectedHash, "expectedHash"); + showArray(hash, "computedHash"); + //System.exit(1); + throw new Exception("TestSHA Error"); + } else { + showArray(hash, "hash"); + } + + /* measure performance */ + long start = System.nanoTime(); + for (int i = 0; i < iters; i++) { + sha.reset(); + sha.update(data, offset, msgSize); + hash = sha.digest(); + } + long end = System.nanoTime(); + double total = (double)(end - start)/1e9; /* in seconds */ + double thruput = (double)msgSize*iters/1e6/total; /* in MB/s */ + System.out.println("TestSHA runtime = " + total + " seconds"); + System.out.println("TestSHA throughput = " + thruput + " MB/s"); + System.out.println(); + } catch (Exception e) { + System.out.println("Exception: " + e); + //System.exit(1); + throw new Exception(e); + } + } + + static void showArray(byte b[], String name) { + System.out.format("%s [%d]: ", name, b.length); + for (int i = 0; i < Math.min(b.length, HASH_LEN); i++) { + System.out.format("%02x ", b[i] & 0xff); + } + System.out.println(); + } +}