# HG changeset patch # User kevinw # Date 1390211807 0 # Node ID ce3b1e29425a90d21f3457424d5670ae06bb207c # Parent baa7d4400c6209468fc2e32a311c3de74ef5f5a9# Parent 8254553994b923fd905e39885f3731861c140114 Merge diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -88,6 +88,7 @@ orncc_op3 = 0x16, xnorcc_op3 = 0x17, addccc_op3 = 0x18, + aes4_op3 = 0x19, umulcc_op3 = 0x1a, smulcc_op3 = 0x1b, subccc_op3 = 0x1c, @@ -121,6 +122,8 @@ fpop1_op3 = 0x34, fpop2_op3 = 0x35, impdep1_op3 = 0x36, + aes3_op3 = 0x36, + flog3_op3 = 0x36, impdep2_op3 = 0x37, jmpl_op3 = 0x38, rett_op3 = 0x39, @@ -172,41 +175,56 @@ enum opfs { // selected opfs - fmovs_opf = 0x01, - fmovd_opf = 0x02, + fmovs_opf = 0x01, + fmovd_opf = 0x02, - fnegs_opf = 0x05, - fnegd_opf = 0x06, + fnegs_opf = 0x05, + fnegd_opf = 0x06, - fadds_opf = 0x41, - faddd_opf = 0x42, - fsubs_opf = 0x45, - fsubd_opf = 0x46, + fadds_opf = 0x41, + faddd_opf = 0x42, + fsubs_opf = 0x45, + fsubd_opf = 0x46, - fmuls_opf = 0x49, - fmuld_opf = 0x4a, - fdivs_opf = 0x4d, - fdivd_opf = 0x4e, + fmuls_opf = 0x49, + fmuld_opf = 0x4a, + fdivs_opf = 0x4d, + fdivd_opf = 0x4e, + + fcmps_opf = 0x51, + fcmpd_opf = 0x52, - fcmps_opf = 0x51, - fcmpd_opf = 0x52, + fstox_opf = 0x81, + fdtox_opf = 0x82, + fxtos_opf = 0x84, + fxtod_opf = 0x88, + fitos_opf = 0xc4, + fdtos_opf = 0xc6, + fitod_opf = 0xc8, + fstod_opf = 0xc9, + fstoi_opf = 0xd1, + fdtoi_opf = 0xd2, - fstox_opf = 0x81, - fdtox_opf = 0x82, - fxtos_opf = 0x84, - fxtod_opf = 0x88, - fitos_opf = 0xc4, - fdtos_opf = 0xc6, - fitod_opf = 0xc8, - fstod_opf = 0xc9, - fstoi_opf = 0xd1, - fdtoi_opf = 0xd2, + mdtox_opf = 0x110, + mstouw_opf = 0x111, + mstosw_opf = 0x113, + mxtod_opf = 0x118, + mwtos_opf = 0x119, + + aes_kexpand0_opf = 0x130, + aes_kexpand2_opf = 0x131 + }; - mdtox_opf = 0x110, - mstouw_opf = 0x111, - mstosw_opf = 0x113, - mxtod_opf = 0x118, - mwtos_opf = 0x119 + enum op5s { + aes_eround01_op5 = 0x00, + aes_eround23_op5 = 0x01, + aes_dround01_op5 = 0x02, + aes_dround23_op5 = 0x03, + aes_eround01_l_op5 = 0x04, + aes_eround23_l_op5 = 0x05, + aes_dround01_l_op5 = 0x06, + aes_dround23_l_op5 = 0x07, + aes_kexpand1_op5 = 0x08 }; enum RCondition { rc_z = 1, rc_lez = 2, rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez }; @@ -427,6 +445,7 @@ static int immed( bool i) { return u_field(i ? 1 : 0, 13, 13); } static int opf_low6( int w) { return u_field(w, 10, 5); } static int opf_low5( int w) { return u_field(w, 9, 5); } + static int op5( int x) { return u_field(x, 8, 5); } static int trapcc( CC cc) { return u_field(cc, 12, 11); } static int sx( int i) { return u_field(i, 12, 12); } // shift x=1 means 64-bit static int opf( int x) { return u_field(x, 13, 5); } @@ -451,6 +470,7 @@ static int fd( FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 29, 25); }; static int fs1(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 18, 14); }; static int fs2(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 4, 0); }; + static int fs3(FloatRegister r, FloatRegisterImpl::Width fwa) { return u_field(r->encoding(fwa), 13, 9); }; // some float instructions use this encoding on the op3 field static int alt_op3(int op, FloatRegisterImpl::Width w) { @@ -559,6 +579,12 @@ return x & ((1 << 10) - 1); } + // AES crypto instructions supported only on certain processors + static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); } + + // instruction only in VIS1 + static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); } + // instruction only in VIS3 static void vis3_only() { assert( VM_Version::has_vis3(), "This instruction only works on SPARC with VIS3"); } @@ -682,6 +708,24 @@ void addccc( Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } + // 4-operand AES instructions + + void aes_eround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_op5) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_eround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_op5) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_dround01( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_op5) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_dround23( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_op5) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_eround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_eround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_eround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_dround01_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround01_l_op5) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_dround23_l( FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | fs3(s3, FloatRegisterImpl::D) | op5(aes_dround23_l_op5) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_kexpand1( FloatRegister s1, FloatRegister s2, int imm5a, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes4_op3) | fs1(s1, FloatRegisterImpl::D) | u_field(imm5a, 13, 9) | op5(aes_kexpand1_op5) | fs2(s2, FloatRegisterImpl::D) ); } + + + // 3-operand AES instructions + + void aes_kexpand0( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand0_opf) | fs2(s2, FloatRegisterImpl::D) ); } + void aes_kexpand2( FloatRegister s1, FloatRegister s2, FloatRegister d ) { aes_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(aes3_op3) | fs1(s1, FloatRegisterImpl::D) | opf(aes_kexpand2_opf) | fs2(s2, FloatRegisterImpl::D) ); } + // pp 136 inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none); @@ -784,6 +828,10 @@ void fmul( FloatRegisterImpl::Width sw, FloatRegisterImpl::Width dw, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, dw) | op3(fpop1_op3) | fs1(s1, sw) | opf(0x60 + sw + dw*4) | fs2(s2, sw)); } void fdiv( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | fs1(s1, w) | opf(0x4c + w) | fs2(s2, w)); } + // FXORs/FXORd instructions + + void fxor( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(flog3_op3) | fs1(s1, w) | opf(0x6E - w) | fs2(s2, w)); } + // pp 164 void fsqrt( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d ) { emit_int32( op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w)); } diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -1315,7 +1315,7 @@ } Address LIR_Assembler::as_Address(LIR_Address* addr) { - Register reg = addr->base()->as_register(); + Register reg = addr->base()->as_pointer_register(); LIR_Opr index = addr->index(); if (index->is_illegal()) { return Address(reg, addr->disp()); @@ -3101,7 +3101,145 @@ } void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { - fatal("Type profiling not implemented on this platform"); + Register obj = op->obj()->as_register(); + Register tmp1 = op->tmp()->as_pointer_register(); + Register tmp2 = G1; + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + + __ verify_oop(obj); + + if (tmp1 != obj) { + __ mov(obj, tmp1); + } + if (do_null) { + __ br_notnull_short(tmp1, Assembler::pt, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ld_ptr(mdo_addr, tmp1); + __ or3(tmp1, TypeEntries::null_seen, tmp1); + __ st_ptr(tmp1, mdo_addr); + } + if (do_update) { + __ ba(next); + __ delayed()->nop(); + } +#ifdef ASSERT + } else { + __ br_notnull_short(tmp1, Assembler::pt, update); + __ stop("unexpect null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp1, tmp1); + metadata2reg(exact_klass->constant_encoding(), tmp2); + __ cmp_and_br_short(tmp1, tmp2, Assembler::equal, Assembler::pt, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + + Label do_update; + __ ld_ptr(mdo_addr, tmp2); + + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + metadata2reg(exact_klass->constant_encoding(), tmp1); + } else { + __ load_klass(tmp1, tmp1); + } + + __ xor3(tmp1, tmp2, tmp1); + __ btst(TypeEntries::type_klass_mask, tmp1); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ brx(Assembler::zero, false, Assembler::pt, next); + __ delayed()-> + + btst(TypeEntries::type_unknown, tmp1); + // already unknown. Nothing to do anymore. + __ brx(Assembler::notZero, false, Assembler::pt, next); + + if (TypeEntries::is_type_none(current_klass)) { + __ delayed()->btst(TypeEntries::type_mask, tmp2); + __ brx(Assembler::zero, true, Assembler::pt, do_update); + // first time here. Set profile type. + __ delayed()->or3(tmp2, tmp1, tmp2); + } else { + __ delayed()->nop(); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ btst(TypeEntries::type_unknown, tmp2); + // already unknown. Nothing to do anymore. + __ brx(Assembler::notZero, false, Assembler::pt, next); + __ delayed()->nop(); + } + + // different than before. Cannot keep accurate profile. + __ or3(tmp2, TypeEntries::type_unknown, tmp2); + } else { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + metadata2reg(exact_klass->constant_encoding(), tmp1); + __ xor3(tmp1, tmp2, tmp1); + __ btst(TypeEntries::type_klass_mask, tmp1); + __ brx(Assembler::zero, false, Assembler::pt, next); +#ifdef ASSERT + + { + Label ok; + __ delayed()->btst(TypeEntries::type_mask, tmp2); + __ brx(Assembler::zero, true, Assembler::pt, ok); + __ delayed()->nop(); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } + // first time here. Set profile type. + __ or3(tmp2, tmp1, tmp2); +#else + // first time here. Set profile type. + __ delayed()->or3(tmp2, tmp1, tmp2); +#endif + + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + // already unknown. Nothing to do anymore. + __ btst(TypeEntries::type_unknown, tmp2); + __ brx(Assembler::notZero, false, Assembler::pt, next); + __ delayed()->or3(tmp2, TypeEntries::type_unknown, tmp2); + } + } + + __ bind(do_update); + __ st_ptr(tmp2, mdo_addr); + + __ bind(next); + } } void LIR_Assembler::align_backward_branch_target() { @@ -3321,9 +3459,14 @@ void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) { LIR_Address* addr = addr_opr->as_address_ptr(); - assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1 && Assembler::is_simm13(addr->disp()), "can't handle complex addresses yet"); - - __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register()); + assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1, "can't handle complex addresses yet"); + + if (Assembler::is_simm13(addr->disp())) { + __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register()); + } else { + __ set(addr->disp(), G3_scratch); + __ add(addr->base()->as_pointer_register(), G3_scratch, dest->as_pointer_register()); + } } diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/interp_masm_sparc.cpp --- a/src/cpu/sparc/vm/interp_masm_sparc.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -1892,6 +1892,220 @@ } } +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { + Label not_null, do_nothing, do_update; + + assert_different_registers(obj, mdo_addr.base(), tmp); + + verify_oop(obj); + + ld_ptr(mdo_addr, tmp); + + br_notnull_short(obj, pt, not_null); + or3(tmp, TypeEntries::null_seen, tmp); + ba_short(do_update); + + bind(not_null); + load_klass(obj, obj); + + xor3(obj, tmp, obj); + btst(TypeEntries::type_klass_mask, obj); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + brx(zero, false, pt, do_nothing); + delayed()-> + + btst(TypeEntries::type_unknown, obj); + // already unknown. Nothing to do anymore. + brx(notZero, false, pt, do_nothing); + delayed()-> + + btst(TypeEntries::type_mask, tmp); + brx(zero, true, pt, do_update); + // first time here. Set profile type. + delayed()->or3(tmp, obj, tmp); + + // different than before. Cannot keep accurate profile. + or3(tmp, TypeEntries::type_unknown, tmp); + + bind(do_update); + // update profile + st_ptr(tmp, mdo_addr); + + bind(do_nothing); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + assert_different_registers(callee, tmp1, tmp2, ImethodDataPtr); + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + ldub(ImethodDataPtr, in_bytes(DataLayout::tag_offset()) - off_to_start, tmp1); + cmp_and_br_short(tmp1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag, notEqual, pn, profile_continue); + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + add(ImethodDataPtr, off_to_args, ImethodDataPtr); + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1); + sub(tmp1, i*TypeStackSlotEntries::per_arg_count(), tmp1); + cmp_and_br_short(tmp1, TypeStackSlotEntries::per_arg_count(), less, pn, done); + } + ld_ptr(Address(callee, Method::const_offset()), tmp1); + lduh(Address(tmp1, ConstMethod::size_of_parameters_offset()), tmp1); + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list. But there's an extra slot at + // the stop of the stack. So the offset is n - o from Lesp. + ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args, tmp2); + sub(tmp1, tmp2, tmp1); + + // Can't use MacroAssembler::argument_address() which needs Gargs to be set up + sll(tmp1, Interpreter::logStackElementSize, tmp1); + ld_ptr(Lesp, tmp1, tmp1); + + Address mdo_arg_addr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp1, mdo_arg_addr, tmp2); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + add(ImethodDataPtr, to_add, ImethodDataPtr); + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1); + sub(tmp1, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp1); + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp1 is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + sll(tmp1, exact_log2(DataLayout::cell_size), tmp1); + add(ImethodDataPtr, tmp1, ImethodDataPtr); + } + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(in_bytes(ReturnTypeEntry::size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one. + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1, Register tmp2) { + assert_different_registers(ret, tmp1, tmp2); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. We can't go back to the + // begining of the ProfileData we intend to update to check its + // type because we're right after it and we don't known its + // length. + Label do_profile; + ldub(Lbcp, 0, tmp1); + cmp_and_br_short(tmp1, Bytecodes::_invokedynamic, equal, pn, do_profile); + cmp(tmp1, Bytecodes::_invokehandle); + br(equal, false, pn, do_profile); + delayed()->ldub(Lmethod, Method::intrinsic_id_offset_in_bytes(), tmp1); + cmp_and_br_short(tmp1, vmIntrinsics::_compiledLambdaForm, notEqual, pt, profile_continue); + + bind(do_profile); + } + + Address mdo_ret_addr(ImethodDataPtr, -in_bytes(ReturnTypeEntry::size())); + mov(ret, tmp1); + profile_obj_type(tmp1, mdo_ret_addr, tmp2); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4) { + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters. + lduw(ImethodDataPtr, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()), tmp1); + cmp_and_br_short(tmp1, 0, less, pn, profile_continue); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. + // mdo start + parameters offset + array length - 1 + + // Pointer to the parameter area in the MDO + Register mdp = tmp1; + add(ImethodDataPtr, tmp1, mdp); + + // offset of the current profile entry to update + Register entry_offset = tmp2; + // entry_offset = array len in number of cells + ld_ptr(mdp, ArrayData::array_len_offset(), entry_offset); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + assert(off_base % DataLayout::cell_size == 0, "should be a number of cells"); + + // entry_offset (number of cells) = array len - size of 1 entry + offset of the stack slot field + sub(entry_offset, TypeStackSlotEntries::per_arg_count() - (off_base / DataLayout::cell_size), entry_offset); + // entry_offset in bytes + sll(entry_offset, exact_log2(DataLayout::cell_size), entry_offset); + + Label loop; + bind(loop); + + // load offset on the stack from the slot for this parameter + ld_ptr(mdp, entry_offset, tmp3); + sll(tmp3,Interpreter::logStackElementSize, tmp3); + neg(tmp3); + // read the parameter from the local area + ld_ptr(Llocals, tmp3, tmp3); + + // make entry_offset now point to the type field for this parameter + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + assert(type_base > off_base, "unexpected"); + add(entry_offset, type_base - off_base, entry_offset); + + // profile the parameter + Address arg_type(mdp, entry_offset); + profile_obj_type(tmp3, arg_type, tmp4); + + // go to next parameter + sub(entry_offset, TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size + (type_base - off_base), entry_offset); + cmp_and_br_short(entry_offset, off_base, greaterEqual, pt, loop); + + bind(profile_continue); + } +} + // add a InterpMonitorElem to stack (see frame_sparc.hpp) void InterpreterMacroAssembler::add_monitor_to_stack( bool stack_is_empty, diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/interp_masm_sparc.hpp --- a/src/cpu/sparc/vm/interp_masm_sparc.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -323,6 +323,11 @@ Register scratch2, Register scratch3); + void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); + void profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual); + void profile_return_type(Register ret, Register tmp1, Register tmp2); + void profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4); + // Debugging void interp_verify_oop(Register reg, TosState state, const char * file, int line); // only if +VerifyOops && state == atos void verify_oop_or_return_address(Register reg, Register rtmp); // for astore diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/sparc.ad Mon Jan 20 09:56:47 2014 +0000 @@ -1848,6 +1848,12 @@ return false; } +// Current (2013) SPARC platforms need to read original key +// to construct decryption expanded key +const bool Matcher::pass_original_key_for_aes() { + return true; +} + // USII supports fxtof through the whole range of number, USIII doesn't const bool Matcher::convL2FSupported(void) { return VM_Version::has_fast_fxtof(); diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -3304,6 +3304,775 @@ } } + address generate_aescrypt_encryptBlock() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aesencryptBlock"); + Label L_doLast128bit, L_storeOutput; + address start = __ pc(); + Register from = O0; // source byte array + Register to = O1; // destination byte array + Register key = O2; // expanded key array + const Register keylen = O4; //reg for storing expanded key array length + + // read expanded key length + __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); + + // load input into F54-F56; F30-F31 used as temp + __ ldf(FloatRegisterImpl::S, from, 0, F30); + __ ldf(FloatRegisterImpl::S, from, 4, F31); + __ fmov(FloatRegisterImpl::D, F30, F54); + __ ldf(FloatRegisterImpl::S, from, 8, F30); + __ ldf(FloatRegisterImpl::S, from, 12, F31); + __ fmov(FloatRegisterImpl::D, F30, F56); + + // load expanded key + for ( int i = 0; i <= 38; i += 2 ) { + __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i)); + } + + // perform cipher transformation + __ fxor(FloatRegisterImpl::D, F0, F54, F54); + __ fxor(FloatRegisterImpl::D, F2, F56, F56); + // rounds 1 through 8 + for ( int i = 4; i <= 28; i += 8 ) { + __ aes_eround01(as_FloatRegister(i), F54, F56, F58); + __ aes_eround23(as_FloatRegister(i+2), F54, F56, F60); + __ aes_eround01(as_FloatRegister(i+4), F58, F60, F54); + __ aes_eround23(as_FloatRegister(i+6), F58, F60, F56); + } + __ aes_eround01(F36, F54, F56, F58); //round 9 + __ aes_eround23(F38, F54, F56, F60); + + // 128-bit original key size + __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_doLast128bit); + + for ( int i = 40; i <= 50; i += 2 ) { + __ ldf(FloatRegisterImpl::D, key, i*4, as_FloatRegister(i) ); + } + __ aes_eround01(F40, F58, F60, F54); //round 10 + __ aes_eround23(F42, F58, F60, F56); + __ aes_eround01(F44, F54, F56, F58); //round 11 + __ aes_eround23(F46, F54, F56, F60); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_storeOutput); + + __ ldf(FloatRegisterImpl::D, key, 208, F52); + __ aes_eround01(F48, F58, F60, F54); //round 12 + __ aes_eround23(F50, F58, F60, F56); + __ ldf(FloatRegisterImpl::D, key, 216, F46); + __ ldf(FloatRegisterImpl::D, key, 224, F48); + __ ldf(FloatRegisterImpl::D, key, 232, F50); + __ aes_eround01(F52, F54, F56, F58); //round 13 + __ aes_eround23(F46, F54, F56, F60); + __ br(Assembler::always, false, Assembler::pt, L_storeOutput); + __ delayed()->nop(); + + __ BIND(L_doLast128bit); + __ ldf(FloatRegisterImpl::D, key, 160, F48); + __ ldf(FloatRegisterImpl::D, key, 168, F50); + + __ BIND(L_storeOutput); + // perform last round of encryption common for all key sizes + __ aes_eround01_l(F48, F58, F60, F54); //last round + __ aes_eround23_l(F50, F58, F60, F56); + + // store output into the destination array, F0-F1 used as temp + __ fmov(FloatRegisterImpl::D, F54, F0); + __ stf(FloatRegisterImpl::S, F0, to, 0); + __ stf(FloatRegisterImpl::S, F1, to, 4); + __ fmov(FloatRegisterImpl::D, F56, F0); + __ stf(FloatRegisterImpl::S, F0, to, 8); + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12); + + return start; + } + + address generate_aescrypt_decryptBlock() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aesdecryptBlock"); + address start = __ pc(); + Label L_expand192bit, L_expand256bit, L_common_transform; + Register from = O0; // source byte array + Register to = O1; // destination byte array + Register key = O2; // expanded key array + Register original_key = O3; // original key array only required during decryption + const Register keylen = O4; // reg for storing expanded key array length + + // read expanded key array length + __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); + + // load input into F52-F54; F30,F31 used as temp + __ ldf(FloatRegisterImpl::S, from, 0, F30); + __ ldf(FloatRegisterImpl::S, from, 4, F31); + __ fmov(FloatRegisterImpl::D, F30, F52); + __ ldf(FloatRegisterImpl::S, from, 8, F30); + __ ldf(FloatRegisterImpl::S, from, 12, F31); + __ fmov(FloatRegisterImpl::D, F30, F54); + + // load original key from SunJCE expanded decryption key + for ( int i = 0; i <= 3; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // 256-bit original key size + __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); + + // 128-bit original key size + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 36; i += 4 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); + } + + // perform 128-bit key specific inverse cipher transformation + __ fxor(FloatRegisterImpl::D, F42, F54, F54); + __ fxor(FloatRegisterImpl::D, F40, F52, F52); + __ br(Assembler::always, false, Assembler::pt, L_common_transform); + __ delayed()->nop(); + + __ BIND(L_expand192bit); + + // start loading rest of the 192-bit key + __ ldf(FloatRegisterImpl::S, original_key, 16, F4); + __ ldf(FloatRegisterImpl::S, original_key, 20, F5); + + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 36; i += 6 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); + __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); + } + __ aes_kexpand1(F42, F46, 7, F48); + __ aes_kexpand2(F44, F48, F50); + + // perform 192-bit key specific inverse cipher transformation + __ fxor(FloatRegisterImpl::D, F50, F54, F54); + __ fxor(FloatRegisterImpl::D, F48, F52, F52); + __ aes_dround23(F46, F52, F54, F58); + __ aes_dround01(F44, F52, F54, F56); + __ aes_dround23(F42, F56, F58, F54); + __ aes_dround01(F40, F56, F58, F52); + __ br(Assembler::always, false, Assembler::pt, L_common_transform); + __ delayed()->nop(); + + __ BIND(L_expand256bit); + + // load rest of the 256-bit key + for ( int i = 4; i <= 7; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 40; i += 8 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); + __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); + __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); + } + __ aes_kexpand1(F48, F54, 6, F56); + __ aes_kexpand2(F50, F56, F58); + + for ( int i = 0; i <= 6; i += 2 ) { + __ fmov(FloatRegisterImpl::D, as_FloatRegister(58-i), as_FloatRegister(i)); + } + + // load input into F52-F54 + __ ldf(FloatRegisterImpl::D, from, 0, F52); + __ ldf(FloatRegisterImpl::D, from, 8, F54); + + // perform 256-bit key specific inverse cipher transformation + __ fxor(FloatRegisterImpl::D, F0, F54, F54); + __ fxor(FloatRegisterImpl::D, F2, F52, F52); + __ aes_dround23(F4, F52, F54, F58); + __ aes_dround01(F6, F52, F54, F56); + __ aes_dround23(F50, F56, F58, F54); + __ aes_dround01(F48, F56, F58, F52); + __ aes_dround23(F46, F52, F54, F58); + __ aes_dround01(F44, F52, F54, F56); + __ aes_dround23(F42, F56, F58, F54); + __ aes_dround01(F40, F56, F58, F52); + + for ( int i = 0; i <= 7; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // perform inverse cipher transformations common for all key sizes + __ BIND(L_common_transform); + for ( int i = 38; i >= 6; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F52, F54, F58); + __ aes_dround01(as_FloatRegister(i-2), F52, F54, F56); + if ( i != 6) { + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F54); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F52); + } else { + __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F54); + __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F52); + } + } + + // store output to destination array, F0-F1 used as temp + __ fmov(FloatRegisterImpl::D, F52, F0); + __ stf(FloatRegisterImpl::S, F0, to, 0); + __ stf(FloatRegisterImpl::S, F1, to, 4); + __ fmov(FloatRegisterImpl::D, F54, F0); + __ stf(FloatRegisterImpl::S, F0, to, 8); + __ retl(); + __ delayed()->stf(FloatRegisterImpl::S, F1, to, 12); + + return start; + } + + address generate_cipherBlockChaining_encryptAESCrypt() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); + Label L_cbcenc128, L_cbcenc192, L_cbcenc256; + address start = __ pc(); + Register from = O0; // source byte array + Register to = O1; // destination byte array + Register key = O2; // expanded key array + Register rvec = O3; // init vector + const Register len_reg = O4; // cipher length + const Register keylen = O5; // reg for storing expanded key array length + + // save cipher len to return in the end + __ mov(len_reg, L1); + + // read expanded key length + __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); + + // load init vector + __ ldf(FloatRegisterImpl::D, rvec, 0, F60); + __ ldf(FloatRegisterImpl::D, rvec, 8, F62); + __ ldx(key,0,G1); + __ ldx(key,8,G2); + + // start loading expanded key + for ( int i = 0, j = 16; i <= 38; i += 2, j += 8 ) { + __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); + } + + // 128-bit original key size + __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pt, L_cbcenc128); + + for ( int i = 40, j = 176; i <= 46; i += 2, j += 8 ) { + __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); + } + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pt, L_cbcenc192); + + for ( int i = 48, j = 208; i <= 54; i += 2, j += 8 ) { + __ ldf(FloatRegisterImpl::D, key, j, as_FloatRegister(i)); + } + + // 256-bit original key size + __ br(Assembler::always, false, Assembler::pt, L_cbcenc256); + __ delayed()->nop(); + + __ align(OptoLoopAlignment); + __ BIND(L_cbcenc128); + __ ldx(from,0,G3); + __ ldx(from,8,G4); + __ xor3(G1,G3,G3); + __ xor3(G2,G4,G4); + __ movxtod(G3,F56); + __ movxtod(G4,F58); + __ fxor(FloatRegisterImpl::D, F60, F56, F60); + __ fxor(FloatRegisterImpl::D, F62, F58, F62); + + // TEN_EROUNDS + for ( int i = 0; i <= 32; i += 8 ) { + __ aes_eround01(as_FloatRegister(i), F60, F62, F56); + __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); + if (i != 32 ) { + __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); + } else { + __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); + } + } + + __ stf(FloatRegisterImpl::D, F60, to, 0); + __ stf(FloatRegisterImpl::D, F62, to, 8); + __ add(from, 16, from); + __ add(to, 16, to); + __ subcc(len_reg, 16, len_reg); + __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc128); + __ delayed()->nop(); + __ stf(FloatRegisterImpl::D, F60, rvec, 0); + __ stf(FloatRegisterImpl::D, F62, rvec, 8); + __ retl(); + __ delayed()->mov(L1, O0); + + __ align(OptoLoopAlignment); + __ BIND(L_cbcenc192); + __ ldx(from,0,G3); + __ ldx(from,8,G4); + __ xor3(G1,G3,G3); + __ xor3(G2,G4,G4); + __ movxtod(G3,F56); + __ movxtod(G4,F58); + __ fxor(FloatRegisterImpl::D, F60, F56, F60); + __ fxor(FloatRegisterImpl::D, F62, F58, F62); + + // TWELEVE_EROUNDS + for ( int i = 0; i <= 40; i += 8 ) { + __ aes_eround01(as_FloatRegister(i), F60, F62, F56); + __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); + if (i != 40 ) { + __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); + } else { + __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); + } + } + + __ stf(FloatRegisterImpl::D, F60, to, 0); + __ stf(FloatRegisterImpl::D, F62, to, 8); + __ add(from, 16, from); + __ subcc(len_reg, 16, len_reg); + __ add(to, 16, to); + __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc192); + __ delayed()->nop(); + __ stf(FloatRegisterImpl::D, F60, rvec, 0); + __ stf(FloatRegisterImpl::D, F62, rvec, 8); + __ retl(); + __ delayed()->mov(L1, O0); + + __ align(OptoLoopAlignment); + __ BIND(L_cbcenc256); + __ ldx(from,0,G3); + __ ldx(from,8,G4); + __ xor3(G1,G3,G3); + __ xor3(G2,G4,G4); + __ movxtod(G3,F56); + __ movxtod(G4,F58); + __ fxor(FloatRegisterImpl::D, F60, F56, F60); + __ fxor(FloatRegisterImpl::D, F62, F58, F62); + + // FOURTEEN_EROUNDS + for ( int i = 0; i <= 48; i += 8 ) { + __ aes_eround01(as_FloatRegister(i), F60, F62, F56); + __ aes_eround23(as_FloatRegister(i+2), F60, F62, F58); + if (i != 48 ) { + __ aes_eround01(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23(as_FloatRegister(i+6), F56, F58, F62); + } else { + __ aes_eround01_l(as_FloatRegister(i+4), F56, F58, F60); + __ aes_eround23_l(as_FloatRegister(i+6), F56, F58, F62); + } + } + + __ stf(FloatRegisterImpl::D, F60, to, 0); + __ stf(FloatRegisterImpl::D, F62, to, 8); + __ add(from, 16, from); + __ subcc(len_reg, 16, len_reg); + __ add(to, 16, to); + __ br(Assembler::notEqual, false, Assembler::pt, L_cbcenc256); + __ delayed()->nop(); + __ stf(FloatRegisterImpl::D, F60, rvec, 0); + __ stf(FloatRegisterImpl::D, F62, rvec, 8); + __ retl(); + __ delayed()->mov(L1, O0); + + return start; + } + + address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); + Label L_cbcdec_end, L_expand192bit, L_expand256bit, L_dec_first_block_start; + Label L_dec_first_block128, L_dec_first_block192, L_dec_next2_blocks128, L_dec_next2_blocks192, L_dec_next2_blocks256; + address start = __ pc(); + Register from = I0; // source byte array + Register to = I1; // destination byte array + Register key = I2; // expanded key array + Register rvec = I3; // init vector + const Register len_reg = I4; // cipher length + const Register original_key = I5; // original key array only required during decryption + const Register keylen = L6; // reg for storing expanded key array length + + // save cipher len before save_frame, to return in the end + __ mov(O4, L0); + __ save_frame(0); //args are read from I* registers since we save the frame in the beginning + + // load original key from SunJCE expanded decryption key + for ( int i = 0; i <= 3; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // load initial vector + __ ldx(rvec,0,L0); + __ ldx(rvec,8,L1); + + // read expanded key array length + __ ldsw(Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)), keylen, 0); + + // 256-bit original key size + __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_expand256bit); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_expand192bit); + + // 128-bit original key size + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 36; i += 4 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+2), i/4, as_FloatRegister(i+4)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+4), as_FloatRegister(i+6)); + } + + // load expanded key[last-1] and key[last] elements + __ movdtox(F40,L2); + __ movdtox(F42,L3); + + __ and3(len_reg, 16, L4); + __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks128); + __ delayed()->nop(); + + __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start); + __ delayed()->nop(); + + __ BIND(L_expand192bit); + // load rest of the 192-bit key + __ ldf(FloatRegisterImpl::S, original_key, 16, F4); + __ ldf(FloatRegisterImpl::S, original_key, 20, F5); + + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 36; i += 6 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+4), i/6, as_FloatRegister(i+6)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+6), as_FloatRegister(i+8)); + __ aes_kexpand2(as_FloatRegister(i+4), as_FloatRegister(i+8), as_FloatRegister(i+10)); + } + __ aes_kexpand1(F42, F46, 7, F48); + __ aes_kexpand2(F44, F48, F50); + + // load expanded key[last-1] and key[last] elements + __ movdtox(F48,L2); + __ movdtox(F50,L3); + + __ and3(len_reg, 16, L4); + __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks192); + __ delayed()->nop(); + + __ br(Assembler::always, false, Assembler::pt, L_dec_first_block_start); + __ delayed()->nop(); + + __ BIND(L_expand256bit); + // load rest of the 256-bit key + for ( int i = 4; i <= 7; i++ ) { + __ ldf(FloatRegisterImpl::S, original_key, i*4, as_FloatRegister(i)); + } + + // perform key expansion since SunJCE decryption-key expansion is not compatible with SPARC crypto instructions + for ( int i = 0; i <= 40; i += 8 ) { + __ aes_kexpand1(as_FloatRegister(i), as_FloatRegister(i+6), i/8, as_FloatRegister(i+8)); + __ aes_kexpand2(as_FloatRegister(i+2), as_FloatRegister(i+8), as_FloatRegister(i+10)); + __ aes_kexpand0(as_FloatRegister(i+4), as_FloatRegister(i+10), as_FloatRegister(i+12)); + __ aes_kexpand2(as_FloatRegister(i+6), as_FloatRegister(i+12), as_FloatRegister(i+14)); + } + __ aes_kexpand1(F48, F54, 6, F56); + __ aes_kexpand2(F50, F56, F58); + + // load expanded key[last-1] and key[last] elements + __ movdtox(F56,L2); + __ movdtox(F58,L3); + + __ and3(len_reg, 16, L4); + __ br_null(L4, false, Assembler::pt, L_dec_next2_blocks256); + __ delayed()->nop(); + + __ BIND(L_dec_first_block_start); + __ ldx(from,0,L4); + __ ldx(from,8,L5); + __ xor3(L2,L4,G1); + __ movxtod(G1,F60); + __ xor3(L3,L5,G1); + __ movxtod(G1,F62); + + // 128-bit original key size + __ cmp_and_brx_short(keylen, 44, Assembler::equal, Assembler::pn, L_dec_first_block128); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_first_block192); + + __ aes_dround23(F54, F60, F62, F58); + __ aes_dround01(F52, F60, F62, F56); + __ aes_dround23(F50, F56, F58, F62); + __ aes_dround01(F48, F56, F58, F60); + + __ BIND(L_dec_first_block192); + __ aes_dround23(F46, F60, F62, F58); + __ aes_dround01(F44, F60, F62, F56); + __ aes_dround23(F42, F56, F58, F62); + __ aes_dround01(F40, F56, F58, F60); + + __ BIND(L_dec_first_block128); + for ( int i = 38; i >= 6; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F60, F62, F58); + __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); + if ( i != 6) { + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); + } else { + __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); + } + } + + __ movxtod(L0,F56); + __ movxtod(L1,F58); + __ mov(L4,L0); + __ mov(L5,L1); + __ fxor(FloatRegisterImpl::D, F56, F60, F60); + __ fxor(FloatRegisterImpl::D, F58, F62, F62); + + __ stf(FloatRegisterImpl::D, F60, to, 0); + __ stf(FloatRegisterImpl::D, F62, to, 8); + + __ add(from, 16, from); + __ add(to, 16, to); + __ subcc(len_reg, 16, len_reg); + __ br(Assembler::equal, false, Assembler::pt, L_cbcdec_end); + __ delayed()->nop(); + + // 256-bit original key size + __ cmp_and_brx_short(keylen, 60, Assembler::equal, Assembler::pn, L_dec_next2_blocks256); + + // 192-bit original key size + __ cmp_and_brx_short(keylen, 52, Assembler::equal, Assembler::pn, L_dec_next2_blocks192); + + __ align(OptoLoopAlignment); + __ BIND(L_dec_next2_blocks128); + __ nop(); + + // F40:F42 used for first 16-bytes + __ ldx(from,0,G4); + __ ldx(from,8,G5); + __ xor3(L2,G4,G1); + __ movxtod(G1,F40); + __ xor3(L3,G5,G1); + __ movxtod(G1,F42); + + // F60:F62 used for next 16-bytes + __ ldx(from,16,L4); + __ ldx(from,24,L5); + __ xor3(L2,L4,G1); + __ movxtod(G1,F60); + __ xor3(L3,L5,G1); + __ movxtod(G1,F62); + + for ( int i = 38; i >= 6; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F40, F42, F44); + __ aes_dround01(as_FloatRegister(i-2), F40, F42, F46); + __ aes_dround23(as_FloatRegister(i), F60, F62, F58); + __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); + if (i != 6 ) { + __ aes_dround23(as_FloatRegister(i-4), F46, F44, F42); + __ aes_dround01(as_FloatRegister(i-6), F46, F44, F40); + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); + } else { + __ aes_dround23_l(as_FloatRegister(i-4), F46, F44, F42); + __ aes_dround01_l(as_FloatRegister(i-6), F46, F44, F40); + __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); + } + } + + __ movxtod(L0,F46); + __ movxtod(L1,F44); + __ fxor(FloatRegisterImpl::D, F46, F40, F40); + __ fxor(FloatRegisterImpl::D, F44, F42, F42); + + __ stf(FloatRegisterImpl::D, F40, to, 0); + __ stf(FloatRegisterImpl::D, F42, to, 8); + + __ movxtod(G4,F56); + __ movxtod(G5,F58); + __ mov(L4,L0); + __ mov(L5,L1); + __ fxor(FloatRegisterImpl::D, F56, F60, F60); + __ fxor(FloatRegisterImpl::D, F58, F62, F62); + + __ stf(FloatRegisterImpl::D, F60, to, 16); + __ stf(FloatRegisterImpl::D, F62, to, 24); + + __ add(from, 32, from); + __ add(to, 32, to); + __ subcc(len_reg, 32, len_reg); + __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks128); + __ delayed()->nop(); + __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end); + __ delayed()->nop(); + + __ align(OptoLoopAlignment); + __ BIND(L_dec_next2_blocks192); + __ nop(); + + // F48:F50 used for first 16-bytes + __ ldx(from,0,G4); + __ ldx(from,8,G5); + __ xor3(L2,G4,G1); + __ movxtod(G1,F48); + __ xor3(L3,G5,G1); + __ movxtod(G1,F50); + + // F60:F62 used for next 16-bytes + __ ldx(from,16,L4); + __ ldx(from,24,L5); + __ xor3(L2,L4,G1); + __ movxtod(G1,F60); + __ xor3(L3,L5,G1); + __ movxtod(G1,F62); + + for ( int i = 46; i >= 6; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F48, F50, F52); + __ aes_dround01(as_FloatRegister(i-2), F48, F50, F54); + __ aes_dround23(as_FloatRegister(i), F60, F62, F58); + __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); + if (i != 6 ) { + __ aes_dround23(as_FloatRegister(i-4), F54, F52, F50); + __ aes_dround01(as_FloatRegister(i-6), F54, F52, F48); + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); + } else { + __ aes_dround23_l(as_FloatRegister(i-4), F54, F52, F50); + __ aes_dround01_l(as_FloatRegister(i-6), F54, F52, F48); + __ aes_dround23_l(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01_l(as_FloatRegister(i-6), F56, F58, F60); + } + } + + __ movxtod(L0,F54); + __ movxtod(L1,F52); + __ fxor(FloatRegisterImpl::D, F54, F48, F48); + __ fxor(FloatRegisterImpl::D, F52, F50, F50); + + __ stf(FloatRegisterImpl::D, F48, to, 0); + __ stf(FloatRegisterImpl::D, F50, to, 8); + + __ movxtod(G4,F56); + __ movxtod(G5,F58); + __ mov(L4,L0); + __ mov(L5,L1); + __ fxor(FloatRegisterImpl::D, F56, F60, F60); + __ fxor(FloatRegisterImpl::D, F58, F62, F62); + + __ stf(FloatRegisterImpl::D, F60, to, 16); + __ stf(FloatRegisterImpl::D, F62, to, 24); + + __ add(from, 32, from); + __ add(to, 32, to); + __ subcc(len_reg, 32, len_reg); + __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks192); + __ delayed()->nop(); + __ br(Assembler::always, false, Assembler::pt, L_cbcdec_end); + __ delayed()->nop(); + + __ align(OptoLoopAlignment); + __ BIND(L_dec_next2_blocks256); + __ nop(); + + // F0:F2 used for first 16-bytes + __ ldx(from,0,G4); + __ ldx(from,8,G5); + __ xor3(L2,G4,G1); + __ movxtod(G1,F0); + __ xor3(L3,G5,G1); + __ movxtod(G1,F2); + + // F60:F62 used for next 16-bytes + __ ldx(from,16,L4); + __ ldx(from,24,L5); + __ xor3(L2,L4,G1); + __ movxtod(G1,F60); + __ xor3(L3,L5,G1); + __ movxtod(G1,F62); + + __ aes_dround23(F54, F0, F2, F4); + __ aes_dround01(F52, F0, F2, F6); + __ aes_dround23(F54, F60, F62, F58); + __ aes_dround01(F52, F60, F62, F56); + __ aes_dround23(F50, F6, F4, F2); + __ aes_dround01(F48, F6, F4, F0); + __ aes_dround23(F50, F56, F58, F62); + __ aes_dround01(F48, F56, F58, F60); + // save F48:F54 in temp registers + __ movdtox(F54,G2); + __ movdtox(F52,G3); + __ movdtox(F50,G6); + __ movdtox(F48,G1); + for ( int i = 46; i >= 14; i -= 8 ) { + __ aes_dround23(as_FloatRegister(i), F0, F2, F4); + __ aes_dround01(as_FloatRegister(i-2), F0, F2, F6); + __ aes_dround23(as_FloatRegister(i), F60, F62, F58); + __ aes_dround01(as_FloatRegister(i-2), F60, F62, F56); + __ aes_dround23(as_FloatRegister(i-4), F6, F4, F2); + __ aes_dround01(as_FloatRegister(i-6), F6, F4, F0); + __ aes_dround23(as_FloatRegister(i-4), F56, F58, F62); + __ aes_dround01(as_FloatRegister(i-6), F56, F58, F60); + } + // init F48:F54 with F0:F6 values (original key) + __ ldf(FloatRegisterImpl::D, original_key, 0, F48); + __ ldf(FloatRegisterImpl::D, original_key, 8, F50); + __ ldf(FloatRegisterImpl::D, original_key, 16, F52); + __ ldf(FloatRegisterImpl::D, original_key, 24, F54); + __ aes_dround23(F54, F0, F2, F4); + __ aes_dround01(F52, F0, F2, F6); + __ aes_dround23(F54, F60, F62, F58); + __ aes_dround01(F52, F60, F62, F56); + __ aes_dround23_l(F50, F6, F4, F2); + __ aes_dround01_l(F48, F6, F4, F0); + __ aes_dround23_l(F50, F56, F58, F62); + __ aes_dround01_l(F48, F56, F58, F60); + // re-init F48:F54 with their original values + __ movxtod(G2,F54); + __ movxtod(G3,F52); + __ movxtod(G6,F50); + __ movxtod(G1,F48); + + __ movxtod(L0,F6); + __ movxtod(L1,F4); + __ fxor(FloatRegisterImpl::D, F6, F0, F0); + __ fxor(FloatRegisterImpl::D, F4, F2, F2); + + __ stf(FloatRegisterImpl::D, F0, to, 0); + __ stf(FloatRegisterImpl::D, F2, to, 8); + + __ movxtod(G4,F56); + __ movxtod(G5,F58); + __ mov(L4,L0); + __ mov(L5,L1); + __ fxor(FloatRegisterImpl::D, F56, F60, F60); + __ fxor(FloatRegisterImpl::D, F58, F62, F62); + + __ stf(FloatRegisterImpl::D, F60, to, 16); + __ stf(FloatRegisterImpl::D, F62, to, 24); + + __ add(from, 32, from); + __ add(to, 32, to); + __ subcc(len_reg, 32, len_reg); + __ br(Assembler::notEqual, false, Assembler::pt, L_dec_next2_blocks256); + __ delayed()->nop(); + + __ BIND(L_cbcdec_end); + __ stx(L0, rvec, 0); + __ stx(L1, rvec, 8); + __ restore(); + __ mov(L0, O0); + __ retl(); + __ delayed()->nop(); + + return start; + } + void generate_initial() { // Generates all stubs and initializes the entry points @@ -3368,6 +4137,14 @@ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, &StubRoutines::_safefetchN_fault_pc, &StubRoutines::_safefetchN_continuation_pc); + + // generate AES intrinsics code + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); + } } diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/templateInterpreter_sparc.cpp --- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -156,6 +156,10 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { address entry = __ pc(); + if (state == atos) { + __ profile_return_type(O0, G3_scratch, G1_scratch); + } + #if !defined(_LP64) && defined(COMPILER2) // All return values are where we want them, except for Longs. C2 returns // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. @@ -1333,6 +1337,7 @@ __ movbool(true, G3_scratch); __ stbool(G3_scratch, do_not_unlock_if_synchronized); + __ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch); // increment invocation counter and check for overflow // // Note: checking for negative value instead of overflow diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/templateTable_sparc.cpp --- a/src/cpu/sparc/vm/templateTable_sparc.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -2942,12 +2942,12 @@ void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) { - Register Rtemp = G4_scratch; Register Rcall = Rindex; assert_different_registers(Rcall, G5_method, Gargs, Rret); // get target Method* & entry point __ lookup_virtual_method(Rrecv, Rindex, G5_method); + __ profile_arguments_type(G5_method, Rcall, Gargs, true); __ call_from_interpreter(Rcall, Gargs, Rret); } @@ -3022,6 +3022,7 @@ __ null_check(O0); __ profile_final_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, true); // get return address AddressLiteral table(Interpreter::invoke_return_entry_table()); @@ -3051,6 +3052,7 @@ // do the call __ profile_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); __ call_from_interpreter(Rscratch, Gargs, Rret); } @@ -3066,6 +3068,7 @@ // do the call __ profile_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); __ call_from_interpreter(Rscratch, Gargs, Rret); } @@ -3091,6 +3094,7 @@ // do the call - the index (f2) contains the Method* assert_different_registers(G5_method, Gargs, Rcall); __ mov(Rindex, G5_method); + __ profile_arguments_type(G5_method, Rcall, Gargs, true); __ call_from_interpreter(Rcall, Gargs, Rret); __ bind(notFinal); @@ -3197,6 +3201,7 @@ Register Rcall = Rinterface; assert_different_registers(Rcall, G5_method, Gargs, Rret); + __ profile_arguments_type(G5_method, Rcall, Gargs, true); __ call_from_interpreter(Rcall, Gargs, Rret); } @@ -3226,6 +3231,7 @@ // do the call __ verify_oop(G4_mtype); __ profile_final_call(O4); // FIXME: profile the LambdaForm also + __ profile_arguments_type(G5_method, Rscratch, Gargs, true); __ call_from_interpreter(Rscratch, Gargs, Rret); } @@ -3262,6 +3268,7 @@ // do the call __ verify_oop(G4_callsite); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); __ call_from_interpreter(Rscratch, Gargs, Rret); } diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/vm_version_sparc.cpp --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -234,7 +234,7 @@ assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); char buf[512]; - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")), (has_hardware_popc() ? ", popc" : ""), (has_vis1() ? ", vis1" : ""), @@ -242,6 +242,7 @@ (has_vis3() ? ", vis3" : ""), (has_blk_init() ? ", blk_init" : ""), (has_cbcond() ? ", cbcond" : ""), + (has_aes() ? ", aes" : ""), (is_ultra3() ? ", ultra3" : ""), (is_sun4v() ? ", sun4v" : ""), (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")), @@ -265,6 +266,41 @@ if (!has_vis1()) // Drop to 0 if no VIS1 support UseVIS = 0; + // T2 and above should have support for AES instructions + if (has_aes()) { + if (UseVIS > 0) { // AES intrinsics use FXOR instruction which is VIS1 + if (FLAG_IS_DEFAULT(UseAES)) { + FLAG_SET_DEFAULT(UseAES, true); + } + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + FLAG_SET_DEFAULT(UseAESIntrinsics, true); + } + // we disable both the AES flags if either of them is disabled on the command line + if (!UseAES || !UseAESIntrinsics) { + FLAG_SET_DEFAULT(UseAES, false); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } else { + if (UseAES || UseAESIntrinsics) { + warning("SPARC AES intrinsics require VIS1 instruction support. Intrinsics will be disabled."); + if (UseAES) { + FLAG_SET_DEFAULT(UseAES, false); + } + if (UseAESIntrinsics) { + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + } + } else if (UseAES || UseAESIntrinsics) { + warning("AES instructions are not available on this CPU"); + if (UseAES) { + FLAG_SET_DEFAULT(UseAES, false); + } + if (UseAESIntrinsics) { + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size; diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/sparc/vm/vm_version_sparc.hpp --- a/src/cpu/sparc/vm/vm_version_sparc.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -48,7 +48,8 @@ sparc64_family = 14, M_family = 15, T_family = 16, - T1_model = 17 + T1_model = 17, + aes_instructions = 18 }; enum Feature_Flag_Set { @@ -73,6 +74,7 @@ M_family_m = 1 << M_family, T_family_m = 1 << T_family, T1_model_m = 1 << T1_model, + aes_instructions_m = 1 << aes_instructions, generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m, generic_v9_m = generic_v8_m | v9_instructions_m, @@ -123,6 +125,7 @@ static bool has_vis3() { return (_features & vis3_instructions_m) != 0; } static bool has_blk_init() { return (_features & blk_init_instructions_m) != 0; } static bool has_cbcond() { return (_features & cbcond_instructions_m) != 0; } + static bool has_aes() { return (_features & aes_instructions_m) != 0; } static bool supports_compare_and_exchange() { return has_v9(); } diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -38,6 +38,7 @@ #include "nativeInst_x86.hpp" #include "oops/objArrayKlass.hpp" #include "runtime/sharedRuntime.hpp" +#include "vmreg_x86.inline.hpp" // These masks are used to provide 128-bit aligned bitmasks to the XMM @@ -1006,6 +1007,9 @@ if (UseCompressedOops && !wide) { __ movptr(compressed_src, src->as_register()); __ encode_heap_oop(compressed_src); + if (patch_code != lir_patch_none) { + info->oop_map()->set_narrowoop(compressed_src->as_VMReg()); + } } #endif } diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -941,6 +941,8 @@ case vmIntrinsics::_updateCRC32: { LIRItem crc(x->argument_at(0), this); LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); crc.load_item(); val.load_item(); __ update_crc32(crc.result(), val.result(), result); diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/x86/vm/interp_masm_x86.cpp --- a/src/cpu/x86/vm/interp_masm_x86.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/x86/vm/interp_masm_x86.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -127,7 +127,7 @@ if (MethodData::profile_return()) { // We're right after the type profile for the last - // argument. tmp is the number of cell left in the + // argument. tmp is the number of cells left in the // CallTypeData/VirtualCallTypeData to reach its end. Non null // if there's a return to profile. assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); @@ -198,7 +198,7 @@ // parameters. Collect profiling from last parameter down. // mdo start + parameters offset + array length - 1 addptr(mdp, tmp1); - movptr(tmp1, Address(mdp, in_bytes(ArrayData::array_len_offset()))); + movptr(tmp1, Address(mdp, ArrayData::array_len_offset())); decrement(tmp1, TypeStackSlotEntries::per_arg_count()); Label loop; diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -2403,6 +2403,9 @@ // c_rarg3 - r vector byte array address // c_rarg4 - input length // + // Output: + // rax - input length + // address generate_cipherBlockChaining_encryptAESCrypt() { assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); @@ -2483,7 +2486,7 @@ __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object handleSOERegisters(false /*restoring*/); - __ movl(rax, 0); // return 0 (why?) + __ movptr(rax, len_param); // return length __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); @@ -2557,6 +2560,9 @@ // c_rarg3 - r vector byte array address // c_rarg4 - input length // + // Output: + // rax - input length + // address generate_cipherBlockChaining_decryptAESCrypt() { assert(UseAES, "need AES instructions and misaligned SSE support"); @@ -2650,7 +2656,7 @@ __ movptr(rvec , rvec_param); // restore this since used in loop __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object handleSOERegisters(false /*restoring*/); - __ movl(rax, 0); // return 0 (why?) + __ movptr(rax, len_param); // return length __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -3217,6 +3217,9 @@ // c_rarg3 - r vector byte array address // c_rarg4 - input length // + // Output: + // rax - input length + // address generate_cipherBlockChaining_encryptAESCrypt() { assert(UseAES, "need AES instructions and misaligned SSE support"); __ align(CodeEntryAlignment); @@ -3232,7 +3235,7 @@ #ifndef _WIN64 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) #else - const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64 + const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 const Register len_reg = r10; // pick the first volatile windows register #endif const Register pos = rax; @@ -3259,6 +3262,8 @@ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { __ movdqu(xmm_save(i), as_XMMRegister(i)); } +#else + __ push(len_reg); // Save #endif const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front @@ -3301,8 +3306,10 @@ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { __ movdqu(as_XMMRegister(i), xmm_save(i)); } + __ movl(rax, len_mem); +#else + __ pop(rax); // return length #endif - __ movl(rax, 0); // return 0 (why?) __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); @@ -3409,6 +3416,9 @@ // c_rarg3 - r vector byte array address // c_rarg4 - input length // + // Output: + // rax - input length + // address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { assert(UseAES, "need AES instructions and misaligned SSE support"); @@ -3427,7 +3437,7 @@ #ifndef _WIN64 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) #else - const Address len_mem(rsp, 6 * wordSize); // length is on stack on Win64 + const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 const Register len_reg = r10; // pick the first volatile windows register #endif const Register pos = rax; @@ -3448,7 +3458,10 @@ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { __ movdqu(xmm_save(i), as_XMMRegister(i)); } +#else + __ push(len_reg); // Save #endif + // the java expanded key ordering is rotated one position from what we want // so we start from 0x10 here and hit 0x00 last const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front @@ -3554,8 +3567,10 @@ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) { __ movdqu(as_XMMRegister(i), xmm_save(i)); } + __ movl(rax, len_mem); +#else + __ pop(rax); // return length #endif - __ movl(rax, 0); // return 0 (why?) __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); diff -r baa7d4400c62 -r ce3b1e29425a src/cpu/x86/vm/x86.ad --- a/src/cpu/x86/vm/x86.ad Fri Jan 17 18:39:22 2014 +0000 +++ b/src/cpu/x86/vm/x86.ad Mon Jan 20 09:56:47 2014 +0000 @@ -581,6 +581,12 @@ return !AlignVector; // can be changed by flag } +// x86 AES instructions are compatible with SunJCE expanded +// keys, hence we do not need to pass the original key to stubs +const bool Matcher::pass_original_key_for_aes() { + return false; +} + // Helper methods for MachSpillCopyNode::implementation(). static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st) { diff -r baa7d4400c62 -r ce3b1e29425a src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -119,6 +119,11 @@ #endif if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m; +#ifndef AV_SPARC_AES +#define AV_SPARC_AES 0x00020000 /* aes instrs supported */ +#endif + if (av & AV_SPARC_AES) features |= aes_instructions_m; + } else { // getisax(2) failed, use the old legacy code. #ifndef PRODUCT diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -3288,7 +3288,10 @@ ciSignature* signature_at_call = NULL; x->method()->get_method_at_bci(bci, ignored_will_link, &signature_at_call); - ciKlass* exact = profile_type(md, 0, md->byte_offset_of_slot(data, ret->type_offset()), + // The offset within the MDO of the entry to update may be too large + // to be used in load/store instructions on some platforms. So have + // profile_type() compute the address of the profile in a register. + ciKlass* exact = profile_type(md, md->byte_offset_of_slot(data, ret->type_offset()), 0, ret->type(), x->ret(), mdp, !x->needs_null_check(), signature_at_call->return_type()->as_klass(), diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/classfile/vmSymbols.hpp --- a/src/share/vm/classfile/vmSymbols.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/classfile/vmSymbols.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -787,7 +787,7 @@ do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R) \ do_name( encrypt_name, "encrypt") \ do_name( decrypt_name, "decrypt") \ - do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)V") \ + do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)I") \ \ /* support for java.util.zip */ \ do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \ diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/code/codeCache.cpp --- a/src/share/vm/code/codeCache.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/code/codeCache.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -596,20 +596,13 @@ } #ifndef PRODUCT -// used to keep track of how much time is spent in mark_for_deoptimization +// Keeps track of time spent for checking dependencies static elapsedTimer dependentCheckTime; -static int dependentCheckCount = 0; -#endif // PRODUCT +#endif int CodeCache::mark_for_deoptimization(DepChange& changes) { MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); - -#ifndef PRODUCT - dependentCheckTime.start(); - dependentCheckCount++; -#endif // PRODUCT - int number_of_marked_CodeBlobs = 0; // search the hierarchy looking for nmethods which are affected by the loading of this class @@ -617,32 +610,23 @@ // then search the interfaces this class implements looking for nmethods // which might be dependent of the fact that an interface only had one // implementor. - - { No_Safepoint_Verifier nsv; - for (DepChange::ContextStream str(changes, nsv); str.next(); ) { - Klass* d = str.klass(); - number_of_marked_CodeBlobs += InstanceKlass::cast(d)->mark_dependent_nmethods(changes); - } - } - - if (VerifyDependencies) { - // Turn off dependency tracing while actually testing deps. - NOT_PRODUCT( FlagSetting fs(TraceDependencies, false) ); - FOR_ALL_ALIVE_NMETHODS(nm) { - if (!nm->is_marked_for_deoptimization() && - nm->check_all_dependencies()) { - ResourceMark rm; - tty->print_cr("Should have been marked for deoptimization:"); - changes.print(); - nm->print(); - nm->print_dependencies(); - } - } + // nmethod::check_all_dependencies works only correctly, if no safepoint + // can happen + No_Safepoint_Verifier nsv; + for (DepChange::ContextStream str(changes, nsv); str.next(); ) { + Klass* d = str.klass(); + number_of_marked_CodeBlobs += InstanceKlass::cast(d)->mark_dependent_nmethods(changes); } #ifndef PRODUCT - dependentCheckTime.stop(); -#endif // PRODUCT + if (VerifyDependencies) { + // Object pointers are used as unique identifiers for dependency arguments. This + // is only possible if no safepoint, i.e., GC occurs during the verification code. + dependentCheckTime.start(); + nmethod::check_all_dependencies(changes); + dependentCheckTime.stop(); + } +#endif return number_of_marked_CodeBlobs; } @@ -899,9 +883,7 @@ } tty->print_cr("CodeCache:"); - - tty->print_cr("nmethod dependency checking time %f", dependentCheckTime.seconds(), - dependentCheckTime.seconds() / dependentCheckCount); + tty->print_cr("nmethod dependency checking time %fs", dependentCheckTime.seconds()); if (!live.is_empty()) { live.print("live"); diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/code/dependencies.cpp --- a/src/share/vm/code/dependencies.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/code/dependencies.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -678,6 +678,17 @@ return result; } +/** + * Returns a unique identifier for each dependency argument. + */ +uintptr_t Dependencies::DepStream::get_identifier(int i) { + if (has_oop_argument()) { + return (uintptr_t)(oopDesc*)argument_oop(i); + } else { + return (uintptr_t)argument(i); + } +} + oop Dependencies::DepStream::argument_oop(int i) { oop result = recorded_oop_at(argument_index(i)); assert(result == NULL || result->is_oop(), "must be"); @@ -713,6 +724,57 @@ return NULL; } +// ----------------- DependencySignature -------------------------------------- +bool DependencySignature::equals(const DependencySignature& sig) const { + if (type() != sig.type()) { + return false; + } + + if (args_count() != sig.args_count()) { + return false; + } + + for (int i = 0; i < sig.args_count(); i++) { + if (arg(i) != sig.arg(i)) { + return false; + } + } + return true; +} + + +// ----------------- DependencySignatureBuffer -------------------------------------- +DependencySignatureBuffer::DependencySignatureBuffer() { + _signatures = NEW_RESOURCE_ARRAY(GrowableArray*, Dependencies::TYPE_LIMIT); + memset(_signatures, 0, sizeof(DependencySignature*) * Dependencies::TYPE_LIMIT); +} + +/* Check if arguments are identical. Two dependency signatures are considered + * identical, if the type as well as all argument identifiers are identical. + * If the dependency has not already been checked, the dependency signature is + * added to the checked dependencies of the same type. The function returns + * false, which causes the dependency to be checked in the caller. + */ +bool DependencySignatureBuffer::add_if_missing(const DependencySignature& sig) { + const int index = sig.type(); + GrowableArray* buffer = _signatures[index]; + if (buffer == NULL) { + buffer = new GrowableArray(); + _signatures[index] = buffer; + } + + // Check if we have already checked the dependency + for (int i = 0; i < buffer->length(); i++) { + DependencySignature* checked_signature = buffer->at(i); + if (checked_signature->equals(sig)) { + return true; + } + } + buffer->append((DependencySignature*)&sig); + return false; +} + + /// Checking dependencies: // This hierarchy walker inspects subtypes of a given type, diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/code/dependencies.hpp --- a/src/share/vm/code/dependencies.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/code/dependencies.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -480,6 +480,9 @@ bool next(); DepType type() { return _type; } + bool has_oop_argument() { return type() == call_site_target_value; } + uintptr_t get_identifier(int i); + int argument_count() { return dep_args(type()); } int argument_index(int i) { assert(0 <= i && i < argument_count(), "oob"); return _xi[i]; } @@ -523,6 +526,38 @@ }; +class DependencySignature : public ResourceObj { + private: + int _args_count; + uintptr_t _argument_hash[Dependencies::max_arg_count]; + Dependencies::DepType _type; + + + public: + DependencySignature(Dependencies::DepStream& dep) { + _args_count = dep.argument_count(); + _type = dep.type(); + for (int i = 0; i < _args_count; i++) { + _argument_hash[i] = dep.get_identifier(i); + } + } + + bool equals(const DependencySignature& sig) const; + + int args_count() const { return _args_count; } + uintptr_t arg(int idx) const { return _argument_hash[idx]; } + Dependencies::DepType type() const { return _type; } +}; + +class DependencySignatureBuffer : public StackObj { + private: + GrowableArray** _signatures; + + public: + DependencySignatureBuffer(); + bool add_if_missing(const DependencySignature& sig); +}; + // Every particular DepChange is a sub-class of this class. class DepChange : public StackObj { public: diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/code/nmethod.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -2161,16 +2161,41 @@ } -bool nmethod::check_all_dependencies() { - bool found_check = false; - // wholesale check of all dependencies - for (Dependencies::DepStream deps(this); deps.next(); ) { - if (deps.check_dependency() != NULL) { - found_check = true; - NOT_DEBUG(break); +void nmethod::check_all_dependencies(DepChange& changes) { + // Checked dependencies are allocated into this ResourceMark + ResourceMark rm; + + // Turn off dependency tracing while actually testing dependencies. + NOT_PRODUCT( FlagSetting fs(TraceDependencies, false) ); + + // 'dep_signature_buffers' caches already checked dependencies. + DependencySignatureBuffer dep_signature_buffers; + + // Iterate over live nmethods and check dependencies of all nmethods that are not + // marked for deoptimization. A particular dependency is only checked once. + for(nmethod* nm = CodeCache::alive_nmethod(CodeCache::first()); nm != NULL; nm = CodeCache::alive_nmethod(CodeCache::next(nm))) { + if (!nm->is_marked_for_deoptimization()) { + for (Dependencies::DepStream deps(nm); deps.next(); ) { + // Construct abstraction of a dependency. + const DependencySignature* current_sig = new DependencySignature(deps); + // Determine if 'deps' is already checked. If it is not checked, + // 'add_if_missing()' adds the dependency signature and returns + // false. + if (!dep_signature_buffers.add_if_missing(*current_sig)) { + if (deps.check_dependency() != NULL) { + // Dependency checking failed. Print out information about the failed + // dependency and finally fail with an assert. We can fail here, since + // dependency checking is never done in a product build. + ResourceMark rm; + changes.print(); + nm->print(); + nm->print_dependencies(); + assert(false, "Should have been marked for deoptimization"); + } + } + } } } - return found_check; // tell caller if we found anything } bool nmethod::check_dependency_on(DepChange& changes) { diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/code/nmethod.hpp --- a/src/share/vm/code/nmethod.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/code/nmethod.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -679,7 +679,7 @@ // tells if any of this method's dependencies have been invalidated // (this is expensive!) - bool check_all_dependencies(); + static void check_all_dependencies(DepChange& changes); // tells if this compiled method is dependent on the given changes, // and the changes have invalidated it diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/gc_implementation/g1/satbQueue.cpp --- a/src/share/vm/gc_implementation/g1/satbQueue.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -219,58 +219,52 @@ } #ifdef ASSERT -void SATBMarkQueueSet::dump_active_values(JavaThread* first, - bool expected_active) { - gclog_or_tty->print_cr("SATB queue active values for Java Threads"); - gclog_or_tty->print_cr(" SATB queue set: active is %s", - (is_active()) ? "TRUE" : "FALSE"); - gclog_or_tty->print_cr(" expected_active is %s", - (expected_active) ? "TRUE" : "FALSE"); - for (JavaThread* t = first; t; t = t->next()) { - bool active = t->satb_mark_queue().is_active(); - gclog_or_tty->print_cr(" thread %s, active is %s", - t->name(), (active) ? "TRUE" : "FALSE"); +void SATBMarkQueueSet::dump_active_states(bool expected_active) { + gclog_or_tty->print_cr("Expected SATB active state: %s", + expected_active ? "ACTIVE" : "INACTIVE"); + gclog_or_tty->print_cr("Actual SATB active states:"); + gclog_or_tty->print_cr(" Queue set: %s", is_active() ? "ACTIVE" : "INACTIVE"); + for (JavaThread* t = Threads::first(); t; t = t->next()) { + gclog_or_tty->print_cr(" Thread \"%s\" queue: %s", t->name(), + t->satb_mark_queue().is_active() ? "ACTIVE" : "INACTIVE"); + } + gclog_or_tty->print_cr(" Shared queue: %s", + shared_satb_queue()->is_active() ? "ACTIVE" : "INACTIVE"); +} + +void SATBMarkQueueSet::verify_active_states(bool expected_active) { + // Verify queue set state + if (is_active() != expected_active) { + dump_active_states(expected_active); + guarantee(false, "SATB queue set has an unexpected active state"); + } + + // Verify thread queue states + for (JavaThread* t = Threads::first(); t; t = t->next()) { + if (t->satb_mark_queue().is_active() != expected_active) { + dump_active_states(expected_active); + guarantee(false, "Thread SATB queue has an unexpected active state"); + } + } + + // Verify shared queue state + if (shared_satb_queue()->is_active() != expected_active) { + dump_active_states(expected_active); + guarantee(false, "Shared SATB queue has an unexpected active state"); } } #endif // ASSERT -void SATBMarkQueueSet::set_active_all_threads(bool b, - bool expected_active) { +void SATBMarkQueueSet::set_active_all_threads(bool active, bool expected_active) { assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); - JavaThread* first = Threads::first(); - #ifdef ASSERT - if (_all_active != expected_active) { - dump_active_values(first, expected_active); - - // I leave this here as a guarantee, instead of an assert, so - // that it will still be compiled in if we choose to uncomment - // the #ifdef ASSERT in a product build. The whole block is - // within an #ifdef ASSERT so the guarantee will not be compiled - // in a product build anyway. - guarantee(false, - "SATB queue set has an unexpected active value"); - } + verify_active_states(expected_active); #endif // ASSERT - _all_active = b; - - for (JavaThread* t = first; t; t = t->next()) { -#ifdef ASSERT - bool active = t->satb_mark_queue().is_active(); - if (active != expected_active) { - dump_active_values(first, expected_active); - - // I leave this here as a guarantee, instead of an assert, so - // that it will still be compiled in if we choose to uncomment - // the #ifdef ASSERT in a product build. The whole block is - // within an #ifdef ASSERT so the guarantee will not be compiled - // in a product build anyway. - guarantee(false, - "thread has an unexpected active value in its SATB queue"); - } -#endif // ASSERT - t->satb_mark_queue().set_active(b); + _all_active = active; + for (JavaThread* t = Threads::first(); t; t = t->next()) { + t->satb_mark_queue().set_active(active); } + shared_satb_queue()->set_active(active); } void SATBMarkQueueSet::filter_thread_buffers() { diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/gc_implementation/g1/satbQueue.hpp --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -87,7 +87,8 @@ bool apply_closure_to_completed_buffer_work(bool par, int worker); #ifdef ASSERT - void dump_active_values(JavaThread* first, bool expected_active); + void dump_active_states(bool expected_active); + void verify_active_states(bool expected_active); #endif // ASSERT public: @@ -99,11 +100,11 @@ static void handle_zero_index_for_thread(JavaThread* t); - // Apply "set_active(b)" to all Java threads' SATB queues. It should be + // Apply "set_active(active)" to all SATB queues in the set. It should be // called only with the world stopped. The method will assert that the // SATB queues of all threads it visits, as well as the SATB queue // set itself, has an active value same as expected_active. - void set_active_all_threads(bool b, bool expected_active); + void set_active_all_threads(bool active, bool expected_active); // Filter all the currently-active SATB buffers. void filter_thread_buffers(); diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/memory/metaspace.cpp --- a/src/share/vm/memory/metaspace.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/memory/metaspace.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -1455,9 +1455,10 @@ // No expansion, now see if we want to shrink // We would never want to shrink more than this + assert(capacity_until_GC >= minimum_desired_capacity, + err_msg(SIZE_FORMAT " >= " SIZE_FORMAT, + capacity_until_GC, minimum_desired_capacity)); size_t max_shrink_bytes = capacity_until_GC - minimum_desired_capacity; - assert(max_shrink_bytes >= 0, err_msg("max_shrink_bytes " SIZE_FORMAT, - max_shrink_bytes)); // Should shrinking be considered? if (MaxMetaspaceFreeRatio < 100) { diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/memory/referenceProcessor.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -100,7 +100,6 @@ _enqueuing_is_done(false), _is_alive_non_header(is_alive_non_header), _discovered_list_needs_barrier(discovered_list_needs_barrier), - _bs(NULL), _processing_is_mt(mt_processing), _next_id(0) { @@ -126,10 +125,6 @@ _discovered_refs[i].set_length(0); } - // If we do barriers, cache a copy of the barrier set. - if (discovered_list_needs_barrier) { - _bs = Universe::heap()->barrier_set(); - } setup_policy(false /* default soft ref policy */); } @@ -317,13 +312,9 @@ // Enqueue references that are not made active again, and // clear the decks for the next collection (cycle). ref->enqueue_discovered_reflists((HeapWord*)pending_list_addr, task_executor); - // Do the oop-check on pending_list_addr missed in - // enqueue_discovered_reflist. We should probably - // do a raw oop_check so that future such idempotent - // oop_stores relying on the oop-check side-effect - // may be elided automatically and safely without - // affecting correctness. - oop_store(pending_list_addr, oopDesc::load_decode_heap_oop(pending_list_addr)); + // Do the post-barrier on pending_list_addr missed in + // enqueue_discovered_reflist. + oopDesc::bs()->write_ref_field(pending_list_addr, oopDesc::load_decode_heap_oop(pending_list_addr)); // Stop treating discovered references specially. ref->disable_discovery(); @@ -372,15 +363,17 @@ assert(java_lang_ref_Reference::next(obj) == NULL, "Reference not active; should not be discovered"); // Self-loop next, so as to make Ref not active. - java_lang_ref_Reference::set_next(obj, obj); + // Post-barrier not needed when looping to self. + java_lang_ref_Reference::set_next_raw(obj, obj); if (next_d == obj) { // obj is last // Swap refs_list into pendling_list_addr and // set obj's discovered to what we read from pending_list_addr. oop old = oopDesc::atomic_exchange_oop(refs_list.head(), pending_list_addr); - // Need oop_check on pending_list_addr above; - // see special oop-check code at the end of + // Need post-barrier on pending_list_addr above; + // see special post-barrier code at the end of // enqueue_discovered_reflists() further below. - java_lang_ref_Reference::set_discovered(obj, old); // old may be NULL + java_lang_ref_Reference::set_discovered_raw(obj, old); // old may be NULL + oopDesc::bs()->write_ref_field(java_lang_ref_Reference::discovered_addr(obj), old); } } } else { // Old behaviour @@ -516,13 +509,11 @@ // the reference object and will fail // CT verification. if (UseG1GC) { - BarrierSet* bs = oopDesc::bs(); HeapWord* next_addr = java_lang_ref_Reference::next_addr(_ref); - if (UseCompressedOops) { - bs->write_ref_field_pre((narrowOop*)next_addr, NULL); + oopDesc::bs()->write_ref_field_pre((narrowOop*)next_addr, NULL); } else { - bs->write_ref_field_pre((oop*)next_addr, NULL); + oopDesc::bs()->write_ref_field_pre((oop*)next_addr, NULL); } java_lang_ref_Reference::set_next_raw(_ref, NULL); } else { @@ -790,10 +781,9 @@ }; void ReferenceProcessor::set_discovered(oop ref, oop value) { + java_lang_ref_Reference::set_discovered_raw(ref, value); if (_discovered_list_needs_barrier) { - java_lang_ref_Reference::set_discovered(ref, value); - } else { - java_lang_ref_Reference::set_discovered_raw(ref, value); + oopDesc::bs()->write_ref_field(ref, value); } } @@ -1085,7 +1075,7 @@ // so this will expand to nothing. As a result, we have manually // elided this out for G1, but left in the test for some future // collector that might have need for a pre-barrier here, e.g.:- - // _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); + // oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); assert(!_discovered_list_needs_barrier || UseG1GC, "Need to check non-G1 collector: " "may need a pre-write-barrier for CAS from NULL below"); @@ -1098,7 +1088,7 @@ refs_list.set_head(obj); refs_list.inc_length(1); if (_discovered_list_needs_barrier) { - _bs->write_ref_field((void*)discovered_addr, next_discovered); + oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered); } if (TraceReferenceGC) { @@ -1260,13 +1250,13 @@ // As in the case further above, since we are over-writing a NULL // pre-value, we can safely elide the pre-barrier here for the case of G1. - // e.g.:- _bs->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); + // e.g.:- oopDesc::bs()->write_ref_field_pre((oop* or narrowOop*)discovered_addr, next_discovered); assert(discovered == NULL, "control point invariant"); assert(!_discovered_list_needs_barrier || UseG1GC, "For non-G1 collector, may need a pre-write-barrier for CAS from NULL below"); oop_store_raw(discovered_addr, next_discovered); if (_discovered_list_needs_barrier) { - _bs->write_ref_field((void*)discovered_addr, next_discovered); + oopDesc::bs()->write_ref_field((void*)discovered_addr, next_discovered); } list->set_head(obj); list->inc_length(1); diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/memory/referenceProcessor.hpp --- a/src/share/vm/memory/referenceProcessor.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/memory/referenceProcessor.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -235,7 +235,6 @@ // discovery.) bool _discovered_list_needs_barrier; - BarrierSet* _bs; // Cached copy of BarrierSet. bool _enqueuing_is_done; // true if all weak references enqueued bool _processing_is_mt; // true during phases when // reference processing is MT. @@ -420,25 +419,6 @@ void update_soft_ref_master_clock(); public: - // constructor - ReferenceProcessor(): - _span((HeapWord*)NULL, (HeapWord*)NULL), - _discovered_refs(NULL), - _discoveredSoftRefs(NULL), _discoveredWeakRefs(NULL), - _discoveredFinalRefs(NULL), _discoveredPhantomRefs(NULL), - _discovering_refs(false), - _discovery_is_atomic(true), - _enqueuing_is_done(false), - _discovery_is_mt(false), - _discovered_list_needs_barrier(false), - _bs(NULL), - _is_alive_non_header(NULL), - _num_q(0), - _max_num_q(0), - _processing_is_mt(false), - _next_id(0) - { } - // Default parameters give you a vanilla reference processor. ReferenceProcessor(MemRegion span, bool mt_processing = false, uint mt_processing_degree = 1, diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/opto/cfgnode.cpp --- a/src/share/vm/opto/cfgnode.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/opto/cfgnode.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -1018,7 +1018,7 @@ !jtkp->klass_is_exact() && // Keep exact interface klass (6894807) ttkp->is_loaded() && !ttkp->klass()->is_interface() ) { assert(ft == ttkp->cast_to_ptr_type(jtkp->ptr()) || - ft->isa_narrowoop() && ft->make_ptr() == ttkp->cast_to_ptr_type(jtkp->ptr()), ""); + ft->isa_narrowklass() && ft->make_ptr() == ttkp->cast_to_ptr_type(jtkp->ptr()), ""); jt = ft; } } diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/opto/library_call.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -304,6 +304,7 @@ bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id); Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting); Node* get_key_start_from_aescrypt_object(Node* aescrypt_object); + Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object); bool inline_encodeISOArray(); bool inline_updateCRC32(); bool inline_updateBytesCRC32(); @@ -5936,10 +5937,22 @@ Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object); if (k_start == NULL) return false; - // Call the stub. - make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(), - stubAddr, stubName, TypePtr::BOTTOM, - src_start, dest_start, k_start); + if (Matcher::pass_original_key_for_aes()) { + // on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to + // compatibility issues between Java key expansion and SPARC crypto instructions + Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object); + if (original_k_start == NULL) return false; + + // Call the stub. + make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, dest_start, k_start, original_k_start); + } else { + // Call the stub. + make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, dest_start, k_start); + } return true; } @@ -6017,14 +6030,29 @@ if (objRvec == NULL) return false; Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE); - // Call the stub, passing src_start, dest_start, k_start, r_start and src_len - make_runtime_call(RC_LEAF|RC_NO_FP, - OptoRuntime::cipherBlockChaining_aescrypt_Type(), - stubAddr, stubName, TypePtr::BOTTOM, - src_start, dest_start, k_start, r_start, len); - - // return is void so no result needs to be pushed - + Node* cbcCrypt; + if (Matcher::pass_original_key_for_aes()) { + // on SPARC we need to pass the original key since key expansion needs to happen in intrinsics due to + // compatibility issues between Java key expansion and SPARC crypto instructions + Node* original_k_start = get_original_key_start_from_aescrypt_object(aescrypt_object); + if (original_k_start == NULL) return false; + + // Call the stub, passing src_start, dest_start, k_start, r_start, src_len and original_k_start + cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::cipherBlockChaining_aescrypt_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, dest_start, k_start, r_start, len, original_k_start); + } else { + // Call the stub, passing src_start, dest_start, k_start, r_start and src_len + cbcCrypt = make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::cipherBlockChaining_aescrypt_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + src_start, dest_start, k_start, r_start, len); + } + + // return cipher length (int) + Node* retvalue = _gvn.transform(new (C) ProjNode(cbcCrypt, TypeFunc::Parms)); + set_result(retvalue); return true; } @@ -6039,6 +6067,17 @@ return k_start; } +//------------------------------get_original_key_start_from_aescrypt_object----------------------- +Node * LibraryCallKit::get_original_key_start_from_aescrypt_object(Node *aescrypt_object) { + Node* objAESCryptKey = load_field_from_object(aescrypt_object, "lastKey", "[B", /*is_exact*/ false); + assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt"); + if (objAESCryptKey == NULL) return (Node *) NULL; + + // now have the array, need to get the start address of the lastKey array + Node* original_k_start = array_element_address(objAESCryptKey, intcon(0), T_BYTE); + return original_k_start; +} + //----------------------------inline_cipherBlockChaining_AESCrypt_predicate---------------------------- // Return node representing slow path of predicate check. // the pseudo code we want to emulate with this predicate is: diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/opto/matcher.hpp --- a/src/share/vm/opto/matcher.hpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/opto/matcher.hpp Mon Jan 20 09:56:47 2014 +0000 @@ -286,6 +286,9 @@ // CPU supports misaligned vectors store/load. static const bool misaligned_vectors_ok(); + // Should original key array reference be passed to AES stubs + static const bool pass_original_key_for_aes(); + // Used to determine a "low complexity" 64-bit constant. (Zero is simple.) // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI). // Depends on the details of 64-bit constant generation on the CPU. diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/opto/runtime.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -814,12 +814,18 @@ const TypeFunc* OptoRuntime::aescrypt_block_Type() { // create input type (domain) int num_args = 3; + if (Matcher::pass_original_key_for_aes()) { + num_args = 4; + } int argcnt = num_args; const Type** fields = TypeTuple::fields(argcnt); int argp = TypeFunc::Parms; fields[argp++] = TypePtr::NOTNULL; // src fields[argp++] = TypePtr::NOTNULL; // dest fields[argp++] = TypePtr::NOTNULL; // k array + if (Matcher::pass_original_key_for_aes()) { + fields[argp++] = TypePtr::NOTNULL; // original k array + } assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); @@ -856,6 +862,9 @@ const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { // create input type (domain) int num_args = 5; + if (Matcher::pass_original_key_for_aes()) { + num_args = 6; + } int argcnt = num_args; const Type** fields = TypeTuple::fields(argcnt); int argp = TypeFunc::Parms; @@ -864,13 +873,16 @@ fields[argp++] = TypePtr::NOTNULL; // k array fields[argp++] = TypePtr::NOTNULL; // r array fields[argp++] = TypeInt::INT; // src len + if (Matcher::pass_original_key_for_aes()) { + fields[argp++] = TypePtr::NOTNULL; // original k array + } assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); - // no result type needed + // returning cipher len (int) fields = TypeTuple::fields(1); - fields[TypeFunc::Parms+0] = NULL; // void - const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + fields[TypeFunc::Parms+0] = TypeInt::INT; + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields); return TypeFunc::make(domain, range); } diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/prims/jvmtiRedefineClasses.cpp --- a/src/share/vm/prims/jvmtiRedefineClasses.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -147,6 +147,9 @@ _scratch_classes[i] = NULL; } + // Disable any dependent concurrent compilations + SystemDictionary::notice_modification(); + // Set flag indicating that some invariants are no longer true. // See jvmtiExport.hpp for detailed explanation. JvmtiExport::set_has_redefined_a_class(); diff -r baa7d4400c62 -r ce3b1e29425a src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Fri Jan 17 18:39:22 2014 +0000 +++ b/src/share/vm/runtime/arguments.cpp Mon Jan 20 09:56:47 2014 +0000 @@ -3727,10 +3727,6 @@ // Doing the replace in parent maps helps speculation FLAG_SET_DEFAULT(ReplaceInParentMaps, true); } -#ifndef X86 - // Only on x86 for now - FLAG_SET_DEFAULT(TypeProfileLevel, 0); -#endif #endif if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) { diff -r baa7d4400c62 -r ce3b1e29425a test/compiler/7184394/TestAESMain.java --- a/test/compiler/7184394/TestAESMain.java Fri Jan 17 18:39:22 2014 +0000 +++ b/test/compiler/7184394/TestAESMain.java Mon Jan 20 09:56:47 2014 +0000 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,20 +39,32 @@ System.out.println(iters + " iterations"); TestAESEncode etest = new TestAESEncode(); etest.prepare(); + // warm-up for 20K iterations + System.out.println("Starting encryption warm-up"); + for (int i=0; i<20000; i++) { + etest.run(); + } + System.out.println("Finished encryption warm-up"); long start = System.nanoTime(); for (int i=0; i