# HG changeset patch # User ctornqvi # Date 1410370580 0 # Node ID 64b480f9eb1a761d8fe30f397fb4f365f41a651f # Parent 4edd7572c235a6c97078676010bf9f6c385a4646# Parent 42460b71ba70cfce9e526ed2ce9fd9fff44b0740 Merge diff -r 4edd7572c235 -r 64b480f9eb1a src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -4937,6 +4937,26 @@ emit_arith(0x03, 0xC0, dst, src); } +void Assembler::adcxq(Register dst, Register src) { + //assert(VM_Version::supports_adx(), "adx instructions not supported"); + emit_int8((unsigned char)0x66); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_int8(0x0F); + emit_int8(0x38); + emit_int8((unsigned char)0xF6); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::adoxq(Register dst, Register src) { + //assert(VM_Version::supports_adx(), "adx instructions not supported"); + emit_int8((unsigned char)0xF3); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_int8(0x0F); + emit_int8(0x38); + emit_int8((unsigned char)0xF6); + emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::andq(Address dst, int32_t imm32) { InstructionMark im(this); prefixq(dst); @@ -5444,6 +5464,26 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::mulq(Address src) { + InstructionMark im(this); + prefixq(src); + emit_int8((unsigned char)0xF7); + emit_operand(rsp, src); +} + +void Assembler::mulq(Register src) { + int encode = prefixq_and_encode(src->encoding()); + emit_int8((unsigned char)0xF7); + emit_int8((unsigned char)(0xE0 | encode)); +} + +void Assembler::mulxq(Register dst1, Register dst2, Register src) { + assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); + int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false); + emit_int8((unsigned char)0xF6); + 
emit_int8((unsigned char)(0xC0 | encode)); +} + void Assembler::negq(Register dst) { int encode = prefixq_and_encode(dst->encoding()); emit_int8((unsigned char)0xF7); @@ -5572,6 +5612,28 @@ emit_int8(imm8); } } + +void Assembler::rorq(Register dst, int imm8) { + assert(isShiftCount(imm8 >> 1), "illegal shift count"); + int encode = prefixq_and_encode(dst->encoding()); + if (imm8 == 1) { + emit_int8((unsigned char)0xD1); + emit_int8((unsigned char)(0xC8 | encode)); + } else { + emit_int8((unsigned char)0xC1); + emit_int8((unsigned char)(0xc8 | encode)); + emit_int8(imm8); + } +} + +void Assembler::rorxq(Register dst, Register src, int imm8) { + assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false); + emit_int8((unsigned char)0xF0); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(imm8); +} + void Assembler::sarq(Register dst, int imm8) { assert(isShiftCount(imm8 >> 1), "illegal shift count"); int encode = prefixq_and_encode(dst->encoding()); diff -r 4edd7572c235 -r 64b480f9eb1a src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/cpu/x86/vm/assembler_x86.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -888,6 +888,14 @@ void addq(Register dst, Address src); void addq(Register dst, Register src); +#ifdef _LP64 + //Add Unsigned Integers with Carry Flag + void adcxq(Register dst, Register src); + + //Add Unsigned Integers with Overflow Flag + void adoxq(Register dst, Register src); +#endif + void addr_nop_4(); void addr_nop_5(); void addr_nop_7(); @@ -1204,19 +1212,20 @@ void idivl(Register src); void divl(Register src); // Unsigned division +#ifdef _LP64 void idivq(Register src); +#endif void imull(Register dst, Register src); void imull(Register dst, Register src, int value); void imull(Register dst, Address src); +#ifdef _LP64 void imulq(Register dst, Register 
src); void imulq(Register dst, Register src, int value); -#ifdef _LP64 void imulq(Register dst, Address src); #endif - // jcc is the generic conditional branch generator to run- // time routines, jcc is used for branches to labels. jcc // takes a branch opcode (cc) and a label (L) and generates @@ -1408,9 +1417,16 @@ void movzwq(Register dst, Register src); #endif + // Unsigned multiply with RAX destination register void mull(Address src); void mull(Register src); +#ifdef _LP64 + void mulq(Address src); + void mulq(Register src); + void mulxq(Register dst1, Register dst2, Register src); +#endif + // Multiply Scalar Double-Precision Floating-Point Values void mulsd(XMMRegister dst, Address src); void mulsd(XMMRegister dst, XMMRegister src); @@ -1541,6 +1557,11 @@ void ret(int imm16); +#ifdef _LP64 + void rorq(Register dst, int imm8); + void rorxq(Register dst, Register src, int imm8); +#endif + void sahf(); void sarl(Register dst, int imm8); diff -r 4edd7572c235 -r 64b480f9eb1a src/cpu/x86/vm/globals_x86.hpp --- a/src/cpu/x86/vm/globals_x86.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/cpu/x86/vm/globals_x86.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -176,6 +176,8 @@ "Use count trailing zeros instruction") \ \ product(bool, UseBMI1Instructions, false, \ - "Use BMI instructions") - + "Use BMI1 instructions") \ + \ + product(bool, UseBMI2Instructions, false, \ + "Use BMI2 instructions") #endif // CPU_X86_VM_GLOBALS_X86_HPP diff -r 4edd7572c235 -r 64b480f9eb1a src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -7293,6 +7293,467 @@ bind(L_done); } +#ifdef _LP64 +/** + * Helper for multiply_to_len(). 
+ */ +void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { + addq(dest_lo, src1); + adcq(dest_hi, 0); + addq(dest_lo, src2); + adcq(dest_hi, 0); +} + +/** + * Multiply 64 bit by 64 bit first loop. + */ +void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx) { + // + // jlong carry, x[], y[], z[]; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + // + + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; + + decrementl(xstart); + jcc(Assembler::negative, L_one_x); + + movq(x_xstart, Address(x, xstart, Address::times_4, 0)); + rorq(x_xstart, 32); // convert big-endian to little-endian + + bind(L_first_loop); + decrementl(idx); + jcc(Assembler::negative, L_first_loop_exit); + decrementl(idx); + jcc(Assembler::negative, L_one_y); + movq(y_idx, Address(y, idx, Address::times_4, 0)); + rorq(y_idx, 32); // convert big-endian to little-endian + bind(L_multiply); + movq(product, x_xstart); + mulq(y_idx); // product(rax) * y_idx -> rdx:rax + addq(product, carry); + adcq(rdx, 0); + subl(kdx, 2); + movl(Address(z, kdx, Address::times_4, 4), product); + shrq(product, 32); + movl(Address(z, kdx, Address::times_4, 0), product); + movq(carry, rdx); + jmp(L_first_loop); + + bind(L_one_y); + movl(y_idx, Address(y, 0)); + jmp(L_multiply); + + bind(L_one_x); + movl(x_xstart, Address(x, 0)); + jmp(L_first_loop); + + bind(L_first_loop_exit); +} + +/** + * Multiply 64 bit by 64 bit and add 128 bit. 
+ */ +void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, Register z, + Register yz_idx, Register idx, + Register carry, Register product, int offset) { + // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; + // z[kdx] = (jlong)product; + + movq(yz_idx, Address(y, idx, Address::times_4, offset)); + rorq(yz_idx, 32); // convert big-endian to little-endian + movq(product, x_xstart); + mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax) + movq(yz_idx, Address(z, idx, Address::times_4, offset)); + rorq(yz_idx, 32); // convert big-endian to little-endian + + add2_with_carry(rdx, product, carry, yz_idx); + + movl(Address(z, idx, Address::times_4, offset+4), product); + shrq(product, 32); + movl(Address(z, idx, Address::times_4, offset), product); + +} + +/** + * Multiply 128 bit by 128 bit. Unrolled inner loop. + */ +void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, Register y, Register z, + Register yz_idx, Register idx, Register jdx, + Register carry, Register product, + Register carry2) { + // jlong carry, x[], y[], z[]; + // int kdx = ystart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; + // z[kdx+idx+1] = (jlong)product; + // jlong carry2 = (jlong)(product >>> 64); + // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; + // z[kdx+idx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // idx += 2; + // if (idx > 0) { + // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // + + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + + movl(jdx, idx); + andl(jdx, 0xFFFFFFFC); + shrl(jdx, 2); + + bind(L_third_loop); + subl(jdx, 1); + jcc(Assembler::negative, L_third_loop_exit); + subl(idx, 4); + + multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); + movq(carry2, rdx); + + 
multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); + movq(carry, rdx); + jmp(L_third_loop); + + bind (L_third_loop_exit); + + andl (idx, 0x3); + jcc(Assembler::zero, L_post_third_loop_done); + + Label L_check_1; + subl(idx, 2); + jcc(Assembler::negative, L_check_1); + + multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); + movq(carry, rdx); + + bind (L_check_1); + addl (idx, 0x2); + andl (idx, 0x1); + subl(idx, 1); + jcc(Assembler::negative, L_post_third_loop_done); + + movl(yz_idx, Address(y, idx, Address::times_4, 0)); + movq(product, x_xstart); + mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax) + movl(yz_idx, Address(z, idx, Address::times_4, 0)); + + add2_with_carry(rdx, product, yz_idx, carry); + + movl(Address(z, idx, Address::times_4, 0), product); + shrq(product, 32); + + shlq(rdx, 32); + orq(product, rdx); + movq(carry, product); + + bind(L_post_third_loop_done); +} + +/** + * Multiply 128 bit by 128 bit using BMI2. Unrolled inner loop. 
+ * + */ +void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4) { + assert(UseBMI2Instructions, "should be used only when BMI2 is available"); + + // jlong carry, x[], y[], z[]; + // int kdx = ystart+1; + // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop + // huge_128 tmp3 = (y[idx+1] * rdx) + z[kdx+idx+1] + carry; + // jlong carry2 = (jlong)(tmp3 >>> 64); + // huge_128 tmp4 = (y[idx] * rdx) + z[kdx+idx] + carry2; + // carry = (jlong)(tmp4 >>> 64); + // z[kdx+idx+1] = (jlong)tmp3; + // z[kdx+idx] = (jlong)tmp4; + // } + // idx += 2; + // if (idx > 0) { + // yz_idx1 = (y[idx] * rdx) + z[kdx+idx] + carry; + // z[kdx+idx] = (jlong)yz_idx1; + // carry = (jlong)(yz_idx1 >>> 64); + // } + // + + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + + movl(jdx, idx); + andl(jdx, 0xFFFFFFFC); + shrl(jdx, 2); + + bind(L_third_loop); + subl(jdx, 1); + jcc(Assembler::negative, L_third_loop_exit); + subl(idx, 4); + + movq(yz_idx1, Address(y, idx, Address::times_4, 8)); + rorxq(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian + movq(yz_idx2, Address(y, idx, Address::times_4, 0)); + rorxq(yz_idx2, yz_idx2, 32); + + mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3 + mulxq(carry2, tmp, yz_idx2); // yz_idx2 * rdx -> carry2:tmp + + movq(yz_idx1, Address(z, idx, Address::times_4, 8)); + rorxq(yz_idx1, yz_idx1, 32); + movq(yz_idx2, Address(z, idx, Address::times_4, 0)); + rorxq(yz_idx2, yz_idx2, 32); + + if (VM_Version::supports_adx()) { + adcxq(tmp3, carry); + adoxq(tmp3, yz_idx1); + + adcxq(tmp4, tmp); + adoxq(tmp4, yz_idx2); + + movl(carry, 0); // does not affect flags + adcxq(carry2, carry); + adoxq(carry2, carry); + } else { + add2_with_carry(tmp4, tmp3, carry, yz_idx1); + add2_with_carry(carry2, tmp4, tmp, yz_idx2); + } + movq(carry, carry2); + + movl(Address(z, idx, 
Address::times_4, 12), tmp3); + shrq(tmp3, 32); + movl(Address(z, idx, Address::times_4, 8), tmp3); + + movl(Address(z, idx, Address::times_4, 4), tmp4); + shrq(tmp4, 32); + movl(Address(z, idx, Address::times_4, 0), tmp4); + + jmp(L_third_loop); + + bind (L_third_loop_exit); + + andl (idx, 0x3); + jcc(Assembler::zero, L_post_third_loop_done); + + Label L_check_1; + subl(idx, 2); + jcc(Assembler::negative, L_check_1); + + movq(yz_idx1, Address(y, idx, Address::times_4, 0)); + rorxq(yz_idx1, yz_idx1, 32); + mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3 + movq(yz_idx2, Address(z, idx, Address::times_4, 0)); + rorxq(yz_idx2, yz_idx2, 32); + + add2_with_carry(tmp4, tmp3, carry, yz_idx2); + + movl(Address(z, idx, Address::times_4, 4), tmp3); + shrq(tmp3, 32); + movl(Address(z, idx, Address::times_4, 0), tmp3); + movq(carry, tmp4); + + bind (L_check_1); + addl (idx, 0x2); + andl (idx, 0x1); + subl(idx, 1); + jcc(Assembler::negative, L_post_third_loop_done); + movl(tmp4, Address(y, idx, Address::times_4, 0)); + mulxq(carry2, tmp3, tmp4); // tmp4 * rdx -> carry2:tmp3 + movl(tmp4, Address(z, idx, Address::times_4, 0)); + + add2_with_carry(carry2, tmp3, tmp4, carry); + + movl(Address(z, idx, Address::times_4, 0), tmp3); + shrq(tmp3, 32); + + shlq(carry2, 32); + orq(tmp3, carry2); + movq(carry, tmp3); + + bind(L_post_third_loop_done); +} + +/** + * Code for BigInteger::multiplyToLen() intrinsic. 
+ * + * rdi: x + * rax: xlen + * rsi: y + * rcx: ylen + * r8: z + * r11: zlen + * r12: tmp1 + * r13: tmp2 + * r14: tmp3 + * r15: tmp4 + * rbx: tmp5 + * + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) { + ShortBranchVerifier sbv(this); + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx); + + push(tmp1); + push(tmp2); + push(tmp3); + push(tmp4); + push(tmp5); + + push(xlen); + push(zlen); + + const Register idx = tmp1; + const Register kdx = tmp2; + const Register xstart = tmp3; + + const Register y_idx = tmp4; + const Register carry = tmp5; + const Register product = xlen; + const Register x_xstart = zlen; // reuse register + + // First Loop. + // + // final static long LONG_MASK = 0xffffffffL; + // int xstart = xlen - 1; + // int ystart = ylen - 1; + // long carry = 0; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + // + + movl(idx, ylen); // idx = ylen; + movl(kdx, zlen); // kdx = xlen+ylen; + xorq(carry, carry); // carry = 0; + + Label L_done; + + movl(xstart, xlen); + decrementl(xstart); + jcc(Assembler::negative, L_done); + + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + + Label L_second_loop; + testl(kdx, kdx); + jcc(Assembler::zero, L_second_loop); + + Label L_carry; + subl(kdx, 1); + jcc(Assembler::zero, L_carry); + + movl(Address(z, kdx, Address::times_4, 0), carry); + shrq(carry, 32); + subl(kdx, 1); + + bind(L_carry); + movl(Address(z, kdx, Address::times_4, 0), carry); + + // Second and third (nested) loops. 
+ // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx + + const Register jdx = tmp1; + + bind(L_second_loop); + xorl(carry, carry); // carry = 0; + movl(jdx, ylen); // j = ystart+1 + + subl(xstart, 1); // i = xstart-1; + jcc(Assembler::negative, L_done); + + push (z); + + Label L_last_x; + lea(z, Address(z, xstart, Address::times_4, 4)); // z = z + k - j + subl(xstart, 1); // i = xstart-1; + jcc(Assembler::negative, L_last_x); + + if (UseBMI2Instructions) { + movq(rdx, Address(x, xstart, Address::times_4, 0)); + rorxq(rdx, rdx, 32); // convert big-endian to little-endian + } else { + movq(x_xstart, Address(x, xstart, Address::times_4, 0)); + rorq(x_xstart, 32); // convert big-endian to little-endian + } + + Label L_third_loop_prologue; + bind(L_third_loop_prologue); + + push (x); + push (xstart); + push (ylen); + + + if (UseBMI2Instructions) { + multiply_128_x_128_bmi2_loop(y, z, carry, x, jdx, ylen, product, tmp2, x_xstart, tmp3, tmp4); + } else { // !UseBMI2Instructions + multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); + } + + pop(ylen); + pop(xlen); + pop(x); + pop(z); + + movl(tmp3, xlen); + addl(tmp3, 1); + movl(Address(z, tmp3, Address::times_4, 0), carry); + subl(tmp3, 1); + jccb(Assembler::negative, L_done); + + shrq(carry, 32); + movl(Address(z, tmp3, Address::times_4, 0), carry); + jmp(L_second_loop); + + // Next infrequent code is moved outside loops. 
+ bind(L_last_x); + if (UseBMI2Instructions) { + movl(rdx, Address(x, 0)); + } else { + movl(x_xstart, Address(x, 0)); + } + jmp(L_third_loop_prologue); + + bind(L_done); + + pop(zlen); + pop(xlen); + + pop(tmp5); + pop(tmp4); + pop(tmp3); + pop(tmp2); + pop(tmp1); +} +#endif + /** * Emits code to update CRC-32 with a byte value according to constants in table * diff -r 4edd7572c235 -r 64b480f9eb1a src/cpu/x86/vm/macroAssembler_x86.hpp --- a/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -1221,6 +1221,28 @@ XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, XMMRegister tmp4, Register tmp5, Register result); +#ifdef _LP64 + void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); + void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_add_128_x_128(Register x_xstart, Register y, Register z, + Register yz_idx, Register idx, + Register carry, Register product, int offset); + void multiply_128_x_128_bmi2_loop(Register y, Register z, + Register carry, Register carry2, + Register idx, Register jdx, + Register yz_idx1, Register yz_idx2, + Register tmp, Register tmp3, Register tmp4); + void multiply_128_x_128_loop(Register x_xstart, Register y, Register z, + Register yz_idx, Register idx, Register jdx, + Register carry, Register product, + Register carry2); + void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5); +#endif + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. 
void update_byte_crc32(Register crc, Register val, Register table); void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); diff -r 4edd7572c235 -r 64b480f9eb1a src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -3677,6 +3677,70 @@ return start; } + + /** + * Arguments: + * + * Input: + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address + * c_rarg3 - y length + * not Win64 + * c_rarg4 - z address + * c_rarg5 - z length + * Win64 + * rsp+40 - z address + * rsp+48 - z length + */ + address generate_multiplyToLen() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + + address start = __ pc(); + // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) + // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) + const Register x = rdi; + const Register xlen = rax; + const Register y = rsi; + const Register ylen = rcx; + const Register z = r8; + const Register zlen = r11; + + // Next registers will be saved on stack in multiply_to_len(). 
+ const Register tmp1 = r12; + const Register tmp2 = r13; + const Register tmp3 = r14; + const Register tmp4 = r15; + const Register tmp5 = rbx; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + +#ifndef _WIN64 + __ movptr(zlen, r9); // Save r9 in r11 - zlen +#endif + setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx + // ylen => rcx, z => r8, zlen => r11 + // r9 and r10 may be used to save non-volatile registers +#ifdef _WIN64 + // last 2 arguments (#4, #5) are on stack on Win64 + __ movptr(z, Address(rsp, 6 * wordSize)); + __ movptr(zlen, Address(rsp, 7 * wordSize)); +#endif + + __ movptr(xlen, rsi); + __ movptr(y, rdx); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5); + + restore_arg_regs(); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; + } + #undef __ #define __ masm-> @@ -3917,6 +3981,11 @@ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, &StubRoutines::_safefetchN_fault_pc, &StubRoutines::_safefetchN_continuation_pc); +#ifdef COMPILER2 + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } +#endif } public: diff -r 4edd7572c235 -r 64b480f9eb1a src/cpu/x86/vm/vm_version_x86.cpp --- a/src/cpu/x86/vm/vm_version_x86.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -493,7 +493,7 @@ } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -522,7 +522,8 @@ (supports_tscinv_bit() ? 
", tscinvbit": ""), (supports_tscinv() ? ", tscinv": ""), (supports_bmi1() ? ", bmi1" : ""), - (supports_bmi2() ? ", bmi2" : "")); + (supports_bmi2() ? ", bmi2" : ""), + (supports_adx() ? ", adx" : "")); _features_str = strdup(buf); // UseSSE is set to the smaller of what hardware supports and what @@ -574,7 +575,7 @@ } } else if (UseCRC32Intrinsics) { if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) - warning("CRC32 Intrinsics requires AVX and CLMUL instructions (not available on this CPU)"); + warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)"); FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); } @@ -697,7 +698,20 @@ } #endif } + +#ifdef _LP64 + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + UseMultiplyToLenIntrinsic = true; + } +#else + if (UseMultiplyToLenIntrinsic) { + if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + warning("multiplyToLen intrinsic is not available in 32-bit VM"); + } + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); + } #endif +#endif // COMPILER2 // On new cpus instructions which update whole XMM register should be used // to prevent partial register stall due to dependencies on high half. @@ -840,6 +854,9 @@ } } } + if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { + AllocatePrefetchInstr = 3; + } } // Use count leading zeros count instruction if available. @@ -852,23 +869,35 @@ FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); } + // Use count trailing zeros instruction if available if (supports_bmi1()) { + // tzcnt does not require VEX prefix + if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { + UseCountTrailingZerosInstruction = true; + } + } else if (UseCountTrailingZerosInstruction) { + warning("tzcnt instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); + } + + // BMI instructions use an encoding with VEX prefix. + // VEX prefix is generated only when AVX > 0. 
+ if (supports_bmi1() && supports_avx()) { if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { UseBMI1Instructions = true; } } else if (UseBMI1Instructions) { - warning("BMI1 instructions are not available on this CPU"); + warning("BMI1 instructions are not available on this CPU (AVX is also required)"); FLAG_SET_DEFAULT(UseBMI1Instructions, false); } - // Use count trailing zeros instruction if available - if (supports_bmi1()) { - if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { - UseCountTrailingZerosInstruction = UseBMI1Instructions; + if (supports_bmi2() && supports_avx()) { + if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { + UseBMI2Instructions = true; } - } else if (UseCountTrailingZerosInstruction) { - warning("tzcnt instruction is not available on this CPU"); - FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); + } else if (UseBMI2Instructions) { + warning("BMI2 instructions are not available on this CPU (AVX is also required)"); + FLAG_SET_DEFAULT(UseBMI2Instructions, false); } // Use population count instruction if available. diff -r 4edd7572c235 -r 64b480f9eb1a src/cpu/x86/vm/vm_version_x86.hpp --- a/src/cpu/x86/vm/vm_version_x86.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -209,7 +209,9 @@ erms : 1, : 1, rtm : 1, - : 20; + : 7, + adx : 1, + : 12; } bits; }; @@ -260,7 +262,8 @@ CPU_CLMUL = (1 << 21), // carryless multiply for CRC CPU_BMI1 = (1 << 22), CPU_BMI2 = (1 << 23), - CPU_RTM = (1 << 24) // Restricted Transactional Memory instructions + CPU_RTM = (1 << 24), // Restricted Transactional Memory instructions + CPU_ADX = (1 << 25) } cpuFeatureFlags; enum { @@ -465,10 +468,16 @@ } // Intel features. 
if(is_intel()) { + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0) + result |= CPU_ADX; if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) result |= CPU_BMI2; if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0) result |= CPU_LZCNT; + // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw + if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) { + result |= CPU_3DNOW_PREFETCH; + } } return result; @@ -621,6 +630,7 @@ static bool supports_rtm() { return (_cpuFeatures & CPU_RTM) != 0; } static bool supports_bmi1() { return (_cpuFeatures & CPU_BMI1) != 0; } static bool supports_bmi2() { return (_cpuFeatures & CPU_BMI2) != 0; } + static bool supports_adx() { return (_cpuFeatures & CPU_ADX) != 0; } // Intel features static bool is_intel_family_core() { return is_intel() && extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/asm/codeBuffer.cpp --- a/src/share/vm/asm/codeBuffer.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/asm/codeBuffer.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -133,6 +133,10 @@ // free any overflow storage delete _overflow_arena; + // Claim is that stack allocation ensures resources are cleaned up. + // This is resource clean up, let's hope that all were properly copied out. + free_strings(); + #ifdef ASSERT // Save allocation type to execute assert in ~ResourceObj() // which is called after this destructor. @@ -704,7 +708,7 @@ relocate_code_to(&dest); // transfer strings and comments from buffer to blob - dest_blob->set_strings(_strings); + dest_blob->set_strings(_code_strings); // Done moving code bytes; were they the right size? 
assert(round_to(dest.total_content_size(), oopSize) == dest_blob->content_size(), "sanity"); @@ -1003,11 +1007,11 @@ void CodeBuffer::block_comment(intptr_t offset, const char * comment) { - _strings.add_comment(offset, comment); + _code_strings.add_comment(offset, comment); } const char* CodeBuffer::code_string(const char* str) { - return _strings.add_string(str); + return _code_strings.add_string(str); } class CodeString: public CHeapObj { @@ -1073,6 +1077,7 @@ } void CodeStrings::add_comment(intptr_t offset, const char * comment) { + check_valid(); CodeString* c = new CodeString(comment, offset); CodeString* inspos = (_strings == NULL) ? NULL : find_last(offset); @@ -1088,11 +1093,32 @@ } void CodeStrings::assign(CodeStrings& other) { + other.check_valid(); + // Cannot do following because CodeStrings constructor is not always run! + assert(is_null(), "Cannot assign onto non-empty CodeStrings"); _strings = other._strings; + other.set_null_and_invalidate(); +} + +// Deep copy of CodeStrings for consistent memory management. +// Only used for actual disassembly so this is cheaper than reference counting +// for the "normal" fastdebug case. 
+void CodeStrings::copy(CodeStrings& other) { + other.check_valid(); + check_valid(); + assert(is_null(), "Cannot copy onto non-empty CodeStrings"); + CodeString* n = other._strings; + CodeString** ps = &_strings; + while (n != NULL) { + *ps = new CodeString(n->string(),n->offset()); + ps = &((*ps)->_next); + n = n->next(); + } } void CodeStrings::print_block_comment(outputStream* stream, intptr_t offset) const { - if (_strings != NULL) { + check_valid(); + if (_strings != NULL) { CodeString* c = find(offset); while (c && c->offset() == offset) { stream->bol(); @@ -1103,7 +1129,7 @@ } } - +// Also sets isNull() void CodeStrings::free() { CodeString* n = _strings; while (n) { @@ -1113,10 +1139,11 @@ delete n; n = p; } - _strings = NULL; + set_null_and_invalidate(); } const char* CodeStrings::add_string(const char * string) { + check_valid(); CodeString* s = new CodeString(string); s->set_next(_strings); _strings = s; diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/asm/codeBuffer.hpp --- a/src/share/vm/asm/codeBuffer.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/asm/codeBuffer.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -27,6 +27,7 @@ #include "code/oopRecorder.hpp" #include "code/relocInfo.hpp" +#include "utilities/debug.hpp" class CodeStrings; class PhaseCFG; @@ -245,15 +246,39 @@ private: #ifndef PRODUCT CodeString* _strings; +#ifdef ASSERT + // Becomes true after copy-out, forbids further use. 
+ bool _defunct; // Zero bit pattern is "valid", see memset call in decode_env::decode_env +#endif #endif CodeString* find(intptr_t offset) const; CodeString* find_last(intptr_t offset) const; + void set_null_and_invalidate() { +#ifndef PRODUCT + _strings = NULL; +#ifdef ASSERT + _defunct = true; +#endif +#endif + } + public: CodeStrings() { #ifndef PRODUCT _strings = NULL; +#ifdef ASSERT + _defunct = false; +#endif +#endif + } + + bool is_null() { +#ifdef ASSERT + return _strings == NULL; +#else + return true; #endif } @@ -261,8 +286,17 @@ void add_comment(intptr_t offset, const char * comment) PRODUCT_RETURN; void print_block_comment(outputStream* stream, intptr_t offset) const PRODUCT_RETURN; + // MOVE strings from other to this; invalidate other. void assign(CodeStrings& other) PRODUCT_RETURN; + // COPY strings from other to this; leave other valid. + void copy(CodeStrings& other) PRODUCT_RETURN; void free() PRODUCT_RETURN; + // Guarantee that _strings are used at most once; assign invalidates a buffer. + inline void check_valid() const { +#ifdef ASSERT + assert(!_defunct, "Use of invalid CodeStrings"); +#endif + } }; // A CodeBuffer describes a memory space into which assembly @@ -330,7 +364,7 @@ csize_t _total_size; // size in bytes of combined memory buffer OopRecorder* _oop_recorder; - CodeStrings _strings; + CodeStrings _code_strings; OopRecorder _default_oop_recorder; // override with initialize_oop_recorder Arena* _overflow_arena; @@ -531,7 +565,13 @@ void initialize_oop_recorder(OopRecorder* r); OopRecorder* oop_recorder() const { return _oop_recorder; } - CodeStrings& strings() { return _strings; } + CodeStrings& strings() { return _code_strings; } + + void free_strings() { + if (!_code_strings.is_null()) { + _code_strings.free(); // sets _strings Null as a side-effect. 
+ } + } // Code generation void relocate(address at, RelocationHolder const& rspec, int format = 0) { diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/asm/register.hpp --- a/src/share/vm/asm/register.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/asm/register.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -275,4 +275,101 @@ ); } +inline void assert_different_registers( + AbstractRegister a, + AbstractRegister b, + AbstractRegister c, + AbstractRegister d, + AbstractRegister e, + AbstractRegister f, + AbstractRegister g, + AbstractRegister h, + AbstractRegister i, + AbstractRegister j +) { + assert( + a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j + && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j + && c != d && c != e && c != f && c != g && c != h && c != i && c != j + && d != e && d != f && d != g && d != h && d != i && d != j + && e != f && e != g && e != h && e != i && e != j + && f != g && f != h && f != i && f != j + && g != h && g != i && g != j + && h != i && h != j + && i != j, + err_msg_res("registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT + ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT + ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT + ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT "", + p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i), p2i(j)) + ); +} + +inline void assert_different_registers( + AbstractRegister a, + AbstractRegister b, + AbstractRegister c, + AbstractRegister d, + AbstractRegister e, + AbstractRegister f, + AbstractRegister g, + AbstractRegister h, + AbstractRegister i, + AbstractRegister j, + AbstractRegister k +) { + assert( + a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j && a !=k + && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j && b !=k + && c != d && c != e && c != f && c != g && c != h && c != i && c != j && c !=k + && d 
!= e && d != f && d != g && d != h && d != i && d != j && d !=k + && e != f && e != g && e != h && e != i && e != j && e !=k + && f != g && f != h && f != i && f != j && f !=k + && g != h && g != i && g != j && g !=k + && h != i && h != j && h !=k + && i != j && i !=k + && j !=k, + err_msg_res("registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT + ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT + ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT + ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT ", k=" INTPTR_FORMAT "", + p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i), p2i(j), p2i(k)) + ); +} + +inline void assert_different_registers( + AbstractRegister a, + AbstractRegister b, + AbstractRegister c, + AbstractRegister d, + AbstractRegister e, + AbstractRegister f, + AbstractRegister g, + AbstractRegister h, + AbstractRegister i, + AbstractRegister j, + AbstractRegister k, + AbstractRegister l +) { + assert( + a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j && a !=k && a !=l + && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j && b !=k && b !=l + && c != d && c != e && c != f && c != g && c != h && c != i && c != j && c !=k && c !=l + && d != e && d != f && d != g && d != h && d != i && d != j && d !=k && d !=l + && e != f && e != g && e != h && e != i && e != j && e !=k && e !=l + && f != g && f != h && f != i && f != j && f !=k && f !=l + && g != h && g != i && g != j && g !=k && g !=l + && h != i && h != j && h !=k && h !=l + && i != j && i !=k && i !=l + && j !=k && j !=l + && k !=l, + err_msg_res("registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT + ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT + ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT + ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT ", k=" INTPTR_FORMAT + ", l=" INTPTR_FORMAT "", + p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), 
p2i(g), p2i(h), p2i(i), p2i(j), p2i(k), p2i(l)) + ); +} + #endif // SHARE_VM_ASM_REGISTER_HPP diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/ci/ciMethod.cpp --- a/src/share/vm/ci/ciMethod.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/ci/ciMethod.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -1106,6 +1106,22 @@ } // ------------------------------------------------------------------ +// ciMethod::has_option_value +// +template +bool ciMethod::has_option_value(const char* option, T& value) { + check_is_loaded(); + VM_ENTRY_MARK; + methodHandle mh(THREAD, get_Method()); + return CompilerOracle::has_option_value(mh, option, value); +} +// Explicit instantiation for all OptionTypes supported. +template bool ciMethod::has_option_value(const char* option, intx& value); +template bool ciMethod::has_option_value(const char* option, uintx& value); +template bool ciMethod::has_option_value(const char* option, bool& value); +template bool ciMethod::has_option_value(const char* option, ccstr& value); + +// ------------------------------------------------------------------ // ciMethod::can_be_compiled // // Have previous compilations of this method succeeded? 
diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/ci/ciMethod.hpp --- a/src/share/vm/ci/ciMethod.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/ci/ciMethod.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -264,6 +264,8 @@ bool should_print_assembly(); bool break_at_execute(); bool has_option(const char *option); + template + bool has_option_value(const char* option, T& value); bool can_be_compiled(); bool can_be_osr_compiled(int entry_bci); void set_not_compilable(const char* reason = NULL); diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/classfile/classLoader.cpp --- a/src/share/vm/classfile/classLoader.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/classfile/classLoader.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -1609,7 +1609,7 @@ if (TieredCompilation && TieredStopAtLevel >= CompLevel_full_optimization) { // Clobber the first compile and force second tier compilation nmethod* nm = m->code(); - if (nm != NULL) { + if (nm != NULL && !m->is_method_handle_intrinsic()) { // Throw out the code so that the code cache doesn't fill up nm->make_not_entrant(); m->clear_code(); @@ -1628,7 +1628,7 @@ } nmethod* nm = m->code(); - if (nm != NULL) { + if (nm != NULL && !m->is_method_handle_intrinsic()) { // Throw out the code so that the code cache doesn't fill up nm->make_not_entrant(); m->clear_code(); diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/classfile/systemDictionary.cpp --- a/src/share/vm/classfile/systemDictionary.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/classfile/systemDictionary.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -2291,6 +2291,9 @@ } assert(spe != NULL && spe->method() != NULL, ""); + assert(!UseCompiler || (spe->method()->has_compiled_code() && + spe->method()->code()->entry_point() == spe->method()->from_compiled_entry()), + "MH intrinsic invariant"); return spe->method(); } diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/classfile/vmSymbols.hpp --- a/src/share/vm/classfile/vmSymbols.hpp Tue Sep 09 09:48:42 2014 -0700 +++ 
b/src/share/vm/classfile/vmSymbols.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -787,6 +787,11 @@ do_name( encodeISOArray_name, "encodeISOArray") \ do_signature(encodeISOArray_signature, "([CI[BII)I") \ \ + do_class(java_math_BigInteger, "java/math/BigInteger") \ + do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_R) \ + do_name( multiplyToLen_name, "multiplyToLen") \ + do_signature(multiplyToLen_signature, "([II[II[I)[I") \ + \ /* java/lang/ref/Reference */ \ do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \ \ diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/code/codeBlob.cpp --- a/src/share/vm/code/codeBlob.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/code/codeBlob.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -253,6 +253,7 @@ void BufferBlob::free( BufferBlob *blob ) { ThreadInVMfromUnknown __tiv; // get to VM state in case we block on CodeCache_lock + blob->flush(); { MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); CodeCache::free((CodeBlob*)blob); diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/code/codeCache.cpp --- a/src/share/vm/code/codeCache.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/code/codeCache.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -712,7 +712,9 @@ void CodeCache::mark_all_nmethods_for_deoptimization() { MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); FOR_ALL_ALIVE_NMETHODS(nm) { - nm->mark_for_deoptimization(); + if (!nm->method()->is_method_handle_intrinsic()) { + nm->mark_for_deoptimization(); + } } } diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/code/compiledIC.cpp --- a/src/share/vm/code/compiledIC.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/code/compiledIC.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -595,6 +595,7 @@ } else { // Callee is interpreted code. In any case entering the interpreter // puts a converter-frame on the stack to save arguments. 
+ assert(!m->is_method_handle_intrinsic(), "Compiled code should never call interpreter MH intrinsics"); info._to_interpreter = true; info._entry = m()->get_c2i_entry(); } diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/code/nmethod.hpp --- a/src/share/vm/code/nmethod.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/code/nmethod.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -448,7 +448,10 @@ // alive. It is used when an uncommon trap happens. Returns true // if this thread changed the state of the nmethod or false if // another thread performed the transition. - bool make_not_entrant() { return make_not_entrant_or_zombie(not_entrant); } + bool make_not_entrant() { + assert(!method()->is_method_handle_intrinsic(), "Cannot make MH intrinsic not entrant"); + return make_not_entrant_or_zombie(not_entrant); + } bool make_zombie() { return make_not_entrant_or_zombie(zombie); } // used by jvmti to track if the unload event has been reported diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/compiler/compilerOracle.cpp --- a/src/share/vm/compiler/compilerOracle.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/compiler/compilerOracle.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -167,44 +167,134 @@ } } +enum OptionType { + IntxType, + UintxType, + BoolType, + CcstrType, + UnknownType +}; -class MethodOptionMatcher: public MethodMatcher { - const char * option; - public: - MethodOptionMatcher(Symbol* class_name, Mode class_mode, - Symbol* method_name, Mode method_mode, - Symbol* signature, const char * opt, MethodMatcher* next): - MethodMatcher(class_name, class_mode, method_name, method_mode, signature, next) { - option = opt; +/* Methods to map real type names to OptionType */ +template +static OptionType get_type_for() { + return UnknownType; +}; + +template<> OptionType get_type_for() { + return IntxType; +} + +template<> OptionType get_type_for() { + return UintxType; +} + +template<> OptionType get_type_for() { + return BoolType; +} + +template<> OptionType get_type_for() { 
+ return CcstrType; +} + +template +static const T copy_value(const T value) { + return value; +} + +template<> const ccstr copy_value(const ccstr value) { + return (const ccstr)strdup(value); +} + +template +class TypedMethodOptionMatcher : public MethodMatcher { + const char* _option; + OptionType _type; + const T _value; + +public: + TypedMethodOptionMatcher(Symbol* class_name, Mode class_mode, + Symbol* method_name, Mode method_mode, + Symbol* signature, const char* opt, + const T value, MethodMatcher* next) : + MethodMatcher(class_name, class_mode, method_name, method_mode, signature, next), + _type(get_type_for()), _value(copy_value(value)) { + _option = strdup(opt); } - bool match(methodHandle method, const char* opt) { - MethodOptionMatcher* current = this; + ~TypedMethodOptionMatcher() { + free((void*)_option); + } + + TypedMethodOptionMatcher* match(methodHandle method, const char* opt) { + TypedMethodOptionMatcher* current = this; while (current != NULL) { - current = (MethodOptionMatcher*)current->find(method); + current = (TypedMethodOptionMatcher*)current->find(method); if (current == NULL) { - return false; + return NULL; } - if (strcmp(current->option, opt) == 0) { - return true; + if (strcmp(current->_option, opt) == 0) { + return current; } current = current->next(); } - return false; + return NULL; + } + + TypedMethodOptionMatcher* next() { + return (TypedMethodOptionMatcher*)_next; } - MethodOptionMatcher* next() { - return (MethodOptionMatcher*)_next; - } + OptionType get_type(void) { + return _type; + }; + + T value() { return _value; } - virtual void print() { + void print() { + ttyLocker ttyl; print_base(); - tty->print(" %s", option); + tty->print(" %s", _option); + tty->print(" "); tty->cr(); } }; +template<> +void TypedMethodOptionMatcher::print() { + ttyLocker ttyl; + print_base(); + tty->print(" intx %s", _option); + tty->print(" = " INTX_FORMAT, _value); + tty->cr(); +}; +template<> +void TypedMethodOptionMatcher::print() { + ttyLocker 
ttyl; + print_base(); + tty->print(" uintx %s", _option); + tty->print(" = " UINTX_FORMAT, _value); + tty->cr(); +}; + +template<> +void TypedMethodOptionMatcher::print() { + ttyLocker ttyl; + print_base(); + tty->print(" bool %s", _option); + tty->print(" = %s", _value ? "true" : "false"); + tty->cr(); +}; + +template<> +void TypedMethodOptionMatcher::print() { + ttyLocker ttyl; + print_base(); + tty->print(" const char* %s", _option); + tty->print(" = '%s'", _value); + tty->cr(); +}; // this must parallel the command_names below enum OracleCommand { @@ -259,23 +349,46 @@ return lists[command]; } - - +template static MethodMatcher* add_option_string(Symbol* class_name, MethodMatcher::Mode c_mode, Symbol* method_name, MethodMatcher::Mode m_mode, Symbol* signature, - const char* option) { - lists[OptionCommand] = new MethodOptionMatcher(class_name, c_mode, method_name, m_mode, - signature, option, lists[OptionCommand]); + const char* option, + T value) { + lists[OptionCommand] = new TypedMethodOptionMatcher(class_name, c_mode, method_name, m_mode, + signature, option, value, lists[OptionCommand]); return lists[OptionCommand]; } +template +static bool get_option_value(methodHandle method, const char* option, T& value) { + TypedMethodOptionMatcher* m; + if (lists[OptionCommand] != NULL + && (m = ((TypedMethodOptionMatcher*)lists[OptionCommand])->match(method, option)) != NULL + && m->get_type() == get_type_for()) { + value = m->value(); + return true; + } else { + return false; + } +} bool CompilerOracle::has_option_string(methodHandle method, const char* option) { - return lists[OptionCommand] != NULL && - ((MethodOptionMatcher*)lists[OptionCommand])->match(method, option); + bool value = false; + get_option_value(method, option, value); + return value; } +template +bool CompilerOracle::has_option_value(methodHandle method, const char* option, T& value) { + return ::get_option_value(method, option, value); +} + +// Explicit instantiation for all OptionTypes 
supported. +template bool CompilerOracle::has_option_value(methodHandle method, const char* option, intx& value); +template bool CompilerOracle::has_option_value(methodHandle method, const char* option, uintx& value); +template bool CompilerOracle::has_option_value(methodHandle method, const char* option, bool& value); +template bool CompilerOracle::has_option_value(methodHandle method, const char* option, ccstr& value); bool CompilerOracle::should_exclude(methodHandle method, bool& quietly) { quietly = true; @@ -433,6 +546,94 @@ +// Scan next flag and value in line, return MethodMatcher object on success, NULL on failure. +// On failure, error_msg contains description for the first error. +// For future extensions: set error_msg on first error. +static MethodMatcher* scan_flag_and_value(const char* type, const char* line, int& total_bytes_read, + Symbol* c_name, MethodMatcher::Mode c_match, + Symbol* m_name, MethodMatcher::Mode m_match, + Symbol* signature, + char* errorbuf, const int buf_size) { + total_bytes_read = 0; + int bytes_read = 0; + char flag[256]; + + // Read flag name. + if (sscanf(line, "%*[ \t]%255[a-zA-Z0-9]%n", flag, &bytes_read) == 1) { + line += bytes_read; + total_bytes_read += bytes_read; + + // Read value. 
+ if (strcmp(type, "intx") == 0) { + intx value; + if (sscanf(line, "%*[ \t]" INTX_FORMAT "%n", &value, &bytes_read) == 1) { + total_bytes_read += bytes_read; + return add_option_string(c_name, c_match, m_name, m_match, signature, flag, value); + } else { + jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s ", flag, type); + } + } else if (strcmp(type, "uintx") == 0) { + uintx value; + if (sscanf(line, "%*[ \t]" UINTX_FORMAT "%n", &value, &bytes_read) == 1) { + total_bytes_read += bytes_read; + return add_option_string(c_name, c_match, m_name, m_match, signature, flag, value); + } else { + jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); + } + } else if (strcmp(type, "ccstr") == 0) { + ResourceMark rm; + char* value = NEW_RESOURCE_ARRAY(char, strlen(line) + 1); + if (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", value, &bytes_read) == 1) { + total_bytes_read += bytes_read; + return add_option_string(c_name, c_match, m_name, m_match, signature, flag, (ccstr)value); + } else { + jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); + } + } else if (strcmp(type, "ccstrlist") == 0) { + // Accumulates several strings into one. The internal type is ccstr. 
+ ResourceMark rm; + char* value = NEW_RESOURCE_ARRAY(char, strlen(line) + 1); + char* next_value = value; + if (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", next_value, &bytes_read) == 1) { + total_bytes_read += bytes_read; + line += bytes_read; + next_value += bytes_read; + char* end_value = next_value-1; + while (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", next_value, &bytes_read) == 1) { + total_bytes_read += bytes_read; + line += bytes_read; + *end_value = ' '; // override '\0' + next_value += bytes_read; + end_value = next_value-1; + } + return add_option_string(c_name, c_match, m_name, m_match, signature, flag, (ccstr)value); + } else { + jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); + } + } else if (strcmp(type, "bool") == 0) { + char value[256]; + if (sscanf(line, "%*[ \t]%255[a-zA-Z]%n", value, &bytes_read) == 1) { + if (strcmp(value, "true") == 0) { + total_bytes_read += bytes_read; + return add_option_string(c_name, c_match, m_name, m_match, signature, flag, true); + } else if (strcmp(value, "false") == 0) { + total_bytes_read += bytes_read; + return add_option_string(c_name, c_match, m_name, m_match, signature, flag, false); + } else { + jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); + } + } else { + jio_snprintf(errorbuf, sizeof(errorbuf), " Value cannot be read for flag %s of type %s", flag, type); + } + } else { + jio_snprintf(errorbuf, sizeof(errorbuf), " Type %s not supported ", type); + } + } else { + jio_snprintf(errorbuf, sizeof(errorbuf), " Flag name for type %s should be alphanumeric ", type); + } + return NULL; +} + void CompilerOracle::parse_from_line(char* line) { if (line[0] == '\0') return; if (line[0] == '#') return; @@ -462,8 +663,10 @@ int bytes_read; OracleCommand command = parse_command_name(line, &bytes_read); line += bytes_read; + ResourceMark rm; if (command == UnknownCommand) { + ttyLocker ttyl; tty->print_cr("CompilerOracle: unrecognized 
line"); tty->print_cr(" \"%s\"", original_line); return; @@ -485,7 +688,7 @@ char method_name[256]; char sig[1024]; char errorbuf[1024]; - const char* error_msg = NULL; + const char* error_msg = NULL; // description of first error that appears MethodMatcher* match = NULL; if (scan_line(line, class_name, &c_match, method_name, &m_match, &bytes_read, error_msg)) { @@ -504,43 +707,77 @@ } if (command == OptionCommand) { - // Look for trailing options to support - // ciMethod::has_option("string") to control features in the - // compiler. Multiple options may follow the method name. - char option[256]; + // Look for trailing options. + // + // Two types of trailing options are + // supported: + // + // (1) CompileCommand=option,Klass::method,flag + // (2) CompileCommand=option,Klass::method,type,flag,value + // + // Type (1) is used to support ciMethod::has_option("someflag") + // (i.e., to check if a flag "someflag" is enabled for a method). + // + // Type (2) is used to support options with a value. Values can have the + // the following types: intx, uintx, bool, ccstr, and ccstrlist. + // + // For future extensions: extend scan_flag_and_value() + char option[256]; // stores flag for Type (1) and type of Type (2) while (sscanf(line, "%*[ \t]%255[a-zA-Z0-9]%n", option, &bytes_read) == 1) { if (match != NULL && !_quiet) { // Print out the last match added + ttyLocker ttyl; tty->print("CompilerOracle: %s ", command_names[command]); match->print(); } - match = add_option_string(c_name, c_match, m_name, m_match, signature, strdup(option)); line += bytes_read; - } + + if (strcmp(option, "intx") == 0 + || strcmp(option, "uintx") == 0 + || strcmp(option, "bool") == 0 + || strcmp(option, "ccstr") == 0 + || strcmp(option, "ccstrlist") == 0 + ) { + + // Type (2) option: parse flag name and value. 
+ match = scan_flag_and_value(option, line, bytes_read, + c_name, c_match, m_name, m_match, signature, + errorbuf, sizeof(errorbuf)); + if (match == NULL) { + error_msg = errorbuf; + break; + } + line += bytes_read; + } else { + // Type (1) option + match = add_option_string(c_name, c_match, m_name, m_match, signature, option, true); + } + } // while( } else { - bytes_read = 0; - sscanf(line, "%*[ \t]%n", &bytes_read); - if (line[bytes_read] != '\0') { - jio_snprintf(errorbuf, sizeof(errorbuf), " Unrecognized text after command: %s", line); - error_msg = errorbuf; - } else { - match = add_predicate(command, c_name, c_match, m_name, m_match, signature); - } + match = add_predicate(command, c_name, c_match, m_name, m_match, signature); } } - if (match != NULL) { - if (!_quiet) { - ResourceMark rm; - tty->print("CompilerOracle: %s ", command_names[command]); - match->print(); - } - } else { + ttyLocker ttyl; + if (error_msg != NULL) { + // an error has happened tty->print_cr("CompilerOracle: unrecognized line"); tty->print_cr(" \"%s\"", original_line); if (error_msg != NULL) { tty->print_cr("%s", error_msg); } + } else { + // check for remaining characters + bytes_read = 0; + sscanf(line, "%*[ \t]%n", &bytes_read); + if (line[bytes_read] != '\0') { + tty->print_cr("CompilerOracle: unrecognized line"); + tty->print_cr(" \"%s\"", original_line); + tty->print_cr(" Unrecognized text %s after command ", line); + } else if (match != NULL && !_quiet) { + tty->print("CompilerOracle: %s ", command_names[command]); + match->print(); + } } } diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/compiler/compilerOracle.hpp --- a/src/share/vm/compiler/compilerOracle.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/compiler/compilerOracle.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -64,6 +64,11 @@ // Check to see if this method has option set for it static bool has_option_string(methodHandle method, const char * option); + // Check if method has option and value set. 
If yes, overwrite value and return true, + // otherwise leave value unchanged and return false. + template + static bool has_option_value(methodHandle method, const char* option, T& value); + // Reads from string instead of file static void parse_from_string(const char* command_string, void (*parser)(char*)); diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/compiler/disassembler.cpp --- a/src/share/vm/compiler/disassembler.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/compiler/disassembler.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -245,12 +245,12 @@ }; decode_env::decode_env(CodeBlob* code, outputStream* output, CodeStrings c) { - memset(this, 0, sizeof(*this)); + memset(this, 0, sizeof(*this)); // Beware, this zeroes bits of fields. _output = output ? output : tty; _code = code; if (code != NULL && code->is_nmethod()) _nm = (nmethod*) code; - _strings.assign(c); + _strings.copy(c); // by default, output pc but not bytes: _print_pc = true; diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/interpreter/interpreter.hpp --- a/src/share/vm/interpreter/interpreter.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/interpreter/interpreter.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -53,7 +53,9 @@ public: // Initialization/finalization void initialize(int size, - CodeStrings& strings) { _size = size; DEBUG_ONLY(_strings.assign(strings);) } + CodeStrings& strings) { _size = size; + DEBUG_ONLY(::new(&_strings) CodeStrings();) + DEBUG_ONLY(_strings.assign(strings);) } void finalize() { ShouldNotCallThis(); } // General info/converters diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/oops/methodData.hpp --- a/src/share/vm/oops/methodData.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/oops/methodData.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -2057,7 +2057,7 @@ // Whole-method sticky bits and flags enum { - _trap_hist_limit = 19, // decoupled from Deoptimization::Reason_LIMIT + _trap_hist_limit = 20, // decoupled from Deoptimization::Reason_LIMIT _trap_hist_mask = 
max_jubyte, _extra_data_count = 4 // extra DataLayout headers, for trap history }; // Public flag values diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/opto/c2_globals.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -653,6 +653,9 @@ product(bool, UseMathExactIntrinsics, true, \ "Enables intrinsification of various java.lang.Math functions") \ \ + product(bool, UseMultiplyToLenIntrinsic, false, \ + "Enables intrinsification of BigInteger.multiplyToLen()") \ + \ product(bool, UseTypeSpeculation, true, \ "Speculatively propagate types from profiles") \ \ diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/opto/compile.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -598,6 +598,10 @@ bool method_has_option(const char * option) { return method() != NULL && method()->has_option(option); } + template + bool method_has_option_value(const char * option, T& value) { + return method() != NULL && method()->has_option_value(option, value); + } #ifndef PRODUCT bool trace_opto_output() const { return _trace_opto_output; } bool parsed_irreducible_loop() const { return _parsed_irreducible_loop; } diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/opto/escape.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -944,7 +944,8 @@ strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 || strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 || strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 || - strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0) + strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 || + strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0) ))) { call->dump(); fatal(err_msg_res("EA unexpected CallLeaf %s", 
call->as_CallLeaf()->_name)); diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/opto/library_call.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -322,6 +322,7 @@ bool inline_updateCRC32(); bool inline_updateBytesCRC32(); bool inline_updateByteBufferCRC32(); + bool inline_multiplyToLen(); }; @@ -330,8 +331,12 @@ vmIntrinsics::ID id = m->intrinsic_id(); assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); - if (DisableIntrinsic[0] != '\0' - && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) { + ccstr disable_intr = NULL; + + if ((DisableIntrinsic[0] != '\0' + && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) || + (method_has_option_value("DisableIntrinsic", disable_intr) + && strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)) { // disabled by a user request on the command line: // example: -XX:DisableIntrinsic=_hashCode,_getClass return NULL; @@ -515,6 +520,10 @@ if (!UseAESIntrinsics) return NULL; break; + case vmIntrinsics::_multiplyToLen: + if (!UseMultiplyToLenIntrinsic) return NULL; + break; + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: if (!UseAESIntrinsics) return NULL; @@ -912,6 +921,9 @@ case vmIntrinsics::_digestBase_implCompressMB: return inline_digestBase_implCompressMB(predicate); + case vmIntrinsics::_multiplyToLen: + return inline_multiplyToLen(); + case vmIntrinsics::_encodeISOArray: return inline_encodeISOArray(); @@ -5735,6 +5747,106 @@ return true; } +//-------------inline_multiplyToLen----------------------------------- +bool LibraryCallKit::inline_multiplyToLen() { + assert(UseMultiplyToLenIntrinsic, "not implementated on this platform"); + + address stubAddr = StubRoutines::multiplyToLen(); + if (stubAddr == NULL) { + return false; // Intrinsic's stub is not implemented on this platform + } + const char* stubName = "multiplyToLen"; + 
+ assert(callee()->signature()->size() == 5, "multiplyToLen has 5 parameters"); + + Node* x = argument(1); + Node* xlen = argument(2); + Node* y = argument(3); + Node* ylen = argument(4); + Node* z = argument(5); + + const Type* x_type = x->Value(&_gvn); + const Type* y_type = y->Value(&_gvn); + const TypeAryPtr* top_x = x_type->isa_aryptr(); + const TypeAryPtr* top_y = y_type->isa_aryptr(); + if (top_x == NULL || top_x->klass() == NULL || + top_y == NULL || top_y->klass() == NULL) { + // failed array check + return false; + } + + BasicType x_elem = x_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + BasicType y_elem = y_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (x_elem != T_INT || y_elem != T_INT) { + return false; + } + + // Set the original stack and the reexecute bit for the interpreter to reexecute + // the bytecode that invokes BigInteger.multiplyToLen() if deoptimization happens + // on the return from z array allocation in runtime. + { PreserveReexecuteState preexecs(this); + jvms()->set_should_reexecute(true); + + Node* x_start = array_element_address(x, intcon(0), x_elem); + Node* y_start = array_element_address(y, intcon(0), y_elem); + // 'x_start' points to x array + scaled xlen + // 'y_start' points to y array + scaled ylen + + // Allocate the result array + Node* zlen = _gvn.transform(new(C) AddINode(xlen, ylen)); + Node* klass_node = makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_INT))); + + IdealKit ideal(this); + +#define __ ideal. + Node* one = __ ConI(1); + Node* zero = __ ConI(0); + IdealVariable need_alloc(ideal), z_alloc(ideal); __ declarations_done(); + __ set(need_alloc, zero); + __ set(z_alloc, z); + __ if_then(z, BoolTest::eq, null()); { + __ increment (need_alloc, one); + } __ else_(); { + // Update graphKit memory and control from IdealKit. + sync_kit(ideal); + Node* zlen_arg = load_array_length(z); + // Update IdealKit memory and control from graphKit. 
+ __ sync_kit(this); + __ if_then(zlen_arg, BoolTest::lt, zlen); { + __ increment (need_alloc, one); + } __ end_if(); + } __ end_if(); + + __ if_then(__ value(need_alloc), BoolTest::ne, zero); { + // Update graphKit memory and control from IdealKit. + sync_kit(ideal); + Node * narr = new_array(klass_node, zlen, 1); + // Update IdealKit memory and control from graphKit. + __ sync_kit(this); + __ set(z_alloc, narr); + } __ end_if(); + + sync_kit(ideal); + z = __ value(z_alloc); + _gvn.set_type(z, TypeAryPtr::INTS); + // Final sync IdealKit and GraphKit. + final_sync(ideal); +#undef __ + + Node* z_start = array_element_address(z, intcon(0), T_INT); + + Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, + OptoRuntime::multiplyToLen_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + x_start, xlen, y_start, ylen, z_start, zlen); + } // original reexecute is set back here + + C->set_has_split_ifs(true); // Has chance for split-if optimization + set_result(z); + return true; +} + + /** * Calculate CRC32 for byte. 
* int java.util.zip.CRC32.update(int crc, int b) diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/opto/parse.hpp --- a/src/share/vm/opto/parse.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/opto/parse.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -551,8 +551,9 @@ float dynamic_branch_prediction(float &cnt); float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci); - bool seems_never_taken(float prob); - bool seems_stable_comparison(BoolTest::mask btest, Node* c); + bool seems_never_taken(float prob) const; + bool path_is_suitable_for_uncommon_trap(float prob) const; + bool seems_stable_comparison() const; void do_ifnull(BoolTest::mask btest, Node* c); void do_if(BoolTest::mask btest, Node* c); diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/opto/parse2.cpp --- a/src/share/vm/opto/parse2.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/opto/parse2.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -884,7 +884,7 @@ // some branches (e.g., _213_javac.Assembler.eliminate) validly produce // very small but nonzero probabilities, which if confused with zero // counts would keep the program recompiling indefinitely. -bool Parse::seems_never_taken(float prob) { +bool Parse::seems_never_taken(float prob) const { return prob < PROB_MIN; } @@ -895,53 +895,12 @@ // if a path is never taken, its controlling comparison is // already acting in a stable fashion. If the comparison // seems stable, we will put an expensive uncommon trap -// on the untaken path. To be conservative, and to allow -// partially executed counted loops to be compiled fully, -// we will plant uncommon traps only after pointer comparisons. -bool Parse::seems_stable_comparison(BoolTest::mask btest, Node* cmp) { - for (int depth = 4; depth > 0; depth--) { - // The following switch can find CmpP here over half the time for - // dynamic language code rich with type tests. - // Code using counted loops or array manipulations (typical - // of benchmarks) will have many (>80%) CmpI instructions. 
- switch (cmp->Opcode()) { - case Op_CmpP: - // A never-taken null check looks like CmpP/BoolTest::eq. - // These certainly should be closed off as uncommon traps. - if (btest == BoolTest::eq) - return true; - // A never-failed type check looks like CmpP/BoolTest::ne. - // Let's put traps on those, too, so that we don't have to compile - // unused paths with indeterminate dynamic type information. - if (ProfileDynamicTypes) - return true; - return false; - - case Op_CmpI: - // A small minority (< 10%) of CmpP are masked as CmpI, - // as if by boolean conversion ((p == q? 1: 0) != 0). - // Detect that here, even if it hasn't optimized away yet. - // Specifically, this covers the 'instanceof' operator. - if (btest == BoolTest::ne || btest == BoolTest::eq) { - if (_gvn.type(cmp->in(2))->singleton() && - cmp->in(1)->is_Phi()) { - PhiNode* phi = cmp->in(1)->as_Phi(); - int true_path = phi->is_diamond_phi(); - if (true_path > 0 && - _gvn.type(phi->in(1))->singleton() && - _gvn.type(phi->in(2))->singleton()) { - // phi->region->if_proj->ifnode->bool->cmp - BoolNode* bol = phi->in(0)->in(1)->in(0)->in(1)->as_Bool(); - btest = bol->_test._test; - cmp = bol->in(1); - continue; - } - } - } - return false; - } +// on the untaken path. +bool Parse::seems_stable_comparison() const { + if (C->too_many_traps(method(), bci(), Deoptimization::Reason_unstable_if)) { + return false; } - return false; + return true; } //-------------------------------repush_if_args-------------------------------- @@ -1166,6 +1125,14 @@ } } +bool Parse::path_is_suitable_for_uncommon_trap(float prob) const { + // Don't want to speculate on uncommon traps when running with -Xcomp + if (!UseInterpreter) { + return false; + } + return (seems_never_taken(prob) && seems_stable_comparison()); +} + //----------------------------adjust_map_after_if------------------------------ // Adjust the JVM state to reflect the result of taking this path. 
// Basically, it means inspecting the CmpNode controlling this @@ -1179,33 +1146,9 @@ bool is_fallthrough = (path == successor_for_bci(iter().next_bci())); - if (seems_never_taken(prob) && seems_stable_comparison(btest, c)) { - // If this might possibly turn into an implicit null check, - // and the null has never yet been seen, we need to generate - // an uncommon trap, so as to recompile instead of suffering - // with very slow branches. (We'll get the slow branches if - // the program ever changes phase and starts seeing nulls here.) - // - // We do not inspect for a null constant, since a node may - // optimize to 'null' later on. - // - // Null checks, and other tests which expect inequality, - // show btest == BoolTest::eq along the non-taken branch. - // On the other hand, type tests, must-be-null tests, - // and other tests which expect pointer equality, - // show btest == BoolTest::ne along the non-taken branch. - // We prune both types of branches if they look unused. + if (path_is_suitable_for_uncommon_trap(prob)) { repush_if_args(); - // We need to mark this branch as taken so that if we recompile we will - // see that it is possible. In the tiered system the interpreter doesn't - // do profiling and by the time we get to the lower tier from the interpreter - // the path may be cold again. Make sure it doesn't look untaken - if (is_fallthrough) { - profile_not_taken_branch(!ProfileInterpreter); - } else { - profile_taken_branch(iter().get_dest(), !ProfileInterpreter); - } - uncommon_trap(Deoptimization::Reason_unreached, + uncommon_trap(Deoptimization::Reason_unstable_if, Deoptimization::Action_reinterpret, NULL, (is_fallthrough ? 
"taken always" : "taken never")); diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/opto/runtime.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -942,6 +942,30 @@ return TypeFunc::make(domain, range); } +const TypeFunc* OptoRuntime::multiplyToLen_Type() { + // create input type (domain) + int num_args = 6; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypePtr::NOTNULL; // x + fields[argp++] = TypeInt::INT; // xlen + fields[argp++] = TypePtr::NOTNULL; // y + fields[argp++] = TypeInt::INT; // ylen + fields[argp++] = TypePtr::NOTNULL; // z + fields[argp++] = TypeInt::INT; // zlen + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // no result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = NULL; + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); + return TypeFunc::make(domain, range); +} + + + //------------- Interpreter state access for on stack replacement const TypeFunc* OptoRuntime::osr_end_Type() { // create input type (domain) diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/opto/runtime.hpp --- a/src/share/vm/opto/runtime.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/opto/runtime.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -303,6 +303,8 @@ static const TypeFunc* sha_implCompress_Type(); static const TypeFunc* digestBase_implCompressMB_Type(); + static const TypeFunc* multiplyToLen_Type(); + static const TypeFunc* updateBytesCRC32_Type(); // leaf on stack replacement interpreter accessor types diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/runtime/deoptimization.cpp --- a/src/share/vm/runtime/deoptimization.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/runtime/deoptimization.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -1835,7 +1835,8 @@ "predicate", 
"loop_limit_check", "speculate_class_check", - "rtm_state_change" + "rtm_state_change", + "unstable_if" }; const char* Deoptimization::_trap_action_name[Action_LIMIT] = { // Note: Keep this in sync. with enum DeoptAction. diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/runtime/deoptimization.hpp --- a/src/share/vm/runtime/deoptimization.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/runtime/deoptimization.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -60,6 +60,7 @@ Reason_predicate, // compiler generated predicate failed Reason_loop_limit_check, // compiler generated loop limits check failed Reason_speculate_class_check, // saw unexpected object class from type speculation Reason_rtm_state_change, // rtm state change detected + Reason_unstable_if, // a branch predicted always false was taken Reason_LIMIT, // Note: Keep this enum in sync. with _trap_reason_name. @@ -315,6 +316,8 @@ return Reason_null_check; // recorded per BCI as a null check else if (reason == Reason_speculate_class_check) return Reason_class_check; + else if (reason == Reason_unstable_if) + return Reason_intrinsic; else return Reason_none; } diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/runtime/stubRoutines.cpp --- a/src/share/vm/runtime/stubRoutines.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/runtime/stubRoutines.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -135,6 +135,8 @@ address StubRoutines::_updateBytesCRC32 = NULL; address StubRoutines::_crc_table_adr = NULL; +address StubRoutines::_multiplyToLen = NULL; + double (* StubRoutines::_intrinsic_log )(double) = NULL; double (* StubRoutines::_intrinsic_log10 )(double) = NULL; double (* StubRoutines::_intrinsic_exp )(double) = NULL; diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/runtime/stubRoutines.hpp --- a/src/share/vm/runtime/stubRoutines.hpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/runtime/stubRoutines.hpp Wed Sep 10 17:36:20 2014 +0000 @@ -217,6 +217,8 @@ static address _updateBytesCRC32; static address _crc_table_adr; +
static address _multiplyToLen; + // These are versions of the java.lang.Math methods which perform // the same operations as the intrinsic version. They are used for // constant folding in the compiler to ensure equivalence. If the @@ -373,6 +375,8 @@ static address updateBytesCRC32() { return _updateBytesCRC32; } static address crc_table_addr() { return _crc_table_adr; } + static address multiplyToLen() {return _multiplyToLen; } + static address select_fill_function(BasicType t, bool aligned, const char* &name); static address zero_aligned_words() { return _zero_aligned_words; } diff -r 4edd7572c235 -r 64b480f9eb1a src/share/vm/runtime/vmStructs.cpp --- a/src/share/vm/runtime/vmStructs.cpp Tue Sep 09 09:48:42 2014 -0700 +++ b/src/share/vm/runtime/vmStructs.cpp Wed Sep 10 17:36:20 2014 +0000 @@ -814,6 +814,7 @@ static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \ static_field(StubRoutines, _updateBytesCRC32, address) \ static_field(StubRoutines, _crc_table_adr, address) \ + static_field(StubRoutines, _multiplyToLen, address) \ \ /*****************/ \ /* SharedRuntime */ \ @@ -2495,6 +2496,7 @@ declare_constant(Deoptimization::Reason_age) \ declare_constant(Deoptimization::Reason_predicate) \ declare_constant(Deoptimization::Reason_loop_limit_check) \ + declare_constant(Deoptimization::Reason_unstable_if) \ declare_constant(Deoptimization::Reason_LIMIT) \ declare_constant(Deoptimization::Reason_RECORDED_LIMIT) \ \ diff -r 4edd7572c235 -r 64b480f9eb1a test/compiler/intrinsics/multiplytolen/TestMultiplyToLen.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/intrinsics/multiplytolen/TestMultiplyToLen.java Wed Sep 10 17:36:20 2014 +0000 @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8055494 + * @summary Add C2 x86 intrinsic for BigInteger::multiplyToLen() method + * + * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch + * -XX:CompileCommand=exclude,TestMultiplyToLen::main + * -XX:CompileCommand=option,TestMultiplyToLen::base_multiply,ccstr,DisableIntrinsic,_multiplyToLen + * -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_multiplyToLen + * -XX:CompileCommand=inline,java.math.BigInteger::multiply TestMultiplyToLen + */ + +import java.util.Random; +import java.math.*; + +public class TestMultiplyToLen { + + // Avoid intrinsic by preventing inlining multiply() and multiplyToLen(). + public static BigInteger base_multiply(BigInteger op1, BigInteger op2) { + return op1.multiply(op2); + } + + // Generate multiplyToLen() intrinsic by inlining multiply(). 
+ public static BigInteger new_multiply(BigInteger op1, BigInteger op2) { + return op1.multiply(op2); + } + + public static boolean bytecompare(BigInteger b1, BigInteger b2) { + byte[] data1 = b1.toByteArray(); + byte[] data2 = b2.toByteArray(); + if (data1.length != data2.length) + return false; + for (int i = 0; i < data1.length; i++) { + if (data1[i] != data2[i]) + return false; + } + return true; + } + + public static String stringify(BigInteger b) { + String strout= ""; + byte [] data = b.toByteArray(); + for (int i = 0; i < data.length; i++) { + strout += (String.format("%02x",data[i]) + " "); + } + return strout; + } + + public static void main(String args[]) throws Exception { + + BigInteger oldsum = new BigInteger("0"); + BigInteger newsum = new BigInteger("0"); + + BigInteger b1, b2, oldres, newres; + + Random rand = new Random(); + long seed = System.nanoTime(); + Random rand1 = new Random(); + long seed1 = System.nanoTime(); + rand.setSeed(seed); + rand1.setSeed(seed1); + + for (int j = 0; j < 1000000; j++) { + int rand_int = rand1.nextInt(3136)+32; + int rand_int1 = rand1.nextInt(3136)+32; + b1 = new BigInteger(rand_int, rand); + b2 = new BigInteger(rand_int1, rand); + + oldres = base_multiply(b1,b2); + newres = new_multiply(b1,b2); + + oldsum = oldsum.add(oldres); + newsum = newsum.add(newres); + + if (!bytecompare(oldres,newres)) { + System.out.print("mismatch for:b1:" + stringify(b1) + " :b2:" + stringify(b2) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres)); + System.out.println(b1); + System.out.println(b2); + throw new Exception("Failed"); + } + } + if (!bytecompare(oldsum,newsum)) { + System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum)); + throw new Exception("Failed"); + } else { + System.out.println("Success"); + } + } +}