changeset 20443:64b480f9eb1a

Merge
author ctornqvi
date Wed, 10 Sep 2014 17:36:20 +0000
parents 4edd7572c235 (current diff) 42460b71ba70 (diff)
children fd9feb55481c
files
diffstat 39 files changed, 1493 insertions(+), 161 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/x86/vm/assembler_x86.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -4937,6 +4937,26 @@
   emit_arith(0x03, 0xC0, dst, src);
 }
 
+void Assembler::adcxq(Register dst, Register src) {
+  //assert(VM_Version::supports_adx(), "adx instructions not supported");
+  emit_int8((unsigned char)0x66);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_int8(0x0F);
+  emit_int8(0x38);
+  emit_int8((unsigned char)0xF6);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::adoxq(Register dst, Register src) {
+  //assert(VM_Version::supports_adx(), "adx instructions not supported");
+  emit_int8((unsigned char)0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_int8(0x0F);
+  emit_int8(0x38);
+  emit_int8((unsigned char)0xF6);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::andq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
@@ -5444,6 +5464,26 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::mulq(Address src) {
+  InstructionMark im(this);
+  prefixq(src);
+  emit_int8((unsigned char)0xF7);
+  emit_operand(rsp, src);
+}
+
+void Assembler::mulq(Register src) {
+  int encode = prefixq_and_encode(src->encoding());
+  emit_int8((unsigned char)0xF7);
+  emit_int8((unsigned char)(0xE0 | encode));
+}
+
+void Assembler::mulxq(Register dst1, Register dst2, Register src) {
+  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false);
+  emit_int8((unsigned char)0xF6);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::negq(Register dst) {
   int encode = prefixq_and_encode(dst->encoding());
   emit_int8((unsigned char)0xF7);
@@ -5572,6 +5612,28 @@
     emit_int8(imm8);
   }
 }
+
+void Assembler::rorq(Register dst, int imm8) {
+  assert(isShiftCount(imm8 >> 1), "illegal shift count");
+  int encode = prefixq_and_encode(dst->encoding());
+  if (imm8 == 1) {
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xC8 | encode));
+  } else {
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xc8 | encode));
+    emit_int8(imm8);
+  }
+}
+
+void Assembler::rorxq(Register dst, Register src, int imm8) {
+  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false);
+  emit_int8((unsigned char)0xF0);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
 void Assembler::sarq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
--- a/src/cpu/x86/vm/assembler_x86.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -888,6 +888,14 @@
   void addq(Register dst, Address src);
   void addq(Register dst, Register src);
 
+#ifdef _LP64
+ //Add Unsigned Integers with Carry Flag
+  void adcxq(Register dst, Register src);
+
+ //Add Unsigned Integers with Overflow Flag
+  void adoxq(Register dst, Register src);
+#endif
+
   void addr_nop_4();
   void addr_nop_5();
   void addr_nop_7();
@@ -1204,19 +1212,20 @@
   void idivl(Register src);
   void divl(Register src); // Unsigned division
 
+#ifdef _LP64
   void idivq(Register src);
+#endif
 
   void imull(Register dst, Register src);
   void imull(Register dst, Register src, int value);
   void imull(Register dst, Address src);
 
+#ifdef _LP64
   void imulq(Register dst, Register src);
   void imulq(Register dst, Register src, int value);
-#ifdef _LP64
   void imulq(Register dst, Address src);
 #endif
 
-
   // jcc is the generic conditional branch generator to run-
   // time routines, jcc is used for branches to labels. jcc
   // takes a branch opcode (cc) and a label (L) and generates
@@ -1408,9 +1417,16 @@
   void movzwq(Register dst, Register src);
 #endif
 
+  // Unsigned multiply with RAX destination register
   void mull(Address src);
   void mull(Register src);
 
+#ifdef _LP64
+  void mulq(Address src);
+  void mulq(Register src);
+  void mulxq(Register dst1, Register dst2, Register src);
+#endif
+
   // Multiply Scalar Double-Precision Floating-Point Values
   void mulsd(XMMRegister dst, Address src);
   void mulsd(XMMRegister dst, XMMRegister src);
@@ -1541,6 +1557,11 @@
 
   void ret(int imm16);
 
+#ifdef _LP64
+  void rorq(Register dst, int imm8);
+  void rorxq(Register dst, Register src, int imm8);
+#endif
+
   void sahf();
 
   void sarl(Register dst, int imm8);
--- a/src/cpu/x86/vm/globals_x86.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -176,6 +176,8 @@
           "Use count trailing zeros instruction")                           \
                                                                             \
   product(bool, UseBMI1Instructions, false,                                 \
-          "Use BMI instructions")
-
+          "Use BMI1 instructions")                                          \
+                                                                            \
+  product(bool, UseBMI2Instructions, false,                                 \
+          "Use BMI2 instructions")
 #endif // CPU_X86_VM_GLOBALS_X86_HPP
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -7293,6 +7293,467 @@
   bind(L_done);
 }
 
+#ifdef _LP64
+/**
+ * Helper for multiply_to_len().
+ */
+void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
+  addq(dest_lo, src1);
+  adcq(dest_hi, 0);
+  addq(dest_lo, src2);
+  adcq(dest_hi, 0);
+}
+
+/**
+ * Multiply 64 bit by 64 bit first loop.
+ */
+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
+                                           Register y, Register y_idx, Register z,
+                                           Register carry, Register product,
+                                           Register idx, Register kdx) {
+  //
+  //  jlong carry, x[], y[], z[];
+  //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) {
+  //    huge_128 product = y[idx] * x[xstart] + carry;
+  //    z[kdx] = (jlong)product;
+  //    carry  = (jlong)(product >>> 64);
+  //  }
+  //  z[xstart] = carry;
+  //
+
+  Label L_first_loop, L_first_loop_exit;
+  Label L_one_x, L_one_y, L_multiply;
+
+  decrementl(xstart);
+  jcc(Assembler::negative, L_one_x);
+
+  movq(x_xstart, Address(x, xstart, Address::times_4,  0));
+  rorq(x_xstart, 32); // convert big-endian to little-endian
+
+  bind(L_first_loop);
+  decrementl(idx);
+  jcc(Assembler::negative, L_first_loop_exit);
+  decrementl(idx);
+  jcc(Assembler::negative, L_one_y);
+  movq(y_idx, Address(y, idx, Address::times_4,  0));
+  rorq(y_idx, 32); // convert big-endian to little-endian
+  bind(L_multiply);
+  movq(product, x_xstart);
+  mulq(y_idx); // product(rax) * y_idx -> rdx:rax
+  addq(product, carry);
+  adcq(rdx, 0);
+  subl(kdx, 2);
+  movl(Address(z, kdx, Address::times_4,  4), product);
+  shrq(product, 32);
+  movl(Address(z, kdx, Address::times_4,  0), product);
+  movq(carry, rdx);
+  jmp(L_first_loop);
+
+  bind(L_one_y);
+  movl(y_idx, Address(y,  0));
+  jmp(L_multiply);
+
+  bind(L_one_x);
+  movl(x_xstart, Address(x,  0));
+  jmp(L_first_loop);
+
+  bind(L_first_loop_exit);
+}
+
+/**
+ * Multiply 64 bit by 64 bit and add 128 bit.
+ */
+void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, Register z,
+                                            Register yz_idx, Register idx,
+                                            Register carry, Register product, int offset) {
+  //     huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
+  //     z[kdx] = (jlong)product;
+
+  movq(yz_idx, Address(y, idx, Address::times_4,  offset));
+  rorq(yz_idx, 32); // convert big-endian to little-endian
+  movq(product, x_xstart);
+  mulq(yz_idx);     // product(rax) * yz_idx -> rdx:product(rax)
+  movq(yz_idx, Address(z, idx, Address::times_4,  offset));
+  rorq(yz_idx, 32); // convert big-endian to little-endian
+
+  add2_with_carry(rdx, product, carry, yz_idx);
+
+  movl(Address(z, idx, Address::times_4,  offset+4), product);
+  shrq(product, 32);
+  movl(Address(z, idx, Address::times_4,  offset), product);
+
+}
+
+/**
+ * Multiply 128 bit by 128 bit. Unrolled inner loop.
+ */
+void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
+                                             Register yz_idx, Register idx, Register jdx,
+                                             Register carry, Register product,
+                                             Register carry2) {
+  //   jlong carry, x[], y[], z[];
+  //   int kdx = ystart+1;
+  //   for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
+  //     huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
+  //     z[kdx+idx+1] = (jlong)product;
+  //     jlong carry2  = (jlong)(product >>> 64);
+  //     product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
+  //     z[kdx+idx] = (jlong)product;
+  //     carry  = (jlong)(product >>> 64);
+  //   }
+  //   idx += 2;
+  //   if (idx > 0) {
+  //     product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
+  //     z[kdx+idx] = (jlong)product;
+  //     carry  = (jlong)(product >>> 64);
+  //   }
+  //
+
+  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
+
+  movl(jdx, idx);
+  andl(jdx, 0xFFFFFFFC);
+  shrl(jdx, 2);
+
+  bind(L_third_loop);
+  subl(jdx, 1);
+  jcc(Assembler::negative, L_third_loop_exit);
+  subl(idx, 4);
+
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
+  movq(carry2, rdx);
+
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
+  movq(carry, rdx);
+  jmp(L_third_loop);
+
+  bind (L_third_loop_exit);
+
+  andl (idx, 0x3);
+  jcc(Assembler::zero, L_post_third_loop_done);
+
+  Label L_check_1;
+  subl(idx, 2);
+  jcc(Assembler::negative, L_check_1);
+
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
+  movq(carry, rdx);
+
+  bind (L_check_1);
+  addl (idx, 0x2);
+  andl (idx, 0x1);
+  subl(idx, 1);
+  jcc(Assembler::negative, L_post_third_loop_done);
+
+  movl(yz_idx, Address(y, idx, Address::times_4,  0));
+  movq(product, x_xstart);
+  mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax)
+  movl(yz_idx, Address(z, idx, Address::times_4,  0));
+
+  add2_with_carry(rdx, product, yz_idx, carry);
+
+  movl(Address(z, idx, Address::times_4,  0), product);
+  shrq(product, 32);
+
+  shlq(rdx, 32);
+  orq(product, rdx);
+  movq(carry, product);
+
+  bind(L_post_third_loop_done);
+}
+
+/**
+ * Multiply 128 bit by 128 bit using BMI2. Unrolled inner loop.
+ *
+ */
+void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z,
+                                                  Register carry, Register carry2,
+                                                  Register idx, Register jdx,
+                                                  Register yz_idx1, Register yz_idx2,
+                                                  Register tmp, Register tmp3, Register tmp4) {
+  assert(UseBMI2Instructions, "should be used only when BMI2 is available");
+
+  //   jlong carry, x[], y[], z[];
+  //   int kdx = ystart+1;
+  //   for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
+  //     huge_128 tmp3 = (y[idx+1] * rdx) + z[kdx+idx+1] + carry;
+  //     jlong carry2  = (jlong)(tmp3 >>> 64);
+  //     huge_128 tmp4 = (y[idx]   * rdx) + z[kdx+idx] + carry2;
+  //     carry  = (jlong)(tmp4 >>> 64);
+  //     z[kdx+idx+1] = (jlong)tmp3;
+  //     z[kdx+idx] = (jlong)tmp4;
+  //   }
+  //   idx += 2;
+  //   if (idx > 0) {
+  //     yz_idx1 = (y[idx] * rdx) + z[kdx+idx] + carry;
+  //     z[kdx+idx] = (jlong)yz_idx1;
+  //     carry  = (jlong)(yz_idx1 >>> 64);
+  //   }
+  //
+
+  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
+
+  movl(jdx, idx);
+  andl(jdx, 0xFFFFFFFC);
+  shrl(jdx, 2);
+
+  bind(L_third_loop);
+  subl(jdx, 1);
+  jcc(Assembler::negative, L_third_loop_exit);
+  subl(idx, 4);
+
+  movq(yz_idx1,  Address(y, idx, Address::times_4,  8));
+  rorxq(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
+  movq(yz_idx2, Address(y, idx, Address::times_4,  0));
+  rorxq(yz_idx2, yz_idx2, 32);
+
+  mulxq(tmp4, tmp3, yz_idx1);  //  yz_idx1 * rdx -> tmp4:tmp3
+  mulxq(carry2, tmp, yz_idx2); //  yz_idx2 * rdx -> carry2:tmp
+
+  movq(yz_idx1,  Address(z, idx, Address::times_4,  8));
+  rorxq(yz_idx1, yz_idx1, 32);
+  movq(yz_idx2, Address(z, idx, Address::times_4,  0));
+  rorxq(yz_idx2, yz_idx2, 32);
+
+  if (VM_Version::supports_adx()) {
+    adcxq(tmp3, carry);
+    adoxq(tmp3, yz_idx1);
+
+    adcxq(tmp4, tmp);
+    adoxq(tmp4, yz_idx2);
+
+    movl(carry, 0); // does not affect flags
+    adcxq(carry2, carry);
+    adoxq(carry2, carry);
+  } else {
+    add2_with_carry(tmp4, tmp3, carry, yz_idx1);
+    add2_with_carry(carry2, tmp4, tmp, yz_idx2);
+  }
+  movq(carry, carry2);
+
+  movl(Address(z, idx, Address::times_4, 12), tmp3);
+  shrq(tmp3, 32);
+  movl(Address(z, idx, Address::times_4,  8), tmp3);
+
+  movl(Address(z, idx, Address::times_4,  4), tmp4);
+  shrq(tmp4, 32);
+  movl(Address(z, idx, Address::times_4,  0), tmp4);
+
+  jmp(L_third_loop);
+
+  bind (L_third_loop_exit);
+
+  andl (idx, 0x3);
+  jcc(Assembler::zero, L_post_third_loop_done);
+
+  Label L_check_1;
+  subl(idx, 2);
+  jcc(Assembler::negative, L_check_1);
+
+  movq(yz_idx1, Address(y, idx, Address::times_4,  0));
+  rorxq(yz_idx1, yz_idx1, 32);
+  mulxq(tmp4, tmp3, yz_idx1); //  yz_idx1 * rdx -> tmp4:tmp3
+  movq(yz_idx2, Address(z, idx, Address::times_4,  0));
+  rorxq(yz_idx2, yz_idx2, 32);
+
+  add2_with_carry(tmp4, tmp3, carry, yz_idx2);
+
+  movl(Address(z, idx, Address::times_4,  4), tmp3);
+  shrq(tmp3, 32);
+  movl(Address(z, idx, Address::times_4,  0), tmp3);
+  movq(carry, tmp4);
+
+  bind (L_check_1);
+  addl (idx, 0x2);
+  andl (idx, 0x1);
+  subl(idx, 1);
+  jcc(Assembler::negative, L_post_third_loop_done);
+  movl(tmp4, Address(y, idx, Address::times_4,  0));
+  mulxq(carry2, tmp3, tmp4);  //  tmp4 * rdx -> carry2:tmp3
+  movl(tmp4, Address(z, idx, Address::times_4,  0));
+
+  add2_with_carry(carry2, tmp3, tmp4, carry);
+
+  movl(Address(z, idx, Address::times_4,  0), tmp3);
+  shrq(tmp3, 32);
+
+  shlq(carry2, 32);
+  orq(tmp3, carry2);
+  movq(carry, tmp3);
+
+  bind(L_post_third_loop_done);
+}
+
+/**
+ * Code for BigInteger::multiplyToLen() instrinsic.
+ *
+ * rdi: x
+ * rax: xlen
+ * rsi: y
+ * rcx: ylen
+ * r8:  z
+ * r11: zlen
+ * r12: tmp1
+ * r13: tmp2
+ * r14: tmp3
+ * r15: tmp4
+ * rbx: tmp5
+ *
+ */
+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
+                                     Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
+  ShortBranchVerifier sbv(this);
+  assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx);
+
+  push(tmp1);
+  push(tmp2);
+  push(tmp3);
+  push(tmp4);
+  push(tmp5);
+
+  push(xlen);
+  push(zlen);
+
+  const Register idx = tmp1;
+  const Register kdx = tmp2;
+  const Register xstart = tmp3;
+
+  const Register y_idx = tmp4;
+  const Register carry = tmp5;
+  const Register product  = xlen;
+  const Register x_xstart = zlen;  // reuse register
+
+  // First Loop.
+  //
+  //  final static long LONG_MASK = 0xffffffffL;
+  //  int xstart = xlen - 1;
+  //  int ystart = ylen - 1;
+  //  long carry = 0;
+  //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) {
+  //    long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
+  //    z[kdx] = (int)product;
+  //    carry = product >>> 32;
+  //  }
+  //  z[xstart] = (int)carry;
+  //
+
+  movl(idx, ylen);      // idx = ylen;
+  movl(kdx, zlen);      // kdx = xlen+ylen;
+  xorq(carry, carry);   // carry = 0;
+
+  Label L_done;
+
+  movl(xstart, xlen);
+  decrementl(xstart);
+  jcc(Assembler::negative, L_done);
+
+  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+
+  Label L_second_loop;
+  testl(kdx, kdx);
+  jcc(Assembler::zero, L_second_loop);
+
+  Label L_carry;
+  subl(kdx, 1);
+  jcc(Assembler::zero, L_carry);
+
+  movl(Address(z, kdx, Address::times_4,  0), carry);
+  shrq(carry, 32);
+  subl(kdx, 1);
+
+  bind(L_carry);
+  movl(Address(z, kdx, Address::times_4,  0), carry);
+
+  // Second and third (nested) loops.
+  //
+  // for (int i = xstart-1; i >= 0; i--) { // Second loop
+  //   carry = 0;
+  //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
+  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
+  //                    (z[k] & LONG_MASK) + carry;
+  //     z[k] = (int)product;
+  //     carry = product >>> 32;
+  //   }
+  //   z[i] = (int)carry;
+  // }
+  //
+  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx
+
+  const Register jdx = tmp1;
+
+  bind(L_second_loop);
+  xorl(carry, carry);    // carry = 0;
+  movl(jdx, ylen);       // j = ystart+1
+
+  subl(xstart, 1);       // i = xstart-1;
+  jcc(Assembler::negative, L_done);
+
+  push (z);
+
+  Label L_last_x;
+  lea(z, Address(z, xstart, Address::times_4, 4)); // z = z + k - j
+  subl(xstart, 1);       // i = xstart-1;
+  jcc(Assembler::negative, L_last_x);
+
+  if (UseBMI2Instructions) {
+    movq(rdx,  Address(x, xstart, Address::times_4,  0));
+    rorxq(rdx, rdx, 32); // convert big-endian to little-endian
+  } else {
+    movq(x_xstart, Address(x, xstart, Address::times_4,  0));
+    rorq(x_xstart, 32);  // convert big-endian to little-endian
+  }
+
+  Label L_third_loop_prologue;
+  bind(L_third_loop_prologue);
+
+  push (x);
+  push (xstart);
+  push (ylen);
+
+
+  if (UseBMI2Instructions) {
+    multiply_128_x_128_bmi2_loop(y, z, carry, x, jdx, ylen, product, tmp2, x_xstart, tmp3, tmp4);
+  } else { // !UseBMI2Instructions
+    multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);
+  }
+
+  pop(ylen);
+  pop(xlen);
+  pop(x);
+  pop(z);
+
+  movl(tmp3, xlen);
+  addl(tmp3, 1);
+  movl(Address(z, tmp3, Address::times_4,  0), carry);
+  subl(tmp3, 1);
+  jccb(Assembler::negative, L_done);
+
+  shrq(carry, 32);
+  movl(Address(z, tmp3, Address::times_4,  0), carry);
+  jmp(L_second_loop);
+
+  // Next infrequent code is moved outside loops.
+  bind(L_last_x);
+  if (UseBMI2Instructions) {
+    movl(rdx, Address(x,  0));
+  } else {
+    movl(x_xstart, Address(x,  0));
+  }
+  jmp(L_third_loop_prologue);
+
+  bind(L_done);
+
+  pop(zlen);
+  pop(xlen);
+
+  pop(tmp5);
+  pop(tmp4);
+  pop(tmp3);
+  pop(tmp2);
+  pop(tmp1);
+}
+#endif
+
 /**
  * Emits code to update CRC-32 with a byte value according to constants in table
  *
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -1221,6 +1221,28 @@
                         XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                         XMMRegister tmp4, Register tmp5, Register result);
 
+#ifdef _LP64
+  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);
+  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
+                             Register y, Register y_idx, Register z,
+                             Register carry, Register product,
+                             Register idx, Register kdx);
+  void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
+                              Register yz_idx, Register idx,
+                              Register carry, Register product, int offset);
+  void multiply_128_x_128_bmi2_loop(Register y, Register z,
+                                    Register carry, Register carry2,
+                                    Register idx, Register jdx,
+                                    Register yz_idx1, Register yz_idx2,
+                                    Register tmp, Register tmp3, Register tmp4);
+  void multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
+                               Register yz_idx, Register idx, Register jdx,
+                               Register carry, Register product,
+                               Register carry2);
+  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
+                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
+#endif
+
   // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
   void update_byte_crc32(Register crc, Register val, Register table);
   void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -3677,6 +3677,70 @@
     return start;
   }
 
+
+  /**
+   *  Arguments:
+   *
+   *  Input:
+   *    c_rarg0   - x address
+   *    c_rarg1   - x length
+   *    c_rarg2   - y address
+   *    c_rarg3   - y lenth
+   * not Win64
+   *    c_rarg4   - z address
+   *    c_rarg5   - z length
+   * Win64
+   *    rsp+40    - z address
+   *    rsp+48    - z length
+   */
+  address generate_multiplyToLen() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+
+    address start = __ pc();
+    // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
+    // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
+    const Register x     = rdi;
+    const Register xlen  = rax;
+    const Register y     = rsi;
+    const Register ylen  = rcx;
+    const Register z     = r8;
+    const Register zlen  = r11;
+
+    // Next registers will be saved on stack in multiply_to_len().
+    const Register tmp1  = r12;
+    const Register tmp2  = r13;
+    const Register tmp3  = r14;
+    const Register tmp4  = r15;
+    const Register tmp5  = rbx;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifndef _WIN64
+    __ movptr(zlen, r9); // Save r9 in r11 - zlen
+#endif
+    setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
+                       // ylen => rcx, z => r8, zlen => r11
+                       // r9 and r10 may be used to save non-volatile registers
+#ifdef _WIN64
+    // last 2 arguments (#4, #5) are on stack on Win64
+    __ movptr(z, Address(rsp, 6 * wordSize));
+    __ movptr(zlen, Address(rsp, 7 * wordSize));
+#endif
+
+    __ movptr(xlen, rsi);
+    __ movptr(y,    rdx);
+    __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
+
+    restore_arg_regs();
+
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
 #undef __
 #define __ masm->
 
@@ -3917,6 +3981,11 @@
     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
                                                        &StubRoutines::_safefetchN_fault_pc,
                                                        &StubRoutines::_safefetchN_continuation_pc);
+#ifdef COMPILER2
+    if (UseMultiplyToLenIntrinsic) {
+      StubRoutines::_multiplyToLen = generate_multiplyToLen();
+    }
+#endif
   }
 
  public:
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -493,7 +493,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -522,7 +522,8 @@
                (supports_tscinv_bit() ? ", tscinvbit": ""),
                (supports_tscinv() ? ", tscinv": ""),
                (supports_bmi1() ? ", bmi1" : ""),
-               (supports_bmi2() ? ", bmi2" : ""));
+               (supports_bmi2() ? ", bmi2" : ""),
+               (supports_adx() ? ", adx" : ""));
   _features_str = strdup(buf);
 
   // UseSSE is set to the smaller of what hardware supports and what
@@ -574,7 +575,7 @@
     }
   } else if (UseCRC32Intrinsics) {
     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
-      warning("CRC32 Intrinsics requires AVX and CLMUL instructions (not available on this CPU)");
+      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
   }
 
@@ -697,7 +698,20 @@
     }
 #endif
   }
+
+#ifdef _LP64
+  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+    UseMultiplyToLenIntrinsic = true;
+  }
+#else
+  if (UseMultiplyToLenIntrinsic) {
+    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+      warning("multiplyToLen intrinsic is not available in 32-bit VM");
+    }
+    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
+  }
 #endif
+#endif // COMPILER2
 
   // On new cpus instructions which update whole XMM register should be used
   // to prevent partial register stall due to dependencies on high half.
@@ -840,6 +854,9 @@
         }
       }
     }
+    if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
+      AllocatePrefetchInstr = 3;
+    }
   }
 
   // Use count leading zeros count instruction if available.
@@ -852,23 +869,35 @@
     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
   }
 
+  // Use count trailing zeros instruction if available
   if (supports_bmi1()) {
+    // tzcnt does not require VEX prefix
+    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
+      UseCountTrailingZerosInstruction = true;
+    }
+  } else if (UseCountTrailingZerosInstruction) {
+    warning("tzcnt instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
+  }
+
+  // BMI instructions use an encoding with VEX prefix.
+  // VEX prefix is generated only when AVX > 0.
+  if (supports_bmi1() && supports_avx()) {
     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
       UseBMI1Instructions = true;
     }
   } else if (UseBMI1Instructions) {
-    warning("BMI1 instructions are not available on this CPU");
+    warning("BMI1 instructions are not available on this CPU (AVX is also required)");
     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
   }
 
-  // Use count trailing zeros instruction if available
-  if (supports_bmi1()) {
-    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
-      UseCountTrailingZerosInstruction = UseBMI1Instructions;
+  if (supports_bmi2() && supports_avx()) {
+    if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
+      UseBMI2Instructions = true;
     }
-  } else if (UseCountTrailingZerosInstruction) {
-    warning("tzcnt instruction is not available on this CPU");
-    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
+  } else if (UseBMI2Instructions) {
+    warning("BMI2 instructions are not available on this CPU (AVX is also required)");
+    FLAG_SET_DEFAULT(UseBMI2Instructions, false);
   }
 
   // Use population count instruction if available.
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -209,7 +209,9 @@
                    erms : 1,
                         : 1,
                    rtm  : 1,
-                        : 20;
+                        : 7,
+                   adx  : 1,
+                        : 12;
     } bits;
   };
 
@@ -260,7 +262,8 @@
     CPU_CLMUL  = (1 << 21), // carryless multiply for CRC
     CPU_BMI1   = (1 << 22),
     CPU_BMI2   = (1 << 23),
-    CPU_RTM    = (1 << 24)  // Restricted Transactional Memory instructions
+    CPU_RTM    = (1 << 24),  // Restricted Transactional Memory instructions
+    CPU_ADX    = (1 << 25)
   } cpuFeatureFlags;
 
   enum {
@@ -465,10 +468,16 @@
     }
     // Intel features.
     if(is_intel()) {
+      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+         result |= CPU_ADX;
       if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
         result |= CPU_BMI2;
       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
         result |= CPU_LZCNT;
+      // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
+      if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
+        result |= CPU_3DNOW_PREFETCH;
+      }
     }
 
     return result;
@@ -621,6 +630,7 @@
   static bool supports_rtm()      { return (_cpuFeatures & CPU_RTM) != 0; }
   static bool supports_bmi1()     { return (_cpuFeatures & CPU_BMI1) != 0; }
   static bool supports_bmi2()     { return (_cpuFeatures & CPU_BMI2) != 0; }
+  static bool supports_adx()     { return (_cpuFeatures & CPU_ADX) != 0; }
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
--- a/src/share/vm/asm/codeBuffer.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/asm/codeBuffer.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -133,6 +133,10 @@
   // free any overflow storage
   delete _overflow_arena;
 
+  // Claim is that stack allocation ensures resources are cleaned up.
+  // This is resource clean up, let's hope that all were properly copied out.
+  free_strings();
+
 #ifdef ASSERT
   // Save allocation type to execute assert in ~ResourceObj()
   // which is called after this destructor.
@@ -704,7 +708,7 @@
   relocate_code_to(&dest);
 
   // transfer strings and comments from buffer to blob
-  dest_blob->set_strings(_strings);
+  dest_blob->set_strings(_code_strings);
 
   // Done moving code bytes; were they the right size?
   assert(round_to(dest.total_content_size(), oopSize) == dest_blob->content_size(), "sanity");
@@ -1003,11 +1007,11 @@
 
 
 void CodeBuffer::block_comment(intptr_t offset, const char * comment) {
-  _strings.add_comment(offset, comment);
+  _code_strings.add_comment(offset, comment);
 }
 
 const char* CodeBuffer::code_string(const char* str) {
-  return _strings.add_string(str);
+  return _code_strings.add_string(str);
 }
 
 class CodeString: public CHeapObj<mtCode> {
@@ -1073,6 +1077,7 @@
 }
 
 void CodeStrings::add_comment(intptr_t offset, const char * comment) {
+  check_valid();
   CodeString* c      = new CodeString(comment, offset);
   CodeString* inspos = (_strings == NULL) ? NULL : find_last(offset);
 
@@ -1088,11 +1093,32 @@
 }
 
 void CodeStrings::assign(CodeStrings& other) {
+  other.check_valid();
+  // Cannot do following because CodeStrings constructor is not alway run!
+  assert(is_null(), "Cannot assign onto non-empty CodeStrings");
   _strings = other._strings;
+  other.set_null_and_invalidate();
+}
+
+// Deep copy of CodeStrings for consistent memory management.
+// Only used for actual disassembly so this is cheaper than reference counting
+// for the "normal" fastdebug case.
+void CodeStrings::copy(CodeStrings& other) {
+  other.check_valid();
+  check_valid();
+  assert(is_null(), "Cannot copy onto non-empty CodeStrings");
+  CodeString* n = other._strings;
+  CodeString** ps = &_strings;
+  while (n != NULL) {
+    *ps = new CodeString(n->string(),n->offset());
+    ps = &((*ps)->_next);
+    n = n->next();
+  }
 }
 
 void CodeStrings::print_block_comment(outputStream* stream, intptr_t offset) const {
-  if (_strings != NULL) {
+    check_valid();
+    if (_strings != NULL) {
     CodeString* c = find(offset);
     while (c && c->offset() == offset) {
       stream->bol();
@@ -1103,7 +1129,7 @@
   }
 }
 
-
+// Also sets isNull()
 void CodeStrings::free() {
   CodeString* n = _strings;
   while (n) {
@@ -1113,10 +1139,11 @@
     delete n;
     n = p;
   }
-  _strings = NULL;
+  set_null_and_invalidate();
 }
 
 const char* CodeStrings::add_string(const char * string) {
+  check_valid();
   CodeString* s = new CodeString(string);
   s->set_next(_strings);
   _strings = s;
--- a/src/share/vm/asm/codeBuffer.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/asm/codeBuffer.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -27,6 +27,7 @@
 
 #include "code/oopRecorder.hpp"
 #include "code/relocInfo.hpp"
+#include "utilities/debug.hpp"
 
 class CodeStrings;
 class PhaseCFG;
@@ -245,15 +246,39 @@
 private:
 #ifndef PRODUCT
   CodeString* _strings;
+#ifdef ASSERT
+  // Becomes true after copy-out, forbids further use.
+  bool _defunct; // Zero bit pattern is "valid", see memset call in decode_env::decode_env
+#endif
 #endif
 
   CodeString* find(intptr_t offset) const;
   CodeString* find_last(intptr_t offset) const;
 
+  void set_null_and_invalidate() {
+#ifndef PRODUCT
+    _strings = NULL;
+#ifdef ASSERT
+    _defunct = true;
+#endif
+#endif
+  }
+
 public:
   CodeStrings() {
 #ifndef PRODUCT
     _strings = NULL;
+#ifdef ASSERT
+    _defunct = false;
+#endif
+#endif
+  }
+
+  bool is_null() {
+#ifdef ASSERT
+    return _strings == NULL;
+#else
+    return true;
 #endif
   }
 
@@ -261,8 +286,17 @@
 
   void add_comment(intptr_t offset, const char * comment) PRODUCT_RETURN;
   void print_block_comment(outputStream* stream, intptr_t offset) const PRODUCT_RETURN;
+  // MOVE strings from other to this; invalidate other.
   void assign(CodeStrings& other)  PRODUCT_RETURN;
+  // COPY strings from other to this; leave other valid.
+  void copy(CodeStrings& other)  PRODUCT_RETURN;
   void free() PRODUCT_RETURN;
+  // Guarantee that _strings are used at most once; assign invalidates a buffer.
+  inline void check_valid() const {
+#ifdef ASSERT
+    assert(!_defunct, "Use of invalid CodeStrings");
+#endif
+  }
 };
 
 // A CodeBuffer describes a memory space into which assembly
@@ -330,7 +364,7 @@
   csize_t      _total_size;     // size in bytes of combined memory buffer
 
   OopRecorder* _oop_recorder;
-  CodeStrings  _strings;
+  CodeStrings  _code_strings;
   OopRecorder  _default_oop_recorder;  // override with initialize_oop_recorder
   Arena*       _overflow_arena;
 
@@ -531,7 +565,13 @@
   void initialize_oop_recorder(OopRecorder* r);
 
   OopRecorder* oop_recorder() const   { return _oop_recorder; }
-  CodeStrings& strings()              { return _strings; }
+  CodeStrings& strings()              { return _code_strings; }
+
+  void free_strings() {
+    if (!_code_strings.is_null()) {
+      _code_strings.free(); // sets _strings Null as a side-effect.
+    }
+  }
 
   // Code generation
   void relocate(address at, RelocationHolder const& rspec, int format = 0) {
--- a/src/share/vm/asm/register.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/asm/register.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -275,4 +275,101 @@
   );
 }
 
+inline void assert_different_registers(
+  AbstractRegister a,
+  AbstractRegister b,
+  AbstractRegister c,
+  AbstractRegister d,
+  AbstractRegister e,
+  AbstractRegister f,
+  AbstractRegister g,
+  AbstractRegister h,
+  AbstractRegister i,
+  AbstractRegister j
+) {
+  assert(
+    a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j
+           && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j
+                     && c != d && c != e && c != f && c != g && c != h && c != i && c != j
+                               && d != e && d != f && d != g && d != h && d != i && d != j
+                                         && e != f && e != g && e != h && e != i && e != j
+                                                   && f != g && f != h && f != i && f != j
+                                                             && g != h && g != i && g != j
+                                                                       && h != i && h != j
+                                                                                 && i != j,
+    err_msg_res("registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT
+                ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT
+                ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT
+                ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT "",
+                p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i), p2i(j))
+  );
+}
+
+inline void assert_different_registers(
+  AbstractRegister a,
+  AbstractRegister b,
+  AbstractRegister c,
+  AbstractRegister d,
+  AbstractRegister e,
+  AbstractRegister f,
+  AbstractRegister g,
+  AbstractRegister h,
+  AbstractRegister i,
+  AbstractRegister j,
+  AbstractRegister k
+) {
+  assert(
+    a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j && a !=k
+           && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j && b !=k
+                     && c != d && c != e && c != f && c != g && c != h && c != i && c != j && c !=k
+                               && d != e && d != f && d != g && d != h && d != i && d != j && d !=k
+                                         && e != f && e != g && e != h && e != i && e != j && e !=k
+                                                   && f != g && f != h && f != i && f != j && f !=k
+                                                             && g != h && g != i && g != j && g !=k
+                                                                       && h != i && h != j && h !=k
+                                                                                 && i != j && i !=k
+                                                                                           && j !=k,
+    err_msg_res("registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT
+                ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT
+                ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT
+                ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT ", k=" INTPTR_FORMAT "",
+                p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i), p2i(j), p2i(k))
+  );
+}
+
+inline void assert_different_registers(
+  AbstractRegister a,
+  AbstractRegister b,
+  AbstractRegister c,
+  AbstractRegister d,
+  AbstractRegister e,
+  AbstractRegister f,
+  AbstractRegister g,
+  AbstractRegister h,
+  AbstractRegister i,
+  AbstractRegister j,
+  AbstractRegister k,
+  AbstractRegister l
+) {
+  assert(
+    a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j && a !=k && a !=l
+           && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j && b !=k && b !=l
+                     && c != d && c != e && c != f && c != g && c != h && c != i && c != j && c !=k && c !=l
+                               && d != e && d != f && d != g && d != h && d != i && d != j && d !=k && d !=l
+                                         && e != f && e != g && e != h && e != i && e != j && e !=k && e !=l
+                                                   && f != g && f != h && f != i && f != j && f !=k && f !=l
+                                                             && g != h && g != i && g != j && g !=k && g !=l
+                                                                       && h != i && h != j && h !=k && h !=l
+                                                                                 && i != j && i !=k && i !=l
+                                                                                           && j !=k && j !=l
+                                                                                                    && k !=l,
+    err_msg_res("registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT
+                ", c=" INTPTR_FORMAT ", d=" INTPTR_FORMAT ", e=" INTPTR_FORMAT
+                ", f=" INTPTR_FORMAT ", g=" INTPTR_FORMAT ", h=" INTPTR_FORMAT
+                ", i=" INTPTR_FORMAT ", j=" INTPTR_FORMAT ", k=" INTPTR_FORMAT
+                ", l=" INTPTR_FORMAT "",
+                p2i(a), p2i(b), p2i(c), p2i(d), p2i(e), p2i(f), p2i(g), p2i(h), p2i(i), p2i(j), p2i(k), p2i(l))
+  );
+}
+
 #endif // SHARE_VM_ASM_REGISTER_HPP
--- a/src/share/vm/ci/ciMethod.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/ci/ciMethod.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -1106,6 +1106,22 @@
 }
 
 // ------------------------------------------------------------------
+// ciMethod::has_option_value
+//
+template<typename T>
+bool ciMethod::has_option_value(const char* option, T& value) {
+  check_is_loaded();
+  VM_ENTRY_MARK;
+  methodHandle mh(THREAD, get_Method());
+  return CompilerOracle::has_option_value(mh, option, value);
+}
+// Explicit instantiation for all OptionTypes supported.
+template bool ciMethod::has_option_value<intx>(const char* option, intx& value);
+template bool ciMethod::has_option_value<uintx>(const char* option, uintx& value);
+template bool ciMethod::has_option_value<bool>(const char* option, bool& value);
+template bool ciMethod::has_option_value<ccstr>(const char* option, ccstr& value);
+
+// ------------------------------------------------------------------
 // ciMethod::can_be_compiled
 //
 // Have previous compilations of this method succeeded?
--- a/src/share/vm/ci/ciMethod.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/ci/ciMethod.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -264,6 +264,8 @@
   bool should_print_assembly();
   bool break_at_execute();
   bool has_option(const char *option);
+  template<typename T>
+  bool has_option_value(const char* option, T& value);
   bool can_be_compiled();
   bool can_be_osr_compiled(int entry_bci);
   void set_not_compilable(const char* reason = NULL);
--- a/src/share/vm/classfile/classLoader.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/classfile/classLoader.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -1609,7 +1609,7 @@
               if (TieredCompilation && TieredStopAtLevel >= CompLevel_full_optimization) {
                 // Clobber the first compile and force second tier compilation
                 nmethod* nm = m->code();
-                if (nm != NULL) {
+                if (nm != NULL && !m->is_method_handle_intrinsic()) {
                   // Throw out the code so that the code cache doesn't fill up
                   nm->make_not_entrant();
                   m->clear_code();
@@ -1628,7 +1628,7 @@
             }
 
             nmethod* nm = m->code();
-            if (nm != NULL) {
+            if (nm != NULL && !m->is_method_handle_intrinsic()) {
               // Throw out the code so that the code cache doesn't fill up
               nm->make_not_entrant();
               m->clear_code();
--- a/src/share/vm/classfile/systemDictionary.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/classfile/systemDictionary.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -2291,6 +2291,9 @@
   }
 
   assert(spe != NULL && spe->method() != NULL, "");
+  assert(!UseCompiler || (spe->method()->has_compiled_code() &&
+         spe->method()->code()->entry_point() == spe->method()->from_compiled_entry()),
+         "MH intrinsic invariant");
   return spe->method();
 }
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -787,6 +787,11 @@
    do_name(     encodeISOArray_name,                             "encodeISOArray")                                      \
    do_signature(encodeISOArray_signature,                        "([CI[BII)I")                                          \
                                                                                                                         \
+  do_class(java_math_BigInteger,                      "java/math/BigInteger")                                           \
+  do_intrinsic(_multiplyToLen,      java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_R)             \
+   do_name(     multiplyToLen_name,                             "multiplyToLen")                                        \
+   do_signature(multiplyToLen_signature,                        "([II[II[I)[I")                                         \
+                                                                                                                        \
   /* java/lang/ref/Reference */                                                                                         \
   do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
                                                                                                                         \
--- a/src/share/vm/code/codeBlob.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/code/codeBlob.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -253,6 +253,7 @@
 
 void BufferBlob::free( BufferBlob *blob ) {
   ThreadInVMfromUnknown __tiv;  // get to VM state in case we block on CodeCache_lock
+  blob->flush();
   {
     MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
     CodeCache::free((CodeBlob*)blob);
--- a/src/share/vm/code/codeCache.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/code/codeCache.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -712,7 +712,9 @@
 void CodeCache::mark_all_nmethods_for_deoptimization() {
   MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
   FOR_ALL_ALIVE_NMETHODS(nm) {
-    nm->mark_for_deoptimization();
+    if (!nm->method()->is_method_handle_intrinsic()) {
+      nm->mark_for_deoptimization();
+    }
   }
 }
 
--- a/src/share/vm/code/compiledIC.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/code/compiledIC.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -595,6 +595,7 @@
   } else {
     // Callee is interpreted code.  In any case entering the interpreter
     // puts a converter-frame on the stack to save arguments.
+    assert(!m->is_method_handle_intrinsic(), "Compiled code should never call interpreter MH intrinsics");
     info._to_interpreter = true;
     info._entry      = m()->get_c2i_entry();
   }
--- a/src/share/vm/code/nmethod.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/code/nmethod.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -448,7 +448,10 @@
   // alive.  It is used when an uncommon trap happens.  Returns true
   // if this thread changed the state of the nmethod or false if
   // another thread performed the transition.
-  bool  make_not_entrant() { return make_not_entrant_or_zombie(not_entrant); }
+  bool  make_not_entrant() {
+    assert(!method()->is_method_handle_intrinsic(), "Cannot make MH intrinsic not entrant");
+    return make_not_entrant_or_zombie(not_entrant);
+  }
   bool  make_zombie()      { return make_not_entrant_or_zombie(zombie); }
 
   // used by jvmti to track if the unload event has been reported
--- a/src/share/vm/compiler/compilerOracle.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/compiler/compilerOracle.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -167,44 +167,134 @@
   }
 }
 
+enum OptionType {
+  IntxType,
+  UintxType,
+  BoolType,
+  CcstrType,
+  UnknownType
+};
 
-class MethodOptionMatcher: public MethodMatcher {
-  const char * option;
- public:
-  MethodOptionMatcher(Symbol* class_name, Mode class_mode,
-                             Symbol* method_name, Mode method_mode,
-                             Symbol* signature, const char * opt, MethodMatcher* next):
-    MethodMatcher(class_name, class_mode, method_name, method_mode, signature, next) {
-    option = opt;
+/* Methods to map real type names to OptionType */
+template<typename T>
+static OptionType get_type_for() {
+  return UnknownType;
+};
+
+template<> OptionType get_type_for<intx>() {
+  return IntxType;
+}
+
+template<> OptionType get_type_for<uintx>() {
+  return UintxType;
+}
+
+template<> OptionType get_type_for<bool>() {
+  return BoolType;
+}
+
+template<> OptionType get_type_for<ccstr>() {
+  return CcstrType;
+}
+
+template<typename T>
+static const T copy_value(const T value) {
+  return value;
+}
+
+template<> const ccstr copy_value<ccstr>(const ccstr value) {
+  return (const ccstr)strdup(value);
+}
+
+template <typename T>
+class TypedMethodOptionMatcher : public MethodMatcher {
+  const char* _option;
+  OptionType _type;
+  const T _value;
+
+public:
+  TypedMethodOptionMatcher(Symbol* class_name, Mode class_mode,
+                           Symbol* method_name, Mode method_mode,
+                           Symbol* signature, const char* opt,
+                           const T value,  MethodMatcher* next) :
+    MethodMatcher(class_name, class_mode, method_name, method_mode, signature, next),
+                  _type(get_type_for<T>()), _value(copy_value<T>(value)) {
+    _option = strdup(opt);
   }
 
-  bool match(methodHandle method, const char* opt) {
-    MethodOptionMatcher* current = this;
+  ~TypedMethodOptionMatcher() {
+    free((void*)_option);
+  }
+
+  TypedMethodOptionMatcher* match(methodHandle method, const char* opt) {
+    TypedMethodOptionMatcher* current = this;
     while (current != NULL) {
-      current = (MethodOptionMatcher*)current->find(method);
+      current = (TypedMethodOptionMatcher*)current->find(method);
       if (current == NULL) {
-        return false;
+        return NULL;
       }
-      if (strcmp(current->option, opt) == 0) {
-        return true;
+      if (strcmp(current->_option, opt) == 0) {
+        return current;
       }
       current = current->next();
     }
-    return false;
+    return NULL;
+  }
+
+  TypedMethodOptionMatcher* next() {
+    return (TypedMethodOptionMatcher*)_next;
   }
 
-  MethodOptionMatcher* next() {
-    return (MethodOptionMatcher*)_next;
-  }
+  OptionType get_type(void) {
+      return _type;
+  };
+
+  T value() { return _value; }
 
-  virtual void print() {
+  void print() {
+    ttyLocker ttyl;
     print_base();
-    tty->print(" %s", option);
+    tty->print(" %s", _option);
+    tty->print(" <unknown option type>");
     tty->cr();
   }
 };
 
+template<>
+void TypedMethodOptionMatcher<intx>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" intx %s", _option);
+  tty->print(" = " INTX_FORMAT, _value);
+  tty->cr();
+};
 
+template<>
+void TypedMethodOptionMatcher<uintx>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" uintx %s", _option);
+  tty->print(" = " UINTX_FORMAT, _value);
+  tty->cr();
+};
+
+template<>
+void TypedMethodOptionMatcher<bool>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" bool %s", _option);
+  tty->print(" = %s", _value ? "true" : "false");
+  tty->cr();
+};
+
+template<>
+void TypedMethodOptionMatcher<ccstr>::print() {
+  ttyLocker ttyl;
+  print_base();
+  tty->print(" const char* %s", _option);
+  tty->print(" = '%s'", _value);
+  tty->cr();
+};
 
 // this must parallel the command_names below
 enum OracleCommand {
@@ -259,23 +349,46 @@
   return lists[command];
 }
 
-
-
+template<typename T>
 static MethodMatcher* add_option_string(Symbol* class_name, MethodMatcher::Mode c_mode,
                                         Symbol* method_name, MethodMatcher::Mode m_mode,
                                         Symbol* signature,
-                                        const char* option) {
-  lists[OptionCommand] = new MethodOptionMatcher(class_name, c_mode, method_name, m_mode,
-                                                 signature, option, lists[OptionCommand]);
+                                        const char* option,
+                                        T value) {
+  lists[OptionCommand] = new TypedMethodOptionMatcher<T>(class_name, c_mode, method_name, m_mode,
+                                                         signature, option, value, lists[OptionCommand]);
   return lists[OptionCommand];
 }
 
+template<typename T>
+static bool get_option_value(methodHandle method, const char* option, T& value) {
+   TypedMethodOptionMatcher<T>* m;
+   if (lists[OptionCommand] != NULL
+       && (m = ((TypedMethodOptionMatcher<T>*)lists[OptionCommand])->match(method, option)) != NULL
+       && m->get_type() == get_type_for<T>()) {
+       value = m->value();
+       return true;
+   } else {
+     return false;
+   }
+}
 
 bool CompilerOracle::has_option_string(methodHandle method, const char* option) {
-  return lists[OptionCommand] != NULL &&
-    ((MethodOptionMatcher*)lists[OptionCommand])->match(method, option);
+  bool value = false;
+  get_option_value(method, option, value);
+  return value;
 }
 
+template<typename T>
+bool CompilerOracle::has_option_value(methodHandle method, const char* option, T& value) {
+  return ::get_option_value(method, option, value);
+}
+
+// Explicit instantiation for all OptionTypes supported.
+template bool CompilerOracle::has_option_value<intx>(methodHandle method, const char* option, intx& value);
+template bool CompilerOracle::has_option_value<uintx>(methodHandle method, const char* option, uintx& value);
+template bool CompilerOracle::has_option_value<bool>(methodHandle method, const char* option, bool& value);
+template bool CompilerOracle::has_option_value<ccstr>(methodHandle method, const char* option, ccstr& value);
 
 bool CompilerOracle::should_exclude(methodHandle method, bool& quietly) {
   quietly = true;
@@ -433,6 +546,94 @@
 
 
 
+// Scan next flag and value in line, return MethodMatcher object on success, NULL on failure.
+// On failure, error_msg contains description for the first error.
+// For future extensions: set error_msg on first error.
+static MethodMatcher* scan_flag_and_value(const char* type, const char* line, int& total_bytes_read,
+                                          Symbol* c_name, MethodMatcher::Mode c_match,
+                                          Symbol* m_name, MethodMatcher::Mode m_match,
+                                          Symbol* signature,
+                                          char* errorbuf, const int buf_size) {
+  total_bytes_read = 0;
+  int bytes_read = 0;
+  char flag[256];
+
+  // Read flag name.
+  if (sscanf(line, "%*[ \t]%255[a-zA-Z0-9]%n", flag, &bytes_read) == 1) {
+    line += bytes_read;
+    total_bytes_read += bytes_read;
+
+    // Read value.
+    if (strcmp(type, "intx") == 0) {
+      intx value;
+      if (sscanf(line, "%*[ \t]" INTX_FORMAT "%n", &value, &bytes_read) == 1) {
+        total_bytes_read += bytes_read;
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, value);
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s ", flag, type);
+      }
+    } else if (strcmp(type, "uintx") == 0) {
+      uintx value;
+      if (sscanf(line, "%*[ \t]" UINTX_FORMAT "%n", &value, &bytes_read) == 1) {
+        total_bytes_read += bytes_read;
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, value);
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else if (strcmp(type, "ccstr") == 0) {
+      ResourceMark rm;
+      char* value = NEW_RESOURCE_ARRAY(char, strlen(line) + 1);
+      if (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", value, &bytes_read) == 1) {
+        total_bytes_read += bytes_read;
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, (ccstr)value);
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else if (strcmp(type, "ccstrlist") == 0) {
+      // Accumulates several strings into one. The internal type is ccstr.
+      ResourceMark rm;
+      char* value = NEW_RESOURCE_ARRAY(char, strlen(line) + 1);
+      char* next_value = value;
+      if (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", next_value, &bytes_read) == 1) {
+        total_bytes_read += bytes_read;
+        line += bytes_read;
+        next_value += bytes_read;
+        char* end_value = next_value-1;
+        while (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", next_value, &bytes_read) == 1) {
+          total_bytes_read += bytes_read;
+          line += bytes_read;
+          *end_value = ' '; // override '\0'
+          next_value += bytes_read;
+          end_value = next_value-1;
+        }
+        return add_option_string(c_name, c_match, m_name, m_match, signature, flag, (ccstr)value);
+      } else {
+        jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else if (strcmp(type, "bool") == 0) {
+      char value[256];
+      if (sscanf(line, "%*[ \t]%255[a-zA-Z]%n", value, &bytes_read) == 1) {
+        if (strcmp(value, "true") == 0) {
+          total_bytes_read += bytes_read;
+          return add_option_string(c_name, c_match, m_name, m_match, signature, flag, true);
+        } else if (strcmp(value, "false") == 0) {
+          total_bytes_read += bytes_read;
+          return add_option_string(c_name, c_match, m_name, m_match, signature, flag, false);
+        } else {
+          jio_snprintf(errorbuf, buf_size, "  Value cannot be read for flag %s of type %s", flag, type);
+        }
+      } else {
+        jio_snprintf(errorbuf, sizeof(errorbuf), "  Value cannot be read for flag %s of type %s", flag, type);
+      }
+    } else {
+      jio_snprintf(errorbuf, sizeof(errorbuf), "  Type %s not supported ", type);
+    }
+  } else {
+    jio_snprintf(errorbuf, sizeof(errorbuf), "  Flag name for type %s should be alphanumeric ", type);
+  }
+  return NULL;
+}
+
 void CompilerOracle::parse_from_line(char* line) {
   if (line[0] == '\0') return;
   if (line[0] == '#')  return;
@@ -462,8 +663,10 @@
   int bytes_read;
   OracleCommand command = parse_command_name(line, &bytes_read);
   line += bytes_read;
+  ResourceMark rm;
 
   if (command == UnknownCommand) {
+    ttyLocker ttyl;
     tty->print_cr("CompilerOracle: unrecognized line");
     tty->print_cr("  \"%s\"", original_line);
     return;
@@ -485,7 +688,7 @@
   char method_name[256];
   char sig[1024];
   char errorbuf[1024];
-  const char* error_msg = NULL;
+  const char* error_msg = NULL; // description of first error that appears
   MethodMatcher* match = NULL;
 
   if (scan_line(line, class_name, &c_match, method_name, &m_match, &bytes_read, error_msg)) {
@@ -504,43 +707,77 @@
     }
 
     if (command == OptionCommand) {
-      // Look for trailing options to support
-      // ciMethod::has_option("string") to control features in the
-      // compiler.  Multiple options may follow the method name.
-      char option[256];
+      // Look for trailing options.
+      //
+      // Two types of trailing options are
+      // supported:
+      //
+      // (1) CompileCommand=option,Klass::method,flag
+      // (2) CompileCommand=option,Klass::method,type,flag,value
+      //
+      // Type (1) is used to support ciMethod::has_option("someflag")
+      // (i.e., to check if a flag "someflag" is enabled for a method).
+      //
+      // Type (2) is used to support options with a value. Values can have the
+      // the following types: intx, uintx, bool, ccstr, and ccstrlist.
+      //
+      // For future extensions: extend scan_flag_and_value()
+      char option[256]; // stores flag for Type (1) and type of Type (2)
       while (sscanf(line, "%*[ \t]%255[a-zA-Z0-9]%n", option, &bytes_read) == 1) {
         if (match != NULL && !_quiet) {
           // Print out the last match added
+          ttyLocker ttyl;
           tty->print("CompilerOracle: %s ", command_names[command]);
           match->print();
         }
-        match = add_option_string(c_name, c_match, m_name, m_match, signature, strdup(option));
         line += bytes_read;
-      }
+
+        if (strcmp(option, "intx") == 0
+            || strcmp(option, "uintx") == 0
+            || strcmp(option, "bool") == 0
+            || strcmp(option, "ccstr") == 0
+            || strcmp(option, "ccstrlist") == 0
+            ) {
+
+          // Type (2) option: parse flag name and value.
+          match = scan_flag_and_value(option, line, bytes_read,
+                                      c_name, c_match, m_name, m_match, signature,
+                                      errorbuf, sizeof(errorbuf));
+          if (match == NULL) {
+            error_msg = errorbuf;
+            break;
+          }
+          line += bytes_read;
+        } else {
+          // Type (1) option
+          match = add_option_string(c_name, c_match, m_name, m_match, signature, option, true);
+        }
+      } // while(
     } else {
-      bytes_read = 0;
-      sscanf(line, "%*[ \t]%n", &bytes_read);
-      if (line[bytes_read] != '\0') {
-        jio_snprintf(errorbuf, sizeof(errorbuf), "  Unrecognized text after command: %s", line);
-        error_msg = errorbuf;
-      } else {
-        match = add_predicate(command, c_name, c_match, m_name, m_match, signature);
-      }
+      match = add_predicate(command, c_name, c_match, m_name, m_match, signature);
     }
   }
 
-  if (match != NULL) {
-    if (!_quiet) {
-      ResourceMark rm;
-      tty->print("CompilerOracle: %s ", command_names[command]);
-      match->print();
-    }
-  } else {
+  ttyLocker ttyl;
+  if (error_msg != NULL) {
+    // an error has happened
     tty->print_cr("CompilerOracle: unrecognized line");
     tty->print_cr("  \"%s\"", original_line);
     if (error_msg != NULL) {
       tty->print_cr("%s", error_msg);
     }
+  } else {
+    // check for remaining characters
+    bytes_read = 0;
+    sscanf(line, "%*[ \t]%n", &bytes_read);
+    if (line[bytes_read] != '\0') {
+      tty->print_cr("CompilerOracle: unrecognized line");
+      tty->print_cr("  \"%s\"", original_line);
+      tty->print_cr("  Unrecognized text %s after command ", line);
+    } else if (match != NULL && !_quiet) {
+      tty->print("CompilerOracle: %s ", command_names[command]);
+      match->print();
+    }
   }
 }
 
--- a/src/share/vm/compiler/compilerOracle.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/compiler/compilerOracle.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -64,6 +64,11 @@
   // Check to see if this method has option set for it
   static bool has_option_string(methodHandle method, const char * option);
 
+  // Check if method has option and value set. If yes, overwrite value and return true,
+  // otherwise leave value unchanged and return false.
+  template<typename T>
+  static bool has_option_value(methodHandle method, const char* option, T& value);
+
   // Reads from string instead of file
   static void parse_from_string(const char* command_string, void (*parser)(char*));
 
--- a/src/share/vm/compiler/disassembler.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/compiler/disassembler.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -245,12 +245,12 @@
 };
 
 decode_env::decode_env(CodeBlob* code, outputStream* output, CodeStrings c) {
-  memset(this, 0, sizeof(*this));
+  memset(this, 0, sizeof(*this)); // Beware, this zeroes bits of fields.
   _output = output ? output : tty;
   _code = code;
   if (code != NULL && code->is_nmethod())
     _nm = (nmethod*) code;
-  _strings.assign(c);
+  _strings.copy(c);
 
   // by default, output pc but not bytes:
   _print_pc       = true;
--- a/src/share/vm/interpreter/interpreter.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/interpreter/interpreter.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -53,7 +53,9 @@
  public:
   // Initialization/finalization
   void    initialize(int size,
-                     CodeStrings& strings)       { _size = size; DEBUG_ONLY(_strings.assign(strings);) }
+                     CodeStrings& strings)       { _size = size;
+                                                   DEBUG_ONLY(::new(&_strings) CodeStrings();)
+                                                   DEBUG_ONLY(_strings.assign(strings);) }
   void    finalize()                             { ShouldNotCallThis(); }
 
   // General info/converters
--- a/src/share/vm/oops/methodData.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/oops/methodData.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -2057,7 +2057,7 @@
 
   // Whole-method sticky bits and flags
   enum {
-    _trap_hist_limit    = 19,   // decoupled from Deoptimization::Reason_LIMIT
+    _trap_hist_limit    = 20,   // decoupled from Deoptimization::Reason_LIMIT
     _trap_hist_mask     = max_jubyte,
     _extra_data_count   = 4     // extra DataLayout headers, for trap history
   }; // Public flag values
--- a/src/share/vm/opto/c2_globals.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -653,6 +653,9 @@
   product(bool, UseMathExactIntrinsics, true,                               \
           "Enables intrinsification of various java.lang.Math functions")   \
                                                                             \
+  product(bool, UseMultiplyToLenIntrinsic, false,                           \
+          "Enables intrinsification of BigInteger.multiplyToLen()")         \
+                                                                            \
   product(bool, UseTypeSpeculation, true,                                   \
           "Speculatively propagate types from profiles")                    \
                                                                             \
--- a/src/share/vm/opto/compile.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/opto/compile.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -598,6 +598,10 @@
   bool          method_has_option(const char * option) {
     return method() != NULL && method()->has_option(option);
   }
+  template<typename T>
+  bool          method_has_option_value(const char * option, T& value) {
+    return method() != NULL && method()->has_option_value(option, value);
+  }
 #ifndef PRODUCT
   bool          trace_opto_output() const       { return _trace_opto_output; }
   bool              parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
--- a/src/share/vm/opto/escape.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/opto/escape.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -944,7 +944,8 @@
                   strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
-                  strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0)
+                  strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0)
                   ))) {
             call->dump();
             fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
--- a/src/share/vm/opto/library_call.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/opto/library_call.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -322,6 +322,7 @@
   bool inline_updateCRC32();
   bool inline_updateBytesCRC32();
   bool inline_updateByteBufferCRC32();
+  bool inline_multiplyToLen();
 };
 
 
@@ -330,8 +331,12 @@
   vmIntrinsics::ID id = m->intrinsic_id();
   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
 
-  if (DisableIntrinsic[0] != '\0'
-      && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) {
+  ccstr disable_intr = NULL;
+
+  if ((DisableIntrinsic[0] != '\0'
+       && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) ||
+      (method_has_option_value("DisableIntrinsic", disable_intr)
+       && strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)) {
     // disabled by a user request on the command line:
     // example: -XX:DisableIntrinsic=_hashCode,_getClass
     return NULL;
@@ -515,6 +520,10 @@
     if (!UseAESIntrinsics) return NULL;
     break;
 
+  case vmIntrinsics::_multiplyToLen:
+    if (!UseMultiplyToLenIntrinsic) return NULL;
+    break;
+
   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     if (!UseAESIntrinsics) return NULL;
@@ -912,6 +921,9 @@
   case vmIntrinsics::_digestBase_implCompressMB:
     return inline_digestBase_implCompressMB(predicate);
 
+  case vmIntrinsics::_multiplyToLen:
+    return inline_multiplyToLen();
+
   case vmIntrinsics::_encodeISOArray:
     return inline_encodeISOArray();
 
@@ -5735,6 +5747,106 @@
   return true;
 }
 
+//-------------inline_multiplyToLen-----------------------------------
+bool LibraryCallKit::inline_multiplyToLen() {
+  assert(UseMultiplyToLenIntrinsic, "not implementated on this platform");
+
+  address stubAddr = StubRoutines::multiplyToLen();
+  if (stubAddr == NULL) {
+    return false; // Intrinsic's stub is not implemented on this platform
+  }
+  const char* stubName = "multiplyToLen";
+
+  assert(callee()->signature()->size() == 5, "multiplyToLen has 5 parameters");
+
+  Node* x    = argument(1);
+  Node* xlen = argument(2);
+  Node* y    = argument(3);
+  Node* ylen = argument(4);
+  Node* z    = argument(5);
+
+  const Type* x_type = x->Value(&_gvn);
+  const Type* y_type = y->Value(&_gvn);
+  const TypeAryPtr* top_x = x_type->isa_aryptr();
+  const TypeAryPtr* top_y = y_type->isa_aryptr();
+  if (top_x  == NULL || top_x->klass()  == NULL ||
+      top_y == NULL || top_y->klass() == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType x_elem = x_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType y_elem = y_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (x_elem != T_INT || y_elem != T_INT) {
+    return false;
+  }
+
+  // Set the original stack and the reexecute bit for the interpreter to reexecute
+  // the bytecode that invokes BigInteger.multiplyToLen() if deoptimization happens
+  // on the return from z array allocation in runtime.
+  { PreserveReexecuteState preexecs(this);
+    jvms()->set_should_reexecute(true);
+
+    Node* x_start = array_element_address(x, intcon(0), x_elem);
+    Node* y_start = array_element_address(y, intcon(0), y_elem);
+    // 'x_start' points to x array + scaled xlen
+    // 'y_start' points to y array + scaled ylen
+
+    // Allocate the result array
+    Node* zlen = _gvn.transform(new(C) AddINode(xlen, ylen));
+    Node* klass_node = makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_INT)));
+
+    IdealKit ideal(this);
+
+#define __ ideal.
+     Node* one = __ ConI(1);
+     Node* zero = __ ConI(0);
+     IdealVariable need_alloc(ideal), z_alloc(ideal);  __ declarations_done();
+     __ set(need_alloc, zero);
+     __ set(z_alloc, z);
+     __ if_then(z, BoolTest::eq, null()); {
+       __ increment (need_alloc, one);
+     } __ else_(); {
+       // Update graphKit memory and control from IdealKit.
+       sync_kit(ideal);
+       Node* zlen_arg = load_array_length(z);
+       // Update IdealKit memory and control from graphKit.
+       __ sync_kit(this);
+       __ if_then(zlen_arg, BoolTest::lt, zlen); {
+         __ increment (need_alloc, one);
+       } __ end_if();
+     } __ end_if();
+
+     __ if_then(__ value(need_alloc), BoolTest::ne, zero); {
+       // Update graphKit memory and control from IdealKit.
+       sync_kit(ideal);
+       Node * narr = new_array(klass_node, zlen, 1);
+       // Update IdealKit memory and control from graphKit.
+       __ sync_kit(this);
+       __ set(z_alloc, narr);
+     } __ end_if();
+
+     sync_kit(ideal);
+     z = __ value(z_alloc);
+     _gvn.set_type(z, TypeAryPtr::INTS);
+     // Final sync IdealKit and GraphKit.
+     final_sync(ideal);
+#undef __
+
+    Node* z_start = array_element_address(z, intcon(0), T_INT);
+
+    Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                   OptoRuntime::multiplyToLen_Type(),
+                                   stubAddr, stubName, TypePtr::BOTTOM,
+                                   x_start, xlen, y_start, ylen, z_start, zlen);
+  } // original reexecute is set back here
+
+  C->set_has_split_ifs(true); // Has chance for split-if optimization
+  set_result(z);
+  return true;
+}
+
+
 /**
  * Calculate CRC32 for byte.
  * int java.util.zip.CRC32.update(int crc, int b)
--- a/src/share/vm/opto/parse.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/opto/parse.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -551,8 +551,9 @@
 
   float   dynamic_branch_prediction(float &cnt);
   float   branch_prediction(float &cnt, BoolTest::mask btest, int target_bci);
-  bool    seems_never_taken(float prob);
-  bool    seems_stable_comparison(BoolTest::mask btest, Node* c);
+  bool    seems_never_taken(float prob) const;
+  bool    path_is_suitable_for_uncommon_trap(float prob) const;
+  bool    seems_stable_comparison() const;
 
   void    do_ifnull(BoolTest::mask btest, Node* c);
   void    do_if(BoolTest::mask btest, Node* c);
--- a/src/share/vm/opto/parse2.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/opto/parse2.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -884,7 +884,7 @@
 // some branches (e.g., _213_javac.Assembler.eliminate) validly produce
 // very small but nonzero probabilities, which if confused with zero
 // counts would keep the program recompiling indefinitely.
-bool Parse::seems_never_taken(float prob) {
+bool Parse::seems_never_taken(float prob) const {
   return prob < PROB_MIN;
 }
 
@@ -895,53 +895,12 @@
 // if a path is never taken, its controlling comparison is
 // already acting in a stable fashion.  If the comparison
 // seems stable, we will put an expensive uncommon trap
-// on the untaken path.  To be conservative, and to allow
-// partially executed counted loops to be compiled fully,
-// we will plant uncommon traps only after pointer comparisons.
-bool Parse::seems_stable_comparison(BoolTest::mask btest, Node* cmp) {
-  for (int depth = 4; depth > 0; depth--) {
-    // The following switch can find CmpP here over half the time for
-    // dynamic language code rich with type tests.
-    // Code using counted loops or array manipulations (typical
-    // of benchmarks) will have many (>80%) CmpI instructions.
-    switch (cmp->Opcode()) {
-    case Op_CmpP:
-      // A never-taken null check looks like CmpP/BoolTest::eq.
-      // These certainly should be closed off as uncommon traps.
-      if (btest == BoolTest::eq)
-        return true;
-      // A never-failed type check looks like CmpP/BoolTest::ne.
-      // Let's put traps on those, too, so that we don't have to compile
-      // unused paths with indeterminate dynamic type information.
-      if (ProfileDynamicTypes)
-        return true;
-      return false;
-
-    case Op_CmpI:
-      // A small minority (< 10%) of CmpP are masked as CmpI,
-      // as if by boolean conversion ((p == q? 1: 0) != 0).
-      // Detect that here, even if it hasn't optimized away yet.
-      // Specifically, this covers the 'instanceof' operator.
-      if (btest == BoolTest::ne || btest == BoolTest::eq) {
-        if (_gvn.type(cmp->in(2))->singleton() &&
-            cmp->in(1)->is_Phi()) {
-          PhiNode* phi = cmp->in(1)->as_Phi();
-          int true_path = phi->is_diamond_phi();
-          if (true_path > 0 &&
-              _gvn.type(phi->in(1))->singleton() &&
-              _gvn.type(phi->in(2))->singleton()) {
-            // phi->region->if_proj->ifnode->bool->cmp
-            BoolNode* bol = phi->in(0)->in(1)->in(0)->in(1)->as_Bool();
-            btest = bol->_test._test;
-            cmp = bol->in(1);
-            continue;
-          }
-        }
-      }
-      return false;
-    }
+// on the untaken path.
+bool Parse::seems_stable_comparison() const {
+  if (C->too_many_traps(method(), bci(), Deoptimization::Reason_unstable_if)) {
+    return false;
   }
-  return false;
+  return true;
 }
 
 //-------------------------------repush_if_args--------------------------------
@@ -1166,6 +1125,14 @@
   }
 }
 
+bool Parse::path_is_suitable_for_uncommon_trap(float prob) const {
+  // Don't want to speculate on uncommon traps when running with -Xcomp
+  if (!UseInterpreter) {
+    return false;
+  }
+  return (seems_never_taken(prob) && seems_stable_comparison());
+}
+
 //----------------------------adjust_map_after_if------------------------------
 // Adjust the JVM state to reflect the result of taking this path.
 // Basically, it means inspecting the CmpNode controlling this
@@ -1179,33 +1146,9 @@
 
   bool is_fallthrough = (path == successor_for_bci(iter().next_bci()));
 
-  if (seems_never_taken(prob) && seems_stable_comparison(btest, c)) {
-    // If this might possibly turn into an implicit null check,
-    // and the null has never yet been seen, we need to generate
-    // an uncommon trap, so as to recompile instead of suffering
-    // with very slow branches.  (We'll get the slow branches if
-    // the program ever changes phase and starts seeing nulls here.)
-    //
-    // We do not inspect for a null constant, since a node may
-    // optimize to 'null' later on.
-    //
-    // Null checks, and other tests which expect inequality,
-    // show btest == BoolTest::eq along the non-taken branch.
-    // On the other hand, type tests, must-be-null tests,
-    // and other tests which expect pointer equality,
-    // show btest == BoolTest::ne along the non-taken branch.
-    // We prune both types of branches if they look unused.
+  if (path_is_suitable_for_uncommon_trap(prob)) {
     repush_if_args();
-    // We need to mark this branch as taken so that if we recompile we will
-    // see that it is possible. In the tiered system the interpreter doesn't
-    // do profiling and by the time we get to the lower tier from the interpreter
-    // the path may be cold again. Make sure it doesn't look untaken
-    if (is_fallthrough) {
-      profile_not_taken_branch(!ProfileInterpreter);
-    } else {
-      profile_taken_branch(iter().get_dest(), !ProfileInterpreter);
-    }
-    uncommon_trap(Deoptimization::Reason_unreached,
+    uncommon_trap(Deoptimization::Reason_unstable_if,
                   Deoptimization::Action_reinterpret,
                   NULL,
                   (is_fallthrough ? "taken always" : "taken never"));
--- a/src/share/vm/opto/runtime.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/opto/runtime.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -942,6 +942,30 @@
   return TypeFunc::make(domain, range);
 }
 
+const TypeFunc* OptoRuntime::multiplyToLen_Type() {
+  // create input type (domain)
+  int num_args      = 6;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL;    // x
+  fields[argp++] = TypeInt::INT;        // xlen
+  fields[argp++] = TypePtr::NOTNULL;    // y
+  fields[argp++] = TypeInt::INT;        // ylen
+  fields[argp++] = TypePtr::NOTNULL;    // z
+  fields[argp++] = TypeInt::INT;        // zlen
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // no result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = NULL;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
+
+
 //------------- Interpreter state access for on stack replacement
 const TypeFunc* OptoRuntime::osr_end_Type() {
   // create input type (domain)
--- a/src/share/vm/opto/runtime.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/opto/runtime.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -303,6 +303,8 @@
   static const TypeFunc* sha_implCompress_Type();
   static const TypeFunc* digestBase_implCompressMB_Type();
 
+  static const TypeFunc* multiplyToLen_Type();
+
   static const TypeFunc* updateBytesCRC32_Type();
 
   // leaf on stack replacement interpreter accessor types
--- a/src/share/vm/runtime/deoptimization.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/runtime/deoptimization.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -1835,7 +1835,8 @@
   "predicate",
   "loop_limit_check",
   "speculate_class_check",
-  "rtm_state_change"
+  "rtm_state_change",
+  "unstable_if"
 };
 const char* Deoptimization::_trap_action_name[Action_LIMIT] = {
   // Note:  Keep this in sync. with enum DeoptAction.
--- a/src/share/vm/runtime/deoptimization.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/runtime/deoptimization.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -60,6 +60,7 @@
     Reason_predicate,             // compiler generated predicate failed
     Reason_loop_limit_check,      // compiler generated loop limits check failed
     Reason_speculate_class_check, // saw unexpected object class from type speculation
+    Reason_unstable_if,           // a branch predicted always false was taken
     Reason_rtm_state_change,      // rtm state change detected
     Reason_LIMIT,
     // Note:  Keep this enum in sync. with _trap_reason_name.
@@ -315,6 +316,8 @@
       return Reason_null_check;           // recorded per BCI as a null check
     else if (reason == Reason_speculate_class_check)
       return Reason_class_check;
+    else if (reason == Reason_unstable_if)
+      return Reason_intrinsic;
     else
       return Reason_none;
   }
--- a/src/share/vm/runtime/stubRoutines.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/runtime/stubRoutines.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -135,6 +135,8 @@
 address StubRoutines::_updateBytesCRC32 = NULL;
 address StubRoutines::_crc_table_adr = NULL;
 
+address StubRoutines::_multiplyToLen = NULL;
+
 double (* StubRoutines::_intrinsic_log   )(double) = NULL;
 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
 double (* StubRoutines::_intrinsic_exp   )(double) = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/runtime/stubRoutines.hpp	Wed Sep 10 17:36:20 2014 +0000
@@ -217,6 +217,8 @@
   static address _updateBytesCRC32;
   static address _crc_table_adr;
 
+  static address _multiplyToLen;
+
   // These are versions of the java.lang.Math methods which perform
   // the same operations as the intrinsic version.  They are used for
   // constant folding in the compiler to ensure equivalence.  If the
@@ -373,6 +375,8 @@
   static address updateBytesCRC32()    { return _updateBytesCRC32; }
   static address crc_table_addr()      { return _crc_table_adr; }
 
+  static address multiplyToLen()       {return _multiplyToLen; }
+
   static address select_fill_function(BasicType t, bool aligned, const char* &name);
 
   static address zero_aligned_words()   { return _zero_aligned_words; }
--- a/src/share/vm/runtime/vmStructs.cpp	Tue Sep 09 09:48:42 2014 -0700
+++ b/src/share/vm/runtime/vmStructs.cpp	Wed Sep 10 17:36:20 2014 +0000
@@ -814,6 +814,7 @@
      static_field(StubRoutines,                _cipherBlockChaining_decryptAESCrypt,          address)                               \
      static_field(StubRoutines,                _updateBytesCRC32,                             address)                               \
      static_field(StubRoutines,                _crc_table_adr,                                address)                               \
+     static_field(StubRoutines,                _multiplyToLen,                                address)                               \
                                                                                                                                      \
   /*****************/                                                                                                                \
   /* SharedRuntime */                                                                                                                \
@@ -2495,6 +2496,7 @@
   declare_constant(Deoptimization::Reason_age)                            \
   declare_constant(Deoptimization::Reason_predicate)                      \
   declare_constant(Deoptimization::Reason_loop_limit_check)               \
+  declare_constant(Deoptimization::Reason_unstable_if)                    \
   declare_constant(Deoptimization::Reason_LIMIT)                          \
   declare_constant(Deoptimization::Reason_RECORDED_LIMIT)                 \
                                                                           \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/multiplytolen/TestMultiplyToLen.java	Wed Sep 10 17:36:20 2014 +0000
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8055494
+ * @summary Add C2 x86 intrinsic for BigInteger::multiplyToLen() method
+ *
+ * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
+ *      -XX:CompileCommand=exclude,TestMultiplyToLen::main
+ *      -XX:CompileCommand=option,TestMultiplyToLen::base_multiply,ccstr,DisableIntrinsic,_multiplyToLen
+ *      -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_multiplyToLen
+ *      -XX:CompileCommand=inline,java.math.BigInteger::multiply TestMultiplyToLen
+ */
+
+import java.util.Random;
+import java.math.*;
+
+public class TestMultiplyToLen {
+
+    // Avoid intrinsic by preventing inlining multiply() and multiplyToLen().
+    public static BigInteger base_multiply(BigInteger op1, BigInteger op2) {
+      return op1.multiply(op2);
+    }
+
+    // Generate multiplyToLen() intrinsic by inlining multiply().
+    public static BigInteger new_multiply(BigInteger op1, BigInteger op2) {
+      return op1.multiply(op2);
+    }
+
+    public static boolean bytecompare(BigInteger b1, BigInteger b2) {
+      byte[] data1 = b1.toByteArray();
+      byte[] data2 = b2.toByteArray();
+      if (data1.length != data2.length)
+        return false;
+      for (int i = 0; i < data1.length; i++) {
+        if (data1[i] != data2[i])
+          return false;
+      }
+      return true;
+    }
+
+    public static String stringify(BigInteger b) {
+      String strout= "";
+      byte [] data = b.toByteArray();
+      for (int i = 0; i < data.length; i++) {
+        strout += (String.format("%02x",data[i]) + " ");
+      }
+      return strout;
+    }
+
+    public static void main(String args[]) throws Exception {
+
+      BigInteger oldsum = new BigInteger("0");
+      BigInteger newsum = new BigInteger("0");
+
+      BigInteger b1, b2, oldres, newres;
+
+      Random rand = new Random();
+      long seed = System.nanoTime();
+      Random rand1 = new Random();
+      long seed1 = System.nanoTime();
+      rand.setSeed(seed);
+      rand1.setSeed(seed1);
+
+      for (int j = 0; j < 1000000; j++) {
+        int rand_int = rand1.nextInt(3136)+32;
+        int rand_int1 = rand1.nextInt(3136)+32;
+        b1 = new BigInteger(rand_int, rand);
+        b2 = new BigInteger(rand_int1, rand);
+
+        oldres = base_multiply(b1,b2);
+        newres = new_multiply(b1,b2);
+
+        oldsum = oldsum.add(oldres);
+        newsum = newsum.add(newres);
+
+        if (!bytecompare(oldres,newres)) {
+          System.out.print("mismatch for:b1:" + stringify(b1) + " :b2:" + stringify(b2) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres));
+          System.out.println(b1);
+          System.out.println(b2);
+          throw new Exception("Failed");
+        }
+      }
+      if (!bytecompare(oldsum,newsum))  {
+        System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum));
+        throw new Exception("Failed");
+      } else {
+        System.out.println("Success");
+      }
+   }
+}