Mercurial > hg > truffle

--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2946,6 +2946,9 @@
   }
 }

+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+  fatal("CRC32 intrinsic is not implemented on this platform");
+}

 void LIR_Assembler::emit_lock(LIR_OpLock* op) {
   Register obj = op->obj_opr()->as_register();
--- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -784,6 +784,10 @@
   set_no_result(x);
 }

+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
+  fatal("CRC32 intrinsic is not implemented on this platform");
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
--- a/src/cpu/x86/vm/assembler_x86.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1673,6 +1673,11 @@
   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
 }

+void Assembler::movdqa(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
+}
+
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
@@ -2286,6 +2291,38 @@
   emit_int8(imm8);
 }

+void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+  emit_int8(0x16);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+  emit_int8(0x16);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+  emit_int8(0x22);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+  emit_int8(0x22);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionMark im(this);
@@ -3691,6 +3728,16 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }

+// Carry-Less Multiplication Quadword
+void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
+  assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
+  bool vector256 = false;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  emit_int8(0x44);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)mask);
+}
+
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
--- a/src/cpu/x86/vm/assembler_x86.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1266,6 +1266,7 @@

   // Move Aligned Double Quadword
   void movdqa(XMMRegister dst, XMMRegister src);
+  void movdqa(XMMRegister dst, Address src);

   // Move Unaligned Double Quadword
   void movdqu(Address     dst, XMMRegister src);
@@ -1404,6 +1405,14 @@
   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
   void pcmpestri(XMMRegister xmm1, Address src, int imm8);

+  // SSE 4.1 extract
+  void pextrd(Register dst, XMMRegister src, int imm8);
+  void pextrq(Register dst, XMMRegister src, int imm8);
+
+  // SSE 4.1 insert
+  void pinsrd(XMMRegister dst, Register src, int imm8);
+  void pinsrq(XMMRegister dst, Register src, int imm8);
+
   // SSE4.1 packed move
   void pmovzxbw(XMMRegister dst, XMMRegister src);
   void pmovzxbw(XMMRegister dst, Address src);
@@ -1764,6 +1773,9 @@
   // duplicate 4-bytes integer data from src into 8 locations in dest
   void vpbroadcastd(XMMRegister dst, XMMRegister src);

+  // Carry-Less Multiplication Quadword
+  void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
+
   // AVX instruction which is used to clear upper 128 bits of YMM registers and
   // to avoid transaction penalty between AVX and SSE states. There is no
   // penalty if legacy SSE instructions are encoded using VEX prefix because
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3512,6 +3512,22 @@
   __ bind(*stub->continuation());
 }

+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+  assert(op->crc()->is_single_cpu(),  "crc must be register");
+  assert(op->val()->is_single_cpu(),  "byte value must be register");
+  assert(op->result_opr()->is_single_cpu(), "result must be register");
+  Register crc = op->crc()->as_register();
+  Register val = op->val()->as_register();
+  Register res = op->result_opr()->as_register();
+
+  assert_different_registers(val, crc, res);
+
+  __ lea(res, ExternalAddress(StubRoutines::crc_table_addr()));
+  __ notl(crc); // ~crc
+  __ update_byte_crc32(crc, val, res);
+  __ notl(crc); // ~crc
+  __ mov(res, crc);
+}

 void LIR_Assembler::emit_lock(LIR_OpLock* op) {
   Register obj = op->obj_opr()->as_register();  // may not be an oop
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -932,6 +932,81 @@
   __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint
 }

+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
+  assert(UseCRC32Intrinsics, "need AVX and LCMUL instructions support");
+  // Make all state_for calls early since they can emit code
+  LIR_Opr result = rlock_result(x);
+  int flags = 0;
+  switch (x->id()) {
+    case vmIntrinsics::_updateCRC32: {
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem val(x->argument_at(1), this);
+      crc.load_item();
+      val.load_item();
+      __ update_crc32(crc.result(), val.result(), result);
+      break;
+    }
+    case vmIntrinsics::_updateBytesCRC32:
+    case vmIntrinsics::_updateByteBufferCRC32: {
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem len(x->argument_at(3), this);
+      buf.load_item();
+      off.load_nonconstant();
+
+      LIR_Opr index = off.result();
+      int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+      if(off.result()->is_constant()) {
+        index = LIR_OprFact::illegalOpr;
+       offset += off.result()->as_jint();
+      }
+      LIR_Opr base_op = buf.result();
+
+#ifndef _LP64
+      if (!is_updateBytes) { // long b raw address
+         base_op = new_register(T_INT);
+         __ convert(Bytecodes::_l2i, buf.result(), base_op);
+      }
+#else
+      if (index->is_valid()) {
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);
+        index = tmp;
+      }
+#endif
+
+      LIR_Address* a = new LIR_Address(base_op,
+                                       index,
+                                       LIR_Address::times_1,
+                                       offset,
+                                       T_BYTE);
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      LIR_Opr addr = new_pointer_register();
+      __ leal(LIR_OprFact::address(a), addr);
+
+      crc.load_item_force(cc->at(0));
+      __ move(addr, cc->at(1));
+      len.load_item_force(cc->at(2));
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args());
+      __ move(result_reg, result);
+
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
+}

 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
--- a/src/cpu/x86/vm/globals_x86.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/globals_x86.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -96,6 +96,9 @@
   product(intx, UseAVX, 99,                                                 \
           "Highest supported AVX instructions set on x86/x64")              \
                                                                             \
+  product(bool, UseCLMUL, false,                                            \
+          "Control whether CLMUL instructions can be used on x86/x64")      \
+                                                                            \
   diagnostic(bool, UseIncDec, true,                                         \
           "Use INC, DEC instructions on x86")                               \
                                                                             \
--- a/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,6 +39,8 @@
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
   address generate_Reference_get_entry();
+  address generate_CRC32_update_entry();
+  address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
   void lock_method(void);
   void generate_stack_overflow_check(void);
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2794,6 +2794,15 @@
   }
 }

+void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::movdqa(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::movdqa(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     Assembler::movsd(dst, as_Address(src));
@@ -6388,6 +6397,193 @@
   bind(L_done);
 }

+/**
+ * Emits code to update CRC-32 with a byte value according to constants in table
+ *
+ * @param [in,out]crc   Register containing the crc.
+ * @param [in]val       Register containing the byte to fold into the CRC.
+ * @param [in]table     Register containing the table of crc constants.
+ *
+ * uint32_t crc;
+ * val = crc_table[(val ^ crc) & 0xFF];
+ * crc = val ^ (crc >> 8);
+ *
+ */
+void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
+  xorl(val, crc);
+  andl(val, 0xFF);
+  shrl(crc, 8); // unsigned shift
+  xorl(crc, Address(table, val, Address::times_4, 0));
+}
+
+/**
+ * Fold 128-bit data chunk
+ */
+void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
+  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
+  vpclmulldq(xcrc, xK, xcrc); // [63:0]
+  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
+  pxor(xcrc, xtmp);
+}
+
+void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
+  vpclmulhdq(xtmp, xK, xcrc);
+  vpclmulldq(xcrc, xK, xcrc);
+  pxor(xcrc, xbuf);
+  pxor(xcrc, xtmp);
+}
+
+/**
+ * 8-bit folds to compute 32-bit CRC
+ *
+ * uint64_t xcrc;
+ * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
+ */
+void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
+  movdl(tmp, xcrc);
+  andl(tmp, 0xFF);
+  movdl(xtmp, Address(table, tmp, Address::times_4, 0));
+  psrldq(xcrc, 1); // unsigned shift one byte
+  pxor(xcrc, xtmp);
+}
+
+/**
+ * uint32_t crc;
+ * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
+ */
+void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
+  movl(tmp, crc);
+  andl(tmp, 0xFF);
+  shrl(crc, 8);
+  xorl(crc, Address(table, tmp, Address::times_4, 0));
+}
+
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register that will contain address of CRC table
+ * @param tmp   scratch register
+ */
+void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
+  assert_different_registers(crc, buf, len, table, tmp, rax);
+
+  Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
+  Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
+
+  lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
+  notl(crc); // ~crc
+  cmpl(len, 16);
+  jcc(Assembler::less, L_tail);
+
+  // Align buffer to 16 bytes
+  movl(tmp, buf);
+  andl(tmp, 0xF);
+  jccb(Assembler::zero, L_aligned);
+  subl(tmp,  16);
+  addl(len, tmp);
+
+  align(4);
+  BIND(L_align_loop);
+  movsbl(rax, Address(buf, 0)); // load byte with sign extension
+  update_byte_crc32(crc, rax, table);
+  increment(buf);
+  incrementl(tmp);
+  jccb(Assembler::less, L_align_loop);
+
+  BIND(L_aligned);
+  movl(tmp, len); // save
+  shrl(len, 4);
+  jcc(Assembler::zero, L_tail_restore);
+
+  // Fold crc into first bytes of vector
+  movdqa(xmm1, Address(buf, 0));
+  movdl(rax, xmm1);
+  xorl(crc, rax);
+  pinsrd(xmm1, crc, 0);
+  addptr(buf, 16);
+  subl(len, 4); // len > 0
+  jcc(Assembler::less, L_fold_tail);
+
+  movdqa(xmm2, Address(buf,  0));
+  movdqa(xmm3, Address(buf, 16));
+  movdqa(xmm4, Address(buf, 32));
+  addptr(buf, 48);
+  subl(len, 3);
+  jcc(Assembler::lessEqual, L_fold_512b);
+
+  // Fold total 512 bits of polynomial on each iteration,
+  // 128 bits per each of 4 parallel streams.
+  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));
+
+  align(32);
+  BIND(L_fold_512b_loop);
+  fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
+  fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
+  fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
+  fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
+  addptr(buf, 64);
+  subl(len, 4);
+  jcc(Assembler::greater, L_fold_512b_loop);
+
+  // Fold 512 bits to 128 bits.
+  BIND(L_fold_512b);
+  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
+  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
+  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
+  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);
+
+  // Fold the rest of 128 bits data chunks
+  BIND(L_fold_tail);
+  addl(len, 3);
+  jccb(Assembler::lessEqual, L_fold_128b);
+  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
+
+  BIND(L_fold_tail_loop);
+  fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
+  addptr(buf, 16);
+  decrementl(len);
+  jccb(Assembler::greater, L_fold_tail_loop);
+
+  // Fold 128 bits in xmm1 down into 32 bits in crc register.
+  BIND(L_fold_128b);
+  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
+  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
+  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  psrldq(xmm1, 8);
+  psrldq(xmm2, 4);
+  pxor(xmm0, xmm1);
+  pxor(xmm0, xmm2);
+
+  // 8 8-bit folds to compute 32-bit CRC.
+  for (int j = 0; j < 4; j++) {
+    fold_8bit_crc32(xmm0, table, xmm1, rax);
+  }
+  movdl(crc, xmm0); // mov 32 bits to general register
+  for (int j = 0; j < 4; j++) {
+    fold_8bit_crc32(crc, table, rax);
+  }
+
+  BIND(L_tail_restore);
+  movl(len, tmp); // restore
+  BIND(L_tail);
+  andl(len, 0xf);
+  jccb(Assembler::zero, L_exit);
+
+  // Fold the rest of bytes
+  align(4);
+  BIND(L_tail_loop);
+  movsbl(rax, Address(buf, 0)); // load byte with sign extension
+  update_byte_crc32(crc, rax, table);
+  increment(buf);
+  decrementl(len);
+  jccb(Assembler::greater, L_tail_loop);
+
+  BIND(L_exit);
+  notl(crc); // ~c
+}
+
 #undef BIND
 #undef BLOCK_COMMENT
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -899,6 +899,11 @@
   void movdqu(XMMRegister dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
   void movdqu(XMMRegister dst, AddressLiteral src);

+  // Move Aligned Double Quadword
+  void movdqa(XMMRegister dst, Address src)       { Assembler::movdqa(dst, src); }
+  void movdqa(XMMRegister dst, XMMRegister src)   { Assembler::movdqa(dst, src); }
+  void movdqa(XMMRegister dst, AddressLiteral src);
+
   void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
   void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
   void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
@@ -1027,6 +1032,16 @@
       Assembler::vinsertf128h(dst, nds, src);
   }

+  // Carry-Less Multiplication Quadword
+  void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+    // 0x00 - multiply lower 64 bits [0:63]
+    Assembler::vpclmulqdq(dst, nds, src, 0x00);
+  }
+  void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+    // 0x11 - multiply upper 64 bits [64:127]
+    Assembler::vpclmulqdq(dst, nds, src, 0x11);
+  }
+
   // Data

   void cmov32( Condition cc, Register dst, Address  src);
@@ -1143,6 +1158,16 @@
                         XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
                         XMMRegister tmp4, Register tmp5, Register result);

+  // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
+  void update_byte_crc32(Register crc, Register val, Register table);
+  void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
+  // Fold 128-bit data chunk
+  void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
+  void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
+  // Fold 8-bit data
+  void fold_8bit_crc32(Register crc, Register table, Register tmp);
+  void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);
+
 #undef VIRTUAL

 };
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2713,6 +2713,59 @@
     return start;
   }

+  /**
+   *  Arguments:
+   *
+   * Inputs:
+   *   rsp(4)   - int crc
+   *   rsp(8)   - byte* buf
+   *   rsp(12)  - int length
+   *
+   * Ouput:
+   *       rax   - int crc result
+   */
+  address generate_updateBytesCRC32() {
+    assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
+
+    address start = __ pc();
+
+    const Register crc   = rdx;  // crc
+    const Register buf   = rsi;  // source java byte array address
+    const Register len   = rcx;  // length
+    const Register table = rdi;  // crc_table address (reuse register)
+    const Register tmp   = rbx;
+    assert_different_registers(crc, buf, len, table, tmp, rax);
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ push(rsi);
+    __ push(rdi);
+    __ push(rbx);
+
+    Address crc_arg(rbp, 8 + 0);
+    Address buf_arg(rbp, 8 + 4);
+    Address len_arg(rbp, 8 + 8);
+
+    // Load up:
+    __ movl(crc,   crc_arg);
+    __ movptr(buf, buf_arg);
+    __ movl(len,   len_arg);
+
+    __ kernel_crc32(crc, buf, len, table, tmp);
+
+    __ movl(rax, crc);
+    __ pop(rbx);
+    __ pop(rdi);
+    __ pop(rsi);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+

  public:
   // Information about frame layout at time of blocking runtime call.
@@ -2887,6 +2940,12 @@

     // Build this early so it's available for the interpreter
     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
+
+    if (UseCRC32Intrinsics) {
+      // set table address before stub generation which use it
+      StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
+      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
+    }
   }
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3584,7 +3584,45 @@
     return start;
   }

-
+  /**
+   *  Arguments:
+   *
+   * Inputs:
+   *   c_rarg0   - int crc
+   *   c_rarg1   - byte* buf
+   *   c_rarg2   - int length
+   *
+   * Ouput:
+   *       rax   - int crc result
+   */
+  address generate_updateBytesCRC32() {
+    assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
+
+    address start = __ pc();
+    // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
+    // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
+    // rscratch1: r10
+    const Register crc   = c_rarg0;  // crc
+    const Register buf   = c_rarg1;  // source java byte array address
+    const Register len   = c_rarg2;  // length
+    const Register table = c_rarg3;  // crc_table address (reuse register)
+    const Register tmp   = r11;
+    assert_different_registers(crc, buf, len, table, tmp, rax);
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ kernel_crc32(crc, buf, len, table, tmp);
+
+    __ movl(rax, crc);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }

 #undef __
 #define __ masm->
@@ -3736,6 +3774,11 @@
                                CAST_FROM_FN_PTR(address,
                                                 SharedRuntime::
                                                 throw_StackOverflowError));
+    if (UseCRC32Intrinsics) {
+      // set table address before stub generation which use it
+      StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
+      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
+    }
   }

   void generate_all() {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/stubRoutines_x86.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+
+// Implementation of the platform-specific part of StubRoutines - for
+// a description of how to extend it, see the stubRoutines.hpp file.
+
+address StubRoutines::x86::_verify_mxcsr_entry = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
+
+uint64_t StubRoutines::x86::_crc_by128_masks[] =
+{
+  /* The fields in this structure are arranged so that they can be
+   * picked up two at a time with 128-bit loads.
+   *
+   * Because of flipped bit order for this CRC polynomials
+   * the constant for X**N is left-shifted by 1.  This is because
+   * a 64 x 64 polynomial multiply produces a 127-bit result
+   * but the highest term is always aligned to bit 0 in the container.
+   * Pre-shifting by one fixes this, at the cost of potentially making
+   * the 32-bit constant no longer fit in a 32-bit container (thus the
+   * use of uint64_t, though this is also the size used by the carry-
+   * less multiply instruction.
+   *
+   * In addition, the flipped bit order and highest-term-at-least-bit
+   * multiply changes the constants used.  The 96-bit result will be
+   * aligned to the high-term end of the target 128-bit container,
+   * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
+   * instead it is a 480 (=512-32) or 544 (=512+64-32) bit fold.
+   *
+   * This cause additional problems in the 128-to-64-bit reduction; see the
+   * code for details.  By storing a mask in the otherwise unused half of
+   * a 128-bit constant, bits can be cleared before multiplication without
+   * storing and reloading.  Note that staying on a 128-bit datapath means
+   * that some data is uselessly stored and some unused data is intersected
+   * with an irrelevant constant.
+   */
+
+  ((uint64_t) 0xffffffffUL),     /* low  of K_M_64    */
+  ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64    */
+  ((uint64_t) 0xba8ccbe8U << 1), /* low  of K_160_96  */
+  ((uint64_t) 0x6655004fU << 1), /* high of K_160_96  */
+  ((uint64_t) 0xaa2215eaU << 1), /* low  of K_544_480 */
+  ((uint64_t) 0xe3720acbU << 1)  /* high of K_544_480 */
+};
+
+/**
+ *  crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h
+ */
+juint StubRoutines::x86::_crc_table[] =
+{
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/stubRoutines_x86.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_X86_VM_STUBROUTINES_X86_HPP
+#define CPU_X86_VM_STUBROUTINES_X86_HPP
+
+// This file holds the platform specific parts of the StubRoutines
+// definition. See stubRoutines.hpp for a description on how to
+// extend it.
+
+ private:
+  static address _verify_mxcsr_entry;
+  // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+  static address _key_shuffle_mask_addr;
+  // masks and table for CRC32
+  static uint64_t _crc_by128_masks[];
+  static juint    _crc_table[];
+
+ public:
+  static address verify_mxcsr_entry()    { return _verify_mxcsr_entry; }
+  static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+  static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }
+
+#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
--- a/src/cpu/x86/vm/stubRoutines_x86_32.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,4 @@
 // Implementation of the platform-specific part of StubRoutines - for
 // a description of how to extend it, see the stubRoutines.hpp file.

-address StubRoutines::x86::_verify_mxcsr_entry         = NULL;
 address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
-address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/src/cpu/x86/vm/stubRoutines_x86_32.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,15 +39,12 @@
  friend class VMStructs;

  private:
-  static address _verify_mxcsr_entry;
   static address _verify_fpu_cntrl_wrd_entry;
-  // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
-  static address _key_shuffle_mask_addr;

  public:
-  static address verify_mxcsr_entry()                        { return _verify_mxcsr_entry; }
   static address verify_fpu_cntrl_wrd_entry()                { return _verify_fpu_cntrl_wrd_entry; }
-  static address key_shuffle_mask_addr()                     { return _key_shuffle_mask_addr; }
+
+# include "stubRoutines_x86.hpp"

 };
--- a/src/cpu/x86/vm/stubRoutines_x86_64.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,8 +34,6 @@
 address StubRoutines::x86::_get_previous_fp_entry = NULL;
 address StubRoutines::x86::_get_previous_sp_entry = NULL;

-address StubRoutines::x86::_verify_mxcsr_entry = NULL;
-
 address StubRoutines::x86::_f2i_fixup = NULL;
 address StubRoutines::x86::_f2l_fixup = NULL;
 address StubRoutines::x86::_d2i_fixup = NULL;
@@ -45,4 +43,3 @@
 address StubRoutines::x86::_double_sign_mask = NULL;
 address StubRoutines::x86::_double_sign_flip = NULL;
 address StubRoutines::x86::_mxcsr_std = NULL;
-address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,7 +42,6 @@
  private:
   static address _get_previous_fp_entry;
   static address _get_previous_sp_entry;
-  static address _verify_mxcsr_entry;

   static address _f2i_fixup;
   static address _f2l_fixup;
@@ -54,8 +53,6 @@
   static address _double_sign_mask;
   static address _double_sign_flip;
   static address _mxcsr_std;
-  // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
-  static address _key_shuffle_mask_addr;

  public:

@@ -69,11 +66,6 @@
     return _get_previous_sp_entry;
   }

-  static address verify_mxcsr_entry()
-  {
-    return _verify_mxcsr_entry;
-  }
-
   static address f2i_fixup()
   {
     return _f2i_fixup;
@@ -119,7 +111,7 @@
     return _mxcsr_std;
   }

-  static address key_shuffle_mask_addr()                     { return _key_shuffle_mask_addr; }
+# include "stubRoutines_x86.hpp"

 };
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -868,6 +868,120 @@
   return generate_accessor_entry();
 }

+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    // rbx,: Method*
+    // rsi: senderSP must preserved for slow path, set SP to it on fast path
+    // rdx: scratch
+    // rdi: scratch
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+             SafepointSynchronize::_not_synchronized);
+    __ jcc(Assembler::notEqual, slow_path);
+
+    // We don't generate local frame and don't align stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters
+    const Register crc = rax;  // crc
+    const Register val = rdx;  // source java byte value
+    const Register tbl = rdi;  // scratch
+
+    // Arguments are reversed on java expression stack
+    __ movl(val, Address(rsp,   wordSize)); // byte value
+    __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
+
+    __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
+    __ notl(crc); // ~crc
+    __ update_byte_crc32(crc, val, tbl);
+    __ notl(crc); // ~crc
+    // result in rax
+
+    // _areturn
+    __ pop(rdi);                // get return address
+    __ mov(rsp, rsi);           // set sp to sender sp
+    __ jmp(rdi);
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    (void) generate_native_entry(false);
+
+    return entry;
+  }
+  return generate_native_entry(false);
+}
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    // rbx,: Method*
+    // rsi: senderSP must preserved for slow path, set SP to it on fast path
+    // rdx: scratch
+    // rdi: scratch
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+             SafepointSynchronize::_not_synchronized);
+    __ jcc(Assembler::notEqual, slow_path);
+
+    // We don't generate local frame and don't align stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters
+    const Register crc = rax;  // crc
+    const Register buf = rdx;  // source java byte array address
+    const Register len = rdi;  // length
+
+    // Arguments are reversed on java expression stack
+    __ movl(len,   Address(rsp,   wordSize)); // Length
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+      __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
+      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
+    } else {
+      __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
+      __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
+    }
+
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
+    // result in rax
+
+    // _areturn
+    __ pop(rdi);                // get return address
+    __ mov(rsp, rsi);           // set sp to sender sp
+    __ jmp(rdi);
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    (void) generate_native_entry(false);
+
+    return entry;
+  }
+  return generate_native_entry(false);
+}
+
 //
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the native method
@@ -1501,15 +1615,16 @@
   // determine code generation flags
   bool synchronized = false;
   address entry_point = NULL;
+  InterpreterGenerator* ig_this = (InterpreterGenerator*)this;

   switch (kind) {
-    case Interpreter::zerolocals             :                                                                             break;
-    case Interpreter::zerolocals_synchronized: synchronized = true;                                                        break;
-    case Interpreter::native                 : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false);  break;
-    case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true);   break;
-    case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();        break;
-    case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();     break;
-    case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();     break;
+    case Interpreter::zerolocals             :                                                       break;
+    case Interpreter::zerolocals_synchronized: synchronized = true;                                  break;
+    case Interpreter::native                 : entry_point = ig_this->generate_native_entry(false);  break;
+    case Interpreter::native_synchronized    : entry_point = ig_this->generate_native_entry(true);   break;
+    case Interpreter::empty                  : entry_point = ig_this->generate_empty_entry();        break;
+    case Interpreter::accessor               : entry_point = ig_this->generate_accessor_entry();     break;
+    case Interpreter::abstract               : entry_point = ig_this->generate_abstract_entry();     break;

     case Interpreter::java_lang_math_sin     : // fall thru
     case Interpreter::java_lang_math_cos     : // fall thru
@@ -1519,9 +1634,15 @@
     case Interpreter::java_lang_math_log10   : // fall thru
     case Interpreter::java_lang_math_sqrt    : // fall thru
     case Interpreter::java_lang_math_pow     : // fall thru
-    case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
+    case Interpreter::java_lang_math_exp     : entry_point = ig_this->generate_math_entry(kind);      break;
     case Interpreter::java_lang_ref_reference_get
-                                             : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+                                             : entry_point = ig_this->generate_Reference_get_entry(); break;
+    case Interpreter::java_util_zip_CRC32_update
+                                             : entry_point = ig_this->generate_CRC32_update_entry();  break;
+    case Interpreter::java_util_zip_CRC32_updateBytes
+                                             : // fall thru
+    case Interpreter::java_util_zip_CRC32_updateByteBuffer
+                                             : entry_point = ig_this->generate_CRC32_updateBytes_entry(kind); break;
     default:
       fatal(err_msg("unexpected method kind: %d", kind));
       break;
@@ -1529,7 +1650,7 @@

   if (entry_point) return entry_point;

-  return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized);
+  return ig_this->generate_normal_entry(synchronized);

 }
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -840,6 +840,117 @@
   return generate_accessor_entry();
 }

+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    // rbx,: Method*
+    // rsi: senderSP must preserved for slow path, set SP to it on fast path
+    // rdx: scratch
+    // rdi: scratch
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+             SafepointSynchronize::_not_synchronized);
+    __ jcc(Assembler::notEqual, slow_path);
+
+    // We don't generate local frame and don't align stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters
+    const Register crc = rax;  // crc
+    const Register val = rdx;  // source java byte value
+    const Register tbl = rdi;  // scratch
+
+    // Arguments are reversed on java expression stack
+    __ movl(val, Address(rsp,   wordSize)); // byte value
+    __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
+
+    __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
+    __ notl(crc); // ~crc
+    __ update_byte_crc32(crc, val, tbl);
+    __ notl(crc); // ~crc
+    // result in rax
+
+    // _areturn
+    __ pop(rdi);                // get return address
+    __ mov(rsp, rsi);           // set sp to sender sp
+    __ jmp(rdi);
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    (void) generate_native_entry(false);
+
+    return entry;
+  }
+  return generate_native_entry(false);
+}
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    // rbx,: Method*
+    // r13: senderSP must preserved for slow path, set SP to it on fast path
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+             SafepointSynchronize::_not_synchronized);
+    __ jcc(Assembler::notEqual, slow_path);
+
+    // We don't generate local frame and don't align stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters
+    const Register crc = c_rarg0;  // crc
+    const Register buf = c_rarg1;  // source java byte array address
+    const Register len = c_rarg2;  // length
+
+    // Arguments are reversed on java expression stack
+    __ movl(len,   Address(rsp,   wordSize)); // Length
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+      __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
+      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
+    } else {
+      __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
+      __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
+    }
+
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
+    // result in rax
+
+    // _areturn
+    __ pop(rdi);                // get return address
+    __ mov(rsp, r13);           // set sp to sender sp
+    __ jmp(rdi);
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    (void) generate_native_entry(false);
+
+    return entry;
+  }
+  return generate_native_entry(false);
+}

 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the
@@ -1510,15 +1621,16 @@
   // determine code generation flags
   bool synchronized = false;
   address entry_point = NULL;
+  InterpreterGenerator* ig_this = (InterpreterGenerator*)this;

   switch (kind) {
-  case Interpreter::zerolocals             :                                                                             break;
-  case Interpreter::zerolocals_synchronized: synchronized = true;                                                        break;
-  case Interpreter::native                 : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); break;
-  case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true);  break;
-  case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();       break;
-  case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();    break;
-  case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();    break;
+  case Interpreter::zerolocals             :                                                      break;
+  case Interpreter::zerolocals_synchronized: synchronized = true;                                 break;
+  case Interpreter::native                 : entry_point = ig_this->generate_native_entry(false); break;
+  case Interpreter::native_synchronized    : entry_point = ig_this->generate_native_entry(true);  break;
+  case Interpreter::empty                  : entry_point = ig_this->generate_empty_entry();       break;
+  case Interpreter::accessor               : entry_point = ig_this->generate_accessor_entry();    break;
+  case Interpreter::abstract               : entry_point = ig_this->generate_abstract_entry();    break;

   case Interpreter::java_lang_math_sin     : // fall thru
   case Interpreter::java_lang_math_cos     : // fall thru
@@ -1528,9 +1640,15 @@
   case Interpreter::java_lang_math_log10   : // fall thru
   case Interpreter::java_lang_math_sqrt    : // fall thru
   case Interpreter::java_lang_math_pow     : // fall thru
-  case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);    break;
+  case Interpreter::java_lang_math_exp     : entry_point = ig_this->generate_math_entry(kind);      break;
   case Interpreter::java_lang_ref_reference_get
-                                           : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+                                           : entry_point = ig_this->generate_Reference_get_entry(); break;
+  case Interpreter::java_util_zip_CRC32_update
+                                           : entry_point = ig_this->generate_CRC32_update_entry();  break;
+  case Interpreter::java_util_zip_CRC32_updateBytes
+                                           : // fall thru
+  case Interpreter::java_util_zip_CRC32_updateByteBuffer
+                                           : entry_point = ig_this->generate_CRC32_updateBytes_entry(kind); break;
   default:
     fatal(err_msg("unexpected method kind: %d", kind));
     break;
@@ -1540,8 +1658,7 @@
     return entry_point;
   }

-  return ((InterpreterGenerator*) this)->
-                                generate_normal_entry(synchronized);
+  return ig_this->generate_normal_entry(synchronized);
 }

 // These should never be compiled since the interpreter will prefer
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -446,6 +446,7 @@
                (supports_avx()    ? ", avx" : ""),
                (supports_avx2()   ? ", avx2" : ""),
                (supports_aes()    ? ", aes" : ""),
+               (supports_clmul()    ? ", clmul" : ""),
                (supports_erms()   ? ", erms" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
@@ -489,6 +490,27 @@
     FLAG_SET_DEFAULT(UseAES, false);
   }

+  // Use CLMUL instructions if available.
+  if (supports_clmul()) {
+    if (FLAG_IS_DEFAULT(UseCLMUL)) {
+      UseCLMUL = true;
+    }
+  } else if (UseCLMUL) {
+    if (!FLAG_IS_DEFAULT(UseCLMUL))
+      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
+    FLAG_SET_DEFAULT(UseCLMUL, false);
+  }
+
+  if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) {
+    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
+      UseCRC32Intrinsics = true;
+    }
+  } else if (UseCRC32Intrinsics) {
+    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
+      warning("CRC32 Intrinsics requires AVX and CLMUL instructions (not available on this CPU)");
+    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+  }
+
   // The AES intrinsic stubs require AES instruction support (of course)
   // but also require sse3 mode for instructions it use.
   if (UseAES && (UseSSE > 2)) {
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,7 +61,8 @@
     uint32_t value;
     struct {
       uint32_t sse3     : 1,
-                        : 2,
+               clmul    : 1,
+                        : 1,
                monitor  : 1,
                         : 1,
                vmx      : 1,
@@ -249,7 +250,8 @@
     CPU_AVX    = (1 << 17),
     CPU_AVX2   = (1 << 18),
     CPU_AES    = (1 << 19),
-    CPU_ERMS   = (1 << 20) // enhanced 'rep movsb/stosb' instructions
+    CPU_ERMS   = (1 << 20), // enhanced 'rep movsb/stosb' instructions
+    CPU_CLMUL  = (1 << 21) // carryless multiply for CRC
   } cpuFeatureFlags;

   enum {
@@ -429,6 +431,8 @@
       result |= CPU_AES;
     if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
       result |= CPU_ERMS;
+    if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
+      result |= CPU_CLMUL;

     // AMD features.
     if (is_amd()) {
@@ -555,6 +559,7 @@
   static bool supports_tsc()      { return (_cpuFeatures & CPU_TSC)    != 0; }
   static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
   static bool supports_erms()     { return (_cpuFeatures & CPU_ERMS) != 0; }
+  static bool supports_clmul()    { return (_cpuFeatures & CPU_CLMUL) != 0; }

   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3461,6 +3461,14 @@
       preserves_state = true;
       break;

+    case vmIntrinsics::_updateCRC32:
+    case vmIntrinsics::_updateBytesCRC32:
+    case vmIntrinsics::_updateByteBufferCRC32:
+      if (!UseCRC32Intrinsics) return false;
+      cantrap = false;
+      preserves_state = true;
+      break;
+
     case vmIntrinsics::_loadFence :
     case vmIntrinsics::_storeFence:
     case vmIntrinsics::_fullFence :
--- a/src/share/vm/c1/c1_LIR.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/c1/c1_LIR.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -430,6 +430,11 @@
   _stub = new ArrayCopyStub(this);
 }

+LIR_OpUpdateCRC32::LIR_OpUpdateCRC32(LIR_Opr crc, LIR_Opr val, LIR_Opr res)
+  : LIR_Op(lir_updatecrc32, res, NULL)
+  , _crc(crc)
+  , _val(val) {
+}

 //-------------------verify--------------------------

@@ -876,6 +881,20 @@
     }


+// LIR_OpUpdateCRC32
+    case lir_updatecrc32: {
+      assert(op->as_OpUpdateCRC32() != NULL, "must be");
+      LIR_OpUpdateCRC32* opUp = (LIR_OpUpdateCRC32*)op;
+
+      assert(opUp->_crc->is_valid(), "used");          do_input(opUp->_crc);     do_temp(opUp->_crc);
+      assert(opUp->_val->is_valid(), "used");          do_input(opUp->_val);     do_temp(opUp->_val);
+      assert(opUp->_result->is_valid(), "used");       do_output(opUp->_result);
+      assert(opUp->_info == NULL, "no info for LIR_OpUpdateCRC32");
+
+      break;
+    }
+
+
 // LIR_OpLock
     case lir_lock:
     case lir_unlock: {
@@ -1056,6 +1075,10 @@
   masm->emit_code_stub(stub());
 }

+void LIR_OpUpdateCRC32::emit_code(LIR_Assembler* masm) {
+  masm->emit_updatecrc32(this);
+}
+
 void LIR_Op0::emit_code(LIR_Assembler* masm) {
   masm->emit_op0(this);
 }
@@ -1763,6 +1786,8 @@
      case lir_dynamic_call:          s = "dynamic";       break;
      // LIR_OpArrayCopy
      case lir_arraycopy:             s = "arraycopy";     break;
+     // LIR_OpUpdateCRC32
+     case lir_updatecrc32:           s = "updatecrc32";   break;
      // LIR_OpLock
      case lir_lock:                  s = "lock";          break;
      case lir_unlock:                s = "unlock";        break;
@@ -1815,6 +1840,13 @@
   tmp()->print(out);     out->print(" ");
 }

+// LIR_OpUpdateCRC32
+void LIR_OpUpdateCRC32::print_instr(outputStream* out) const {
+  crc()->print(out);     out->print(" ");
+  val()->print(out);     out->print(" ");
+  result_opr()->print(out); out->print(" ");
+}
+
 // LIR_OpCompareAndSwap
 void LIR_OpCompareAndSwap::print_instr(outputStream* out) const {
   addr()->print(out);      out->print(" ");
--- a/src/share/vm/c1/c1_LIR.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/c1/c1_LIR.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -877,6 +877,7 @@
 class      LIR_OpJavaCall;
 class      LIR_OpRTCall;
 class    LIR_OpArrayCopy;
+class    LIR_OpUpdateCRC32;
 class    LIR_OpLock;
 class    LIR_OpTypeCheck;
 class    LIR_OpCompareAndSwap;
@@ -982,6 +983,9 @@
   , begin_opArrayCopy
       , lir_arraycopy
   , end_opArrayCopy
+  , begin_opUpdateCRC32
+      , lir_updatecrc32
+  , end_opUpdateCRC32
   , begin_opLock
     , lir_lock
     , lir_unlock
@@ -1137,6 +1141,7 @@
   virtual LIR_Op2* as_Op2() { return NULL; }
   virtual LIR_Op3* as_Op3() { return NULL; }
   virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; }
+  virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; }
   virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; }
   virtual LIR_OpCompareAndSwap* as_OpCompareAndSwap() { return NULL; }
   virtual LIR_OpProfileCall* as_OpProfileCall() { return NULL; }
@@ -1293,6 +1298,25 @@
   void print_instr(outputStream* out) const PRODUCT_RETURN;
 };

+// LIR_OpUpdateCRC32
+class LIR_OpUpdateCRC32: public LIR_Op {
+  friend class LIR_OpVisitState;
+
+private:
+  LIR_Opr   _crc;
+  LIR_Opr   _val;
+
+public:
+
+  LIR_OpUpdateCRC32(LIR_Opr crc, LIR_Opr val, LIR_Opr res);
+
+  LIR_Opr crc() const                            { return _crc; }
+  LIR_Opr val() const                            { return _val; }
+
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32()  { return this; }
+  void print_instr(outputStream* out) const PRODUCT_RETURN;
+};

 // --------------------------------------------------
 // LIR_Op0
@@ -2212,6 +2236,8 @@

   void arraycopy(LIR_Opr src, LIR_Opr src_pos, LIR_Opr dst, LIR_Opr dst_pos, LIR_Opr length, LIR_Opr tmp, ciArrayKlass* expected_type, int flags, CodeEmitInfo* info) { append(new LIR_OpArrayCopy(src, src_pos, dst, dst_pos, length, tmp, expected_type, flags, info)); }

+  void update_crc32(LIR_Opr crc, LIR_Opr val, LIR_Opr res)  { append(new LIR_OpUpdateCRC32(crc, val, res)); }
+
   void fpop_raw()                                { append(new LIR_Op0(lir_fpop_raw)); }

   void instanceof(LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, bool fast_check, CodeEmitInfo* info_for_patch, ciMethod* profiled_method, int profiled_bci);
--- a/src/share/vm/c1/c1_LIRAssembler.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/c1/c1_LIRAssembler.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -195,6 +195,7 @@
   void emit_opBranch(LIR_OpBranch* op);
   void emit_opLabel(LIR_OpLabel* op);
   void emit_arraycopy(LIR_OpArrayCopy* op);
+  void emit_updatecrc32(LIR_OpUpdateCRC32* op);
   void emit_opConvert(LIR_OpConvert* op);
   void emit_alloc_obj(LIR_OpAllocObj* op);
   void emit_alloc_array(LIR_OpAllocArray* op);
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -2994,6 +2994,12 @@
     do_Reference_get(x);
     break;

+  case vmIntrinsics::_updateCRC32:
+  case vmIntrinsics::_updateBytesCRC32:
+  case vmIntrinsics::_updateByteBufferCRC32:
+    do_update_CRC32(x);
+    break;
+
   default: ShouldNotReachHere(); break;
   }
 }
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -247,6 +247,7 @@
   void do_NIOCheckIndex(Intrinsic* x);
   void do_FPIntrinsics(Intrinsic* x);
   void do_Reference_get(Intrinsic* x);
+  void do_update_CRC32(Intrinsic* x);

   void do_UnsafePrefetch(UnsafePrefetch* x, bool is_store);
--- a/src/share/vm/c1/c1_Runtime1.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -299,6 +299,7 @@
 #ifdef TRACE_HAVE_INTRINSICS
   FUNCTION_CASE(entry, TRACE_TIME_METHOD);
 #endif
+  FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());

 #undef FUNCTION_CASE
--- a/src/share/vm/classfile/vmSymbols.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/classfile/vmSymbols.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -771,6 +771,17 @@
    do_name(     decrypt_name,                                      "decrypt")                                           \
    do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)V")                                        \
                                                                                                                         \
+  /* support for java.util.zip */                                                                                       \
+  do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \
+  do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
+   do_name(     update_name,                                      "update")                                             \
+  do_intrinsic(_updateBytesCRC32,          java_util_zip_CRC32,   updateBytes_name, updateBytes_signature,       F_SN)  \
+   do_name(     updateBytes_name,                                "updateBytes")                                         \
+   do_signature(updateBytes_signature,                           "(I[BII)I")                                            \
+  do_intrinsic(_updateByteBufferCRC32,     java_util_zip_CRC32,   updateByteBuffer_name, updateByteBuffer_signature, F_SN) \
+   do_name(     updateByteBuffer_name,                           "updateByteBuffer")                                    \
+   do_signature(updateByteBuffer_signature,                      "(IJII)I")                                             \
+                                                                                                                        \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
                                                                                                                         \
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -102,6 +102,9 @@
     java_lang_math_pow,                                         // implementation of java.lang.Math.pow   (x,y)
     java_lang_math_exp,                                         // implementation of java.lang.Math.exp   (x)
     java_lang_ref_reference_get,                                // implementation of java.lang.ref.Reference.get()
+    java_util_zip_CRC32_update,                                 // implementation of java.util.zip.CRC32.update()
+    java_util_zip_CRC32_updateBytes,                            // implementation of java.util.zip.CRC32.updateBytes()
+    java_util_zip_CRC32_updateByteBuffer,                       // implementation of java.util.zip.CRC32.updateByteBuffer()
     number_of_method_entries,
     invalid = -1
   };
--- a/src/share/vm/interpreter/interpreter.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/interpreter/interpreter.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -195,6 +195,17 @@
     return kind;
   }

+#ifndef CC_INTERP
+  if (UseCRC32Intrinsics && m->is_native()) {
+    // Use optimized stub code for CRC32 native methods.
+    switch (m->intrinsic_id()) {
+      case vmIntrinsics::_updateCRC32            : return java_util_zip_CRC32_update;
+      case vmIntrinsics::_updateBytesCRC32       : return java_util_zip_CRC32_updateBytes;
+      case vmIntrinsics::_updateByteBufferCRC32  : return java_util_zip_CRC32_updateByteBuffer;
+    }
+  }
+#endif
+
   // Native method?
   // Note: This test must come _before_ the test for intrinsic
   //       methods. See also comments below.
@@ -297,6 +308,9 @@
     case java_lang_math_sqrt    : tty->print("java_lang_math_sqrt"    ); break;
     case java_lang_math_log     : tty->print("java_lang_math_log"     ); break;
     case java_lang_math_log10   : tty->print("java_lang_math_log10"   ); break;
+    case java_util_zip_CRC32_update           : tty->print("java_util_zip_CRC32_update"); break;
+    case java_util_zip_CRC32_updateBytes      : tty->print("java_util_zip_CRC32_updateBytes"); break;
+    case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break;
     default:
       if (kind >= method_handle_invoke_FIRST &&
           kind <= method_handle_invoke_LAST) {
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -373,6 +373,12 @@
   method_entry(java_lang_math_pow  )
   method_entry(java_lang_ref_reference_get)

+  if (UseCRC32Intrinsics) {
+    method_entry(java_util_zip_CRC32_update)
+    method_entry(java_util_zip_CRC32_updateBytes)
+    method_entry(java_util_zip_CRC32_updateByteBuffer)
+  }
+
   initialize_method_handle_entries();

   // all native method kinds (must be one contiguous block)
--- a/src/share/vm/opto/escape.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/opto/escape.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -933,6 +933,7 @@
                 (call->as_CallLeaf()->_name != NULL &&
                  (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
--- a/src/share/vm/opto/library_call.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/opto/library_call.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -291,6 +291,9 @@
   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
   bool inline_encodeISOArray();
+  bool inline_updateCRC32();
+  bool inline_updateBytesCRC32();
+  bool inline_updateByteBufferCRC32();
 };


@@ -488,6 +491,12 @@
     is_predicted = true;
     break;

+  case vmIntrinsics::_updateCRC32:
+  case vmIntrinsics::_updateBytesCRC32:
+  case vmIntrinsics::_updateByteBufferCRC32:
+    if (!UseCRC32Intrinsics) return NULL;
+    break;
+
  default:
     assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
     assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -807,6 +816,13 @@
   case vmIntrinsics::_encodeISOArray:
     return inline_encodeISOArray();

+  case vmIntrinsics::_updateCRC32:
+    return inline_updateCRC32();
+  case vmIntrinsics::_updateBytesCRC32:
+    return inline_updateBytesCRC32();
+  case vmIntrinsics::_updateByteBufferCRC32:
+    return inline_updateByteBufferCRC32();
+
   default:
     // If you get here, it may be that someone has added a new intrinsic
     // to the list in vmSymbols.hpp without implementing it here.
@@ -884,7 +900,7 @@

   IfNode* iff = create_and_map_if(control(), test, true_prob, COUNT_UNKNOWN);

-  Node* if_slow = _gvn.transform( new (C) IfTrueNode(iff) );
+  Node* if_slow = _gvn.transform(new (C) IfTrueNode(iff));
   if (if_slow == top()) {
     // The slow branch is never taken.  No need to build this guard.
     return NULL;
@@ -893,7 +909,7 @@
   if (region != NULL)
     region->add_req(if_slow);

-  Node* if_fast = _gvn.transform( new (C) IfFalseNode(iff) );
+  Node* if_fast = _gvn.transform(new (C) IfFalseNode(iff));
   set_control(if_fast);

   return if_slow;
@@ -912,8 +928,8 @@
     return NULL;                // already stopped
   if (_gvn.type(index)->higher_equal(TypeInt::POS)) // [0,maxint]
     return NULL;                // index is already adequately typed
-  Node* cmp_lt = _gvn.transform( new (C) CmpINode(index, intcon(0)) );
-  Node* bol_lt = _gvn.transform( new (C) BoolNode(cmp_lt, BoolTest::lt) );
+  Node* cmp_lt = _gvn.transform(new (C) CmpINode(index, intcon(0)));
+  Node* bol_lt = _gvn.transform(new (C) BoolNode(cmp_lt, BoolTest::lt));
   Node* is_neg = generate_guard(bol_lt, region, PROB_MIN);
   if (is_neg != NULL && pos_index != NULL) {
     // Emulate effect of Parse::adjust_map_after_if.
@@ -930,9 +946,9 @@
     return NULL;                // already stopped
   if (_gvn.type(index)->higher_equal(TypeInt::POS1)) // [1,maxint]
     return NULL;                // index is already adequately typed
-  Node* cmp_le = _gvn.transform( new (C) CmpINode(index, intcon(0)) );
+  Node* cmp_le = _gvn.transform(new (C) CmpINode(index, intcon(0)));
   BoolTest::mask le_or_eq = (never_negative ? BoolTest::eq : BoolTest::le);
-  Node* bol_le = _gvn.transform( new (C) BoolNode(cmp_le, le_or_eq) );
+  Node* bol_le = _gvn.transform(new (C) BoolNode(cmp_le, le_or_eq));
   Node* is_notp = generate_guard(bol_le, NULL, PROB_MIN);
   if (is_notp != NULL && pos_index != NULL) {
     // Emulate effect of Parse::adjust_map_after_if.
@@ -968,9 +984,9 @@
     return NULL;                // common case of whole-array copy
   Node* last = subseq_length;
   if (!zero_offset)             // last += offset
-    last = _gvn.transform( new (C) AddINode(last, offset));
-  Node* cmp_lt = _gvn.transform( new (C) CmpUNode(array_length, last) );
-  Node* bol_lt = _gvn.transform( new (C) BoolNode(cmp_lt, BoolTest::lt) );
+    last = _gvn.transform(new (C) AddINode(last, offset));
+  Node* cmp_lt = _gvn.transform(new (C) CmpUNode(array_length, last));
+  Node* bol_lt = _gvn.transform(new (C) BoolNode(cmp_lt, BoolTest::lt));
   Node* is_over = generate_guard(bol_lt, region, PROB_MIN);
   return is_over;
 }
@@ -1151,8 +1167,8 @@
     Node* argument_cnt  = load_String_length(no_ctrl, argument);

     // Check for receiver count != argument count
-    Node* cmp = _gvn.transform( new(C) CmpINode(receiver_cnt, argument_cnt) );
-    Node* bol = _gvn.transform( new(C) BoolNode(cmp, BoolTest::ne) );
+    Node* cmp = _gvn.transform(new(C) CmpINode(receiver_cnt, argument_cnt));
+    Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::ne));
     Node* if_ne = generate_slow_guard(bol, NULL);
     if (if_ne != NULL) {
       phi->init_req(4, intcon(0));
@@ -1258,7 +1274,7 @@
   Node* sourceOffset  = load_String_offset(no_ctrl, string_object);
   Node* sourceCount   = load_String_length(no_ctrl, string_object);

-  Node* target = _gvn.transform( makecon(TypeOopPtr::make_from_constant(target_array, true)) );
+  Node* target = _gvn.transform( makecon(TypeOopPtr::make_from_constant(target_array, true)));
   jint target_length = target_array->length();
   const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
   const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
@@ -1365,8 +1381,8 @@
     Node* substr_cnt  = load_String_length(no_ctrl, arg);

     // Check for substr count > string count
-    Node* cmp = _gvn.transform( new(C) CmpINode(substr_cnt, source_cnt) );
-    Node* bol = _gvn.transform( new(C) BoolNode(cmp, BoolTest::gt) );
+    Node* cmp = _gvn.transform(new(C) CmpINode(substr_cnt, source_cnt));
+    Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::gt));
     Node* if_gt = generate_slow_guard(bol, NULL);
     if (if_gt != NULL) {
       result_phi->init_req(2, intcon(-1));
@@ -1375,8 +1391,8 @@

     if (!stopped()) {
       // Check for substr count == 0
-      cmp = _gvn.transform( new(C) CmpINode(substr_cnt, intcon(0)) );
-      bol = _gvn.transform( new(C) BoolNode(cmp, BoolTest::eq) );
+      cmp = _gvn.transform(new(C) CmpINode(substr_cnt, intcon(0)));
+      bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::eq));
       Node* if_zero = generate_slow_guard(bol, NULL);
       if (if_zero != NULL) {
         result_phi->init_req(3, intcon(0));
@@ -1552,7 +1568,7 @@
     // Check PI/4 : abs(arg)
     Node *cmp = _gvn.transform(new (C) CmpDNode(pi4,abs));
     // Check: If PI/4 < abs(arg) then go slow
-    Node *bol = _gvn.transform( new (C) BoolNode( cmp, BoolTest::lt ) );
+    Node *bol = _gvn.transform(new (C) BoolNode( cmp, BoolTest::lt ));
     // Branch either way
     IfNode *iff = create_and_xform_if(control(),bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
     set_control(opt_iff(r,iff));
@@ -1617,8 +1633,8 @@
     // to the runtime to properly handle corner cases

     IfNode* iff = create_and_xform_if(control(), bolisnum, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
-    Node* if_slow = _gvn.transform( new (C) IfFalseNode(iff) );
-    Node* if_fast = _gvn.transform( new (C) IfTrueNode(iff) );
+    Node* if_slow = _gvn.transform(new (C) IfFalseNode(iff));
+    Node* if_fast = _gvn.transform(new (C) IfTrueNode(iff));

     if (!if_slow->is_top()) {
       RegionNode* result_region = new (C) RegionNode(3);
@@ -1704,42 +1720,42 @@
     // Check x:0
     Node *cmp = _gvn.transform(new (C) CmpDNode(x, zeronode));
     // Check: If (x<=0) then go complex path
-    Node *bol1 = _gvn.transform( new (C) BoolNode( cmp, BoolTest::le ) );
+    Node *bol1 = _gvn.transform(new (C) BoolNode( cmp, BoolTest::le ));
     // Branch either way
     IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
     // Fast path taken; set region slot 3
-    Node *fast_taken = _gvn.transform( new (C) IfFalseNode(if1) );
+    Node *fast_taken = _gvn.transform(new (C) IfFalseNode(if1));
     r->init_req(3,fast_taken); // Capture fast-control

     // Fast path not-taken, i.e. slow path
-    Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) );
+    Node *complex_path = _gvn.transform(new (C) IfTrueNode(if1));

     // Set fast path result
-    Node *fast_result = _gvn.transform( new (C) PowDNode(C, control(), x, y) );
+    Node *fast_result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
     phi->init_req(3, fast_result);

     // Complex path
     // Build the second if node (if y is long)
     // Node for (long)y
-    Node *longy = _gvn.transform( new (C) ConvD2LNode(y));
+    Node *longy = _gvn.transform(new (C) ConvD2LNode(y));
     // Node for (double)((long) y)
-    Node *doublelongy= _gvn.transform( new (C) ConvL2DNode(longy));
+    Node *doublelongy= _gvn.transform(new (C) ConvL2DNode(longy));
     // Check (double)((long) y) : y
     Node *cmplongy= _gvn.transform(new (C) CmpDNode(doublelongy, y));
     // Check if (y isn't long) then go to slow path

-    Node *bol2 = _gvn.transform( new (C) BoolNode( cmplongy, BoolTest::ne ) );
+    Node *bol2 = _gvn.transform(new (C) BoolNode( cmplongy, BoolTest::ne ));
     // Branch either way
     IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
-    Node* ylong_path = _gvn.transform( new (C) IfFalseNode(if2));
-
-    Node *slow_path = _gvn.transform( new (C) IfTrueNode(if2) );
+    Node* ylong_path = _gvn.transform(new (C) IfFalseNode(if2));
+
+    Node *slow_path = _gvn.transform(new (C) IfTrueNode(if2));

     // Calculate DPow(abs(x), y)*(1 & (long)y)
     // Node for constant 1
     Node *conone = longcon(1);
     // 1& (long)y
-    Node *signnode= _gvn.transform( new (C) AndLNode(conone, longy) );
+    Node *signnode= _gvn.transform(new (C) AndLNode(conone, longy));

     // A huge number is always even. Detect a huge number by checking
     // if y + 1 == y and set integer to be tested for parity to 0.
@@ -1747,9 +1763,9 @@
     // (long)9.223372036854776E18 = max_jlong
     // (double)(long)9.223372036854776E18 = 9.223372036854776E18
     // max_jlong is odd but 9.223372036854776E18 is even
-    Node* yplus1 = _gvn.transform( new (C) AddDNode(y, makecon(TypeD::make(1))));
+    Node* yplus1 = _gvn.transform(new (C) AddDNode(y, makecon(TypeD::make(1))));
     Node *cmpyplus1= _gvn.transform(new (C) CmpDNode(yplus1, y));
-    Node *bolyplus1 = _gvn.transform( new (C) BoolNode( cmpyplus1, BoolTest::eq ) );
+    Node *bolyplus1 = _gvn.transform(new (C) BoolNode( cmpyplus1, BoolTest::eq ));
     Node* correctedsign = NULL;
     if (ConditionalMoveLimit != 0) {
       correctedsign = _gvn.transform( CMoveNode::make(C, NULL, bolyplus1, signnode, longcon(0), TypeLong::LONG));
@@ -1757,8 +1773,8 @@
       IfNode *ifyplus1 = create_and_xform_if(ylong_path,bolyplus1, PROB_FAIR, COUNT_UNKNOWN);
       RegionNode *r = new (C) RegionNode(3);
       Node *phi = new (C) PhiNode(r, TypeLong::LONG);
-      r->init_req(1, _gvn.transform( new (C) IfFalseNode(ifyplus1)));
-      r->init_req(2, _gvn.transform( new (C) IfTrueNode(ifyplus1)));
+      r->init_req(1, _gvn.transform(new (C) IfFalseNode(ifyplus1)));
+      r->init_req(2, _gvn.transform(new (C) IfTrueNode(ifyplus1)));
       phi->init_req(1, signnode);
       phi->init_req(2, longcon(0));
       correctedsign = _gvn.transform(phi);
@@ -1771,11 +1787,11 @@
     // Check (1&(long)y)==0?
     Node *cmpeq1 = _gvn.transform(new (C) CmpLNode(correctedsign, conzero));
     // Check if (1&(long)y)!=0?, if so the result is negative
-    Node *bol3 = _gvn.transform( new (C) BoolNode( cmpeq1, BoolTest::ne ) );
+    Node *bol3 = _gvn.transform(new (C) BoolNode( cmpeq1, BoolTest::ne ));
     // abs(x)
-    Node *absx=_gvn.transform( new (C) AbsDNode(x));
+    Node *absx=_gvn.transform(new (C) AbsDNode(x));
     // abs(x)^y
-    Node *absxpowy = _gvn.transform( new (C) PowDNode(C, control(), absx, y) );
+    Node *absxpowy = _gvn.transform(new (C) PowDNode(C, control(), absx, y));
     // -abs(x)^y
     Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy));
     // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
@@ -1786,8 +1802,8 @@
       IfNode *ifyeven = create_and_xform_if(ylong_path,bol3, PROB_FAIR, COUNT_UNKNOWN);
       RegionNode *r = new (C) RegionNode(3);
       Node *phi = new (C) PhiNode(r, Type::DOUBLE);
-      r->init_req(1, _gvn.transform( new (C) IfFalseNode(ifyeven)));
-      r->init_req(2, _gvn.transform( new (C) IfTrueNode(ifyeven)));
+      r->init_req(1, _gvn.transform(new (C) IfFalseNode(ifyeven)));
+      r->init_req(2, _gvn.transform(new (C) IfTrueNode(ifyeven)));
       phi->init_req(1, absxpowy);
       phi->init_req(2, negabsxpowy);
       signresult = _gvn.transform(phi);
@@ -1920,7 +1936,7 @@
   int   cmp_op = Op_CmpI;
   Node* xkey = xvalue;
   Node* ykey = yvalue;
-  Node* ideal_cmpxy = _gvn.transform( new(C) CmpINode(xkey, ykey) );
+  Node* ideal_cmpxy = _gvn.transform(new(C) CmpINode(xkey, ykey));
   if (ideal_cmpxy->is_Cmp()) {
     // E.g., if we have CmpI(length - offset, count),
     // it might idealize to CmpI(length, count + offset)
@@ -2013,7 +2029,7 @@
   default:
     if (cmpxy == NULL)
       cmpxy = ideal_cmpxy;
-    best_bol = _gvn.transform( new(C) BoolNode(cmpxy, BoolTest::lt) );
+    best_bol = _gvn.transform(new(C) BoolNode(cmpxy, BoolTest::lt));
     // and fall through:
   case BoolTest::lt:          // x < y
   case BoolTest::le:          // x <= y
@@ -2073,7 +2089,7 @@
     return Type::AnyPtr;
   } else if (base_type == TypePtr::NULL_PTR) {
     // Since this is a NULL+long form, we have to switch to a rawptr.
-    base   = _gvn.transform( new (C) CastX2PNode(offset) );
+    base   = _gvn.transform(new (C) CastX2PNode(offset));
     offset = MakeConX(0);
     return Type::RawPtr;
   } else if (base_type->base() == Type::RawPtr) {
@@ -2467,7 +2483,7 @@
     case T_ADDRESS:
       // Repackage the long as a pointer.
       val = ConvL2X(val);
-      val = _gvn.transform( new (C) CastX2PNode(val) );
+      val = _gvn.transform(new (C) CastX2PNode(val));
       break;
     }

@@ -2775,7 +2791,7 @@
   // SCMemProjNodes represent the memory state of a LoadStore. Their
   // main role is to prevent LoadStore nodes from being optimized away
   // when their results aren't used.
-  Node* proj = _gvn.transform( new (C) SCMemProjNode(load_store));
+  Node* proj = _gvn.transform(new (C) SCMemProjNode(load_store));
   set_memory(proj, alias_idx);

   // Add the trailing membar surrounding the access
@@ -3010,8 +3026,8 @@
   Node* rec_thr = argument(0);
   Node* tls_ptr = NULL;
   Node* cur_thr = generate_current_thread(tls_ptr);
-  Node* cmp_thr = _gvn.transform( new (C) CmpPNode(cur_thr, rec_thr) );
-  Node* bol_thr = _gvn.transform( new (C) BoolNode(cmp_thr, BoolTest::ne) );
+  Node* cmp_thr = _gvn.transform(new (C) CmpPNode(cur_thr, rec_thr));
+  Node* bol_thr = _gvn.transform(new (C) BoolNode(cmp_thr, BoolTest::ne));

   generate_slow_guard(bol_thr, slow_region);

@@ -3022,36 +3038,36 @@

   // Set the control input on the field _interrupted read to prevent it floating up.
   Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
-  Node* cmp_bit = _gvn.transform( new (C) CmpINode(int_bit, intcon(0)) );
-  Node* bol_bit = _gvn.transform( new (C) BoolNode(cmp_bit, BoolTest::ne) );
+  Node* cmp_bit = _gvn.transform(new (C) CmpINode(int_bit, intcon(0)));
+  Node* bol_bit = _gvn.transform(new (C) BoolNode(cmp_bit, BoolTest::ne));

   IfNode* iff_bit = create_and_map_if(control(), bol_bit, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);

   // First fast path:  if (!TLS._interrupted) return false;
-  Node* false_bit = _gvn.transform( new (C) IfFalseNode(iff_bit) );
+  Node* false_bit = _gvn.transform(new (C) IfFalseNode(iff_bit));
   result_rgn->init_req(no_int_result_path, false_bit);
   result_val->init_req(no_int_result_path, intcon(0));

   // drop through to next case
-  set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)) );
+  set_control( _gvn.transform(new (C) IfTrueNode(iff_bit)));

   // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
   Node* clr_arg = argument(1);
-  Node* cmp_arg = _gvn.transform( new (C) CmpINode(clr_arg, intcon(0)) );
-  Node* bol_arg = _gvn.transform( new (C) BoolNode(cmp_arg, BoolTest::ne) );
+  Node* cmp_arg = _gvn.transform(new (C) CmpINode(clr_arg, intcon(0)));
+  Node* bol_arg = _gvn.transform(new (C) BoolNode(cmp_arg, BoolTest::ne));
   IfNode* iff_arg = create_and_map_if(control(), bol_arg, PROB_FAIR, COUNT_UNKNOWN);

   // Second fast path:  ... else if (!clear_int) return true;
-  Node* false_arg = _gvn.transform( new (C) IfFalseNode(iff_arg) );
+  Node* false_arg = _gvn.transform(new (C) IfFalseNode(iff_arg));
   result_rgn->init_req(no_clear_result_path, false_arg);
   result_val->init_req(no_clear_result_path, intcon(1));

   // drop through to next case
-  set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)) );
+  set_control( _gvn.transform(new (C) IfTrueNode(iff_arg)));

   // (d) Otherwise, go to the slow path.
   slow_region->add_req(control());
-  set_control( _gvn.transform(slow_region) );
+  set_control( _gvn.transform(slow_region));

   if (stopped()) {
     // There is no slow path.
@@ -3107,7 +3123,7 @@
   if (region == NULL)  never_see_null = true;
   Node* p = basic_plus_adr(mirror, offset);
   const TypeKlassPtr*  kls_type = TypeKlassPtr::OBJECT_OR_NULL;
-  Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type) );
+  Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type));
   Node* null_ctl = top();
   kls = null_check_oop(kls, &null_ctl, never_see_null);
   if (region != NULL) {
@@ -3129,9 +3145,9 @@
   Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
   Node* mask = intcon(modifier_mask);
   Node* bits = intcon(modifier_bits);
-  Node* mbit = _gvn.transform( new (C) AndINode(mods, mask) );
-  Node* cmp  = _gvn.transform( new (C) CmpINode(mbit, bits) );
-  Node* bol  = _gvn.transform( new (C) BoolNode(cmp, BoolTest::ne) );
+  Node* mbit = _gvn.transform(new (C) AndINode(mods, mask));
+  Node* cmp  = _gvn.transform(new (C) CmpINode(mbit, bits));
+  Node* bol  = _gvn.transform(new (C) BoolNode(cmp, BoolTest::ne));
   return generate_fair_guard(bol, region);
 }
 Node* LibraryCallKit::generate_interface_guard(Node* kls, RegionNode* region) {
@@ -3282,7 +3298,7 @@
       phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
     // If we fall through, it's a plain class.  Get its _super.
     p = basic_plus_adr(kls, in_bytes(Klass::super_offset()));
-    kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) );
+    kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL));
     null_ctl = top();
     kls = null_check_oop(kls, &null_ctl);
     if (null_ctl != top()) {
@@ -3395,8 +3411,8 @@
   set_control(region->in(_prim_0_path)); // go back to first null check
   if (!stopped()) {
     // Since superc is primitive, make a guard for the superc==subc case.
-    Node* cmp_eq = _gvn.transform( new (C) CmpPNode(args[0], args[1]) );
-    Node* bol_eq = _gvn.transform( new (C) BoolNode(cmp_eq, BoolTest::eq) );
+    Node* cmp_eq = _gvn.transform(new (C) CmpPNode(args[0], args[1]));
+    Node* bol_eq = _gvn.transform(new (C) BoolNode(cmp_eq, BoolTest::eq));
     generate_guard(bol_eq, region, PROB_FAIR);
     if (region->req() == PATH_LIMIT+1) {
       // A guard was added.  If the added guard is taken, superc==subc.
@@ -3461,11 +3477,11 @@
                 ? ((jint)Klass::_lh_array_tag_type_value
                    <<    Klass::_lh_array_tag_shift)
                 : Klass::_lh_neutral_value);
-  Node* cmp = _gvn.transform( new(C) CmpINode(layout_val, intcon(nval)) );
+  Node* cmp = _gvn.transform(new(C) CmpINode(layout_val, intcon(nval)));
   BoolTest::mask btest = BoolTest::lt;  // correct for testing is_[obj]array
   // invert the test if we are looking for a non-array
   if (not_array)  btest = BoolTest(btest).negate();
-  Node* bol = _gvn.transform( new(C) BoolNode(cmp, btest) );
+  Node* bol = _gvn.transform(new(C) BoolNode(cmp, btest));
   return generate_fair_guard(bol, region);
 }

@@ -3525,7 +3541,7 @@

   // Return the combined state.
   set_i_o(        _gvn.transform(result_io)  );
-  set_all_memory( _gvn.transform(result_mem) );
+  set_all_memory( _gvn.transform(result_mem));

   C->set_has_split_ifs(true); // Has chance for split-if optimization
   set_result(result_reg, result_val);
@@ -3678,8 +3694,8 @@
   const TypePtr* native_call_addr = TypeMetadataPtr::make(method);

   Node* native_call = makecon(native_call_addr);
-  Node* chk_native  = _gvn.transform( new(C) CmpPNode(target_call, native_call) );
-  Node* test_native = _gvn.transform( new(C) BoolNode(chk_native, BoolTest::ne) );
+  Node* chk_native  = _gvn.transform(new(C) CmpPNode(target_call, native_call));
+  Node* test_native = _gvn.transform(new(C) BoolNode(chk_native, BoolTest::ne));

   return generate_slow_guard(test_native, slow_region);
 }
@@ -3800,10 +3816,10 @@

   // Test the header to see if it is unlocked.
   Node *lock_mask      = _gvn.MakeConX(markOopDesc::biased_lock_mask_in_place);
-  Node *lmasked_header = _gvn.transform( new (C) AndXNode(header, lock_mask) );
+  Node *lmasked_header = _gvn.transform(new (C) AndXNode(header, lock_mask));
   Node *unlocked_val   = _gvn.MakeConX(markOopDesc::unlocked_value);
-  Node *chk_unlocked   = _gvn.transform( new (C) CmpXNode( lmasked_header, unlocked_val));
-  Node *test_unlocked  = _gvn.transform( new (C) BoolNode( chk_unlocked, BoolTest::ne) );
+  Node *chk_unlocked   = _gvn.transform(new (C) CmpXNode( lmasked_header, unlocked_val));
+  Node *test_unlocked  = _gvn.transform(new (C) BoolNode( chk_unlocked, BoolTest::ne));

   generate_slow_guard(test_unlocked, slow_region);

@@ -3813,17 +3829,17 @@
   // vm: see markOop.hpp.
   Node *hash_mask      = _gvn.intcon(markOopDesc::hash_mask);
   Node *hash_shift     = _gvn.intcon(markOopDesc::hash_shift);
-  Node *hshifted_header= _gvn.transform( new (C) URShiftXNode(header, hash_shift) );
+  Node *hshifted_header= _gvn.transform(new (C) URShiftXNode(header, hash_shift));
   // This hack lets the hash bits live anywhere in the mark object now, as long
   // as the shift drops the relevant bits into the low 32 bits.  Note that
   // Java spec says that HashCode is an int so there's no point in capturing
   // an 'X'-sized hashcode (32 in 32-bit build or 64 in 64-bit build).
   hshifted_header      = ConvX2I(hshifted_header);
-  Node *hash_val       = _gvn.transform( new (C) AndINode(hshifted_header, hash_mask) );
+  Node *hash_val       = _gvn.transform(new (C) AndINode(hshifted_header, hash_mask));

   Node *no_hash_val    = _gvn.intcon(markOopDesc::no_hash);
-  Node *chk_assigned   = _gvn.transform( new (C) CmpINode( hash_val, no_hash_val));
-  Node *test_assigned  = _gvn.transform( new (C) BoolNode( chk_assigned, BoolTest::eq) );
+  Node *chk_assigned   = _gvn.transform(new (C) CmpINode( hash_val, no_hash_val));
+  Node *test_assigned  = _gvn.transform(new (C) BoolNode( chk_assigned, BoolTest::eq));

   generate_slow_guard(test_assigned, slow_region);

@@ -3854,7 +3870,7 @@

   // Return the combined state.
   set_i_o(        _gvn.transform(result_io)  );
-  set_all_memory( _gvn.transform(result_mem) );
+  set_all_memory( _gvn.transform(result_mem));

   set_result(result_reg, result_val);
   return true;
@@ -3982,7 +3998,7 @@
     Node *opt_isnan = _gvn.transform(ifisnan);
     assert( opt_isnan->is_If(), "Expect an IfNode");
     IfNode *opt_ifisnan = (IfNode*)opt_isnan;
-    Node *iftrue = _gvn.transform( new (C) IfTrueNode(opt_ifisnan) );
+    Node *iftrue = _gvn.transform(new (C) IfTrueNode(opt_ifisnan));

     set_control(iftrue);

@@ -4023,7 +4039,7 @@
     Node *opt_isnan = _gvn.transform(ifisnan);
     assert( opt_isnan->is_If(), "Expect an IfNode");
     IfNode *opt_ifisnan = (IfNode*)opt_isnan;
-    Node *iftrue = _gvn.transform( new (C) IfTrueNode(opt_ifisnan) );
+    Node *iftrue = _gvn.transform(new (C) IfTrueNode(opt_ifisnan));

     set_control(iftrue);

@@ -4152,8 +4168,8 @@

   // Compute the length also, if needed:
   Node* countx = size;
-  countx = _gvn.transform( new (C) SubXNode(countx, MakeConX(base_off)) );
-  countx = _gvn.transform( new (C) URShiftXNode(countx, intcon(LogBytesPerLong) ));
+  countx = _gvn.transform(new (C) SubXNode(countx, MakeConX(base_off)));
+  countx = _gvn.transform(new (C) URShiftXNode(countx, intcon(LogBytesPerLong) ));

   const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
   bool disjoint_bases = true;
@@ -4357,9 +4373,9 @@
     }

     // Return the combined state.
-    set_control(    _gvn.transform(result_reg) );
-    set_i_o(        _gvn.transform(result_i_o) );
-    set_all_memory( _gvn.transform(result_mem) );
+    set_control(    _gvn.transform(result_reg));
+    set_i_o(        _gvn.transform(result_i_o));
+    set_all_memory( _gvn.transform(result_mem));
   } // original reexecute is set back here

   set_result(_gvn.transform(result_val));
@@ -4684,8 +4700,8 @@
     // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
     Node* dest_size   = alloc->in(AllocateNode::AllocSize);
     Node* dest_length = alloc->in(AllocateNode::ALength);
-    Node* dest_tail   = _gvn.transform( new(C) AddINode(dest_offset,
-                                                          copy_length) );
+    Node* dest_tail   = _gvn.transform(new(C) AddINode(dest_offset,
+                                                          copy_length));

     // If there is a head section that needs zeroing, do it now.
     if (find_int_con(dest_offset, -1) != 0) {
@@ -4701,8 +4717,8 @@
     // the copy to a more hardware-friendly word size of 64 bits.
     Node* tail_ctl = NULL;
     if (!stopped() && !dest_tail->eqv_uncast(dest_length)) {
-      Node* cmp_lt   = _gvn.transform( new(C) CmpINode(dest_tail, dest_length) );
-      Node* bol_lt   = _gvn.transform( new(C) BoolNode(cmp_lt, BoolTest::lt) );
+      Node* cmp_lt   = _gvn.transform(new(C) CmpINode(dest_tail, dest_length));
+      Node* bol_lt   = _gvn.transform(new(C) BoolNode(cmp_lt, BoolTest::lt));
       tail_ctl = generate_slow_guard(bol_lt, NULL);
       assert(tail_ctl != NULL || !stopped(), "must be an outcome");
     }
@@ -4745,7 +4761,7 @@
                              dest_size);
         done_ctl->init_req(2, control());
         done_mem->init_req(2, memory(adr_type));
-        set_control( _gvn.transform(done_ctl) );
+        set_control( _gvn.transform(done_ctl));
         set_memory(  _gvn.transform(done_mem), adr_type );
       }
     }
@@ -4832,18 +4848,18 @@
     // Clean up after the checked call.
     // The returned value is either 0 or -1^K,
     // where K = number of partially transferred array elements.
-    Node* cmp = _gvn.transform( new(C) CmpINode(checked_value, intcon(0)) );
-    Node* bol = _gvn.transform( new(C) BoolNode(cmp, BoolTest::eq) );
+    Node* cmp = _gvn.transform(new(C) CmpINode(checked_value, intcon(0)));
+    Node* bol = _gvn.transform(new(C) BoolNode(cmp, BoolTest::eq));
     IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);

     // If it is 0, we are done, so transfer to the end.
-    Node* checks_done = _gvn.transform( new(C) IfTrueNode(iff) );
+    Node* checks_done = _gvn.transform(new(C) IfTrueNode(iff));
     result_region->init_req(checked_path, checks_done);
     result_i_o   ->init_req(checked_path, checked_i_o);
     result_memory->init_req(checked_path, checked_mem);

     // If it is not zero, merge into the slow call.
-    set_control( _gvn.transform( new(C) IfFalseNode(iff) ));
+    set_control( _gvn.transform(new(C) IfFalseNode(iff) ));
     RegionNode* slow_reg2 = new(C) RegionNode(3);
     PhiNode*    slow_i_o2 = new(C) PhiNode(slow_reg2, Type::ABIO);
     PhiNode*    slow_mem2 = new(C) PhiNode(slow_reg2, Type::MEMORY, adr_type);
@@ -4866,16 +4882,16 @@
     } else {
       // We must continue the copy exactly where it failed, or else
       // another thread might see the wrong number of writes to dest.
-      Node* checked_offset = _gvn.transform( new(C) XorINode(checked_value, intcon(-1)) );
+      Node* checked_offset = _gvn.transform(new(C) XorINode(checked_value, intcon(-1)));
       Node* slow_offset    = new(C) PhiNode(slow_reg2, TypeInt::INT);
       slow_offset->init_req(1, intcon(0));
       slow_offset->init_req(2, checked_offset);
       slow_offset  = _gvn.transform(slow_offset);

       // Adjust the arguments by the conditionally incoming offset.
-      Node* src_off_plus  = _gvn.transform( new(C) AddINode(src_offset,  slow_offset) );
-      Node* dest_off_plus = _gvn.transform( new(C) AddINode(dest_offset, slow_offset) );
-      Node* length_minus  = _gvn.transform( new(C) SubINode(copy_length, slow_offset) );
+      Node* src_off_plus  = _gvn.transform(new(C) AddINode(src_offset,  slow_offset));
+      Node* dest_off_plus = _gvn.transform(new(C) AddINode(dest_offset, slow_offset));
+      Node* length_minus  = _gvn.transform(new(C) SubINode(copy_length, slow_offset));

       // Tweak the node variables to adjust the code produced below:
       src_offset  = src_off_plus;
@@ -4914,7 +4930,7 @@
   }

   // Finished; return the combined state.
-  set_control( _gvn.transform(result_region) );
+  set_control( _gvn.transform(result_region));
   set_i_o(     _gvn.transform(result_i_o)    );
   set_memory(  _gvn.transform(result_memory), adr_type );

@@ -5096,10 +5112,10 @@
     int      end_round = (-1 << scale) & (BytesPerLong  - 1);
     Node*    end       = ConvI2X(slice_len);
     if (scale != 0)
-      end = _gvn.transform( new(C) LShiftXNode(end, intcon(scale) ));
+      end = _gvn.transform(new(C) LShiftXNode(end, intcon(scale) ));
     end_base += end_round;
-    end = _gvn.transform( new(C) AddXNode(end, MakeConX(end_base)) );
-    end = _gvn.transform( new(C) AndXNode(end, MakeConX(~end_round)) );
+    end = _gvn.transform(new(C) AddXNode(end, MakeConX(end_base)));
+    end = _gvn.transform(new(C) AndXNode(end, MakeConX(~end_round)));
     mem = ClearArrayNode::clear_memory(control(), mem, dest,
                                        start_con, end, &_gvn);
   } else if (start_con < 0 && dest_size != top()) {
@@ -5108,8 +5124,8 @@
     Node* start = slice_idx;
     start = ConvI2X(start);
     if (scale != 0)
-      start = _gvn.transform( new(C) LShiftXNode( start, intcon(scale) ));
-    start = _gvn.transform( new(C) AddXNode(start, MakeConX(abase)) );
+      start = _gvn.transform(new(C) LShiftXNode( start, intcon(scale) ));
+    start = _gvn.transform(new(C) AddXNode(start, MakeConX(abase)));
     if ((bump_bit | clear_low) != 0) {
       int to_clear = (bump_bit | clear_low);
       // Align up mod 8, then store a jint zero unconditionally
@@ -5120,14 +5136,14 @@
         assert((abase & to_clear) == 0, "array base must be long-aligned");
       } else {
         // Bump 'start' up to (or past) the next jint boundary:
-        start = _gvn.transform( new(C) AddXNode(start, MakeConX(bump_bit)) );
+        start = _gvn.transform(new(C) AddXNode(start, MakeConX(bump_bit)));
         assert((abase & clear_low) == 0, "array base must be int-aligned");
       }
       // Round bumped 'start' down to jlong boundary in body of array.
-      start = _gvn.transform( new(C) AndXNode(start, MakeConX(~to_clear)) );
+      start = _gvn.transform(new(C) AndXNode(start, MakeConX(~to_clear)));
       if (bump_bit != 0) {
         // Store a zero to the immediately preceding jint:
-        Node* x1 = _gvn.transform( new(C) AddXNode(start, MakeConX(-bump_bit)) );
+        Node* x1 = _gvn.transform(new(C) AddXNode(start, MakeConX(-bump_bit)));
         Node* p1 = basic_plus_adr(dest, x1);
         mem = StoreNode::make(_gvn, control(), mem, p1, adr_type, intcon(0), T_INT);
         mem = _gvn.transform(mem);
@@ -5194,8 +5210,8 @@
   Node* sptr  = basic_plus_adr(src,  src_off);
   Node* dptr  = basic_plus_adr(dest, dest_off);
   Node* countx = dest_size;
-  countx = _gvn.transform( new (C) SubXNode(countx, MakeConX(dest_off)) );
-  countx = _gvn.transform( new (C) URShiftXNode(countx, intcon(LogBytesPerLong)) );
+  countx = _gvn.transform(new (C) SubXNode(countx, MakeConX(dest_off)));
+  countx = _gvn.transform(new (C) URShiftXNode(countx, intcon(LogBytesPerLong)));

   bool disjoint_bases = true;   // since alloc != NULL
   generate_unchecked_arraycopy(adr_type, T_LONG, disjoint_bases,
@@ -5360,6 +5376,117 @@
   return true;
 }

+/**
+ * Calculate CRC32 for byte.
+ * int java.util.zip.CRC32.update(int crc, int b)
+ */
+bool LibraryCallKit::inline_updateCRC32() {
+  assert(UseCRC32Intrinsics, "need AVX and LCMUL instructions support");
+  assert(callee()->signature()->size() == 2, "update has 2 parameters");
+  // no receiver since it is static method
+  Node* crc  = argument(0); // type: int
+  Node* b    = argument(1); // type: int
+
+  /*
+   *    int c = ~ crc;
+   *    b = timesXtoThe32[(b ^ c) & 0xFF];
+   *    b = b ^ (c >>> 8);
+   *    crc = ~b;
+   */
+
+  Node* M1 = intcon(-1);
+  crc = _gvn.transform(new (C) XorINode(crc, M1));
+  Node* result = _gvn.transform(new (C) XorINode(crc, b));
+  result = _gvn.transform(new (C) AndINode(result, intcon(0xFF)));
+
+  Node* base = makecon(TypeRawPtr::make(StubRoutines::crc_table_addr()));
+  Node* offset = _gvn.transform(new (C) LShiftINode(result, intcon(0x2)));
+  Node* adr = basic_plus_adr(top(), base, ConvI2X(offset));
+  result = make_load(control(), adr, TypeInt::INT, T_INT);
+
+  crc = _gvn.transform(new (C) URShiftINode(crc, intcon(8)));
+  result = _gvn.transform(new (C) XorINode(crc, result));
+  result = _gvn.transform(new (C) XorINode(result, M1));
+  set_result(result);
+  return true;
+}
+
+/**
+ * Calculate CRC32 for byte[] array.
+ * int java.util.zip.CRC32.updateBytes(int crc, byte[] buf, int off, int len)
+ */
+bool LibraryCallKit::inline_updateBytesCRC32() {
+  assert(UseCRC32Intrinsics, "need AVX and LCMUL instructions support");
+  assert(callee()->signature()->size() == 4, "updateBytes has 4 parameters");
+  // no receiver since it is static method
+  Node* crc     = argument(0); // type: int
+  Node* src     = argument(1); // type: oop
+  Node* offset  = argument(2); // type: int
+  Node* length  = argument(3); // type: int
+
+  const Type* src_type = src->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  if (top_src  == NULL || top_src->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+
+  // Figure out the size and type of the elements we will be copying.
+  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (src_elem != T_BYTE) {
+    return false;
+  }
+
+  // 'src_start' points to src array + scaled offset
+  Node* src_start = array_element_address(src, offset, src_elem);
+
+  // We assume that range check is done by caller.
+  // TODO: generate range check (offset+length < src.length) in debug VM.
+
+  // Call the stub.
+  address stubAddr = StubRoutines::updateBytesCRC32();
+  const char *stubName = "updateBytesCRC32";
+
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::updateBytesCRC32_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 crc, src_start, length);
+  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+  return true;
+}
+
+/**
+ * Calculate CRC32 for ByteBuffer.
+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+bool LibraryCallKit::inline_updateByteBufferCRC32() {
+  assert(UseCRC32Intrinsics, "need AVX and LCMUL instructions support");
+  assert(callee()->signature()->size() == 5, "updateByteBuffer has 4 parameters and one is long");
+  // no receiver since it is static method
+  Node* crc     = argument(0); // type: int
+  Node* src     = argument(1); // type: long
+  Node* offset  = argument(3); // type: int
+  Node* length  = argument(4); // type: int
+
+  src = ConvL2X(src);  // adjust Java long to machine word
+  Node* base = _gvn.transform(new (C) CastX2PNode(src));
+  offset = ConvI2X(offset);
+
+  // 'src_start' points to src array + scaled offset
+  Node* src_start = basic_plus_adr(top(), base, offset);
+
+  // Call the stub.
+  address stubAddr = StubRoutines::updateBytesCRC32();
+  const char *stubName = "updateBytesCRC32";
+
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::updateBytesCRC32_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 crc, src_start, length);
+  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+  return true;
+}
+
 //----------------------------inline_reference_get----------------------------
 // public T java.lang.ref.Reference.get();
 bool LibraryCallKit::inline_reference_get() {
--- a/src/share/vm/opto/runtime.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/opto/runtime.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -829,6 +829,28 @@
   return TypeFunc::make(domain, range);
 }

+/**
+ * int updateBytesCRC32(int crc, byte* b, int len)
+ */
+const TypeFunc* OptoRuntime::updateBytesCRC32_Type() {
+  // create input type (domain)
+  int num_args      = 3;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypeInt::INT;        // crc
+  fields[argp++] = TypePtr::NOTNULL;    // src
+  fields[argp++] = TypeInt::INT;        // len
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
+  return TypeFunc::make(domain, range);
+}
+
 // for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning void
 const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
   // create input type (domain)
--- a/src/share/vm/opto/runtime.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/opto/runtime.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -284,6 +284,8 @@
   static const TypeFunc* aescrypt_block_Type();
   static const TypeFunc* cipherBlockChaining_aescrypt_Type();

+  static const TypeFunc* updateBytesCRC32_Type();
+
   // leaf on stack replacement interpreter accessor types
   static const TypeFunc* osr_end_Type();
--- a/src/share/vm/runtime/globals.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/runtime/globals.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -644,6 +644,9 @@
   product(bool, UseAESIntrinsics, false,                                    \
           "use intrinsics for AES versions of crypto")                      \
                                                                             \
+  product(bool, UseCRC32Intrinsics, false,                                  \
+          "use intrinsics for java.util.zip.CRC32")                         \
+                                                                            \
   develop(bool, TraceCallFixup, false,                                      \
           "traces all call fixups")                                         \
                                                                             \
--- a/src/share/vm/runtime/stubRoutines.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/runtime/stubRoutines.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -125,6 +125,9 @@
 address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
 address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;

+address StubRoutines::_updateBytesCRC32 = NULL;
+address StubRoutines::_crc_table_adr = NULL;
+
 double (* StubRoutines::_intrinsic_log   )(double) = NULL;
 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
 double (* StubRoutines::_intrinsic_exp   )(double) = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/share/vm/runtime/stubRoutines.hpp	Tue Jul 02 20:42:12 2013 -0400
@@ -204,6 +204,9 @@
   static address _cipherBlockChaining_encryptAESCrypt;
   static address _cipherBlockChaining_decryptAESCrypt;

+  static address _updateBytesCRC32;
+  static address _crc_table_adr;
+
   // These are versions of the java.lang.Math methods which perform
   // the same operations as the intrinsic version.  They are used for
   // constant folding in the compiler to ensure equivalence.  If the
@@ -342,6 +345,9 @@
   static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
   static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }

+  static address updateBytesCRC32()    { return _updateBytesCRC32; }
+  static address crc_table_addr()      { return _crc_table_adr; }
+
   static address select_fill_function(BasicType t, bool aligned, const char* &name);

   static address zero_aligned_words()   { return _zero_aligned_words; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/7088419/CRCTest.java	Tue Jul 02 20:42:12 2013 -0400
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+   @test
+   @bug 7088419
+   @run main CRCTest
+   @summary Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32 and java.util.zip.Adler32
+ */
+
+import java.nio.ByteBuffer;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+public class CRCTest {
+
+    public static void main(String[] args) throws Exception {
+
+        byte[] b = initializedBytes(4096 * 4096);
+
+        {
+            CRC32 crc1 = new CRC32();
+            CRC32 crc2 = new CRC32();
+            CRC32 crc3 = new CRC32();
+            CRC32 crc4 = new CRC32();
+
+            crc1.update(b, 0, b.length);
+            updateSerial(crc2, b, 0, b.length);
+            updateDirect(crc3, b, 0, b.length);
+            updateSerialSlow(crc4, b, 0, b.length);
+
+            check(crc1, crc2);
+            check(crc3, crc4);
+            check(crc1, crc3);
+
+            crc1.update(17);
+            crc2.update(17);
+            crc3.update(17);
+            crc4.update(17);
+
+            crc1.update(b, 1, b.length-2);
+            updateSerial(crc2, b, 1, b.length-2);
+            updateDirect(crc3, b, 1, b.length-2);
+            updateSerialSlow(crc4, b, 1, b.length-2);
+
+            check(crc1, crc2);
+            check(crc3, crc4);
+            check(crc1, crc3);
+
+            report("finished huge crc", crc1, crc2, crc3, crc4);
+
+            for (int i = 0; i < 256; i++) {
+                for (int j = 0; j < 256; j += 1) {
+                    crc1.update(b, i, j);
+                    updateSerial(crc2, b, i, j);
+                    updateDirect(crc3, b, i, j);
+                    updateSerialSlow(crc4, b, i, j);
+
+                    check(crc1, crc2);
+                    check(crc3, crc4);
+                    check(crc1, crc3);
+
+                }
+            }
+
+            report("finished small survey crc", crc1, crc2, crc3, crc4);
+        }
+
+    }
+
+    private static void report(String s, Checksum crc1, Checksum crc2,
+            Checksum crc3, Checksum crc4) {
+        System.out.println(s + ", crc1 = " + crc1.getValue() +
+                ", crc2 = " + crc2.getValue()+
+                ", crc3 = " + crc3.getValue()+
+                ", crc4 = " + crc4.getValue());
+    }
+
+    private static void check(Checksum crc1, Checksum crc2) throws Exception {
+        if (crc1.getValue() != crc2.getValue()) {
+            String s = "value 1 = " + crc1.getValue() + ", value 2 = " + crc2.getValue();
+            System.err.println(s);
+            throw new Exception(s);
+        }
+    }
+
+    private static byte[] initializedBytes(int M) {
+        byte[] bytes = new byte[M];
+        for (int i = 0; i < bytes.length; i++) {
+            bytes[i] = (byte) i;
+        }
+        return bytes;
+    }
+
+    private static void updateSerial(Checksum crc, byte[] b, int start, int length) {
+        for (int i = 0; i < length; i++)
+            crc.update(b[i+start]);
+    }
+
+    private static void updateSerialSlow(Checksum crc, byte[] b, int start, int length) {
+        for (int i = 0; i < length; i++)
+            crc.update(b[i+start]);
+        crc.getValue();
+    }
+
+    private static void updateDirect(CRC32 crc3, byte[] b, int start, int length) {
+        ByteBuffer buf = ByteBuffer.allocateDirect(length);
+        buf.put(b, start, length);
+        buf.flip();
+        crc3.update(buf);
+    }
+}