diff src/cpu/x86/vm/templateInterpreter_x86_32.cpp @ 11080:b800986664f4

7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32 Summary: add intrinsics using new instruction to interpreter, C1, C2, for suitable x86; add test Reviewed-by: kvn, twisti
author drchase
date Tue, 02 Jul 2013 20:42:12 -0400
parents 603ca7e51354
children ca0165daa6ec
line wrap: on
line diff
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Tue Jul 02 07:51:31 2013 +0200
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Tue Jul 02 20:42:12 2013 -0400
@@ -868,6 +868,120 @@
   return generate_accessor_entry();
 }
 
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    // rbx,: Method*
+    // rsi: senderSP must preserved for slow path, set SP to it on fast path
+    // rdx: scratch
+    // rdi: scratch
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+             SafepointSynchronize::_not_synchronized);
+    __ jcc(Assembler::notEqual, slow_path);
+
+    // We don't generate local frame and don't align stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters
+    const Register crc = rax;  // crc
+    const Register val = rdx;  // source java byte value
+    const Register tbl = rdi;  // scratch
+
+    // Arguments are reversed on java expression stack
+    __ movl(val, Address(rsp,   wordSize)); // byte value
+    __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC
+
+    __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr()));
+    __ notl(crc); // ~crc
+    __ update_byte_crc32(crc, val, tbl);
+    __ notl(crc); // ~crc
+    // result in rax
+
+    // _areturn
+    __ pop(rdi);                // get return address
+    __ mov(rsp, rsi);           // set sp to sender sp
+    __ jmp(rdi);
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    (void) generate_native_entry(false);
+
+    return entry;
+  }
+  return generate_native_entry(false);
+}
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    // rbx,: Method*
+    // rsi: senderSP must preserved for slow path, set SP to it on fast path
+    // rdx: scratch
+    // rdi: scratch
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()),
+             SafepointSynchronize::_not_synchronized);
+    __ jcc(Assembler::notEqual, slow_path);
+
+    // We don't generate local frame and don't align stack because
+    // we call stub code and there is no safepoint on this path.
+
+    // Load parameters
+    const Register crc = rax;  // crc
+    const Register buf = rdx;  // source java byte array address
+    const Register len = rdi;  // length
+
+    // Arguments are reversed on java expression stack
+    __ movl(len,   Address(rsp,   wordSize)); // Length
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+      __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
+      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
+    } else {
+      __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
+      __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
+    }
+
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
+    // result in rax
+
+    // _areturn
+    __ pop(rdi);                // get return address
+    __ mov(rsp, rsi);           // set sp to sender sp
+    __ jmp(rdi);
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    (void) generate_native_entry(false);
+
+    return entry;
+  }
+  return generate_native_entry(false);
+}
+
 //
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the native method
@@ -1501,15 +1615,16 @@
   // determine code generation flags
   bool synchronized = false;
   address entry_point = NULL;
+  InterpreterGenerator* ig_this = (InterpreterGenerator*)this;
 
   switch (kind) {
-    case Interpreter::zerolocals             :                                                                             break;
-    case Interpreter::zerolocals_synchronized: synchronized = true;                                                        break;
-    case Interpreter::native                 : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false);  break;
-    case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true);   break;
-    case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();        break;
-    case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();     break;
-    case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();     break;
+    case Interpreter::zerolocals             :                                                       break;
+    case Interpreter::zerolocals_synchronized: synchronized = true;                                  break;
+    case Interpreter::native                 : entry_point = ig_this->generate_native_entry(false);  break;
+    case Interpreter::native_synchronized    : entry_point = ig_this->generate_native_entry(true);   break;
+    case Interpreter::empty                  : entry_point = ig_this->generate_empty_entry();        break;
+    case Interpreter::accessor               : entry_point = ig_this->generate_accessor_entry();     break;
+    case Interpreter::abstract               : entry_point = ig_this->generate_abstract_entry();     break;
 
     case Interpreter::java_lang_math_sin     : // fall thru
     case Interpreter::java_lang_math_cos     : // fall thru
@@ -1519,9 +1634,15 @@
     case Interpreter::java_lang_math_log10   : // fall thru
     case Interpreter::java_lang_math_sqrt    : // fall thru
     case Interpreter::java_lang_math_pow     : // fall thru
-    case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
+    case Interpreter::java_lang_math_exp     : entry_point = ig_this->generate_math_entry(kind);      break;
     case Interpreter::java_lang_ref_reference_get
-                                             : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+                                             : entry_point = ig_this->generate_Reference_get_entry(); break;
+    case Interpreter::java_util_zip_CRC32_update
+                                             : entry_point = ig_this->generate_CRC32_update_entry();  break;
+    case Interpreter::java_util_zip_CRC32_updateBytes
+                                             : // fall thru
+    case Interpreter::java_util_zip_CRC32_updateByteBuffer
+                                             : entry_point = ig_this->generate_CRC32_updateBytes_entry(kind); break;
     default:
       fatal(err_msg("unexpected method kind: %d", kind));
       break;
@@ -1529,7 +1650,7 @@
 
   if (entry_point) return entry_point;
 
-  return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized);
+  return ig_this->generate_normal_entry(synchronized);
 
 }