diff src/cpu/x86/vm/assembler_x86.cpp @ 986:62001a362ce9

6827605: new String intrinsics may prevent EA scalar replacement 6875866: Intrinsic for String.indexOf() is broken on x86 with SSE4.2 Summary: Modify String intrinsic methods to pass char[] pointers instead of string oops. Reviewed-by: never
author kvn
date Mon, 14 Sep 2009 12:14:20 -0700
parents 60fea60a6db5
children d40f03b57795
line wrap: on
line diff
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Sep 14 09:49:54 2009 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon Sep 14 12:14:20 2009 -0700
@@ -8404,6 +8404,319 @@
 }
 #endif // _LP64
 
+// IndexOf substring.
+void MacroAssembler::string_indexof(Register str1, Register str2,
+                                    Register cnt1, Register cnt2, Register result,
+                                    XMMRegister vec, Register tmp) {
+  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+
+  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
+        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;
+
+  push(str1); // string addr
+  push(str2); // substr addr
+  push(cnt2); // substr count
+  jmpb(PREP_FOR_SCAN);
+
+  // Substr count saved at sp
+  // Substr saved at sp+1*wordSize
+  // String saved at sp+2*wordSize
+
+  // Reload substr for rescan
+  bind(RELOAD_SUBSTR);
+  movl(cnt2, Address(rsp, 0));
+  movptr(str2, Address(rsp, wordSize));
+  // We came here after the beginninig of the substring was
+  // matched but the rest of it was not so we need to search
+  // again. Start from the next element after the previous match.
+  subptr(str1, result); // Restore counter
+  shrl(str1, 1);
+  addl(cnt1, str1);
+  lea(str1, Address(result, 2)); // Reload string
+
+  // Load substr
+  bind(PREP_FOR_SCAN);
+  movdqu(vec, Address(str2, 0));
+  addl(cnt1, 8);  // prime the loop
+  subptr(str1, 16);
+
+  // Scan string for substr in 16-byte vectors
+  bind(SCAN_TO_SUBSTR);
+  subl(cnt1, 8);
+  addptr(str1, 16);
+
+  // pcmpestri
+  //   inputs:
+  //     xmm - substring
+  //     rax - substring length (elements count)
+  //     mem - scaned string
+  //     rdx - string length (elements count)
+  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+  //   outputs:
+  //     rcx - matched index in string
+  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+
+  pcmpestri(vec, Address(str1, 0), 0x0d);
+  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0
+  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
+
+  // Fallthrough: found a potential substr
+
+  // Make sure string is still long enough
+  subl(cnt1, tmp);
+  cmpl(cnt1, cnt2);
+  jccb(Assembler::negative, RET_NOT_FOUND);
+  // Compute start addr of substr
+  lea(str1, Address(str1, tmp, Address::times_2));
+  movptr(result, str1); // save
+
+  // Compare potential substr
+  addl(cnt1, 8);     // prime the loop
+  addl(cnt2, 8);
+  subptr(str1, 16);
+  subptr(str2, 16);
+
+  // Scan 16-byte vectors of string and substr
+  bind(SCAN_SUBSTR);
+  subl(cnt1, 8);
+  subl(cnt2, 8);
+  addptr(str1, 16);
+  addptr(str2, 16);
+  movdqu(vec, Address(str2, 0));
+  pcmpestri(vec, Address(str1, 0), 0x0d);
+  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
+  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
+
+  // Compute substr offset
+  subptr(result, Address(rsp, 2*wordSize));
+  shrl(result, 1); // index
+  jmpb(CLEANUP);
+
+  bind(RET_NOT_FOUND);
+  movl(result, -1);
+
+  bind(CLEANUP);
+  addptr(rsp, 3*wordSize);
+}
+
+// Compare strings.
+void MacroAssembler::string_compare(Register str1, Register str2,
+                                    Register cnt1, Register cnt2, Register result,
+                                    XMMRegister vec1, XMMRegister vec2) {
+  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
+
+  // Compute the minimum of the string lengths and the
+  // difference of the string lengths (stack).
+  // Do the conditional move stuff
+  movl(result, cnt1);
+  subl(cnt1, cnt2);
+  push(cnt1);
+  if (VM_Version::supports_cmov()) {
+    cmovl(Assembler::lessEqual, cnt2, result);
+  } else {
+    Label GT_LABEL;
+    jccb(Assembler::greater, GT_LABEL);
+    movl(cnt2, result);
+    bind(GT_LABEL);
+  }
+
+  // Is the minimum length zero?
+  testl(cnt2, cnt2);
+  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
+
+  // Load first characters
+  load_unsigned_short(result, Address(str1, 0));
+  load_unsigned_short(cnt1, Address(str2, 0));
+
+  // Compare first characters
+  subl(result, cnt1);
+  jcc(Assembler::notZero,  POP_LABEL);
+  decrementl(cnt2);
+  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
+
+  {
+    // Check after comparing first character to see if strings are equivalent
+    Label LSkip2;
+    // Check if the strings start at same location
+    cmpptr(str1, str2);
+    jccb(Assembler::notEqual, LSkip2);
+
+    // Check if the length difference is zero (from stack)
+    cmpl(Address(rsp, 0), 0x0);
+    jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
+
+    // Strings might not be equivalent
+    bind(LSkip2);
+  }
+
+  // Advance to next character
+  addptr(str1, 2);
+  addptr(str2, 2);
+
+  if (UseSSE42Intrinsics) {
+    // With SSE4.2, use double quad vector compare
+    Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
+    // Setup to compare 16-byte vectors
+    movl(cnt1, cnt2);
+    andl(cnt2, 0xfffffff8); // cnt2 holds the vector count
+    andl(cnt1, 0x00000007); // cnt1 holds the tail count
+    testl(cnt2, cnt2);
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(str2, Address(str2, cnt2, Address::times_2));
+    lea(str1, Address(str1, cnt2, Address::times_2));
+    negptr(cnt2);
+
+    bind(COMPARE_VECTORS);
+    movdqu(vec1, Address(str1, cnt2, Address::times_2));
+    movdqu(vec2, Address(str2, cnt2, Address::times_2));
+    pxor(vec1, vec2);
+    ptest(vec1, vec1);
+    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
+    addptr(cnt2, 8);
+    jcc(Assembler::notZero, COMPARE_VECTORS);
+    jmpb(COMPARE_TAIL);
+
+    // Mismatched characters in the vectors
+    bind(VECTOR_NOT_EQUAL);
+    lea(str1, Address(str1, cnt2, Address::times_2));
+    lea(str2, Address(str2, cnt2, Address::times_2));
+    movl(cnt1, 8);
+
+    // Compare tail (< 8 chars), or rescan last vectors to
+    // find 1st mismatched characters
+    bind(COMPARE_TAIL);
+    testl(cnt1, cnt1);
+    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
+    movl(cnt2, cnt1);
+    // Fallthru to tail compare
+  }
+
+  // Shift str2 and str1 to the end of the arrays, negate min
+  lea(str1, Address(str1, cnt2, Address::times_2, 0));
+  lea(str2, Address(str2, cnt2, Address::times_2, 0));
+  negptr(cnt2);
+
+    // Compare the rest of the characters
+  bind(WHILE_HEAD_LABEL);
+  load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0));
+  load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0));
+  subl(result, cnt1);
+  jccb(Assembler::notZero, POP_LABEL);
+  increment(cnt2);
+  jcc(Assembler::notZero, WHILE_HEAD_LABEL);
+
+  // Strings are equal up to min length.  Return the length difference.
+  bind(LENGTH_DIFF_LABEL);
+  pop(result);
+  jmpb(DONE_LABEL);
+
+  // Discard the stored length difference
+  bind(POP_LABEL);
+  addptr(rsp, wordSize);
+
+  // That's it
+  bind(DONE_LABEL);
+}
+
+// Compare char[] arrays aligned to 4 bytes or substrings.
+void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+                                        Register limit, Register result, Register chr,
+                                        XMMRegister vec1, XMMRegister vec2) {
+  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
+
+  int length_offset  = arrayOopDesc::length_offset_in_bytes();
+  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
+
+  // Check the input args
+  cmpptr(ary1, ary2);
+  jcc(Assembler::equal, TRUE_LABEL);
+
+  if (is_array_equ) {
+    // Need additional checks for arrays_equals.
+    andptr(ary1, ary2);
+    jcc(Assembler::zero, FALSE_LABEL); // One pointer is NULL
+
+    // Check the lengths
+    movl(limit, Address(ary1, length_offset));
+    cmpl(limit, Address(ary2, length_offset));
+    jcc(Assembler::notEqual, FALSE_LABEL);
+  }
+
+  // count == 0
+  testl(limit, limit);
+  jcc(Assembler::zero, TRUE_LABEL);
+
+  if (is_array_equ) {
+    // Load array address
+    lea(ary1, Address(ary1, base_offset));
+    lea(ary2, Address(ary2, base_offset));
+  }
+
+  shll(limit, 1);      // byte count != 0
+  movl(result, limit); // copy
+
+  if (UseSSE42Intrinsics) {
+    // With SSE4.2, use double quad vector compare
+    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+    // Compare 16-byte vectors
+    andl(result, 0x0000000e);  //   tail count (in bytes)
+    andl(limit, 0xfffffff0);   // vector count (in bytes)
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(ary1, Address(ary1, limit, Address::times_1));
+    lea(ary2, Address(ary2, limit, Address::times_1));
+    negptr(limit);
+
+    bind(COMPARE_WIDE_VECTORS);
+    movdqu(vec1, Address(ary1, limit, Address::times_1));
+    movdqu(vec2, Address(ary2, limit, Address::times_1));
+    pxor(vec1, vec2);
+    ptest(vec1, vec1);
+    jccb(Assembler::notZero, FALSE_LABEL);
+    addptr(limit, 16);
+    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+    bind(COMPARE_TAIL); // limit is zero
+    movl(limit, result);
+    // Fallthru to tail compare
+  }
+
+  // Compare 4-byte vectors
+  andl(limit, 0xfffffffc); // vector count (in bytes)
+  jccb(Assembler::zero, COMPARE_CHAR);
+
+  lea(ary1, Address(ary1, limit, Address::times_1));
+  lea(ary2, Address(ary2, limit, Address::times_1));
+  negptr(limit);
+
+  bind(COMPARE_VECTORS);
+  movl(chr, Address(ary1, limit, Address::times_1));
+  cmpl(chr, Address(ary2, limit, Address::times_1));
+  jccb(Assembler::notEqual, FALSE_LABEL);
+  addptr(limit, 4);
+  jcc(Assembler::notZero, COMPARE_VECTORS);
+
+  // Compare trailing char (final 2 bytes), if any
+  bind(COMPARE_CHAR);
+  testl(result, 0x2);   // tail  char
+  jccb(Assembler::zero, TRUE_LABEL);
+  load_unsigned_short(chr, Address(ary1, 0));
+  load_unsigned_short(limit, Address(ary2, 0));
+  cmpl(chr, limit);
+  jccb(Assembler::notEqual, FALSE_LABEL);
+
+  bind(TRUE_LABEL);
+  movl(result, 1);   // return true
+  jmpb(DONE);
+
+  bind(FALSE_LABEL);
+  xorl(result, result); // return false
+
+  // That's it
+  bind(DONE);
+}
+
 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
   switch (cond) {
     // Note some conditions are synonyms for others