Mercurial > hg > truffle
comparison src/cpu/x86/vm/macroAssembler_x86.cpp @ 20886:0e647427eee4
Merge with dc41766b35e11348281b76fd70b456b6ba3cf7e9
author | Michael Van De Vanter <michael.van.de.vanter@oracle.com> |
---|---|
date | Fri, 10 Apr 2015 16:58:26 -0700 |
parents | 7848fc12602b |
children | be896a1983c0 |
comparison
equal
deleted
inserted
replaced
20885:e7ece52e1ff3 | 20886:0e647427eee4 |
---|---|
1767 // order to reduce the number of conditional branches in the most common cases. | 1767 // order to reduce the number of conditional branches in the most common cases. |
1768 // Beware -- there's a subtle invariant that fetch of the markword | 1768 // Beware -- there's a subtle invariant that fetch of the markword |
1769 // at [FETCH], below, will never observe a biased encoding (*101b). | 1769 // at [FETCH], below, will never observe a biased encoding (*101b). |
1770 // If this invariant is not held we risk exclusion (safety) failure. | 1770 // If this invariant is not held we risk exclusion (safety) failure. |
1771 if (UseBiasedLocking && !UseOptoBiasInlining) { | 1771 if (UseBiasedLocking && !UseOptoBiasInlining) { |
1772 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters); | 1772 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters); |
1773 } | 1773 } |
1774 | 1774 |
1775 #if INCLUDE_RTM_OPT | 1775 #if INCLUDE_RTM_OPT |
1776 if (UseRTMForStackLocks && use_rtm) { | 1776 if (UseRTMForStackLocks && use_rtm) { |
1777 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg, | 1777 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg, |
7291 bind(L_copy_1_char_exit); | 7291 bind(L_copy_1_char_exit); |
7292 addptr(result, len); // len is negative count of not processed elements | 7292 addptr(result, len); // len is negative count of not processed elements |
7293 bind(L_done); | 7293 bind(L_done); |
7294 } | 7294 } |
7295 | 7295 |
7296 #ifdef _LP64 | |
7297 /** | |
7298 * Helper for multiply_to_len(). | |
7299 */ | |
7300 void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { | |
7301 addq(dest_lo, src1); | |
7302 adcq(dest_hi, 0); | |
7303 addq(dest_lo, src2); | |
7304 adcq(dest_hi, 0); | |
7305 } | |
7306 | |
7307 /** | |
7308 * Multiply 64 bit by 64 bit first loop. | |
7309 */ | |
7310 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, | |
7311 Register y, Register y_idx, Register z, | |
7312 Register carry, Register product, | |
7313 Register idx, Register kdx) { | |
7314 // | |
7315 // jlong carry, x[], y[], z[]; | |
7316 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { | |
7317 // huge_128 product = y[idx] * x[xstart] + carry; | |
7318 // z[kdx] = (jlong)product; | |
7319 // carry = (jlong)(product >>> 64); | |
7320 // } | |
7321 // z[xstart] = carry; | |
7322 // | |
7323 | |
7324 Label L_first_loop, L_first_loop_exit; | |
7325 Label L_one_x, L_one_y, L_multiply; | |
7326 | |
7327 decrementl(xstart); | |
7328 jcc(Assembler::negative, L_one_x); | |
7329 | |
7330 movq(x_xstart, Address(x, xstart, Address::times_4, 0)); | |
7331 rorq(x_xstart, 32); // convert big-endian to little-endian | |
7332 | |
7333 bind(L_first_loop); | |
7334 decrementl(idx); | |
7335 jcc(Assembler::negative, L_first_loop_exit); | |
7336 decrementl(idx); | |
7337 jcc(Assembler::negative, L_one_y); | |
7338 movq(y_idx, Address(y, idx, Address::times_4, 0)); | |
7339 rorq(y_idx, 32); // convert big-endian to little-endian | |
7340 bind(L_multiply); | |
7341 movq(product, x_xstart); | |
7342 mulq(y_idx); // product(rax) * y_idx -> rdx:rax | |
7343 addq(product, carry); | |
7344 adcq(rdx, 0); | |
7345 subl(kdx, 2); | |
7346 movl(Address(z, kdx, Address::times_4, 4), product); | |
7347 shrq(product, 32); | |
7348 movl(Address(z, kdx, Address::times_4, 0), product); | |
7349 movq(carry, rdx); | |
7350 jmp(L_first_loop); | |
7351 | |
7352 bind(L_one_y); | |
7353 movl(y_idx, Address(y, 0)); | |
7354 jmp(L_multiply); | |
7355 | |
7356 bind(L_one_x); | |
7357 movl(x_xstart, Address(x, 0)); | |
7358 jmp(L_first_loop); | |
7359 | |
7360 bind(L_first_loop_exit); | |
7361 } | |
7362 | |
7363 /** | |
7364 * Multiply 64 bit by 64 bit and add 128 bit. | |
7365 */ | |
7366 void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, Register z, | |
7367 Register yz_idx, Register idx, | |
7368 Register carry, Register product, int offset) { | |
7369 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; | |
7370 // z[kdx] = (jlong)product; | |
7371 | |
7372 movq(yz_idx, Address(y, idx, Address::times_4, offset)); | |
7373 rorq(yz_idx, 32); // convert big-endian to little-endian | |
7374 movq(product, x_xstart); | |
7375 mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax) | |
7376 movq(yz_idx, Address(z, idx, Address::times_4, offset)); | |
7377 rorq(yz_idx, 32); // convert big-endian to little-endian | |
7378 | |
7379 add2_with_carry(rdx, product, carry, yz_idx); | |
7380 | |
7381 movl(Address(z, idx, Address::times_4, offset+4), product); | |
7382 shrq(product, 32); | |
7383 movl(Address(z, idx, Address::times_4, offset), product); | |
7384 | |
7385 } | |
7386 | |
7387 /** | |
7388 * Multiply 128 bit by 128 bit. Unrolled inner loop. | |
7389 */ | |
7390 void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, Register y, Register z, | |
7391 Register yz_idx, Register idx, Register jdx, | |
7392 Register carry, Register product, | |
7393 Register carry2) { | |
7394 // jlong carry, x[], y[], z[]; | |
7395 // int kdx = ystart+1; | |
7396 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop | |
7397 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; | |
7398 // z[kdx+idx+1] = (jlong)product; | |
7399 // jlong carry2 = (jlong)(product >>> 64); | |
7400 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; | |
7401 // z[kdx+idx] = (jlong)product; | |
7402 // carry = (jlong)(product >>> 64); | |
7403 // } | |
7404 // idx += 2; | |
7405 // if (idx > 0) { | |
7406 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; | |
7407 // z[kdx+idx] = (jlong)product; | |
7408 // carry = (jlong)(product >>> 64); | |
7409 // } | |
7410 // | |
7411 | |
7412 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; | |
7413 | |
7414 movl(jdx, idx); | |
7415 andl(jdx, 0xFFFFFFFC); | |
7416 shrl(jdx, 2); | |
7417 | |
7418 bind(L_third_loop); | |
7419 subl(jdx, 1); | |
7420 jcc(Assembler::negative, L_third_loop_exit); | |
7421 subl(idx, 4); | |
7422 | |
7423 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); | |
7424 movq(carry2, rdx); | |
7425 | |
7426 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); | |
7427 movq(carry, rdx); | |
7428 jmp(L_third_loop); | |
7429 | |
7430 bind (L_third_loop_exit); | |
7431 | |
7432 andl (idx, 0x3); | |
7433 jcc(Assembler::zero, L_post_third_loop_done); | |
7434 | |
7435 Label L_check_1; | |
7436 subl(idx, 2); | |
7437 jcc(Assembler::negative, L_check_1); | |
7438 | |
7439 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); | |
7440 movq(carry, rdx); | |
7441 | |
7442 bind (L_check_1); | |
7443 addl (idx, 0x2); | |
7444 andl (idx, 0x1); | |
7445 subl(idx, 1); | |
7446 jcc(Assembler::negative, L_post_third_loop_done); | |
7447 | |
7448 movl(yz_idx, Address(y, idx, Address::times_4, 0)); | |
7449 movq(product, x_xstart); | |
7450 mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax) | |
7451 movl(yz_idx, Address(z, idx, Address::times_4, 0)); | |
7452 | |
7453 add2_with_carry(rdx, product, yz_idx, carry); | |
7454 | |
7455 movl(Address(z, idx, Address::times_4, 0), product); | |
7456 shrq(product, 32); | |
7457 | |
7458 shlq(rdx, 32); | |
7459 orq(product, rdx); | |
7460 movq(carry, product); | |
7461 | |
7462 bind(L_post_third_loop_done); | |
7463 } | |
7464 | |
7465 /** | |
7466 * Multiply 128 bit by 128 bit using BMI2. Unrolled inner loop. | |
7467 * | |
7468 */ | |
7469 void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z, | |
7470 Register carry, Register carry2, | |
7471 Register idx, Register jdx, | |
7472 Register yz_idx1, Register yz_idx2, | |
7473 Register tmp, Register tmp3, Register tmp4) { | |
7474 assert(UseBMI2Instructions, "should be used only when BMI2 is available"); | |
7475 | |
7476 // jlong carry, x[], y[], z[]; | |
7477 // int kdx = ystart+1; | |
7478 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop | |
7479 // huge_128 tmp3 = (y[idx+1] * rdx) + z[kdx+idx+1] + carry; | |
7480 // jlong carry2 = (jlong)(tmp3 >>> 64); | |
7481 // huge_128 tmp4 = (y[idx] * rdx) + z[kdx+idx] + carry2; | |
7482 // carry = (jlong)(tmp4 >>> 64); | |
7483 // z[kdx+idx+1] = (jlong)tmp3; | |
7484 // z[kdx+idx] = (jlong)tmp4; | |
7485 // } | |
7486 // idx += 2; | |
7487 // if (idx > 0) { | |
7488 // yz_idx1 = (y[idx] * rdx) + z[kdx+idx] + carry; | |
7489 // z[kdx+idx] = (jlong)yz_idx1; | |
7490 // carry = (jlong)(yz_idx1 >>> 64); | |
7491 // } | |
7492 // | |
7493 | |
7494 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; | |
7495 | |
7496 movl(jdx, idx); | |
7497 andl(jdx, 0xFFFFFFFC); | |
7498 shrl(jdx, 2); | |
7499 | |
7500 bind(L_third_loop); | |
7501 subl(jdx, 1); | |
7502 jcc(Assembler::negative, L_third_loop_exit); | |
7503 subl(idx, 4); | |
7504 | |
7505 movq(yz_idx1, Address(y, idx, Address::times_4, 8)); | |
7506 rorxq(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian | |
7507 movq(yz_idx2, Address(y, idx, Address::times_4, 0)); | |
7508 rorxq(yz_idx2, yz_idx2, 32); | |
7509 | |
7510 mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3 | |
7511 mulxq(carry2, tmp, yz_idx2); // yz_idx2 * rdx -> carry2:tmp | |
7512 | |
7513 movq(yz_idx1, Address(z, idx, Address::times_4, 8)); | |
7514 rorxq(yz_idx1, yz_idx1, 32); | |
7515 movq(yz_idx2, Address(z, idx, Address::times_4, 0)); | |
7516 rorxq(yz_idx2, yz_idx2, 32); | |
7517 | |
7518 if (VM_Version::supports_adx()) { | |
7519 adcxq(tmp3, carry); | |
7520 adoxq(tmp3, yz_idx1); | |
7521 | |
7522 adcxq(tmp4, tmp); | |
7523 adoxq(tmp4, yz_idx2); | |
7524 | |
7525 movl(carry, 0); // does not affect flags | |
7526 adcxq(carry2, carry); | |
7527 adoxq(carry2, carry); | |
7528 } else { | |
7529 add2_with_carry(tmp4, tmp3, carry, yz_idx1); | |
7530 add2_with_carry(carry2, tmp4, tmp, yz_idx2); | |
7531 } | |
7532 movq(carry, carry2); | |
7533 | |
7534 movl(Address(z, idx, Address::times_4, 12), tmp3); | |
7535 shrq(tmp3, 32); | |
7536 movl(Address(z, idx, Address::times_4, 8), tmp3); | |
7537 | |
7538 movl(Address(z, idx, Address::times_4, 4), tmp4); | |
7539 shrq(tmp4, 32); | |
7540 movl(Address(z, idx, Address::times_4, 0), tmp4); | |
7541 | |
7542 jmp(L_third_loop); | |
7543 | |
7544 bind (L_third_loop_exit); | |
7545 | |
7546 andl (idx, 0x3); | |
7547 jcc(Assembler::zero, L_post_third_loop_done); | |
7548 | |
7549 Label L_check_1; | |
7550 subl(idx, 2); | |
7551 jcc(Assembler::negative, L_check_1); | |
7552 | |
7553 movq(yz_idx1, Address(y, idx, Address::times_4, 0)); | |
7554 rorxq(yz_idx1, yz_idx1, 32); | |
7555 mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3 | |
7556 movq(yz_idx2, Address(z, idx, Address::times_4, 0)); | |
7557 rorxq(yz_idx2, yz_idx2, 32); | |
7558 | |
7559 add2_with_carry(tmp4, tmp3, carry, yz_idx2); | |
7560 | |
7561 movl(Address(z, idx, Address::times_4, 4), tmp3); | |
7562 shrq(tmp3, 32); | |
7563 movl(Address(z, idx, Address::times_4, 0), tmp3); | |
7564 movq(carry, tmp4); | |
7565 | |
7566 bind (L_check_1); | |
7567 addl (idx, 0x2); | |
7568 andl (idx, 0x1); | |
7569 subl(idx, 1); | |
7570 jcc(Assembler::negative, L_post_third_loop_done); | |
7571 movl(tmp4, Address(y, idx, Address::times_4, 0)); | |
7572 mulxq(carry2, tmp3, tmp4); // tmp4 * rdx -> carry2:tmp3 | |
7573 movl(tmp4, Address(z, idx, Address::times_4, 0)); | |
7574 | |
7575 add2_with_carry(carry2, tmp3, tmp4, carry); | |
7576 | |
7577 movl(Address(z, idx, Address::times_4, 0), tmp3); | |
7578 shrq(tmp3, 32); | |
7579 | |
7580 shlq(carry2, 32); | |
7581 orq(tmp3, carry2); | |
7582 movq(carry, tmp3); | |
7583 | |
7584 bind(L_post_third_loop_done); | |
7585 } | |
7586 | |
7587 /** | |
7588 * Code for BigInteger::multiplyToLen() intrinsic. | |
7589 * | |
7590 * rdi: x | |
7591 * rax: xlen | |
7592 * rsi: y | |
7593 * rcx: ylen | |
7594 * r8: z | |
7595 * r11: zlen | |
7596 * r12: tmp1 | |
7597 * r13: tmp2 | |
7598 * r14: tmp3 | |
7599 * r15: tmp4 | |
7600 * rbx: tmp5 | |
7601 * | |
7602 */ | |
7603 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, | |
7604 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) { | |
7605 ShortBranchVerifier sbv(this); | |
7606 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx); | |
7607 | |
7608 push(tmp1); | |
7609 push(tmp2); | |
7610 push(tmp3); | |
7611 push(tmp4); | |
7612 push(tmp5); | |
7613 | |
7614 push(xlen); | |
7615 push(zlen); | |
7616 | |
7617 const Register idx = tmp1; | |
7618 const Register kdx = tmp2; | |
7619 const Register xstart = tmp3; | |
7620 | |
7621 const Register y_idx = tmp4; | |
7622 const Register carry = tmp5; | |
7623 const Register product = xlen; | |
7624 const Register x_xstart = zlen; // reuse register | |
7625 | |
7626 // First Loop. | |
7627 // | |
7628 // final static long LONG_MASK = 0xffffffffL; | |
7629 // int xstart = xlen - 1; | |
7630 // int ystart = ylen - 1; | |
7631 // long carry = 0; | |
7632 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { | |
7633 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; | |
7634 // z[kdx] = (int)product; | |
7635 // carry = product >>> 32; | |
7636 // } | |
7637 // z[xstart] = (int)carry; | |
7638 // | |
7639 | |
7640 movl(idx, ylen); // idx = ylen; | |
7641 movl(kdx, zlen); // kdx = xlen+ylen; | |
7642 xorq(carry, carry); // carry = 0; | |
7643 | |
7644 Label L_done; | |
7645 | |
7646 movl(xstart, xlen); | |
7647 decrementl(xstart); | |
7648 jcc(Assembler::negative, L_done); | |
7649 | |
7650 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); | |
7651 | |
7652 Label L_second_loop; | |
7653 testl(kdx, kdx); | |
7654 jcc(Assembler::zero, L_second_loop); | |
7655 | |
7656 Label L_carry; | |
7657 subl(kdx, 1); | |
7658 jcc(Assembler::zero, L_carry); | |
7659 | |
7660 movl(Address(z, kdx, Address::times_4, 0), carry); | |
7661 shrq(carry, 32); | |
7662 subl(kdx, 1); | |
7663 | |
7664 bind(L_carry); | |
7665 movl(Address(z, kdx, Address::times_4, 0), carry); | |
7666 | |
7667 // Second and third (nested) loops. | |
7668 // | |
7669 // for (int i = xstart-1; i >= 0; i--) { // Second loop | |
7670 // carry = 0; | |
7671 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop | |
7672 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + | |
7673 // (z[k] & LONG_MASK) + carry; | |
7674 // z[k] = (int)product; | |
7675 // carry = product >>> 32; | |
7676 // } | |
7677 // z[i] = (int)carry; | |
7678 // } | |
7679 // | |
7680 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx | |
7681 | |
7682 const Register jdx = tmp1; | |
7683 | |
7684 bind(L_second_loop); | |
7685 xorl(carry, carry); // carry = 0; | |
7686 movl(jdx, ylen); // j = ystart+1 | |
7687 | |
7688 subl(xstart, 1); // i = xstart-1; | |
7689 jcc(Assembler::negative, L_done); | |
7690 | |
7691 push (z); | |
7692 | |
7693 Label L_last_x; | |
7694 lea(z, Address(z, xstart, Address::times_4, 4)); // z = z + k - j | |
7695 subl(xstart, 1); // i = xstart-1; | |
7696 jcc(Assembler::negative, L_last_x); | |
7697 | |
7698 if (UseBMI2Instructions) { | |
7699 movq(rdx, Address(x, xstart, Address::times_4, 0)); | |
7700 rorxq(rdx, rdx, 32); // convert big-endian to little-endian | |
7701 } else { | |
7702 movq(x_xstart, Address(x, xstart, Address::times_4, 0)); | |
7703 rorq(x_xstart, 32); // convert big-endian to little-endian | |
7704 } | |
7705 | |
7706 Label L_third_loop_prologue; | |
7707 bind(L_third_loop_prologue); | |
7708 | |
7709 push (x); | |
7710 push (xstart); | |
7711 push (ylen); | |
7712 | |
7713 | |
7714 if (UseBMI2Instructions) { | |
7715 multiply_128_x_128_bmi2_loop(y, z, carry, x, jdx, ylen, product, tmp2, x_xstart, tmp3, tmp4); | |
7716 } else { // !UseBMI2Instructions | |
7717 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); | |
7718 } | |
7719 | |
7720 pop(ylen); | |
7721 pop(xlen); | |
7722 pop(x); | |
7723 pop(z); | |
7724 | |
7725 movl(tmp3, xlen); | |
7726 addl(tmp3, 1); | |
7727 movl(Address(z, tmp3, Address::times_4, 0), carry); | |
7728 subl(tmp3, 1); | |
7729 jccb(Assembler::negative, L_done); | |
7730 | |
7731 shrq(carry, 32); | |
7732 movl(Address(z, tmp3, Address::times_4, 0), carry); | |
7733 jmp(L_second_loop); | |
7734 | |
7735 // Next infrequent code is moved outside loops. | |
7736 bind(L_last_x); | |
7737 if (UseBMI2Instructions) { | |
7738 movl(rdx, Address(x, 0)); | |
7739 } else { | |
7740 movl(x_xstart, Address(x, 0)); | |
7741 } | |
7742 jmp(L_third_loop_prologue); | |
7743 | |
7744 bind(L_done); | |
7745 | |
7746 pop(zlen); | |
7747 pop(xlen); | |
7748 | |
7749 pop(tmp5); | |
7750 pop(tmp4); | |
7751 pop(tmp3); | |
7752 pop(tmp2); | |
7753 pop(tmp1); | |
7754 } | |
7755 #endif | |
7756 | |
7296 /** | 7757 /** |
7297 * Emits code to update CRC-32 with a byte value according to constants in table | 7758 * Emits code to update CRC-32 with a byte value according to constants in table |
7298 * | 7759 * |
7299 * @param [in,out]crc Register containing the crc. | 7760 * @param [in,out]crc Register containing the crc. |
7300 * @param [in]val Register containing the byte to fold into the CRC. | 7761 * @param [in]val Register containing the byte to fold into the CRC. |
7314 | 7775 |
7315 /** | 7776 /** |
7316 * Fold 128-bit data chunk | 7777 * Fold 128-bit data chunk |
7317 */ | 7778 */ |
7318 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) { | 7779 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) { |
7319 vpclmulhdq(xtmp, xK, xcrc); // [127:64] | 7780 if (UseAVX > 0) { |
7320 vpclmulldq(xcrc, xK, xcrc); // [63:0] | 7781 vpclmulhdq(xtmp, xK, xcrc); // [127:64] |
7321 vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */); | 7782 vpclmulldq(xcrc, xK, xcrc); // [63:0] |
7322 pxor(xcrc, xtmp); | 7783 vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */); |
7784 pxor(xcrc, xtmp); | |
7785 } else { | |
7786 movdqa(xtmp, xcrc); | |
7787 pclmulhdq(xtmp, xK); // [127:64] | |
7788 pclmulldq(xcrc, xK); // [63:0] | |
7789 pxor(xcrc, xtmp); | |
7790 movdqu(xtmp, Address(buf, offset)); | |
7791 pxor(xcrc, xtmp); | |
7792 } | |
7323 } | 7793 } |
7324 | 7794 |
7325 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) { | 7795 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) { |
7326 vpclmulhdq(xtmp, xK, xcrc); | 7796 if (UseAVX > 0) { |
7327 vpclmulldq(xcrc, xK, xcrc); | 7797 vpclmulhdq(xtmp, xK, xcrc); |
7328 pxor(xcrc, xbuf); | 7798 vpclmulldq(xcrc, xK, xcrc); |
7329 pxor(xcrc, xtmp); | 7799 pxor(xcrc, xbuf); |
7800 pxor(xcrc, xtmp); | |
7801 } else { | |
7802 movdqa(xtmp, xcrc); | |
7803 pclmulhdq(xtmp, xK); | |
7804 pclmulldq(xcrc, xK); | |
7805 pxor(xcrc, xbuf); | |
7806 pxor(xcrc, xtmp); | |
7807 } | |
7330 } | 7808 } |
7331 | 7809 |
7332 /** | 7810 /** |
7333 * 8-bit folds to compute 32-bit CRC | 7811 * 8-bit folds to compute 32-bit CRC |
7334 * | 7812 * |
7442 jccb(Assembler::greater, L_fold_tail_loop); | 7920 jccb(Assembler::greater, L_fold_tail_loop); |
7443 | 7921 |
7444 // Fold 128 bits in xmm1 down into 32 bits in crc register. | 7922 // Fold 128 bits in xmm1 down into 32 bits in crc register. |
7445 BIND(L_fold_128b); | 7923 BIND(L_fold_128b); |
7446 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr())); | 7924 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr())); |
7447 vpclmulqdq(xmm2, xmm0, xmm1, 0x1); | 7925 if (UseAVX > 0) { |
7448 vpand(xmm3, xmm0, xmm2, false /* vector256 */); | 7926 vpclmulqdq(xmm2, xmm0, xmm1, 0x1); |
7449 vpclmulqdq(xmm0, xmm0, xmm3, 0x1); | 7927 vpand(xmm3, xmm0, xmm2, false /* vector256 */); |
7928 vpclmulqdq(xmm0, xmm0, xmm3, 0x1); | |
7929 } else { | |
7930 movdqa(xmm2, xmm0); | |
7931 pclmulqdq(xmm2, xmm1, 0x1); | |
7932 movdqa(xmm3, xmm0); | |
7933 pand(xmm3, xmm2); | |
7934 pclmulqdq(xmm0, xmm3, 0x1); | |
7935 } | |
7450 psrldq(xmm1, 8); | 7936 psrldq(xmm1, 8); |
7451 psrldq(xmm2, 4); | 7937 psrldq(xmm2, 4); |
7452 pxor(xmm0, xmm1); | 7938 pxor(xmm0, xmm1); |
7453 pxor(xmm0, xmm2); | 7939 pxor(xmm0, xmm2); |
7454 | 7940 |