comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 1763:d6f45b55c972

4809552: Optimize Arrays.fill(...) Reviewed-by: kvn
author never
date Fri, 27 Aug 2010 17:33:49 -0700
parents e7ec8cd4dd8a
children f353275af40e
comparison
equal deleted inserted replaced
1731:ee5cc9e78493 1763:d6f45b55c972
1586 __ delayed()->mov(G0, O0); // return 0 1586 __ delayed()->mov(G0, O0); // return 0
1587 return start; 1587 return start;
1588 } 1588 }
1589 1589
1590 // 1590 //
1591 // Generate stub for array fill (byte, short or int elements). If "aligned" is true, the
1592 // "to" address is assumed to be heapword aligned.
1593 //
1594 // Arguments for generated stub:
1595 // to: O0
1596 // value: O1
1597 // count: O2 treated as signed
1598 //
1599 address generate_fill(BasicType t, bool aligned, const char* name) {
     // Emits a SPARC stub that fills an array of byte/short/int elements with a
     // replicated value.  Strategy: replicate the element across a full register,
     // align the destination up to an 8-byte boundary with small stores, blast
     // 32 bytes per iteration with four stx, then finish with 8/4/2/1-byte tails.
     // NOTE: every conditional branch below carries a delay-slot instruction
     // (__ delayed()->...) that executes whether or not the branch is taken; the
     // tail code relies on condition codes pre-computed in those delay slots.
1600 __ align(CodeEntryAlignment);
1601 StubCodeMark mark(this, "StubRoutines", name);
1602 address start = __ pc();
1603
1604 const Register to = O0; // destination array address (all stores below go through it)
1605 const Register value = O1; // fill value
1606 const Register count = O2; // elements count
1607 // O3 is used as a temp register
1608
1609 assert_clean_int(count, O3); // Make sure 'count' is clean int.
1610
1611 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
1612 Label L_fill_2_bytes, L_fill_4_bytes, L_fill_32_bytes;
1613
     // shift = log2(elements per 4-byte word): byte->2, short->1, int->0.
     // Element counts are scaled to bytes via these shifts, e.g. (8 << shift)
     // elements is exactly 32 bytes for every element type.
1614 int shift = -1;
1615 switch (t) {
1616 case T_BYTE:
1617 shift = 2;
1618 break;
1619 case T_SHORT:
1620 shift = 1;
1621 break;
1622 case T_INT:
1623 shift = 0;
1624 break;
1625 default: ShouldNotReachHere();
1626 }
1627
1628 BLOCK_COMMENT("Entry:");
1629
     // Replicate the element value across the low 32 bits of 'value'
     // (byte: 0xbb -> 0xbbbbbbbb, short: 0xssss -> 0xssssssss).
1630 if (t == T_BYTE) {
1631 // Zero extend value
1632 __ and3(value, 0xff, value);
1633 __ sllx(value, 8, O3);
1634 __ or3(value, O3, value);
1635 }
1636 if (t == T_SHORT) {
1637 // Zero extend value
1638 __ sethi(0xffff0000, O3);
1639 __ andn(value, O3, value);
1640 }
1641 if (t == T_BYTE || t == T_SHORT) {
1642 __ sllx(value, 16, O3);
1643 __ or3(value, O3, value);
1644 }
1645
     // 2 << shift elements == 8 bytes; anything smaller skips straight to the
     // element-by-element tail.  The delay-slot andcc pre-computes the
     // "is there a trailing 4-byte store" test consumed at L_fill_4_bytes.
1646 __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
1647 __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_4_bytes); // use unsigned cmp
1648 __ delayed()->andcc(count, 1<<shift, G0);
1649
1650 if (!aligned && (t == T_BYTE || t == T_SHORT)) {
1651 // align destination address at 4 bytes address boundary
1652 if (t == T_BYTE) {
1653 // One byte misalignment happens only for byte arrays
1654 __ andcc(to, 1, G0);
1655 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
1656 __ delayed()->nop();
1657 __ stb(value, to, 0);
1658 __ inc(to, 1);
1659 __ dec(count, 1);
1660 __ BIND(L_skip_align1);
1661 }
1662 // Two bytes misalignment happens only for byte and short (char) arrays
1663 __ andcc(to, 2, G0);
1664 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
1665 __ delayed()->nop();
1666 __ sth(value, to, 0);
1667 __ inc(to, 2);
     // 1 << (shift - 1) elements == 2 bytes (2 bytes, or 1 short)
1668 __ dec(count, 1 << (shift - 1));
1669 __ BIND(L_skip_align2);
1670 }
     // NOTE(review): the 4->8 byte alignment step is guarded by !aligned only
     // under _LP64 — presumably because a 32-bit heapword-aligned address is
     // only guaranteed 4-byte aligned, so even "aligned" entry points need it;
     // confirm against the LP64/ILP32 heapword definitions.
1671 #ifdef _LP64
1672 if (!aligned) {
1673 #endif
1674 // align to 8 bytes, we know we are 4 byte aligned to start
1675 __ andcc(to, 7, G0);
1676 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
1677 __ delayed()->nop();
1678 __ stw(value, to, 0);
1679 __ inc(to, 4);
1680 __ dec(count, 1 << shift);
1681 __ BIND(L_fill_32_bytes);
1682 #ifdef _LP64
1683 }
1684 #endif
1685
1686 Label L_check_fill_8_bytes;
1687 // Fill 32-byte chunks
     // Pre-decrement by one full chunk (8 << shift elements == 32 bytes); if
     // that goes negative there is less than a chunk left and we skip the loop.
1688 __ subcc(count, 8 << shift, count);
1689 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
1690 __ delayed()->nop();
1691
     // Widen the replicated pattern to all 64 bits for the 8-byte stx stores.
1692 if (t == T_INT) {
1693 // Zero extend value
1694 __ srl(value, 0, value);
1695 }
1696 if (t == T_BYTE || t == T_SHORT || t == T_INT) {
1697 __ sllx(value, 32, O3);
1698 __ or3(value, O3, value);
1699 }
1700
1701 Label L_fill_32_bytes_loop;
1702 __ align(16);
1703 __ BIND(L_fill_32_bytes_loop);
1704
1705 __ stx(value, to, 0);
1706 __ stx(value, to, 8);
1707 __ stx(value, to, 16);
1708 __ stx(value, to, 24);
1709
     // Loop while another full 32-byte chunk remains; the pointer bump rides
     // in the delay slot so it happens on both the taken and fall-through path.
1710 __ subcc(count, 8 << shift, count);
1711 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
1712 __ delayed()->add(to, 32, to);
1713
1714 __ BIND(L_check_fill_8_bytes);
     // Undo the chunk pre-decrement to recover the true remainder; exit if
     // nothing is left.  The delay slots chain the next two tests: subcc
     // pre-computes "remainder >= 8 bytes?" and andcc pre-computes the
     // trailing-4-byte test consumed at L_fill_4_bytes.
1715 __ addcc(count, 8 << shift, count);
1716 __ brx(Assembler::zero, false, Assembler::pn, L_exit);
1717 __ delayed()->subcc(count, 1 << (shift + 1), count);
1718 __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
1719 __ delayed()->andcc(count, 1<<shift, G0);
1720
1721 //
1722 // length is too short, just fill 8 bytes at a time
1723 //
1724 Label L_fill_8_bytes_loop;
1725 __ BIND(L_fill_8_bytes_loop);
1726 __ stx(value, to, 0);
     // 1 << (shift + 1) elements == 8 bytes, the amount stored per iteration
1727 __ subcc(count, 1 << (shift + 1), count);
1728 __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
1729 __ delayed()->add(to, 8, to);
1730
1731 // fill trailing 4 bytes
     // Fall-through path recomputes the test here; jumps into L_fill_4_bytes
     // arrive with the same andcc already done in a branch delay slot above.
1732 __ andcc(count, 1<<shift, G0); // in delay slot of branches
1733 __ BIND(L_fill_4_bytes);
1734 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
1735 if (t == T_BYTE || t == T_SHORT) {
1736 __ delayed()->andcc(count, 1<<(shift-1), G0);
1737 } else {
1738 __ delayed()->nop();
1739 }
1740 __ stw(value, to, 0);
     // For int fill a trailing 2-byte/1-byte store is impossible, so the
     // nested tails below only exist for byte and short element types.
1741 if (t == T_BYTE || t == T_SHORT) {
1742 __ inc(to, 4);
1743 // fill trailing 2 bytes
1744 __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
1745 __ BIND(L_fill_2_bytes);
1746 __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
1747 __ delayed()->andcc(count, 1, count);
1748 __ sth(value, to, 0);
1749 if (t == T_BYTE) {
1750 __ inc(to, 2);
1751 // fill trailing byte
1752 __ andcc(count, 1, count); // in delay slot of branches
1753 __ BIND(L_fill_byte);
1754 __ brx(Assembler::zero, false, Assembler::pt, L_exit);
1755 __ delayed()->nop();
1756 __ stb(value, to, 0);
1757 } else {
1758 __ BIND(L_fill_byte);
1759 }
1760 } else {
1761 __ BIND(L_fill_2_bytes);
1762 }
1763 __ BIND(L_exit);
1764 __ retl();
1765 __ delayed()->mov(G0, O0); // return 0
1766 return start;
1767 }
1768
1769 //
1591 // Generate stub for conjoint short copy. If "aligned" is true, the 1770 // Generate stub for conjoint short copy. If "aligned" is true, the
1592 // "from" and "to" addresses are assumed to be heapword aligned. 1771 // "from" and "to" addresses are assumed to be heapword aligned.
1593 // 1772 //
1594 // Arguments for generated stub: 1773 // Arguments for generated stub:
1595 // from: O0 1774 // from: O0
2853 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; 3032 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
2854 3033
2855 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); 3034 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
2856 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); 3035 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
2857 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); 3036 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
3037
3038 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
3039 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
3040 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
3041 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
3042 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
3043 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
2858 } 3044 }
2859 3045
2860 void generate_initial() { 3046 void generate_initial() {
2861 // Generates all stubs and initializes the entry points 3047 // Generates all stubs and initializes the entry points
2862 3048