Mercurial > hg > graal-compiler
comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 1763:d6f45b55c972
4809552: Optimize Arrays.fill(...)
Reviewed-by: kvn
author | never |
---|---|
date | Fri, 27 Aug 2010 17:33:49 -0700 |
parents | e7ec8cd4dd8a |
children | f353275af40e |
comparison
equal
deleted
inserted
replaced
1731:ee5cc9e78493 | 1763:d6f45b55c972 |
---|---|
1586 __ delayed()->mov(G0, O0); // return 0 | 1586 __ delayed()->mov(G0, O0); // return 0 |
1587 return start; | 1587 return start; |
1588 } | 1588 } |
1589 | 1589 |
1590 // | 1590 // |
1591 // Generate stub for disjoint short fill. If "aligned" is true, the | |
1592 // "to" address is assumed to be heapword aligned. | |
1593 // | |
1594 // Arguments for generated stub: | |
1595 // to: O0 | |
1596 // value: O1 | |
1597 // count: O2 treated as signed | |
1598 // | |
1599 address generate_fill(BasicType t, bool aligned, const char* name) { | |
1600 __ align(CodeEntryAlignment); | |
1601 StubCodeMark mark(this, "StubRoutines", name); | |
1602 address start = __ pc(); | |
1603 | |
1604 const Register to = O0; // source array address | |
1605 const Register value = O1; // fill value | |
1606 const Register count = O2; // elements count | |
1607 // O3 is used as a temp register | |
1608 | |
1609 assert_clean_int(count, O3); // Make sure 'count' is clean int. | |
1610 | |
1611 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; | |
1612 Label L_fill_2_bytes, L_fill_4_bytes, L_fill_32_bytes; | |
1613 | |
1614 int shift = -1; | |
1615 switch (t) { | |
1616 case T_BYTE: | |
1617 shift = 2; | |
1618 break; | |
1619 case T_SHORT: | |
1620 shift = 1; | |
1621 break; | |
1622 case T_INT: | |
1623 shift = 0; | |
1624 break; | |
1625 default: ShouldNotReachHere(); | |
1626 } | |
1627 | |
1628 BLOCK_COMMENT("Entry:"); | |
1629 | |
1630 if (t == T_BYTE) { | |
1631 // Zero extend value | |
1632 __ and3(value, 0xff, value); | |
1633 __ sllx(value, 8, O3); | |
1634 __ or3(value, O3, value); | |
1635 } | |
1636 if (t == T_SHORT) { | |
1637 // Zero extend value | |
1638 __ sethi(0xffff0000, O3); | |
1639 __ andn(value, O3, value); | |
1640 } | |
1641 if (t == T_BYTE || t == T_SHORT) { | |
1642 __ sllx(value, 16, O3); | |
1643 __ or3(value, O3, value); | |
1644 } | |
1645 | |
1646 __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element | |
1647 __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_4_bytes); // use unsigned cmp | |
1648 __ delayed()->andcc(count, 1<<shift, G0); | |
1649 | |
1650 if (!aligned && (t == T_BYTE || t == T_SHORT)) { | |
1651 // align source address at 4 bytes address boundary | |
1652 if (t == T_BYTE) { | |
1653 // One byte misalignment happens only for byte arrays | |
1654 __ andcc(to, 1, G0); | |
1655 __ br(Assembler::zero, false, Assembler::pt, L_skip_align1); | |
1656 __ delayed()->nop(); | |
1657 __ stb(value, to, 0); | |
1658 __ inc(to, 1); | |
1659 __ dec(count, 1); | |
1660 __ BIND(L_skip_align1); | |
1661 } | |
1662 // Two bytes misalignment happens only for byte and short (char) arrays | |
1663 __ andcc(to, 2, G0); | |
1664 __ br(Assembler::zero, false, Assembler::pt, L_skip_align2); | |
1665 __ delayed()->nop(); | |
1666 __ sth(value, to, 0); | |
1667 __ inc(to, 2); | |
1668 __ dec(count, 1 << (shift - 1)); | |
1669 __ BIND(L_skip_align2); | |
1670 } | |
1671 #ifdef _LP64 | |
1672 if (!aligned) { | |
1673 #endif | |
1674 // align to 8 bytes, we know we are 4 byte aligned to start | |
1675 __ andcc(to, 7, G0); | |
1676 __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes); | |
1677 __ delayed()->nop(); | |
1678 __ stw(value, to, 0); | |
1679 __ inc(to, 4); | |
1680 __ dec(count, 1 << shift); | |
1681 __ BIND(L_fill_32_bytes); | |
1682 #ifdef _LP64 | |
1683 } | |
1684 #endif | |
1685 | |
1686 Label L_check_fill_8_bytes; | |
1687 // Fill 32-byte chunks | |
1688 __ subcc(count, 8 << shift, count); | |
1689 __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes); | |
1690 __ delayed()->nop(); | |
1691 | |
1692 if (t == T_INT) { | |
1693 // Zero extend value | |
1694 __ srl(value, 0, value); | |
1695 } | |
1696 if (t == T_BYTE || t == T_SHORT || t == T_INT) { | |
1697 __ sllx(value, 32, O3); | |
1698 __ or3(value, O3, value); | |
1699 } | |
1700 | |
1701 Label L_fill_32_bytes_loop; | |
1702 __ align(16); | |
1703 __ BIND(L_fill_32_bytes_loop); | |
1704 | |
1705 __ stx(value, to, 0); | |
1706 __ stx(value, to, 8); | |
1707 __ stx(value, to, 16); | |
1708 __ stx(value, to, 24); | |
1709 | |
1710 __ subcc(count, 8 << shift, count); | |
1711 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop); | |
1712 __ delayed()->add(to, 32, to); | |
1713 | |
1714 __ BIND(L_check_fill_8_bytes); | |
1715 __ addcc(count, 8 << shift, count); | |
1716 __ brx(Assembler::zero, false, Assembler::pn, L_exit); | |
1717 __ delayed()->subcc(count, 1 << (shift + 1), count); | |
1718 __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes); | |
1719 __ delayed()->andcc(count, 1<<shift, G0); | |
1720 | |
1721 // | |
1722 // length is too short, just fill 8 bytes at a time | |
1723 // | |
1724 Label L_fill_8_bytes_loop; | |
1725 __ BIND(L_fill_8_bytes_loop); | |
1726 __ stx(value, to, 0); | |
1727 __ subcc(count, 1 << (shift + 1), count); | |
1728 __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop); | |
1729 __ delayed()->add(to, 8, to); | |
1730 | |
1731 // fill trailing 4 bytes | |
1732 __ andcc(count, 1<<shift, G0); // in delay slot of branches | |
1733 __ BIND(L_fill_4_bytes); | |
1734 __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes); | |
1735 if (t == T_BYTE || t == T_SHORT) { | |
1736 __ delayed()->andcc(count, 1<<(shift-1), G0); | |
1737 } else { | |
1738 __ delayed()->nop(); | |
1739 } | |
1740 __ stw(value, to, 0); | |
1741 if (t == T_BYTE || t == T_SHORT) { | |
1742 __ inc(to, 4); | |
1743 // fill trailing 2 bytes | |
1744 __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches | |
1745 __ BIND(L_fill_2_bytes); | |
1746 __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte); | |
1747 __ delayed()->andcc(count, 1, count); | |
1748 __ sth(value, to, 0); | |
1749 if (t == T_BYTE) { | |
1750 __ inc(to, 2); | |
1751 // fill trailing byte | |
1752 __ andcc(count, 1, count); // in delay slot of branches | |
1753 __ BIND(L_fill_byte); | |
1754 __ brx(Assembler::zero, false, Assembler::pt, L_exit); | |
1755 __ delayed()->nop(); | |
1756 __ stb(value, to, 0); | |
1757 } else { | |
1758 __ BIND(L_fill_byte); | |
1759 } | |
1760 } else { | |
1761 __ BIND(L_fill_2_bytes); | |
1762 } | |
1763 __ BIND(L_exit); | |
1764 __ retl(); | |
1765 __ delayed()->mov(G0, O0); // return 0 | |
1766 return start; | |
1767 } | |
1768 | |
1769 // | |
1591 // Generate stub for conjoint short copy. If "aligned" is true, the | 1770 // Generate stub for conjoint short copy. If "aligned" is true, the |
1592 // "from" and "to" addresses are assumed to be heapword aligned. | 1771 // "from" and "to" addresses are assumed to be heapword aligned. |
1593 // | 1772 // |
1594 // Arguments for generated stub: | 1773 // Arguments for generated stub: |
1595 // from: O0 | 1774 // from: O0 |
2853 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; | 3032 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; |
2854 | 3033 |
2855 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); | 3034 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); |
2856 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); | 3035 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); |
2857 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); | 3036 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); |
3037 | |
3038 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); | |
3039 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); | |
3040 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); | |
3041 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); | |
3042 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); | |
3043 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); | |
2858 } | 3044 } |
2859 | 3045 |
2860 void generate_initial() { | 3046 void generate_initial() { |
2861 // Generates all stubs and initializes the entry points | 3047 // Generates all stubs and initializes the entry points |
2862 | 3048 |