comparison src/cpu/x86/vm/stubGenerator_x86_64.cpp @ 113:ba764ed4b6f2

6420645: Create a vm that uses compressed oops for up to 32gb heapsizes Summary: Compressed oops in instances, arrays, and headers. Code contributors are coleenp, phh, never, swamyv Reviewed-by: jmasa, kamg, acorn, tbell, kvn, rasbold
author coleenp
date Sun, 13 Apr 2008 17:43:42 -0400
parents f8236e79048a
children b130b98db9cf
comparison
equal deleted inserted replaced
110:a49a647afe9a 113:ba764ed4b6f2
28 // Declaration and definition of StubGenerator (no .hpp file). 28 // Declaration and definition of StubGenerator (no .hpp file).
29 // For a more detailed description of the stub routine structure 29 // For a more detailed description of the stub routine structure
30 // see the comment in stubRoutines.hpp 30 // see the comment in stubRoutines.hpp
31 31
32 #define __ _masm-> 32 #define __ _masm->
33 #define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
33 34
34 #ifdef PRODUCT 35 #ifdef PRODUCT
35 #define BLOCK_COMMENT(str) /* nothing */ 36 #define BLOCK_COMMENT(str) /* nothing */
36 #else 37 #else
37 #define BLOCK_COMMENT(str) __ block_comment(str) 38 #define BLOCK_COMMENT(str) __ block_comment(str)
250 } 251 }
251 #endif 252 #endif
252 253
253 // Load up thread register 254 // Load up thread register
254 __ movq(r15_thread, thread); 255 __ movq(r15_thread, thread);
256 __ reinit_heapbase();
255 257
256 #ifdef ASSERT 258 #ifdef ASSERT
257 // make sure we have no pending exceptions 259 // make sure we have no pending exceptions
258 { 260 {
259 Label L; 261 Label L;
943 __ movptr(c_rarg3, (int64_t) Universe::verify_oop_bits()); 945 __ movptr(c_rarg3, (int64_t) Universe::verify_oop_bits());
944 __ cmpq(c_rarg2, c_rarg3); 946 __ cmpq(c_rarg2, c_rarg3);
945 __ jcc(Assembler::notZero, error); 947 __ jcc(Assembler::notZero, error);
946 948
947 // make sure klass is 'reasonable' 949 // make sure klass is 'reasonable'
948 __ movq(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass 950 __ load_klass(rax, rax); // get klass
949 __ testq(rax, rax); 951 __ testq(rax, rax);
950 __ jcc(Assembler::zero, error); // if klass is NULL it is broken 952 __ jcc(Assembler::zero, error); // if klass is NULL it is broken
951 // Check if the klass is in the right area of memory 953 // Check if the klass is in the right area of memory
952 __ movq(c_rarg2, rax); 954 __ movq(c_rarg2, rax);
953 __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask()); 955 __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
955 __ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits()); 957 __ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
956 __ cmpq(c_rarg2, c_rarg3); 958 __ cmpq(c_rarg2, c_rarg3);
957 __ jcc(Assembler::notZero, error); 959 __ jcc(Assembler::notZero, error);
958 960
959 // make sure klass' klass is 'reasonable' 961 // make sure klass' klass is 'reasonable'
960 __ movq(rax, Address(rax, oopDesc::klass_offset_in_bytes())); 962 __ load_klass(rax, rax);
961 __ testq(rax, rax); 963 __ testq(rax, rax);
962 __ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken 964 __ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken
963 // Check if the klass' klass is in the right area of memory 965 // Check if the klass' klass is in the right area of memory
964 __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask()); 966 __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
965 __ andq(rax, c_rarg3); 967 __ andq(rax, c_rarg3);
999 __ subq(rsp, frame::arg_reg_save_area_bytes);// windows 1001 __ subq(rsp, frame::arg_reg_save_area_bytes);// windows
1000 __ andq(rsp, -16); // align stack as required by ABI 1002 __ andq(rsp, -16); // align stack as required by ABI
1001 BLOCK_COMMENT("call MacroAssembler::debug"); 1003 BLOCK_COMMENT("call MacroAssembler::debug");
1002 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug))); 1004 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug)));
1003 __ movq(rsp, r12); // restore rsp 1005 __ movq(rsp, r12); // restore rsp
1006 __ reinit_heapbase(); // r12 is heapbase
1004 __ popaq(); // pop registers 1007 __ popaq(); // pop registers
1005 __ ret(3 * wordSize); // pop caller saved stuff 1008 __ ret(3 * wordSize); // pop caller saved stuff
1006 1009
1007 return start; 1010 return start;
1008 } 1011 }
1650 } 1653 }
1651 1654
1652 // Arguments: 1655 // Arguments:
1653 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1656 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1654 // ignored 1657 // ignored
1658 // is_oop - true => oop array, so generate store check code
1655 // name - stub name string 1659 // name - stub name string
1656 // 1660 //
1657 // Inputs: 1661 // Inputs:
1658 // c_rarg0 - source array address 1662 // c_rarg0 - source array address
1659 // c_rarg1 - destination array address 1663 // c_rarg1 - destination array address
1663 // the hardware handle it. The two dwords within qwords that span 1667 // the hardware handle it. The two dwords within qwords that span
1664 // cache line boundaries will still be loaded and stored atomically. 1668 // cache line boundaries will still be loaded and stored atomically.
1665 // 1669 //
1666 // Side Effects: 1670 // Side Effects:
1667 // disjoint_int_copy_entry is set to the no-overlap entry point 1671 // disjoint_int_copy_entry is set to the no-overlap entry point
1668 // used by generate_conjoint_int_copy(). 1672 // used by generate_conjoint_int_oop_copy().
1669 // 1673 //
1670 address generate_disjoint_int_copy(bool aligned, const char *name) { 1674 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
1671 __ align(CodeEntryAlignment); 1675 __ align(CodeEntryAlignment);
1672 StubCodeMark mark(this, "StubRoutines", name); 1676 StubCodeMark mark(this, "StubRoutines", name);
1673 address start = __ pc(); 1677 address start = __ pc();
1674 1678
1675 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; 1679 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1678 const Register count = rdx; // elements count 1682 const Register count = rdx; // elements count
1679 const Register dword_count = rcx; 1683 const Register dword_count = rcx;
1680 const Register qword_count = count; 1684 const Register qword_count = count;
1681 const Register end_from = from; // source array end address 1685 const Register end_from = from; // source array end address
1682 const Register end_to = to; // destination array end address 1686 const Register end_to = to; // destination array end address
1687 const Register saved_to = r11; // saved destination array address
1683 // End pointers are inclusive, and if count is not zero they point 1688 // End pointers are inclusive, and if count is not zero they point
1684 // to the last unit copied: end_to[0] := end_from[0] 1689 // to the last unit copied: end_to[0] := end_from[0]
1685 1690
1686 __ enter(); // required for proper stackwalking of RuntimeStub frame 1691 __ enter(); // required for proper stackwalking of RuntimeStub frame
1687 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 1692 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1688 1693
1689 disjoint_int_copy_entry = __ pc(); 1694 (is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry) = __ pc();
1695
1696 if (is_oop) {
1697 // no registers are destroyed by this call
1698 gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
1699 }
1700
1690 BLOCK_COMMENT("Entry:"); 1701 BLOCK_COMMENT("Entry:");
1691 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1702 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1692 1703
1693 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1704 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1694 // r9 and r10 may be used to save non-volatile registers 1705 // r9 and r10 may be used to save non-volatile registers
1706
1707 if (is_oop) {
1708 __ movq(saved_to, to);
1709 }
1695 1710
1696 // 'from', 'to' and 'count' are now valid 1711 // 'from', 'to' and 'count' are now valid
1697 __ movq(dword_count, count); 1712 __ movq(dword_count, count);
1698 __ shrq(count, 1); // count => qword_count 1713 __ shrq(count, 1); // count => qword_count
1699 1714
1716 __ jccb(Assembler::zero, L_exit); 1731 __ jccb(Assembler::zero, L_exit);
1717 __ movl(rax, Address(end_from, 8)); 1732 __ movl(rax, Address(end_from, 8));
1718 __ movl(Address(end_to, 8), rax); 1733 __ movl(Address(end_to, 8), rax);
1719 1734
1720 __ BIND(L_exit); 1735 __ BIND(L_exit);
1736 if (is_oop) {
1737 __ leaq(end_to, Address(saved_to, dword_count, Address::times_4, -4));
1738 gen_write_ref_array_post_barrier(saved_to, end_to, rax);
1739 }
1721 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); 1740 inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
1722 restore_arg_regs(); 1741 restore_arg_regs();
1723 __ xorq(rax, rax); // return 0 1742 __ xorq(rax, rax); // return 0
1724 __ leave(); // required for proper stackwalking of RuntimeStub frame 1743 __ leave(); // required for proper stackwalking of RuntimeStub frame
1725 __ ret(0); 1744 __ ret(0);
1732 } 1751 }
1733 1752
1734 // Arguments: 1753 // Arguments:
1735 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary 1754 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1736 // ignored 1755 // ignored
1756 // is_oop - true => oop array, so generate store check code
1737 // name - stub name string 1757 // name - stub name string
1738 // 1758 //
1739 // Inputs: 1759 // Inputs:
1740 // c_rarg0 - source array address 1760 // c_rarg0 - source array address
1741 // c_rarg1 - destination array address 1761 // c_rarg1 - destination array address
1743 // 1763 //
1744 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1764 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1745 // the hardware handle it. The two dwords within qwords that span 1765 // the hardware handle it. The two dwords within qwords that span
1746 // cache line boundaries will still be loaded and stored atomically. 1766 // cache line boundaries will still be loaded and stored atomically.
1747 // 1767 //
1748 address generate_conjoint_int_copy(bool aligned, const char *name) { 1768 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
1749 __ align(CodeEntryAlignment); 1769 __ align(CodeEntryAlignment);
1750 StubCodeMark mark(this, "StubRoutines", name); 1770 StubCodeMark mark(this, "StubRoutines", name);
1751 address start = __ pc(); 1771 address start = __ pc();
1752 1772
1753 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes; 1773 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1754 const Register from = rdi; // source array address 1774 const Register from = rdi; // source array address
1755 const Register to = rsi; // destination array address 1775 const Register to = rsi; // destination array address
1756 const Register count = rdx; // elements count 1776 const Register count = rdx; // elements count
1757 const Register dword_count = rcx; 1777 const Register dword_count = rcx;
1758 const Register qword_count = count; 1778 const Register qword_count = count;
1759 1779
1760 __ enter(); // required for proper stackwalking of RuntimeStub frame 1780 __ enter(); // required for proper stackwalking of RuntimeStub frame
1761 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 1781 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1762 1782
1763 int_copy_entry = __ pc(); 1783 if (is_oop) {
1784 // no registers are destroyed by this call
1785 gen_write_ref_array_pre_barrier(/* dest */ c_rarg1, /* count */ c_rarg2);
1786 }
1787
1788 (is_oop ? oop_copy_entry : int_copy_entry) = __ pc();
1764 BLOCK_COMMENT("Entry:"); 1789 BLOCK_COMMENT("Entry:");
1765 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1790 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1766 1791
1767 array_overlap_test(disjoint_int_copy_entry, Address::times_4); 1792 array_overlap_test(is_oop ? disjoint_oop_copy_entry : disjoint_int_copy_entry,
1793 Address::times_4);
1768 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1794 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1769 // r9 and r10 may be used to save non-volatile registers 1795 // r9 and r10 may be used to save non-volatile registers
1770 1796
1797 assert_clean_int(count, rax); // Make sure 'count' is clean int.
1771 // 'from', 'to' and 'count' are now valid 1798 // 'from', 'to' and 'count' are now valid
1772 __ movq(dword_count, count); 1799 __ movq(dword_count, count);
1773 __ shrq(count, 1); // count => qword_count 1800 __ shrq(count, 1); // count => qword_count
1774 1801
1775 // Copy from high to low addresses. Use 'to' as scratch. 1802 // Copy from high to low addresses. Use 'to' as scratch.
1787 __ movq(Address(to, qword_count, Address::times_8, -8), rax); 1814 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1788 __ decrementq(qword_count); 1815 __ decrementq(qword_count);
1789 __ jcc(Assembler::notZero, L_copy_8_bytes); 1816 __ jcc(Assembler::notZero, L_copy_8_bytes);
1790 1817
1791 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); 1818 inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
1819 if (is_oop) {
1820 __ jmp(L_exit);
1821 }
1792 restore_arg_regs(); 1822 restore_arg_regs();
1793 __ xorq(rax, rax); // return 0 1823 __ xorq(rax, rax); // return 0
1794 __ leave(); // required for proper stackwalking of RuntimeStub frame 1824 __ leave(); // required for proper stackwalking of RuntimeStub frame
1795 __ ret(0); 1825 __ ret(0);
1796 1826
1797 // Copy in 32-bytes chunks 1827 // Copy in 32-bytes chunks
1798 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes); 1828 copy_32_bytes_backward(from, to, qword_count, rax, L_copy_32_bytes, L_copy_8_bytes);
1799 1829
1800 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); 1830 inc_counter_np(SharedRuntime::_jint_array_copy_ctr);
1831 __ bind(L_exit);
1832 if (is_oop) {
1833 Register end_to = rdx;
1834 __ leaq(end_to, Address(to, dword_count, Address::times_4, -4));
1835 gen_write_ref_array_post_barrier(to, end_to, rax);
1836 }
1801 restore_arg_regs(); 1837 restore_arg_regs();
1802 __ xorq(rax, rax); // return 0 1838 __ xorq(rax, rax); // return 0
1803 __ leave(); // required for proper stackwalking of RuntimeStub frame 1839 __ leave(); // required for proper stackwalking of RuntimeStub frame
1804 __ ret(0); 1840 __ ret(0);
1805 1841
1815 // Inputs: 1851 // Inputs:
1816 // c_rarg0 - source array address 1852 // c_rarg0 - source array address
1817 // c_rarg1 - destination array address 1853 // c_rarg1 - destination array address
1818 // c_rarg2 - element count, treated as ssize_t, can be zero 1854 // c_rarg2 - element count, treated as ssize_t, can be zero
1819 // 1855 //
1820 // Side Effects: 1856 // Side Effects:
1821 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the 1857 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1822 // no-overlap entry point used by generate_conjoint_long_oop_copy(). 1858 // no-overlap entry point used by generate_conjoint_long_oop_copy().
1823 // 1859 //
1824 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) { 1860 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name) {
1825 __ align(CodeEntryAlignment); 1861 __ align(CodeEntryAlignment);
1855 1891
1856 // 'from', 'to' and 'qword_count' are now valid 1892 // 'from', 'to' and 'qword_count' are now valid
1857 1893
1858 // Copy from low to high addresses. Use 'to' as scratch. 1894 // Copy from low to high addresses. Use 'to' as scratch.
1859 __ leaq(end_from, Address(from, qword_count, Address::times_8, -8)); 1895 __ leaq(end_from, Address(from, qword_count, Address::times_8, -8));
1860 __ leaq(end_to, Address(to, qword_count, Address::times_8, -8)); 1896 __ leaq(end_to, Address(to, qword_count, Address::times_8, -8));
1861 __ negq(qword_count); 1897 __ negq(qword_count);
1862 __ jmp(L_copy_32_bytes); 1898 __ jmp(L_copy_32_bytes);
1863 1899
1864 // Copy trailing qwords 1900 // Copy trailing qwords
1865 __ BIND(L_copy_8_bytes); 1901 __ BIND(L_copy_8_bytes);
1921 __ enter(); // required for proper stackwalking of RuntimeStub frame 1957 __ enter(); // required for proper stackwalking of RuntimeStub frame
1922 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int. 1958 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1923 1959
1924 address disjoint_copy_entry = NULL; 1960 address disjoint_copy_entry = NULL;
1925 if (is_oop) { 1961 if (is_oop) {
1962 assert(!UseCompressedOops, "shouldn't be called for compressed oops");
1926 disjoint_copy_entry = disjoint_oop_copy_entry; 1963 disjoint_copy_entry = disjoint_oop_copy_entry;
1927 oop_copy_entry = __ pc(); 1964 oop_copy_entry = __ pc();
1965 array_overlap_test(disjoint_oop_copy_entry, Address::times_8);
1928 } else { 1966 } else {
1929 disjoint_copy_entry = disjoint_long_copy_entry; 1967 disjoint_copy_entry = disjoint_long_copy_entry;
1930 long_copy_entry = __ pc(); 1968 long_copy_entry = __ pc();
1969 array_overlap_test(disjoint_long_copy_entry, Address::times_8);
1931 } 1970 }
1932 BLOCK_COMMENT("Entry:"); 1971 BLOCK_COMMENT("Entry:");
1933 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) 1972 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1934 1973
1935 array_overlap_test(disjoint_copy_entry, Address::times_8); 1974 array_overlap_test(disjoint_copy_entry, Address::times_8);
1942 // Save to and count for store barrier 1981 // Save to and count for store barrier
1943 __ movq(saved_count, qword_count); 1982 __ movq(saved_count, qword_count);
1944 // No registers are destroyed by this call 1983 // No registers are destroyed by this call
1945 gen_write_ref_array_pre_barrier(to, saved_count); 1984 gen_write_ref_array_pre_barrier(to, saved_count);
1946 } 1985 }
1947
1948 // Copy from high to low addresses. Use rcx as scratch.
1949 1986
1950 __ jmp(L_copy_32_bytes); 1987 __ jmp(L_copy_32_bytes);
1951 1988
1952 // Copy trailing qwords 1989 // Copy trailing qwords
1953 __ BIND(L_copy_8_bytes); 1990 __ BIND(L_copy_8_bytes);
2036 // Skip to start of data. 2073 // Skip to start of data.
2037 __ addq(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 2074 __ addq(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
2038 // Scan rcx words at [rdi] for occurrence of rax 2075 // Scan rcx words at [rdi] for occurrence of rax
2039 // Set NZ/Z based on last compare 2076 // Set NZ/Z based on last compare
2040 __ movq(rax, super_klass); 2077 __ movq(rax, super_klass);
2041 __ repne_scan(); 2078 if (UseCompressedOops) {
2079 // Compare against compressed form. Don't need to uncompress because
2080 // looks like orig rax is restored in popq below.
2081 __ encode_heap_oop(rax);
2082 __ repne_scanl();
2083 } else {
2084 __ repne_scanq();
2085 }
2042 2086
2043 // Unspill the temp. registers: 2087 // Unspill the temp. registers:
2044 __ popq(rdi); 2088 __ popq(rdi);
2045 __ popq(rcx); 2089 __ popq(rcx);
2046 __ popq(rax); 2090 __ popq(rax);
2113 2157
2114 #ifdef ASSERT 2158 #ifdef ASSERT
2115 // caller guarantees that the arrays really are different 2159 // caller guarantees that the arrays really are different
2116 // otherwise, we would have to make conjoint checks 2160 // otherwise, we would have to make conjoint checks
2117 { Label L; 2161 { Label L;
2118 array_overlap_test(L, Address::times_8); 2162 array_overlap_test(L, TIMES_OOP);
2119 __ stop("checkcast_copy within a single array"); 2163 __ stop("checkcast_copy within a single array");
2120 __ bind(L); 2164 __ bind(L);
2121 } 2165 }
2122 #endif //ASSERT 2166 #endif //ASSERT
2123 2167
2158 __ bind(L); 2202 __ bind(L);
2159 } 2203 }
2160 #endif //ASSERT 2204 #endif //ASSERT
2161 2205
2162 // Loop-invariant addresses. They are exclusive end pointers. 2206 // Loop-invariant addresses. They are exclusive end pointers.
2163 Address end_from_addr(from, length, Address::times_8, 0); 2207 Address end_from_addr(from, length, TIMES_OOP, 0);
2164 Address end_to_addr(to, length, Address::times_8, 0); 2208 Address end_to_addr(to, length, TIMES_OOP, 0);
2165 // Loop-variant addresses. They assume post-incremented count < 0. 2209 // Loop-variant addresses. They assume post-incremented count < 0.
2166 Address from_element_addr(end_from, count, Address::times_8, 0); 2210 Address from_element_addr(end_from, count, TIMES_OOP, 0);
2167 Address to_element_addr(end_to, count, Address::times_8, 0); 2211 Address to_element_addr(end_to, count, TIMES_OOP, 0);
2168 Address oop_klass_addr(rax_oop, oopDesc::klass_offset_in_bytes());
2169 2212
2170 gen_write_ref_array_pre_barrier(to, count); 2213 gen_write_ref_array_pre_barrier(to, count);
2171 2214
2172 // Copy from low to high addresses, indexed from the end of each array. 2215 // Copy from low to high addresses, indexed from the end of each array.
2173 __ leaq(end_from, end_from_addr); 2216 __ leaq(end_from, end_from_addr);
2187 // for (count = -count; count != 0; count++) 2230 // for (count = -count; count != 0; count++)
2188 // Base pointers src, dst are biased by 8*(count-1),to last element. 2231 // Base pointers src, dst are biased by 8*(count-1),to last element.
2189 __ align(16); 2232 __ align(16);
2190 2233
2191 __ BIND(L_store_element); 2234 __ BIND(L_store_element);
2192 __ movq(to_element_addr, rax_oop); // store the oop 2235 __ store_heap_oop(to_element_addr, rax_oop); // store the oop
2193 __ incrementq(count); // increment the count toward zero 2236 __ incrementq(count); // increment the count toward zero
2194 __ jcc(Assembler::zero, L_do_card_marks); 2237 __ jcc(Assembler::zero, L_do_card_marks);
2195 2238
2196 // ======== loop entry is here ======== 2239 // ======== loop entry is here ========
2197 __ BIND(L_load_element); 2240 __ BIND(L_load_element);
2198 __ movq(rax_oop, from_element_addr); // load the oop 2241 __ load_heap_oop(rax_oop, from_element_addr); // load the oop
2199 __ testq(rax_oop, rax_oop); 2242 __ testq(rax_oop, rax_oop);
2200 __ jcc(Assembler::zero, L_store_element); 2243 __ jcc(Assembler::zero, L_store_element);
2201 2244
2202 __ movq(r11_klass, oop_klass_addr); // query the object klass 2245 __ load_klass(r11_klass, rax_oop);// query the object klass
2203 generate_type_check(r11_klass, ckoff, ckval, L_store_element); 2246 generate_type_check(r11_klass, ckoff, ckval, L_store_element);
2204 // ======== end loop ======== 2247 // ======== end loop ========
2205 2248
2206 // It was a real error; we must depend on the caller to finish the job. 2249 // It was a real error; we must depend on the caller to finish the job.
2207 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. 2250 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
2423 guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps"); 2466 guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
2424 2467
2425 // registers used as temp 2468 // registers used as temp
2426 const Register r11_length = r11; // elements count to copy 2469 const Register r11_length = r11; // elements count to copy
2427 const Register r10_src_klass = r10; // array klass 2470 const Register r10_src_klass = r10; // array klass
2471 const Register r9_dst_klass = r9; // dest array klass
2428 2472
2429 // if (length < 0) return -1; 2473 // if (length < 0) return -1;
2430 __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value) 2474 __ movl(r11_length, C_RARG4); // length (elements count, 32-bits value)
2431 __ testl(r11_length, r11_length); 2475 __ testl(r11_length, r11_length);
2432 __ jccb(Assembler::negative, L_failed_0); 2476 __ jccb(Assembler::negative, L_failed_0);
2433 2477
2434 Address src_klass_addr(src, oopDesc::klass_offset_in_bytes()); 2478 __ load_klass(r10_src_klass, src);
2435 Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes());
2436 __ movq(r10_src_klass, src_klass_addr);
2437 #ifdef ASSERT 2479 #ifdef ASSERT
2438 // assert(src->klass() != NULL); 2480 // assert(src->klass() != NULL);
2439 BLOCK_COMMENT("assert klasses not null"); 2481 BLOCK_COMMENT("assert klasses not null");
2440 { Label L1, L2; 2482 { Label L1, L2;
2441 __ testq(r10_src_klass, r10_src_klass); 2483 __ testq(r10_src_klass, r10_src_klass);
2442 __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL 2484 __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL
2443 __ bind(L1); 2485 __ bind(L1);
2444 __ stop("broken null klass"); 2486 __ stop("broken null klass");
2445 __ bind(L2); 2487 __ bind(L2);
2446 __ cmpq(dst_klass_addr, 0); 2488 __ load_klass(r9_dst_klass, dst);
2489 __ cmpq(r9_dst_klass, 0);
2447 __ jcc(Assembler::equal, L1); // this would be broken also 2490 __ jcc(Assembler::equal, L1); // this would be broken also
2448 BLOCK_COMMENT("assert done"); 2491 BLOCK_COMMENT("assert done");
2449 } 2492 }
2450 #endif 2493 #endif
2451 2494
2468 jint objArray_lh = Klass::array_layout_helper(T_OBJECT); 2511 jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2469 __ cmpl(rax_lh, objArray_lh); 2512 __ cmpl(rax_lh, objArray_lh);
2470 __ jcc(Assembler::equal, L_objArray); 2513 __ jcc(Assembler::equal, L_objArray);
2471 2514
2472 // if (src->klass() != dst->klass()) return -1; 2515 // if (src->klass() != dst->klass()) return -1;
2473 __ cmpq(r10_src_klass, dst_klass_addr); 2516 __ load_klass(r9_dst_klass, dst);
2517 __ cmpq(r10_src_klass, r9_dst_klass);
2474 __ jcc(Assembler::notEqual, L_failed); 2518 __ jcc(Assembler::notEqual, L_failed);
2475 2519
2476 // if (!src->is_Array()) return -1; 2520 // if (!src->is_Array()) return -1;
2477 __ cmpl(rax_lh, Klass::_lh_neutral_value); 2521 __ cmpl(rax_lh, Klass::_lh_neutral_value);
2478 __ jcc(Assembler::greaterEqual, L_failed); 2522 __ jcc(Assembler::greaterEqual, L_failed);
2557 __ BIND(L_objArray); 2601 __ BIND(L_objArray);
2558 // live at this point: r10_src_klass, src[_pos], dst[_pos] 2602 // live at this point: r10_src_klass, src[_pos], dst[_pos]
2559 2603
2560 Label L_plain_copy, L_checkcast_copy; 2604 Label L_plain_copy, L_checkcast_copy;
2561 // test array classes for subtyping 2605 // test array classes for subtyping
2562 __ cmpq(r10_src_klass, dst_klass_addr); // usual case is exact equality 2606 __ load_klass(r9_dst_klass, dst);
2607 __ cmpq(r10_src_klass, r9_dst_klass); // usual case is exact equality
2563 __ jcc(Assembler::notEqual, L_checkcast_copy); 2608 __ jcc(Assembler::notEqual, L_checkcast_copy);
2564 2609
2565 // Identically typed arrays can be copied without element-wise checks. 2610 // Identically typed arrays can be copied without element-wise checks.
2566 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 2611 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
2567 r10, L_failed); 2612 r10, L_failed);
2568 2613
2569 __ leaq(from, Address(src, src_pos, Address::times_8, 2614 __ leaq(from, Address(src, src_pos, TIMES_OOP,
2570 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr 2615 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
2571 __ leaq(to, Address(dst, dst_pos, Address::times_8, 2616 __ leaq(to, Address(dst, dst_pos, TIMES_OOP,
2572 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr 2617 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
2573 __ movslq(count, r11_length); // length 2618 __ movslq(count, r11_length); // length
2574 __ BIND(L_plain_copy); 2619 __ BIND(L_plain_copy);
2575 __ jump(RuntimeAddress(oop_copy_entry)); 2620 __ jump(RuntimeAddress(oop_copy_entry));
2576 2621
2577 __ BIND(L_checkcast_copy); 2622 __ BIND(L_checkcast_copy);
2578 // live at this point: r10_src_klass, !r11_length 2623 // live at this point: r10_src_klass, !r11_length
2579 { 2624 {
2580 // assert(r11_length == C_RARG4); // will reload from here 2625 // assert(r11_length == C_RARG4); // will reload from here
2581 Register r11_dst_klass = r11; 2626 Register r11_dst_klass = r11;
2582 __ movq(r11_dst_klass, dst_klass_addr); 2627 __ load_klass(r11_dst_klass, dst);
2583 2628
2584 // Before looking at dst.length, make sure dst is also an objArray. 2629 // Before looking at dst.length, make sure dst is also an objArray.
2585 __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh); 2630 __ cmpl(Address(r11_dst_klass, lh_offset), objArray_lh);
2586 __ jcc(Assembler::notEqual, L_failed); 2631 __ jcc(Assembler::notEqual, L_failed);
2587 2632
2591 rax, L_failed); 2636 rax, L_failed);
2592 #else 2637 #else
2593 __ movl(r11_length, C_RARG4); // reload 2638 __ movl(r11_length, C_RARG4); // reload
2594 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, 2639 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
2595 rax, L_failed); 2640 rax, L_failed);
2596 __ movl(r11_dst_klass, dst_klass_addr); // reload 2641 __ load_klass(r11_dst_klass, dst); // reload
2597 #endif 2642 #endif
2598 2643
2599 // Marshal the base address arguments now, freeing registers. 2644 // Marshal the base address arguments now, freeing registers.
2600 __ leaq(from, Address(src, src_pos, Address::times_8, 2645 __ leaq(from, Address(src, src_pos, TIMES_OOP,
2601 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 2646 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
2602 __ leaq(to, Address(dst, dst_pos, Address::times_8, 2647 __ leaq(to, Address(dst, dst_pos, TIMES_OOP,
2603 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); 2648 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
2604 __ movl(count, C_RARG4); // length (reloaded) 2649 __ movl(count, C_RARG4); // length (reloaded)
2605 Register sco_temp = c_rarg3; // this register is free now 2650 Register sco_temp = c_rarg3; // this register is free now
2606 assert_different_registers(from, to, count, sco_temp, 2651 assert_different_registers(from, to, count, sco_temp,
2607 r11_dst_klass, r10_src_klass); 2652 r11_dst_klass, r10_src_klass);
2646 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); 2691 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
2647 2692
2648 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); 2693 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
2649 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); 2694 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
2650 2695
2651 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy"); 2696 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy");
2652 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy"); 2697 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy");
2653 2698
2654 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy"); 2699 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, "jlong_disjoint_arraycopy");
2655 StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy"); 2700 StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, "jlong_arraycopy");
2656 2701
2657 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy"); 2702
2658 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy"); 2703 if (UseCompressedOops) {
2704 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, "oop_disjoint_arraycopy");
2705 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, "oop_arraycopy");
2706 } else {
2707 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, "oop_disjoint_arraycopy");
2708 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, "oop_arraycopy");
2709 }
2659 2710
2660 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy"); 2711 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
2661 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy"); 2712 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
2662 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy"); 2713 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
2663 2714