comparison src/cpu/x86/vm/stubGenerator_x86_64.cpp @ 2324:0ac769a57c64

6627983: G1: Bad oop deference during marking Summary: Bulk zeroing reduction didn't work with G1, because arraycopy would call pre-barriers on uninitialized oops. The solution is to have versions of the arraycopy stubs that don't have pre-barriers. Also refactored arraycopy stubs generation on SPARC to be more readable and reduced the number of stubs necessary in some cases. Reviewed-by: jrose, kvn, never
author iveresov
date Tue, 01 Mar 2011 14:56:48 -0800
parents d89a22843c62
children b1c22848507b
comparison
equal deleted inserted replaced
2323:bc6b27fb3568 2324:0ac769a57c64
1157 } 1157 }
1158 1158
1159 // Generate code for an array write pre barrier 1159 // Generate code for an array write pre barrier
1160 // 1160 //
1161 // addr - starting address 1161 // addr - starting address
1162 // count - element count 1162 // count - element count
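1163 // dest_uninitialized - true if the destination is statically known to be uninitialized (with G1 the pre-barrier call is then not generated)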
1163 // 1164 //
1164 // Destroy no registers! 1165 // Destroy no registers!
1165 // 1166 //
1166 void gen_write_ref_array_pre_barrier(Register addr, Register count) { 1167 void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
1167 BarrierSet* bs = Universe::heap()->barrier_set(); 1168 BarrierSet* bs = Universe::heap()->barrier_set();
1168 switch (bs->kind()) { 1169 switch (bs->kind()) {
1169 case BarrierSet::G1SATBCT: 1170 case BarrierSet::G1SATBCT:
1170 case BarrierSet::G1SATBCTLogging: 1171 case BarrierSet::G1SATBCTLogging:
1171 { 1172 // With G1, don't generate the call if we statically know that the target is uninitialized
1172 __ pusha(); // push registers 1173 if (!dest_uninitialized) {
1173 if (count == c_rarg0) { 1174 __ pusha(); // push registers
1174 if (addr == c_rarg1) { 1175 if (count == c_rarg0) {
1175 // exactly backwards!! 1176 if (addr == c_rarg1) {
1176 __ xchgptr(c_rarg1, c_rarg0); 1177 // exactly backwards!!
1177 } else { 1178 __ xchgptr(c_rarg1, c_rarg0);
1178 __ movptr(c_rarg1, count); 1179 } else {
1179 __ movptr(c_rarg0, addr); 1180 __ movptr(c_rarg1, count);
1180 } 1181 __ movptr(c_rarg0, addr);
1181 1182 }
1182 } else { 1183 } else {
1183 __ movptr(c_rarg0, addr); 1184 __ movptr(c_rarg0, addr);
1184 __ movptr(c_rarg1, count); 1185 __ movptr(c_rarg1, count);
1185 } 1186 }
1186 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); 1187 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
1187 __ popa(); 1188 __ popa();
1188 } 1189 }
1189 break; 1190 break;
1190 case BarrierSet::CardTableModRef: 1191 case BarrierSet::CardTableModRef:
1191 case BarrierSet::CardTableExtension: 1192 case BarrierSet::CardTableExtension:
1192 case BarrierSet::ModRef: 1193 case BarrierSet::ModRef:
1193 break; 1194 break;
1194 default: 1195 default:
1767 // 1768 //
1768 // Side Effects: 1769 // Side Effects:
1769 // disjoint_int_copy_entry is set to the no-overlap entry point 1770 // disjoint_int_copy_entry is set to the no-overlap entry point
1770 // used by generate_conjoint_int_oop_copy(). 1771 // used by generate_conjoint_int_oop_copy().
1771 // 1772 //
1772 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name) { 1773 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
1774 const char *name, bool dest_uninitialized = false) {
1773 __ align(CodeEntryAlignment); 1775 __ align(CodeEntryAlignment);
1774 StubCodeMark mark(this, "StubRoutines", name); 1776 StubCodeMark mark(this, "StubRoutines", name);
1775 address start = __ pc(); 1777 address start = __ pc();
1776 1778
1777 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit; 1779 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
1797 1799
1798 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1800 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1799 // r9 and r10 may be used to save non-volatile registers 1801 // r9 and r10 may be used to save non-volatile registers
1800 if (is_oop) { 1802 if (is_oop) {
1801 __ movq(saved_to, to); 1803 __ movq(saved_to, to);
1802 gen_write_ref_array_pre_barrier(to, count); 1804 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1803 } 1805 }
1804 1806
1805 // 'from', 'to' and 'count' are now valid 1807 // 'from', 'to' and 'count' are now valid
1806 __ movptr(dword_count, count); 1808 __ movptr(dword_count, count);
1807 __ shrptr(count, 1); // count => qword_count 1809 __ shrptr(count, 1); // count => qword_count
1858 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let 1860 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
1859 // the hardware handle it. The two dwords within qwords that span 1861 // the hardware handle it. The two dwords within qwords that span
1860 // cache line boundaries will still be loaded and stored atomically. 1862 // cache line boundaries will still be loaded and stored atomically.
1861 // 1863 //
1862 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target, 1864 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
1863 address *entry, const char *name) { 1865 address *entry, const char *name,
1866 bool dest_uninitialized = false) {
1864 __ align(CodeEntryAlignment); 1867 __ align(CodeEntryAlignment);
1865 StubCodeMark mark(this, "StubRoutines", name); 1868 StubCodeMark mark(this, "StubRoutines", name);
1866 address start = __ pc(); 1869 address start = __ pc();
1867 1870
1868 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit; 1871 Label L_copy_32_bytes, L_copy_8_bytes, L_copy_2_bytes, L_exit;
1885 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1888 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1886 // r9 and r10 may be used to save non-volatile registers 1889 // r9 and r10 may be used to save non-volatile registers
1887 1890
1888 if (is_oop) { 1891 if (is_oop) {
1889 // no registers are destroyed by this call 1892 // no registers are destroyed by this call
1890 gen_write_ref_array_pre_barrier(to, count); 1893 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
1891 } 1894 }
1892 1895
1893 assert_clean_int(count, rax); // Make sure 'count' is clean int. 1896 assert_clean_int(count, rax); // Make sure 'count' is clean int.
1894 // 'from', 'to' and 'count' are now valid 1897 // 'from', 'to' and 'count' are now valid
1895 __ movptr(dword_count, count); 1898 __ movptr(dword_count, count);
1951 // 1954 //
1952 // Side Effects: 1955 // Side Effects:
1953 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the 1956 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
1954 // no-overlap entry point used by generate_conjoint_long_oop_copy(). 1957 // no-overlap entry point used by generate_conjoint_long_oop_copy().
1955 // 1958 //
1956 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry, const char *name) { 1959 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
1960 const char *name, bool dest_uninitialized = false) {
1957 __ align(CodeEntryAlignment); 1961 __ align(CodeEntryAlignment);
1958 StubCodeMark mark(this, "StubRoutines", name); 1962 StubCodeMark mark(this, "StubRoutines", name);
1959 address start = __ pc(); 1963 address start = __ pc();
1960 1964
1961 Label L_copy_32_bytes, L_copy_8_bytes, L_exit; 1965 Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
1981 setup_arg_regs(); // from => rdi, to => rsi, count => rdx 1985 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1982 // r9 and r10 may be used to save non-volatile registers 1986 // r9 and r10 may be used to save non-volatile registers
1983 // 'from', 'to' and 'qword_count' are now valid 1987 // 'from', 'to' and 'qword_count' are now valid
1984 if (is_oop) { 1988 if (is_oop) {
1985 // no registers are destroyed by this call 1989 // no registers are destroyed by this call
1986 gen_write_ref_array_pre_barrier(to, qword_count); 1990 gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
1987 } 1991 }
1988 1992
1989 // Copy from low to high addresses. Use 'to' as scratch. 1993 // Copy from low to high addresses. Use 'to' as scratch.
1990 __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); 1994 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1991 __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); 1995 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2036 // Inputs: 2040 // Inputs:
2037 // c_rarg0 - source array address 2041 // c_rarg0 - source array address
2038 // c_rarg1 - destination array address 2042 // c_rarg1 - destination array address
2039 // c_rarg2 - element count, treated as ssize_t, can be zero 2043 // c_rarg2 - element count, treated as ssize_t, can be zero
2040 // 2044 //
2041 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, address nooverlap_target, 2045 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
2042 address *entry, const char *name) { 2046 address nooverlap_target, address *entry,
2047 const char *name, bool dest_uninitialized = false) {
2043 __ align(CodeEntryAlignment); 2048 __ align(CodeEntryAlignment);
2044 StubCodeMark mark(this, "StubRoutines", name); 2049 StubCodeMark mark(this, "StubRoutines", name);
2045 address start = __ pc(); 2050 address start = __ pc();
2046 2051
2047 Label L_copy_32_bytes, L_copy_8_bytes, L_exit; 2052 Label L_copy_32_bytes, L_copy_8_bytes, L_exit;
2065 // 'from', 'to' and 'qword_count' are now valid 2070 // 'from', 'to' and 'qword_count' are now valid
2066 if (is_oop) { 2071 if (is_oop) {
2067 // Save to and count for store barrier 2072 // Save to and count for store barrier
2068 __ movptr(saved_count, qword_count); 2073 __ movptr(saved_count, qword_count);
2069 // No registers are destroyed by this call 2074 // No registers are destroyed by this call
2070 gen_write_ref_array_pre_barrier(to, saved_count); 2075 gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
2071 } 2076 }
2072 2077
2073 __ jmp(L_copy_32_bytes); 2078 __ jmp(L_copy_32_bytes);
2074 2079
2075 // Copy trailing qwords 2080 // Copy trailing qwords
2144 // 2149 //
2145 // Output: 2150 // Output:
2146 // rax == 0 - success 2151 // rax == 0 - success
2147 // rax == -1^K - failure, where K is partial transfer count 2152 // rax == -1^K - failure, where K is partial transfer count
2148 // 2153 //
2149 address generate_checkcast_copy(const char *name, address *entry) { 2154 address generate_checkcast_copy(const char *name, address *entry,
2155 bool dest_uninitialized = false) {
2150 2156
2151 Label L_load_element, L_store_element, L_do_card_marks, L_done; 2157 Label L_load_element, L_store_element, L_do_card_marks, L_done;
2152 2158
2153 // Input registers (after setup_arg_regs) 2159 // Input registers (after setup_arg_regs)
2154 const Register from = rdi; // source array address 2160 const Register from = rdi; // source array address
2238 Address end_to_addr(to, length, TIMES_OOP, 0); 2244 Address end_to_addr(to, length, TIMES_OOP, 0);
2239 // Loop-variant addresses. They assume post-incremented count < 0. 2245 // Loop-variant addresses. They assume post-incremented count < 0.
2240 Address from_element_addr(end_from, count, TIMES_OOP, 0); 2246 Address from_element_addr(end_from, count, TIMES_OOP, 0);
2241 Address to_element_addr(end_to, count, TIMES_OOP, 0); 2247 Address to_element_addr(end_to, count, TIMES_OOP, 0);
2242 2248
2243 gen_write_ref_array_pre_barrier(to, count); 2249 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
2244 2250
2245 // Copy from low to high addresses, indexed from the end of each array. 2251 // Copy from low to high addresses, indexed from the end of each array.
2246 __ lea(end_from, end_from_addr); 2252 __ lea(end_from, end_from_addr);
2247 __ lea(end_to, end_to_addr); 2253 __ lea(end_to, end_to_addr);
2248 __ movptr(r14_length, length); // save a copy of the length 2254 __ movptr(r14_length, length); // save a copy of the length
2748 if (UseCompressedOops) { 2754 if (UseCompressedOops) {
2749 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry, 2755 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry,
2750 "oop_disjoint_arraycopy"); 2756 "oop_disjoint_arraycopy");
2751 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry, 2757 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry,
2752 &entry_oop_arraycopy, "oop_arraycopy"); 2758 &entry_oop_arraycopy, "oop_arraycopy");
2759 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, &entry,
2760 "oop_disjoint_arraycopy_uninit",
2761 /*dest_uninitialized*/true);
2762 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, entry,
2763 NULL, "oop_arraycopy_uninit",
2764 /*dest_uninitialized*/true);
2753 } else { 2765 } else {
2754 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry, 2766 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry,
2755 "oop_disjoint_arraycopy"); 2767 "oop_disjoint_arraycopy");
2756 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry, 2768 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry,
2757 &entry_oop_arraycopy, "oop_arraycopy"); 2769 &entry_oop_arraycopy, "oop_arraycopy");
2758 } 2770 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, &entry,
2759 2771 "oop_disjoint_arraycopy_uninit",
2760 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); 2772 /*dest_uninitialized*/true);
2773 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, entry,
2774 NULL, "oop_arraycopy_uninit",
2775 /*dest_uninitialized*/true);
2776 }
2777
2778 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
2779 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
2780 /*dest_uninitialized*/true);
2781
2761 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", 2782 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
2762 entry_jbyte_arraycopy, 2783 entry_jbyte_arraycopy,
2763 entry_jshort_arraycopy, 2784 entry_jshort_arraycopy,
2764 entry_jint_arraycopy, 2785 entry_jint_arraycopy,
2765 entry_jlong_arraycopy); 2786 entry_jlong_arraycopy);
2792 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; 2813 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
2793 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; 2814 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
2794 2815
2795 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; 2816 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
2796 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; 2817 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
2818
2819 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
2820 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
2797 } 2821 }
2798 2822
2799 void generate_math_stubs() { 2823 void generate_math_stubs() {
2800 { 2824 {
2801 StubCodeMark mark(this, "StubRoutines", "log"); 2825 StubCodeMark mark(this, "StubRoutines", "log");