comparison src/cpu/sparc/vm/assembler_sparc.cpp @ 3839:3d42f82cd811
7063628: Use cbcond on T4
Summary: Add new short branch instruction to Hotspot sparc assembler.
Reviewed-by: never, twisti, jrose
author   | kvn |
date     | Thu, 21 Jul 2011 11:25:07 -0700 |
parents  | cba7b5c2d53f |
children | 4fe626cbf0bf baf763f388e6 |
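This comparison replaces many three-instruction `cmp; br; delayed()->nop()` sequences with new `*_short` macros that emit a single compare-and-branch (cbcond, available on T4) when the target is in range. A minimal before/after sketch, using the register names of one call site visible below (illustrative, not a complete excerpt):

```cpp
// Before: compare, branch on the condition codes, and burn the delay slot.
cmp(O4_temp, O3_bits);
brx(notEqual, false, pn, fail);
delayed()->nop();

// After: one macro; emits cbcond (no delay slot) when the label is near
// and the CPU supports it, otherwise falls back to the old sequence.
cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail);
```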
3838:6a991dcb52bb (before) | 3839:3d42f82cd811 (after) |
---|---|
98 switch (inv_op(inst)) { | 98 switch (inv_op(inst)) { |
99 default: s = "????"; break; | 99 default: s = "????"; break; |
100 case call_op: s = "call"; break; | 100 case call_op: s = "call"; break; |
101 case branch_op: | 101 case branch_op: |
102 switch (inv_op2(inst)) { | 102 switch (inv_op2(inst)) { |
103 case bpr_op2: s = "bpr"; break; | |
104 case fb_op2: s = "fb"; break; | 103 case fb_op2: s = "fb"; break; |
105 case fbp_op2: s = "fbp"; break; | 104 case fbp_op2: s = "fbp"; break; |
106 case br_op2: s = "br"; break; | 105 case br_op2: s = "br"; break; |
107 case bp_op2: s = "bp"; break; | 106 case bp_op2: s = "bp"; break; |
108 case cb_op2: s = "cb"; break; | 107 case cb_op2: s = "cb"; break; |
108 case bpr_op2: { | |
109 if (is_cbcond(inst)) { | |
110 s = is_cxb(inst) ? "cxb" : "cwb"; | |
111 } else { | |
112 s = "bpr"; | |
113 } | |
114 break; | |
115 } | |
109 default: s = "????"; break; | 116 default: s = "????"; break; |
110 } | 117 } |
111 } | 118 } |
112 ::tty->print("%s", s); | 119 ::tty->print("%s", s); |
113 } | 120 } |
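In the disassembler switch above, cbcond is awkward because it reuses `op2 == bpr_op2`: the printer must ask `is_cbcond()` before treating the word as a bpr, and `is_cxb()` then distinguishes the 64-bit (extended) form from the 32-bit (word) form. A hypothetical helper restating that branch of the switch:

```cpp
// Hypothetical helper (not part of the patch): mnemonic selection for
// cbcond. is_cbcond()/is_cxb() are the predicates used in the switch above.
static const char* cbcond_mnemonic(int inst) {
  assert(is_cbcond(inst), "must be a compare-and-branch instruction");
  return is_cxb(inst) ? "cxb"   // compare and branch, 64-bit registers
                      : "cwb";  // compare and branch, 32-bit registers
}
```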
125 switch (inv_op(inst)) { | 132 switch (inv_op(inst)) { |
126 default: ShouldNotReachHere(); | 133 default: ShouldNotReachHere(); |
127 case call_op: m = wdisp(word_aligned_ones, 0, 30); v = wdisp(dest_pos, inst_pos, 30); break; | 134 case call_op: m = wdisp(word_aligned_ones, 0, 30); v = wdisp(dest_pos, inst_pos, 30); break; |
128 case branch_op: | 135 case branch_op: |
129 switch (inv_op2(inst)) { | 136 switch (inv_op2(inst)) { |
130 case bpr_op2: m = wdisp16(word_aligned_ones, 0); v = wdisp16(dest_pos, inst_pos); break; | |
131 case fbp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; | 137 case fbp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; |
132 case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; | 138 case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; |
133 case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; | 139 case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; |
134 case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; | 140 case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; |
135 case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; | 141 case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; |
142 case bpr_op2: { | |
143 if (is_cbcond(inst)) { | |
144 m = wdisp10(word_aligned_ones, 0); | |
145 v = wdisp10(dest_pos, inst_pos); | |
146 } else { | |
147 m = wdisp16(word_aligned_ones, 0); | |
148 v = wdisp16(dest_pos, inst_pos); | |
149 } | |
150 break; | |
151 } | |
136 default: ShouldNotReachHere(); | 152 default: ShouldNotReachHere(); |
137 } | 153 } |
138 } | 154 } |
139 return inst & ~m | v; | 155 return inst & ~m | v; |
140 } | 156 } |
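Patching is the other place the shared `bpr_op2` encoding shows up: cbcond carries only a 10-bit word displacement (`wdisp10`) where bpr has 16 bits, so the mask and value must be built from the narrower field. A hypothetical helper illustrating the reach of that field (the real `wdisp10`/`inv_wdisp10` are defined elsewhere in this change, not shown here):

```cpp
// Hypothetical illustration (not in the patch): a 10-bit word displacement
// is sign-extended and counts 4-byte words, so cbcond spans only about
// -2048..+2044 bytes; anything farther must keep the bpr/br encodings.
static intptr_t wdisp10_byte_span(int d10) {
  int32_t sext = (int32_t)((uint32_t)d10 << 22) >> 22; // sign-extend 10 bits
  return (intptr_t)sext * 4;                           // words -> bytes
}
```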
147 switch (inv_op(inst)) { | 163 switch (inv_op(inst)) { |
148 default: ShouldNotReachHere(); | 164 default: ShouldNotReachHere(); |
149 case call_op: r = inv_wdisp(inst, pos, 30); break; | 165 case call_op: r = inv_wdisp(inst, pos, 30); break; |
150 case branch_op: | 166 case branch_op: |
151 switch (inv_op2(inst)) { | 167 switch (inv_op2(inst)) { |
152 case bpr_op2: r = inv_wdisp16(inst, pos); break; | |
153 case fbp_op2: r = inv_wdisp( inst, pos, 19); break; | 168 case fbp_op2: r = inv_wdisp( inst, pos, 19); break; |
154 case bp_op2: r = inv_wdisp( inst, pos, 19); break; | 169 case bp_op2: r = inv_wdisp( inst, pos, 19); break; |
155 case fb_op2: r = inv_wdisp( inst, pos, 22); break; | 170 case fb_op2: r = inv_wdisp( inst, pos, 22); break; |
156 case br_op2: r = inv_wdisp( inst, pos, 22); break; | 171 case br_op2: r = inv_wdisp( inst, pos, 22); break; |
157 case cb_op2: r = inv_wdisp( inst, pos, 22); break; | 172 case cb_op2: r = inv_wdisp( inst, pos, 22); break; |
173 case bpr_op2: { | |
174 if (is_cbcond(inst)) { | |
175 r = inv_wdisp10(inst, pos); | |
176 } else { | |
177 r = inv_wdisp16(inst, pos); | |
178 } | |
179 break; | |
180 } | |
158 default: ShouldNotReachHere(); | 181 default: ShouldNotReachHere(); |
159 } | 182 } |
160 } | 183 } |
161 return r; | 184 return r; |
162 } | 185 } |
966 #ifdef ASSERT | 989 #ifdef ASSERT |
967 // Verify that flags was zeroed on return to Java | 990 // Verify that flags was zeroed on return to Java |
968 Label PcOk; | 991 Label PcOk; |
969 save_frame(0); // to avoid clobbering O0 | 992 save_frame(0); // to avoid clobbering O0 |
970 ld_ptr(pc_addr, L0); | 993 ld_ptr(pc_addr, L0); |
971 tst(L0); | 994 br_null_short(L0, Assembler::pt, PcOk); |
972 #ifdef _LP64 | |
973 brx(Assembler::zero, false, Assembler::pt, PcOk); | |
974 #else | |
975 br(Assembler::zero, false, Assembler::pt, PcOk); | |
976 #endif // _LP64 | |
977 delayed() -> nop(); | |
978 stop("last_Java_pc not zeroed before leaving Java"); | 995 stop("last_Java_pc not zeroed before leaving Java"); |
979 bind(PcOk); | 996 bind(PcOk); |
980 | 997 |
981 // Verify that flags was zeroed on return to Java | 998 // Verify that flags was zeroed on return to Java |
982 Label FlagsOk; | 999 Label FlagsOk; |
1001 #ifdef ASSERT | 1018 #ifdef ASSERT |
1002 // Make sure that we have an odd stack | 1019 // Make sure that we have an odd stack |
1003 Label StackOk; | 1020 Label StackOk; |
1004 andcc(last_java_sp, 0x01, G0); | 1021 andcc(last_java_sp, 0x01, G0); |
1005 br(Assembler::notZero, false, Assembler::pt, StackOk); | 1022 br(Assembler::notZero, false, Assembler::pt, StackOk); |
1006 delayed() -> nop(); | 1023 delayed()->nop(); |
1007 stop("Stack Not Biased in set_last_Java_frame"); | 1024 stop("Stack Not Biased in set_last_Java_frame"); |
1008 bind(StackOk); | 1025 bind(StackOk); |
1009 #endif // ASSERT | 1026 #endif // ASSERT |
1010 assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame"); | 1027 assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame"); |
1011 add( last_java_sp, STACK_BIAS, G4_scratch ); | 1028 add( last_java_sp, STACK_BIAS, G4_scratch ); |
1097 check_and_handle_popframe(scratch_reg); | 1114 check_and_handle_popframe(scratch_reg); |
1098 check_and_handle_earlyret(scratch_reg); | 1115 check_and_handle_earlyret(scratch_reg); |
1099 | 1116 |
1100 Address exception_addr(G2_thread, Thread::pending_exception_offset()); | 1117 Address exception_addr(G2_thread, Thread::pending_exception_offset()); |
1101 ld_ptr(exception_addr, scratch_reg); | 1118 ld_ptr(exception_addr, scratch_reg); |
1102 br_null(scratch_reg,false,pt,L); | 1119 br_null_short(scratch_reg, pt, L); |
1103 delayed()->nop(); | |
1104 // we use O7 linkage so that forward_exception_entry has the issuing PC | 1120 // we use O7 linkage so that forward_exception_entry has the issuing PC |
1105 call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); | 1121 call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); |
1106 delayed()->nop(); | 1122 delayed()->nop(); |
1107 bind(L); | 1123 bind(L); |
1108 } | 1124 } |
1872 set(Universe::verify_oop_mask (), O2_mask); | 1888 set(Universe::verify_oop_mask (), O2_mask); |
1873 set(Universe::verify_oop_bits (), O3_bits); | 1889 set(Universe::verify_oop_bits (), O3_bits); |
1874 | 1890 |
1875 // assert((obj & oop_mask) == oop_bits); | 1891 // assert((obj & oop_mask) == oop_bits); |
1876 and3(O0_obj, O2_mask, O4_temp); | 1892 and3(O0_obj, O2_mask, O4_temp); |
1877 cmp(O4_temp, O3_bits); | 1893 cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail); |
1878 brx(notEqual, false, pn, null_or_fail); | |
1879 delayed()->nop(); | |
1880 | 1894 |
1881 if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) { | 1895 if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) { |
1882 // the null_or_fail case is useless; must test for null separately | 1896 // the null_or_fail case is useless; must test for null separately |
1883 br_null(O0_obj, false, pn, succeed); | 1897 br_null_short(O0_obj, pn, succeed); |
1884 delayed()->nop(); | |
1885 } | 1898 } |
1886 | 1899 |
1887 // Check the klassOop of this object for being in the right area of memory. | 1900 // Check the klassOop of this object for being in the right area of memory. |
1888 // Cannot do the load in the delay above slot in case O0 is null | 1901 // Cannot do the load in the delay above slot in case O0 is null |
1889 load_klass(O0_obj, O0_obj); | 1902 load_klass(O0_obj, O0_obj); |
1891 if( Universe::verify_klass_mask() != Universe::verify_oop_mask() ) | 1904 if( Universe::verify_klass_mask() != Universe::verify_oop_mask() ) |
1892 set(Universe::verify_klass_mask(), O2_mask); | 1905 set(Universe::verify_klass_mask(), O2_mask); |
1893 if( Universe::verify_klass_bits() != Universe::verify_oop_bits() ) | 1906 if( Universe::verify_klass_bits() != Universe::verify_oop_bits() ) |
1894 set(Universe::verify_klass_bits(), O3_bits); | 1907 set(Universe::verify_klass_bits(), O3_bits); |
1895 and3(O0_obj, O2_mask, O4_temp); | 1908 and3(O0_obj, O2_mask, O4_temp); |
1896 cmp(O4_temp, O3_bits); | 1909 cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail); |
1897 brx(notEqual, false, pn, fail); | |
1898 delayed()->nop(); | |
1899 // Check the klass's klass | 1910 // Check the klass's klass |
1900 load_klass(O0_obj, O0_obj); | 1911 load_klass(O0_obj, O0_obj); |
1901 and3(O0_obj, O2_mask, O4_temp); | 1912 and3(O0_obj, O2_mask, O4_temp); |
1902 cmp(O4_temp, O3_bits); | 1913 cmp(O4_temp, O3_bits); |
1903 brx(notEqual, false, pn, fail); | 1914 brx(notEqual, false, pn, fail); |
2120 } | 2131 } |
2121 ShouldNotReachHere(); | 2132 ShouldNotReachHere(); |
2122 return Assembler::rc_z; | 2133 return Assembler::rc_z; |
2123 } | 2134 } |
2124 | 2135 |
2125 // compares register with zero and branches. NOT FOR USE WITH 64-bit POINTERS | 2136 // compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS |
2126 void MacroAssembler::br_zero( Condition c, bool a, Predict p, Register s1, Label& L) { | 2137 void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) { |
2127 tst(s1); | 2138 tst(s1); |
2128 br (c, a, p, L); | 2139 br (c, a, p, L); |
2129 } | 2140 } |
2130 | |
2131 | 2141 |
2132 // Compares a pointer register with zero and branches on null. | 2142 // Compares a pointer register with zero and branches on null. |
2133 // Does a test & branch on 32-bit systems and a register-branch on 64-bit. | 2143 // Does a test & branch on 32-bit systems and a register-branch on 64-bit. |
2134 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) { | 2144 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) { |
2135 assert_not_delayed(); | 2145 assert_not_delayed(); |
2152 } | 2162 } |
2153 | 2163 |
2154 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, | 2164 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, |
2155 Register s1, address d, | 2165 Register s1, address d, |
2156 relocInfo::relocType rt ) { | 2166 relocInfo::relocType rt ) { |
2167 assert_not_delayed(); | |
2157 if (VM_Version::v9_instructions_work()) { | 2168 if (VM_Version::v9_instructions_work()) { |
2158 bpr(rc, a, p, s1, d, rt); | 2169 bpr(rc, a, p, s1, d, rt); |
2159 } else { | 2170 } else { |
2160 tst(s1); | 2171 tst(s1); |
2161 br(reg_cond_to_cc_cond(rc), a, p, d, rt); | 2172 br(reg_cond_to_cc_cond(rc), a, p, d, rt); |
2162 } | 2173 } |
2163 } | 2174 } |
2164 | 2175 |
2165 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, | 2176 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, |
2166 Register s1, Label& L ) { | 2177 Register s1, Label& L ) { |
2178 assert_not_delayed(); | |
2167 if (VM_Version::v9_instructions_work()) { | 2179 if (VM_Version::v9_instructions_work()) { |
2168 bpr(rc, a, p, s1, L); | 2180 bpr(rc, a, p, s1, L); |
2169 } else { | 2181 } else { |
2170 tst(s1); | 2182 tst(s1); |
2171 br(reg_cond_to_cc_cond(rc), a, p, L); | 2183 br(reg_cond_to_cc_cond(rc), a, p, L); |
2172 } | 2184 } |
2173 } | 2185 } |
2174 | 2186 |
2187 // Compare registers and branch with nop in delay slot or cbcond without delay slot. | |
2188 | |
2189 // Compare integer (32 bit) values (icc only). | |
2190 void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c, | |
2191 Predict p, Label& L) { | |
2192 assert_not_delayed(); | |
2193 if (use_cbcond(L)) { | |
2194 Assembler::cbcond(c, icc, s1, s2, L); | |
2195 } else { | |
2196 cmp(s1, s2); | |
2197 br(c, false, p, L); | |
2198 delayed()->nop(); | |
2199 } | |
2200 } | |
2201 | |
2202 // Compare integer (32 bit) values (icc only). | |
2203 void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c, | |
2204 Predict p, Label& L) { | |
2205 assert_not_delayed(); | |
2206 if (is_simm(simm13a,5) && use_cbcond(L)) { | |
2207 Assembler::cbcond(c, icc, s1, simm13a, L); | |
2208 } else { | |
2209 cmp(s1, simm13a); | |
2210 br(c, false, p, L); | |
2211 delayed()->nop(); | |
2212 } | |
2213 } | |
2214 | |
2215 // Branch that tests xcc in LP64 and icc in !LP64 | |
2216 void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c, | |
2217 Predict p, Label& L) { | |
2218 assert_not_delayed(); | |
2219 if (use_cbcond(L)) { | |
2220 Assembler::cbcond(c, ptr_cc, s1, s2, L); | |
2221 } else { | |
2222 cmp(s1, s2); | |
2223 brx(c, false, p, L); | |
2224 delayed()->nop(); | |
2225 } | |
2226 } | |
2227 | |
2228 // Branch that tests xcc in LP64 and icc in !LP64 | |
2229 void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c, | |
2230 Predict p, Label& L) { | |
2231 assert_not_delayed(); | |
2232 if (is_simm(simm13a,5) && use_cbcond(L)) { | |
2233 Assembler::cbcond(c, ptr_cc, s1, simm13a, L); | |
2234 } else { | |
2235 cmp(s1, simm13a); | |
2236 brx(c, false, p, L); | |
2237 delayed()->nop(); | |
2238 } | |
2239 } | |
2240 | |
2241 // Short branch version for compares a pointer with zero. | |
2242 | |
2243 void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) { | |
2244 assert_not_delayed(); | |
2245 if (use_cbcond(L)) { | |
2246 Assembler::cbcond(zero, ptr_cc, s1, 0, L); | |
2247 return; | |
2248 } | |
2249 br_null(s1, false, p, L); | |
2250 delayed()->nop(); | |
2251 } | |
2252 | |
2253 void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) { | |
2254 assert_not_delayed(); | |
2255 if (use_cbcond(L)) { | |
2256 Assembler::cbcond(notZero, ptr_cc, s1, 0, L); | |
2257 return; | |
2258 } | |
2259 br_notnull(s1, false, p, L); | |
2260 delayed()->nop(); | |
2261 } | |
2262 | |
2263 // Unconditional short branch | |
2264 void MacroAssembler::ba_short(Label& L) { | |
2265 if (use_cbcond(L)) { | |
2266 Assembler::cbcond(equal, icc, G0, G0, L); | |
2267 return; | |
2268 } | |
2269 br(always, false, pt, L); | |
2270 delayed()->nop(); | |
2271 } | |
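These helpers are the public face of the change: each first asks `use_cbcond(L)` (presumably a check that the CPU has cbcond and that the label's distance fits the short displacement) and otherwise falls back to the classic compare/branch/nop. Note the extra `is_simm(simm13a, 5)` guard on the immediate forms: cbcond's immediate operand holds only 5 bits, even though the fallback `cmp` accepts a full simm13. A hedged usage sketch with a hypothetical label:

```cpp
// Illustrative only; the label and register choices are hypothetical.
Label L_done;
cmp_and_br_short(G3, 1, Assembler::equal, Assembler::pt, L_done);  // icc
br_null_short(G4, Assembler::pn, L_done);  // pointer test: xcc on LP64
ba_short(L_done);                          // unconditional short branch
bind(L_done);
```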
2175 | 2272 |
2176 // instruction sequences factored across compiler & interpreter | 2273 // instruction sequences factored across compiler & interpreter |
2177 | 2274 |
2178 | 2275 |
2179 void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low, | 2276 void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low, |
2195 // (and therefore probably prefetchable). | 2292 // (and therefore probably prefetchable). |
2196 // And the equals case for the high part does not need testing, | 2293 // And the equals case for the high part does not need testing, |
2197 // since that triplet is reached only after finding the high halves differ. | 2294 // since that triplet is reached only after finding the high halves differ. |
2198 | 2295 |
2199 if (VM_Version::v9_instructions_work()) { | 2296 if (VM_Version::v9_instructions_work()) { |
2200 | 2297 mov(-1, Rresult); |
2201 mov ( -1, Rresult); | 2298 ba(done); delayed()-> movcc(greater, false, icc, 1, Rresult); |
2202 ba( false, done ); delayed()-> movcc(greater, false, icc, 1, Rresult); | 2299 } else { |
2203 } | |
2204 else { | |
2205 br(less, true, pt, done); delayed()-> set(-1, Rresult); | 2300 br(less, true, pt, done); delayed()-> set(-1, Rresult); |
2206 br(greater, true, pt, done); delayed()-> set( 1, Rresult); | 2301 br(greater, true, pt, done); delayed()-> set( 1, Rresult); |
2207 } | 2302 } |
2208 | 2303 |
2209 bind( check_low_parts ); | 2304 bind( check_low_parts ); |
2210 | 2305 |
2211 if (VM_Version::v9_instructions_work()) { | 2306 if (VM_Version::v9_instructions_work()) { |
2212 mov( -1, Rresult); | 2307 mov( -1, Rresult); |
2213 movcc(equal, false, icc, 0, Rresult); | 2308 movcc(equal, false, icc, 0, Rresult); |
2214 movcc(greaterUnsigned, false, icc, 1, Rresult); | 2309 movcc(greaterUnsigned, false, icc, 1, Rresult); |
2215 } | 2310 } else { |
2216 else { | 2311 set(-1, Rresult); |
2217 set(-1, Rresult); | |
2218 br(equal, true, pt, done); delayed()->set( 0, Rresult); | 2312 br(equal, true, pt, done); delayed()->set( 0, Rresult); |
2219 br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult); | 2313 br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult); |
2220 } | 2314 } |
2221 bind( done ); | 2315 bind( done ); |
2222 } | 2316 } |
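The v9 path of `lcmp` computes the canonical three-way compare with conditional moves instead of branches: start at -1, overwrite with 1 on greater for the high halves, then 0/1 for the low halves, comparing the low halves unsigned. An illustrative model of the value it produces:

```cpp
// Illustrative C++ model of the value lcmp leaves in Rresult.
static int lcmp_model(int64_t a, int64_t b) {
  int r = -1;               // assume less
  if (a == b) r = 0;        // movcc(equal, ...)
  if (a >  b) r = 1;        // movcc(greater / greaterUnsigned, ...)
  return r;
}
```

The `#ifdef _LP64` version further down performs the same trick in one shot on `xcc`.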
2248 Label big_shift, done; | 2342 Label big_shift, done; |
2249 | 2343 |
2250 // This code can be optimized to use the 64 bit shifts in V9. | 2344 // This code can be optimized to use the 64 bit shifts in V9. |
2251 // Here we use the 32 bit shifts. | 2345 // Here we use the 32 bit shifts. |
2252 | 2346 |
2253 and3( Rcount, 0x3f, Rcount); // take least significant 6 bits | 2347 and3( Rcount, 0x3f, Rcount); // take least significant 6 bits |
2254 subcc(Rcount, 31, Ralt_count); | 2348 subcc(Rcount, 31, Ralt_count); |
2255 br(greater, true, pn, big_shift); | 2349 br(greater, true, pn, big_shift); |
2256 delayed()-> | 2350 delayed()->dec(Ralt_count); |
2257 dec(Ralt_count); | |
2258 | 2351 |
2259 // shift < 32 bits, Ralt_count = Rcount-31 | 2352 // shift < 32 bits, Ralt_count = Rcount-31 |
2260 | 2353 |
2261 // We get the transfer bits by shifting right by 32-count the low | 2354 // We get the transfer bits by shifting right by 32-count the low |
2262 // register. This is done by shifting right by 31-count and then by one | 2355 // register. This is done by shifting right by 31-count and then by one |
2263 // more to take care of the special (rare) case where count is zero | 2356 // more to take care of the special (rare) case where count is zero |
2264 // (shifting by 32 would not work). | 2357 // (shifting by 32 would not work). |
2265 | 2358 |
2266 neg( Ralt_count ); | 2359 neg(Ralt_count); |
2267 | 2360 |
2268 // The order of the next two instructions is critical in the case where | 2361 // The order of the next two instructions is critical in the case where |
2269 // Rin and Rout are the same and should not be reversed. | 2362 // Rin and Rout are the same and should not be reversed. |
2270 | 2363 |
2271 srl( Rin_low, Ralt_count, Rxfer_bits ); // shift right by 31-count | 2364 srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count |
2272 if (Rcount != Rout_low) { | 2365 if (Rcount != Rout_low) { |
2273 sll( Rin_low, Rcount, Rout_low ); // low half | 2366 sll(Rin_low, Rcount, Rout_low); // low half |
2274 } | 2367 } |
2275 sll( Rin_high, Rcount, Rout_high ); | 2368 sll(Rin_high, Rcount, Rout_high); |
2276 if (Rcount == Rout_low) { | 2369 if (Rcount == Rout_low) { |
2277 sll( Rin_low, Rcount, Rout_low ); // low half | 2370 sll(Rin_low, Rcount, Rout_low); // low half |
2278 } | 2371 } |
2279 srl( Rxfer_bits, 1, Rxfer_bits ); // shift right by one more | 2372 srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more |
2280 ba (false, done); | 2373 ba(done); |
2281 delayed()-> | 2374 delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low |
2282 or3( Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low | |
2283 | 2375 |
2284 // shift >= 32 bits, Ralt_count = Rcount-32 | 2376 // shift >= 32 bits, Ralt_count = Rcount-32 |
2285 bind(big_shift); | 2377 bind(big_shift); |
2286 sll( Rin_low, Ralt_count, Rout_high ); | 2378 sll(Rin_low, Ralt_count, Rout_high ); |
2287 clr( Rout_low ); | 2379 clr(Rout_low); |
2288 | 2380 |
2289 bind(done); | 2381 bind(done); |
2290 } | 2382 } |
2291 | 2383 |
2292 | 2384 |
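This routine (and the `sra`/`srl` siblings that follow) synthesizes a 64-bit shift from 32-bit operations. The two-step transfer shift, by `31 - count` and then by one more, exists because a single shift by `32 - count` would be a shift by 32 when `count == 0`, which 32-bit shifters cannot do. An illustrative model, assuming `count` is already masked to 6 bits as above:

```cpp
// Illustrative C++ model of the 32-bit-piece left shift (lshl) above.
static void lshl_model(uint32_t hi, uint32_t lo, int count,
                       uint32_t& out_hi, uint32_t& out_lo) {
  if (count > 31) {                            // the "big shift" path
    out_hi = lo << (count - 32);
    out_lo = 0;
  } else {
    uint32_t xfer = (lo >> (31 - count)) >> 1; // == lo >> (32-count), safe at 0
    out_hi = (hi << count) | xfer;
    out_lo = lo << count;
  }
}
```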
2311 Label big_shift, done; | 2403 Label big_shift, done; |
2312 | 2404 |
2313 // This code can be optimized to use the 64 bit shifts in V9. | 2405 // This code can be optimized to use the 64 bit shifts in V9. |
2314 // Here we use the 32 bit shifts. | 2406 // Here we use the 32 bit shifts. |
2315 | 2407 |
2316 and3( Rcount, 0x3f, Rcount); // take least significant 6 bits | 2408 and3( Rcount, 0x3f, Rcount); // take least significant 6 bits |
2317 subcc(Rcount, 31, Ralt_count); | 2409 subcc(Rcount, 31, Ralt_count); |
2318 br(greater, true, pn, big_shift); | 2410 br(greater, true, pn, big_shift); |
2319 delayed()->dec(Ralt_count); | 2411 delayed()->dec(Ralt_count); |
2320 | 2412 |
2321 // shift < 32 bits, Ralt_count = Rcount-31 | 2413 // shift < 32 bits, Ralt_count = Rcount-31 |
2322 | 2414 |
2323 // We get the transfer bits by shifting left by 32-count the high | 2415 // We get the transfer bits by shifting left by 32-count the high |
2324 // register. This is done by shifting left by 31-count and then by one | 2416 // register. This is done by shifting left by 31-count and then by one |
2325 // more to take care of the special (rare) case where count is zero | 2417 // more to take care of the special (rare) case where count is zero |
2326 // (shifting by 32 would not work). | 2418 // (shifting by 32 would not work). |
2327 | 2419 |
2328 neg( Ralt_count ); | 2420 neg(Ralt_count); |
2329 if (Rcount != Rout_low) { | 2421 if (Rcount != Rout_low) { |
2330 srl( Rin_low, Rcount, Rout_low ); | 2422 srl(Rin_low, Rcount, Rout_low); |
2331 } | 2423 } |
2332 | 2424 |
2333 // The order of the next two instructions is critical in the case where | 2425 // The order of the next two instructions is critical in the case where |
2334 // Rin and Rout are the same and should not be reversed. | 2426 // Rin and Rout are the same and should not be reversed. |
2335 | 2427 |
2336 sll( Rin_high, Ralt_count, Rxfer_bits ); // shift left by 31-count | 2428 sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count |
2337 sra( Rin_high, Rcount, Rout_high ); // high half | 2429 sra(Rin_high, Rcount, Rout_high ); // high half |
2338 sll( Rxfer_bits, 1, Rxfer_bits ); // shift left by one more | 2430 sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more |
2339 if (Rcount == Rout_low) { | 2431 if (Rcount == Rout_low) { |
2340 srl( Rin_low, Rcount, Rout_low ); | 2432 srl(Rin_low, Rcount, Rout_low); |
2341 } | 2433 } |
2342 ba (false, done); | 2434 ba(done); |
2343 delayed()-> | 2435 delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high |
2344 or3( Rout_low, Rxfer_bits, Rout_low ); // new low value: or shifted old low part and xfer from high | |
2345 | 2436 |
2346 // shift >= 32 bits, Ralt_count = Rcount-32 | 2437 // shift >= 32 bits, Ralt_count = Rcount-32 |
2347 bind(big_shift); | 2438 bind(big_shift); |
2348 | 2439 |
2349 sra( Rin_high, Ralt_count, Rout_low ); | 2440 sra(Rin_high, Ralt_count, Rout_low); |
2350 sra( Rin_high, 31, Rout_high ); // sign into hi | 2441 sra(Rin_high, 31, Rout_high); // sign into hi |
2351 | 2442 |
2352 bind( done ); | 2443 bind( done ); |
2353 } | 2444 } |
2354 | 2445 |
2355 | 2446 |
2375 Label big_shift, done; | 2466 Label big_shift, done; |
2376 | 2467 |
2377 // This code can be optimized to use the 64 bit shifts in V9. | 2468 // This code can be optimized to use the 64 bit shifts in V9. |
2378 // Here we use the 32 bit shifts. | 2469 // Here we use the 32 bit shifts. |
2379 | 2470 |
2380 and3( Rcount, 0x3f, Rcount); // take least significant 6 bits | 2471 and3( Rcount, 0x3f, Rcount); // take least significant 6 bits |
2381 subcc(Rcount, 31, Ralt_count); | 2472 subcc(Rcount, 31, Ralt_count); |
2382 br(greater, true, pn, big_shift); | 2473 br(greater, true, pn, big_shift); |
2383 delayed()->dec(Ralt_count); | 2474 delayed()->dec(Ralt_count); |
2384 | 2475 |
2385 // shift < 32 bits, Ralt_count = Rcount-31 | 2476 // shift < 32 bits, Ralt_count = Rcount-31 |
2386 | 2477 |
2387 // We get the transfer bits by shifting left by 32-count the high | 2478 // We get the transfer bits by shifting left by 32-count the high |
2388 // register. This is done by shifting left by 31-count and then by one | 2479 // register. This is done by shifting left by 31-count and then by one |
2389 // more to take care of the special (rare) case where count is zero | 2480 // more to take care of the special (rare) case where count is zero |
2390 // (shifting by 32 would not work). | 2481 // (shifting by 32 would not work). |
2391 | 2482 |
2392 neg( Ralt_count ); | 2483 neg(Ralt_count); |
2393 if (Rcount != Rout_low) { | 2484 if (Rcount != Rout_low) { |
2394 srl( Rin_low, Rcount, Rout_low ); | 2485 srl(Rin_low, Rcount, Rout_low); |
2395 } | 2486 } |
2396 | 2487 |
2397 // The order of the next two instructions is critical in the case where | 2488 // The order of the next two instructions is critical in the case where |
2398 // Rin and Rout are the same and should not be reversed. | 2489 // Rin and Rout are the same and should not be reversed. |
2399 | 2490 |
2400 sll( Rin_high, Ralt_count, Rxfer_bits ); // shift left by 31-count | 2491 sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count |
2401 srl( Rin_high, Rcount, Rout_high ); // high half | 2492 srl(Rin_high, Rcount, Rout_high ); // high half |
2402 sll( Rxfer_bits, 1, Rxfer_bits ); // shift left by one more | 2493 sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more |
2403 if (Rcount == Rout_low) { | 2494 if (Rcount == Rout_low) { |
2404 srl( Rin_low, Rcount, Rout_low ); | 2495 srl(Rin_low, Rcount, Rout_low); |
2405 } | 2496 } |
2406 ba (false, done); | 2497 ba(done); |
2407 delayed()-> | 2498 delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high |
2408 or3( Rout_low, Rxfer_bits, Rout_low ); // new low value: or shifted old low part and xfer from high | |
2409 | 2499 |
2410 // shift >= 32 bits, Ralt_count = Rcount-32 | 2500 // shift >= 32 bits, Ralt_count = Rcount-32 |
2411 bind(big_shift); | 2501 bind(big_shift); |
2412 | 2502 |
2413 srl( Rin_high, Ralt_count, Rout_low ); | 2503 srl(Rin_high, Ralt_count, Rout_low); |
2414 clr( Rout_high ); | 2504 clr(Rout_high); |
2415 | 2505 |
2416 bind( done ); | 2506 bind( done ); |
2417 } | 2507 } |
2418 | 2508 |
2419 #ifdef _LP64 | 2509 #ifdef _LP64 |
2420 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) { | 2510 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) { |
2421 cmp(Ra, Rb); | 2511 cmp(Ra, Rb); |
2422 mov( -1, Rresult); | 2512 mov(-1, Rresult); |
2423 movcc(equal, false, xcc, 0, Rresult); | 2513 movcc(equal, false, xcc, 0, Rresult); |
2424 movcc(greater, false, xcc, 1, Rresult); | 2514 movcc(greater, false, xcc, 1, Rresult); |
2425 } | 2515 } |
2426 #endif | 2516 #endif |
2427 | 2517 |
2457 Condition eq = f_equal; | 2547 Condition eq = f_equal; |
2458 Condition gt = unordered_result == 1 ? f_unorderedOrGreater : f_greater; | 2548 Condition gt = unordered_result == 1 ? f_unorderedOrGreater : f_greater; |
2459 | 2549 |
2460 if (VM_Version::v9_instructions_work()) { | 2550 if (VM_Version::v9_instructions_work()) { |
2461 | 2551 |
2462 mov( -1, Rresult ); | 2552 mov(-1, Rresult); |
2463 movcc( eq, true, fcc0, 0, Rresult ); | 2553 movcc(eq, true, fcc0, 0, Rresult); |
2464 movcc( gt, true, fcc0, 1, Rresult ); | 2554 movcc(gt, true, fcc0, 1, Rresult); |
2465 | 2555 |
2466 } else { | 2556 } else { |
2467 Label done; | 2557 Label done; |
2468 | 2558 |
2469 set( -1, Rresult ); | 2559 set( -1, Rresult ); |
2470 //fb(lt, true, pn, done); delayed()->set( -1, Rresult ); | 2560 //fb(lt, true, pn, done); delayed()->set( -1, Rresult ); |
2471 fb( eq, true, pn, done); delayed()->set( 0, Rresult ); | 2561 fb( eq, true, pn, done); delayed()->set( 0, Rresult ); |
2472 fb( gt, true, pn, done); delayed()->set( 1, Rresult ); | 2562 fb( gt, true, pn, done); delayed()->set( 1, Rresult ); |
2473 | 2563 |
2474 bind (done); | 2564 bind (done); |
2666 mov(G0,yield_reg); | 2756 mov(G0,yield_reg); |
2667 mov(G0, yieldall_reg); | 2757 mov(G0, yieldall_reg); |
2668 set(StubRoutines::Sparc::locked, lock_reg); | 2758 set(StubRoutines::Sparc::locked, lock_reg); |
2669 | 2759 |
2670 bind(retry_get_lock); | 2760 bind(retry_get_lock); |
2671 cmp(yield_reg, V8AtomicOperationUnderLockSpinCount); | 2761 cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield); |
2672 br(Assembler::less, false, Assembler::pt, dont_yield); | |
2673 delayed()->nop(); | |
2674 | 2762 |
2675 if(use_call_vm) { | 2763 if(use_call_vm) { |
2676 Untested("Need to verify global reg consistancy"); | 2764 Untested("Need to verify global reg consistancy"); |
2677 call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg); | 2765 call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg); |
2678 } else { | 2766 } else { |
2698 br(Assembler::notEqual, true, Assembler::pn, retry_get_lock); | 2786 br(Assembler::notEqual, true, Assembler::pn, retry_get_lock); |
2699 delayed()->add(yield_reg,1,yield_reg); | 2787 delayed()->add(yield_reg,1,yield_reg); |
2700 | 2788 |
2701 // yes, got lock. do we have the same top? | 2789 // yes, got lock. do we have the same top? |
2702 ld(top_ptr_reg_after_save, 0, value_reg); | 2790 ld(top_ptr_reg_after_save, 0, value_reg); |
2703 cmp(value_reg, top_reg_after_save); | 2791 cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same); |
2704 br(Assembler::notEqual, false, Assembler::pn, not_same); | |
2705 delayed()->nop(); | |
2706 | 2792 |
2707 // yes, same top. | 2793 // yes, same top. |
2708 st(ptr_reg_after_save, top_ptr_reg_after_save, 0); | 2794 st(ptr_reg_after_save, top_ptr_reg_after_save, 0); |
2709 membar(Assembler::StoreStore); | 2795 membar(Assembler::StoreStore); |
2710 | 2796 |
2950 L2, L3, L4, L5, | 3036 L2, L3, L4, L5, |
2951 NULL, &L_pop_to_failure); | 3037 NULL, &L_pop_to_failure); |
2952 | 3038 |
2953 // on success: | 3039 // on success: |
2954 restore(); | 3040 restore(); |
2955 ba(false, L_success); | 3041 ba_short(L_success); |
2956 delayed()->nop(); | |
2957 | 3042 |
2958 // on failure: | 3043 // on failure: |
2959 bind(L_pop_to_failure); | 3044 bind(L_pop_to_failure); |
2960 restore(); | 3045 restore(); |
2961 bind(L_failure); | 3046 bind(L_failure); |
2967 Register temp_reg, | 3052 Register temp_reg, |
2968 Register temp2_reg, | 3053 Register temp2_reg, |
2969 Label* L_success, | 3054 Label* L_success, |
2970 Label* L_failure, | 3055 Label* L_failure, |
2971 Label* L_slow_path, | 3056 Label* L_slow_path, |
2972 RegisterOrConstant super_check_offset, | 3057 RegisterOrConstant super_check_offset) { |
2973 Register instanceof_hack) { | |
2974 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + | 3058 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + |
2975 Klass::secondary_super_cache_offset_in_bytes()); | 3059 Klass::secondary_super_cache_offset_in_bytes()); |
2976 int sco_offset = (klassOopDesc::header_size() * HeapWordSize + | 3060 int sco_offset = (klassOopDesc::header_size() * HeapWordSize + |
2977 Klass::super_check_offset_offset_in_bytes()); | 3061 Klass::super_check_offset_offset_in_bytes()); |
2978 | 3062 |
2991 Label L_fallthrough; | 3075 Label L_fallthrough; |
2992 int label_nulls = 0; | 3076 int label_nulls = 0; |
2993 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } | 3077 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } |
2994 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } | 3078 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } |
2995 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } | 3079 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } |
2996 assert(label_nulls <= 1 || instanceof_hack != noreg || | 3080 assert(label_nulls <= 1 || |
2997 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), | 3081 (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), |
2998 "at most one NULL in the batch, usually"); | 3082 "at most one NULL in the batch, usually"); |
2999 | |
3000 // Support for the instanceof hack, which uses delay slots to | |
3001 // set a destination register to zero or one. | |
3002 bool do_bool_sets = (instanceof_hack != noreg); | |
3003 #define BOOL_SET(bool_value) \ | |
3004 if (do_bool_sets && bool_value >= 0) \ | |
3005 set(bool_value, instanceof_hack) | |
3006 #define DELAYED_BOOL_SET(bool_value) \ | |
3007 if (do_bool_sets && bool_value >= 0) \ | |
3008 delayed()->set(bool_value, instanceof_hack); \ | |
3009 else delayed()->nop() | |
3010 // Hacked ba(), which may only be used just before L_fallthrough. | |
3011 #define FINAL_JUMP(label, bool_value) \ | |
3012 if (&(label) == &L_fallthrough) { \ | |
3013 BOOL_SET(bool_value); \ | |
3014 } else { \ | |
3015 ba((do_bool_sets && bool_value >= 0), label); \ | |
3016 DELAYED_BOOL_SET(bool_value); \ | |
3017 } | |
3018 | 3083 |
3019 // If the pointers are equal, we are done (e.g., String[] elements). | 3084 // If the pointers are equal, we are done (e.g., String[] elements). |
3020 // This self-check enables sharing of secondary supertype arrays among | 3085 // This self-check enables sharing of secondary supertype arrays among |
3021 // non-primary types such as array-of-interface. Otherwise, each such | 3086 // non-primary types such as array-of-interface. Otherwise, each such |
3022 // type would need its own customized SSA. | 3087 // type would need its own customized SSA. |
3023 // We move this check to the front of the fast path because many | 3088 // We move this check to the front of the fast path because many |
3024 // type checks are in fact trivially successful in this manner, | 3089 // type checks are in fact trivially successful in this manner, |
3025 // so we get a nicely predicted branch right at the start of the check. | 3090 // so we get a nicely predicted branch right at the start of the check. |
3026 cmp(super_klass, sub_klass); | 3091 cmp(super_klass, sub_klass); |
3027 brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success); | 3092 brx(Assembler::equal, false, Assembler::pn, *L_success); |
3028 DELAYED_BOOL_SET(1); | 3093 delayed()->nop(); |
3029 | 3094 |
3030 // Check the supertype display: | 3095 // Check the supertype display: |
3031 if (must_load_sco) { | 3096 if (must_load_sco) { |
3032 // The super check offset is always positive... | 3097 // The super check offset is always positive... |
3033 lduw(super_klass, sco_offset, temp2_reg); | 3098 lduw(super_klass, sco_offset, temp2_reg); |
3047 // Note that the cache is updated below if it does not help us find | 3112 // Note that the cache is updated below if it does not help us find |
3048 // what we need immediately. | 3113 // what we need immediately. |
3049 // So if it was a primary super, we can just fail immediately. | 3114 // So if it was a primary super, we can just fail immediately. |
3050 // Otherwise, it's the slow path for us (no success at this point). | 3115 // Otherwise, it's the slow path for us (no success at this point). |
3051 | 3116 |
3117 // Hacked ba(), which may only be used just before L_fallthrough. | |
3118 #define FINAL_JUMP(label) \ | |
3119 if (&(label) != &L_fallthrough) { \ | |
3120 ba(label); delayed()->nop(); \ | |
3121 } | |
3122 | |
3052 if (super_check_offset.is_register()) { | 3123 if (super_check_offset.is_register()) { |
3053 brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success); | 3124 brx(Assembler::equal, false, Assembler::pn, *L_success); |
3054 delayed(); if (do_bool_sets) BOOL_SET(1); | 3125 delayed()->cmp(super_check_offset.as_register(), sc_offset); |
3055 // if !do_bool_sets, sneak the next cmp into the delay slot: | |
3056 cmp(super_check_offset.as_register(), sc_offset); | |
3057 | 3126 |
3058 if (L_failure == &L_fallthrough) { | 3127 if (L_failure == &L_fallthrough) { |
3059 brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path); | 3128 brx(Assembler::equal, false, Assembler::pt, *L_slow_path); |
3060 delayed()->nop(); | 3129 delayed()->nop(); |
3061 BOOL_SET(0); // fallthrough on failure | |
3062 } else { | 3130 } else { |
3063 brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure); | 3131 brx(Assembler::notEqual, false, Assembler::pn, *L_failure); |
3064 DELAYED_BOOL_SET(0); | 3132 delayed()->nop(); |
3065 FINAL_JUMP(*L_slow_path, -1); // -1 => vanilla delay slot | 3133 FINAL_JUMP(*L_slow_path); |
3066 } | 3134 } |
3067 } else if (super_check_offset.as_constant() == sc_offset) { | 3135 } else if (super_check_offset.as_constant() == sc_offset) { |
3068 // Need a slow path; fast failure is impossible. | 3136 // Need a slow path; fast failure is impossible. |
3069 if (L_slow_path == &L_fallthrough) { | 3137 if (L_slow_path == &L_fallthrough) { |
3070 brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success); | 3138 brx(Assembler::equal, false, Assembler::pt, *L_success); |
3071 DELAYED_BOOL_SET(1); | 3139 delayed()->nop(); |
3072 } else { | 3140 } else { |
3073 brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path); | 3141 brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path); |
3074 delayed()->nop(); | 3142 delayed()->nop(); |
3075 FINAL_JUMP(*L_success, 1); | 3143 FINAL_JUMP(*L_success); |
3076 } | 3144 } |
3077 } else { | 3145 } else { |
3078 // No slow path; it's a fast decision. | 3146 // No slow path; it's a fast decision. |
3079 if (L_failure == &L_fallthrough) { | 3147 if (L_failure == &L_fallthrough) { |
3080 brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success); | 3148 brx(Assembler::equal, false, Assembler::pt, *L_success); |
3081 DELAYED_BOOL_SET(1); | 3149 delayed()->nop(); |
3082 BOOL_SET(0); | |
3083 } else { | 3150 } else { |
3084 brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure); | 3151 brx(Assembler::notEqual, false, Assembler::pn, *L_failure); |
3085 DELAYED_BOOL_SET(0); | 3152 delayed()->nop(); |
3086 FINAL_JUMP(*L_success, 1); | 3153 FINAL_JUMP(*L_success); |
3087 } | 3154 } |
3088 } | 3155 } |
3089 | 3156 |
3090 bind(L_fallthrough); | 3157 bind(L_fallthrough); |
3091 | 3158 |
3092 #undef final_jump | 3159 #undef FINAL_JUMP |
3093 #undef bool_set | |
3094 #undef DELAYED_BOOL_SET | |
3095 #undef final_jump | |
3096 } | 3160 } |
3097 | 3161 |
3098 | 3162 |
3099 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, | 3163 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, |
3100 Register super_klass, | 3164 Register super_klass, |
3183 | 3247 |
3184 // Success. Cache the super we found and proceed in triumph. | 3248 // Success. Cache the super we found and proceed in triumph. |
3185 st_ptr(super_klass, sub_klass, sc_offset); | 3249 st_ptr(super_klass, sub_klass, sc_offset); |
3186 | 3250 |
3187 if (L_success != &L_fallthrough) { | 3251 if (L_success != &L_fallthrough) { |
3188 ba(false, *L_success); | 3252 ba(*L_success); |
3189 delayed()->nop(); | 3253 delayed()->nop(); |
3190 } | 3254 } |
3191 | 3255 |
3192 bind(L_fallthrough); | 3256 bind(L_fallthrough); |
3193 } | 3257 } |
3198 Label& wrong_method_type) { | 3262 Label& wrong_method_type) { |
3199 assert_different_registers(mtype_reg, mh_reg, temp_reg); | 3263 assert_different_registers(mtype_reg, mh_reg, temp_reg); |
3200 // compare method type against that of the receiver | 3264 // compare method type against that of the receiver |
3201 RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg); | 3265 RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg); |
3202 load_heap_oop(mh_reg, mhtype_offset, temp_reg); | 3266 load_heap_oop(mh_reg, mhtype_offset, temp_reg); |
3203 cmp(temp_reg, mtype_reg); | 3267 cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type); |
3204 br(Assembler::notEqual, false, Assembler::pn, wrong_method_type); | |
3205 delayed()->nop(); | |
3206 } | 3268 } |
3207 | 3269 |
3208 | 3270 |
3209 // A method handle has a "vmslots" field which gives the size of its | 3271 // A method handle has a "vmslots" field which gives the size of its |
3210 // argument list in JVM stack slots. This field is either located directly | 3272 // argument list in JVM stack slots. This field is either located directly |
3293 // whether the epoch is still valid | 3355 // whether the epoch is still valid |
3294 // Note that the runtime guarantees sufficient alignment of JavaThread | 3356 // Note that the runtime guarantees sufficient alignment of JavaThread |
3295 // pointers to allow age to be placed into low bits | 3357 // pointers to allow age to be placed into low bits |
3296 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); | 3358 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); |
3297 and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); | 3359 and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); |
3298 cmp(temp_reg, markOopDesc::biased_lock_pattern); | 3360 cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label); |
3299 brx(Assembler::notEqual, false, Assembler::pn, cas_label); | |
3300 delayed()->nop(); | |
3301 | 3361 |
3302 load_klass(obj_reg, temp_reg); | 3362 load_klass(obj_reg, temp_reg); |
3303 ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); | 3363 ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); |
3304 or3(G2_thread, temp_reg, temp_reg); | 3364 or3(G2_thread, temp_reg, temp_reg); |
3305 xor3(mark_reg, temp_reg, temp_reg); | 3365 xor3(mark_reg, temp_reg, temp_reg); |
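The `and3`/`cmp_and_brx_short` pair above tests whether the mark word's low lock bits carry the biased-lock pattern before attempting anything cleverer. A sketch of that predicate; the concrete constants (mask 0x7, pattern 0x5) are the conventional markOop values of this era and are shown only for illustration:

```cpp
// Illustrative: the low-bits test performed by and3 + cmp above.
static bool mark_is_biased(uintptr_t mark) {
  return (mark & 0x7 /* biased_lock_mask_in_place */) ==
         0x5 /* biased_lock_pattern */;
}
```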
3362 } | 3422 } |
3363 if (slow_case != NULL) { | 3423 if (slow_case != NULL) { |
3364 brx(Assembler::notEqual, true, Assembler::pn, *slow_case); | 3424 brx(Assembler::notEqual, true, Assembler::pn, *slow_case); |
3365 delayed()->nop(); | 3425 delayed()->nop(); |
3366 } | 3426 } |
3367 br(Assembler::always, false, Assembler::pt, done); | 3427 ba_short(done); |
3368 delayed()->nop(); | |
3369 | 3428 |
3370 bind(try_rebias); | 3429 bind(try_rebias); |
3371 // At this point we know the epoch has expired, meaning that the | 3430 // At this point we know the epoch has expired, meaning that the |
3372 // current "bias owner", if any, is actually invalid. Under these | 3431 // current "bias owner", if any, is actually invalid. Under these |
3373 // circumstances _only_, we are allowed to use the current header's | 3432 // circumstances _only_, we are allowed to use the current header's |
3391 } | 3450 } |
3392 if (slow_case != NULL) { | 3451 if (slow_case != NULL) { |
3393 brx(Assembler::notEqual, true, Assembler::pn, *slow_case); | 3452 brx(Assembler::notEqual, true, Assembler::pn, *slow_case); |
3394 delayed()->nop(); | 3453 delayed()->nop(); |
3395 } | 3454 } |
3396 br(Assembler::always, false, Assembler::pt, done); | 3455 ba_short(done); |
3397 delayed()->nop(); | |
3398 | 3456 |
3399 bind(try_revoke_bias); | 3457 bind(try_revoke_bias); |
3400 // The prototype mark in the klass doesn't have the bias bit set any | 3458 // The prototype mark in the klass doesn't have the bias bit set any |
3401 // more, indicating that objects of this data type are not supposed | 3459 // more, indicating that objects of this data type are not supposed |
3402 // to be biased any more. We are going to try to reset the mark of | 3460 // to be biased any more. We are going to try to reset the mark of |
3443 | 3501 |
3444 // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by | 3502 // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by |
3445 // Solaris/SPARC's "as". Another apt name would be cas_ptr() | 3503 // Solaris/SPARC's "as". Another apt name would be cas_ptr() |
3446 | 3504 |
3447 void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) { | 3505 void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) { |
3448 casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()) ; | 3506 casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); |
3449 } | 3507 } |
3450 | 3508 |
3451 | 3509 |
3452 | 3510 |
3453 // compiler_lock_object() and compiler_unlock_object() are direct transliterations | 3511 // compiler_lock_object() and compiler_unlock_object() are direct transliterations |
3484 if (counters != NULL) { | 3542 if (counters != NULL) { |
3485 inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch); | 3543 inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch); |
3486 } | 3544 } |
3487 | 3545 |
3488 if (EmitSync & 1) { | 3546 if (EmitSync & 1) { |
3489 mov (3, Rscratch) ; | 3547 mov(3, Rscratch); |
3490 st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); | 3548 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
3491 cmp (SP, G0) ; | 3549 cmp(SP, G0); |
3492 return ; | 3550 return ; |
3493 } | 3551 } |
3494 | 3552 |
3495 if (EmitSync & 2) { | 3553 if (EmitSync & 2) { |
3496 | 3554 |
3527 // we did not find an unlocked object so see if this is a recursive case | 3585 // we did not find an unlocked object so see if this is a recursive case |
3528 // sub(Rscratch, SP, Rscratch); | 3586 // sub(Rscratch, SP, Rscratch); |
3529 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); | 3587 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); |
3530 andcc(Rscratch, 0xfffff003, Rscratch); | 3588 andcc(Rscratch, 0xfffff003, Rscratch); |
3531 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); | 3589 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
3532 bind (done) ; | 3590 bind (done); |
3533 return ; | 3591 return ; |
3534 } | 3592 } |
3535 | 3593 |
3536 Label Egress ; | 3594 Label Egress ; |
3537 | 3595 |
3538 if (EmitSync & 256) { | 3596 if (EmitSync & 256) { |
3539 Label IsInflated ; | 3597 Label IsInflated ; |
3540 | 3598 |
3541 ld_ptr (mark_addr, Rmark); // fetch obj->mark | 3599 ld_ptr(mark_addr, Rmark); // fetch obj->mark |
3542 // Triage: biased, stack-locked, neutral, inflated | 3600 // Triage: biased, stack-locked, neutral, inflated |
3543 if (try_bias) { | 3601 if (try_bias) { |
3544 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); | 3602 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); |
3545 // Invariant: if control reaches this point in the emitted stream | 3603 // Invariant: if control reaches this point in the emitted stream |
3546 // then Rmark has not been modified. | 3604 // then Rmark has not been modified. |
3547 } | 3605 } |
3548 | 3606 |
3549 // Store mark into displaced mark field in the on-stack basic-lock "box" | 3607 // Store mark into displaced mark field in the on-stack basic-lock "box" |
3550 // Critically, this must happen before the CAS | 3608 // Critically, this must happen before the CAS |
3551 // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. | 3609 // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. |
3552 st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); | 3610 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
3553 andcc (Rmark, 2, G0) ; | 3611 andcc(Rmark, 2, G0); |
3554 brx (Assembler::notZero, false, Assembler::pn, IsInflated) ; | 3612 brx(Assembler::notZero, false, Assembler::pn, IsInflated); |
3555 delayed() -> | 3613 delayed()-> |
3556 | 3614 |
3557 // Try stack-lock acquisition. | 3615 // Try stack-lock acquisition. |
3558 // Beware: the 1st instruction is in a delay slot | 3616 // Beware: the 1st instruction is in a delay slot |
3559 mov (Rbox, Rscratch); | 3617 mov(Rbox, Rscratch); |
3560 or3 (Rmark, markOopDesc::unlocked_value, Rmark); | 3618 or3(Rmark, markOopDesc::unlocked_value, Rmark); |
3561 assert (mark_addr.disp() == 0, "cas must take a zero displacement"); | 3619 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); |
3562 casn (mark_addr.base(), Rmark, Rscratch) ; | 3620 casn(mark_addr.base(), Rmark, Rscratch); |
3563 cmp (Rmark, Rscratch); | 3621 cmp(Rmark, Rscratch); |
3564 brx (Assembler::equal, false, Assembler::pt, done); | 3622 brx(Assembler::equal, false, Assembler::pt, done); |
3565 delayed()->sub(Rscratch, SP, Rscratch); | 3623 delayed()->sub(Rscratch, SP, Rscratch); |
3566 | 3624 |
3567 // Stack-lock attempt failed - check for recursive stack-lock. | 3625 // Stack-lock attempt failed - check for recursive stack-lock. |
3568 // See the comments below about how we might remove this case. | 3626 // See the comments below about how we might remove this case. |
3569 #ifdef _LP64 | 3627 #ifdef _LP64 |
3570 sub (Rscratch, STACK_BIAS, Rscratch); | 3628 sub(Rscratch, STACK_BIAS, Rscratch); |
3571 #endif | 3629 #endif |
3572 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); | 3630 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); |
3573 andcc (Rscratch, 0xfffff003, Rscratch); | 3631 andcc(Rscratch, 0xfffff003, Rscratch); |
3574 br (Assembler::always, false, Assembler::pt, done) ; | 3632 br(Assembler::always, false, Assembler::pt, done); |
3575 delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); | 3633 delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
3576 | 3634 |
3577 bind (IsInflated) ; | 3635 bind(IsInflated); |
3578 if (EmitSync & 64) { | 3636 if (EmitSync & 64) { |
3579 // If m->owner != null goto IsLocked | 3637 // If m->owner != null goto IsLocked |
3580 // Pessimistic form: Test-and-CAS vs CAS | 3638 // Pessimistic form: Test-and-CAS vs CAS |
3581 // The optimistic form avoids RTS->RTO cache line upgrades. | 3639 // The optimistic form avoids RTS->RTO cache line upgrades. |
3582 ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); | 3640 ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); |
3583 andcc (Rscratch, Rscratch, G0) ; | 3641 andcc(Rscratch, Rscratch, G0); |
3584 brx (Assembler::notZero, false, Assembler::pn, done) ; | 3642 brx(Assembler::notZero, false, Assembler::pn, done); |
3585 delayed()->nop() ; | 3643 delayed()->nop(); |
3586 // m->owner == null : it's unlocked. | 3644 // m->owner == null : it's unlocked. |
3587 } | 3645 } |
3588 | 3646 |
3589 // Try to CAS m->owner from null to Self | 3647 // Try to CAS m->owner from null to Self |
3590 // Invariant: if we acquire the lock then _recursions should be 0. | 3648 // Invariant: if we acquire the lock then _recursions should be 0. |
3591 add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; | 3649 add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); |
3592 mov (G2_thread, Rscratch) ; | 3650 mov(G2_thread, Rscratch); |
3593 casn (Rmark, G0, Rscratch) ; | 3651 casn(Rmark, G0, Rscratch); |
3594 cmp (Rscratch, G0) ; | 3652 cmp(Rscratch, G0); |
3595 // Intentional fall-through into done | 3653 // Intentional fall-through into done |
3596 } else { | 3654 } else { |
3597 // Aggressively avoid the Store-before-CAS penalty | 3655 // Aggressively avoid the Store-before-CAS penalty |
3598 // Defer the store into box->dhw until after the CAS | 3656 // Defer the store into box->dhw until after the CAS |
3599 Label IsInflated, Recursive ; | 3657 Label IsInflated, Recursive ; |
3600 | 3658 |
3601 // Anticipate CAS -- Avoid RTS->RTO upgrade | 3659 // Anticipate CAS -- Avoid RTS->RTO upgrade |
3602 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ; | 3660 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); |
3603 | 3661 |
3604 ld_ptr (mark_addr, Rmark); // fetch obj->mark | 3662 ld_ptr(mark_addr, Rmark); // fetch obj->mark |
3605 // Triage: biased, stack-locked, neutral, inflated | 3663 // Triage: biased, stack-locked, neutral, inflated |
3606 | 3664 |
3607 if (try_bias) { | 3665 if (try_bias) { |
3608 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); | 3666 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); |
3609 // Invariant: if control reaches this point in the emitted stream | 3667 // Invariant: if control reaches this point in the emitted stream |
3610 // then Rmark has not been modified. | 3668 // then Rmark has not been modified. |
3611 } | 3669 } |
3612 andcc (Rmark, 2, G0) ; | 3670 andcc(Rmark, 2, G0); |
3613 brx (Assembler::notZero, false, Assembler::pn, IsInflated) ; | 3671 brx(Assembler::notZero, false, Assembler::pn, IsInflated); |
3614 delayed()-> // Beware - dangling delay-slot | 3672 delayed()-> // Beware - dangling delay-slot |
3615 | 3673 |
3616 // Try stack-lock acquisition. | 3674 // Try stack-lock acquisition. |
3617 // Transiently install BUSY (0) encoding in the mark word. | 3675 // Transiently install BUSY (0) encoding in the mark word. |
3618 // if the CAS of 0 into the mark was successful then we execute: | 3676 // if the CAS of 0 into the mark was successful then we execute: |
3619 // ST box->dhw = mark -- save fetched mark in on-stack basiclock box | 3677 // ST box->dhw = mark -- save fetched mark in on-stack basiclock box |
3620 // ST obj->mark = box -- overwrite transient 0 value | 3678 // ST obj->mark = box -- overwrite transient 0 value |
3621 // This presumes TSO, of course. | 3679 // This presumes TSO, of course. |
3622 | 3680 |
3623 mov (0, Rscratch) ; | 3681 mov(0, Rscratch); |
3624 or3 (Rmark, markOopDesc::unlocked_value, Rmark); | 3682 or3(Rmark, markOopDesc::unlocked_value, Rmark); |
3625 assert (mark_addr.disp() == 0, "cas must take a zero displacement"); | 3683 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); |
3626 casn (mark_addr.base(), Rmark, Rscratch) ; | 3684 casn(mark_addr.base(), Rmark, Rscratch); |
3627 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ; | 3685 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); |
3628 cmp (Rscratch, Rmark) ; | 3686 cmp(Rscratch, Rmark); |
3629 brx (Assembler::notZero, false, Assembler::pn, Recursive) ; | 3687 brx(Assembler::notZero, false, Assembler::pn, Recursive); |
3630 delayed() -> | 3688 delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
3631 st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); | |
3632 if (counters != NULL) { | 3689 if (counters != NULL) { |
3633 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); | 3690 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); |
3634 } | 3691 } |
3635 br (Assembler::always, false, Assembler::pt, done); | 3692 ba(done); |
3636 delayed() -> | 3693 delayed()->st_ptr(Rbox, mark_addr); |
3637 st_ptr (Rbox, mark_addr) ; | 3694 |
3638 | 3695 bind(Recursive); |
3639 bind (Recursive) ; | |
3640 // Stack-lock attempt failed - check for recursive stack-lock. | 3696 // Stack-lock attempt failed - check for recursive stack-lock. |
3641 // Tests show that we can remove the recursive case with no impact | 3697 // Tests show that we can remove the recursive case with no impact |
3642 // on refworkload 0.83. If we need to reduce the size of the code | 3698 // on refworkload 0.83. If we need to reduce the size of the code |
3643 // emitted by compiler_lock_object() the recursive case is perfect | 3699 // emitted by compiler_lock_object() the recursive case is perfect |
3644 // candidate. | 3700 // candidate. |
3651 // the fast-path stack-lock code from the interpreter and always passed | 3707 // the fast-path stack-lock code from the interpreter and always passed |
3652 // control to the "slow" operators in synchronizer.cpp. | 3708 // control to the "slow" operators in synchronizer.cpp. |
3653 | 3709 |
3654 // RScratch contains the fetched obj->mark value from the failed CASN. | 3710 // RScratch contains the fetched obj->mark value from the failed CASN. |
3655 #ifdef _LP64 | 3711 #ifdef _LP64 |
3656 sub (Rscratch, STACK_BIAS, Rscratch); | 3712 sub(Rscratch, STACK_BIAS, Rscratch); |
3657 #endif | 3713 #endif |
3658 sub(Rscratch, SP, Rscratch); | 3714 sub(Rscratch, SP, Rscratch); |
3659 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); | 3715 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); |
3660 andcc (Rscratch, 0xfffff003, Rscratch); | 3716 andcc(Rscratch, 0xfffff003, Rscratch); |
3661 if (counters != NULL) { | 3717 if (counters != NULL) { |
3662 // Accounting needs the Rscratch register | 3718 // Accounting needs the Rscratch register |
3663 st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); | 3719 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
3664 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); | 3720 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); |
3665 br (Assembler::always, false, Assembler::pt, done) ; | 3721 ba_short(done); |
3666 delayed()->nop() ; | |
3667 } else { | 3722 } else { |
3668 br (Assembler::always, false, Assembler::pt, done) ; | 3723 ba(done); |
3669 delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); | 3724 delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
3670 } | 3725 } |
3671 | 3726 |
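The 0xfffff003 mask, together with the page-size assert, is the whole recursion test: the fetched mark must be a word-aligned address at most one page above SP. Roughly, in C++ (helper name hypothetical):

    #include <cstdint>

    bool is_recursive_stack_lock(std::uintptr_t fetched_mark,
                                 std::uintptr_t sp,
                                 std::uintptr_t stack_bias /* 0 on 32-bit */) {
      std::uintptr_t delta = (fetched_mark - stack_bias) - sp;
      return (delta & 0xfffff003u) == 0;  // andcc(Rscratch, 0xfffff003, Rscratch)
    }

The masked value conveniently doubles as the displaced header stored into the box: zero means recursive.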
3672 bind (IsInflated) ; | 3727 bind(IsInflated); |
3673 if (EmitSync & 64) { | 3728 if (EmitSync & 64) { |
3674 // If m->owner != null goto IsLocked | 3729 // If m->owner != null goto IsLocked |
3675 // Test-and-CAS vs CAS | 3730 // Test-and-CAS vs CAS |
3676 // Pessimistic form avoids futile (doomed) CAS attempts | 3731 // Pessimistic form avoids futile (doomed) CAS attempts |
3677 // The optimistic form avoids RTS->RTO cache line upgrades. | 3732 // The optimistic form avoids RTS->RTO cache line upgrades. |
3678 ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); | 3733 ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); |
3679 andcc (Rscratch, Rscratch, G0) ; | 3734 andcc(Rscratch, Rscratch, G0); |
3680 brx (Assembler::notZero, false, Assembler::pn, done) ; | 3735 brx(Assembler::notZero, false, Assembler::pn, done); |
3681 delayed()->nop() ; | 3736 delayed()->nop(); |
3682 // m->owner == null : it's unlocked. | 3737 // m->owner == null : it's unlocked. |
3683 } | 3738 } |
3684 | 3739 |
3685 // Try to CAS m->owner from null to Self | 3740 // Try to CAS m->owner from null to Self |
3686 // Invariant: if we acquire the lock then _recursions should be 0. | 3741 // Invariant: if we acquire the lock then _recursions should be 0. |
3687 add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; | 3742 add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); |
3688 mov (G2_thread, Rscratch) ; | 3743 mov(G2_thread, Rscratch); |
3689 casn (Rmark, G0, Rscratch) ; | 3744 casn(Rmark, G0, Rscratch); |
3690 cmp (Rscratch, G0) ; | 3745 cmp(Rscratch, G0); |
3691 // ST box->displaced_header = NonZero. | 3746 // ST box->displaced_header = NonZero. |
3692 // Any non-zero value suffices: | 3747 // Any non-zero value suffices: |
3693 // unused_mark(), G2_thread, RBox, RScratch, SP, etc. | 3748 // unused_mark(), G2_thread, RBox, RScratch, SP, etc. |
3694 st_ptr (Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); | 3749 st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
3695 // Intentional fall-through into done | 3750 // Intentional fall-through into done |
3696 } | 3751 } |
3697 | 3752 |
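The inflated enter path reduces to a single CAS on _owner; a sketch with stand-in types (not the real ObjectMonitor layout):

    #include <atomic>

    struct Thread;                              // opaque stand-in
    struct MonitorSketch { std::atomic<Thread*> owner{nullptr}; };

    // Try to CAS m->owner from null to the current thread.
    bool try_enter(MonitorSketch* m, Thread* self) {
      Thread* expected = nullptr;
      return m->owner.compare_exchange_strong(expected, self,
                                              std::memory_order_acquire);
    }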
3698 bind (done) ; | 3753 bind(done); |
3699 } | 3754 } |
3700 | 3755 |
3701 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, | 3756 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, |
3702 Register Rbox, Register Rscratch, | 3757 Register Rbox, Register Rscratch, |
3703 bool try_bias) { | 3758 bool try_bias) { |
3704 Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); | 3759 Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); |
3705 | 3760 |
3706 Label done ; | 3761 Label done ; |
3707 | 3762 |
3708 if (EmitSync & 4) { | 3763 if (EmitSync & 4) { |
3709 cmp (SP, G0) ; | 3764 cmp(SP, G0); |
3710 return ; | 3765 return ; |
3711 } | 3766 } |
3712 | 3767 |
3713 if (EmitSync & 8) { | 3768 if (EmitSync & 8) { |
3714 if (try_bias) { | 3769 if (try_bias) { |
3715 biased_locking_exit(mark_addr, Rscratch, done); | 3770 biased_locking_exit(mark_addr, Rscratch, done); |
3716 } | 3771 } |
3717 | 3772 |
3718 // Test first if it is a fast recursive unlock | 3773 // Test first if it is a fast recursive unlock |
3719 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); | 3774 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); |
3720 cmp(Rmark, G0); | 3775 br_null_short(Rmark, Assembler::pt, done); |
3721 brx(Assembler::equal, false, Assembler::pt, done); | |
3722 delayed()->nop(); | |
3723 | 3776 |
3724 // Check if it is still a lightweight lock; this is true if we see | 3777 // Check if it is still a lightweight lock; this is true if we see |
3725 // the stack address of the basicLock in the markOop of the object | 3778 // the stack address of the basicLock in the markOop of the object |
3726 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); | 3779 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); |
3727 casx_under_lock(mark_addr.base(), Rbox, Rmark, | 3780 casx_under_lock(mark_addr.base(), Rbox, Rmark, |
3728 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); | 3781 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); |
3729 br (Assembler::always, false, Assembler::pt, done); | 3782 ba(done); |
3730 delayed()->cmp(Rbox, Rmark); | 3783 delayed()->cmp(Rbox, Rmark); |
3731 bind (done) ; | 3784 bind(done); |
3732 return ; | 3785 return ; |
3733 } | 3786 } |
3734 | 3787 |
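This EmitSync variant and the LStacked exit path further down share one shape, paraphrased here in C++ (stand-in types, hypothetical helper name):

    #include <atomic>
    #include <cstdint>

    // A null displaced header marks a recursive stack lock and needs no
    // store; otherwise CAS the saved header back into the mark word,
    // expecting to still find our box address there.
    bool stack_unlock(std::atomic<std::uintptr_t>* mark,
                      std::uintptr_t box_addr,
                      std::uintptr_t displaced_header) {
      if (displaced_header == 0) return true;   // recursive: nothing to restore
      std::uintptr_t expected = box_addr;
      return mark->compare_exchange_strong(expected, displaced_header,
                                           std::memory_order_release);
    }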
3735 // Beware ... If the aggregate size of the code emitted by CLO and CUO is | 3788 // Beware ... If the aggregate size of the code emitted by CLO and CUO is |
3736 // too large, performance rolls abruptly off a cliff. | 3789 // too large, performance rolls abruptly off a cliff. |
3741 if (try_bias) { | 3794 if (try_bias) { |
3742 // TODO: eliminate redundant LDs of obj->mark | 3795 // TODO: eliminate redundant LDs of obj->mark |
3743 biased_locking_exit(mark_addr, Rscratch, done); | 3796 biased_locking_exit(mark_addr, Rscratch, done); |
3744 } | 3797 } |
3745 | 3798 |
3746 ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ; | 3799 ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark); |
3747 ld_ptr (Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); | 3800 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); |
3748 andcc (Rscratch, Rscratch, G0); | 3801 andcc(Rscratch, Rscratch, G0); |
3749 brx (Assembler::zero, false, Assembler::pn, done); | 3802 brx(Assembler::zero, false, Assembler::pn, done); |
3750 delayed()-> nop() ; // consider: relocate fetch of mark, above, into this DS | 3803 delayed()->nop(); // consider: relocate fetch of mark, above, into this DS |
3751 andcc (Rmark, 2, G0) ; | 3804 andcc(Rmark, 2, G0); |
3752 brx (Assembler::zero, false, Assembler::pt, LStacked) ; | 3805 brx(Assembler::zero, false, Assembler::pt, LStacked); |
3753 delayed()-> nop() ; | 3806 delayed()->nop(); |
3754 | 3807 |
3755 // It's inflated | 3808 // It's inflated |
3756 // Conceptually we need a #loadstore|#storestore "release" MEMBAR before | 3809 // Conceptually we need a #loadstore|#storestore "release" MEMBAR before |
3757 // the ST of 0 into _owner which releases the lock. This prevents loads | 3810 // the ST of 0 into _owner which releases the lock. This prevents loads |
3758 // and stores within the critical section from reordering (floating) | 3811 // and stores within the critical section from reordering (floating) |
3759 // past the store that releases the lock. But TSO is a strong memory model | 3812 // past the store that releases the lock. But TSO is a strong memory model |
3760 // and that particular flavor of barrier is a noop, so we can safely elide it. | 3813 // and that particular flavor of barrier is a noop, so we can safely elide it. |
3761 // Note that we use 1-0 locking by default for the inflated case. We | 3814 // Note that we use 1-0 locking by default for the inflated case. We |
3762 // close the resultant (and rare) race by having contended threads in | 3815 // close the resultant (and rare) race by having contended threads in |
3763 // monitorenter periodically poll _owner. | 3816 // monitorenter periodically poll _owner. |
3764 ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); | 3817 ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); |
3765 ld_ptr (Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); | 3818 ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); |
3766 xor3 (Rscratch, G2_thread, Rscratch) ; | 3819 xor3(Rscratch, G2_thread, Rscratch); |
3767 orcc (Rbox, Rscratch, Rbox) ; | 3820 orcc(Rbox, Rscratch, Rbox); |
3768 brx (Assembler::notZero, false, Assembler::pn, done) ; | 3821 brx(Assembler::notZero, false, Assembler::pn, done); |
3769 delayed()-> | 3822 delayed()-> |
3770 ld_ptr (Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); | 3823 ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); |
3771 ld_ptr (Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); | 3824 ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); |
3772 orcc (Rbox, Rscratch, G0) ; | 3825 orcc(Rbox, Rscratch, G0); |
3773 if (EmitSync & 65536) { | 3826 if (EmitSync & 65536) { |
3774 Label LSucc ; | 3827 Label LSucc ; |
3775 brx (Assembler::notZero, false, Assembler::pn, LSucc) ; | 3828 brx(Assembler::notZero, false, Assembler::pn, LSucc); |
3776 delayed()->nop() ; | 3829 delayed()->nop(); |
3777 br (Assembler::always, false, Assembler::pt, done) ; | 3830 ba(done); |
3778 delayed()-> | 3831 delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); |
3779 st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); | 3832 |
3780 | 3833 bind(LSucc); |
3781 bind (LSucc) ; | 3834 st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); |
3782 st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); | 3835 if (os::is_MP()) { membar (StoreLoad); } |
3783 if (os::is_MP()) { membar (StoreLoad) ; } | 3836 ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); |
3784 ld_ptr (Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); | 3837 andcc(Rscratch, Rscratch, G0); |
3785 andcc (Rscratch, Rscratch, G0) ; | 3838 brx(Assembler::notZero, false, Assembler::pt, done); |
3786 brx (Assembler::notZero, false, Assembler::pt, done) ; | 3839 delayed()->andcc(G0, G0, G0); |
3787 delayed()-> andcc (G0, G0, G0) ; | 3840 add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); |
3788 add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; | 3841 mov(G2_thread, Rscratch); |
3789 mov (G2_thread, Rscratch) ; | 3842 casn(Rmark, G0, Rscratch); |
3790 casn (Rmark, G0, Rscratch) ; | |
3791 cmp (Rscratch, G0) ; | |
3792 // invert icc.zf and goto done | 3843 // invert icc.zf and goto done |
3793 brx (Assembler::notZero, false, Assembler::pt, done) ; | 3844 br_notnull(Rscratch, false, Assembler::pt, done); |
3794 delayed() -> cmp (G0, G0) ; | 3845 delayed()->cmp(G0, G0); |
3795 br (Assembler::always, false, Assembler::pt, done); | 3846 ba(done); |
3796 delayed() -> cmp (G0, 1) ; | 3847 delayed()->cmp(G0, 1); |
3797 } else { | 3848 } else { |
3798 brx (Assembler::notZero, false, Assembler::pn, done) ; | 3849 brx(Assembler::notZero, false, Assembler::pn, done); |
3799 delayed()->nop() ; | 3850 delayed()->nop(); |
3800 br (Assembler::always, false, Assembler::pt, done) ; | 3851 ba(done); |
3801 delayed()-> | 3852 delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); |
3802 st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); | |
3803 } | 3853 } |
3804 | 3854 |
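A compact model of the 1-0 exit with the optional successor check (the EmitSync & 65536 arm); the memory_order calls stand in for the elided TSO barriers and the explicit membar(StoreLoad), and the types are illustrative:

    #include <atomic>

    struct Thread;
    struct MonitorSketch {
      std::atomic<Thread*> owner;
      std::atomic<Thread*> succ;
    };

    // Returns true when the unlock is complete; false when the caller
    // transiently reacquired the lock and must wake a successor itself.
    bool exit_1_0(MonitorSketch* m, Thread* self) {
      m->owner.store(nullptr, std::memory_order_release);   // ST _owner = 0
      std::atomic_thread_fence(std::memory_order_seq_cst);  // membar(StoreLoad)
      if (m->succ.load() != nullptr) return true;           // an heir exists
      Thread* expected = nullptr;
      if (!m->owner.compare_exchange_strong(expected, self))
        return true;                    // another thread slipped in: done
      return false;                     // we hold it again: go wake someone
    }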
3805 bind (LStacked) ; | 3855 bind(LStacked); |
3806 // Consider: we could replace the expensive CAS in the exit | 3856 // Consider: we could replace the expensive CAS in the exit |
3807 // path with a simple ST of the displaced mark value fetched from | 3857 // path with a simple ST of the displaced mark value fetched from |
3808 // the on-stack basiclock box. That admits a race where a thread T2 | 3858 // the on-stack basiclock box. That admits a race where a thread T2 |
3809 // in the slow lock path -- inflating with monitor M -- could race a | 3859 // in the slow lock path -- inflating with monitor M -- could race a |
3810 // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. | 3860 // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. |
3829 // lost-update "stomp" WAW race but detects and recovers as needed. | 3879 // lost-update "stomp" WAW race but detects and recovers as needed. |
3830 // | 3880 // |
3831 // A prototype implementation showed excellent results, although | 3881 // A prototype implementation showed excellent results, although |
3832 // the scavenger and timeout code was rather involved. | 3882 // the scavenger and timeout code was rather involved. |
3833 | 3883 |
3834 casn (mark_addr.base(), Rbox, Rscratch) ; | 3884 casn(mark_addr.base(), Rbox, Rscratch); |
3835 cmp (Rbox, Rscratch); | 3885 cmp(Rbox, Rscratch); |
3836 // Intentional fall through into done ... | 3886 // Intentional fall through into done ... |
3837 | 3887 |
3838 bind (done) ; | 3888 bind(done); |
3839 } | 3889 } |
3840 | 3890 |
3841 | 3891 |
3842 | 3892 |
3843 void MacroAssembler::print_CPU_state() { | 3893 void MacroAssembler::print_CPU_state() { |
3889 | 3939 |
3890 save_frame(0); | 3940 save_frame(0); |
3891 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); | 3941 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); |
3892 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); | 3942 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); |
3893 or3(t1, t2, t3); | 3943 or3(t1, t2, t3); |
3894 cmp(t1, t2); | 3944 cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next); |
3895 br(Assembler::greaterEqual, false, Assembler::pn, next); | |
3896 delayed()->nop(); | |
3897 stop("assert(top >= start)"); | 3945 stop("assert(top >= start)"); |
3898 should_not_reach_here(); | 3946 should_not_reach_here(); |
3899 | 3947 |
3900 bind(next); | 3948 bind(next); |
3901 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); | 3949 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); |
3902 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2); | 3950 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2); |
3903 or3(t3, t2, t3); | 3951 or3(t3, t2, t3); |
3904 cmp(t1, t2); | 3952 cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2); |
3905 br(Assembler::lessEqual, false, Assembler::pn, next2); | |
3906 delayed()->nop(); | |
3907 stop("assert(top <= end)"); | 3953 stop("assert(top <= end)"); |
3908 should_not_reach_here(); | 3954 should_not_reach_here(); |
3909 | 3955 |
3910 bind(next2); | 3956 bind(next2); |
3911 and3(t3, MinObjAlignmentInBytesMask, t3); | 3957 and3(t3, MinObjAlignmentInBytesMask, t3); |
3912 cmp(t3, 0); | 3958 cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok); |
3913 br(Assembler::lessEqual, false, Assembler::pn, ok); | |
3914 delayed()->nop(); | |
3915 stop("assert(aligned)"); | 3959 stop("assert(aligned)"); |
3916 should_not_reach_here(); | 3960 should_not_reach_here(); |
3917 | 3961 |
3918 bind(ok); | 3962 bind(ok); |
3919 restore(); | 3963 restore(); |
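verify_tlab is among the first users of the new cmp_and_br_short helper. As a rough model of the choice the *_short family makes per call site (the names below are hypothetical; the real emitters and displacement accessors live elsewhere in this file):

    #include <cstdint>

    enum BranchKind { FUSED_CBCOND, CMP_BRANCH_NOP };

    // cbcond carries a signed 10-bit word displacement: +/-512 instructions.
    static bool fits_in_wdisp10(std::intptr_t byte_offset) {
      if (byte_offset & 3) return false;        // must be word aligned
      std::intptr_t words = byte_offset / 4;
      return words >= -512 && words < 512;
    }

    static BranchKind pick_branch(bool cpu_has_cbcond,
                                  std::intptr_t byte_offset) {
      return (cpu_has_cbcond && fits_in_wdisp10(byte_offset))
                 ? FUSED_CBCOND       // one instruction, no delay slot
                 : CMP_BRANCH_NOP;    // cmp + branch + nop, as before
    }

The payoff is visible in the right-hand column throughout: three-instruction cmp/br/nop sequences collapse into a single fused compare-and-branch on T4.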
3935 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); | 3979 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); |
3936 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); | 3980 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); |
3937 | 3981 |
3938 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { | 3982 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { |
3939 // No allocation in the shared eden. | 3983 // No allocation in the shared eden. |
3940 br(Assembler::always, false, Assembler::pt, slow_case); | 3984 ba_short(slow_case); |
3941 delayed()->nop(); | |
3942 } else { | 3985 } else { |
3943 // get eden boundaries | 3986 // get eden boundaries |
3944 // note: we need both top & top_addr! | 3987 // note: we need both top & top_addr! |
3945 const Register top_addr = t1; | 3988 const Register top_addr = t1; |
3946 const Register end = t2; | 3989 const Register end = t2; |
4070 assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */); | 4113 assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */); |
4071 Label do_refill, discard_tlab; | 4114 Label do_refill, discard_tlab; |
4072 | 4115 |
4073 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { | 4116 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { |
4074 // No allocation in the shared eden. | 4117 // No allocation in the shared eden. |
4075 br(Assembler::always, false, Assembler::pt, slow_case); | 4118 ba_short(slow_case); |
4076 delayed()->nop(); | |
4077 } | 4119 } |
4078 | 4120 |
4079 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top); | 4121 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top); |
4080 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1); | 4122 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1); |
4081 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2); | 4123 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2); |
4096 // increment number of slow_allocations | 4138 // increment number of slow_allocations |
4097 ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2); | 4139 ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2); |
4098 add(t2, 1, t2); | 4140 add(t2, 1, t2); |
4099 stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset())); | 4141 stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset())); |
4100 } | 4142 } |
4101 br(Assembler::always, false, Assembler::pt, try_eden); | 4143 ba_short(try_eden); |
4102 delayed()->nop(); | |
4103 | 4144 |
4104 bind(discard_tlab); | 4145 bind(discard_tlab); |
4105 if (TLABStats) { | 4146 if (TLABStats) { |
4106 // increment number of refills | 4147 // increment number of refills |
4107 ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2); | 4148 ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2); |
4113 stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); | 4154 stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); |
4114 } | 4155 } |
4115 | 4156 |
4116 // if tlab is currently allocated (top or end != null) then | 4157 // if tlab is currently allocated (top or end != null) then |
4117 // fill [top, end + alignment_reserve) with array object | 4158 // fill [top, end + alignment_reserve) with array object |
4118 br_null(top, false, Assembler::pn, do_refill); | 4159 br_null_short(top, Assembler::pn, do_refill); |
4119 delayed()->nop(); | |
4120 | 4160 |
4121 set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); | 4161 set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); |
4122 st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word | 4162 st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word |
4123 // set klass to intArrayKlass | 4163 // set klass to intArrayKlass |
4124 sub(t1, typeArrayOopDesc::header_size(T_INT), t1); | 4164 sub(t1, typeArrayOopDesc::header_size(T_INT), t1); |
4149 // check that tlab_size (t1) is still valid | 4189 // check that tlab_size (t1) is still valid |
4150 { | 4190 { |
4151 Label ok; | 4191 Label ok; |
4152 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2); | 4192 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2); |
4153 sll_ptr(t2, LogHeapWordSize, t2); | 4193 sll_ptr(t2, LogHeapWordSize, t2); |
4154 cmp(t1, t2); | 4194 cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok); |
4155 br(Assembler::equal, false, Assembler::pt, ok); | |
4156 delayed()->nop(); | |
4157 stop("assert(t1 == tlab_size)"); | 4195 stop("assert(t1 == tlab_size)"); |
4158 should_not_reach_here(); | 4196 should_not_reach_here(); |
4159 | 4197 |
4160 bind(ok); | 4198 bind(ok); |
4161 } | 4199 } |
4162 #endif // ASSERT | 4200 #endif // ASSERT |
4163 add(top, t1, top); // t1 is tlab_size | 4201 add(top, t1, top); // t1 is tlab_size |
4164 sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); | 4202 sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); |
4165 st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); | 4203 st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); |
4166 verify_tlab(); | 4204 verify_tlab(); |
4167 br(Assembler::always, false, Assembler::pt, retry); | 4205 ba_short(retry); |
4168 delayed()->nop(); | |
4169 } | 4206 } |
4170 | 4207 |
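The refill-versus-discard decision that drives the ba_short(try_eden) above can be summarized as follows (field names abridged; the comparison itself sits in the elided hunk):

    #include <cstddef>

    struct TlabSketch {
      std::size_t top, end;             // current fill pointer and limit
      std::size_t refill_waste_limit;   // max free bytes we may throw away
    };

    enum RefillDecision { KEEP_AND_TRY_EDEN, DISCARD_AND_REFILL };

    RefillDecision tlab_refill_policy(const TlabSketch& t) {
      std::size_t free_bytes = t.end - t.top;
      // Too much left to waste: keep the TLAB and satisfy this request
      // from eden instead; otherwise plant a filler int-array over the
      // remainder and grab a fresh TLAB.
      return (free_bytes > t.refill_waste_limit) ? KEEP_AND_TRY_EDEN
                                                 : DISCARD_AND_REFILL;
    }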
4171 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, | 4208 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, |
4172 Register t1, Register t2) { | 4209 Register t1, Register t2) { |
4173 // Bump total bytes allocated by this thread | 4210 // Bump total bytes allocated by this thread |
4288 | 4325 |
4289 static void generate_satb_log_enqueue(bool with_frame) { | 4326 static void generate_satb_log_enqueue(bool with_frame) { |
4290 BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); | 4327 BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); |
4291 CodeBuffer buf(bb); | 4328 CodeBuffer buf(bb); |
4292 MacroAssembler masm(&buf); | 4329 MacroAssembler masm(&buf); |
4293 address start = masm.pc(); | 4330 |
4331 #define __ masm. | |
4332 | |
4333 address start = __ pc(); | |
4294 Register pre_val; | 4334 Register pre_val; |
4295 | 4335 |
4296 Label refill, restart; | 4336 Label refill, restart; |
4297 if (with_frame) { | 4337 if (with_frame) { |
4298 masm.save_frame(0); | 4338 __ save_frame(0); |
4299 pre_val = I0; // Was O0 before the save. | 4339 pre_val = I0; // Was O0 before the save. |
4300 } else { | 4340 } else { |
4301 pre_val = O0; | 4341 pre_val = O0; |
4302 } | 4342 } |
4303 int satb_q_index_byte_offset = | 4343 int satb_q_index_byte_offset = |
4308 PtrQueue::byte_offset_of_buf()); | 4348 PtrQueue::byte_offset_of_buf()); |
4309 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && | 4349 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && |
4310 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), | 4350 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), |
4311 "check sizes in assembly below"); | 4351 "check sizes in assembly below"); |
4312 | 4352 |
4313 masm.bind(restart); | 4353 __ bind(restart); |
4314 masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); | 4354 __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); |
4315 | 4355 |
4316 masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); | 4356 __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); |
4317 // If the branch is taken, no harm in executing this in the delay slot. | 4357 // If the branch is taken, no harm in executing this in the delay slot. |
4318 masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); | 4358 __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); |
4319 masm.sub(L0, oopSize, L0); | 4359 __ sub(L0, oopSize, L0); |
4320 | 4360 |
4321 masm.st_ptr(pre_val, L1, L0); // [_buf + index] := pre_val | 4361 __ st_ptr(pre_val, L1, L0); // [_buf + index] := pre_val |
4322 if (!with_frame) { | 4362 if (!with_frame) { |
4323 // Use return-from-leaf | 4363 // Use return-from-leaf |
4324 masm.retl(); | 4364 __ retl(); |
4325 masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); | 4365 __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); |
4326 } else { | 4366 } else { |
4327 // Not delayed. | 4367 // Not delayed. |
4328 masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); | 4368 __ st_ptr(L0, G2_thread, satb_q_index_byte_offset); |
4329 } | 4369 } |
4330 if (with_frame) { | 4370 if (with_frame) { |
4331 masm.ret(); | 4371 __ ret(); |
4332 masm.delayed()->restore(); | 4372 __ delayed()->restore(); |
4333 } | 4373 } |
4334 masm.bind(refill); | 4374 __ bind(refill); |
4335 | 4375 |
4336 address handle_zero = | 4376 address handle_zero = |
4337 CAST_FROM_FN_PTR(address, | 4377 CAST_FROM_FN_PTR(address, |
4338 &SATBMarkQueueSet::handle_zero_index_for_thread); | 4378 &SATBMarkQueueSet::handle_zero_index_for_thread); |
4339 // This should be rare enough that we can afford to save all the | 4379 // This should be rare enough that we can afford to save all the |
4340 // scratch registers that the calling context might be using. | 4380 // scratch registers that the calling context might be using. |
4341 masm.mov(G1_scratch, L0); | 4381 __ mov(G1_scratch, L0); |
4342 masm.mov(G3_scratch, L1); | 4382 __ mov(G3_scratch, L1); |
4343 masm.mov(G4, L2); | 4383 __ mov(G4, L2); |
4344 // We need the value of O0 above (for the write into the buffer), so we | 4384 // We need the value of O0 above (for the write into the buffer), so we |
4345 // save and restore it. | 4385 // save and restore it. |
4346 masm.mov(O0, L3); | 4386 __ mov(O0, L3); |
4347 // Since the call will overwrite O7, we save and restore that, as well. | 4387 // Since the call will overwrite O7, we save and restore that, as well. |
4348 masm.mov(O7, L4); | 4388 __ mov(O7, L4); |
4349 masm.call_VM_leaf(L5, handle_zero, G2_thread); | 4389 __ call_VM_leaf(L5, handle_zero, G2_thread); |
4350 masm.mov(L0, G1_scratch); | 4390 __ mov(L0, G1_scratch); |
4351 masm.mov(L1, G3_scratch); | 4391 __ mov(L1, G3_scratch); |
4352 masm.mov(L2, G4); | 4392 __ mov(L2, G4); |
4353 masm.mov(L3, O0); | 4393 __ mov(L3, O0); |
4354 masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); | 4394 __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); |
4355 masm.delayed()->mov(L4, O7); | 4395 __ delayed()->mov(L4, O7); |
4356 | 4396 |
4357 if (with_frame) { | 4397 if (with_frame) { |
4358 satb_log_enqueue_with_frame = start; | 4398 satb_log_enqueue_with_frame = start; |
4359 satb_log_enqueue_with_frame_end = masm.pc(); | 4399 satb_log_enqueue_with_frame_end = __ pc(); |
4360 } else { | 4400 } else { |
4361 satb_log_enqueue_frameless = start; | 4401 satb_log_enqueue_frameless = start; |
4362 satb_log_enqueue_frameless_end = masm.pc(); | 4402 satb_log_enqueue_frameless_end = __ pc(); |
4363 } | 4403 } |
4404 | |
4405 #undef __ | |
4364 } | 4406 } |
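Stripped of register assignments, the enqueue fast path is a down-counting byte index into a buffer; a sketch with simplified types (not the real PtrQueue layout):

    #include <cstddef>
    #include <cstdint>

    struct SatbQueueSketch {
      std::size_t index;   // byte offset of next free slot, counts down to 0
      char*       buf;     // queue buffer
    };

    // Returns false when the buffer is full and the slow path must refill.
    bool try_enqueue(SatbQueueSketch* q, std::intptr_t pre_val) {
      if (q->index == 0) return false;      // br_on_reg_cond(rc_z, ..., refill)
      q->index -= sizeof(std::intptr_t);    // sub(L0, oopSize, L0)
      *reinterpret_cast<std::intptr_t*>(q->buf + q->index) = pre_val;
      return true;                          // st_ptr(pre_val, L1, L0)
    }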
4365 | 4407 |
4366 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { | 4408 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { |
4367 if (with_frame) { | 4409 if (with_frame) { |
4368 if (satb_log_enqueue_with_frame == 0) { | 4410 if (satb_log_enqueue_with_frame == 0) { |
4424 tmp); | 4466 tmp); |
4425 } | 4467 } |
4426 | 4468 |
4427 // Check on whether to annul. | 4469 // Check on whether to annul. |
4428 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); | 4470 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); |
4429 delayed() -> nop(); | 4471 delayed()->nop(); |
4430 | 4472 |
4431 // Do we need to load the previous value? | 4473 // Do we need to load the previous value? |
4432 if (obj != noreg) { | 4474 if (obj != noreg) { |
4433 // Load the previous value... | 4475 // Load the previous value... |
4434 if (index == noreg) { | 4476 if (index == noreg) { |
4448 assert(pre_val != noreg, "must have a real register"); | 4490 assert(pre_val != noreg, "must have a real register"); |
4449 | 4491 |
4450 // Is the previous value null? | 4492 // Is the previous value null? |
4451 // Check on whether to annul. | 4493 // Check on whether to annul. |
4452 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); | 4494 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); |
4453 delayed() -> nop(); | 4495 delayed()->nop(); |
4454 | 4496 |
4455 // OK, it's not filtered, so we'll need to call enqueue. In the normal | 4497 // OK, it's not filtered, so we'll need to call enqueue. In the normal |
4456 // case, pre_val will be a scratch G-reg, but there are some cases in | 4498 // case, pre_val will be a scratch G-reg, but there are some cases in |
4457 // which it's an O-reg. In the first case, do a normal call. In the | 4499 // which it's an O-reg. In the first case, do a normal call. In the |
4458 // latter, do a save here and call the frameless version. | 4500 // latter, do a save here and call the frameless version. |
4516 // This code assumes that O0 contains the object address. | 4558 // This code assumes that O0 contains the object address. |
4517 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { | 4559 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { |
4518 BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); | 4560 BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); |
4519 CodeBuffer buf(bb); | 4561 CodeBuffer buf(bb); |
4520 MacroAssembler masm(&buf); | 4562 MacroAssembler masm(&buf); |
4521 address start = masm.pc(); | 4563 #define __ masm. |
4564 address start = __ pc(); | |
4522 | 4565 |
4523 Label not_already_dirty, restart, refill; | 4566 Label not_already_dirty, restart, refill; |
4524 | 4567 |
4525 #ifdef _LP64 | 4568 #ifdef _LP64 |
4526 masm.srlx(O0, CardTableModRefBS::card_shift, O0); | 4569 __ srlx(O0, CardTableModRefBS::card_shift, O0); |
4527 #else | 4570 #else |
4528 masm.srl(O0, CardTableModRefBS::card_shift, O0); | 4571 __ srl(O0, CardTableModRefBS::card_shift, O0); |
4529 #endif | 4572 #endif |
4530 AddressLiteral addrlit(byte_map_base); | 4573 AddressLiteral addrlit(byte_map_base); |
4531 masm.set(addrlit, O1); // O1 := <card table base> | 4574 __ set(addrlit, O1); // O1 := <card table base> |
4532 masm.ldub(O0, O1, O2); // O2 := [O0 + O1] | 4575 __ ldub(O0, O1, O2); // O2 := [O0 + O1] |
4533 | 4576 |
4534 masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, | 4577 __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, |
4535 O2, not_already_dirty); | 4578 O2, not_already_dirty); |
4536 // Get O1 + O2 into a reg by itself -- useful in the take-the-branch | 4579 // Get O1 + O2 into a reg by itself -- useful in the take-the-branch |
4537 // case, harmless if not. | 4580 // case, harmless if not. |
4538 masm.delayed()->add(O0, O1, O3); | 4581 __ delayed()->add(O0, O1, O3); |
4539 | 4582 |
4540 // We didn't take the branch, so we're already dirty: return. | 4583 // We didn't take the branch, so we're already dirty: return. |
4541 // Use return-from-leaf | 4584 // Use return-from-leaf |
4542 masm.retl(); | 4585 __ retl(); |
4543 masm.delayed()->nop(); | 4586 __ delayed()->nop(); |
4544 | 4587 |
4545 // Not dirty. | 4588 // Not dirty. |
4546 masm.bind(not_already_dirty); | 4589 __ bind(not_already_dirty); |
4547 // First, dirty it. | 4590 // First, dirty it. |
4548 masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). | 4591 __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). |
4549 int dirty_card_q_index_byte_offset = | 4592 int dirty_card_q_index_byte_offset = |
4550 in_bytes(JavaThread::dirty_card_queue_offset() + | 4593 in_bytes(JavaThread::dirty_card_queue_offset() + |
4551 PtrQueue::byte_offset_of_index()); | 4594 PtrQueue::byte_offset_of_index()); |
4552 int dirty_card_q_buf_byte_offset = | 4595 int dirty_card_q_buf_byte_offset = |
4553 in_bytes(JavaThread::dirty_card_queue_offset() + | 4596 in_bytes(JavaThread::dirty_card_queue_offset() + |
4554 PtrQueue::byte_offset_of_buf()); | 4597 PtrQueue::byte_offset_of_buf()); |
4555 masm.bind(restart); | 4598 __ bind(restart); |
4556 masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); | 4599 __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); |
4557 | 4600 |
4558 masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, | 4601 __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, |
4559 L0, refill); | 4602 L0, refill); |
4560 // If the branch is taken, no harm in executing this in the delay slot. | 4603 // If the branch is taken, no harm in executing this in the delay slot. |
4561 masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); | 4604 __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); |
4562 masm.sub(L0, oopSize, L0); | 4605 __ sub(L0, oopSize, L0); |
4563 | 4606 |
4564 masm.st_ptr(O3, L1, L0); // [_buf + index] := O3 | 4607 __ st_ptr(O3, L1, L0); // [_buf + index] := O3 |
4565 // Use return-from-leaf | 4608 // Use return-from-leaf |
4566 masm.retl(); | 4609 __ retl(); |
4567 masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); | 4610 __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); |
4568 | 4611 |
4569 masm.bind(refill); | 4612 __ bind(refill); |
4570 address handle_zero = | 4613 address handle_zero = |
4571 CAST_FROM_FN_PTR(address, | 4614 CAST_FROM_FN_PTR(address, |
4572 &DirtyCardQueueSet::handle_zero_index_for_thread); | 4615 &DirtyCardQueueSet::handle_zero_index_for_thread); |
4573 // This should be rare enough that we can afford to save all the | 4616 // This should be rare enough that we can afford to save all the |
4574 // scratch registers that the calling context might be using. | 4617 // scratch registers that the calling context might be using. |
4575 masm.mov(G1_scratch, L3); | 4618 __ mov(G1_scratch, L3); |
4576 masm.mov(G3_scratch, L5); | 4619 __ mov(G3_scratch, L5); |
4577 // We need the value of O3 above (for the write into the buffer), so we | 4620 // We need the value of O3 above (for the write into the buffer), so we |
4578 // save and restore it. | 4621 // save and restore it. |
4579 masm.mov(O3, L6); | 4622 __ mov(O3, L6); |
4580 // Since the call will overwrite O7, we save and restore that, as well. | 4623 // Since the call will overwrite O7, we save and restore that, as well. |
4581 masm.mov(O7, L4); | 4624 __ mov(O7, L4); |
4582 | 4625 |
4583 masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); | 4626 __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); |
4584 masm.mov(L3, G1_scratch); | 4627 __ mov(L3, G1_scratch); |
4585 masm.mov(L5, G3_scratch); | 4628 __ mov(L5, G3_scratch); |
4586 masm.mov(L6, O3); | 4629 __ mov(L6, O3); |
4587 masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); | 4630 __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); |
4588 masm.delayed()->mov(L4, O7); | 4631 __ delayed()->mov(L4, O7); |
4589 | 4632 |
4590 dirty_card_log_enqueue = start; | 4633 dirty_card_log_enqueue = start; |
4591 dirty_card_log_enqueue_end = masm.pc(); | 4634 dirty_card_log_enqueue_end = __ pc(); |
4592 // XXX Should have a guarantee here about not going off the end! | 4635 // XXX Should have a guarantee here about not going off the end! |
4593 // Does it already do so? Do an experiment... | 4636 // Does it already do so? Do an experiment... |
4637 | |
4638 #undef __ | |
4639 | |
4594 } | 4640 } |
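Before the enqueue, the stub filters on the card byte; the filter amounts to the following (kCardShift's value is illustrative here; HotSpot takes it from CardTableModRefBS::card_shift):

    #include <cstdint>

    const int           kCardShift = 9;   // 512-byte cards, illustratively
    const unsigned char kDirty     = 0;

    // Returns the card that was newly dirtied, or nullptr if it was already
    // dirty (the stub then returns without enqueueing anything).
    unsigned char* mark_card(unsigned char* byte_map_base,
                             std::uintptr_t addr) {
      unsigned char* card = byte_map_base + (addr >> kCardShift);
      if (*card == kDirty) return nullptr;  // already dirty: nothing to log
      *card = kDirty;                       // stb(G0, O3, G0)
      return card;                          // then enqueued like the SATB case
    }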
4595 | 4641 |
4596 static inline void | 4642 static inline void |
4597 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { | 4643 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { |
4598 if (dirty_card_log_enqueue == 0) { | 4644 if (dirty_card_log_enqueue == 0) { |
4901 cmp(chr1, chr2); | 4947 cmp(chr1, chr2); |
4902 br(Assembler::notEqual, true, Assembler::pt, Ldone); | 4948 br(Assembler::notEqual, true, Assembler::pt, Ldone); |
4903 delayed()->mov(G0, result); // not equal | 4949 delayed()->mov(G0, result); // not equal |
4904 | 4950 |
4905 // only one char? | 4951 // only one char? |
4906 br_on_reg_cond(rc_z, true, Assembler::pn, limit, Ldone); | 4952 cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); |
4907 delayed()->add(G0, 1, result); // zero-length arrays are equal | 4953 delayed()->add(G0, 1, result); // zero-length arrays are equal |
4908 | 4954 |
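For orientation, a loose scalar model of the equality fast path being emitted here (names illustrative; the generated code walks a machine word at a time instead of calling memcmp, and handles the leading character separately as above):

    #include <cstring>

    bool char_arrays_equal(const unsigned short* a, const unsigned short* b,
                           int limit_chars) {
      if (limit_chars == 0) return true;   // zero-length arrays are equal
      if (a[0] != b[0]) return false;      // early out on the first char
      return std::memcmp(a, b, limit_chars * sizeof(unsigned short)) == 0;
    }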
4909 // word-by-word compare, don't need alignment check | 4955 // word-by-word compare, don't need alignment check |
4910 bind(Lvector); | 4956 bind(Lvector); |
4911 // Shift ary1 and ary2 to the end of the arrays, negate limit | 4957 // Shift ary1 and ary2 to the end of the arrays, negate limit |