src/cpu/sparc/vm/assembler_sparc.cpp @ changeset 3839:3d42f82cd811

7063628: Use cbcond on T4
Summary: Add new short branch instruction to Hotspot sparc assembler.
Reviewed-by: never, twisti, jrose

author   kvn
date     Thu, 21 Jul 2011 11:25:07 -0700
parents  cba7b5c2d53f
children 4fe626cbf0bf baf763f388e6

--- a/src/cpu/sparc/vm/assembler_sparc.cpp
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp
@@ -98 +98 @@
   switch (inv_op(inst)) {
   default: s = "????"; break;
   case call_op: s = "call"; break;
   case branch_op:
     switch (inv_op2(inst)) {
-    case bpr_op2: s = "bpr"; break;
     case fb_op2:  s = "fb";  break;
     case fbp_op2: s = "fbp"; break;
     case br_op2:  s = "br";  break;
     case bp_op2:  s = "bp";  break;
     case cb_op2:  s = "cb";  break;
+    case bpr_op2: {
+      if (is_cbcond(inst)) {
+        s = is_cxb(inst) ? "cxb" : "cwb";
+      } else {
+        s = "bpr";
+      }
+      break;
+    }
     default: s = "????"; break;
     }
   }
   ::tty->print("%s", s);
 }
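Note what this hunk establishes: cbcond does not get its own op2 value, it reuses the bpr_op2 slot, so every consumer of a branch word must now ask is_cbcond(inst) before interpreting the rest of the bits. A self-contained sketch of that decode priority follows; the op2 values are written out to make it runnable (they mirror the op2s enum in assembler_sparc.hpp), while the cbcond/cxb tests themselves are abstracted into parameters because their encoding checks live in that header, not here.

  // Sketch only: the decode priority the printer hunk above establishes.
  enum op2s { bp_op2 = 1, br_op2 = 2, bpr_op2 = 3, fbp_op2 = 5, fb_op2 = 6, cb_op2 = 7 };

  const char* branch_mnemonic(int op2, bool cbcond, bool cxb) {
    switch (op2) {
      case fb_op2:  return "fb";
      case fbp_op2: return "fbp";
      case br_op2:  return "br";
      case bp_op2:  return "bp";
      case cb_op2:  return "cb";
      case bpr_op2: return cbcond ? (cxb ? "cxb" : "cwb")  // 64-bit vs 32-bit compare-and-branch
                                  : "bpr";                 // plain register branch
      default:      return "????";
    }
  }

The same shared-slot disambiguation shows up again in the next two hunks, which patch and read branch displacements.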
@@ -125 +132 @@
   switch (inv_op(inst)) {
   default: ShouldNotReachHere();
   case call_op:   m = wdisp(word_aligned_ones, 0, 30);  v = wdisp(dest_pos, inst_pos, 30); break;
   case branch_op:
     switch (inv_op2(inst)) {
-    case bpr_op2: m = wdisp16(word_aligned_ones, 0);     v = wdisp16(dest_pos, inst_pos);     break;
     case fbp_op2: m = wdisp(  word_aligned_ones, 0, 19); v = wdisp(  dest_pos, inst_pos, 19); break;
     case bp_op2:  m = wdisp(  word_aligned_ones, 0, 19); v = wdisp(  dest_pos, inst_pos, 19); break;
     case fb_op2:  m = wdisp(  word_aligned_ones, 0, 22); v = wdisp(  dest_pos, inst_pos, 22); break;
     case br_op2:  m = wdisp(  word_aligned_ones, 0, 22); v = wdisp(  dest_pos, inst_pos, 22); break;
     case cb_op2:  m = wdisp(  word_aligned_ones, 0, 22); v = wdisp(  dest_pos, inst_pos, 22); break;
+    case bpr_op2: {
+      if (is_cbcond(inst)) {
+        m = wdisp10(word_aligned_ones, 0);
+        v = wdisp10(dest_pos, inst_pos);
+      } else {
+        m = wdisp16(word_aligned_ones, 0);
+        v = wdisp16(dest_pos, inst_pos);
+      }
+      break;
+    }
     default: ShouldNotReachHere();
     }
   }
   return inst & ~m | v;
 }
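The return statement is the whole patching idiom: each wdisp variant called with word_aligned_ones yields a mask m covering just the displacement field of that branch form (10 bits for cbcond, 16 for BPr, 19 or 22 for the others), and called with (dest_pos, inst_pos) it yields the newly encoded displacement v. A stand-alone illustration of the read-modify-write, not HotSpot code:

  #include <cstdint>

  // Patch one bit field of an instruction word, as patched_branch() does above.
  uint32_t patch_field(uint32_t inst, uint32_t m, uint32_t v) {
    // m: ones over the displacement field; v: freshly encoded displacement
    return (inst & ~m) | v;  // clear the old field, then or in the new one
  }

Note the original spells it inst & ~m | v and relies on C precedence (& binds tighter than |); the sketch just adds the parentheses.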
@@ -147 +163 @@
   switch (inv_op(inst)) {
   default: ShouldNotReachHere();
   case call_op: r = inv_wdisp(inst, pos, 30); break;
   case branch_op:
     switch (inv_op2(inst)) {
-    case bpr_op2: r = inv_wdisp16(inst, pos); break;
     case fbp_op2: r = inv_wdisp( inst, pos, 19); break;
     case bp_op2:  r = inv_wdisp( inst, pos, 19); break;
     case fb_op2:  r = inv_wdisp( inst, pos, 22); break;
     case br_op2:  r = inv_wdisp( inst, pos, 22); break;
     case cb_op2:  r = inv_wdisp( inst, pos, 22); break;
+    case bpr_op2: {
+      if (is_cbcond(inst)) {
+        r = inv_wdisp10(inst, pos);
+      } else {
+        r = inv_wdisp16(inst, pos);
+      }
+      break;
+    }
     default: ShouldNotReachHere();
     }
   }
   return r;
 }
@@ -966 +989 @@
 #ifdef ASSERT
   // Verify that flags was zeroed on return to Java
   Label PcOk;
   save_frame(0);                // to avoid clobbering O0
   ld_ptr(pc_addr, L0);
-  tst(L0);
-#ifdef _LP64
-  brx(Assembler::zero, false, Assembler::pt, PcOk);
-#else
-  br(Assembler::zero, false, Assembler::pt, PcOk);
-#endif // _LP64
-  delayed() -> nop();
+  br_null_short(L0, Assembler::pt, PcOk);
   stop("last_Java_pc not zeroed before leaving Java");
   bind(PcOk);

   // Verify that flags was zeroed on return to Java
   Label FlagsOk;
@@ -1001 +1018 @@
 #ifdef ASSERT
   // Make sure that we have an odd stack
   Label StackOk;
   andcc(last_java_sp, 0x01, G0);
   br(Assembler::notZero, false, Assembler::pt, StackOk);
-  delayed() -> nop();
+  delayed()->nop();
   stop("Stack Not Biased in set_last_Java_frame");
   bind(StackOk);
 #endif // ASSERT
   assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
   add( last_java_sp, STACK_BIAS, G4_scratch );
@@ -1097 +1114 @@
   check_and_handle_popframe(scratch_reg);
   check_and_handle_earlyret(scratch_reg);

   Address exception_addr(G2_thread, Thread::pending_exception_offset());
   ld_ptr(exception_addr, scratch_reg);
-  br_null(scratch_reg,false,pt,L);
-  delayed()->nop();
+  br_null_short(scratch_reg, pt, L);
   // we use O7 linkage so that forward_exception_entry has the issuing PC
   call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
   delayed()->nop();
   bind(L);
 }
@@ -1872 +1888 @@
   set(Universe::verify_oop_mask (), O2_mask);
   set(Universe::verify_oop_bits (), O3_bits);

   // assert((obj & oop_mask) == oop_bits);
   and3(O0_obj, O2_mask, O4_temp);
-  cmp(O4_temp, O3_bits);
-  brx(notEqual, false, pn, null_or_fail);
-  delayed()->nop();
+  cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail);

   if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
     // the null_or_fail case is useless; must test for null separately
-    br_null(O0_obj, false, pn, succeed);
-    delayed()->nop();
+    br_null_short(O0_obj, pn, succeed);
   }

   // Check the klassOop of this object for being in the right area of memory.
   // Cannot do the load in the delay slot above, in case O0 is null.
   load_klass(O0_obj, O0_obj);
@@ -1891 +1904 @@
   if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
     set(Universe::verify_klass_mask(), O2_mask);
   if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
     set(Universe::verify_klass_bits(), O3_bits);
   and3(O0_obj, O2_mask, O4_temp);
-  cmp(O4_temp, O3_bits);
-  brx(notEqual, false, pn, fail);
-  delayed()->nop();
+  cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail);
   // Check the klass's klass
   load_klass(O0_obj, O0_obj);
   and3(O0_obj, O2_mask, O4_temp);
   cmp(O4_temp, O3_bits);
   brx(notEqual, false, pn, fail);
@@ -2120 +2131 @@
   }
   ShouldNotReachHere();
   return Assembler::rc_z;
 }

-// compares register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
-void MacroAssembler::br_zero( Condition c, bool a, Predict p, Register s1, Label& L) {
+// compares (32 bit) register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
+void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
   tst(s1);
   br (c, a, p, L);
 }
-

 // Compares a pointer register with zero and branches on null.
 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
   assert_not_delayed();
@@ -2152 +2162 @@
 }

 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
                                      Register s1, address d,
                                      relocInfo::relocType rt ) {
+  assert_not_delayed();
   if (VM_Version::v9_instructions_work()) {
     bpr(rc, a, p, s1, d, rt);
   } else {
     tst(s1);
     br(reg_cond_to_cc_cond(rc), a, p, d, rt);
   }
 }

 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
                                      Register s1, Label& L ) {
+  assert_not_delayed();
   if (VM_Version::v9_instructions_work()) {
     bpr(rc, a, p, s1, L);
   } else {
     tst(s1);
     br(reg_cond_to_cc_cond(rc), a, p, L);
   }
 }

+// Compare registers and branch with nop in delay slot or cbcond without delay slot.
+
+// Compare integer (32 bit) values (icc only).
+void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
+                                      Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(c, icc, s1, s2, L);
+  } else {
+    cmp(s1, s2);
+    br(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Compare integer (32 bit) values (icc only).
+void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
+                                      Predict p, Label& L) {
+  assert_not_delayed();
+  if (is_simm(simm13a, 5) && use_cbcond(L)) {
+    Assembler::cbcond(c, icc, s1, simm13a, L);
+  } else {
+    cmp(s1, simm13a);
+    br(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Branch that tests xcc in LP64 and icc in !LP64
+void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c,
+                                       Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(c, ptr_cc, s1, s2, L);
+  } else {
+    cmp(s1, s2);
+    brx(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Branch that tests xcc in LP64 and icc in !LP64
+void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c,
+                                       Predict p, Label& L) {
+  assert_not_delayed();
+  if (is_simm(simm13a, 5) && use_cbcond(L)) {
+    Assembler::cbcond(c, ptr_cc, s1, simm13a, L);
+  } else {
+    cmp(s1, simm13a);
+    brx(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Short branch versions for comparing a pointer with zero.
+
+void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(zero, ptr_cc, s1, 0, L);
+    return;
+  }
+  br_null(s1, false, p, L);
+  delayed()->nop();
+}
+
+void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
+    return;
+  }
+  br_notnull(s1, false, p, L);
+  delayed()->nop();
+}
+
+// Unconditional short branch
+void MacroAssembler::ba_short(Label& L) {
+  if (use_cbcond(L)) {
+    Assembler::cbcond(equal, icc, G0, G0, L);
+    return;
+  }
+  br(always, false, pt, L);
+  delayed()->nop();
+}

 // instruction sequences factored across compiler & interpreter


 void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
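The hunk above is the heart of the change: each *_short helper collapses the classic compare, branch, delay-slot-nop triple into a single cbcond whenever use_cbcond(L) reports the target is within the short displacement, falling back to the old sequence otherwise. The immediate forms additionally require the constant to fit cbcond's 5-bit signed immediate, hence the is_simm(simm13a, 5) guard. Every call-site conversion later in this diff follows the same before/after shape; a representative pair, excerpted from the atomic-ops hunk further down:

  // Before: V9 branch plus a mandatory delay slot.
  cmp(value_reg, top_reg_after_save);
  br(Assembler::notEqual, false, Assembler::pn, not_same);
  delayed()->nop();

  // After: one cbcond on T4, or the three-instruction fallback.
  cmp_and_br_short(value_reg, top_reg_after_save,
                   Assembler::notEqual, Assembler::pn, not_same);

The remaining hunks (lcmp and the shift routines next) continue through the rest of the file applying exactly this rewrite.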
@@ -2195 +2292 @@
   // (and therefore probably prefetchable).
   // And the equals case for the high part does not need testing,
   // since that triplet is reached only after finding the high halves differ.

   if (VM_Version::v9_instructions_work()) {
-
-    mov ( -1, Rresult);
-    ba( false, done );  delayed()-> movcc(greater, false, icc, 1, Rresult);
-  }
-  else {
+    mov(-1, Rresult);
+    ba(done);  delayed()-> movcc(greater, false, icc, 1, Rresult);
+  } else {
     br(less,    true, pt, done);  delayed()-> set(-1, Rresult);
     br(greater, true, pt, done);  delayed()-> set( 1, Rresult);
   }

   bind( check_low_parts );

   if (VM_Version::v9_instructions_work()) {
     mov( -1, Rresult);
     movcc(equal,           false, icc, 0, Rresult);
     movcc(greaterUnsigned, false, icc, 1, Rresult);
-  }
-  else {
+  } else {
     set(-1, Rresult);
     br(equal,           true, pt, done);  delayed()->set( 0, Rresult);
     br(greaterUnsigned, true, pt, done);  delayed()->set( 1, Rresult);
   }
   bind( done );
 }
@@ -2248 +2342 @@
   Label big_shift, done;

   // This code can be optimized to use the 64 bit shifts in V9.
   // Here we use the 32 bit shifts.

   and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
   subcc(Rcount,   31, Ralt_count);
   br(greater, true, pn, big_shift);
-  delayed()->
-  dec(Ralt_count);
+  delayed()->dec(Ralt_count);

   // shift < 32 bits, Ralt_count = Rcount-31

   // We get the transfer bits by shifting right by 32-count the low
   // register. This is done by shifting right by 31-count and then by one
   // more to take care of the special (rare) case where count is zero
   // (shifting by 32 would not work).

-  neg( Ralt_count );
+  neg(Ralt_count);

   // The order of the next two instructions is critical in the case where
   // Rin and Rout are the same and should not be reversed.

-  srl( Rin_low, Ralt_count, Rxfer_bits ); // shift right by 31-count
+  srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
   if (Rcount != Rout_low) {
-    sll( Rin_low, Rcount, Rout_low ); // low half
+    sll(Rin_low, Rcount, Rout_low); // low half
   }
-  sll( Rin_high, Rcount, Rout_high );
+  sll(Rin_high, Rcount, Rout_high);
   if (Rcount == Rout_low) {
-    sll( Rin_low, Rcount, Rout_low ); // low half
+    sll(Rin_low, Rcount, Rout_low); // low half
   }
-  srl( Rxfer_bits, 1, Rxfer_bits ); // shift right by one more
-  ba (false, done);
-  delayed()->
-  or3( Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low
+  srl(Rxfer_bits, 1, Rxfer_bits); // shift right by one more
+  ba(done);
+  delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low

   // shift >= 32 bits, Ralt_count = Rcount-32
   bind(big_shift);
-  sll( Rin_low, Ralt_count, Rout_high );
-  clr( Rout_low );
+  sll(Rin_low, Ralt_count, Rout_high);
+  clr(Rout_low);

   bind(done);
 }

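The "shift right by 31-count and then by one more" comment is doing real work: a single 32-bit shift by 32-count would be undefined when count is zero. A self-contained C++ restatement of the same trick (an illustrative stand-in, not the emitted code):

  #include <cstdint>

  // 64-bit left shift built from 32-bit operations, mirroring lshl() above,
  // including the two-step transfer-bit shift that stays defined at count == 0.
  uint64_t shift_left_64(uint32_t hi, uint32_t lo, unsigned count) {
    count &= 0x3f;                              // take least significant 6 bits
    if (count >= 32) {                          // the big_shift case
      return (uint64_t)(lo << (count - 32)) << 32;  // low half becomes the high half
    }
    uint32_t xfer = (lo >> (31 - count)) >> 1;  // bits crossing into the high half
    uint32_t out_hi = (hi << count) | xfer;
    uint32_t out_lo = lo << count;
    return ((uint64_t)out_hi << 32) | out_lo;
  }

The two right-shift flavors that follow (arithmetic lshr, then logical lushr) mirror the identical trick, using sll for the transfer bits and sra or srl for the high half.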
@@ -2311 +2403 @@
   Label big_shift, done;

   // This code can be optimized to use the 64 bit shifts in V9.
   // Here we use the 32 bit shifts.

   and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
   subcc(Rcount,   31, Ralt_count);
   br(greater, true, pn, big_shift);
   delayed()->dec(Ralt_count);

   // shift < 32 bits, Ralt_count = Rcount-31

   // We get the transfer bits by shifting left by 32-count the high
   // register. This is done by shifting left by 31-count and then by one
   // more to take care of the special (rare) case where count is zero
   // (shifting by 32 would not work).

-  neg( Ralt_count );
+  neg(Ralt_count);
   if (Rcount != Rout_low) {
-    srl( Rin_low, Rcount, Rout_low );
+    srl(Rin_low, Rcount, Rout_low);
   }

   // The order of the next two instructions is critical in the case where
   // Rin and Rout are the same and should not be reversed.

-  sll( Rin_high, Ralt_count, Rxfer_bits ); // shift left by 31-count
-  sra( Rin_high, Rcount, Rout_high ); // high half
-  sll( Rxfer_bits, 1, Rxfer_bits ); // shift left by one more
+  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
+  sra(Rin_high, Rcount, Rout_high);      // high half
+  sll(Rxfer_bits, 1, Rxfer_bits);        // shift left by one more
   if (Rcount == Rout_low) {
-    srl( Rin_low, Rcount, Rout_low );
+    srl(Rin_low, Rcount, Rout_low);
   }
-  ba (false, done);
-  delayed()->
-  or3( Rout_low, Rxfer_bits, Rout_low ); // new low value: or shifted old low part and xfer from high
+  ba(done);
+  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high

   // shift >= 32 bits, Ralt_count = Rcount-32
   bind(big_shift);

-  sra( Rin_high, Ralt_count, Rout_low );
-  sra( Rin_high, 31, Rout_high ); // sign into hi
+  sra(Rin_high, Ralt_count, Rout_low);
+  sra(Rin_high, 31, Rout_high);  // sign into hi

   bind( done );
 }

@@ -2375 +2466 @@
   Label big_shift, done;

   // This code can be optimized to use the 64 bit shifts in V9.
   // Here we use the 32 bit shifts.

   and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
   subcc(Rcount,   31, Ralt_count);
   br(greater, true, pn, big_shift);
   delayed()->dec(Ralt_count);

   // shift < 32 bits, Ralt_count = Rcount-31

   // We get the transfer bits by shifting left by 32-count the high
   // register. This is done by shifting left by 31-count and then by one
   // more to take care of the special (rare) case where count is zero
   // (shifting by 32 would not work).

-  neg( Ralt_count );
+  neg(Ralt_count);
   if (Rcount != Rout_low) {
-    srl( Rin_low, Rcount, Rout_low );
+    srl(Rin_low, Rcount, Rout_low);
   }

   // The order of the next two instructions is critical in the case where
   // Rin and Rout are the same and should not be reversed.

-  sll( Rin_high, Ralt_count, Rxfer_bits ); // shift left by 31-count
-  srl( Rin_high, Rcount, Rout_high ); // high half
-  sll( Rxfer_bits, 1, Rxfer_bits ); // shift left by one more
+  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
+  srl(Rin_high, Rcount, Rout_high);      // high half
+  sll(Rxfer_bits, 1, Rxfer_bits);        // shift left by one more
   if (Rcount == Rout_low) {
-    srl( Rin_low, Rcount, Rout_low );
+    srl(Rin_low, Rcount, Rout_low);
   }
-  ba (false, done);
-  delayed()->
-  or3( Rout_low, Rxfer_bits, Rout_low ); // new low value: or shifted old low part and xfer from high
+  ba(done);
+  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high

   // shift >= 32 bits, Ralt_count = Rcount-32
   bind(big_shift);

-  srl( Rin_high, Ralt_count, Rout_low );
-  clr( Rout_high );
+  srl(Rin_high, Ralt_count, Rout_low);
+  clr(Rout_high);

   bind( done );
 }

 #ifdef _LP64
 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
   cmp(Ra, Rb);
-  mov( -1, Rresult);
+  mov(-1, Rresult);
   movcc(equal,   false, xcc, 0, Rresult);
   movcc(greater, false, xcc, 1, Rresult);
 }
 #endif

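The LP64 lcmp just above is the standard branchless three-way compare: seed the result with -1, then let two conditional moves overwrite it. A sketch of the semantics, where each movcc writes only when its condition holds:

  #include <cstdint>

  // Semantics of the LP64 lcmp() above, in plain C++.
  int three_way_compare(int64_t a, int64_t b) {
    int result = -1;            // mov(-1, Rresult)
    if (a == b) result = 0;     // movcc(equal,   false, xcc, 0, Rresult)
    if (a >  b) result = 1;     // movcc(greater, false, xcc, 1, Rresult)
    return result;
  }

The float_cmp hunk that follows uses the same seed-and-movcc pattern on the fcc0 condition codes.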
@@ -2457 +2547 @@
   Condition eq = f_equal;
   Condition gt = unordered_result == 1 ? f_unorderedOrGreater : f_greater;

   if (VM_Version::v9_instructions_work()) {

-    mov( -1, Rresult );
-    movcc( eq, true, fcc0, 0, Rresult );
-    movcc( gt, true, fcc0, 1, Rresult );
+    mov(-1, Rresult);
+    movcc(eq, true, fcc0, 0, Rresult);
+    movcc(gt, true, fcc0, 1, Rresult);

   } else {
     Label done;

     set( -1, Rresult );
     //fb(lt, true, pn, done); delayed()->set( -1, Rresult );
     fb( eq, true, pn, done); delayed()->set( 0, Rresult );
     fb( gt, true, pn, done); delayed()->set( 1, Rresult );

     bind (done);
@@ -2666 +2756 @@
   mov(G0,yield_reg);
   mov(G0, yieldall_reg);
   set(StubRoutines::Sparc::locked, lock_reg);

   bind(retry_get_lock);
-  cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
-  br(Assembler::less, false, Assembler::pt, dont_yield);
-  delayed()->nop();
+  cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield);

   if(use_call_vm) {
     Untested("Need to verify global reg consistency");
     call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg);
   } else {
@@ -2698 +2786 @@
   br(Assembler::notEqual, true, Assembler::pn, retry_get_lock);
   delayed()->add(yield_reg,1,yield_reg);

   // yes, got lock.  do we have the same top?
   ld(top_ptr_reg_after_save, 0, value_reg);
-  cmp(value_reg, top_reg_after_save);
-  br(Assembler::notEqual, false, Assembler::pn, not_same);
-  delayed()->nop();
+  cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same);

   // yes, same top.
   st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
   membar(Assembler::StoreStore);

@@ -2950 +3036 @@
                  L2, L3, L4, L5,
                  NULL, &L_pop_to_failure);

   // on success:
   restore();
-  ba(false, L_success);
-  delayed()->nop();
+  ba_short(L_success);

   // on failure:
   bind(L_pop_to_failure);
   restore();
   bind(L_failure);
@@ -2967 +3052 @@
                                                    Register temp_reg,
                                                    Register temp2_reg,
                                                    Label* L_success,
                                                    Label* L_failure,
                                                    Label* L_slow_path,
-                                                   RegisterOrConstant super_check_offset,
-                                                   Register instanceof_hack) {
+                                                   RegisterOrConstant super_check_offset) {
   int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::secondary_super_cache_offset_in_bytes());
   int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());

@@ -2991 +3075 @@
   Label L_fallthrough;
   int label_nulls = 0;
   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
-  assert(label_nulls <= 1 || instanceof_hack != noreg ||
+  assert(label_nulls <= 1 ||
          (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
          "at most one NULL in the batch, usually");
-
-  // Support for the instanceof hack, which uses delay slots to
-  // set a destination register to zero or one.
-  bool do_bool_sets = (instanceof_hack != noreg);
-#define BOOL_SET(bool_value)                      \
-  if (do_bool_sets && bool_value >= 0)            \
-    set(bool_value, instanceof_hack)
-#define DELAYED_BOOL_SET(bool_value)              \
-  if (do_bool_sets && bool_value >= 0)            \
-    delayed()->set(bool_value, instanceof_hack);  \
-  else delayed()->nop()
-  // Hacked ba(), which may only be used just before L_fallthrough.
-#define FINAL_JUMP(label, bool_value)             \
-  if (&(label) == &L_fallthrough) {               \
-    BOOL_SET(bool_value);                         \
-  } else {                                        \
-    ba((do_bool_sets && bool_value >= 0), label); \
-    DELAYED_BOOL_SET(bool_value);                 \
-  }

   // If the pointers are equal, we are done (e.g., String[] elements).
   // This self-check enables sharing of secondary supertype arrays among
   // non-primary types such as array-of-interface.  Otherwise, each such
   // type would need its own customized SSA.
   // We move this check to the front of the fast path because many
   // type checks are in fact trivially successful in this manner,
   // so we get a nicely predicted branch right at the start of the check.
   cmp(super_klass, sub_klass);
-  brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
-  DELAYED_BOOL_SET(1);
+  brx(Assembler::equal, false, Assembler::pn, *L_success);
+  delayed()->nop();

   // Check the supertype display:
   if (must_load_sco) {
     // The super check offset is always positive...
     lduw(super_klass, sco_offset, temp2_reg);
@@ -3047 +3112 @@
   // Note that the cache is updated below if it does not help us find
   // what we need immediately.
   // So if it was a primary super, we can just fail immediately.
   // Otherwise, it's the slow path for us (no success at this point).

+  // Hacked ba(), which may only be used just before L_fallthrough.
+#define FINAL_JUMP(label)            \
+  if (&(label) != &L_fallthrough) {  \
+    ba(label);  delayed()->nop();    \
+  }
+
   if (super_check_offset.is_register()) {
-    brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
-    delayed(); if (do_bool_sets)  BOOL_SET(1);
-    // if !do_bool_sets, sneak the next cmp into the delay slot:
-    cmp(super_check_offset.as_register(), sc_offset);
+    brx(Assembler::equal, false, Assembler::pn, *L_success);
+    delayed()->cmp(super_check_offset.as_register(), sc_offset);

     if (L_failure == &L_fallthrough) {
-      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
+      brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
       delayed()->nop();
-      BOOL_SET(0);  // fallthrough on failure
     } else {
-      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
-      DELAYED_BOOL_SET(0);
-      FINAL_JUMP(*L_slow_path, -1);  // -1 => vanilla delay slot
+      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
+      delayed()->nop();
+      FINAL_JUMP(*L_slow_path);
     }
   } else if (super_check_offset.as_constant() == sc_offset) {
     // Need a slow path; fast failure is impossible.
     if (L_slow_path == &L_fallthrough) {
-      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
-      DELAYED_BOOL_SET(1);
+      brx(Assembler::equal, false, Assembler::pt, *L_success);
+      delayed()->nop();
     } else {
       brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
       delayed()->nop();
-      FINAL_JUMP(*L_success, 1);
+      FINAL_JUMP(*L_success);
     }
   } else {
     // No slow path; it's a fast decision.
     if (L_failure == &L_fallthrough) {
-      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
-      DELAYED_BOOL_SET(1);
-      BOOL_SET(0);
+      brx(Assembler::equal, false, Assembler::pt, *L_success);
+      delayed()->nop();
     } else {
-      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
-      DELAYED_BOOL_SET(0);
-      FINAL_JUMP(*L_success, 1);
+      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
+      delayed()->nop();
+      FINAL_JUMP(*L_success);
     }
   }

   bind(L_fallthrough);

-#undef final_jump
-#undef bool_set
-#undef DELAYED_BOOL_SET
-#undef final_jump
+#undef FINAL_JUMP
 }
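With the instanceof hack gone, the fast path's control flow becomes easy to state. A hedged C++ sketch of the triage it encodes (types and loads simplified; super_check_offset is the byte offset of the subklass's supertype-display slot, sc_offset that of the secondary-super cache):

  // Sketch of the decision structure in check_klass_subtype_fast_path() above;
  // void* stands in for the HotSpot klass pointers.
  enum FastPathResult { fp_success, fp_failure, fp_slow_path };

  FastPathResult fast_path_triage(void* sub_klass, void* super_klass,
                                  int super_check_offset, int sc_offset) {
    if (sub_klass == super_klass) return fp_success;   // trivial self-check first
    void* displayed = *(void**)((char*)sub_klass + super_check_offset);
    if (displayed == super_klass) return fp_success;   // hit in the supertype display
    if (super_check_offset == sc_offset) return fp_slow_path;  // only the cache missed
    return fp_failure;                                 // primary display miss: hard fail
  }

Note how the rewrite also moves the cmp into the first brx's delay slot, which the old bool-setting variant could not do.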
@@ -3097 +3161 @@


 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                    Register super_klass,
@@ -3183 +3247 @@

   // Success.  Cache the super we found and proceed in triumph.
   st_ptr(super_klass, sub_klass, sc_offset);

   if (L_success != &L_fallthrough) {
-    ba(false, *L_success);
+    ba(*L_success);
     delayed()->nop();
   }

   bind(L_fallthrough);
 }
@@ -3198 +3262 @@
                                               Label& wrong_method_type) {
   assert_different_registers(mtype_reg, mh_reg, temp_reg);
   // compare method type against that of the receiver
   RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
   load_heap_oop(mh_reg, mhtype_offset, temp_reg);
-  cmp(temp_reg, mtype_reg);
-  br(Assembler::notEqual, false, Assembler::pn, wrong_method_type);
-  delayed()->nop();
+  cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type);
 }


 // A method handle has a "vmslots" field which gives the size of its
 // argument list in JVM stack slots.  This field is either located directly
@@ -3293 +3355 @@
   // whether the epoch is still valid
   // Note that the runtime guarantees sufficient alignment of JavaThread
   // pointers to allow age to be placed into low bits
   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
   and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
-  cmp(temp_reg, markOopDesc::biased_lock_pattern);
-  brx(Assembler::notEqual, false, Assembler::pn, cas_label);
-  delayed()->nop();
+  cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);

   load_klass(obj_reg, temp_reg);
   ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
   or3(G2_thread, temp_reg, temp_reg);
   xor3(mark_reg, temp_reg, temp_reg);
@@ -3362 +3422 @@
   }
   if (slow_case != NULL) {
     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
     delayed()->nop();
   }
-  br(Assembler::always, false, Assembler::pt, done);
-  delayed()->nop();
+  ba_short(done);

   bind(try_rebias);
   // At this point we know the epoch has expired, meaning that the
   // current "bias owner", if any, is actually invalid. Under these
   // circumstances _only_, we are allowed to use the current header's
@@ -3391 +3450 @@
   }
   if (slow_case != NULL) {
     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
     delayed()->nop();
   }
-  br(Assembler::always, false, Assembler::pt, done);
-  delayed()->nop();
+  ba_short(done);

   bind(try_revoke_bias);
   // The prototype mark in the klass doesn't have the bias bit set any
   // more, indicating that objects of this data type are not supposed
   // to be biased any more. We are going to try to reset the mark of
@@ -3443 +3501 @@

 // CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by
 // Solaris/SPARC's "as".  Another apt name would be cas_ptr()

 void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
-  casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()) ;
+  casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
 }



 // compiler_lock_object() and compiler_unlock_object() are direct transliterations
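An aside before the locking hunks: casn() above is the pointer-width CAS the fast paths below lean on (roughly, casx where V9 works, a lock-protected emulation otherwise). A self-contained analogue of its semantics, with std::atomic standing in for the hardware instruction:

  #include <atomic>
  #include <cstdint>

  // Pointer-width compare-and-swap with casn()-like semantics:
  // returns the value observed at the address (equal to cmp on success).
  intptr_t cas_ptr(std::atomic<intptr_t>* addr, intptr_t cmp, intptr_t set) {
    addr->compare_exchange_strong(cmp, set);  // on failure, cmp receives the old value
    return cmp;
  }

The return convention matters below: callers compare the observed value against the expected one, which is exactly the cmp(Rmark, Rscratch) and cmp(Rscratch, G0) checks that follow each casn in the lock code.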
@@ -3484 +3542 @@
   if (counters != NULL) {
     inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
   }

   if (EmitSync & 1) {
-    mov (3, Rscratch) ;
-    st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-    cmp (SP, G0) ;
+    mov(3, Rscratch);
+    st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+    cmp(SP, G0);
     return ;
   }

   if (EmitSync & 2) {

@@ -3527 +3585 @@
     // we did not find an unlocked object so see if this is a recursive case
     // sub(Rscratch, SP, Rscratch);
     assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
     andcc(Rscratch, 0xfffff003, Rscratch);
     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-    bind (done) ;
+    bind (done);
     return ;
   }

   Label Egress ;

   if (EmitSync & 256) {
     Label IsInflated ;

-    ld_ptr (mark_addr, Rmark);           // fetch obj->mark
+    ld_ptr(mark_addr, Rmark);           // fetch obj->mark
     // Triage: biased, stack-locked, neutral, inflated
     if (try_bias) {
       biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
       // Invariant: if control reaches this point in the emitted stream
       // then Rmark has not been modified.
     }

     // Store mark into displaced mark field in the on-stack basic-lock "box"
     // Critically, this must happen before the CAS
     // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
-    st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-    andcc  (Rmark, 2, G0) ;
-    brx    (Assembler::notZero, false, Assembler::pn, IsInflated) ;
-    delayed() ->
+    st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+    andcc(Rmark, 2, G0);
+    brx(Assembler::notZero, false, Assembler::pn, IsInflated);
+    delayed()->

     // Try stack-lock acquisition.
     // Beware: the 1st instruction is in a delay slot
-    mov    (Rbox, Rscratch);
-    or3    (Rmark, markOopDesc::unlocked_value, Rmark);
-    assert (mark_addr.disp() == 0, "cas must take a zero displacement");
-    casn   (mark_addr.base(), Rmark, Rscratch) ;
-    cmp    (Rmark, Rscratch);
-    brx    (Assembler::equal, false, Assembler::pt, done);
+    mov(Rbox, Rscratch);
+    or3(Rmark, markOopDesc::unlocked_value, Rmark);
+    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+    casn(mark_addr.base(), Rmark, Rscratch);
+    cmp(Rmark, Rscratch);
+    brx(Assembler::equal, false, Assembler::pt, done);
     delayed()->sub(Rscratch, SP, Rscratch);

     // Stack-lock attempt failed - check for recursive stack-lock.
     // See the comments below about how we might remove this case.
 #ifdef _LP64
-    sub    (Rscratch, STACK_BIAS, Rscratch);
+    sub(Rscratch, STACK_BIAS, Rscratch);
 #endif
     assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-    andcc  (Rscratch, 0xfffff003, Rscratch);
-    br     (Assembler::always, false, Assembler::pt, done) ;
-    delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+    andcc(Rscratch, 0xfffff003, Rscratch);
+    br(Assembler::always, false, Assembler::pt, done);
+    delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());

-    bind   (IsInflated) ;
+    bind(IsInflated);
     if (EmitSync & 64) {
       // If m->owner != null goto IsLocked
       // Pessimistic form: Test-and-CAS vs CAS
       // The optimistic form avoids RTS->RTO cache line upgrades.
-      ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
-      andcc  (Rscratch, Rscratch, G0) ;
-      brx    (Assembler::notZero, false, Assembler::pn, done) ;
-      delayed()->nop() ;
+      ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+      andcc(Rscratch, Rscratch, G0);
+      brx(Assembler::notZero, false, Assembler::pn, done);
+      delayed()->nop();
       // m->owner == null : it's unlocked.
     }

     // Try to CAS m->owner from null to Self
     // Invariant: if we acquire the lock then _recursions should be 0.
-    add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
-    mov    (G2_thread, Rscratch) ;
-    casn   (Rmark, G0, Rscratch) ;
-    cmp    (Rscratch, G0) ;
+    add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+    mov(G2_thread, Rscratch);
+    casn(Rmark, G0, Rscratch);
+    cmp(Rscratch, G0);
     // Intentional fall-through into done
   } else {
     // Aggressively avoid the Store-before-CAS penalty
     // Defer the store into box->dhw until after the CAS
     Label IsInflated, Recursive ;

     // Anticipate CAS -- Avoid RTS->RTO upgrade
-    // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
+    // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);

-    ld_ptr (mark_addr, Rmark);           // fetch obj->mark
+    ld_ptr(mark_addr, Rmark);           // fetch obj->mark
     // Triage: biased, stack-locked, neutral, inflated

     if (try_bias) {
       biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
       // Invariant: if control reaches this point in the emitted stream
       // then Rmark has not been modified.
     }
-    andcc  (Rmark, 2, G0) ;
-    brx    (Assembler::notZero, false, Assembler::pn, IsInflated) ;
+    andcc(Rmark, 2, G0);
+    brx(Assembler::notZero, false, Assembler::pn, IsInflated);
     delayed()->                         // Beware - dangling delay-slot

     // Try stack-lock acquisition.
     // Transiently install BUSY (0) encoding in the mark word.
     // if the CAS of 0 into the mark was successful then we execute:
     //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
     //   ST obj->mark = box    -- overwrite transient 0 value
     // This presumes TSO, of course.

-    mov    (0, Rscratch) ;
-    or3    (Rmark, markOopDesc::unlocked_value, Rmark);
-    assert (mark_addr.disp() == 0, "cas must take a zero displacement");
-    casn   (mark_addr.base(), Rmark, Rscratch) ;
-    // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
-    cmp    (Rscratch, Rmark) ;
-    brx    (Assembler::notZero, false, Assembler::pn, Recursive) ;
-    delayed() ->
-      st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+    mov(0, Rscratch);
+    or3(Rmark, markOopDesc::unlocked_value, Rmark);
+    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+    casn(mark_addr.base(), Rmark, Rscratch);
+    // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
+    cmp(Rscratch, Rmark);
+    brx(Assembler::notZero, false, Assembler::pn, Recursive);
+    delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
     if (counters != NULL) {
       cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
     }
-    br     (Assembler::always, false, Assembler::pt, done);
-    delayed() ->
-      st_ptr (Rbox, mark_addr) ;
-
-    bind   (Recursive) ;
+    ba(done);
+    delayed()->st_ptr(Rbox, mark_addr);
+
+    bind(Recursive);
     // Stack-lock attempt failed - check for recursive stack-lock.
     // Tests show that we can remove the recursive case with no impact
     // on refworkload 0.83.  If we need to reduce the size of the code
     // emitted by compiler_lock_object() the recursive case is perfect
     // candidate.
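The transient-BUSY sequence in the hunk above (CAS the mark word to 0, then store the displaced header, then store the box pointer, relying on TSO so the two stores become visible in order) is compact enough to restate. A self-contained sketch under those assumptions; the types here are illustrative stand-ins, not HotSpot's:

  #include <atomic>
  #include <cstdint>

  const intptr_t unlocked_value = 1;   // stand-in for markOopDesc::unlocked_value

  struct StackLockBox { intptr_t displaced_header; };  // stand-in for the BasicLock box

  // The stack-lock fast path: true on acquisition, false means fall through
  // to the recursive / inflated cases, as the Recursive label does above.
  bool try_stack_lock(std::atomic<intptr_t>& mark_word, StackLockBox* box) {
    intptr_t mark = mark_word.load() | unlocked_value;  // or3(Rmark, unlocked_value, Rmark)
    intptr_t expected = mark;
    if (!mark_word.compare_exchange_strong(expected, 0)) // casn: install BUSY (0)
      return false;                                      // CAS lost: not a fresh lock
    box->displaced_header = mark;                        // ST box->dhw  = mark
    mark_word.store(reinterpret_cast<intptr_t>(box));    // ST obj->mark = box
    return true;
  }

Deferring the box->dhw store into the brx delay slot is exactly the "defer the store until after the CAS" optimization the hunk's opening comment describes.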
3651 // the fast-path stack-lock code from the interpreter and always passed 3707 // the fast-path stack-lock code from the interpreter and always passed
3652 // control to the "slow" operators in synchronizer.cpp. 3708 // control to the "slow" operators in synchronizer.cpp.
3653 3709
3654 // RScratch contains the fetched obj->mark value from the failed CASN. 3710 // RScratch contains the fetched obj->mark value from the failed CASN.
3655 #ifdef _LP64 3711 #ifdef _LP64
3656 sub (Rscratch, STACK_BIAS, Rscratch); 3712 sub(Rscratch, STACK_BIAS, Rscratch);
3657 #endif 3713 #endif
3658 sub(Rscratch, SP, Rscratch); 3714 sub(Rscratch, SP, Rscratch);
3659 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); 3715 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
3660 andcc (Rscratch, 0xfffff003, Rscratch); 3716 andcc(Rscratch, 0xfffff003, Rscratch);
3661 if (counters != NULL) { 3717 if (counters != NULL) {
3662 // Accounting needs the Rscratch register 3718 // Accounting needs the Rscratch register
3663 st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); 3719 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3664 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); 3720 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
3665 br (Assembler::always, false, Assembler::pt, done) ; 3721 ba_short(done);
3666 delayed()->nop() ;
3667 } else { 3722 } else {
3668 br (Assembler::always, false, Assembler::pt, done) ; 3723 ba(done);
3669 delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); 3724 delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
3670 } 3725 }
3671 3726
3672 bind (IsInflated) ; 3727 bind (IsInflated);
3673 if (EmitSync & 64) { 3728 if (EmitSync & 64) {
3674 // If m->owner != null goto IsLocked 3729 // If m->owner != null goto IsLocked
3675 // Test-and-CAS vs CAS 3730 // Test-and-CAS vs CAS
3676 // Pessimistic form avoids futile (doomed) CAS attempts 3731 // Pessimistic form avoids futile (doomed) CAS attempts
3677 // The optimistic form avoids RTS->RTO cache line upgrades. 3732 // The optimistic form avoids RTS->RTO cache line upgrades.
3678 ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); 3733 ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3679 andcc (Rscratch, Rscratch, G0) ; 3734 andcc(Rscratch, Rscratch, G0);
3680 brx (Assembler::notZero, false, Assembler::pn, done) ; 3735 brx(Assembler::notZero, false, Assembler::pn, done);
3681 delayed()->nop() ; 3736 delayed()->nop();
3682 // m->owner == null : it's unlocked. 3737 // m->owner == null : it's unlocked.
3683 } 3738 }
3684 3739
3685 // Try to CAS m->owner from null to Self 3740 // Try to CAS m->owner from null to Self
3686 // Invariant: if we acquire the lock then _recursions should be 0. 3741 // Invariant: if we acquire the lock then _recursions should be 0.
3687 add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; 3742 add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3688 mov (G2_thread, Rscratch) ; 3743 mov(G2_thread, Rscratch);
3689 casn (Rmark, G0, Rscratch) ; 3744 casn(Rmark, G0, Rscratch);
3690 cmp (Rscratch, G0) ; 3745 cmp(Rscratch, G0);
3691 // ST box->displaced_header = NonZero. 3746 // ST box->displaced_header = NonZero.
3692 // Any non-zero value suffices: 3747 // Any non-zero value suffices:
3693 // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. 3748 // unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
3694 st_ptr (Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); 3749 st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
3695 // Intentional fall-through into done 3750 // Intentional fall-through into done
3696 } 3751 }
3697 3752
3698 bind (done) ; 3753 bind (done);
3699 } 3754 }
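For reference, the inflated fast path above reduces to a single compare-and-swap of the monitor's owner field. A minimal C++ sketch, assuming HotSpot's Atomic::cmpxchg_ptr and direct access to ObjectMonitor's private _owner field (the "- 2" in the assembly offsets compensates for the tag bits set in an inflated mark word; the emitted code also leaves the CAS result in icc for the fall-through into done):

  static bool try_enter_inflated(ObjectMonitor* mon, Thread* self) {
    // casn(Rmark, G0, Rscratch): CAS m->owner from NULL to the current thread.
    void* prior = Atomic::cmpxchg_ptr((void*)self, &mon->_owner, (void*)NULL);
    // cmp(Rscratch, G0): we own the monitor iff the prior owner was NULL.
    return prior == NULL;
  }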
3700 3755
3701 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, 3756 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
3702 Register Rbox, Register Rscratch, 3757 Register Rbox, Register Rscratch,
3703 bool try_bias) { 3758 bool try_bias) {
3704 Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); 3759 Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
3705 3760
3706 Label done ; 3761 Label done ;
3707 3762
3708 if (EmitSync & 4) { 3763 if (EmitSync & 4) {
3709 cmp (SP, G0) ; 3764 cmp(SP, G0);
3710 return ; 3765 return ;
3711 } 3766 }
3712 3767
3713 if (EmitSync & 8) { 3768 if (EmitSync & 8) {
3714 if (try_bias) { 3769 if (try_bias) {
3715 biased_locking_exit(mark_addr, Rscratch, done); 3770 biased_locking_exit(mark_addr, Rscratch, done);
3716 } 3771 }
3717 3772
3718 // Test first if it is a fast recursive unlock 3773 // Test first if it is a fast recursive unlock
3719 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); 3774 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
3720 cmp(Rmark, G0); 3775 br_null_short(Rmark, Assembler::pt, done);
3721 brx(Assembler::equal, false, Assembler::pt, done);
3722 delayed()->nop();
3723 3776
3724 // Check if it is still a light weight lock, this is true if we see 3777 // Check if it is still a light weight lock, this is true if we see
3725 // the stack address of the basicLock in the markOop of the object 3778 // the stack address of the basicLock in the markOop of the object
3726 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); 3779 assert(mark_addr.disp() == 0, "cas must take a zero displacement");
3727 casx_under_lock(mark_addr.base(), Rbox, Rmark, 3780 casx_under_lock(mark_addr.base(), Rbox, Rmark,
3728 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); 3781 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
3729 br (Assembler::always, false, Assembler::pt, done); 3782 ba(done);
3730 delayed()->cmp(Rbox, Rmark); 3783 delayed()->cmp(Rbox, Rmark);
3731 bind (done) ; 3784 bind(done);
3732 return ; 3785 return ;
3733 } 3786 }
3734 3787
3735 // Beware ... If the aggregate size of the code emitted by CLO and CUO 3788 // Beware ... If the aggregate size of the code emitted by CLO and CUO
3736 // is too large, performance rolls abruptly off a cliff. 3789 // is too large, performance rolls abruptly off a cliff.
3741 if (try_bias) { 3794 if (try_bias) {
3742 // TODO: eliminate redundant LDs of obj->mark 3795 // TODO: eliminate redundant LDs of obj->mark
3743 biased_locking_exit(mark_addr, Rscratch, done); 3796 biased_locking_exit(mark_addr, Rscratch, done);
3744 } 3797 }
3745 3798
3746 ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ; 3799 ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
3747 ld_ptr (Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); 3800 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
3748 andcc (Rscratch, Rscratch, G0); 3801 andcc(Rscratch, Rscratch, G0);
3749 brx (Assembler::zero, false, Assembler::pn, done); 3802 brx(Assembler::zero, false, Assembler::pn, done);
3750 delayed()-> nop() ; // consider: relocate fetch of mark, above, into this DS 3803 delayed()->nop(); // consider: relocate fetch of mark, above, into this DS
3751 andcc (Rmark, 2, G0) ; 3804 andcc(Rmark, 2, G0);
3752 brx (Assembler::zero, false, Assembler::pt, LStacked) ; 3805 brx(Assembler::zero, false, Assembler::pt, LStacked);
3753 delayed()-> nop() ; 3806 delayed()->nop();
3754 3807
3755 // It's inflated 3808 // It's inflated
3756 // Conceptually we need a #loadstore|#storestore "release" MEMBAR before 3809 // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
3757 // the ST of 0 into _owner which releases the lock. This prevents loads 3810 // the ST of 0 into _owner which releases the lock. This prevents loads
3758 // and stores within the critical section from reordering (floating) 3811 // and stores within the critical section from reordering (floating)
3759 // past the store that releases the lock. But TSO is a strong memory model 3812 // past the store that releases the lock. But TSO is a strong memory model
3760 // and that particular flavor of barrier is a noop, so we can safely elide it. 3813 // and that particular flavor of barrier is a noop, so we can safely elide it.
3761 // Note that we use 1-0 locking by default for the inflated case. We 3814 // Note that we use 1-0 locking by default for the inflated case. We
3762 // close the resultant (and rare) race by having contended threads in 3815 // close the resultant (and rare) race by having contended threads in
3763 // monitorenter periodically poll _owner. 3816 // monitorenter periodically poll _owner.
3764 ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); 3817 ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
3765 ld_ptr (Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); 3818 ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
3766 xor3 (Rscratch, G2_thread, Rscratch) ; 3819 xor3(Rscratch, G2_thread, Rscratch);
3767 orcc (Rbox, Rscratch, Rbox) ; 3820 orcc(Rbox, Rscratch, Rbox);
3768 brx (Assembler::notZero, false, Assembler::pn, done) ; 3821 brx(Assembler::notZero, false, Assembler::pn, done);
3769 delayed()-> 3822 delayed()->
3770 ld_ptr (Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); 3823 ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
3771 ld_ptr (Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); 3824 ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
3772 orcc (Rbox, Rscratch, G0) ; 3825 orcc(Rbox, Rscratch, G0);
3773 if (EmitSync & 65536) { 3826 if (EmitSync & 65536) {
3774 Label LSucc ; 3827 Label LSucc ;
3775 brx (Assembler::notZero, false, Assembler::pn, LSucc) ; 3828 brx(Assembler::notZero, false, Assembler::pn, LSucc);
3776 delayed()->nop() ; 3829 delayed()->nop();
3777 br (Assembler::always, false, Assembler::pt, done) ; 3830 ba(done);
3778 delayed()-> 3831 delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3779 st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); 3832
3780 3833 bind(LSucc);
3781 bind (LSucc) ; 3834 st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3782 st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); 3835 if (os::is_MP()) { membar (StoreLoad); }
3783 if (os::is_MP()) { membar (StoreLoad) ; } 3836 ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
3784 ld_ptr (Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); 3837 andcc(Rscratch, Rscratch, G0);
3785 andcc (Rscratch, Rscratch, G0) ; 3838 brx(Assembler::notZero, false, Assembler::pt, done);
3786 brx (Assembler::notZero, false, Assembler::pt, done) ; 3839 delayed()->andcc(G0, G0, G0);
3787 delayed()-> andcc (G0, G0, G0) ; 3840 add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
3788 add (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ; 3841 mov(G2_thread, Rscratch);
3789 mov (G2_thread, Rscratch) ; 3842 casn(Rmark, G0, Rscratch);
3790 casn (Rmark, G0, Rscratch) ;
3791 cmp (Rscratch, G0) ;
3792 // invert icc.zf and goto done 3843 // invert icc.zf and goto done
3793 brx (Assembler::notZero, false, Assembler::pt, done) ; 3844 br_notnull(Rscratch, false, Assembler::pt, done);
3794 delayed() -> cmp (G0, G0) ; 3845 delayed()->cmp(G0, G0);
3795 br (Assembler::always, false, Assembler::pt, done); 3846 ba(done);
3796 delayed() -> cmp (G0, 1) ; 3847 delayed()->cmp(G0, 1);
3797 } else { 3848 } else {
3798 brx (Assembler::notZero, false, Assembler::pn, done) ; 3849 brx(Assembler::notZero, false, Assembler::pn, done);
3799 delayed()->nop() ; 3850 delayed()->nop();
3800 br (Assembler::always, false, Assembler::pt, done) ; 3851 ba(done);
3801 delayed()-> 3852 delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3802 st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
3803 } 3853 }
3804 3854
3805 bind (LStacked) ; 3855 bind (LStacked);
3806 // Consider: we could replace the expensive CAS in the exit 3856 // Consider: we could replace the expensive CAS in the exit
3807 // path with a simple ST of the displaced mark value fetched from 3857 // path with a simple ST of the displaced mark value fetched from
3808 // the on-stack basiclock box. That admits a race where a thread T2 3858 // the on-stack basiclock box. That admits a race where a thread T2
3809 // in the slow lock path -- inflating with monitor M -- could race a 3859 // in the slow lock path -- inflating with monitor M -- could race a
3810 // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. 3860 // thread T1 in the fast unlock path, resulting in a missed wakeup for T2.
3829 // lost-update "stomp" WAW race but detects and recovers as needed. 3879 // lost-update "stomp" WAW race but detects and recovers as needed.
3830 // 3880 //
3831 // A prototype implementation showed excellent results, although 3881 // A prototype implementation showed excellent results, although
3832 // the scavenger and timeout code was rather involved. 3882 // the scavenger and timeout code was rather involved.
3833 3883
3834 casn (mark_addr.base(), Rbox, Rscratch) ; 3884 casn(mark_addr.base(), Rbox, Rscratch);
3835 cmp (Rbox, Rscratch); 3885 cmp(Rbox, Rscratch);
3836 // Intentional fall through into done ... 3886 // Intentional fall through into done ...
3837 3887
3838 bind (done) ; 3888 bind(done);
3839 } 3889 }
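Restated as hedged C++, the 1-0 exit protocol above looks roughly like this (a sketch only; the wakeup slow path is elided, and direct access to ObjectMonitor's private fields is assumed):

  static void exit_inflated(ObjectMonitor* mon, Thread* self) {
    if (mon->_EntryList == NULL && mon->_cxq == NULL) {
      mon->_owner = NULL;           // nobody queued: plain release store (the
      return;                       // release barrier is a no-op under TSO)
    }
    mon->_owner = NULL;             // 1-0 exit with waiters present
    OrderAccess::storeload();       // membar(StoreLoad), needed on MP only
    if (mon->_succ != NULL) return; // a successor exists and will retry the lock
    if (Atomic::cmpxchg_ptr((void*)self, &mon->_owner, (void*)NULL) == NULL) {
      // Re-acquired the lock: now responsible for waking a queued thread
      // (slow path, elided here).
    }
  }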
3840 3890
3841 3891
3842 3892
3843 void MacroAssembler::print_CPU_state() { 3893 void MacroAssembler::print_CPU_state() {
3889 3939
3890 save_frame(0); 3940 save_frame(0);
3891 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); 3941 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
3892 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); 3942 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
3893 or3(t1, t2, t3); 3943 or3(t1, t2, t3);
3894 cmp(t1, t2); 3944 cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
3895 br(Assembler::greaterEqual, false, Assembler::pn, next);
3896 delayed()->nop();
3897 stop("assert(top >= start)"); 3945 stop("assert(top >= start)");
3898 should_not_reach_here(); 3946 should_not_reach_here();
3899 3947
3900 bind(next); 3948 bind(next);
3901 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); 3949 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
3902 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2); 3950 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
3903 or3(t3, t2, t3); 3951 or3(t3, t2, t3);
3904 cmp(t1, t2); 3952 cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
3905 br(Assembler::lessEqual, false, Assembler::pn, next2);
3906 delayed()->nop();
3907 stop("assert(top <= end)"); 3953 stop("assert(top <= end)");
3908 should_not_reach_here(); 3954 should_not_reach_here();
3909 3955
3910 bind(next2); 3956 bind(next2);
3911 and3(t3, MinObjAlignmentInBytesMask, t3); 3957 and3(t3, MinObjAlignmentInBytesMask, t3);
3912 cmp(t3, 0); 3958 cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
3913 br(Assembler::lessEqual, false, Assembler::pn, ok);
3914 delayed()->nop();
3915 stop("assert(aligned)"); 3959 stop("assert(aligned)");
3916 should_not_reach_here(); 3960 should_not_reach_here();
3917 3961
3918 bind(ok); 3962 bind(ok);
3919 restore(); 3963 restore();
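cmp_and_br_short is one of the new helpers introduced by this change: it folds the old cmp / br / delayed()->nop() triple into a single compare-and-branch where possible. A plausible shape, assuming a use_cbcond(Label&) predicate that checks both for T4 hardware support and that the (possibly still unbound) label fits cbcond's short displacement:

  void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
                                        Predict p, Label& L) {
    if (use_cbcond(L)) {
      Assembler::cbcond(c, icc, s1, s2, L); // one instruction, no delay slot
      return;
    }
    // Fallback for older hardware or out-of-range targets.
    cmp(s1, s2);
    br(c, false, p, L);
    delayed()->nop();
  }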
3935 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); 3979 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
3936 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); 3980 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
3937 3981
3938 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 3982 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
3939 // No allocation in the shared eden. 3983 // No allocation in the shared eden.
3940 br(Assembler::always, false, Assembler::pt, slow_case); 3984 ba_short(slow_case);
3941 delayed()->nop();
3942 } else { 3985 } else {
3943 // get eden boundaries 3986 // get eden boundaries
3944 // note: we need both top & top_addr! 3987 // note: we need both top & top_addr!
3945 const Register top_addr = t1; 3988 const Register top_addr = t1;
3946 const Register end = t2; 3989 const Register end = t2;
4070 assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */); 4113 assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */);
4071 Label do_refill, discard_tlab; 4114 Label do_refill, discard_tlab;
4072 4115
4073 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 4116 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
4074 // No allocation in the shared eden. 4117 // No allocation in the shared eden.
4075 br(Assembler::always, false, Assembler::pt, slow_case); 4118 ba_short(slow_case);
4076 delayed()->nop();
4077 } 4119 }
4078 4120
4079 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top); 4121 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
4080 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1); 4122 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1);
4081 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2); 4123 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2);
4096 // increment number of slow_allocations 4138 // increment number of slow_allocations
4097 ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2); 4139 ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2);
4098 add(t2, 1, t2); 4140 add(t2, 1, t2);
4099 stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset())); 4141 stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
4100 } 4142 }
4101 br(Assembler::always, false, Assembler::pt, try_eden); 4143 ba_short(try_eden);
4102 delayed()->nop();
4103 4144
4104 bind(discard_tlab); 4145 bind(discard_tlab);
4105 if (TLABStats) { 4146 if (TLABStats) {
4106 // increment number of refills 4147 // increment number of refills
4107 ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2); 4148 ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2);
4113 stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); 4154 stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
4114 } 4155 }
4115 4156
4116 // if tlab is currently allocated (top or end != null) then 4157 // if tlab is currently allocated (top or end != null) then
4117 // fill [top, end + alignment_reserve) with array object 4158 // fill [top, end + alignment_reserve) with array object
4118 br_null(top, false, Assembler::pn, do_refill); 4159 br_null_short(top, Assembler::pn, do_refill);
4119 delayed()->nop();
4120 4160
4121 set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); 4161 set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
4122 st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word 4162 st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
4123 // set klass to intArrayKlass 4163 // set klass to intArrayKlass
4124 sub(t1, typeArrayOopDesc::header_size(T_INT), t1); 4164 sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
4149 // check that tlab_size (t1) is still valid 4189 // check that tlab_size (t1) is still valid
4150 { 4190 {
4151 Label ok; 4191 Label ok;
4152 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2); 4192 ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
4153 sll_ptr(t2, LogHeapWordSize, t2); 4193 sll_ptr(t2, LogHeapWordSize, t2);
4154 cmp(t1, t2); 4194 cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
4155 br(Assembler::equal, false, Assembler::pt, ok);
4156 delayed()->nop();
4157 stop("assert(t1 == tlab_size)"); 4195 stop("assert(t1 == tlab_size)");
4158 should_not_reach_here(); 4196 should_not_reach_here();
4159 4197
4160 bind(ok); 4198 bind(ok);
4161 } 4199 }
4162 #endif // ASSERT 4200 #endif // ASSERT
4163 add(top, t1, top); // t1 is tlab_size 4201 add(top, t1, top); // t1 is tlab_size
4164 sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); 4202 sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
4165 st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); 4203 st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
4166 verify_tlab(); 4204 verify_tlab();
4167 br(Assembler::always, false, Assembler::pt, retry); 4205 ba_short(retry);
4168 delayed()->nop();
4169 } 4206 }
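ba_short and br_null_short, used throughout this function, presumably follow the same pattern; an always-taken branch can be encoded as a trivially true compare (a sketch under the same assumptions as above):

  void MacroAssembler::ba_short(Label& L) {
    if (use_cbcond(L)) {
      Assembler::cbcond(equal, icc, G0, G0, L); // G0 == G0: always taken
      return;
    }
    br(always, false, pt, L);
    delayed()->nop();
  }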
4170 4207
4171 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, 4208 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
4172 Register t1, Register t2) { 4209 Register t1, Register t2) {
4173 // Bump total bytes allocated by this thread 4210 // Bump total bytes allocated by this thread
4288 4325
4289 static void generate_satb_log_enqueue(bool with_frame) { 4326 static void generate_satb_log_enqueue(bool with_frame) {
4290 BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); 4327 BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
4291 CodeBuffer buf(bb); 4328 CodeBuffer buf(bb);
4292 MacroAssembler masm(&buf); 4329 MacroAssembler masm(&buf);
4293 address start = masm.pc(); 4330
4331 #define __ masm.
4332
4333 address start = __ pc();
4294 Register pre_val; 4334 Register pre_val;
4295 4335
4296 Label refill, restart; 4336 Label refill, restart;
4297 if (with_frame) { 4337 if (with_frame) {
4298 masm.save_frame(0); 4338 __ save_frame(0);
4299 pre_val = I0; // Was O0 before the save. 4339 pre_val = I0; // Was O0 before the save.
4300 } else { 4340 } else {
4301 pre_val = O0; 4341 pre_val = O0;
4302 } 4342 }
4303 int satb_q_index_byte_offset = 4343 int satb_q_index_byte_offset =
4308 PtrQueue::byte_offset_of_buf()); 4348 PtrQueue::byte_offset_of_buf());
4309 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && 4349 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
4310 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), 4350 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
4311 "check sizes in assembly below"); 4351 "check sizes in assembly below");
4312 4352
4313 masm.bind(restart); 4353 __ bind(restart);
4314 masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); 4354 __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
4315 4355
4316 masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); 4356 __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
4317 // If the branch is taken, no harm in executing this in the delay slot. 4357 // If the branch is taken, no harm in executing this in the delay slot.
4318 masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); 4358 __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
4319 masm.sub(L0, oopSize, L0); 4359 __ sub(L0, oopSize, L0);
4320 4360
4321 masm.st_ptr(pre_val, L1, L0); // [_buf + index] := pre_val 4361 __ st_ptr(pre_val, L1, L0); // [_buf + index] := pre_val
4322 if (!with_frame) { 4362 if (!with_frame) {
4323 // Use return-from-leaf 4363 // Use return-from-leaf
4324 masm.retl(); 4364 __ retl();
4325 masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); 4365 __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
4326 } else { 4366 } else {
4327 // Not delayed. 4367 // Not delayed.
4328 masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); 4368 __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
4329 } 4369 }
4330 if (with_frame) { 4370 if (with_frame) {
4331 masm.ret(); 4371 __ ret();
4332 masm.delayed()->restore(); 4372 __ delayed()->restore();
4333 } 4373 }
4334 masm.bind(refill); 4374 __ bind(refill);
4335 4375
4336 address handle_zero = 4376 address handle_zero =
4337 CAST_FROM_FN_PTR(address, 4377 CAST_FROM_FN_PTR(address,
4338 &SATBMarkQueueSet::handle_zero_index_for_thread); 4378 &SATBMarkQueueSet::handle_zero_index_for_thread);
4339 // This should be rare enough that we can afford to save all the 4379 // This should be rare enough that we can afford to save all the
4340 // scratch registers that the calling context might be using. 4380 // scratch registers that the calling context might be using.
4341 masm.mov(G1_scratch, L0); 4381 __ mov(G1_scratch, L0);
4342 masm.mov(G3_scratch, L1); 4382 __ mov(G3_scratch, L1);
4343 masm.mov(G4, L2); 4383 __ mov(G4, L2);
4344 // We need the value of O0 above (for the write into the buffer), so we 4384 // We need the value of O0 above (for the write into the buffer), so we
4345 // save and restore it. 4385 // save and restore it.
4346 masm.mov(O0, L3); 4386 __ mov(O0, L3);
4347 // Since the call will overwrite O7, we save and restore that, as well. 4387 // Since the call will overwrite O7, we save and restore that, as well.
4348 masm.mov(O7, L4); 4388 __ mov(O7, L4);
4349 masm.call_VM_leaf(L5, handle_zero, G2_thread); 4389 __ call_VM_leaf(L5, handle_zero, G2_thread);
4350 masm.mov(L0, G1_scratch); 4390 __ mov(L0, G1_scratch);
4351 masm.mov(L1, G3_scratch); 4391 __ mov(L1, G3_scratch);
4352 masm.mov(L2, G4); 4392 __ mov(L2, G4);
4353 masm.mov(L3, O0); 4393 __ mov(L3, O0);
4354 masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); 4394 __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
4355 masm.delayed()->mov(L4, O7); 4395 __ delayed()->mov(L4, O7);
4356 4396
4357 if (with_frame) { 4397 if (with_frame) {
4358 satb_log_enqueue_with_frame = start; 4398 satb_log_enqueue_with_frame = start;
4359 satb_log_enqueue_with_frame_end = masm.pc(); 4399 satb_log_enqueue_with_frame_end = __ pc();
4360 } else { 4400 } else {
4361 satb_log_enqueue_frameless = start; 4401 satb_log_enqueue_frameless = start;
4362 satb_log_enqueue_frameless_end = masm.pc(); 4402 satb_log_enqueue_frameless_end = __ pc();
4363 } 4403 }
4404
4405 #undef __
4364 } 4406 }
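Logically the stub implements the following queue push (hypothetical C++; the queue accessors are assumptions, only SATBMarkQueueSet::handle_zero_index_for_thread comes from the code above). The buffer fills from high addresses down, with the index holding the byte offset of the last used slot:

  static void satb_log_enqueue(JavaThread* t, void* pre_val) {
    for (;;) {
      intptr_t index = t->satb_mark_queue().index();   // assumed accessor
      if (index == 0) {                                // buffer exhausted: refill
        SATBMarkQueueSet::handle_zero_index_for_thread(t);
        continue;                                      // restart
      }
      index -= oopSize;
      void** buf = t->satb_mark_queue().buffer();      // assumed accessor
      buf[index / sizeof(void*)] = pre_val;            // [_buf + index] := pre_val
      t->satb_mark_queue().set_index(index);           // assumed accessor
      return;
    }
  }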
4365 4407
4366 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { 4408 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
4367 if (with_frame) { 4409 if (with_frame) {
4368 if (satb_log_enqueue_with_frame == 0) { 4410 if (satb_log_enqueue_with_frame == 0) {
4424 tmp); 4466 tmp);
4425 } 4467 }
4426 4468
4427 // Check on whether to annul. 4469 // Check on whether to annul.
4428 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); 4470 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
4429 delayed() -> nop(); 4471 delayed()->nop();
4430 4472
4431 // Do we need to load the previous value? 4473 // Do we need to load the previous value?
4432 if (obj != noreg) { 4474 if (obj != noreg) {
4433 // Load the previous value... 4475 // Load the previous value...
4434 if (index == noreg) { 4476 if (index == noreg) {
4448 assert(pre_val != noreg, "must have a real register"); 4490 assert(pre_val != noreg, "must have a real register");
4449 4491
4450 // Is the previous value null? 4492 // Is the previous value null?
4451 // Check on whether to annul. 4493 // Check on whether to annul.
4452 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); 4494 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
4453 delayed() -> nop(); 4495 delayed()->nop();
4454 4496
4455 // OK, it's not filtered, so we'll need to call enqueue. In the normal 4497 // OK, it's not filtered, so we'll need to call enqueue. In the normal
4456 // case, pre_val will be a scratch G-reg, but there are some cases in 4498 // case, pre_val will be a scratch G-reg, but there are some cases in
4457 // which it's an O-reg. In the first case, do a normal call. In the 4499 // which it's an O-reg. In the first case, do a normal call. In the
4458 // latter, do a save here and call the frameless version. 4500 // latter, do a save here and call the frameless version.
4516 // This gets to assume that o0 contains the object address. 4558 // This gets to assume that o0 contains the object address.
4517 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { 4559 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
4518 BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); 4560 BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
4519 CodeBuffer buf(bb); 4561 CodeBuffer buf(bb);
4520 MacroAssembler masm(&buf); 4562 MacroAssembler masm(&buf);
4521 address start = masm.pc(); 4563 #define __ masm.
4564 address start = __ pc();
4522 4565
4523 Label not_already_dirty, restart, refill; 4566 Label not_already_dirty, restart, refill;
4524 4567
4525 #ifdef _LP64 4568 #ifdef _LP64
4526 masm.srlx(O0, CardTableModRefBS::card_shift, O0); 4569 __ srlx(O0, CardTableModRefBS::card_shift, O0);
4527 #else 4570 #else
4528 masm.srl(O0, CardTableModRefBS::card_shift, O0); 4571 __ srl(O0, CardTableModRefBS::card_shift, O0);
4529 #endif 4572 #endif
4530 AddressLiteral addrlit(byte_map_base); 4573 AddressLiteral addrlit(byte_map_base);
4531 masm.set(addrlit, O1); // O1 := <card table base> 4574 __ set(addrlit, O1); // O1 := <card table base>
4532 masm.ldub(O0, O1, O2); // O2 := [O0 + O1] 4575 __ ldub(O0, O1, O2); // O2 := [O0 + O1]
4533 4576
4534 masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, 4577 __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
4535 O2, not_already_dirty); 4578 O2, not_already_dirty);
4536 // Get O0 + O1 into a reg by itself -- useful in the take-the-branch 4579 // Get O0 + O1 into a reg by itself -- useful in the take-the-branch
4537 // case, harmless if not. 4580 // case, harmless if not.
4538 masm.delayed()->add(O0, O1, O3); 4581 __ delayed()->add(O0, O1, O3);
4539 4582
4540 // We didn't take the branch, so we're already dirty: return. 4583 // We didn't take the branch, so we're already dirty: return.
4541 // Use return-from-leaf 4584 // Use return-from-leaf
4542 masm.retl(); 4585 __ retl();
4543 masm.delayed()->nop(); 4586 __ delayed()->nop();
4544 4587
4545 // Not dirty. 4588 // Not dirty.
4546 masm.bind(not_already_dirty); 4589 __ bind(not_already_dirty);
4547 // First, dirty it. 4590 // First, dirty it.
4548 masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). 4591 __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty).
4549 int dirty_card_q_index_byte_offset = 4592 int dirty_card_q_index_byte_offset =
4550 in_bytes(JavaThread::dirty_card_queue_offset() + 4593 in_bytes(JavaThread::dirty_card_queue_offset() +
4551 PtrQueue::byte_offset_of_index()); 4594 PtrQueue::byte_offset_of_index());
4552 int dirty_card_q_buf_byte_offset = 4595 int dirty_card_q_buf_byte_offset =
4553 in_bytes(JavaThread::dirty_card_queue_offset() + 4596 in_bytes(JavaThread::dirty_card_queue_offset() +
4554 PtrQueue::byte_offset_of_buf()); 4597 PtrQueue::byte_offset_of_buf());
4555 masm.bind(restart); 4598 __ bind(restart);
4556 masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); 4599 __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
4557 4600
4558 masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, 4601 __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
4559 L0, refill); 4602 L0, refill);
4560 // If the branch is taken, no harm in executing this in the delay slot. 4603 // If the branch is taken, no harm in executing this in the delay slot.
4561 masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); 4604 __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
4562 masm.sub(L0, oopSize, L0); 4605 __ sub(L0, oopSize, L0);
4563 4606
4564 masm.st_ptr(O3, L1, L0); // [_buf + index] := O3 4607 __ st_ptr(O3, L1, L0); // [_buf + index] := O3
4565 // Use return-from-leaf 4608 // Use return-from-leaf
4566 masm.retl(); 4609 __ retl();
4567 masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); 4610 __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
4568 4611
4569 masm.bind(refill); 4612 __ bind(refill);
4570 address handle_zero = 4613 address handle_zero =
4571 CAST_FROM_FN_PTR(address, 4614 CAST_FROM_FN_PTR(address,
4572 &DirtyCardQueueSet::handle_zero_index_for_thread); 4615 &DirtyCardQueueSet::handle_zero_index_for_thread);
4573 // This should be rare enough that we can afford to save all the 4616 // This should be rare enough that we can afford to save all the
4574 // scratch registers that the calling context might be using. 4617 // scratch registers that the calling context might be using.
4575 masm.mov(G1_scratch, L3); 4618 __ mov(G1_scratch, L3);
4576 masm.mov(G3_scratch, L5); 4619 __ mov(G3_scratch, L5);
4577 // We need the value of O3 above (for the write into the buffer), so we 4620 // We need the value of O3 above (for the write into the buffer), so we
4578 // save and restore it. 4621 // save and restore it.
4579 masm.mov(O3, L6); 4622 __ mov(O3, L6);
4580 // Since the call will overwrite O7, we save and restore that, as well. 4623 // Since the call will overwrite O7, we save and restore that, as well.
4581 masm.mov(O7, L4); 4624 __ mov(O7, L4);
4582 4625
4583 masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); 4626 __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
4584 masm.mov(L3, G1_scratch); 4627 __ mov(L3, G1_scratch);
4585 masm.mov(L5, G3_scratch); 4628 __ mov(L5, G3_scratch);
4586 masm.mov(L6, O3); 4629 __ mov(L6, O3);
4587 masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); 4630 __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
4588 masm.delayed()->mov(L4, O7); 4631 __ delayed()->mov(L4, O7);
4589 4632
4590 dirty_card_log_enqueue = start; 4633 dirty_card_log_enqueue = start;
4591 dirty_card_log_enqueue_end = masm.pc(); 4634 dirty_card_log_enqueue_end = __ pc();
4592 // XXX Should have a guarantee here about not going off the end! 4635 // XXX Should have a guarantee here about not going off the end!
4593 // Does it already do so? Do an experiment... 4636 // Does it already do so? Do an experiment...
4637
4638 #undef __
4639
4594 } 4640 }
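The same restatement for the dirty-card stub (hypothetical C++; everything except CardTableModRefBS::card_shift and DirtyCardQueueSet::handle_zero_index_for_thread is an assumption). Note that a card value of zero means dirty, so a zero byte lets the stub return without logging:

  static void dirty_card_enqueue(JavaThread* t, void* store_addr,
                                 jbyte* byte_map_base) {
    jbyte* card = byte_map_base +
                  ((uintptr_t)store_addr >> CardTableModRefBS::card_shift);
    if (*card == 0) return;           // card already dirty: nothing to log
    *card = 0;                        // [cardPtr] := 0, i.e. mark it dirty
    for (;;) {                        // then push the card address, exactly as
      intptr_t index = t->dirty_card_queue().index();  // in the SATB stub above
      if (index == 0) {
        DirtyCardQueueSet::handle_zero_index_for_thread(t);
        continue;
      }
      index -= oopSize;
      ((void**)t->dirty_card_queue().buffer())[index / sizeof(void*)] = card;
      t->dirty_card_queue().set_index(index);
      return;
    }
  }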
4595 4641
4596 static inline void 4642 static inline void
4597 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { 4643 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
4598 if (dirty_card_log_enqueue == 0) { 4644 if (dirty_card_log_enqueue == 0) {
4901 cmp(chr1, chr2); 4947 cmp(chr1, chr2);
4902 br(Assembler::notEqual, true, Assembler::pt, Ldone); 4948 br(Assembler::notEqual, true, Assembler::pt, Ldone);
4903 delayed()->mov(G0, result); // not equal 4949 delayed()->mov(G0, result); // not equal
4904 4950
4905 // only one char ? 4951 // only one char ?
4906 br_on_reg_cond(rc_z, true, Assembler::pn, limit, Ldone); 4952 cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
4907 delayed()->add(G0, 1, result); // zero-length arrays are equal 4953 delayed()->add(G0, 1, result); // zero-length arrays are equal
4908 4954
4909 // word by word compare, don't need alignment check 4955 // word by word compare, don't need alignment check
4910 bind(Lvector); 4956 bind(Lvector);
4911 // Shift ary1 and ary2 to the end of the arrays, negate limit 4957 // Shift ary1 and ary2 to the end of the arrays, negate limit