Mercurial > hg > truffle
comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 644:c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
Summary: Code in interp_masm, stubGenerator, c1_LIRAssembler, and AD files moved into MacroAssembler.
Reviewed-by: kvn
author | jrose |
---|---|
date | Fri, 13 Mar 2009 18:39:22 -0700 |
parents | 660978a2a31a |
children | d0994e5bebce |
comparison
equal
deleted
inserted
replaced
643:c771b7f43bbf | 644:c517646eef23 |
---|---|
898 // raddr: O7, blown by call | 898 // raddr: O7, blown by call |
899 address generate_partial_subtype_check() { | 899 address generate_partial_subtype_check() { |
900 __ align(CodeEntryAlignment); | 900 __ align(CodeEntryAlignment); |
901 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); | 901 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); |
902 address start = __ pc(); | 902 address start = __ pc(); |
903 Label loop, miss; | 903 Label miss; |
904 | |
905 // Compare super with sub directly, since super is not in its own SSA. | |
906 // The compiler used to emit this test, but we fold it in here, | |
907 // to increase overall code density, with no real loss of speed. | |
908 { Label L; | |
909 __ cmp(O1, O2); | |
910 __ brx(Assembler::notEqual, false, Assembler::pt, L); | |
911 __ delayed()->nop(); | |
912 __ retl(); | |
913 __ delayed()->addcc(G0,0,O0); // set Z flags, zero result | |
914 __ bind(L); | |
915 } | |
916 | 904 |
917 #if defined(COMPILER2) && !defined(_LP64) | 905 #if defined(COMPILER2) && !defined(_LP64) |
918 // Do not use a 'save' because it blows the 64-bit O registers. | 906 // Do not use a 'save' because it blows the 64-bit O registers. |
919 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned) | 907 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned) |
920 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize); | 908 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize); |
934 Register L0_ary_len = L0; | 922 Register L0_ary_len = L0; |
935 Register L1_ary_ptr = L1; | 923 Register L1_ary_ptr = L1; |
936 Register L2_super = L2; | 924 Register L2_super = L2; |
937 Register L3_index = L3; | 925 Register L3_index = L3; |
938 | 926 |
939 #ifdef _LP64 | 927 __ check_klass_subtype_slow_path(Rsub, Rsuper, |
940 Register L4_ooptmp = L4; | 928 L0, L1, L2, L3, |
941 | 929 NULL, &miss); |
942 if (UseCompressedOops) { | 930 |
943 // this must be under UseCompressedOops check, as we rely upon fact | 931 // Match falls through here. |
944 // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save | 932 __ addcc(G0,0,Rret); // set Z flags, Z result |
945 // on stack, see several lines above | |
946 __ encode_heap_oop(Rsuper, L4_ooptmp); | |
947 } | |
948 #endif | |
949 | |
950 inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1); | |
951 | |
952 __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 ); | |
953 __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len); | |
954 __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr); | |
955 __ clr(L3_index); // zero index | |
956 // Load a little early; will load 1 off the end of the array. | |
957 // Ok for now; revisit if we have other uses of this routine. | |
958 if (UseCompressedOops) { | |
959 __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early | |
960 } else { | |
961 __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early | |
962 } | |
963 | |
964 assert(heapOopSize != 0, "heapOopSize should be initialized"); | |
965 // The scan loop | |
966 __ BIND(loop); | |
967 __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size | |
968 __ cmp(L3_index,L0_ary_len); | |
969 __ br(Assembler::equal,false,Assembler::pn,miss); | |
970 __ delayed()->inc(L3_index); // Bump index | |
971 | |
972 if (UseCompressedOops) { | |
973 #ifdef _LP64 | |
974 __ subcc(L2_super,L4_ooptmp,Rret); // Check for match; zero in Rret for a hit | |
975 __ br( Assembler::notEqual, false, Assembler::pt, loop ); | |
976 __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early | |
977 #else | |
978 ShouldNotReachHere(); | |
979 #endif | |
980 } else { | |
981 __ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit | |
982 __ brx( Assembler::notEqual, false, Assembler::pt, loop ); | |
983 __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early | |
984 } | |
985 | |
986 // Got a hit; report success; set cache. Cache load doesn't | |
987 // happen here; for speed it is directly emitted by the compiler. | |
988 __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); | |
989 | 933 |
990 #if defined(COMPILER2) && !defined(_LP64) | 934 #if defined(COMPILER2) && !defined(_LP64) |
991 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); | 935 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); |
992 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); | 936 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); |
993 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); | 937 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); |
997 #else | 941 #else |
998 __ ret(); // Result in Rret is zero; flags set to Z | 942 __ ret(); // Result in Rret is zero; flags set to Z |
999 __ delayed()->restore(); | 943 __ delayed()->restore(); |
1000 #endif | 944 #endif |
1001 | 945 |
1002 // Hit or miss falls through here | |
1003 __ BIND(miss); | 946 __ BIND(miss); |
1004 __ addcc(G0,1,Rret); // set NZ flags, NZ result | 947 __ addcc(G0,1,Rret); // set NZ flags, NZ result |
1005 | 948 |
1006 #if defined(COMPILER2) && !defined(_LP64) | 949 #if defined(COMPILER2) && !defined(_LP64) |
1007 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); | 950 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); |
2328 // Smashes only the given temp registers. | 2271 // Smashes only the given temp registers. |
2329 void generate_type_check(Register sub_klass, | 2272 void generate_type_check(Register sub_klass, |
2330 Register super_check_offset, | 2273 Register super_check_offset, |
2331 Register super_klass, | 2274 Register super_klass, |
2332 Register temp, | 2275 Register temp, |
2333 Label& L_success, | 2276 Label& L_success) { |
2334 Register deccc_hack = noreg) { | |
2335 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); | 2277 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); |
2336 | 2278 |
2337 BLOCK_COMMENT("type_check:"); | 2279 BLOCK_COMMENT("type_check:"); |
2338 | 2280 |
2339 Label L_miss; | 2281 Label L_miss, L_pop_to_miss; |
2340 | 2282 |
2341 assert_clean_int(super_check_offset, temp); | 2283 assert_clean_int(super_check_offset, temp); |
2342 | 2284 |
2343 // maybe decrement caller's trip count: | 2285 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg, |
2344 #define DELAY_SLOT delayed(); \ | 2286 &L_success, &L_miss, NULL, |
2345 { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); } | 2287 super_check_offset); |
2346 | 2288 |
2347 // if the pointers are equal, we are done (e.g., String[] elements) | 2289 BLOCK_COMMENT("type_check_slow_path:"); |
2348 __ cmp(sub_klass, super_klass); | |
2349 __ brx(Assembler::equal, true, Assembler::pt, L_success); | |
2350 __ DELAY_SLOT; | |
2351 | |
2352 // check the supertype display: | |
2353 __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type | |
2354 __ cmp(super_klass, temp); // test the super type | |
2355 __ brx(Assembler::equal, true, Assembler::pt, L_success); | |
2356 __ DELAY_SLOT; | |
2357 | |
2358 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + | |
2359 Klass::secondary_super_cache_offset_in_bytes()); | |
2360 __ cmp(super_klass, sc_offset); | |
2361 __ brx(Assembler::notEqual, true, Assembler::pt, L_miss); | |
2362 __ delayed()->nop(); | |
2363 | |
2364 __ save_frame(0); | 2290 __ save_frame(0); |
2365 __ mov(sub_klass->after_save(), O1); | 2291 __ check_klass_subtype_slow_path(sub_klass->after_save(), |
2366 // mov(super_klass->after_save(), O2); //fill delay slot | 2292 super_klass->after_save(), |
2367 assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation"); | 2293 L0, L1, L2, L4, |
2368 __ call(StubRoutines::Sparc::_partial_subtype_check); | 2294 NULL, &L_pop_to_miss); |
2369 __ delayed()->mov(super_klass->after_save(), O2); | 2295 __ ba(false, L_success); |
2296 __ delayed()->restore(); | |
2297 | |
2298 __ bind(L_pop_to_miss); | |
2370 __ restore(); | 2299 __ restore(); |
2371 | |
2372 // Upon return, the condition codes are already set. | |
2373 __ brx(Assembler::equal, true, Assembler::pt, L_success); | |
2374 __ DELAY_SLOT; | |
2375 | |
2376 #undef DELAY_SLOT | |
2377 | 2300 |
2378 // Fall through on failure! | 2301 // Fall through on failure! |
2379 __ BIND(L_miss); | 2302 __ BIND(L_miss); |
2380 } | 2303 } |
2381 | 2304 |
2409 address start = __ pc(); | 2332 address start = __ pc(); |
2410 | 2333 |
2411 gen_write_ref_array_pre_barrier(O1, O2); | 2334 gen_write_ref_array_pre_barrier(O1, O2); |
2412 | 2335 |
2413 #ifdef ASSERT | 2336 #ifdef ASSERT |
2414 // We sometimes save a frame (see partial_subtype_check below). | 2337 // We sometimes save a frame (see generate_type_check below). |
2415 // If this will cause trouble, let's fail now instead of later. | 2338 // If this will cause trouble, let's fail now instead of later. |
2416 __ save_frame(0); | 2339 __ save_frame(0); |
2417 __ restore(); | 2340 __ restore(); |
2418 #endif | 2341 #endif |
2419 | 2342 |
2453 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays | 2376 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays |
2454 // (G1 = len; G1 != 0; G1--) --- number of oops *remaining* | 2377 // (G1 = len; G1 != 0; G1--) --- number of oops *remaining* |
2455 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super | 2378 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super |
2456 __ align(16); | 2379 __ align(16); |
2457 | 2380 |
2458 __ bind(store_element); | 2381 __ BIND(store_element); |
2459 // deccc(G1_remain); // decrement the count (hoisted) | 2382 __ deccc(G1_remain); // decrement the count |
2460 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop | 2383 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop |
2461 __ inc(O5_offset, heapOopSize); // step to next offset | 2384 __ inc(O5_offset, heapOopSize); // step to next offset |
2462 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks); | 2385 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks); |
2463 __ delayed()->set(0, O0); // return 0 on success | 2386 __ delayed()->set(0, O0); // return 0 on success |
2464 | 2387 |
2465 // ======== loop entry is here ======== | 2388 // ======== loop entry is here ======== |
2466 __ bind(load_element); | 2389 __ BIND(load_element); |
2467 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop | 2390 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop |
2468 __ br_null(G3_oop, true, Assembler::pt, store_element); | 2391 __ br_null(G3_oop, true, Assembler::pt, store_element); |
2469 __ delayed()->deccc(G1_remain); // decrement the count | 2392 __ delayed()->nop(); |
2470 | 2393 |
2471 __ load_klass(G3_oop, G4_klass); // query the object klass | 2394 __ load_klass(G3_oop, G4_klass); // query the object klass |
2472 | 2395 |
2473 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, | 2396 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, |
2474 // branch to this on success: | 2397 // branch to this on success: |
2475 store_element, | 2398 store_element); |
2476 // decrement this on success: | |
2477 G1_remain); | |
2478 // ======== end loop ======== | 2399 // ======== end loop ======== |
2479 | 2400 |
2480 // It was a real error; we must depend on the caller to finish the job. | 2401 // It was a real error; we must depend on the caller to finish the job. |
2481 // Register G1 has number of *remaining* oops, O2 number of *total* oops. | 2402 // Register G1 has number of *remaining* oops, O2 number of *total* oops. |
2482 // Emit GC store barriers for the oops we have copied (O2 minus G1), | 2403 // Emit GC store barriers for the oops we have copied (O2 minus G1), |
2483 // and report their number to the caller. | 2404 // and report their number to the caller. |
2484 __ bind(fail); | 2405 __ BIND(fail); |
2485 __ subcc(O2_count, G1_remain, O2_count); | 2406 __ subcc(O2_count, G1_remain, O2_count); |
2486 __ brx(Assembler::zero, false, Assembler::pt, done); | 2407 __ brx(Assembler::zero, false, Assembler::pt, done); |
2487 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller | 2408 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller |
2488 | 2409 |
2489 __ bind(do_card_marks); | 2410 __ BIND(do_card_marks); |
2490 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2] | 2411 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2] |
2491 | 2412 |
2492 __ bind(done); | 2413 __ BIND(done); |
2493 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); | 2414 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); |
2494 __ retl(); | 2415 __ retl(); |
2495 __ delayed()->nop(); // return value in O0 | 2416 __ delayed()->nop(); // return value in O0 |
2496 | 2417 |
2497 return start; | 2418 return start; |