comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 644:c517646eef23

6813212: factor duplicated assembly code for general subclass check (for 6655638)
Summary: Code in interp_masm, stubGenerator, c1_LIRAssembler, and AD files moved into MacroAssembler.
Reviewed-by: kvn
author jrose
date Fri, 13 Mar 2009 18:39:22 -0700
parents 660978a2a31a
children d0994e5bebce
comparison
equal deleted inserted replaced
643:c771b7f43bbf 644:c517646eef23
898 // raddr: O7, blown by call 898 // raddr: O7, blown by call
899 address generate_partial_subtype_check() { 899 address generate_partial_subtype_check() {
900 __ align(CodeEntryAlignment); 900 __ align(CodeEntryAlignment);
901 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); 901 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
902 address start = __ pc(); 902 address start = __ pc();
903 Label loop, miss; 903 Label miss;
904
905 // Compare super with sub directly, since super is not in its own SSA.
906 // The compiler used to emit this test, but we fold it in here,
907 // to increase overall code density, with no real loss of speed.
908 { Label L;
909 __ cmp(O1, O2);
910 __ brx(Assembler::notEqual, false, Assembler::pt, L);
911 __ delayed()->nop();
912 __ retl();
913 __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
914 __ bind(L);
915 }
916 904
917 #if defined(COMPILER2) && !defined(_LP64) 905 #if defined(COMPILER2) && !defined(_LP64)
918 // Do not use a 'save' because it blows the 64-bit O registers. 906 // Do not use a 'save' because it blows the 64-bit O registers.
919 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned) 907 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned)
920 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize); 908 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
934 Register L0_ary_len = L0; 922 Register L0_ary_len = L0;
935 Register L1_ary_ptr = L1; 923 Register L1_ary_ptr = L1;
936 Register L2_super = L2; 924 Register L2_super = L2;
937 Register L3_index = L3; 925 Register L3_index = L3;
938 926
939 #ifdef _LP64 927 __ check_klass_subtype_slow_path(Rsub, Rsuper,
940 Register L4_ooptmp = L4; 928 L0, L1, L2, L3,
941 929 NULL, &miss);
942 if (UseCompressedOops) { 930
943 // this must be under UseCompressedOops check, as we rely upon fact 931 // Match falls through here.
944 // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save 932 __ addcc(G0,0,Rret); // set Z flags, Z result
945 // on stack, see several lines above
946 __ encode_heap_oop(Rsuper, L4_ooptmp);
947 }
948 #endif
949
950 inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
951
952 __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
953 __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
954 __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
955 __ clr(L3_index); // zero index
956 // Load a little early; will load 1 off the end of the array.
957 // Ok for now; revisit if we have other uses of this routine.
958 if (UseCompressedOops) {
959 __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early
960 } else {
961 __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
962 }
963
964 assert(heapOopSize != 0, "heapOopSize should be initialized");
965 // The scan loop
966 __ BIND(loop);
967 __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
968 __ cmp(L3_index,L0_ary_len);
969 __ br(Assembler::equal,false,Assembler::pn,miss);
970 __ delayed()->inc(L3_index); // Bump index
971
972 if (UseCompressedOops) {
973 #ifdef _LP64
974 __ subcc(L2_super,L4_ooptmp,Rret); // Check for match; zero in Rret for a hit
975 __ br( Assembler::notEqual, false, Assembler::pt, loop );
976 __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early
977 #else
978 ShouldNotReachHere();
979 #endif
980 } else {
981 __ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit
982 __ brx( Assembler::notEqual, false, Assembler::pt, loop );
983 __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
984 }
985
986 // Got a hit; report success; set cache. Cache load doesn't
987 // happen here; for speed it is directly emitted by the compiler.
988 __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
989 933
990 #if defined(COMPILER2) && !defined(_LP64) 934 #if defined(COMPILER2) && !defined(_LP64)
991 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); 935 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
992 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1); 936 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
993 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2); 937 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
997 #else 941 #else
998 __ ret(); // Result in Rret is zero; flags set to Z 942 __ ret(); // Result in Rret is zero; flags set to Z
999 __ delayed()->restore(); 943 __ delayed()->restore();
1000 #endif 944 #endif
1001 945
1002 // Hit or miss falls through here
1003 __ BIND(miss); 946 __ BIND(miss);
1004 __ addcc(G0,1,Rret); // set NZ flags, NZ result 947 __ addcc(G0,1,Rret); // set NZ flags, NZ result
1005 948
1006 #if defined(COMPILER2) && !defined(_LP64) 949 #if defined(COMPILER2) && !defined(_LP64)
1007 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); 950 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
2328 // Smashes only the given temp registers. 2271 // Smashes only the given temp registers.
2329 void generate_type_check(Register sub_klass, 2272 void generate_type_check(Register sub_klass,
2330 Register super_check_offset, 2273 Register super_check_offset,
2331 Register super_klass, 2274 Register super_klass,
2332 Register temp, 2275 Register temp,
2333 Label& L_success, 2276 Label& L_success) {
2334 Register deccc_hack = noreg) {
2335 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); 2277 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
2336 2278
2337 BLOCK_COMMENT("type_check:"); 2279 BLOCK_COMMENT("type_check:");
2338 2280
2339 Label L_miss; 2281 Label L_miss, L_pop_to_miss;
2340 2282
2341 assert_clean_int(super_check_offset, temp); 2283 assert_clean_int(super_check_offset, temp);
2342 2284
2343 // maybe decrement caller's trip count: 2285 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
2344 #define DELAY_SLOT delayed(); \ 2286 &L_success, &L_miss, NULL,
2345 { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); } 2287 super_check_offset);
2346 2288
2347 // if the pointers are equal, we are done (e.g., String[] elements) 2289 BLOCK_COMMENT("type_check_slow_path:");
2348 __ cmp(sub_klass, super_klass);
2349 __ brx(Assembler::equal, true, Assembler::pt, L_success);
2350 __ DELAY_SLOT;
2351
2352 // check the supertype display:
2353 __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
2354 __ cmp(super_klass, temp); // test the super type
2355 __ brx(Assembler::equal, true, Assembler::pt, L_success);
2356 __ DELAY_SLOT;
2357
2358 int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
2359 Klass::secondary_super_cache_offset_in_bytes());
2360 __ cmp(super_klass, sc_offset);
2361 __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
2362 __ delayed()->nop();
2363
2364 __ save_frame(0); 2290 __ save_frame(0);
2365 __ mov(sub_klass->after_save(), O1); 2291 __ check_klass_subtype_slow_path(sub_klass->after_save(),
2366 // mov(super_klass->after_save(), O2); //fill delay slot 2292 super_klass->after_save(),
2367 assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation"); 2293 L0, L1, L2, L4,
2368 __ call(StubRoutines::Sparc::_partial_subtype_check); 2294 NULL, &L_pop_to_miss);
2369 __ delayed()->mov(super_klass->after_save(), O2); 2295 __ ba(false, L_success);
2296 __ delayed()->restore();
2297
2298 __ bind(L_pop_to_miss);
2370 __ restore(); 2299 __ restore();
2371
2372 // Upon return, the condition codes are already set.
2373 __ brx(Assembler::equal, true, Assembler::pt, L_success);
2374 __ DELAY_SLOT;
2375
2376 #undef DELAY_SLOT
2377 2300
2378 // Fall through on failure! 2301 // Fall through on failure!
2379 __ BIND(L_miss); 2302 __ BIND(L_miss);
2380 } 2303 }
2381 2304
2409 address start = __ pc(); 2332 address start = __ pc();
2410 2333
2411 gen_write_ref_array_pre_barrier(O1, O2); 2334 gen_write_ref_array_pre_barrier(O1, O2);
2412 2335
2413 #ifdef ASSERT 2336 #ifdef ASSERT
2414 // We sometimes save a frame (see partial_subtype_check below). 2337 // We sometimes save a frame (see generate_type_check below).
2415 // If this will cause trouble, let's fail now instead of later. 2338 // If this will cause trouble, let's fail now instead of later.
2416 __ save_frame(0); 2339 __ save_frame(0);
2417 __ restore(); 2340 __ restore();
2418 #endif 2341 #endif
2419 2342
2453 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays 2376 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
2454 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining* 2377 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
2455 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super 2378 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
2456 __ align(16); 2379 __ align(16);
2457 2380
2458 __ bind(store_element); 2381 __ BIND(store_element);
2459 // deccc(G1_remain); // decrement the count (hoisted) 2382 __ deccc(G1_remain); // decrement the count
2460 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop 2383 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
2461 __ inc(O5_offset, heapOopSize); // step to next offset 2384 __ inc(O5_offset, heapOopSize); // step to next offset
2462 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks); 2385 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
2463 __ delayed()->set(0, O0); // return 0 on success 2386 __ delayed()->set(0, O0); // return 0 on success
2464 2387
2465 // ======== loop entry is here ======== 2388 // ======== loop entry is here ========
2466 __ bind(load_element); 2389 __ BIND(load_element);
2467 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop 2390 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
2468 __ br_null(G3_oop, true, Assembler::pt, store_element); 2391 __ br_null(G3_oop, true, Assembler::pt, store_element);
2469 __ delayed()->deccc(G1_remain); // decrement the count 2392 __ delayed()->nop();
2470 2393
2471 __ load_klass(G3_oop, G4_klass); // query the object klass 2394 __ load_klass(G3_oop, G4_klass); // query the object klass
2472 2395
2473 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, 2396 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
2474 // branch to this on success: 2397 // branch to this on success:
2475 store_element, 2398 store_element);
2476 // decrement this on success:
2477 G1_remain);
2478 // ======== end loop ======== 2399 // ======== end loop ========
2479 2400
2480 // It was a real error; we must depend on the caller to finish the job. 2401 // It was a real error; we must depend on the caller to finish the job.
2481 // Register G1 has number of *remaining* oops, O2 number of *total* oops. 2402 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
2482 // Emit GC store barriers for the oops we have copied (O2 minus G1), 2403 // Emit GC store barriers for the oops we have copied (O2 minus G1),
2483 // and report their number to the caller. 2404 // and report their number to the caller.
2484 __ bind(fail); 2405 __ BIND(fail);
2485 __ subcc(O2_count, G1_remain, O2_count); 2406 __ subcc(O2_count, G1_remain, O2_count);
2486 __ brx(Assembler::zero, false, Assembler::pt, done); 2407 __ brx(Assembler::zero, false, Assembler::pt, done);
2487 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller 2408 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
2488 2409
2489 __ bind(do_card_marks); 2410 __ BIND(do_card_marks);
2490 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2] 2411 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
2491 2412
2492 __ bind(done); 2413 __ BIND(done);
2493 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); 2414 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
2494 __ retl(); 2415 __ retl();
2495 __ delayed()->nop(); // return value in O0 2416 __ delayed()->nop(); // return value in O0
2496 2417
2497 return start; 2418 return start;