comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 113:ba764ed4b6f2

6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
Summary: Compressed oops in instances, arrays, and headers. Code contributors are coleenp, phh, never, swamyv
Reviewed-by: jmasa, kamg, acorn, tbell, kvn, rasbold
author coleenp
date Sun, 13 Apr 2008 17:43:42 -0400
parents f8236e79048a
children d1605aabd0a1 37f87013dfd8
comparison of 110:a49a647afe9a and 113:ba764ed4b6f2
@@ -125,10 +125,11 @@
 const Argument parameter_size = Argument(6, false);
 const Argument thread = Argument(7, false);

 // setup thread register
 __ ld_ptr(thread.as_address(), G2_thread);
+__ reinit_heapbase();

 #ifdef ASSERT
 // make sure we have no pending exceptions
 { const Register t = G3_scratch;
 Label L;
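Note: the added __ reinit_heapbase() reloads the register that holds the heap base used when decoding compressed oops, since the call stub enters generated code from C land with no guarantee about that register's contents. As a rough sketch of the arithmetic the heap-base register supports (the constants and helper names below are illustrative assumptions, not HotSpot's):

    #include <cassert>
    #include <cstdint>

    // Assumed values for illustration: 8-byte alignment gives a 3-bit shift,
    // so a 32-bit narrow oop can address up to 32 GB above the heap base.
    static const uintptr_t kHeapBase = 0x0000000100000000ULL;
    static const int       kShift    = 3;

    static uint32_t encode_heap_oop(uintptr_t oop) {
      return oop == 0 ? 0 : (uint32_t)((oop - kHeapBase) >> kShift);  // NULL stays NULL
    }

    static uintptr_t decode_heap_oop(uint32_t narrow) {
      return narrow == 0 ? 0 : kHeapBase + ((uintptr_t)narrow << kShift);
    }

    int main() {
      uintptr_t p = kHeapBase + 8 * 0x12345678ULL;   // an aligned address in the heap
      assert(decode_heap_oop(encode_heap_oop(p)) == p);
      return 0;
    }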
@@ -894,10 +895,11 @@
 // icc/xcc: set as O0 (depending on wordSize)
 // sub : O1, argument, not changed
 // super: O2, argument, not changed
 // raddr: O7, blown by call
 address generate_partial_subtype_check() {
+__ align(CodeEntryAlignment);
 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
 address start = __ pc();
 Label loop, miss;

 // Compare super with sub directly, since super is not in its own SSA.
@@ -912,11 +914,11 @@
 __ bind(L);
 }

 #if defined(COMPILER2) && !defined(_LP64)
 // Do not use a 'save' because it blows the 64-bit O registers.
-__ add(SP,-4*wordSize,SP); // Make space for 4 temps
+__ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned)
 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
 Register Rret = O0;
@@ -932,30 +934,56 @@
 Register L0_ary_len = L0;
 Register L1_ary_ptr = L1;
 Register L2_super = L2;
 Register L3_index = L3;

+#ifdef _LP64
+Register L4_ooptmp = L4;
+
+if (UseCompressedOops) {
+// this must be under UseCompressedOops check, as we rely upon fact
+// that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save
+// on stack, see several lines above
+__ encode_heap_oop(Rsuper, L4_ooptmp);
+}
+#endif
+
 inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);

 __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
 __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
 __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
 __ clr(L3_index); // zero index
 // Load a little early; will load 1 off the end of the array.
 // Ok for now; revisit if we have other uses of this routine.
-__ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-__ align(CodeEntryAlignment);
-
+if (UseCompressedOops) {
+__ ld(L1_ary_ptr,0,L2_super);// Will load a little early
+} else {
+__ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
+}
+
+assert(heapOopSize != 0, "heapOopSize should be initialized");
 // The scan loop
 __ BIND(loop);
-__ add(L1_ary_ptr,wordSize,L1_ary_ptr); // Bump by OOP size
+__ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
 __ cmp(L3_index,L0_ary_len);
 __ br(Assembler::equal,false,Assembler::pn,miss);
 __ delayed()->inc(L3_index); // Bump index
-__ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit
-__ brx( Assembler::notEqual, false, Assembler::pt, loop );
-__ delayed()->ld_ptr(L1_ary_ptr,0,L2_super); // Will load a little early
+
+if (UseCompressedOops) {
+#ifdef _LP64
+__ subcc(L2_super,L4_ooptmp,Rret); // Check for match; zero in Rret for a hit
+__ br( Assembler::notEqual, false, Assembler::pt, loop );
+__ delayed()->ld(L1_ary_ptr,0,L2_super);// Will load a little early
+#else
+ShouldNotReachHere();
+#endif
+} else {
+__ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit
+__ brx( Assembler::notEqual, false, Assembler::pt, loop );
+__ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
+}

 // Got a hit; report success; set cache. Cache load doesn't
 // happen here; for speed it is directly emitted by the compiler.
 __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );

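Note: with compressed oops the secondary-supers array holds 32-bit narrow oops, so the stub pre-encodes the super klass once (into L4) and compares each element as a narrow value, stepping by heapOopSize instead of wordSize. A hedged C++ sketch of the equivalent scan (names and types are illustrative):

    #include <cstddef>
    #include <cstdint>

    // Returns 0 on a hit, matching the stub's "zero in Rret for a hit" convention.
    // 'supers' stands in for Klass::secondary_supers(); its element type is
    // uint32_t (narrow oop) under UseCompressedOops, uintptr_t otherwise.
    template <typename Elem>
    int partial_subtype_scan(const Elem* supers, size_t len, Elem encoded_super) {
      for (size_t i = 0; i < len; ++i) {
        if (supers[i] == encoded_super) return 0;   // hit; caller updates the cache
      }
      return 1;                                     // miss
    }

    int main() {
      uint32_t supers[] = { 11, 22, 33 };                     // pretend narrow oops
      return partial_subtype_scan(supers, 3, (uint32_t)22);   // 0: found
    }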
@@ -1105,11 +1133,10 @@
 __ mov(L1, count);
 __ restore();
 }
 #endif // 0
 }
-
 //
 // Generate post-write barrier for array.
 //
 // Input:
 //    addr - register containing starting address
@@ -1146,12 +1173,12 @@
 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
 assert_different_registers(addr, count, tmp);

 Label L_loop;

-__ sll_ptr(count, LogBytesPerOop, count);
-__ sub(count, BytesPerOop, count);
+__ sll_ptr(count, LogBytesPerHeapOop, count);
+__ sub(count, BytesPerHeapOop, count);
 __ add(count, addr, count);
 // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
 __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
 __ srl_ptr(count, CardTableModRefBS::card_shift, count);
 __ sub(count, addr, count);
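Note: this post-barrier computes the first and last card spanned by the stored elements, so the scale has to be the real element size; LogBytesPerHeapOop/BytesPerHeapOop are 2/4 with compressed oops on 64-bit and 3/8 otherwise. A minimal sketch of the same range arithmetic, assuming 512-byte cards and a card table indexed from the heap start (both simplifications):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    static const int kCardShift = 9;          // assumed: 512-byte cards
    static const int kLogBytesPerHeapOop = 2; // 4-byte narrow oops under compressed oops

    // Dirty every card covered by 'count' heap oops (count > 0) stored starting
    // at 'offset' bytes into the heap.
    void post_write_barrier(uintptr_t offset, size_t count, uint8_t* cards) {
      uintptr_t last = offset + (count << kLogBytesPerHeapOop) - (1u << kLogBytesPerHeapOop);
      for (uintptr_t c = offset >> kCardShift; c <= (last >> kCardShift); ++c) {
        cards[c] = 0;                         // 0 == dirty card
      }
    }

    int main() {
      uint8_t cards[8];
      std::memset(cards, 0xff, sizeof(cards));
      post_write_barrier(500, 8, cards);      // 8 narrow oops straddle cards 0 and 1
      assert(cards[0] == 0 && cards[1] == 0 && cards[2] == 0xff);
      return 0;
    }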
@@ -1169,11 +1196,10 @@
 break;
 default :
 ShouldNotReachHere();

 }
-
 }


 // Copy big chunks forward with shift
 //
@@ -2224,11 +2250,16 @@
 // save arguments for barrier generation
 __ mov(to, G1);
 __ mov(count, G5);
 gen_write_ref_array_pre_barrier(G1, G5);
 #ifdef _LP64
-generate_disjoint_long_copy_core(aligned);
+assert_clean_int(count, O3);     // Make sure 'count' is clean int.
+if (UseCompressedOops) {
+generate_disjoint_int_copy_core(aligned);
+} else {
+generate_disjoint_long_copy_core(aligned);
+}
 #else
 generate_disjoint_int_copy_core(aligned);
 #endif
 // O0 is used as temp register
 gen_write_ref_array_post_barrier(G1, G5, O0);
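Note: since narrow oops are 32-bit values, the 64-bit disjoint oop copy can now dispatch to the int copy core rather than the long copy core. A rough stand-in for that element-size choice (plain memcpy in place of the hand-tuned SPARC copy loops):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Illustrative stand-in for the stub's copy-core selection: the real stubs
    // emit tuned SPARC loops; the element width is the point being made here.
    void oop_disjoint_copy(void* from, void* to, size_t count, bool use_compressed_oops) {
      size_t elem = use_compressed_oops ? sizeof(uint32_t)   // narrow oops: int copy core
                                        : sizeof(uintptr_t); // full oops:   long copy core
      std::memcpy(to, from, count * elem);                   // disjoint, so memcpy is safe
    }

    int main() {
      uint32_t src[4] = {1, 2, 3, 4}, dst[4] = {0, 0, 0, 0};
      oop_disjoint_copy(src, dst, 4, /*use_compressed_oops=*/true);
      return dst[3] == 4 ? 0 : 1;
    }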
@@ -2272,14 +2303,18 @@

 address nooverlap_target = aligned ?
 StubRoutines::arrayof_oop_disjoint_arraycopy() :
 disjoint_oop_copy_entry;

-array_overlap_test(nooverlap_target, LogBytesPerWord);
+array_overlap_test(nooverlap_target, LogBytesPerHeapOop);

 #ifdef _LP64
-generate_conjoint_long_copy_core(aligned);
+if (UseCompressedOops) {
+generate_conjoint_int_copy_core(aligned);
+} else {
+generate_conjoint_long_copy_core(aligned);
+}
 #else
 generate_conjoint_int_copy_core(aligned);
 #endif

 // O0 is used as temp register
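Note: array_overlap_test decides between the forward (disjoint) path and the backward copy, so it too must scale the element count by LogBytesPerHeapOop rather than LogBytesPerWord. A small sketch of the test it performs, under that assumption:

    #include <cstddef>
    #include <cstdint>

    // True if [to, to + count*elem) starts inside the source range, i.e. a
    // simple forward copy would clobber not-yet-copied elements.
    bool needs_backward_copy(uintptr_t from, uintptr_t to, size_t count, int log_elem_size) {
      uintptr_t byte_count = (uintptr_t)count << log_elem_size;
      return to > from && to < from + byte_count;
    }

    int main() {
      // 10 narrow oops (log size 2): copying forward from 100 to 108 would clobber.
      return needs_backward_copy(100, 108, 10, 2) ? 0 : 1;
    }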
@@ -2375,12 +2410,10 @@

 __ align(CodeEntryAlignment);
 StubCodeMark mark(this, "StubRoutines", name);
 address start = __ pc();

-int klass_off = oopDesc::klass_offset_in_bytes();
-
 gen_write_ref_array_pre_barrier(G1, G5);


 #ifdef ASSERT
 // We sometimes save a frame (see partial_subtype_check below).
@@ -2393,11 +2426,11 @@
 // caller guarantees that the arrays really are different
 // otherwise, we would have to make conjoint checks
 { Label L;
 __ mov(O3, G1); // spill: overlap test smashes O3
 __ mov(O4, G4); // spill: overlap test smashes O4
-array_overlap_test(L, LogBytesPerWord);
+array_overlap_test(L, LogBytesPerHeapOop);
 __ stop("checkcast_copy within a single array");
 __ bind(L);
 __ mov(G1, O3);
 __ mov(G4, O4);
 }
@@ -2427,22 +2460,22 @@
 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
 __ align(16);

 __ bind(store_element);
 // deccc(G1_remain); // decrement the count (hoisted)
-__ st_ptr(G3_oop, O1_to, O5_offset); // store the oop
-__ inc(O5_offset, wordSize); // step to next offset
+__ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
+__ inc(O5_offset, heapOopSize); // step to next offset
 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
 __ delayed()->set(0, O0); // return -1 on success

 // ======== loop entry is here ========
 __ bind(load_element);
-__ ld_ptr(O0_from, O5_offset, G3_oop); // load the oop
+__ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
 __ br_null(G3_oop, true, Assembler::pt, store_element);
 __ delayed()->deccc(G1_remain); // decrement the count

-__ ld_ptr(G3_oop, klass_off, G4_klass); // query the object klass
+__ load_klass(G3_oop, G4_klass); // query the object klass

 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
 // branch to this on success:
 store_element,
 // decrement this on success:
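Note: the checkcast copy loop now reads and writes elements with load_heap_oop/store_heap_oop and fetches the klass with load_klass, so each element is handled in its compressed or full-width form. A loose C++ rendering of the loop's control flow (the real stub's return convention is register-based and more involved; is_subtype_of is a placeholder for the generated type check):

    #include <cstddef>

    struct Klass;                           // opaque in this sketch
    struct oopDesc { Klass* klass; };       // stand-in; the real header may hold a narrow oop

    // Placeholder for the generated subtype check (partial_subtype_check above).
    static bool is_subtype_of(const Klass* k, const Klass* super) {
      (void)k; (void)super; return true;
    }

    // Copy elements one by one, type-checking each non-null oop before the store.
    // Returns the number of elements copied.
    size_t checkcast_copy(oopDesc** from, oopDesc** to, size_t count, const Klass* super) {
      for (size_t i = 0; i < count; ++i) {
        oopDesc* elem = from[i];                       // load_heap_oop
        if (elem != nullptr && !is_subtype_of(elem->klass, super)) {
          return i;                                    // stop at the first failing element
        }
        to[i] = elem;                                  // store_heap_oop (card marks follow)
      }
      return count;
    }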
@@ -2640,21 +2673,27 @@
 __ delayed()->tst(length);
 __ br(Assembler::negative, false, Assembler::pn, L_failed);

 BLOCK_COMMENT("arraycopy argument klass checks");
 // get src->klass()
-__ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
+if (UseCompressedOops) {
+__ delayed()->nop(); // ??? not good
+__ load_klass(src, G3_src_klass);
+} else {
+__ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
+}

 #ifdef ASSERT
 // assert(src->klass() != NULL);
 BLOCK_COMMENT("assert klasses not null");
 { Label L_a, L_b;
 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
-__ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
+__ delayed()->nop();
 __ bind(L_a);
 __ stop("broken null klass");
 __ bind(L_b);
+__ load_klass(dst, G4_dst_klass);
 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
 BLOCK_COMMENT("assert done");
 }
 #endif
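Note: the ld_ptr of the klass field can no longer sit in a branch delay slot when compressed oops are on, because load_klass expands to a load of the 32-bit narrow klass oop plus a decode, which is more than the single instruction a SPARC delay slot can hold; hence the delayed()->nop() (flagged "??? not good" as a known cost). A conceptual sketch of what that expansion computes, reusing the assumed base/shift from the earlier sketch:

    #include <cstdint>

    // Conceptual expansion of load_klass under compressed oops: load a 32-bit
    // narrow klass oop from the object header and decode it against the heap
    // base. kHeapBase/kShift are assumptions, as before.
    static const uintptr_t kHeapBase = 0x0000000100000000ULL;
    static const int       kShift    = 3;

    uintptr_t load_klass(const uint32_t* klass_field) {
      uint32_t narrow = *klass_field;                   // lduw [obj + klass_offset]
      return kHeapBase + ((uintptr_t)narrow << kShift); // decode: base + (narrow << 3)
    }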
@@ -2671,16 +2710,23 @@
 Klass::layout_helper_offset_in_bytes();

 // Load 32-bits signed value. Use br() instruction with it to check icc.
 __ lduw(G3_src_klass, lh_offset, G5_lh);

+if (UseCompressedOops) {
+__ load_klass(dst, G4_dst_klass);
+}
 // Handle objArrays completely differently...
 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
 __ set(objArray_lh, O5_temp);
 __ cmp(G5_lh, O5_temp);
 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
-__ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
+if (UseCompressedOops) {
+__ delayed()->nop();
+} else {
+__ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
+}

 // if (src->klass() != dst->klass()) return -1;
 __ cmp(G3_src_klass, G4_dst_klass);
 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
 __ delayed()->nop();
@@ -2775,12 +2821,12 @@
 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
 O5_temp, G5_lh, L_failed);

 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
-__ sll_ptr(src_pos, LogBytesPerOop, src_pos);
-__ sll_ptr(dst_pos, LogBytesPerOop, dst_pos);
+__ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
+__ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
 __ add(src, src_pos, from); // src_addr
 __ add(dst, dst_pos, to); // dst_addr
 __ BIND(L_plain_copy);
 __ br(Assembler::always, false, Assembler::pt, StubRoutines::_oop_arraycopy);
 __ delayed()->signx(length, count); // length
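Note: element addresses in an oop array are formed as the array base offset plus the index scaled by the heap-oop size, which is why both shifts switch to LogBytesPerHeapOop. A one-function sketch of that address arithmetic:

    #include <cstddef>
    #include <cstdint>

    // Element address inside an oop array: header/base offset plus the index
    // scaled by the heap-oop size (4 with compressed oops on 64-bit, else 8).
    uintptr_t oop_element_addr(uintptr_t array, size_t base_offset_bytes,
                               size_t pos, int log_bytes_per_heap_oop) {
      return array + base_offset_bytes + (pos << log_bytes_per_heap_oop);
    }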
@@ -2799,12 +2845,12 @@
 O5_temp, G5_lh, L_failed);

 // Marshal the base address arguments now, freeing registers.
 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
-__ sll_ptr(src_pos, LogBytesPerOop, src_pos);
-__ sll_ptr(dst_pos, LogBytesPerOop, dst_pos);
+__ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
+__ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
 __ add(src, src_pos, from); // src_addr
 __ add(dst, dst_pos, to); // dst_addr
 __ signx(length, count); // length (reloaded)

 Register sco_temp = O3; // this register is free now