comparison src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 2324:0ac769a57c64

6627983: G1: Bad oop deference during marking Summary: Bulk zeroing reduction didn't work with G1, because arraycopy would call pre-barriers on uninitialized oops. The solution is to have versions of the arraycopy stubs that don't have pre-barriers. Also refactored arraycopy stub generation on SPARC to be more readable and reduced the number of stubs necessary in some cases. Reviewed-by: jrose, kvn, never
author iveresov
date Tue, 01 Mar 2011 14:56:48 -0800
parents d89a22843c62
children 38fa55e5e792
comparison
equal deleted inserted replaced
2323:bc6b27fb3568 2324:0ac769a57c64
1031 // count - register containing element count 1031 // count - register containing element count
1032 // tmp - scratch register 1032 // tmp - scratch register
1033 // 1033 //
1034 // The input registers are overwritten. 1034 // The input registers are overwritten.
1035 // 1035 //
1036 void gen_write_ref_array_pre_barrier(Register addr, Register count) { 1036 void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
1037 BarrierSet* bs = Universe::heap()->barrier_set(); 1037 BarrierSet* bs = Universe::heap()->barrier_set();
1038 if (bs->has_write_ref_pre_barrier()) { 1038 switch (bs->kind()) {
1039 assert(bs->has_write_ref_array_pre_opt(), 1039 case BarrierSet::G1SATBCT:
1040 "Else unsupported barrier set."); 1040 case BarrierSet::G1SATBCTLogging:
1041 1041 // With G1, don't generate the call if we statically know that the target is uninitialized
1042 __ save_frame(0); 1042 if (!dest_uninitialized) {
1043 // Save the necessary global regs... will be used after. 1043 __ save_frame(0);
1044 if (addr->is_global()) { 1044 // Save the necessary global regs... will be used after.
1045 __ mov(addr, L0); 1045 if (addr->is_global()) {
1046 } 1046 __ mov(addr, L0);
1047 if (count->is_global()) { 1047 }
1048 __ mov(count, L1); 1048 if (count->is_global()) {
1049 } 1049 __ mov(count, L1);
1050 __ mov(addr->after_save(), O0); 1050 }
1051 // Get the count into O1 1051 __ mov(addr->after_save(), O0);
1052 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); 1052 // Get the count into O1
1053 __ delayed()->mov(count->after_save(), O1); 1053 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
1054 if (addr->is_global()) { 1054 __ delayed()->mov(count->after_save(), O1);
1055 __ mov(L0, addr); 1055 if (addr->is_global()) {
1056 } 1056 __ mov(L0, addr);
1057 if (count->is_global()) { 1057 }
1058 __ mov(L1, count); 1058 if (count->is_global()) {
1059 } 1059 __ mov(L1, count);
1060 __ restore(); 1060 }
1061 __ restore();
1062 }
1063 break;
1064 case BarrierSet::CardTableModRef:
1065 case BarrierSet::CardTableExtension:
1066 case BarrierSet::ModRef:
1067 break;
1068 default:
1069 ShouldNotReachHere();
1061 } 1070 }
1062 } 1071 }
1063 // 1072 //
1064 // Generate post-write barrier for array. 1073 // Generate post-write barrier for array.
1065 // 1074 //
1069 // tmp - scratch register 1078 // tmp - scratch register
1070 // 1079 //
1071 // The input registers are overwritten. 1080 // The input registers are overwritten.
1072 // 1081 //
1073 void gen_write_ref_array_post_barrier(Register addr, Register count, 1082 void gen_write_ref_array_post_barrier(Register addr, Register count,
1074 Register tmp) { 1083 Register tmp) {
1075 BarrierSet* bs = Universe::heap()->barrier_set(); 1084 BarrierSet* bs = Universe::heap()->barrier_set();
1076 1085
1077 switch (bs->kind()) { 1086 switch (bs->kind()) {
1078 case BarrierSet::G1SATBCT: 1087 case BarrierSet::G1SATBCT:
1079 case BarrierSet::G1SATBCTLogging: 1088 case BarrierSet::G1SATBCTLogging:
2404 address *entry, const char *name) { 2413 address *entry, const char *name) {
2405 __ align(CodeEntryAlignment); 2414 __ align(CodeEntryAlignment);
2406 StubCodeMark mark(this, "StubRoutines", name); 2415 StubCodeMark mark(this, "StubRoutines", name);
2407 address start = __ pc(); 2416 address start = __ pc();
2408 2417
2409 assert(!aligned, "usage"); 2418 assert(aligned, "Should always be aligned");
2410 2419
2411 assert_clean_int(O2, O3); // Make sure 'count' is clean int. 2420 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
2412 2421
2413 if (entry != NULL) { 2422 if (entry != NULL) {
2414 *entry = __ pc(); 2423 *entry = __ pc();
2433 // Arguments for generated stub: 2442 // Arguments for generated stub:
2434 // from: O0 2443 // from: O0
2435 // to: O1 2444 // to: O1
2436 // count: O2 treated as signed 2445 // count: O2 treated as signed
2437 // 2446 //
2438 address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name) { 2447 address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
2448 bool dest_uninitialized = false) {
2439 2449
2440 const Register from = O0; // source array address 2450 const Register from = O0; // source array address
2441 const Register to = O1; // destination array address 2451 const Register to = O1; // destination array address
2442 const Register count = O2; // elements count 2452 const Register count = O2; // elements count
2443 2453
2454 } 2464 }
2455 2465
2456 // save arguments for barrier generation 2466 // save arguments for barrier generation
2457 __ mov(to, G1); 2467 __ mov(to, G1);
2458 __ mov(count, G5); 2468 __ mov(count, G5);
2459 gen_write_ref_array_pre_barrier(G1, G5); 2469 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2460 #ifdef _LP64 2470 #ifdef _LP64
2461 assert_clean_int(count, O3); // Make sure 'count' is clean int. 2471 assert_clean_int(count, O3); // Make sure 'count' is clean int.
2462 if (UseCompressedOops) { 2472 if (UseCompressedOops) {
2463 generate_disjoint_int_copy_core(aligned); 2473 generate_disjoint_int_copy_core(aligned);
2464 } else { 2474 } else {
2484 // from: O0 2494 // from: O0
2485 // to: O1 2495 // to: O1
2486 // count: O2 treated as signed 2496 // count: O2 treated as signed
2487 // 2497 //
2488 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target, 2498 address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
2489 address *entry, const char *name) { 2499 address *entry, const char *name,
2500 bool dest_uninitialized = false) {
2490 2501
2491 const Register from = O0; // source array address 2502 const Register from = O0; // source array address
2492 const Register to = O1; // destination array address 2503 const Register to = O1; // destination array address
2493 const Register count = O2; // elements count 2504 const Register count = O2; // elements count
2494 2505
2507 array_overlap_test(nooverlap_target, LogBytesPerHeapOop); 2518 array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
2508 2519
2509 // save arguments for barrier generation 2520 // save arguments for barrier generation
2510 __ mov(to, G1); 2521 __ mov(to, G1);
2511 __ mov(count, G5); 2522 __ mov(count, G5);
2512 gen_write_ref_array_pre_barrier(G1, G5); 2523 gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
2513 2524
2514 #ifdef _LP64 2525 #ifdef _LP64
2515 if (UseCompressedOops) { 2526 if (UseCompressedOops) {
2516 generate_conjoint_int_copy_core(aligned); 2527 generate_conjoint_int_copy_core(aligned);
2517 } else { 2528 } else {
2576 // count: O2 treated as signed 2587 // count: O2 treated as signed
2577 // ckoff: O3 (super_check_offset) 2588 // ckoff: O3 (super_check_offset)
2578 // ckval: O4 (super_klass) 2589 // ckval: O4 (super_klass)
2579 // ret: O0 zero for success; (-1^K) where K is partial transfer count 2590 // ret: O0 zero for success; (-1^K) where K is partial transfer count
2580 // 2591 //
2581 address generate_checkcast_copy(const char *name, address *entry) { 2592 address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) {
2582 2593
2583 const Register O0_from = O0; // source array address 2594 const Register O0_from = O0; // source array address
2584 const Register O1_to = O1; // destination array address 2595 const Register O1_to = O1; // destination array address
2585 const Register O2_count = O2; // elements count 2596 const Register O2_count = O2; // elements count
2586 const Register O3_ckoff = O3; // super_check_offset 2597 const Register O3_ckoff = O3; // super_check_offset
2622 if (entry != NULL) { 2633 if (entry != NULL) {
2623 *entry = __ pc(); 2634 *entry = __ pc();
2624 // caller can pass a 64-bit byte count here (from generic stub) 2635 // caller can pass a 64-bit byte count here (from generic stub)
2625 BLOCK_COMMENT("Entry:"); 2636 BLOCK_COMMENT("Entry:");
2626 } 2637 }
2627 2638 gen_write_ref_array_pre_barrier(O1_to, O2_count, dest_uninitialized);
2628 gen_write_ref_array_pre_barrier(O1_to, O2_count);
2629 2639
2630 Label load_element, store_element, do_card_marks, fail, done; 2640 Label load_element, store_element, do_card_marks, fail, done;
2631 __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it 2641 __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it
2632 __ brx(Assembler::notZero, false, Assembler::pt, load_element); 2642 __ brx(Assembler::notZero, false, Assembler::pt, load_element);
2633 __ delayed()->mov(G0, O5_offset); // offset from start of arrays 2643 __ delayed()->mov(G0, O5_offset); // offset from start of arrays
3081 address entry_jint_arraycopy; 3091 address entry_jint_arraycopy;
3082 address entry_oop_arraycopy; 3092 address entry_oop_arraycopy;
3083 address entry_jlong_arraycopy; 3093 address entry_jlong_arraycopy;
3084 address entry_checkcast_arraycopy; 3094 address entry_checkcast_arraycopy;
3085 3095
3086 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, 3096 //*** jbyte
3087 "jbyte_disjoint_arraycopy"); 3097 // Always need aligned and unaligned versions
3088 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy, 3098 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
3089 "jbyte_arraycopy"); 3099 "jbyte_disjoint_arraycopy");
3090 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, 3100 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry,
3091 "jshort_disjoint_arraycopy"); 3101 &entry_jbyte_arraycopy,
3092 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy, 3102 "jbyte_arraycopy");
3093 "jshort_arraycopy"); 3103 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
3094 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, 3104 "arrayof_jbyte_disjoint_arraycopy");
3095 "jint_disjoint_arraycopy"); 3105 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
3096 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, &entry_jint_arraycopy, 3106 "arrayof_jbyte_arraycopy");
3097 "jint_arraycopy"); 3107
3098 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, &entry, 3108 //*** jshort
3099 "jlong_disjoint_arraycopy"); 3109 // Always need aligned and unaligned versions
3100 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, entry, &entry_jlong_arraycopy, 3110 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
3101 "jlong_arraycopy"); 3111 "jshort_disjoint_arraycopy");
3102 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry, 3112 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
3103 "oop_disjoint_arraycopy"); 3113 &entry_jshort_arraycopy,
3104 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy, 3114 "jshort_arraycopy");
3105 "oop_arraycopy");
3106
3107
3108 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
3109 "arrayof_jbyte_disjoint_arraycopy");
3110 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
3111 "arrayof_jbyte_arraycopy");
3112
3113 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, 3115 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
3114 "arrayof_jshort_disjoint_arraycopy"); 3116 "arrayof_jshort_disjoint_arraycopy");
3115 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, 3117 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
3116 "arrayof_jshort_arraycopy"); 3118 "arrayof_jshort_arraycopy");
3117 3119
3118 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, 3120 //*** jint
3119 "arrayof_jint_disjoint_arraycopy"); 3121 // Aligned versions
3122 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
3123 "arrayof_jint_disjoint_arraycopy");
3124 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
3125 "arrayof_jint_arraycopy");
3120 #ifdef _LP64 3126 #ifdef _LP64
3121 // since sizeof(jint) < sizeof(HeapWord), there's a different flavor: 3127 // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
3122 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, NULL, "arrayof_jint_arraycopy"); 3128 // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
3123 #else 3129 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
3124 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; 3130 "jint_disjoint_arraycopy");
3131 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
3132 &entry_jint_arraycopy,
3133 "jint_arraycopy");
3134 #else
3135 // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version
3136 // (in fact in 32bit we always have a pre-loop part even in the aligned version,
3137 // because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
3138 StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
3139 StubRoutines::_jint_arraycopy = StubRoutines::_arrayof_jint_arraycopy;
3125 #endif 3140 #endif
3126 3141
3127 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, NULL, 3142
3128 "arrayof_jlong_disjoint_arraycopy"); 3143 //*** jlong
3129 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, NULL, 3144 // It is always aligned
3130 "arrayof_oop_disjoint_arraycopy"); 3145 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
3131 3146 "arrayof_jlong_disjoint_arraycopy");
3132 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; 3147 StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
3133 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; 3148 "arrayof_jlong_arraycopy");
3134 3149 StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
3135 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); 3150 StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
3151
3152
3153 //*** oops
3154 // Aligned versions
3155 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, &entry,
3156 "arrayof_oop_disjoint_arraycopy");
3157 StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
3158 "arrayof_oop_arraycopy");
3159 // Aligned versions without pre-barriers
3160 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
3161 "arrayof_oop_disjoint_arraycopy_uninit",
3162 /*dest_uninitialized*/true);
3163 StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, entry, NULL,
3164 "arrayof_oop_arraycopy_uninit",
3165 /*dest_uninitialized*/true);
3166 #ifdef _LP64
3167 if (UseCompressedOops) {
3168 // With compressed oops we need unaligned versions; notice that we overwrite entry_oop_arraycopy.
3169 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, &entry,
3170 "oop_disjoint_arraycopy");
3171 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
3172 "oop_arraycopy");
3173 // Unaligned versions without pre-barriers
3174 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, &entry,
3175 "oop_disjoint_arraycopy_uninit",
3176 /*dest_uninitialized*/true);
3177 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(false, entry, NULL,
3178 "oop_arraycopy_uninit",
3179 /*dest_uninitialized*/true);
3180 } else
3181 #endif
3182 {
3183 // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
3184 StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
3185 StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
3186 StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
3187 StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
3188 }
3189
3190 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3191 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3192 /*dest_uninitialized*/true);
3193
3136 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", 3194 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
3137 entry_jbyte_arraycopy, 3195 entry_jbyte_arraycopy,
3138 entry_jshort_arraycopy, 3196 entry_jshort_arraycopy,
3139 entry_jint_arraycopy, 3197 entry_jint_arraycopy,
3140 entry_jlong_arraycopy); 3198 entry_jlong_arraycopy);