Mercurial > hg > graal-compiler
comparison src/share/vm/opto/loopTransform.cpp @ 1763:d6f45b55c972
4809552: Optimize Arrays.fill(...)
Reviewed-by: kvn
author | never |
---|---|
date | Fri, 27 Aug 2010 17:33:49 -0700 |
parents | 6027dddc26c6 |
children | 5e4f03302987 |
comparison
equal
deleted
inserted
replaced
1731:ee5cc9e78493 | 1763:d6f45b55c972 |
---|---|
2047 } | 2047 } |
2048 const CmpNode *cmp = bol->in(1)->as_Cmp(); | 2048 const CmpNode *cmp = bol->in(1)->as_Cmp(); |
2049 if (cmp->Opcode() != Op_CmpU ) { | 2049 if (cmp->Opcode() != Op_CmpU ) { |
2050 return false; | 2050 return false; |
2051 } | 2051 } |
2052 if (cmp->in(2)->Opcode() != Op_LoadRange) { | 2052 Node* range = cmp->in(2); |
2053 return false; | 2053 if (range->Opcode() != Op_LoadRange) { |
2054 } | 2054 const TypeInt* tint = phase->_igvn.type(range)->isa_int(); |
2055 LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2); | 2055 if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) { |
2056 if (!invar.is_invariant(lr)) { // loadRange must be invariant | 2056 // Allow predication on positive values that aren't LoadRanges. |
2057 // This allows optimization of loops where the length of the | |
2058 // array is a known value and doesn't need to be loaded back | |
2059 // from the array. | |
2060 return false; | |
2061 } | |
2062 } | |
2063 if (!invar.is_invariant(range)) { | |
2057 return false; | 2064 return false; |
2058 } | 2065 } |
2059 Node *iv = _head->as_CountedLoop()->phi(); | 2066 Node *iv = _head->as_CountedLoop()->phi(); |
2060 int scale = 0; | 2067 int scale = 0; |
2061 Node *offset = NULL; | 2068 Node *offset = NULL; |
2246 | 2253 |
2247 // Range check for counted loops | 2254 // Range check for counted loops |
2248 const Node* cmp = bol->in(1)->as_Cmp(); | 2255 const Node* cmp = bol->in(1)->as_Cmp(); |
2249 Node* idx = cmp->in(1); | 2256 Node* idx = cmp->in(1); |
2250 assert(!invar.is_invariant(idx), "index is variant"); | 2257 assert(!invar.is_invariant(idx), "index is variant"); |
2251 assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be"); | 2258 assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be"); |
2252 Node* ld_rng = cmp->in(2); // LoadRangeNode | 2259 Node* rng = cmp->in(2); |
2253 assert(invar.is_invariant(ld_rng), "load range must be invariant"); | 2260 assert(invar.is_invariant(rng), "range must be invariant"); |
2254 int scale = 1; | 2261 int scale = 1; |
2255 Node* offset = zero; | 2262 Node* offset = zero; |
2256 bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); | 2263 bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset); |
2257 assert(ok, "must be index expression"); | 2264 assert(ok, "must be index expression"); |
2258 | 2265 |
2269 assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); | 2276 assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate"); |
2270 Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); | 2277 Node *ctrl = lower_bound_proj->in(0)->as_If()->in(0); |
2271 | 2278 |
2272 // Perform cloning to keep Invariance state correct since the | 2279 // Perform cloning to keep Invariance state correct since the |
2273 // late schedule will place invariant things in the loop. | 2280 // late schedule will place invariant things in the loop. |
2274 ld_rng = invar.clone(ld_rng, ctrl); | 2281 rng = invar.clone(rng, ctrl); |
2275 if (offset && offset != zero) { | 2282 if (offset && offset != zero) { |
2276 assert(invar.is_invariant(offset), "offset must be loop invariant"); | 2283 assert(invar.is_invariant(offset), "offset must be loop invariant"); |
2277 offset = invar.clone(offset, ctrl); | 2284 offset = invar.clone(offset, ctrl); |
2278 } | 2285 } |
2279 | 2286 |
2280 // Test the lower bound | 2287 // Test the lower bound |
2281 Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, false); | 2288 Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false); |
2282 IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); | 2289 IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If(); |
2283 _igvn.hash_delete(lower_bound_iff); | 2290 _igvn.hash_delete(lower_bound_iff); |
2284 lower_bound_iff->set_req(1, lower_bound_bol); | 2291 lower_bound_iff->set_req(1, lower_bound_bol); |
2285 if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); | 2292 if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx); |
2286 | 2293 |
2287 // Test the upper bound | 2294 // Test the upper bound |
2288 Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, true); | 2295 Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true); |
2289 IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); | 2296 IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If(); |
2290 _igvn.hash_delete(upper_bound_iff); | 2297 _igvn.hash_delete(upper_bound_iff); |
2291 upper_bound_iff->set_req(1, upper_bound_bol); | 2298 upper_bound_iff->set_req(1, upper_bound_bol); |
2292 if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", lower_bound_iff->_idx); | 2299 if (TraceLoopPredicate) tty->print_cr("upper bound check if: %d", lower_bound_iff->_idx); |
2293 | 2300 |
2364 hoisted |= _next->loop_predication( phase); | 2371 hoisted |= _next->loop_predication( phase); |
2365 } | 2372 } |
2366 | 2373 |
2367 return hoisted; | 2374 return hoisted; |
2368 } | 2375 } |
2376 | |
2377 | |
2378 // Process all the loops in the loop tree and replace any fill | |
2379 // patterns with an intrinsic version. | |
2380 bool PhaseIdealLoop::do_intrinsify_fill() { | |
2381 bool changed = false; | |
2382 for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { | |
2383 IdealLoopTree* lpt = iter.current(); | |
2384 changed |= intrinsify_fill(lpt); | |
2385 } | |
2386 return changed; | |
2387 } | |
2388 | |
2389 | |
2390 // Examine an inner loop looking for a single store of an invariant | |
2391 // value in a unit stride loop, | |
2392 bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value, | |
2393 Node*& shift, Node*& con) { | |
2394 const char* msg = NULL; | |
2395 Node* msg_node = NULL; | |
2396 | |
2397 store_value = NULL; | |
2398 con = NULL; | |
2399 shift = NULL; | |
2400 | |
2401 // Process the loop looking for stores. If there are multiple | |
2402 // stores or extra control flow give up at this point. | |
2403 CountedLoopNode* head = lpt->_head->as_CountedLoop(); | |
2404 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2405 Node* n = lpt->_body.at(i); | |
2406 if (n->outcnt() == 0) continue; // Ignore dead | |
2407 if (n->is_Store()) { | |
2408 if (store != NULL) { | |
2409 msg = "multiple stores"; | |
2410 break; | |
2411 } | |
2412 int opc = n->Opcode(); | |
2413 if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreCM) { | |
2414 msg = "oop fills not handled"; | |
2415 break; | |
2416 } | |
2417 Node* value = n->in(MemNode::ValueIn); | |
2418 if (!lpt->is_invariant(value)) { | |
2419 msg = "variant store value"; | |
2420 } | |
2421 store = n; | |
2422 store_value = value; | |
2423 } else if (n->is_If() && n != head->loopexit()) { | |
2424 msg = "extra control flow"; | |
2425 msg_node = n; | |
2426 } | |
2427 } | |
2428 | |
2429 if (store == NULL) { | |
2430 // No store in loop | |
2431 return false; | |
2432 } | |
2433 | |
2434 if (msg == NULL && head->stride_con() != 1) { | |
2435 // could handle negative strides too | |
2436 if (head->stride_con() < 0) { | |
2437 msg = "negative stride"; | |
2438 } else { | |
2439 msg = "non-unit stride"; | |
2440 } | |
2441 } | |
2442 | |
2443 if (msg == NULL && !store->in(MemNode::Address)->is_AddP()) { | |
2444 msg = "can't handle store address"; | |
2445 msg_node = store->in(MemNode::Address); | |
2446 } | |
2447 | |
2448 // Make sure there is an appropriate fill routine | |
2449 BasicType t = store->as_Mem()->memory_type(); | |
2450 const char* fill_name; | |
2451 if (msg == NULL && | |
2452 StubRoutines::select_fill_function(t, false, fill_name) == NULL) { | |
2453 msg = "unsupported store"; | |
2454 msg_node = store; | |
2455 } | |
2456 | |
2457 if (msg != NULL) { | |
2458 #ifndef PRODUCT | |
2459 if (TraceOptimizeFill) { | |
2460 tty->print_cr("not fill intrinsic candidate: %s", msg); | |
2461 if (msg_node != NULL) msg_node->dump(); | |
2462 } | |
2463 #endif | |
2464 return false; | |
2465 } | |
2466 | |
2467 // Make sure the address expression can be handled. It should be | |
2468 // head->phi * elsize + con. head->phi might have a ConvI2L. | |
2469 Node* elements[4]; | |
2470 Node* conv = NULL; | |
2471 int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements)); | |
2472 for (int e = 0; e < count; e++) { | |
2473 Node* n = elements[e]; | |
2474 if (n->is_Con() && con == NULL) { | |
2475 con = n; | |
2476 } else if (n->Opcode() == Op_LShiftX && shift == NULL) { | |
2477 Node* value = n->in(1); | |
2478 #ifdef _LP64 | |
2479 if (value->Opcode() == Op_ConvI2L) { | |
2480 conv = value; | |
2481 value = value->in(1); | |
2482 } | |
2483 #endif | |
2484 if (value != head->phi()) { | |
2485 msg = "unhandled shift in address"; | |
2486 } else { | |
2487 shift = n; | |
2488 assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int(), "scale should match"); | |
2489 } | |
2490 } else if (n->Opcode() == Op_ConvI2L && conv == NULL) { | |
2491 if (n->in(1) == head->phi()) { | |
2492 conv = n; | |
2493 } else { | |
2494 msg = "unhandled input to ConvI2L"; | |
2495 } | |
2496 } else if (n == head->phi()) { | |
2497 // no shift, check below for allowed cases | |
2498 } else { | |
2499 msg = "unhandled node in address"; | |
2500 msg_node = n; | |
2501 } | |
2502 } | |
2503 | |
2504 if (count == -1) { | |
2505 msg = "malformed address expression"; | |
2506 msg_node = store; | |
2507 } | |
2508 | |
2509 // byte sized items won't have a shift | |
2510 if (msg == NULL && shift == NULL && t != T_BYTE && t != T_BOOLEAN) { | |
2511 msg = "can't find shift"; | |
2512 msg_node = store; | |
2513 } | |
2514 | |
2515 if (msg != NULL) { | |
2516 #ifndef PRODUCT | |
2517 if (TraceOptimizeFill) { | |
2518 tty->print_cr("not fill intrinsic: %s", msg); | |
2519 if (msg_node != NULL) msg_node->dump(); | |
2520 } | |
2521 #endif | |
2522 return false; | |
2523 } | |
2524 | |
2525 // Now make sure all the other nodes in the loop can be handled | |
2526 VectorSet ok(Thread::current()->resource_area()); | |
2527 | |
2528 // store related values are ok | |
2529 ok.set(store->_idx); | |
2530 ok.set(store->in(MemNode::Memory)->_idx); | |
2531 | |
2532 // Loop structure is ok | |
2533 ok.set(head->_idx); | |
2534 ok.set(head->loopexit()->_idx); | |
2535 ok.set(head->phi()->_idx); | |
2536 ok.set(head->incr()->_idx); | |
2537 ok.set(head->loopexit()->cmp_node()->_idx); | |
2538 ok.set(head->loopexit()->in(1)->_idx); | |
2539 | |
2540 // Address elements are ok | |
2541 if (con) ok.set(con->_idx); | |
2542 if (shift) ok.set(shift->_idx); | |
2543 if (conv) ok.set(conv->_idx); | |
2544 | |
2545 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2546 Node* n = lpt->_body.at(i); | |
2547 if (n->outcnt() == 0) continue; // Ignore dead | |
2548 if (ok.test(n->_idx)) continue; | |
2549 // Backedge projection is ok | |
2550 if (n->is_IfTrue() && n->in(0) == head->loopexit()) continue; | |
2551 if (!n->is_AddP()) { | |
2552 msg = "unhandled node"; | |
2553 msg_node = n; | |
2554 break; | |
2555 } | |
2556 } | |
2557 | |
2558 // Make sure no unexpected values are used outside the loop | |
2559 for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) { | |
2560 Node* n = lpt->_body.at(i); | |
2561 // These values can be replaced with other nodes if they are used | |
2562 // outside the loop. | |
2563 if (n == store || n == head->loopexit() || n == head->incr()) continue; | |
2564 for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) { | |
2565 Node* use = iter.get(); | |
2566 if (!lpt->_body.contains(use)) { | |
2567 msg = "node is used outside loop"; | |
2568 // lpt->_body.dump(); | |
2569 msg_node = n; | |
2570 break; | |
2571 } | |
2572 } | |
2573 } | |
2574 | |
2575 #ifdef ASSERT | |
2576 if (TraceOptimizeFill) { | |
2577 if (msg != NULL) { | |
2578 tty->print_cr("no fill intrinsic: %s", msg); | |
2579 if (msg_node != NULL) msg_node->dump(); | |
2580 } else { | |
2581 tty->print_cr("fill intrinsic for:"); | |
2582 } | |
2583 store->dump(); | |
2584 if (Verbose) { | |
2585 lpt->_body.dump(); | |
2586 } | |
2587 } | |
2588 #endif | |
2589 | |
2590 return msg == NULL; | |
2591 } | |
2592 | |
2593 | |
2594 | |
2595 bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) { | |
2596 // Only for counted inner loops | |
2597 if (!lpt->is_counted() || !lpt->is_inner()) { | |
2598 return false; | |
2599 } | |
2600 | |
2601 // Must have constant stride | |
2602 CountedLoopNode* head = lpt->_head->as_CountedLoop(); | |
2603 if (!head->stride_is_con() || !head->is_normal_loop()) { | |
2604 return false; | |
2605 } | |
2606 | |
2607 // Check that the body only contains a store of a loop invariant | |
2608 // value that is indexed by the loop phi. | |
2609 Node* store = NULL; | |
2610 Node* store_value = NULL; | |
2611 Node* shift = NULL; | |
2612 Node* offset = NULL; | |
2613 if (!match_fill_loop(lpt, store, store_value, shift, offset)) { | |
2614 return false; | |
2615 } | |
2616 | |
2617 // Now replace the whole loop body by a call to a fill routine that | |
2618 // covers the same region as the loop. | |
2619 Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base); | |
2620 | |
2621 // Build an expression for the beginning of the copy region | |
2622 Node* index = head->init_trip(); | |
2623 #ifdef _LP64 | |
2624 index = new (C, 2) ConvI2LNode(index); | |
2625 _igvn.register_new_node_with_optimizer(index); | |
2626 #endif | |
2627 if (shift != NULL) { | |
2628 // byte arrays don't require a shift but others do. | |
2629 index = new (C, 3) LShiftXNode(index, shift->in(2)); | |
2630 _igvn.register_new_node_with_optimizer(index); | |
2631 } | |
2632 index = new (C, 4) AddPNode(base, base, index); | |
2633 _igvn.register_new_node_with_optimizer(index); | |
2634 Node* from = new (C, 4) AddPNode(base, index, offset); | |
2635 _igvn.register_new_node_with_optimizer(from); | |
2636 // Compute the number of elements to copy | |
2637 Node* len = new (C, 3) SubINode(head->limit(), head->init_trip()); | |
2638 _igvn.register_new_node_with_optimizer(len); | |
2639 | |
2640 BasicType t = store->as_Mem()->memory_type(); | |
2641 bool aligned = false; | |
2642 if (offset != NULL && head->init_trip()->is_Con()) { | |
2643 int element_size = type2aelembytes(t); | |
2644 aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0; | |
2645 } | |
2646 | |
2647 // Build a call to the fill routine | |
2648 const char* fill_name; | |
2649 address fill = StubRoutines::select_fill_function(t, aligned, fill_name); | |
2650 assert(fill != NULL, "what?"); | |
2651 | |
2652 // Convert float/double to int/long for fill routines | |
2653 if (t == T_FLOAT) { | |
2654 store_value = new (C, 2) MoveF2INode(store_value); | |
2655 _igvn.register_new_node_with_optimizer(store_value); | |
2656 } else if (t == T_DOUBLE) { | |
2657 store_value = new (C, 2) MoveD2LNode(store_value); | |
2658 _igvn.register_new_node_with_optimizer(store_value); | |
2659 } | |
2660 | |
2661 Node* mem_phi = store->in(MemNode::Memory); | |
2662 Node* result_ctrl; | |
2663 Node* result_mem; | |
2664 const TypeFunc* call_type = OptoRuntime::array_fill_Type(); | |
2665 int size = call_type->domain()->cnt(); | |
2666 CallLeafNode *call = new (C, size) CallLeafNoFPNode(call_type, fill, | |
2667 fill_name, TypeAryPtr::get_array_body_type(t)); | |
2668 call->init_req(TypeFunc::Parms+0, from); | |
2669 call->init_req(TypeFunc::Parms+1, store_value); | |
2670 call->init_req(TypeFunc::Parms+2, len); | |
2671 call->init_req( TypeFunc::Control, head->init_control()); | |
2672 call->init_req( TypeFunc::I_O , C->top() ) ; // does no i/o | |
2673 call->init_req( TypeFunc::Memory , mem_phi->in(LoopNode::EntryControl) ); | |
2674 call->init_req( TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr) ); | |
2675 call->init_req( TypeFunc::FramePtr, C->start()->proj_out(TypeFunc::FramePtr) ); | |
2676 _igvn.register_new_node_with_optimizer(call); | |
2677 result_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control); | |
2678 _igvn.register_new_node_with_optimizer(result_ctrl); | |
2679 result_mem = new (C, 1) ProjNode(call,TypeFunc::Memory); | |
2680 _igvn.register_new_node_with_optimizer(result_mem); | |
2681 | |
2682 // If this fill is tightly coupled to an allocation and overwrites | |
2683 // the whole body, allow it to take over the zeroing. | |
2684 AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this); | |
2685 if (alloc != NULL && alloc->is_AllocateArray()) { | |
2686 Node* length = alloc->as_AllocateArray()->Ideal_length(); | |
2687 if (head->limit() == length && | |
2688 head->init_trip() == _igvn.intcon(0)) { | |
2689 if (TraceOptimizeFill) { | |
2690 tty->print_cr("Eliminated zeroing in allocation"); | |
2691 } | |
2692 alloc->maybe_set_complete(&_igvn); | |
2693 } else { | |
2694 #ifdef ASSERT | |
2695 if (TraceOptimizeFill) { | |
2696 tty->print_cr("filling array but bounds don't match"); | |
2697 alloc->dump(); | |
2698 head->init_trip()->dump(); | |
2699 head->limit()->dump(); | |
2700 length->dump(); | |
2701 } | |
2702 #endif | |
2703 } | |
2704 } | |
2705 | |
2706 // Redirect the old control and memory edges that are outside the loop. | |
2707 Node* exit = head->loopexit()->proj_out(0); | |
2708 _igvn.replace_node(exit, result_ctrl); | |
2709 _igvn.replace_node(store, result_mem); | |
2710 // Any uses of the increment outside of the loop become the loop limit. | |
2711 _igvn.replace_node(head->incr(), head->limit()); | |
2712 | |
2713 // Disconnect the head from the loop. | |
2714 for (uint i = 0; i < lpt->_body.size(); i++) { | |
2715 Node* n = lpt->_body.at(i); | |
2716 _igvn.replace_node(n, C->top()); | |
2717 } | |
2718 | |
2719 return true; | |
2720 } |