comparison src/share/vm/opto/loopopts.cpp @ 4047:d8cb48376797

7097546: Optimize use of CMOVE instructions Summary: Avoid CMove in a loop if possible. May generate CMove if it could be moved outside a loop. Reviewed-by: never
author kvn
date Wed, 26 Oct 2011 06:08:56 -0700
parents 4e761e7e6e12
children 670a74b863fc
comparison
equal deleted inserted replaced
4046:e69a66a1457b 4047:d8cb48376797
26 #include "memory/allocation.inline.hpp" 26 #include "memory/allocation.inline.hpp"
27 #include "opto/addnode.hpp" 27 #include "opto/addnode.hpp"
28 #include "opto/connode.hpp" 28 #include "opto/connode.hpp"
29 #include "opto/divnode.hpp" 29 #include "opto/divnode.hpp"
30 #include "opto/loopnode.hpp" 30 #include "opto/loopnode.hpp"
31 #include "opto/matcher.hpp"
31 #include "opto/mulnode.hpp" 32 #include "opto/mulnode.hpp"
32 #include "opto/rootnode.hpp" 33 #include "opto/rootnode.hpp"
33 #include "opto/subnode.hpp" 34 #include "opto/subnode.hpp"
34 35
35 //============================================================================= 36 //=============================================================================
470 // of the CFG diamond is now speculatively executed. This code has to be 471 // of the CFG diamond is now speculatively executed. This code has to be
471 // "cheap enough". We are pretty much limited to CFG diamonds that merge 472 // "cheap enough". We are pretty much limited to CFG diamonds that merge
472 // 1 or 2 items with a total of 1 or 2 ops executed speculatively. 473 // 1 or 2 items with a total of 1 or 2 ops executed speculatively.
473 Node *PhaseIdealLoop::conditional_move( Node *region ) { 474 Node *PhaseIdealLoop::conditional_move( Node *region ) {
474 475
475 assert( region->is_Region(), "sanity check" ); 476 assert(region->is_Region(), "sanity check");
476 if( region->req() != 3 ) return NULL; 477 if (region->req() != 3) return NULL;
477 478
478 // Check for CFG diamond 479 // Check for CFG diamond
479 Node *lp = region->in(1); 480 Node *lp = region->in(1);
480 Node *rp = region->in(2); 481 Node *rp = region->in(2);
481 if( !lp || !rp ) return NULL; 482 if (!lp || !rp) return NULL;
482 Node *lp_c = lp->in(0); 483 Node *lp_c = lp->in(0);
483 if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL; 484 if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
484 IfNode *iff = lp_c->as_If(); 485 IfNode *iff = lp_c->as_If();
485
486 // Check for highly predictable branch. No point in CMOV'ing if
487 // we are going to predict accurately all the time.
488 // %%% This hides patterns produced by utility methods like Math.min.
489 if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
490 iff->_prob > PROB_LIKELY_MAG(3) )
491 return NULL;
492 486
493 // Check for ops pinned in an arm of the diamond. 487 // Check for ops pinned in an arm of the diamond.
494 // Can't remove the control flow in this case 488 // Can't remove the control flow in this case
495 if( lp->outcnt() > 1 ) return NULL; 489 if (lp->outcnt() > 1) return NULL;
496 if( rp->outcnt() > 1 ) return NULL; 490 if (rp->outcnt() > 1) return NULL;
491
492 IdealLoopTree* r_loop = get_loop(region);
493 assert(r_loop == get_loop(iff), "sanity");
494 // Always convert to CMOVE if all results are used only outside this loop.
495 bool used_inside_loop = (r_loop == _ltree_root);
497 496
498 // Check profitability 497 // Check profitability
499 int cost = 0; 498 int cost = 0;
500 int phis = 0; 499 int phis = 0;
501 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 500 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
502 Node *out = region->fast_out(i); 501 Node *out = region->fast_out(i);
503 if( !out->is_Phi() ) continue; // Ignore other control edges, etc 502 if (!out->is_Phi()) continue; // Ignore other control edges, etc
504 phis++; 503 phis++;
505 PhiNode* phi = out->as_Phi(); 504 PhiNode* phi = out->as_Phi();
506 switch (phi->type()->basic_type()) { 505 BasicType bt = phi->type()->basic_type();
507 case T_LONG: 506 switch (bt) {
508 cost++; // Probably encodes as 2 CMOV's 507 case T_FLOAT:
508 case T_DOUBLE: {
509 cost += Matcher::float_cmove_cost(); // Could be very expensive
510 break;
511 }
512 case T_LONG: {
513 cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
514 }
509 case T_INT: // These all CMOV fine 515 case T_INT: // These all CMOV fine
510 case T_FLOAT: 516 case T_ADDRESS: { // (RawPtr)
511 case T_DOUBLE:
512 case T_ADDRESS: // (RawPtr)
513 cost++; 517 cost++;
514 break; 518 break;
519 }
515 case T_NARROWOOP: // Fall through 520 case T_NARROWOOP: // Fall through
516 case T_OBJECT: { // Base oops are OK, but not derived oops 521 case T_OBJECT: { // Base oops are OK, but not derived oops
517 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); 522 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
518 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a 523 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
519 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus 524 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus
522 // and good. But if the base is dead, we'll not make a CMOVE. Later 527 // and good. But if the base is dead, we'll not make a CMOVE. Later
523 // the allocator will have to produce a base by creating a CMOVE of the 528 // the allocator will have to produce a base by creating a CMOVE of the
524 // relevant bases. This puts the allocator in the business of 529 // relevant bases. This puts the allocator in the business of
525 // manufacturing expensive instructions, generally a bad plan. 530 // manufacturing expensive instructions, generally a bad plan.
526 // Just Say No to Conditionally-Moved Derived Pointers. 531 // Just Say No to Conditionally-Moved Derived Pointers.
527 if( tp && tp->offset() != 0 ) 532 if (tp && tp->offset() != 0)
528 return NULL; 533 return NULL;
529 cost++; 534 cost++;
530 break; 535 break;
531 } 536 }
532 default: 537 default:
533 return NULL; // In particular, can't do memory or I/O 538 return NULL; // In particular, can't do memory or I/O
534 } 539 }
535 // Add in cost any speculative ops 540 // Add in cost any speculative ops
536 for( uint j = 1; j < region->req(); j++ ) { 541 for (uint j = 1; j < region->req(); j++) {
537 Node *proj = region->in(j); 542 Node *proj = region->in(j);
538 Node *inp = phi->in(j); 543 Node *inp = phi->in(j);
539 if (get_ctrl(inp) == proj) { // Found local op 544 if (get_ctrl(inp) == proj) { // Found local op
540 cost++; 545 cost++;
541 // Check for a chain of dependent ops; these will all become 546 // Check for a chain of dependent ops; these will all become
542 // speculative in a CMOV. 547 // speculative in a CMOV.
543 for( uint k = 1; k < inp->req(); k++ ) 548 for (uint k = 1; k < inp->req(); k++)
544 if (get_ctrl(inp->in(k)) == proj) 549 if (get_ctrl(inp->in(k)) == proj)
545 return NULL; // Too much speculative goo 550 cost += ConditionalMoveLimit; // Too much speculative goo
546 } 551 }
547 } 552 }
548 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. 553 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
549 // This will likely Split-If, a higher-payoff operation. 554 // This will likely Split-If, a higher-payoff operation.
550 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { 555 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
551 Node* use = phi->fast_out(k); 556 Node* use = phi->fast_out(k);
552 if( use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP() ) 557 if (use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP())
553 return NULL; 558 cost += ConditionalMoveLimit;
554 } 559 // Is there a use inside the loop?
555 } 560 // Note: check only basic types since CMoveP is pinned.
556 if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo 561 if (!used_inside_loop && is_java_primitive(bt)) {
562 IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
563 if (r_loop == u_loop || r_loop->is_member(u_loop)) {
564 used_inside_loop = true;
565 }
566 }
567 }
568 }
557 Node* bol = iff->in(1); 569 Node* bol = iff->in(1);
558 assert( bol->Opcode() == Op_Bool, "" ); 570 assert(bol->Opcode() == Op_Bool, "");
559 int cmp_op = bol->in(1)->Opcode(); 571 int cmp_op = bol->in(1)->Opcode();
560 // It is expensive to generate flags from a float compare. 572 // It is expensive to generate flags from a float compare.
561 // Avoid duplicated float compare. 573 // Avoid duplicated float compare.
562 if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; 574 if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
575
576 float infrequent_prob = PROB_UNLIKELY_MAG(3);
577 // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
578 if (used_inside_loop) {
579 if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
580
581 // BlockLayoutByFrequency optimization moves infrequent branch
582 // from hot path. No point in CMOV'ing in such case (110 is used
583 // instead of 100 to take into account not exactness of float value).
584 if (BlockLayoutByFrequency) {
585 infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
586 }
587 }
588 // Check for highly predictable branch. No point in CMOV'ing if
589 // we are going to predict accurately all the time.
590 if (iff->_prob < infrequent_prob ||
591 iff->_prob > (1.0f - infrequent_prob))
592 return NULL;
563 593
564 // -------------- 594 // --------------
565 // Now replace all Phis with CMOV's 595 // Now replace all Phis with CMOV's
566 Node *cmov_ctrl = iff->in(0); 596 Node *cmov_ctrl = iff->in(0);
567 uint flip = (lp->Opcode() == Op_IfTrue); 597 uint flip = (lp->Opcode() == Op_IfTrue);
568 while( 1 ) { 598 while (1) {
569 PhiNode* phi = NULL; 599 PhiNode* phi = NULL;
570 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { 600 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
571 Node *out = region->fast_out(i); 601 Node *out = region->fast_out(i);
572 if (out->is_Phi()) { 602 if (out->is_Phi()) {
573 phi = out->as_Phi(); 603 phi = out->as_Phi();
574 break; 604 break;
575 } 605 }
576 } 606 }
577 if (phi == NULL) break; 607 if (phi == NULL) break;
578 #ifndef PRODUCT 608 #ifndef PRODUCT
579 if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV"); 609 if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
580 #endif 610 #endif
581 // Move speculative ops 611 // Move speculative ops
582 for( uint j = 1; j < region->req(); j++ ) { 612 for (uint j = 1; j < region->req(); j++) {
583 Node *proj = region->in(j); 613 Node *proj = region->in(j);
584 Node *inp = phi->in(j); 614 Node *inp = phi->in(j);
585 if (get_ctrl(inp) == proj) { // Found local op 615 if (get_ctrl(inp) == proj) { // Found local op
586 #ifndef PRODUCT 616 #ifndef PRODUCT
587 if( PrintOpto && VerifyLoopOptimizations ) { 617 if (PrintOpto && VerifyLoopOptimizations) {
588 tty->print(" speculate: "); 618 tty->print(" speculate: ");
589 inp->dump(); 619 inp->dump();
590 } 620 }
591 #endif 621 #endif
592 set_ctrl(inp, cmov_ctrl); 622 set_ctrl(inp, cmov_ctrl);
594 } 624 }
595 Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) ); 625 Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
596 register_new_node( cmov, cmov_ctrl ); 626 register_new_node( cmov, cmov_ctrl );
597 _igvn.replace_node( phi, cmov ); 627 _igvn.replace_node( phi, cmov );
598 #ifndef PRODUCT 628 #ifndef PRODUCT
599 if( VerifyLoopOptimizations ) verify(); 629 if (TraceLoopOpts) {
630 tty->print("CMOV ");
631 r_loop->dump_head();
632 if (Verbose)
633 bol->in(1)->dump(1);
634 cmov->dump(1);
635 }
636 if (VerifyLoopOptimizations) verify();
600 #endif 637 #endif
601 } 638 }
602 639
603 // The useless CFG diamond will fold up later; see the optimization in 640 // The useless CFG diamond will fold up later; see the optimization in
604 // RegionNode::Ideal. 641 // RegionNode::Ideal.
674 // Use same limit as split_if_with_blocks_post 711 // Use same limit as split_if_with_blocks_post
675 if( C->unique() > 35000 ) return n; // Method too big 712 if( C->unique() > 35000 ) return n; // Method too big
676 713
677 // Split 'n' through the merge point if it is profitable 714 // Split 'n' through the merge point if it is profitable
678 Node *phi = split_thru_phi( n, n_blk, policy ); 715 Node *phi = split_thru_phi( n, n_blk, policy );
679 if( !phi ) return n; 716 if (!phi) return n;
680 717
681 // Found a Phi to split thru! 718 // Found a Phi to split thru!
682 // Replace 'n' with the new phi 719 // Replace 'n' with the new phi
683 _igvn.replace_node( n, phi ); 720 _igvn.replace_node( n, phi );
684 // Moved a load around the loop, 'en-registering' something. 721 // Moved a load around the loop, 'en-registering' something.
685 if( n_blk->Opcode() == Op_Loop && n->is_Load() && 722 if (n_blk->is_Loop() && n->is_Load() &&
686 !phi->in(LoopNode::LoopBackControl)->is_Load() ) 723 !phi->in(LoopNode::LoopBackControl)->is_Load())
687 C->set_major_progress(); 724 C->set_major_progress();
688 725
689 return phi; 726 return phi;
690 } 727 }
691 728