Mercurial > hg > graal-jvmci-8
comparison src/share/vm/opto/loopopts.cpp @ 4047:d8cb48376797
7097546: Optimize use of CMOVE instructions
Summary: Avoid CMove in a loop if possible. May generate CMove if it could be moved outside a loop.
Reviewed-by: never
author | kvn |
---|---|
date | Wed, 26 Oct 2011 06:08:56 -0700 |
parents | 4e761e7e6e12 |
children | 670a74b863fc |
comparison
equal
deleted
inserted
replaced
4046:e69a66a1457b | 4047:d8cb48376797 |
---|---|
26 #include "memory/allocation.inline.hpp" | 26 #include "memory/allocation.inline.hpp" |
27 #include "opto/addnode.hpp" | 27 #include "opto/addnode.hpp" |
28 #include "opto/connode.hpp" | 28 #include "opto/connode.hpp" |
29 #include "opto/divnode.hpp" | 29 #include "opto/divnode.hpp" |
30 #include "opto/loopnode.hpp" | 30 #include "opto/loopnode.hpp" |
31 #include "opto/matcher.hpp" | |
31 #include "opto/mulnode.hpp" | 32 #include "opto/mulnode.hpp" |
32 #include "opto/rootnode.hpp" | 33 #include "opto/rootnode.hpp" |
33 #include "opto/subnode.hpp" | 34 #include "opto/subnode.hpp" |
34 | 35 |
35 //============================================================================= | 36 //============================================================================= |
470 // of the CFG diamond is now speculatively executed. This code has to be | 471 // of the CFG diamond is now speculatively executed. This code has to be |
471 // "cheap enough". We are pretty much limited to CFG diamonds that merge | 472 // "cheap enough". We are pretty much limited to CFG diamonds that merge |
472 // 1 or 2 items with a total of 1 or 2 ops executed speculatively. | 473 // 1 or 2 items with a total of 1 or 2 ops executed speculatively. |
473 Node *PhaseIdealLoop::conditional_move( Node *region ) { | 474 Node *PhaseIdealLoop::conditional_move( Node *region ) { |
474 | 475 |
475 assert( region->is_Region(), "sanity check" ); | 476 assert(region->is_Region(), "sanity check"); |
476 if( region->req() != 3 ) return NULL; | 477 if (region->req() != 3) return NULL; |
477 | 478 |
478 // Check for CFG diamond | 479 // Check for CFG diamond |
479 Node *lp = region->in(1); | 480 Node *lp = region->in(1); |
480 Node *rp = region->in(2); | 481 Node *rp = region->in(2); |
481 if( !lp || !rp ) return NULL; | 482 if (!lp || !rp) return NULL; |
482 Node *lp_c = lp->in(0); | 483 Node *lp_c = lp->in(0); |
483 if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL; | 484 if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL; |
484 IfNode *iff = lp_c->as_If(); | 485 IfNode *iff = lp_c->as_If(); |
485 | |
486 // Check for highly predictable branch. No point in CMOV'ing if | |
487 // we are going to predict accurately all the time. | |
488 // %%% This hides patterns produced by utility methods like Math.min. | |
489 if( iff->_prob < PROB_UNLIKELY_MAG(3) || | |
490 iff->_prob > PROB_LIKELY_MAG(3) ) | |
491 return NULL; | |
492 | 486 |
493 // Check for ops pinned in an arm of the diamond. | 487 // Check for ops pinned in an arm of the diamond. |
494 // Can't remove the control flow in this case | 488 // Can't remove the control flow in this case |
495 if( lp->outcnt() > 1 ) return NULL; | 489 if (lp->outcnt() > 1) return NULL; |
496 if( rp->outcnt() > 1 ) return NULL; | 490 if (rp->outcnt() > 1) return NULL; |
491 | |
492 IdealLoopTree* r_loop = get_loop(region); | |
493 assert(r_loop == get_loop(iff), "sanity"); | |
494 // Always convert to CMOVE if all results are used only outside this loop. | |
495 bool used_inside_loop = (r_loop == _ltree_root); | |
497 | 496 |
498 // Check profitability | 497 // Check profitability |
499 int cost = 0; | 498 int cost = 0; |
500 int phis = 0; | 499 int phis = 0; |
501 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { | 500 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
502 Node *out = region->fast_out(i); | 501 Node *out = region->fast_out(i); |
503 if( !out->is_Phi() ) continue; // Ignore other control edges, etc | 502 if (!out->is_Phi()) continue; // Ignore other control edges, etc |
504 phis++; | 503 phis++; |
505 PhiNode* phi = out->as_Phi(); | 504 PhiNode* phi = out->as_Phi(); |
506 switch (phi->type()->basic_type()) { | 505 BasicType bt = phi->type()->basic_type(); |
507 case T_LONG: | 506 switch (bt) { |
508 cost++; // Probably encodes as 2 CMOV's | 507 case T_FLOAT: |
508 case T_DOUBLE: { | |
509 cost += Matcher::float_cmove_cost(); // Could be very expensive | |
510 break; | |
511 } | |
512 case T_LONG: { | |
513 cost += Matcher::long_cmove_cost(); // May encode as 2 CMOV's |
514 } | |
509 case T_INT: // These all CMOV fine | 515 case T_INT: // These all CMOV fine |
510 case T_FLOAT: | 516 case T_ADDRESS: { // (RawPtr) |
511 case T_DOUBLE: | |
512 case T_ADDRESS: // (RawPtr) | |
513 cost++; | 517 cost++; |
514 break; | 518 break; |
519 } | |
515 case T_NARROWOOP: // Fall through | 520 case T_NARROWOOP: // Fall through |
516 case T_OBJECT: { // Base oops are OK, but not derived oops | 521 case T_OBJECT: { // Base oops are OK, but not derived oops |
517 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); | 522 const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); |
518 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a | 523 // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a |
519 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus | 524 // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus |
522 // and good. But if the base is dead, we'll not make a CMOVE. Later | 527 // and good. But if the base is dead, we'll not make a CMOVE. Later |
523 // the allocator will have to produce a base by creating a CMOVE of the | 528 // the allocator will have to produce a base by creating a CMOVE of the |
524 // relevant bases. This puts the allocator in the business of | 529 // relevant bases. This puts the allocator in the business of |
525 // manufacturing expensive instructions, generally a bad plan. | 530 // manufacturing expensive instructions, generally a bad plan. |
526 // Just Say No to Conditionally-Moved Derived Pointers. | 531 // Just Say No to Conditionally-Moved Derived Pointers. |
527 if( tp && tp->offset() != 0 ) | 532 if (tp && tp->offset() != 0) |
528 return NULL; | 533 return NULL; |
529 cost++; | 534 cost++; |
530 break; | 535 break; |
531 } | 536 } |
532 default: | 537 default: |
533 return NULL; // In particular, can't do memory or I/O | 538 return NULL; // In particular, can't do memory or I/O |
534 } | 539 } |
535 // Add in cost any speculative ops | 540 // Add in cost any speculative ops |
536 for( uint j = 1; j < region->req(); j++ ) { | 541 for (uint j = 1; j < region->req(); j++) { |
537 Node *proj = region->in(j); | 542 Node *proj = region->in(j); |
538 Node *inp = phi->in(j); | 543 Node *inp = phi->in(j); |
539 if (get_ctrl(inp) == proj) { // Found local op | 544 if (get_ctrl(inp) == proj) { // Found local op |
540 cost++; | 545 cost++; |
541 // Check for a chain of dependent ops; these will all become | 546 // Check for a chain of dependent ops; these will all become |
542 // speculative in a CMOV. | 547 // speculative in a CMOV. |
543 for( uint k = 1; k < inp->req(); k++ ) | 548 for (uint k = 1; k < inp->req(); k++) |
544 if (get_ctrl(inp->in(k)) == proj) | 549 if (get_ctrl(inp->in(k)) == proj) |
545 return NULL; // Too much speculative goo | 550 cost += ConditionalMoveLimit; // Too much speculative goo |
546 } | 551 } |
547 } | 552 } |
548 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. | 553 // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. |
549 // This will likely Split-If, a higher-payoff operation. | 554 // This will likely Split-If, a higher-payoff operation. |
550 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { | 555 for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { |
551 Node* use = phi->fast_out(k); | 556 Node* use = phi->fast_out(k); |
552 if( use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP() ) | 557 if (use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP()) |
553 return NULL; | 558 cost += ConditionalMoveLimit; |
554 } | 559 // Is there a use inside the loop? |
555 } | 560 // Note: check only basic types since CMoveP is pinned. |
556 if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo | 561 if (!used_inside_loop && is_java_primitive(bt)) { |
562 IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use); | |
563 if (r_loop == u_loop || r_loop->is_member(u_loop)) { | |
564 used_inside_loop = true; | |
565 } | |
566 } | |
567 } | |
568 } | |
557 Node* bol = iff->in(1); | 569 Node* bol = iff->in(1); |
558 assert( bol->Opcode() == Op_Bool, "" ); | 570 assert(bol->Opcode() == Op_Bool, ""); |
559 int cmp_op = bol->in(1)->Opcode(); | 571 int cmp_op = bol->in(1)->Opcode(); |
560 // It is expensive to generate flags from a float compare. | 572 // It is expensive to generate flags from a float compare. |
561 // Avoid duplicated float compare. | 573 // Avoid duplicated float compare. |
562 if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; | 574 if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; |
575 | |
576 float infrequent_prob = PROB_UNLIKELY_MAG(3); | |
577 // Ignore cost and blocks frequency if CMOVE can be moved outside the loop. | |
578 if (used_inside_loop) { | |
579 if (cost >= ConditionalMoveLimit) return NULL; // Too much goo | |
580 | |
581 // BlockLayoutByFrequency optimization moves infrequent branch | |
582 // from hot path. No point in CMOV'ing in such case (110 is used | |
583 // instead of 100 to take into account not exactness of float value). | |
584 if (BlockLayoutByFrequency) { | |
585 infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f); | |
586 } | |
587 } | |
588 // Check for highly predictable branch. No point in CMOV'ing if | |
589 // we are going to predict accurately all the time. | |
590 if (iff->_prob < infrequent_prob || | |
591 iff->_prob > (1.0f - infrequent_prob)) | |
592 return NULL; | |
563 | 593 |
564 // -------------- | 594 // -------------- |
565 // Now replace all Phis with CMOV's | 595 // Now replace all Phis with CMOV's |
566 Node *cmov_ctrl = iff->in(0); | 596 Node *cmov_ctrl = iff->in(0); |
567 uint flip = (lp->Opcode() == Op_IfTrue); | 597 uint flip = (lp->Opcode() == Op_IfTrue); |
568 while( 1 ) { | 598 while (1) { |
569 PhiNode* phi = NULL; | 599 PhiNode* phi = NULL; |
570 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { | 600 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { |
571 Node *out = region->fast_out(i); | 601 Node *out = region->fast_out(i); |
572 if (out->is_Phi()) { | 602 if (out->is_Phi()) { |
573 phi = out->as_Phi(); | 603 phi = out->as_Phi(); |
574 break; | 604 break; |
575 } | 605 } |
576 } | 606 } |
577 if (phi == NULL) break; | 607 if (phi == NULL) break; |
578 #ifndef PRODUCT | 608 #ifndef PRODUCT |
579 if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV"); | 609 if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV"); |
580 #endif | 610 #endif |
581 // Move speculative ops | 611 // Move speculative ops |
582 for( uint j = 1; j < region->req(); j++ ) { | 612 for (uint j = 1; j < region->req(); j++) { |
583 Node *proj = region->in(j); | 613 Node *proj = region->in(j); |
584 Node *inp = phi->in(j); | 614 Node *inp = phi->in(j); |
585 if (get_ctrl(inp) == proj) { // Found local op | 615 if (get_ctrl(inp) == proj) { // Found local op |
586 #ifndef PRODUCT | 616 #ifndef PRODUCT |
587 if( PrintOpto && VerifyLoopOptimizations ) { | 617 if (PrintOpto && VerifyLoopOptimizations) { |
588 tty->print(" speculate: "); | 618 tty->print(" speculate: "); |
589 inp->dump(); | 619 inp->dump(); |
590 } | 620 } |
591 #endif | 621 #endif |
592 set_ctrl(inp, cmov_ctrl); | 622 set_ctrl(inp, cmov_ctrl); |
594 } | 624 } |
595 Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) ); | 625 Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) ); |
596 register_new_node( cmov, cmov_ctrl ); | 626 register_new_node( cmov, cmov_ctrl ); |
597 _igvn.replace_node( phi, cmov ); | 627 _igvn.replace_node( phi, cmov ); |
598 #ifndef PRODUCT | 628 #ifndef PRODUCT |
599 if( VerifyLoopOptimizations ) verify(); | 629 if (TraceLoopOpts) { |
630 tty->print("CMOV "); | |
631 r_loop->dump_head(); | |
632 if (Verbose) | |
633 bol->in(1)->dump(1); | |
634 cmov->dump(1); | |
635 } | |
636 if (VerifyLoopOptimizations) verify(); | |
600 #endif | 637 #endif |
601 } | 638 } |
602 | 639 |
603 // The useless CFG diamond will fold up later; see the optimization in | 640 // The useless CFG diamond will fold up later; see the optimization in |
604 // RegionNode::Ideal. | 641 // RegionNode::Ideal. |
674 // Use same limit as split_if_with_blocks_post | 711 // Use same limit as split_if_with_blocks_post |
675 if( C->unique() > 35000 ) return n; // Method too big | 712 if( C->unique() > 35000 ) return n; // Method too big |
676 | 713 |
677 // Split 'n' through the merge point if it is profitable | 714 // Split 'n' through the merge point if it is profitable |
678 Node *phi = split_thru_phi( n, n_blk, policy ); | 715 Node *phi = split_thru_phi( n, n_blk, policy ); |
679 if( !phi ) return n; | 716 if (!phi) return n; |
680 | 717 |
681 // Found a Phi to split thru! | 718 // Found a Phi to split thru! |
682 // Replace 'n' with the new phi | 719 // Replace 'n' with the new phi |
683 _igvn.replace_node( n, phi ); | 720 _igvn.replace_node( n, phi ); |
684 // Moved a load around the loop, 'en-registering' something. | 721 // Moved a load around the loop, 'en-registering' something. |
685 if( n_blk->Opcode() == Op_Loop && n->is_Load() && | 722 if (n_blk->is_Loop() && n->is_Load() && |
686 !phi->in(LoopNode::LoopBackControl)->is_Load() ) | 723 !phi->in(LoopNode::LoopBackControl)->is_Load()) |
687 C->set_major_progress(); | 724 C->set_major_progress(); |
688 | 725 |
689 return phi; | 726 return phi; |
690 } | 727 } |
691 | 728 |