Mercurial > hg > graal-jvmci-8
comparison src/share/vm/opto/loopTransform.cpp @ 3345:bad7ecd0b6ed
5091921: Sign flip issues in loop optimizer
Summary: Fix integer overflow problem in the code generated by loop optimizer.
Reviewed-by: never
author | kvn |
---|---|
date | Wed, 04 May 2011 13:12:42 -0700 |
parents | ae93231c7a1f |
children | f879eafd5835 |
comparison
equal
deleted
inserted
replaced
3344:0139aac70fb5 | 3345:bad7ecd0b6ed |
---|---|
81 return; // Infinite loop | 81 return; // Infinite loop |
82 | 82 |
83 #ifdef ASSERT | 83 #ifdef ASSERT |
84 BoolTest::mask bt = cl->loopexit()->test_trip(); | 84 BoolTest::mask bt = cl->loopexit()->test_trip(); |
85 assert(bt == BoolTest::lt || bt == BoolTest::gt || | 85 assert(bt == BoolTest::lt || bt == BoolTest::gt || |
86 bt == BoolTest::ne, "canonical test is expected"); | 86 (bt == BoolTest::ne && !LoopLimitCheck), "canonical test is expected"); |
87 #endif | 87 #endif |
88 | 88 |
89 Node* init_n = cl->init_trip(); | 89 Node* init_n = cl->init_trip(); |
90 Node* limit_n = cl->limit(); | 90 Node* limit_n = cl->limit(); |
91 if (init_n != NULL && init_n->is_Con() && | 91 if (init_n != NULL && init_n->is_Con() && |
508 // around the loopback from the prior iteration (follow the old-loop | 508 // around the loopback from the prior iteration (follow the old-loop |
509 // backedges) and then map to the new peeled iteration. This leaves | 509 // backedges) and then map to the new peeled iteration. This leaves |
510 // the pre-loop with only 1 user (the new peeled iteration), but the | 510 // the pre-loop with only 1 user (the new peeled iteration), but the |
511 // peeled-loop backedge has 2 users. | 511 // peeled-loop backedge has 2 users. |
512 Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx]; | 512 Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx]; |
513 new_exit_value = move_loop_predicates(entry, new_exit_value); | 513 new_exit_value = move_loop_predicates(entry, new_exit_value, !counted_loop); |
514 _igvn.hash_delete(head); | 514 _igvn.hash_delete(head); |
515 head->set_req(LoopNode::EntryControl, new_exit_value); | 515 head->set_req(LoopNode::EntryControl, new_exit_value); |
516 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { | 516 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) { |
517 Node* old = head->fast_out(j); | 517 Node* old = head->fast_out(j); |
518 if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) { | 518 if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) { |
591 assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); | 591 assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); |
592 if (trip_count > unroll_limit || body_size > unroll_limit) { | 592 if (trip_count > unroll_limit || body_size > unroll_limit) { |
593 return false; | 593 return false; |
594 } | 594 } |
595 | 595 |
596 // Fully unroll a loop with few iterations regardless next | |
597 // conditions since following loop optimizations will split | |
598 // such loop anyway (pre-main-post). | |
599 if (trip_count <= 3) | |
600 return true; | |
601 | |
596 // Take into account that after unroll conjoined heads and tails will fold, | 602 // Take into account that after unroll conjoined heads and tails will fold, |
597 // otherwise policy_unroll() may allow more unrolling than max unrolling. | 603 // otherwise policy_unroll() may allow more unrolling than max unrolling. |
598 uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count; | 604 uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count; |
599 uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE; | 605 uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE; |
600 if (body_size != tst_body_size) // Check for int overflow | 606 if (body_size != tst_body_size) // Check for int overflow |
603 // Unrolling can result in a large amount of node construction | 609 // Unrolling can result in a large amount of node construction |
604 new_body_size >= MaxNodeLimit - phase->C->unique()) { | 610 new_body_size >= MaxNodeLimit - phase->C->unique()) { |
605 return false; | 611 return false; |
606 } | 612 } |
607 | 613 |
608 // Currently we don't have policy to optimize one iteration loops. | |
609 // Maximally unrolling transformation is used for that: | |
610 // it is peeled and the original loop become non reachable (dead). | |
611 // Also fully unroll a loop with few iterations regardless next | |
612 // conditions since following loop optimizations will split | |
613 // such loop anyway (pre-main-post). | |
614 if (trip_count <= 3) | |
615 return true; | |
616 | |
617 // Do not unroll a loop with String intrinsics code. | 614 // Do not unroll a loop with String intrinsics code. |
618 // String intrinsics are large and have loops. | 615 // String intrinsics are large and have loops. |
619 for (uint k = 0; k < _body.size(); k++) { | 616 for (uint k = 0; k < _body.size(); k++) { |
620 Node* n = _body.at(k); | 617 Node* n = _body.at(k); |
621 switch (n->Opcode()) { | 618 switch (n->Opcode()) { |
643 assert(cl->is_normal_loop() || cl->is_main_loop(), ""); | 640 assert(cl->is_normal_loop() || cl->is_main_loop(), ""); |
644 | 641 |
645 if (!cl->is_valid_counted_loop()) | 642 if (!cl->is_valid_counted_loop()) |
646 return false; // Malformed counted loop | 643 return false; // Malformed counted loop |
647 | 644 |
648 // protect against over-unrolling | 645 // Protect against over-unrolling. |
649 if (cl->trip_count() <= 1) return false; | 646 // After split at least one iteration will be executed in pre-loop. |
647 if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false; | |
650 | 648 |
651 int future_unroll_ct = cl->unrolled_count() * 2; | 649 int future_unroll_ct = cl->unrolled_count() * 2; |
652 if (future_unroll_ct > MAX_UNROLL) return false; | 650 if (future_unroll_ct > MAX_UNROLL) return false; |
653 | 651 |
654 // Check for initial stride being a small enough constant | 652 // Check for initial stride being a small enough constant |
676 return false; | 674 return false; |
677 } | 675 } |
678 | 676 |
679 Node *init_n = cl->init_trip(); | 677 Node *init_n = cl->init_trip(); |
680 Node *limit_n = cl->limit(); | 678 Node *limit_n = cl->limit(); |
679 int stride_con = cl->stride_con(); | |
681 // Non-constant bounds. | 680 // Non-constant bounds. |
682 // Protect against over-unrolling when init or/and limit are not constant | 681 // Protect against over-unrolling when init or/and limit are not constant |
683 // (so that trip_count's init value is maxint) but iv range is known. | 682 // (so that trip_count's init value is maxint) but iv range is known. |
684 if (init_n == NULL || !init_n->is_Con() || | 683 if (init_n == NULL || !init_n->is_Con() || |
685 limit_n == NULL || !limit_n->is_Con()) { | 684 limit_n == NULL || !limit_n->is_Con()) { |
686 Node* phi = cl->phi(); | 685 Node* phi = cl->phi(); |
687 if (phi != NULL) { | 686 if (phi != NULL) { |
688 assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi."); | 687 assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi."); |
689 const TypeInt* iv_type = phase->_igvn.type(phi)->is_int(); | 688 const TypeInt* iv_type = phase->_igvn.type(phi)->is_int(); |
690 int next_stride = cl->stride_con() * 2; // stride after this unroll | 689 int next_stride = stride_con * 2; // stride after this unroll |
691 if (next_stride > 0) { | 690 if (next_stride > 0) { |
692 if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow | 691 if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow |
693 iv_type->_lo + next_stride > iv_type->_hi) { | 692 iv_type->_lo + next_stride > iv_type->_hi) { |
694 return false; // over-unrolling | 693 return false; // over-unrolling |
695 } | 694 } |
700 } | 699 } |
701 } | 700 } |
702 } | 701 } |
703 } | 702 } |
704 | 703 |
704 // After unroll limit will be adjusted: new_limit = limit-stride. | |
705 // Bailout if adjustment overflow. | |
706 const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int(); | |
707 if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) || | |
708 stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo)) | |
709 return false; // overflow | |
710 | |
705 // Adjust body_size to determine if we unroll or not | 711 // Adjust body_size to determine if we unroll or not |
706 uint body_size = _body.size(); | 712 uint body_size = _body.size(); |
707 // Key test to unroll CaffeineMark's Logic test | |
708 int xors_in_loop = 0; | |
709 // Also count ModL, DivL and MulL which expand mightly | 713 // Also count ModL, DivL and MulL which expand mightly |
710 for (uint k = 0; k < _body.size(); k++) { | 714 for (uint k = 0; k < _body.size(); k++) { |
711 Node* n = _body.at(k); | 715 Node* n = _body.at(k); |
712 switch (n->Opcode()) { | 716 switch (n->Opcode()) { |
713 case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test | |
714 case Op_ModL: body_size += 30; break; | 717 case Op_ModL: body_size += 30; break; |
715 case Op_DivL: body_size += 30; break; | 718 case Op_DivL: body_size += 30; break; |
716 case Op_MulL: body_size += 10; break; | 719 case Op_MulL: body_size += 10; break; |
717 case Op_StrComp: | 720 case Op_StrComp: |
718 case Op_StrEquals: | 721 case Op_StrEquals: |
725 } // switch | 728 } // switch |
726 } | 729 } |
727 | 730 |
728 // Check for being too big | 731 // Check for being too big |
729 if (body_size > (uint)LoopUnrollLimit) { | 732 if (body_size > (uint)LoopUnrollLimit) { |
730 if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true; | 733 // Normal case: loop too big |
731 // Normal case: loop too big | |
732 return false; | 734 return false; |
733 } | 735 } |
734 | 736 |
735 // Unroll once! (Each trip will soon do double iterations) | 737 // Unroll once! (Each trip will soon do double iterations) |
736 return true; | 738 return true; |
748 | 750 |
749 //------------------------------policy_range_check----------------------------- | 751 //------------------------------policy_range_check----------------------------- |
750 // Return TRUE or FALSE if the loop should be range-check-eliminated. | 752 // Return TRUE or FALSE if the loop should be range-check-eliminated. |
751 // Actually we do iteration-splitting, a more powerful form of RCE. | 753 // Actually we do iteration-splitting, a more powerful form of RCE. |
752 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const { | 754 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const { |
753 if( !RangeCheckElimination ) return false; | 755 if (!RangeCheckElimination) return false; |
754 | 756 |
755 CountedLoopNode *cl = _head->as_CountedLoop(); | 757 CountedLoopNode *cl = _head->as_CountedLoop(); |
756 // If we unrolled with no intention of doing RCE and we later | 758 // If we unrolled with no intention of doing RCE and we later |
757 // changed our minds, we got no pre-loop. Either we need to | 759 // changed our minds, we got no pre-loop. Either we need to |
758 // make a new pre-loop, or we gotta disallow RCE. | 760 // make a new pre-loop, or we gotta disallow RCE. |
759 if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now. | 761 if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. |
760 Node *trip_counter = cl->phi(); | 762 Node *trip_counter = cl->phi(); |
761 | 763 |
762 // Check loop body for tests of trip-counter plus loop-invariant vs | 764 // Check loop body for tests of trip-counter plus loop-invariant vs |
763 // loop-invariant. | 765 // loop-invariant. |
764 for( uint i = 0; i < _body.size(); i++ ) { | 766 for (uint i = 0; i < _body.size(); i++) { |
765 Node *iff = _body[i]; | 767 Node *iff = _body[i]; |
766 if( iff->Opcode() == Op_If ) { // Test? | 768 if (iff->Opcode() == Op_If) { // Test? |
767 | 769 |
768 // Comparing trip+off vs limit | 770 // Comparing trip+off vs limit |
769 Node *bol = iff->in(1); | 771 Node *bol = iff->in(1); |
770 if( bol->req() != 2 ) continue; // dead constant test | 772 if (bol->req() != 2) continue; // dead constant test |
771 if (!bol->is_Bool()) { | 773 if (!bol->is_Bool()) { |
772 assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); | 774 assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); |
773 continue; | 775 continue; |
774 } | 776 } |
777 if (bol->as_Bool()->_test._test == BoolTest::ne) | |
778 continue; // not RC | |
779 | |
775 Node *cmp = bol->in(1); | 780 Node *cmp = bol->in(1); |
776 | 781 |
777 Node *rc_exp = cmp->in(1); | 782 Node *rc_exp = cmp->in(1); |
778 Node *limit = cmp->in(2); | 783 Node *limit = cmp->in(2); |
779 | 784 |
1065 // direction: | 1070 // direction: |
1066 // positive stride use < | 1071 // positive stride use < |
1067 // negative stride use > | 1072 // negative stride use > |
1068 | 1073 |
1069 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) { | 1074 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) { |
1075 assert(!LoopLimitCheck, "only canonical tests (lt or gt) are expected"); | |
1070 | 1076 |
1071 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt; | 1077 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt; |
1072 // Modify pre loop end condition | 1078 // Modify pre loop end condition |
1073 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool(); | 1079 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool(); |
1074 BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test); | 1080 BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test); |
1091 | 1097 |
1092 // Flag main loop | 1098 // Flag main loop |
1093 main_head->set_main_loop(); | 1099 main_head->set_main_loop(); |
1094 if( peel_only ) main_head->set_main_no_pre_loop(); | 1100 if( peel_only ) main_head->set_main_no_pre_loop(); |
1095 | 1101 |
1102 // Subtract a trip count for the pre-loop. | |
1103 main_head->set_trip_count(main_head->trip_count() - 1); | |
1104 | |
1096 // It's difficult to be precise about the trip-counts | 1105 // It's difficult to be precise about the trip-counts |
1097 // for the pre/post loops. They are usually very short, | 1106 // for the pre/post loops. They are usually very short, |
1098 // so guess that 4 trips is a reasonable value. | 1107 // so guess that 4 trips is a reasonable value. |
1099 post_head->set_profile_trip_cnt(4.0); | 1108 post_head->set_profile_trip_cnt(4.0); |
1100 pre_head->set_profile_trip_cnt(4.0); | 1109 pre_head->set_profile_trip_cnt(4.0); |
1124 if (PrintOpto && VerifyLoopOptimizations) { | 1133 if (PrintOpto && VerifyLoopOptimizations) { |
1125 tty->print("Unrolling "); | 1134 tty->print("Unrolling "); |
1126 loop->dump_head(); | 1135 loop->dump_head(); |
1127 } else if (TraceLoopOpts) { | 1136 } else if (TraceLoopOpts) { |
1128 if (loop_head->trip_count() < (uint)LoopUnrollLimit) { | 1137 if (loop_head->trip_count() < (uint)LoopUnrollLimit) { |
1129 tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); | 1138 tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count()); |
1130 } else { | 1139 } else { |
1131 tty->print("Unroll %d ", loop_head->unrolled_count()*2); | 1140 tty->print("Unroll %d ", loop_head->unrolled_count()*2); |
1132 } | 1141 } |
1133 loop->dump_head(); | 1142 loop->dump_head(); |
1134 } | 1143 } |
1135 #endif | 1144 #endif |
1136 | 1145 |
1142 Node *limit = loop_head->limit(); | 1151 Node *limit = loop_head->limit(); |
1143 Node *init = loop_head->init_trip(); | 1152 Node *init = loop_head->init_trip(); |
1144 Node *stride = loop_head->stride(); | 1153 Node *stride = loop_head->stride(); |
1145 | 1154 |
1146 Node *opaq = NULL; | 1155 Node *opaq = NULL; |
1147 if( adjust_min_trip ) { // If not maximally unrolling, need adjustment | 1156 if (adjust_min_trip) { // If not maximally unrolling, need adjustment |
1157 // Search for zero-trip guard. | |
1148 assert( loop_head->is_main_loop(), "" ); | 1158 assert( loop_head->is_main_loop(), "" ); |
1149 assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); | 1159 assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" ); |
1150 Node *iff = ctrl->in(0); | 1160 Node *iff = ctrl->in(0); |
1151 assert( iff->Opcode() == Op_If, "" ); | 1161 assert( iff->Opcode() == Op_If, "" ); |
1152 Node *bol = iff->in(1); | 1162 Node *bol = iff->in(1); |
1153 assert( bol->Opcode() == Op_Bool, "" ); | 1163 assert( bol->Opcode() == Op_Bool, "" ); |
1154 Node *cmp = bol->in(1); | 1164 Node *cmp = bol->in(1); |
1155 assert( cmp->Opcode() == Op_CmpI, "" ); | 1165 assert( cmp->Opcode() == Op_CmpI, "" ); |
1156 opaq = cmp->in(2); | 1166 opaq = cmp->in(2); |
1157 // Occasionally it's possible for a pre-loop Opaque1 node to be | 1167 // Occasionally it's possible for a zero-trip guard Opaque1 node to be |
1158 // optimized away and then another round of loop opts attempted. | 1168 // optimized away and then another round of loop opts attempted. |
1159 // We can not optimize this particular loop in that case. | 1169 // We can not optimize this particular loop in that case. |
1160 if( opaq->Opcode() != Op_Opaque1 ) | 1170 if (opaq->Opcode() != Op_Opaque1) |
1161 return; // Cannot find pre-loop! Bail out! | 1171 return; // Cannot find zero-trip guard! Bail out! |
1172 // Zero-trip test uses an 'opaque' node which is not shared. | |
1173 assert(opaq->outcnt() == 1 && opaq->in(1) == limit, ""); | |
1162 } | 1174 } |
1163 | 1175 |
1164 C->set_major_progress(); | 1176 C->set_major_progress(); |
1165 | 1177 |
1166 // Adjust max trip count. The trip count is intentionally rounded | 1178 Node* new_limit = NULL; |
1167 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, | 1179 if (UnrollLimitCheck) { |
1168 // the main, unrolled, part of the loop will never execute as it is protected | 1180 int stride_con = stride->get_int(); |
1169 // by the min-trip test. See bug 4834191 for a case where we over-unrolled | 1181 int stride_p = (stride_con > 0) ? stride_con : -stride_con; |
1170 // and later determined that part of the unrolled loop was dead. | 1182 uint old_trip_count = loop_head->trip_count(); |
1171 loop_head->set_trip_count(loop_head->trip_count() / 2); | 1183 // Verify that unroll policy result is still valid. |
1172 | 1184 assert(old_trip_count > 1 && |
1173 // Double the count of original iterations in the unrolled loop body. | 1185 (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity"); |
1174 loop_head->double_unrolled_count(); | 1186 |
1175 | 1187 // Adjust loop limit to keep valid iterations number after unroll. |
1176 // ----------- | 1188 // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride |
1177 // Step 2: Cut back the trip counter for an unroll amount of 2. | 1189 // which may overflow. |
1178 // Loop will normally trip (limit - init)/stride_con. Since it's a | 1190 if (!adjust_min_trip) { |
1179 // CountedLoop this is exact (stride divides limit-init exactly). | 1191 assert(old_trip_count > 1 && (old_trip_count & 1) == 0, |
1180 // We are going to double the loop body, so we want to knock off any | 1192 "odd trip count for maximally unroll"); |
1181 // odd iteration: (trip_cnt & ~1). Then back compute a new limit. | 1193 // Don't need to adjust limit for maximally unroll since trip count is even. |
1182 Node *span = new (C, 3) SubINode( limit, init ); | 1194 } else if (loop_head->has_exact_trip_count() && init->is_Con()) { |
1183 register_new_node( span, ctrl ); | 1195 // Loop's limit is constant. Loop's init could be constant when pre-loop |
1184 Node *trip = new (C, 3) DivINode( 0, span, stride ); | 1196 // become peeled iteration. |
1185 register_new_node( trip, ctrl ); | 1197 long init_con = init->get_int(); |
1186 Node *mtwo = _igvn.intcon(-2); | 1198 // We can keep old loop limit if iterations count stays the same: |
1187 set_ctrl(mtwo, C->root()); | 1199 // old_trip_count == new_trip_count * 2 |
1188 Node *rond = new (C, 3) AndINode( trip, mtwo ); | 1200 // Note: since old_trip_count >= 2 then new_trip_count >= 1 |
1189 register_new_node( rond, ctrl ); | 1201 // so we also don't need to adjust zero trip test. |
1190 Node *spn2 = new (C, 3) MulINode( rond, stride ); | 1202 long limit_con = limit->get_int(); |
1191 register_new_node( spn2, ctrl ); | 1203 // (stride_con*2) not overflow since stride_con <= 8. |
1192 Node *lim2 = new (C, 3) AddINode( spn2, init ); | 1204 int new_stride_con = stride_con * 2; |
1193 register_new_node( lim2, ctrl ); | 1205 int stride_m = new_stride_con - (stride_con > 0 ? 1 : -1); |
1194 | 1206 long trip_count = (limit_con - init_con + stride_m)/new_stride_con; |
1195 // Hammer in the new limit | 1207 // New trip count should satisfy next conditions. |
1196 Node *ctrl2 = loop_end->in(0); | 1208 assert(trip_count > 0 && (julong)trip_count < (julong)max_juint/2, "sanity"); |
1197 Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 ); | 1209 uint new_trip_count = (uint)trip_count; |
1198 register_new_node( cmp2, ctrl2 ); | 1210 adjust_min_trip = (old_trip_count != new_trip_count*2); |
1199 Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); | 1211 } |
1200 register_new_node( bol2, ctrl2 ); | 1212 |
1201 _igvn.hash_delete(loop_end); | 1213 if (adjust_min_trip) { |
1202 loop_end->set_req(CountedLoopEndNode::TestValue, bol2); | 1214 // Step 2: Adjust the trip limit if it is called for. |
1203 | 1215 // The adjustment amount is -stride. Need to make sure if the |
1204 // Step 3: Find the min-trip test guaranteed before a 'main' loop. | 1216 // adjustment underflows or overflows, then the main loop is skipped. |
1205 // Make it a 1-trip test (means at least 2 trips). | 1217 Node* cmp = loop_end->cmp_node(); |
1206 if( adjust_min_trip ) { | 1218 assert(cmp->in(2) == limit, "sanity"); |
1207 // Guard test uses an 'opaque' node which is not shared. Hence I | 1219 assert(opaq != NULL && opaq->in(1) == limit, "sanity"); |
1208 // can edit it's inputs directly. Hammer in the new limit for the | 1220 |
1209 // minimum-trip guard. | 1221 // Verify that policy_unroll result is still valid. |
1210 assert( opaq->outcnt() == 1, "" ); | 1222 const TypeInt* limit_type = _igvn.type(limit)->is_int(); |
1211 _igvn.hash_delete(opaq); | 1223 assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || |
1212 opaq->set_req(1, lim2); | 1224 stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity"); |
1213 } | 1225 |
1226 if (limit->is_Con()) { | |
1227 // The check in policy_unroll and the assert above guarantee | |
1228 // no underflow if limit is constant. | |
1229 new_limit = _igvn.intcon(limit->get_int() - stride_con); | |
1230 set_ctrl(new_limit, C->root()); | |
1231 } else { | |
1232 if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) || | |
1233 stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) { | |
1234 // No underflow. | |
1235 new_limit = new (C, 3) SubINode(limit, stride); | |
1236 } else { | |
1237 // (limit - stride) may underflow. | |
1238 // Clamp the adjustment value with MININT or MAXINT: | |
1239 // | |
1240 // new_limit = limit-stride | |
1241 // if (stride > 0) | |
1242 // new_limit = (limit < new_limit) ? MININT : new_limit; | |
1243 // else | |
1244 // new_limit = (limit > new_limit) ? MAXINT : new_limit; | |
1245 // | |
1246 BoolTest::mask bt = loop_end->test_trip(); | |
1247 assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected"); | |
1248 Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint); | |
1249 set_ctrl(adj_max, C->root()); | |
1250 Node* old_limit = NULL; | |
1251 Node* adj_limit = NULL; | |
1252 Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL; | |
1253 if (loop_head->unrolled_count() > 1 && | |
1254 limit->is_CMove() && limit->Opcode() == Op_CMoveI && | |
1255 limit->in(CMoveNode::IfTrue) == adj_max && | |
1256 bol->as_Bool()->_test._test == bt && | |
1257 bol->in(1)->Opcode() == Op_CmpI && | |
1258 bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) { | |
1259 // Loop was unrolled before. | |
1260 // Optimize the limit to avoid nested CMove: | |
1261 // use original limit as old limit. | |
1262 old_limit = bol->in(1)->in(1); | |
1263 // Adjust previous adjusted limit. | |
1264 adj_limit = limit->in(CMoveNode::IfFalse); | |
1265 adj_limit = new (C, 3) SubINode(adj_limit, stride); | |
1266 } else { | |
1267 old_limit = limit; | |
1268 adj_limit = new (C, 3) SubINode(limit, stride); | |
1269 } | |
1270 assert(old_limit != NULL && adj_limit != NULL, ""); | |
1271 register_new_node( adj_limit, ctrl ); // adjust amount | |
1272 Node* adj_cmp = new (C, 3) CmpINode(old_limit, adj_limit); | |
1273 register_new_node( adj_cmp, ctrl ); | |
1274 Node* adj_bool = new (C, 2) BoolNode(adj_cmp, bt); | |
1275 register_new_node( adj_bool, ctrl ); | |
1276 new_limit = new (C, 4) CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT); | |
1277 } | |
1278 register_new_node(new_limit, ctrl); | |
1279 } | |
1280 assert(new_limit != NULL, ""); | |
1281 if (limit->outcnt() == 2) { | |
1282 // Replace old limit if it is used only in loop tests. | |
1283 _igvn.replace_node(limit, new_limit); | |
1284 } else { | |
1285 // Replace in loop test. | |
1286 _igvn.hash_delete(cmp); | |
1287 cmp->set_req(2, new_limit); | |
1288 | |
1289 // Step 3: Find the min-trip test guaranteed before a 'main' loop. | |
1290 // Make it a 1-trip test (means at least 2 trips). | |
1291 | |
1292 // Guard test uses an 'opaque' node which is not shared. Hence I | |
1293 // can edit it's inputs directly. Hammer in the new limit for the | |
1294 // minimum-trip guard. | |
1295 assert(opaq->outcnt() == 1, ""); | |
1296 _igvn.hash_delete(opaq); | |
1297 opaq->set_req(1, new_limit); | |
1298 } | |
1299 } | |
1300 | |
1301 // Adjust max trip count. The trip count is intentionally rounded | |
1302 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, | |
1303 // the main, unrolled, part of the loop will never execute as it is protected | |
1304 // by the min-trip test. See bug 4834191 for a case where we over-unrolled | |
1305 // and later determined that part of the unrolled loop was dead. | |
1306 loop_head->set_trip_count(old_trip_count / 2); | |
1307 | |
1308 // Double the count of original iterations in the unrolled loop body. | |
1309 loop_head->double_unrolled_count(); | |
1310 | |
1311 } else { // LoopLimitCheck | |
1312 | |
1313 // Adjust max trip count. The trip count is intentionally rounded | |
1314 // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll, | |
1315 // the main, unrolled, part of the loop will never execute as it is protected | |
1316 // by the min-trip test. See bug 4834191 for a case where we over-unrolled | |
1317 // and later determined that part of the unrolled loop was dead. | |
1318 loop_head->set_trip_count(loop_head->trip_count() / 2); | |
1319 | |
1320 // Double the count of original iterations in the unrolled loop body. | |
1321 loop_head->double_unrolled_count(); | |
1322 | |
1323 // ----------- | |
1324 // Step 2: Cut back the trip counter for an unroll amount of 2. | |
1325 // Loop will normally trip (limit - init)/stride_con. Since it's a | |
1326 // CountedLoop this is exact (stride divides limit-init exactly). | |
1327 // We are going to double the loop body, so we want to knock off any | |
1328 // odd iteration: (trip_cnt & ~1). Then back compute a new limit. | |
1329 Node *span = new (C, 3) SubINode( limit, init ); | |
1330 register_new_node( span, ctrl ); | |
1331 Node *trip = new (C, 3) DivINode( 0, span, stride ); | |
1332 register_new_node( trip, ctrl ); | |
1333 Node *mtwo = _igvn.intcon(-2); | |
1334 set_ctrl(mtwo, C->root()); | |
1335 Node *rond = new (C, 3) AndINode( trip, mtwo ); | |
1336 register_new_node( rond, ctrl ); | |
1337 Node *spn2 = new (C, 3) MulINode( rond, stride ); | |
1338 register_new_node( spn2, ctrl ); | |
1339 new_limit = new (C, 3) AddINode( spn2, init ); | |
1340 register_new_node( new_limit, ctrl ); | |
1341 | |
1342 // Hammer in the new limit | |
1343 Node *ctrl2 = loop_end->in(0); | |
1344 Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), new_limit ); | |
1345 register_new_node( cmp2, ctrl2 ); | |
1346 Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() ); | |
1347 register_new_node( bol2, ctrl2 ); | |
1348 _igvn.hash_delete(loop_end); | |
1349 loop_end->set_req(CountedLoopEndNode::TestValue, bol2); | |
1350 | |
1351 // Step 3: Find the min-trip test guaranteed before a 'main' loop. | |
1352 // Make it a 1-trip test (means at least 2 trips). | |
1353 if( adjust_min_trip ) { | |
1354 assert( new_limit != NULL, "" ); | |
1355 // Guard test uses an 'opaque' node which is not shared. Hence I | |
1356 // can edit it's inputs directly. Hammer in the new limit for the | |
1357 // minimum-trip guard. | |
1358 assert( opaq->outcnt() == 1, "" ); | |
1359 _igvn.hash_delete(opaq); | |
1360 opaq->set_req(1, new_limit); | |
1361 } | |
1362 } // LoopLimitCheck | |
1214 | 1363 |
1215 // --------- | 1364 // --------- |
1216 // Step 4: Clone the loop body. Move it inside the loop. This loop body | 1365 // Step 4: Clone the loop body. Move it inside the loop. This loop body |
1217 // represents the odd iterations; since the loop trips an even number of | 1366 // represents the odd iterations; since the loop trips an even number of |
1218 // times its backedge is never taken. Kill the backedge. | 1367 // times its backedge is never taken. Kill the backedge. |
1264 | 1413 |
1265 //------------------------------do_maximally_unroll---------------------------- | 1414 //------------------------------do_maximally_unroll---------------------------- |
1266 | 1415 |
1267 void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) { | 1416 void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) { |
1268 CountedLoopNode *cl = loop->_head->as_CountedLoop(); | 1417 CountedLoopNode *cl = loop->_head->as_CountedLoop(); |
1418 assert(cl->has_exact_trip_count(), "trip count is not exact"); | |
1269 assert(cl->trip_count() > 0, ""); | 1419 assert(cl->trip_count() > 0, ""); |
1270 #ifndef PRODUCT | 1420 #ifndef PRODUCT |
1271 if (TraceLoopOpts) { | 1421 if (TraceLoopOpts) { |
1272 tty->print("MaxUnroll %d ", cl->trip_count()); | 1422 tty->print("MaxUnroll %d ", cl->trip_count()); |
1273 loop->dump_head(); | 1423 loop->dump_head(); |
1280 } | 1430 } |
1281 | 1431 |
1282 // Now its tripping an even number of times remaining. Double loop body. | 1432 // Now its tripping an even number of times remaining. Double loop body. |
1283 // Do not adjust pre-guards; they are not needed and do not exist. | 1433 // Do not adjust pre-guards; they are not needed and do not exist. |
1284 if (cl->trip_count() > 0) { | 1434 if (cl->trip_count() > 0) { |
1435 assert((cl->trip_count() & 1) == 0, "missed peeling"); | |
1285 do_unroll(loop, old_new, false); | 1436 do_unroll(loop, old_new, false); |
1286 } | 1437 } |
1287 } | 1438 } |
1288 | 1439 |
1289 //------------------------------dominates_backedge--------------------------------- | 1440 //------------------------------dominates_backedge--------------------------------- |
1293 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); | 1444 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl); |
1294 return _phase->dom_lca_internal(ctrl, backedge) == ctrl; | 1445 return _phase->dom_lca_internal(ctrl, backedge) == ctrl; |
1295 } | 1446 } |
1296 | 1447 |
1297 //------------------------------add_constraint--------------------------------- | 1448 //------------------------------add_constraint--------------------------------- |
1298 // Constrain the main loop iterations so the condition: | 1449 // Constrain the main loop iterations so the conditions: |
1299 // scale_con * I + offset < limit | 1450 // low_limit <= scale_con * I + offset < upper_limit |
1300 // always holds true. That is, either increase the number of iterations in | 1451 // always holds true. That is, either increase the number of iterations in |
1301 // the pre-loop or the post-loop until the condition holds true in the main | 1452 // the pre-loop or the post-loop until the condition holds true in the main |
1302 // loop. Stride, scale, offset and limit are all loop invariant. Further, | 1453 // loop. Stride, scale, offset and limit are all loop invariant. Further, |
1303 // stride and scale are constants (offset and limit often are). | 1454 // stride and scale are constants (offset and limit often are). |
1304 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { | 1455 void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) { |
1305 | |
1306 // Compute "I :: (limit-offset)/scale_con" | |
1307 Node *con = new (C, 3) SubINode( limit, offset ); | |
1308 register_new_node( con, pre_ctrl ); | |
1309 Node *scale = _igvn.intcon(scale_con); | |
1310 set_ctrl(scale, C->root()); | |
1311 Node *X = new (C, 3) DivINode( 0, con, scale ); | |
1312 register_new_node( X, pre_ctrl ); | |
1313 | |
1314 // For positive stride, the pre-loop limit always uses a MAX function | 1456 // For positive stride, the pre-loop limit always uses a MAX function |
1315 // and the main loop a MIN function. For negative stride these are | 1457 // and the main loop a MIN function. For negative stride these are |
1316 // reversed. | 1458 // reversed. |
1317 | 1459 |
1318 // Also for positive stride*scale the affine function is increasing, so the | 1460 // Also for positive stride*scale the affine function is increasing, so the |
1319 // pre-loop must check for underflow and the post-loop for overflow. | 1461 // pre-loop must check for underflow and the post-loop for overflow. |
1320 // Negative stride*scale reverses this; pre-loop checks for overflow and | 1462 // Negative stride*scale reverses this; pre-loop checks for overflow and |
1321 // post-loop for underflow. | 1463 // post-loop for underflow. |
1322 if( stride_con*scale_con > 0 ) { | 1464 if (stride_con*scale_con > 0) { |
1323 // Compute I < (limit-offset)/scale_con | 1465 // The overflow limit: scale*I+offset < upper_limit |
1324 // Adjust main-loop last iteration to be MIN/MAX(main_loop,X) | 1466 // For main-loop compute |
1325 *main_limit = (stride_con > 0) | 1467 // ( if (scale > 0) /* and stride > 0 */ |
1326 ? (Node*)(new (C, 3) MinINode( *main_limit, X )) | 1468 // I < (upper_limit-offset)/scale |
1327 : (Node*)(new (C, 3) MaxINode( *main_limit, X )); | 1469 // else /* scale < 0 and stride < 0 */ |
1328 register_new_node( *main_limit, pre_ctrl ); | 1470 // I > (upper_limit-offset)/scale |
1329 | 1471 // ) |
1330 } else { | 1472 // |
1331 // Compute (limit-offset)/scale_con + SGN(-scale_con) <= I | 1473 // (upper_limit-offset) may overflow when offset < 0. |
1332 // Add the negation of the main-loop constraint to the pre-loop. | 1474 // But it is fine since main loop will either have |
1333 // See footnote [++] below for a derivation of the limit expression. | 1475 // less iterations or will be skipped in such case. |
1334 Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1); | 1476 Node *con = new (C, 3) SubINode(upper_limit, offset); |
1335 set_ctrl(incr, C->root()); | 1477 register_new_node(con, pre_ctrl); |
1336 Node *adj = new (C, 3) AddINode( X, incr ); | 1478 Node *scale = _igvn.intcon(scale_con); |
1337 register_new_node( adj, pre_ctrl ); | 1479 set_ctrl(scale, C->root()); |
1338 *pre_limit = (scale_con > 0) | 1480 Node *X = new (C, 3) DivINode(0, con, scale); |
1339 ? (Node*)new (C, 3) MinINode( *pre_limit, adj ) | 1481 register_new_node(X, pre_ctrl); |
1340 : (Node*)new (C, 3) MaxINode( *pre_limit, adj ); | 1482 |
1341 register_new_node( *pre_limit, pre_ctrl ); | 1483 // Adjust main-loop last iteration |
1342 | 1484 Node *loop_limit = *main_limit; |
1343 // [++] Here's the algebra that justifies the pre-loop limit expression: | 1485 loop_limit = (stride_con > 0) // scale > 0 |
1344 // | 1486 ? (Node*)(new (C, 3) MinINode(loop_limit, X)) |
1345 // NOT( scale_con * I + offset < limit ) | 1487 : (Node*)(new (C, 3) MaxINode(loop_limit, X)); |
1346 // == | 1488 register_new_node(loop_limit, pre_ctrl); |
1347 // scale_con * I + offset >= limit | 1489 *main_limit = loop_limit; |
1348 // == | 1490 |
1349 // SGN(scale_con) * I >= (limit-offset)/|scale_con| | 1491 // The underflow limit: low_limit <= scale*I+offset. |
1350 // == | 1492 // For pre-loop compute |
1351 // (limit-offset)/|scale_con| <= I * SGN(scale_con) | 1493 // NOT(scale*I+offset >= low_limit) |
1352 // == | 1494 // scale*I+offset < low_limit |
1353 // (limit-offset)/|scale_con|-1 < I * SGN(scale_con) | 1495 // ( if (scale > 0) /* and stride > 0 */ |
1354 // == | 1496 // I < (low_limit-offset)/scale |
1355 // ( if (scale_con > 0) /*common case*/ | 1497 // else /* scale < 0 and stride < 0 */ |
1356 // (limit-offset)/scale_con - 1 < I | 1498 // I > (low_limit-offset)/scale |
1357 // else | 1499 // ) |
1358 // (limit-offset)/scale_con + 1 > I | 1500 |
1359 // ) | 1501 if (low_limit->get_int() == -max_jint) { |
1360 // ( if (scale_con > 0) /*common case*/ | 1502 if (!RangeLimitCheck) return; |
1361 // (limit-offset)/scale_con + SGN(-scale_con) < I | 1503 // We need this guard when scale*pre_limit+offset >= limit |
1362 // else | 1504 // due to underflow so we need execute pre-loop until |
1363 // (limit-offset)/scale_con + SGN(-scale_con) > I | 1505 // scale*I+offset >= min_int. But (low_limit-offset) will |
1506 // underflow when offset > 0 and X will be > original_limit. | |
1507 // To avoid it we replace offset = offset > 0 ? 0 : offset | |
1508 // and add min(pre_limit, original_limit). | |
1509 Node* shift = _igvn.intcon(31); | |
1510 set_ctrl(shift, C->root()); | |
1511 Node *neg_off = new (C, 3) RShiftINode(offset, shift); | |
1512 register_new_node(neg_off, pre_ctrl); | |
1513 offset = new (C, 3) AndINode(offset, neg_off); | |
1514 register_new_node(offset, pre_ctrl); | |
1515 } else { | |
1516 assert(low_limit->get_int() == 0, "wrong low limit for range check"); | |
1517 // The only problem we have here when offset == min_int | |
1518 // since (0-min_int) == min_int. It may be fine for scale > 0 | |
1519 // but for scale < 0 X will be < original_limit. | |
1520 } | |
1521 con = new (C, 3) SubINode(low_limit, offset); | |
1522 register_new_node(con, pre_ctrl); | |
1523 scale = _igvn.intcon(scale_con); | |
1524 set_ctrl(scale, C->root()); | |
1525 X = new (C, 3) DivINode(0, con, scale); | |
1526 register_new_node(X, pre_ctrl); | |
1527 | |
1528 // Adjust pre-loop last iteration | |
1529 loop_limit = *pre_limit; | |
1530 loop_limit = (stride_con > 0) // scale > 0 | |
1531 ? (Node*)(new (C, 3) MaxINode(loop_limit, X)) | |
1532 : (Node*)(new (C, 3) MinINode(loop_limit, X)); | |
1533 register_new_node( loop_limit, pre_ctrl ); | |
1534 *pre_limit = loop_limit; | |
1535 | |
1536 } else { // stride_con*scale_con < 0 | |
1537 // For negative stride*scale pre-loop checks for overflow and | |
1538 // post-loop for underflow. | |
1539 // | |
1540 // The underflow limit: low_limit <= scale*I+offset. | |
1541 // For main-loop compute | |
1542 // scale*I+offset+1 > low_limit | |
1543 // ( if (scale < 0) /* and stride > 0 */ | |
1544 // I < (low_limit-(offset+1))/scale | |
1545 // else /* scale < 0 and stride < 0 */ | |
1546 // I > (low_limit-(offset+1))/scale | |
1547 // ) | |
1548 | |
1549 if (low_limit->get_int() == -max_jint) { | |
1550 if (!RangeLimitCheck) return; | |
1551 } else { | |
1552 assert(low_limit->get_int() == 0, "wrong low limit for range check"); | |
1553 } | |
1554 | |
1555 Node *one = _igvn.intcon(1); | |
1556 set_ctrl(one, C->root()); | |
1557 Node *plus_one = new (C, 3) AddINode(offset, one); | |
1558 register_new_node( plus_one, pre_ctrl ); | |
1559 Node *con = new (C, 3) SubINode(low_limit, plus_one); | |
1560 register_new_node(con, pre_ctrl); | |
1561 Node *scale = _igvn.intcon(scale_con); | |
1562 set_ctrl(scale, C->root()); | |
1563 Node *X = new (C, 3) DivINode(0, con, scale); | |
1564 register_new_node(X, pre_ctrl); | |
1565 | |
1566 // Adjust main-loop last iteration | |
1567 Node *loop_limit = *main_limit; | |
1568 loop_limit = (stride_con > 0) // scale < 0 | |
1569 ? (Node*)(new (C, 3) MinINode(loop_limit, X)) | |
1570 : (Node*)(new (C, 3) MaxINode(loop_limit, X)); | |
1571 register_new_node(loop_limit, pre_ctrl); | |
1572 *main_limit = loop_limit; | |
1573 | |
1574 // The overflow limit: scale*I+offset < upper_limit | |
1575 // For pre-loop compute | |
1576 // NOT(scale*I+offset < upper_limit) | |
1577 // scale*I+offset >= upper_limit | |
1578 // scale*I+offset+1 > upper_limit | |
1579 // ( if (scale < 0) /* and stride > 0 */ | |
1580 // I < (upper_limit-(offset+1))/scale | |
1581 // else /* scale < 0 and stride < 0 */ | |
1582 // I > (upper_limit-(offset+1))/scale | |
1583 // ) | |
1584 plus_one = new (C, 3) AddINode(offset, one); | |
1585 register_new_node( plus_one, pre_ctrl ); | |
1586 con = new (C, 3) SubINode(upper_limit, plus_one); | |
1587 register_new_node(con, pre_ctrl); | |
1588 scale = _igvn.intcon(scale_con); | |
1589 set_ctrl(scale, C->root()); | |
1590 X = new (C, 3) DivINode(0, con, scale); | |
1591 register_new_node(X, pre_ctrl); | |
1592 | |
1593 // Adjust pre-loop last iteration | |
1594 loop_limit = *pre_limit; | |
1595 loop_limit = (stride_con > 0) // scale < 0 | |
1596 ? (Node*)(new (C, 3) MaxINode(loop_limit, X)) | |
1597 : (Node*)(new (C, 3) MinINode(loop_limit, X)); | |
1598 register_new_node( loop_limit, pre_ctrl ); | |
1599 *pre_limit = loop_limit; | |
1600 | |
1364 } | 1601 } |
1365 } | 1602 } |
1366 | 1603 |
1367 | 1604 |
1368 //------------------------------is_scaled_iv--------------------------------- | 1605 //------------------------------is_scaled_iv--------------------------------- |
1489 Node *bolzm = iffm->in(1); | 1726 Node *bolzm = iffm->in(1); |
1490 assert(bolzm->Opcode() == Op_Bool, ""); | 1727 assert(bolzm->Opcode() == Op_Bool, ""); |
1491 Node *cmpzm = bolzm->in(1); | 1728 Node *cmpzm = bolzm->in(1); |
1492 assert(cmpzm->is_Cmp(), ""); | 1729 assert(cmpzm->is_Cmp(), ""); |
1493 Node *opqzm = cmpzm->in(2); | 1730 Node *opqzm = cmpzm->in(2); |
1494 // Can not optimize a loop if pre-loop Opaque1 node is optimized | 1731 // Can not optimize a loop if zero-trip Opaque1 node is optimized |
1495 // away and then another round of loop opts attempted. | 1732 // away and then another round of loop opts attempted. |
1496 if (opqzm->Opcode() != Op_Opaque1) | 1733 if (opqzm->Opcode() != Op_Opaque1) |
1497 return; | 1734 return; |
1498 assert(opqzm->in(1) == main_limit, "do not understand situation"); | 1735 assert(opqzm->in(1) == main_limit, "do not understand situation"); |
1499 | 1736 |
1524 // Must know if its a count-up or count-down loop | 1761 // Must know if its a count-up or count-down loop |
1525 | 1762 |
1526 int stride_con = cl->stride_con(); | 1763 int stride_con = cl->stride_con(); |
1527 Node *zero = _igvn.intcon(0); | 1764 Node *zero = _igvn.intcon(0); |
1528 Node *one = _igvn.intcon(1); | 1765 Node *one = _igvn.intcon(1); |
1766 // Use symmetrical int range [-max_jint,max_jint] | |
1767 Node *mini = _igvn.intcon(-max_jint); | |
1529 set_ctrl(zero, C->root()); | 1768 set_ctrl(zero, C->root()); |
1530 set_ctrl(one, C->root()); | 1769 set_ctrl(one, C->root()); |
1770 set_ctrl(mini, C->root()); | |
1531 | 1771 |
1532 // Range checks that do not dominate the loop backedge (ie. | 1772 // Range checks that do not dominate the loop backedge (ie. |
1533 // conditionally executed) can lengthen the pre loop limit beyond | 1773 // conditionally executed) can lengthen the pre loop limit beyond |
1534 // the original loop limit. To prevent this, the pre limit is | 1774 // the original loop limit. To prevent this, the pre limit is |
1535 // (for stride > 0) MINed with the original loop limit (MAXed | 1775 // (for stride > 0) MINed with the original loop limit (MAXed |
1600 // As above for the 'limit', the 'offset' maybe pinned below the | 1840 // As above for the 'limit', the 'offset' maybe pinned below the |
1601 // zero trip test. | 1841 // zero trip test. |
1602 if( offset_c == ctrl ) { | 1842 if( offset_c == ctrl ) { |
1603 continue; // Don't rce this check but continue looking for other candidates. | 1843 continue; // Don't rce this check but continue looking for other candidates. |
1604 } | 1844 } |
1605 | 1845 #ifdef ASSERT |
1846 if (TraceRangeLimitCheck) { | |
1847 tty->print_cr("RC bool node%s", flip ? " flipped:" : ":"); | |
1848 bol->dump(2); | |
1849 } | |
1850 #endif | |
1606 // At this point we have the expression as: | 1851 // At this point we have the expression as: |
1607 // scale_con * trip_counter + offset :: limit | 1852 // scale_con * trip_counter + offset :: limit |
1608 // where scale_con, offset and limit are loop invariant. Trip_counter | 1853 // where scale_con, offset and limit are loop invariant. Trip_counter |
1609 // monotonically increases by stride_con, a constant. Both (or either) | 1854 // monotonically increases by stride_con, a constant. Both (or either) |
1610 // stride_con and scale_con can be negative which will flip about the | 1855 // stride_con and scale_con can be negative which will flip about the |
1611 // sense of the test. | 1856 // sense of the test. |
1612 | 1857 |
1613 // Adjust pre and main loop limits to guard the correct iteration set | 1858 // Adjust pre and main loop limits to guard the correct iteration set |
1614 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests | 1859 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests |
1615 if( b_test._test == BoolTest::lt ) { // Range checks always use lt | 1860 if( b_test._test == BoolTest::lt ) { // Range checks always use lt |
1616 // The overflow limit: scale*I+offset < limit | 1861 // The underflow and overflow limits: 0 <= scale*I+offset < limit |
1617 add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); | 1862 add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit ); |
1618 // The underflow limit: 0 <= scale*I+offset. | |
1619 // Some math yields: -scale*I-(offset+1) < 0 | |
1620 Node *plus_one = new (C, 3) AddINode( offset, one ); | |
1621 register_new_node( plus_one, pre_ctrl ); | |
1622 Node *neg_offset = new (C, 3) SubINode( zero, plus_one ); | |
1623 register_new_node( neg_offset, pre_ctrl ); | |
1624 add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit ); | |
1625 if (!conditional_rc) { | 1863 if (!conditional_rc) { |
1626 conditional_rc = !loop->dominates_backedge(iff); | 1864 conditional_rc = !loop->dominates_backedge(iff); |
1865 // It is also needed if offset->_lo == min_int since | |
1866 // (0-min_int) == min_int. It may be fine for stride > 0 | |
1867 // but for stride < 0 pre_limit will be < original_limit. | |
1868 const TypeInt* offset_t = _igvn.type(offset)->is_int(); | |
1869 conditional_rc |= RangeLimitCheck && (offset_t->_lo == min_jint) && | |
1870 (scale_con<0) && (stride_con<0); | |
1627 } | 1871 } |
1628 } else { | 1872 } else { |
1629 #ifndef PRODUCT | 1873 #ifndef PRODUCT |
1630 if( PrintOpto ) | 1874 if( PrintOpto ) |
1631 tty->print_cr("missed RCE opportunity"); | 1875 tty->print_cr("missed RCE opportunity"); |
1632 #endif | 1876 #endif |
1633 continue; // In release mode, ignore it | 1877 continue; // In release mode, ignore it |
1634 } | 1878 } |
1635 } else { // Otherwise work on normal compares | 1879 } else { // Otherwise work on normal compares |
1636 switch( b_test._test ) { | 1880 switch( b_test._test ) { |
1637 case BoolTest::ge: // Convert X >= Y to -X <= -Y | 1881 case BoolTest::gt: |
1882 // Fall into GE case | |
1883 case BoolTest::ge: | |
1884 // Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit | |
1638 scale_con = -scale_con; | 1885 scale_con = -scale_con; |
1639 offset = new (C, 3) SubINode( zero, offset ); | 1886 offset = new (C, 3) SubINode( zero, offset ); |
1640 register_new_node( offset, pre_ctrl ); | 1887 register_new_node( offset, pre_ctrl ); |
1641 limit = new (C, 3) SubINode( zero, limit ); | 1888 limit = new (C, 3) SubINode( zero, limit ); |
1642 register_new_node( limit, pre_ctrl ); | 1889 register_new_node( limit, pre_ctrl ); |
1643 // Fall into LE case | 1890 // Fall into LE case |
1644 case BoolTest::le: // Convert X <= Y to X < Y+1 | 1891 case BoolTest::le: |
1645 limit = new (C, 3) AddINode( limit, one ); | 1892 if (b_test._test != BoolTest::gt) { |
1646 register_new_node( limit, pre_ctrl ); | 1893 // Convert X <= Y to X < Y+1 |
1894 limit = new (C, 3) AddINode( limit, one ); | |
1895 register_new_node( limit, pre_ctrl ); | |
1896 } | |
1647 // Fall into LT case | 1897 // Fall into LT case |
1648 case BoolTest::lt: | 1898 case BoolTest::lt: |
1649 add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit ); | 1899 // The underflow and overflow limits: MIN_INT <= scale*I+offset < limit |
1900 add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit ); | |
1650 if (!conditional_rc) { | 1901 if (!conditional_rc) { |
1651 conditional_rc = !loop->dominates_backedge(iff); | 1902 conditional_rc = !loop->dominates_backedge(iff); |
1903 // It is also needed if scale*pre_limit+offset >= limit | |
1904 // due to underflow so we need execute pre-loop until | |
1905 // scale*I+offset >= min_int. But (low_limit-offset) will | |
1906 // underflow when offset > 0 and X will be > original_limit. | |
1907 const TypeInt* offset_t = _igvn.type(offset)->is_int(); | |
1908 conditional_rc |= RangeLimitCheck && (offset_t->_hi > 0) && | |
1909 (scale_con>0) && (stride_con>0); | |
1652 } | 1910 } |
1653 break; | 1911 break; |
1654 default: | 1912 default: |
1655 #ifndef PRODUCT | 1913 #ifndef PRODUCT |
1656 if( PrintOpto ) | 1914 if( PrintOpto ) |
1697 _igvn.hash_delete(pre_opaq); | 1955 _igvn.hash_delete(pre_opaq); |
1698 pre_opaq->set_req(1, pre_limit); | 1956 pre_opaq->set_req(1, pre_limit); |
1699 | 1957 |
1700 // Note:: we are making the main loop limit no longer precise; | 1958 // Note:: we are making the main loop limit no longer precise; |
1701 // need to round up based on stride. | 1959 // need to round up based on stride. |
1702 if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case | 1960 cl->set_nonexact_trip_count(); |
1961 if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case | |
1703 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init | 1962 // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init |
1704 // Hopefully, compiler will optimize for powers of 2. | 1963 // Hopefully, compiler will optimize for powers of 2. |
1705 Node *ctrl = get_ctrl(main_limit); | 1964 Node *ctrl = get_ctrl(main_limit); |
1706 Node *stride = cl->stride(); | 1965 Node *stride = cl->stride(); |
1707 Node *init = cl->init_trip(); | 1966 Node *init = cl->init_trip(); |
1877 | 2136 |
1878 // Replace the phi at loop head with the final value of the last | 2137 // Replace the phi at loop head with the final value of the last |
1879 // iteration. Then the CountedLoopEnd will collapse (backedge never | 2138 // iteration. Then the CountedLoopEnd will collapse (backedge never |
1880 // taken) and all loop-invariant uses of the exit values will be correct. | 2139 // taken) and all loop-invariant uses of the exit values will be correct. |
1881 Node *phi = cl->phi(); | 2140 Node *phi = cl->phi(); |
1882 Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() ); | 2141 Node *exact_limit = phase->exact_limit(this); |
2142 if (exact_limit != cl->limit()) { | |
2143 // We also need to replace the original limit to collapse loop exit. | |
2144 Node* cmp = cl->loopexit()->cmp_node(); | |
2145 assert(cl->limit() == cmp->in(2), "sanity"); | |
2146 phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist | |
2147 phase->_igvn.hash_delete(cmp); | |
2148 cmp->set_req(2, exact_limit); | |
2149 phase->_igvn._worklist.push(cmp); // put cmp on worklist | |
2150 } | |
2151 // Note: the final value after increment should not overflow since | |
2152 // counted loop has limit check predicate. | |
2153 Node *final = new (phase->C, 3) SubINode( exact_limit, cl->stride() ); | |
1883 phase->register_new_node(final,cl->in(LoopNode::EntryControl)); | 2154 phase->register_new_node(final,cl->in(LoopNode::EntryControl)); |
1884 phase->_igvn.replace_node(phi,final); | 2155 phase->_igvn.replace_node(phi,final); |
1885 phase->C->set_major_progress(); | 2156 phase->C->set_major_progress(); |
1886 return true; | 2157 return true; |
1887 } | 2158 } |