graal-jvmci-8: src/share/vm/opto/loopTransform.cpp comparison

comparison src/share/vm/opto/loopTransform.cpp @ 3345:bad7ecd0b6ed

5091921: Sign flip issues in loop optimizer Summary: Fix integer overflow problem in the code generated by loop optimizer. Reviewed-by: never

author	kvn
date	Wed, 04 May 2011 13:12:42 -0700
parents	ae93231c7a1f
children	f879eafd5835

comparison

equal deleted inserted replaced

-:0139aac70fb5
+:bad7ecd0b6ed
 return; // Infinite loop
 #ifdef ASSERT
 BoolTest::mask bt = cl->loopexit()->test_trip();
 assert(bt == BoolTest::lt || bt == BoolTest::gt ||
-bt == BoolTest::ne, "canonical test is expected");
+(bt == BoolTest::ne && !LoopLimitCheck), "canonical test is expected");
 #endif
 Node* init_n = cl->init_trip();
 Node* limit_n = cl->limit();
 if (init_n  != NULL &&  init_n->is_Con() &&
 //         around the loopback from the prior iteration (follow the old-loop
 //         backedges) and then map to the new peeled iteration.  This leaves
 //         the pre-loop with only 1 user (the new peeled iteration), but the
 //         peeled-loop backedge has 2 users.
 Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx];
-new_exit_value = move_loop_predicates(entry, new_exit_value);
+new_exit_value = move_loop_predicates(entry, new_exit_value, !counted_loop);
 _igvn.hash_delete(head);
 head->set_req(LoopNode::EntryControl, new_exit_value);
 for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
 Node* old = head->fast_out(j);
 if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
 assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
 if (trip_count > unroll_limit || body_size > unroll_limit) {
 return false;
 }
+// Fully unroll a loop with few iterations regardless next
+// conditions since following loop optimizations will split
+// such loop anyway (pre-main-post).
+if (trip_count <= 3)
+return true;
 // Take into account that after unroll conjoined heads and tails will fold,
 // otherwise policy_unroll() may allow more unrolling than max unrolling.
 uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
 uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE;
 if (body_size != tst_body_size) // Check for int overflow
 // Unrolling can result in a large amount of node construction
 new_body_size >= MaxNodeLimit - phase->C->unique()) {
 return false;
 }
-// Currently we don't have policy to optimize one iteration loops.
-// Maximally unrolling transformation is used for that:
-// it is peeled and the original loop become non reachable (dead).
-// Also fully unroll a loop with few iterations regardless next
-// conditions since following loop optimizations will split
-// such loop anyway (pre-main-post).
-if (trip_count <= 3)
-return true;
 // Do not unroll a loop with String intrinsics code.
 // String intrinsics are large and have loops.
 for (uint k = 0; k < _body.size(); k++) {
 Node* n = _body.at(k);
 switch (n->Opcode()) {
 assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 if (!cl->is_valid_counted_loop())
 return false; // Malformed counted loop
-// protect against over-unrolling
+// Protect against over-unrolling.
-if (cl->trip_count() <= 1) return false;
+// After split at least one iteration will be executed in pre-loop.
+if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
 int future_unroll_ct = cl->unrolled_count() * 2;
 if (future_unroll_ct > MAX_UNROLL) return false;
 // Check for initial stride being a small enough constant
 return false;
 }
 Node *init_n = cl->init_trip();
 Node *limit_n = cl->limit();
+int stride_con = cl->stride_con();
 // Non-constant bounds.
 // Protect against over-unrolling when init or/and limit are not constant
 // (so that trip_count's init value is maxint) but iv range is known.
 if (init_n   == NULL || !init_n->is_Con()  ||
 limit_n  == NULL || !limit_n->is_Con()) {
 Node* phi = cl->phi();
 if (phi != NULL) {
 assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
 const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
-int next_stride = cl->stride_con() * 2; // stride after this unroll
+int next_stride = stride_con * 2; // stride after this unroll
 if (next_stride > 0) {
 if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow
 iv_type->_lo + next_stride >  iv_type->_hi) {
 return false;  // over-unrolling
 }
 }
 }
 }
 }
+// After unroll limit will be adjusted: new_limit = limit-stride.
+// Bailout if adjustment overflow.
+const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
+if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) ||
+stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo))
+return false;  // overflow
 // Adjust body_size to determine if we unroll or not
 uint body_size = _body.size();
-// Key test to unroll CaffeineMark's Logic test
-int xors_in_loop = 0;
 // Also count ModL, DivL and MulL which expand mightly
 for (uint k = 0; k < _body.size(); k++) {
 Node* n = _body.at(k);
 switch (n->Opcode()) {
-case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test
 case Op_ModL: body_size += 30; break;
 case Op_DivL: body_size += 30; break;
 case Op_MulL: body_size += 10; break;
 case Op_StrComp:
 case Op_StrEquals:
 } // switch
 }
 // Check for being too big
 if (body_size > (uint)LoopUnrollLimit) {
-if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
+// Normal case: loop too big
-// Normal case: loop too big
 return false;
 }
 // Unroll once!  (Each trip will soon do double iterations)
 return true;
 //------------------------------policy_range_check-----------------------------
 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 // Actually we do iteration-splitting, a more powerful form of RCE.
 bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
-if( !RangeCheckElimination ) return false;
+if (!RangeCheckElimination) return false;
 CountedLoopNode *cl = _head->as_CountedLoop();
 // If we unrolled with no intention of doing RCE and we later
 // changed our minds, we got no pre-loop.  Either we need to
 // make a new pre-loop, or we gotta disallow RCE.
-if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now.
+if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
 Node *trip_counter = cl->phi();
 // Check loop body for tests of trip-counter plus loop-invariant vs
 // loop-invariant.
-for( uint i = 0; i < _body.size(); i++ ) {
+for (uint i = 0; i < _body.size(); i++) {
 Node *iff = _body[i];
-if( iff->Opcode() == Op_If ) { // Test?
+if (iff->Opcode() == Op_If) { // Test?
 // Comparing trip+off vs limit
 Node *bol = iff->in(1);
-if( bol->req() != 2 ) continue; // dead constant test
+if (bol->req() != 2) continue; // dead constant test
 if (!bol->is_Bool()) {
 assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only");
 continue;
 }
+if (bol->as_Bool()->_test._test == BoolTest::ne)
+continue; // not RC
 Node *cmp = bol->in(1);
 Node *rc_exp = cmp->in(1);
 Node *limit = cmp->in(2);
 // direction:
 // positive stride use <
 // negative stride use >
 if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
+assert(!LoopLimitCheck, "only canonical tests (lt or gt) are expected");
 BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
 // Modify pre loop end condition
 Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
 BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test);
 // Flag main loop
 main_head->set_main_loop();
 if( peel_only ) main_head->set_main_no_pre_loop();
+// Subtract a trip count for the pre-loop.
+main_head->set_trip_count(main_head->trip_count() - 1);
 // It's difficult to be precise about the trip-counts
 // for the pre/post loops.  They are usually very short,
 // so guess that 4 trips is a reasonable value.
 post_head->set_profile_trip_cnt(4.0);
 pre_head->set_profile_trip_cnt(4.0);
 if (PrintOpto && VerifyLoopOptimizations) {
 tty->print("Unrolling ");
 loop->dump_head();
 } else if (TraceLoopOpts) {
 if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
-tty->print("Unroll  %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
+tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
 } else {
-tty->print("Unroll  %d     ", loop_head->unrolled_count()*2);
+tty->print("Unroll %d     ", loop_head->unrolled_count()*2);
 }
 loop->dump_head();
 }
 #endif
 Node *limit = loop_head->limit();
 Node *init  = loop_head->init_trip();
 Node *stride = loop_head->stride();
 Node *opaq = NULL;
-if( adjust_min_trip ) {       // If not maximally unrolling, need adjustment
+if (adjust_min_trip) {       // If not maximally unrolling, need adjustment
+// Search for zero-trip guard.
 assert( loop_head->is_main_loop(), "" );
 assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
 Node *iff = ctrl->in(0);
 assert( iff->Opcode() == Op_If, "" );
 Node *bol = iff->in(1);
 assert( bol->Opcode() == Op_Bool, "" );
 Node *cmp = bol->in(1);
 assert( cmp->Opcode() == Op_CmpI, "" );
 opaq = cmp->in(2);
-// Occasionally it's possible for a pre-loop Opaque1 node to be
+// Occasionally it's possible for a zero-trip guard Opaque1 node to be
 // optimized away and then another round of loop opts attempted.
 // We can not optimize this particular loop in that case.
-if( opaq->Opcode() != Op_Opaque1 )
+if (opaq->Opcode() != Op_Opaque1)
-return;                   // Cannot find pre-loop!  Bail out!
+return; // Cannot find zero-trip guard!  Bail out!
+// Zero-trip test uses an 'opaque' node which is not shared.
+assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
 }
 C->set_major_progress();
-// Adjust max trip count. The trip count is intentionally rounded
+Node* new_limit = NULL;
-// down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
+if (UnrollLimitCheck) {
-// the main, unrolled, part of the loop will never execute as it is protected
+int stride_con = stride->get_int();
-// by the min-trip test.  See bug 4834191 for a case where we over-unrolled
+int stride_p = (stride_con > 0) ? stride_con : -stride_con;
-// and later determined that part of the unrolled loop was dead.
+uint old_trip_count = loop_head->trip_count();
-loop_head->set_trip_count(loop_head->trip_count() / 2);
+// Verify that unroll policy result is still valid.
+assert(old_trip_count > 1 &&
-// Double the count of original iterations in the unrolled loop body.
+(!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
-loop_head->double_unrolled_count();
+// Adjust loop limit to keep valid iterations number after unroll.
-// -----------
+// Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
-// Step 2: Cut back the trip counter for an unroll amount of 2.
+// which may overflow.
-// Loop will normally trip (limit - init)/stride_con.  Since it's a
+if (!adjust_min_trip) {
-// CountedLoop this is exact (stride divides limit-init exactly).
+assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
-// We are going to double the loop body, so we want to knock off any
+"odd trip count for maximally unroll");
-// odd iteration: (trip_cnt & ~1).  Then back compute a new limit.
+// Don't need to adjust limit for maximally unroll since trip count is even.
-Node *span = new (C, 3) SubINode( limit, init );
+} else if (loop_head->has_exact_trip_count() && init->is_Con()) {
-register_new_node( span, ctrl );
+// Loop's limit is constant. Loop's init could be constant when pre-loop
-Node *trip = new (C, 3) DivINode( 0, span, stride );
+// become peeled iteration.
-register_new_node( trip, ctrl );
+long init_con = init->get_int();
-Node *mtwo = _igvn.intcon(-2);
+// We can keep old loop limit if iterations count stays the same:
-set_ctrl(mtwo, C->root());
+//   old_trip_count == new_trip_count * 2
-Node *rond = new (C, 3) AndINode( trip, mtwo );
+// Note: since old_trip_count >= 2 then new_trip_count >= 1
-register_new_node( rond, ctrl );
+// so we also don't need to adjust zero trip test.
-Node *spn2 = new (C, 3) MulINode( rond, stride );
+long limit_con  = limit->get_int();
-register_new_node( spn2, ctrl );
+// (stride_con*2) not overflow since stride_con <= 8.
-Node *lim2 = new (C, 3) AddINode( spn2, init );
+int new_stride_con = stride_con * 2;
-register_new_node( lim2, ctrl );
+int stride_m    = new_stride_con - (stride_con > 0 ? 1 : -1);
+long trip_count = (limit_con - init_con + stride_m)/new_stride_con;
-// Hammer in the new limit
+// New trip count should satisfy next conditions.
-Node *ctrl2 = loop_end->in(0);
+assert(trip_count > 0 && (julong)trip_count < (julong)max_juint/2, "sanity");
-Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 );
+uint new_trip_count = (uint)trip_count;
-register_new_node( cmp2, ctrl2 );
+adjust_min_trip = (old_trip_count != new_trip_count*2);
-Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() );
+}
-register_new_node( bol2, ctrl2 );
-_igvn.hash_delete(loop_end);
+if (adjust_min_trip) {
-loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
+// Step 2: Adjust the trip limit if it is called for.
+// The adjustment amount is -stride. Need to make sure if the
-// Step 3: Find the min-trip test guaranteed before a 'main' loop.
+// adjustment underflows or overflows, then the main loop is skipped.
-// Make it a 1-trip test (means at least 2 trips).
+Node* cmp = loop_end->cmp_node();
-if( adjust_min_trip ) {
+assert(cmp->in(2) == limit, "sanity");
-// Guard test uses an 'opaque' node which is not shared.  Hence I
+assert(opaq != NULL && opaq->in(1) == limit, "sanity");
-// can edit it's inputs directly.  Hammer in the new limit for the
-// minimum-trip guard.
+// Verify that policy_unroll result is still valid.
-assert( opaq->outcnt() == 1, "" );
+const TypeInt* limit_type = _igvn.type(limit)->is_int();
-_igvn.hash_delete(opaq);
+assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||
-opaq->set_req(1, lim2);
+stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity");
-}
+if (limit->is_Con()) {
+// The check in policy_unroll and the assert above guarantee
+// no underflow if limit is constant.
+new_limit = _igvn.intcon(limit->get_int() - stride_con);
+set_ctrl(new_limit, C->root());
+} else {
+if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) ||
+stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) {
+// No underflow.
+new_limit = new (C, 3) SubINode(limit, stride);
+} else {
+// (limit - stride) may underflow.
+// Clamp the adjustment value with MININT or MAXINT:
+//
+//   new_limit = limit-stride
+//   if (stride > 0)
+//     new_limit = (limit < new_limit) ? MININT : new_limit;
+//   else
+//     new_limit = (limit > new_limit) ? MAXINT : new_limit;
+//
+BoolTest::mask bt = loop_end->test_trip();
+assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
+Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
+set_ctrl(adj_max, C->root());
+Node* old_limit = NULL;
+Node* adj_limit = NULL;
+Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
+if (loop_head->unrolled_count() > 1 &&
+limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
+limit->in(CMoveNode::IfTrue) == adj_max &&
+bol->as_Bool()->_test._test == bt &&
+bol->in(1)->Opcode() == Op_CmpI &&
+bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
+// Loop was unrolled before.
+// Optimize the limit to avoid nested CMove:
+// use original limit as old limit.
+old_limit = bol->in(1)->in(1);
+// Adjust previous adjusted limit.
+adj_limit = limit->in(CMoveNode::IfFalse);
+adj_limit = new (C, 3) SubINode(adj_limit, stride);
+} else {
+old_limit = limit;
+adj_limit = new (C, 3) SubINode(limit, stride);
+}
+assert(old_limit != NULL && adj_limit != NULL, "");
+register_new_node( adj_limit, ctrl ); // adjust amount
+Node* adj_cmp = new (C, 3) CmpINode(old_limit, adj_limit);
+register_new_node( adj_cmp, ctrl );
+Node* adj_bool = new (C, 2) BoolNode(adj_cmp, bt);
+register_new_node( adj_bool, ctrl );
+new_limit = new (C, 4) CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
+}
+register_new_node(new_limit, ctrl);
+}
+assert(new_limit != NULL, "");
+if (limit->outcnt() == 2) {
+// Replace old limit if it is used only in loop tests.
+_igvn.replace_node(limit, new_limit);
+} else {
+// Replace in loop test.
+_igvn.hash_delete(cmp);
+cmp->set_req(2, new_limit);
+// Step 3: Find the min-trip test guaranteed before a 'main' loop.
+// Make it a 1-trip test (means at least 2 trips).
+// Guard test uses an 'opaque' node which is not shared.  Hence I
+// can edit it's inputs directly.  Hammer in the new limit for the
+// minimum-trip guard.
+assert(opaq->outcnt() == 1, "");
+_igvn.hash_delete(opaq);
+opaq->set_req(1, new_limit);
+}
+}
+// Adjust max trip count. The trip count is intentionally rounded
+// down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
+// the main, unrolled, part of the loop will never execute as it is protected
+// by the min-trip test.  See bug 4834191 for a case where we over-unrolled
+// and later determined that part of the unrolled loop was dead.
+loop_head->set_trip_count(old_trip_count / 2);
+// Double the count of original iterations in the unrolled loop body.
+loop_head->double_unrolled_count();
+} else { // LoopLimitCheck
+// Adjust max trip count. The trip count is intentionally rounded
+// down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
+// the main, unrolled, part of the loop will never execute as it is protected
+// by the min-trip test.  See bug 4834191 for a case where we over-unrolled
+// and later determined that part of the unrolled loop was dead.
+loop_head->set_trip_count(loop_head->trip_count() / 2);
+// Double the count of original iterations in the unrolled loop body.
+loop_head->double_unrolled_count();
+// -----------
+// Step 2: Cut back the trip counter for an unroll amount of 2.
+// Loop will normally trip (limit - init)/stride_con.  Since it's a
+// CountedLoop this is exact (stride divides limit-init exactly).
+// We are going to double the loop body, so we want to knock off any
+// odd iteration: (trip_cnt & ~1).  Then back compute a new limit.
+Node *span = new (C, 3) SubINode( limit, init );
+register_new_node( span, ctrl );
+Node *trip = new (C, 3) DivINode( 0, span, stride );
+register_new_node( trip, ctrl );
+Node *mtwo = _igvn.intcon(-2);
+set_ctrl(mtwo, C->root());
+Node *rond = new (C, 3) AndINode( trip, mtwo );
+register_new_node( rond, ctrl );
+Node *spn2 = new (C, 3) MulINode( rond, stride );
+register_new_node( spn2, ctrl );
+new_limit = new (C, 3) AddINode( spn2, init );
+register_new_node( new_limit, ctrl );
+// Hammer in the new limit
+Node *ctrl2 = loop_end->in(0);
+Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), new_limit );
+register_new_node( cmp2, ctrl2 );
+Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() );
+register_new_node( bol2, ctrl2 );
+_igvn.hash_delete(loop_end);
+loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
+// Step 3: Find the min-trip test guaranteed before a 'main' loop.
+// Make it a 1-trip test (means at least 2 trips).
+if( adjust_min_trip ) {
+assert( new_limit != NULL, "" );
+// Guard test uses an 'opaque' node which is not shared.  Hence I
+// can edit it's inputs directly.  Hammer in the new limit for the
+// minimum-trip guard.
+assert( opaq->outcnt() == 1, "" );
+_igvn.hash_delete(opaq);
+opaq->set_req(1, new_limit);
+}
+} // LoopLimitCheck
 // ---------
 // Step 4: Clone the loop body.  Move it inside the loop.  This loop body
 // represents the odd iterations; since the loop trips an even number of
 // times its backedge is never taken.  Kill the backedge.
 //------------------------------do_maximally_unroll----------------------------
 void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) {
 CountedLoopNode *cl = loop->_head->as_CountedLoop();
+assert(cl->has_exact_trip_count(), "trip count is not exact");
 assert(cl->trip_count() > 0, "");
 #ifndef PRODUCT
 if (TraceLoopOpts) {
 tty->print("MaxUnroll  %d ", cl->trip_count());
 loop->dump_head();
 }
 // Now its tripping an even number of times remaining.  Double loop body.
 // Do not adjust pre-guards; they are not needed and do not exist.
 if (cl->trip_count() > 0) {
+assert((cl->trip_count() & 1) == 0, "missed peeling");
 do_unroll(loop, old_new, false);
 }
 }
 //------------------------------dominates_backedge---------------------------------
 Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
 return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
 }
 //------------------------------add_constraint---------------------------------
-// Constrain the main loop iterations so the condition:
+// Constrain the main loop iterations so the conditions:
-//    scale_con * I + offset  <  limit
+//    low_limit <= scale_con * I + offset  <  upper_limit
 // always holds true.  That is, either increase the number of iterations in
 // the pre-loop or the post-loop until the condition holds true in the main
 // loop.  Stride, scale, offset and limit are all loop invariant.  Further,
 // stride and scale are constants (offset and limit often are).
-void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
+void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
-// Compute "I :: (limit-offset)/scale_con"
-Node *con = new (C, 3) SubINode( limit, offset );
-register_new_node( con, pre_ctrl );
-Node *scale = _igvn.intcon(scale_con);
-set_ctrl(scale, C->root());
-Node *X = new (C, 3) DivINode( 0, con, scale );
-register_new_node( X, pre_ctrl );
 // For positive stride, the pre-loop limit always uses a MAX function
 // and the main loop a MIN function.  For negative stride these are
 // reversed.
 // Also for positive stride*scale the affine function is increasing, so the
 // pre-loop must check for underflow and the post-loop for overflow.
 // Negative stride*scale reverses this; pre-loop checks for overflow and
 // post-loop for underflow.
-if( stride_con*scale_con > 0 ) {
+if (stride_con*scale_con > 0) {
-// Compute I < (limit-offset)/scale_con
+// The overflow limit: scale*I+offset < upper_limit
-// Adjust main-loop last iteration to be MIN/MAX(main_loop,X)
+// For main-loop compute
-*main_limit = (stride_con > 0)
+//   ( if (scale > 0) /* and stride > 0 */
-? (Node*)(new (C, 3) MinINode( *main_limit, X ))
+//       I < (upper_limit-offset)/scale
-: (Node*)(new (C, 3) MaxINode( *main_limit, X ));
+//     else /* scale < 0 and stride < 0 */
-register_new_node( *main_limit, pre_ctrl );
+//       I > (upper_limit-offset)/scale
+//   )
-} else {
+//
-// Compute (limit-offset)/scale_con + SGN(-scale_con) <= I
+// (upper_limit-offset) may overflow when offset < 0.
-// Add the negation of the main-loop constraint to the pre-loop.
+// But it is fine since main loop will either have
-// See footnote [++] below for a derivation of the limit expression.
+// less iterations or will be skipped in such case.
-Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1);
+Node *con = new (C, 3) SubINode(upper_limit, offset);
-set_ctrl(incr, C->root());
+register_new_node(con, pre_ctrl);
-Node *adj = new (C, 3) AddINode( X, incr );
+Node *scale = _igvn.intcon(scale_con);
-register_new_node( adj, pre_ctrl );
+set_ctrl(scale, C->root());
-*pre_limit = (scale_con > 0)
+Node *X = new (C, 3) DivINode(0, con, scale);
-? (Node*)new (C, 3) MinINode( *pre_limit, adj )
+register_new_node(X, pre_ctrl);
-: (Node*)new (C, 3) MaxINode( *pre_limit, adj );
-register_new_node( *pre_limit, pre_ctrl );
+// Adjust main-loop last iteration
+Node *loop_limit = *main_limit;
-//   [++] Here's the algebra that justifies the pre-loop limit expression:
+loop_limit = (stride_con > 0) // scale > 0
-//
+? (Node*)(new (C, 3) MinINode(loop_limit, X))
-//   NOT( scale_con * I + offset  <  limit )
+: (Node*)(new (C, 3) MaxINode(loop_limit, X));
-//      ==
+register_new_node(loop_limit, pre_ctrl);
-//   scale_con * I + offset  >=  limit
+*main_limit = loop_limit;
-//      ==
-//   SGN(scale_con) * I  >=  (limit-offset)/|scale_con|
+// The underflow limit: low_limit <= scale*I+offset.
-//      ==
+// For pre-loop compute
-//   (limit-offset)/|scale_con|   <=  I * SGN(scale_con)
+//   NOT(scale*I+offset >= low_limit)
-//      ==
+//   scale*I+offset < low_limit
-//   (limit-offset)/|scale_con|-1  <  I * SGN(scale_con)
+//   ( if (scale > 0) /* and stride > 0 */
-//      ==
+//       I < (low_limit-offset)/scale
-//   ( if (scale_con > 0) /*common case*/
+//     else /* scale < 0 and stride < 0 */
-//       (limit-offset)/scale_con - 1  <  I
+//       I > (low_limit-offset)/scale
-//     else
+//   )
-//       (limit-offset)/scale_con + 1  >  I
-//    )
+if (low_limit->get_int() == -max_jint) {
-//   ( if (scale_con > 0) /*common case*/
+if (!RangeLimitCheck) return;
-//       (limit-offset)/scale_con + SGN(-scale_con)  <  I
+// We need this guard when scale*pre_limit+offset >= limit
-//     else
+// due to underflow so we need execute pre-loop until
-//       (limit-offset)/scale_con + SGN(-scale_con)  >  I
+// scale*I+offset >= min_int. But (low_limit-offset) will
+// underflow when offset > 0 and X will be > original_limit.
+// To avoid it we replace offset = offset > 0 ? 0 : offset
+// and add min(pre_limit, original_limit).
+Node* shift = _igvn.intcon(31);
+set_ctrl(shift, C->root());
+Node *neg_off = new (C, 3) RShiftINode(offset, shift);
+register_new_node(neg_off, pre_ctrl);
+offset = new (C, 3) AndINode(offset, neg_off);
+register_new_node(offset, pre_ctrl);
+} else {
+assert(low_limit->get_int() == 0, "wrong low limit for range check");
+// The only problem we have here when offset == min_int
+// since (0-min_int) == min_int. It may be fine for scale > 0
+// but for scale < 0 X will be < original_limit.
+}
+con = new (C, 3) SubINode(low_limit, offset);
+register_new_node(con, pre_ctrl);
+scale = _igvn.intcon(scale_con);
+set_ctrl(scale, C->root());
+X = new (C, 3) DivINode(0, con, scale);
+register_new_node(X, pre_ctrl);
+// Adjust pre-loop last iteration
+loop_limit = *pre_limit;
+loop_limit = (stride_con > 0) // scale > 0
+? (Node*)(new (C, 3) MaxINode(loop_limit, X))
+: (Node*)(new (C, 3) MinINode(loop_limit, X));
+register_new_node( loop_limit, pre_ctrl );
+*pre_limit = loop_limit;
+} else { // stride_con*scale_con < 0
+// For negative stride*scale pre-loop checks for overflow and
+// post-loop for underflow.
+//
+// The underflow limit: low_limit <= scale*I+offset.
+// For main-loop compute
+//   scale*I+offset+1 > low_limit
+//   ( if (scale < 0) /* and stride > 0 */
+//       I < (low_limit-(offset+1))/scale
+//     else /* scale < 0 and stride < 0 */
+//       I > (low_limit-(offset+1))/scale
+//   )
+if (low_limit->get_int() == -max_jint) {
+if (!RangeLimitCheck) return;
+} else {
+assert(low_limit->get_int() == 0, "wrong low limit for range check");
+}
+Node *one  = _igvn.intcon(1);
+set_ctrl(one, C->root());
+Node *plus_one = new (C, 3) AddINode(offset, one);
+register_new_node( plus_one, pre_ctrl );
+Node *con = new (C, 3) SubINode(low_limit, plus_one);
+register_new_node(con, pre_ctrl);
+Node *scale = _igvn.intcon(scale_con);
+set_ctrl(scale, C->root());
+Node *X = new (C, 3) DivINode(0, con, scale);
+register_new_node(X, pre_ctrl);
+// Adjust main-loop last iteration
+Node *loop_limit = *main_limit;
+loop_limit = (stride_con > 0) // scale < 0
+? (Node*)(new (C, 3) MinINode(loop_limit, X))
+: (Node*)(new (C, 3) MaxINode(loop_limit, X));
+register_new_node(loop_limit, pre_ctrl);
+*main_limit = loop_limit;
+// The overflow limit: scale*I+offset < upper_limit
+// For pre-loop compute
+//   NOT(scale*I+offset < upper_limit)
+//   scale*I+offset >= upper_limit
+//   scale*I+offset+1 > upper_limit
+//   ( if (scale < 0) /* and stride > 0 */
+//       I < (upper_limit-(offset+1))/scale
+//     else /* scale < 0 and stride < 0 */
+//       I > (upper_limit-(offset+1))/scale
+//   )
+plus_one = new (C, 3) AddINode(offset, one);
+register_new_node( plus_one, pre_ctrl );
+con = new (C, 3) SubINode(upper_limit, plus_one);
+register_new_node(con, pre_ctrl);
+scale = _igvn.intcon(scale_con);
+set_ctrl(scale, C->root());
+X = new (C, 3) DivINode(0, con, scale);
+register_new_node(X, pre_ctrl);
+// Adjust pre-loop last iteration
+loop_limit = *pre_limit;
+loop_limit = (stride_con > 0) // scale < 0
+? (Node*)(new (C, 3) MaxINode(loop_limit, X))
+: (Node*)(new (C, 3) MinINode(loop_limit, X));
+register_new_node( loop_limit, pre_ctrl );
+*pre_limit = loop_limit;
 }
 }
 //------------------------------is_scaled_iv---------------------------------
 Node *bolzm = iffm->in(1);
 assert(bolzm->Opcode() == Op_Bool, "");
 Node *cmpzm = bolzm->in(1);
 assert(cmpzm->is_Cmp(), "");
 Node *opqzm = cmpzm->in(2);
-// Can not optimize a loop if pre-loop Opaque1 node is optimized
+// Can not optimize a loop if zero-trip Opaque1 node is optimized
 // away and then another round of loop opts attempted.
 if (opqzm->Opcode() != Op_Opaque1)
 return;
 assert(opqzm->in(1) == main_limit, "do not understand situation");
 // Must know if its a count-up or count-down loop
 int stride_con = cl->stride_con();
 Node *zero = _igvn.intcon(0);
 Node *one  = _igvn.intcon(1);
+// Use symmetrical int range [-max_jint,max_jint]
+Node *mini = _igvn.intcon(-max_jint);
 set_ctrl(zero, C->root());
 set_ctrl(one,  C->root());
+set_ctrl(mini, C->root());
 // Range checks that do not dominate the loop backedge (ie.
 // conditionally executed) can lengthen the pre loop limit beyond
 // the original loop limit. To prevent this, the pre limit is
 // (for stride > 0) MINed with the original loop limit (MAXed
 // As above for the 'limit', the 'offset' maybe pinned below the
 // zero trip test.
 if( offset_c == ctrl ) {
 continue; // Don't rce this check but continue looking for other candidates.
 }
+#ifdef ASSERT
+if (TraceRangeLimitCheck) {
+tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
+bol->dump(2);
+}
+#endif
 // At this point we have the expression as:
 //   scale_con * trip_counter + offset :: limit
 // where scale_con, offset and limit are loop invariant.  Trip_counter
 // monotonically increases by stride_con, a constant.  Both (or either)
 // stride_con and scale_con can be negative which will flip about the
 // sense of the test.
 // Adjust pre and main loop limits to guard the correct iteration set
 if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
 if( b_test._test == BoolTest::lt ) { // Range checks always use lt
-// The overflow limit: scale*I+offset < limit
+// The underflow and overflow limits: 0 <= scale*I+offset < limit
-add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
+add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
-// The underflow limit: 0 <= scale*I+offset.
-// Some math yields: -scale*I-(offset+1) < 0
-Node *plus_one = new (C, 3) AddINode( offset, one );
-register_new_node( plus_one, pre_ctrl );
-Node *neg_offset = new (C, 3) SubINode( zero, plus_one );
-register_new_node( neg_offset, pre_ctrl );
-add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit );
 if (!conditional_rc) {
 conditional_rc = !loop->dominates_backedge(iff);
+// It is also needed if offset->_lo == min_int since
+// (0-min_int) == min_int. It may be fine for stride > 0
+// but for stride < 0 pre_limit will be < original_limit.
+const TypeInt* offset_t = _igvn.type(offset)->is_int();
+conditional_rc |= RangeLimitCheck && (offset_t->_lo == min_jint) &&
+(scale_con<0) && (stride_con<0);
 }
 } else {
 #ifndef PRODUCT
 if( PrintOpto )
 tty->print_cr("missed RCE opportunity");
 #endif
 continue;             // In release mode, ignore it
 }
 } else {                  // Otherwise work on normal compares
 switch( b_test._test ) {
-case BoolTest::ge:      // Convert X >= Y to -X <= -Y
+case BoolTest::gt:
+// Fall into GE case
+case BoolTest::ge:
+// Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
 scale_con = -scale_con;
 offset = new (C, 3) SubINode( zero, offset );
 register_new_node( offset, pre_ctrl );
 limit  = new (C, 3) SubINode( zero, limit  );
 register_new_node( limit, pre_ctrl );
 // Fall into LE case
-case BoolTest::le:      // Convert X <= Y to X < Y+1
+case BoolTest::le:
-limit = new (C, 3) AddINode( limit, one );
+if (b_test._test != BoolTest::gt) {
-register_new_node( limit, pre_ctrl );
+// Convert X <= Y to X < Y+1
+limit = new (C, 3) AddINode( limit, one );
+register_new_node( limit, pre_ctrl );
+}
 // Fall into LT case
 case BoolTest::lt:
-add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
+// The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
+add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
 if (!conditional_rc) {
 conditional_rc = !loop->dominates_backedge(iff);
+// It is also needed if scale*pre_limit+offset >= limit
+// due to underflow so we need execute pre-loop until
+// scale*I+offset >= min_int. But (low_limit-offset) will
+// underflow when offset > 0 and X will be > original_limit.
+const TypeInt* offset_t = _igvn.type(offset)->is_int();
+conditional_rc |= RangeLimitCheck && (offset_t->_hi > 0) &&
+(scale_con>0) && (stride_con>0);
 }
 break;
 default:
 #ifndef PRODUCT
 if( PrintOpto )
 _igvn.hash_delete(pre_opaq);
 pre_opaq->set_req(1, pre_limit);
 // Note:: we are making the main loop limit no longer precise;
 // need to round up based on stride.
-if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case
+cl->set_nonexact_trip_count();
+if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
 // "Standard" round-up logic:  ([main_limit-init+(y-1)]/y)*y+init
 // Hopefully, compiler will optimize for powers of 2.
 Node *ctrl = get_ctrl(main_limit);
 Node *stride = cl->stride();
 Node *init = cl->init_trip();
 // Replace the phi at loop head with the final value of the last
 // iteration.  Then the CountedLoopEnd will collapse (backedge never
 // taken) and all loop-invariant uses of the exit values will be correct.
 Node *phi = cl->phi();
-Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() );
+Node *exact_limit = phase->exact_limit(this);
+if (exact_limit != cl->limit()) {
+// We also need to replace the original limit to collapse loop exit.
+Node* cmp = cl->loopexit()->cmp_node();
+assert(cl->limit() == cmp->in(2), "sanity");
+phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist
+phase->_igvn.hash_delete(cmp);
+cmp->set_req(2, exact_limit);
+phase->_igvn._worklist.push(cmp);        // put cmp on worklist
+}
+// Note: the final value after increment should not overflow since
+// counted loop has limit check predicate.
+Node *final = new (phase->C, 3) SubINode( exact_limit, cl->stride() );
 phase->register_new_node(final,cl->in(LoopNode::EntryControl));
 phase->_igvn.replace_node(phi,final);
 phase->C->set_major_progress();
 return true;
 }

Mercurial > hg > graal-jvmci-8

comparison src/share/vm/opto/loopTransform.cpp @ 3345:bad7ecd0b6ed