From 69b806f6a60efcf150b995e418f08b96d26dd5dc Mon Sep 17 00:00:00 2001 From: Bin Cheng Date: Tue, 2 Aug 2016 10:13:28 +0000 Subject: [PATCH] re PR tree-optimization/34114 (Missed optimization: cannot determine loop termination) PR tree-optimization/34114 * tree-ssa-loop-niter.c (number_of_iterations_ne): Prove no-overflow information for more control IVs. gcc/testsuite PR tree-optimization/34114 * gcc.dg/tree-ssa/loop-42.c: New test. From-SVN: r238983 --- gcc/ChangeLog | 6 ++ gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.dg/tree-ssa/loop-42.c | 36 ++++++++++ gcc/tree-ssa-loop-niter.c | 89 ++++++++++++++++++++----- 4 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-42.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5687ad51224..7e23ba4da2d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2016-08-02 Bin Cheng + + PR tree-optimization/34114 + * tree-ssa-loop-niter.c (number_of_iterations_ne): Prove no-overflow + information for more control IVs. + 2016-08-02 Bin Cheng PR tree-optimization/34114 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f5bd074cf18..c1a98ec2e98 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-08-02 Bin Cheng + + PR tree-optimization/34114 + * gcc.dg/tree-ssa/loop-42.c: New test. + 2016-08-02 Tamar Christina * gcc.target/aarch64/vminmaxnm.c: New. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-42.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-42.c new file mode 100644 index 00000000000..3f9d91ac287 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-42.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ivcanon-details" } */ + +void foo2 (unsigned int num, int *a) +{ + unsigned int i, n = (num - (num % 2)); + + for(i = 0; i != n; i += 2) + a[i] = 0; +} + +void foo3 (unsigned int num, int *a) +{ + unsigned int i, n = (num - (num % 3)); + + for(i = 0; i != n; i += 3) + a[i] = 0; +} + +void foo4 (unsigned int num, int *a) +{ + unsigned int i, n = (num - (num % 4)); + + for(i = 0; i != n; i += 4) + a[i] = 0; +} + +void foo5 (unsigned int num, int *a) +{ + unsigned int i, n = (num - (num % 5)); + + for(i = 0; i != n; i += 5) + a[i] = 0; +} + +/* { dg-final { scan-tree-dump-not "under assumptions " "ivcanon" } } */ diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 191a0718515..c740ffa9d6a 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -964,7 +964,6 @@ number_of_iterations_ne (struct loop *loop, tree type, affine_iv *iv, tree niter_type = unsigned_type_for (type); tree s, c, d, bits, assumption, tmp, bound; mpz_t max; - tree e; niter->control = *iv; niter->bound = final; @@ -999,21 +998,76 @@ number_of_iterations_ne (struct loop *loop, tree type, affine_iv *iv, TYPE_SIGN (niter_type)); mpz_clear (max); - /* Compute no-overflow information for the control iv. Note we are - handling NE_EXPR, if iv base equals to final value, the loop exits - immediately, and the iv does not overflow. */ - if (tree_int_cst_sign_bit (iv->step)) - e = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final); - else - e = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final); - e = simplify_using_initial_conditions (loop, e); - if (integer_onep (e) - && (integer_onep (s) - || (TREE_CODE (c) == INTEGER_CST - && TREE_CODE (s) == INTEGER_CST - && wi::mod_trunc (c, s, TYPE_SIGN (type)) == 0))) + /* Compute no-overflow information for the control iv. This can be + proven when below two conditions hold. + + 1) |FINAL - base| is an exact multiple of step. + 2) IV evaluates toward FINAL at beginning, i.e: + + base <= FINAL ; step > 0 + base >= FINAL ; step < 0 + + Note the first condition holds, the second can be then relaxed + to below condition. + + base - step < FINAL ; step > 0 + && base - step doesn't underflow + base - step > FINAL ; step < 0 + && base - step doesn't overflow + + The relaxation is important because after pass loop-ch, loop + with exit condition (IV != FINAL) will usually be guarded by + pre-condition (IV.base - IV.step != FINAL). Please refer to + PR34114 as an example. + + Also note, for NE_EXPR, base equals to FINAL is a special case, in + which the loop exits immediately, and the iv does not overflow. */ + if (!niter->control.no_overflow + && (integer_onep (s) || multiple_of_p (type, c, s))) { - niter->control.no_overflow = true; + tree t, cond, relaxed_cond = boolean_false_node; + + if (tree_int_cst_sign_bit (iv->step)) + { + cond = fold_build2 (GE_EXPR, boolean_type_node, iv->base, final); + if (TREE_CODE (type) == INTEGER_TYPE) + { + /* Only when base - step doesn't overflow. */ + t = TYPE_MAX_VALUE (type); + t = fold_build2 (PLUS_EXPR, type, t, iv->step); + t = fold_build2 (GE_EXPR, boolean_type_node, t, iv->base); + if (integer_nonzerop (t)) + { + t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step); + relaxed_cond = fold_build2 (GT_EXPR, boolean_type_node, + t, final); + } + } + } + else + { + cond = fold_build2 (LE_EXPR, boolean_type_node, iv->base, final); + if (TREE_CODE (type) == INTEGER_TYPE) + { + /* Only when base - step doesn't underflow. */ + t = TYPE_MIN_VALUE (type); + t = fold_build2 (PLUS_EXPR, type, t, iv->step); + t = fold_build2 (LE_EXPR, boolean_type_node, t, iv->base); + if (integer_nonzerop (t)) + { + t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step); + relaxed_cond = fold_build2 (LT_EXPR, boolean_type_node, + t, final); + } + } + } + + t = simplify_using_initial_conditions (loop, cond); + if (!t || !integer_onep (t)) + t = simplify_using_initial_conditions (loop, relaxed_cond); + + if (t && integer_onep (t)) + niter->control.no_overflow = true; } /* First the trivial cases -- when the step is 1. */ @@ -1022,6 +1076,11 @@ number_of_iterations_ne (struct loop *loop, tree type, affine_iv *iv, niter->niter = c; return true; } + if (niter->control.no_overflow && multiple_of_p (type, c, s)) + { + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, c, s); + return true; + } /* Let nsd (step, size of mode) = d. If d does not divide c, the loop is infinite. Otherwise, the number of iterations is -- 2.30.2