From b4a9343cf59f4ccb03c5be05a9314ff1f45712b4 Mon Sep 17 00:00:00 2001 From: Zdenek Dvorak Date: Tue, 14 Jun 2011 16:29:58 +0200 Subject: [PATCH] re PR middle-end/45098 (Missed induction variable optimization) 2011-06-14 Zdenek Dvorak Tom de Vries PR target/45098 * cfgloop.h (nb_iterations_upper_bound, nb_iterations_estimate): Document changed semantics. (max_stmt_executions, max_stmt_executions_int): Declare. * tree-data-ref.c (estimated_loop_iterations) (estimated_loop_iterations_int): Move functions... * tree-ssa-loop-niter.c (estimated_loop_iterations) (estimated_loop_iterations_int): here. (record_estimate): Change nb_iterations_upper_bound and nb_iterations_estimate semantics. (max_stmt_executions, max_stmt_executions_int): New function. * tree-data-ref.c (estimated_loop_iterations_tree): Rename to ... (max_stmt_executions_tree): this. (analyze_miv_subscript): Use max_stmt_executions_tree instead of estimated_loop_iterations_tree. tree-ssa-loop-ivopts.c (avg_loop_niter): Use max_stmt_executions_int instead of estimated_loop_iterations_int. * predict.c (predict_loops): Idem. * tree-parloops.c (parallelize_loops): Idem. * tree-data-ref.c (analyze_siv_subscript_cst_affine) (compute_overlap_steps_for_affine_1_2, analyze_subscript_affine_affine) (init_omega_for_ddr_1): Idem. * tree-ssa-loop-prefetch.c (determine_loop_nest_reuse) (loop_prefetch_arrays): Idem * graphite-sese-to-poly.c (build_loop_iteration_domains): Use max_stmt_executions instead of estimated_loop_iterations. * tree-data-ref.c (estimated_loop_iterations_tree): Idem. * tree-vrp.c (adjust_range_with_scev): Use estimated_loop_iterations instead of nb_iterations_upper_bound. Co-Authored-By: Tom de Vries From-SVN: r175022 --- gcc/ChangeLog | 33 ++++++++++++ gcc/cfgloop.h | 10 ++-- gcc/graphite-sese-to-poly.c | 2 +- gcc/predict.c | 2 +- gcc/tree-data-ref.c | 86 +++++++------------------------ gcc/tree-parloops.c | 2 +- gcc/tree-ssa-loop-ivopts.c | 2 +- gcc/tree-ssa-loop-niter.c | 98 +++++++++++++++++++++++++++++++++--- gcc/tree-ssa-loop-prefetch.c | 4 +- gcc/tree-vrp.c | 62 +++++++++++------------ 10 files changed, 185 insertions(+), 116 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 80b7b1e8b0f..80cd0a425d6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,36 @@ +2011-06-14 Zdenek Dvorak + Tom de Vries + + PR target/45098 + * cfgloop.h (nb_iterations_upper_bound, nb_iterations_estimate): + Document changed semantics. + (max_stmt_executions, max_stmt_executions_int): Declare. + * tree-data-ref.c (estimated_loop_iterations) + (estimated_loop_iterations_int): Move functions... + * tree-ssa-loop-niter.c (estimated_loop_iterations) + (estimated_loop_iterations_int): here. + (record_estimate): Change nb_iterations_upper_bound and + nb_iterations_estimate semantics. + (max_stmt_executions, max_stmt_executions_int): New function. + * tree-data-ref.c (estimated_loop_iterations_tree): Rename to ... + (max_stmt_executions_tree): this. + (analyze_miv_subscript): Use max_stmt_executions_tree instead of + estimated_loop_iterations_tree. + tree-ssa-loop-ivopts.c (avg_loop_niter): Use + max_stmt_executions_int instead of estimated_loop_iterations_int. + * predict.c (predict_loops): Idem. + * tree-parloops.c (parallelize_loops): Idem. + * tree-data-ref.c (analyze_siv_subscript_cst_affine) + (compute_overlap_steps_for_affine_1_2, analyze_subscript_affine_affine) + (init_omega_for_ddr_1): Idem. + * tree-ssa-loop-prefetch.c (determine_loop_nest_reuse) + (loop_prefetch_arrays): Idem + * graphite-sese-to-poly.c (build_loop_iteration_domains): Use + max_stmt_executions instead of estimated_loop_iterations. + * tree-data-ref.c (estimated_loop_iterations_tree): Idem. + * tree-vrp.c (adjust_range_with_scev): Use estimated_loop_iterations + instead of nb_iterations_upper_bound. + 2011-06-13 Jan Hubicka * ipa.c (cgraph_address_taken_from_non_vtable_p): Check the ref type. diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 0ff44de67d3..510bc10bd4a 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -143,11 +143,13 @@ struct GTY ((chain_next ("%h.next"))) loop { computes and caches the computed information in this field. */ tree nb_iterations; - /* An integer guaranteed to bound the number of iterations of the loop - from above. */ + /* An integer guaranteed to be greater or equal to nb_iterations. Only + valid if any_upper_bound is true. */ double_int nb_iterations_upper_bound; - /* An integer giving the expected number of iterations of the loop. */ + /* An integer giving an estimate on nb_iterations. Unlike + nb_iterations_upper_bound, there is no guarantee that it is at least + nb_iterations. */ double_int nb_iterations_estimate; bool any_upper_bound; @@ -278,7 +280,9 @@ extern rtx doloop_condition_get (rtx); void estimate_numbers_of_iterations_loop (struct loop *, bool); HOST_WIDE_INT estimated_loop_iterations_int (struct loop *, bool); +HOST_WIDE_INT max_stmt_executions_int (struct loop *, bool); bool estimated_loop_iterations (struct loop *, bool, double_int *); +bool max_stmt_executions (struct loop *, bool, double_int *); /* Loop manipulation. */ extern bool can_duplicate_loop_p (const struct loop *loop); diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c index 7474be016c3..7e23c9d69de 100644 --- a/gcc/graphite-sese-to-poly.c +++ b/gcc/graphite-sese-to-poly.c @@ -1092,7 +1092,7 @@ build_loop_iteration_domains (scop_p scop, struct loop *loop, scan_tree_for_params (SCOP_REGION (scop), nb_iters, ub_expr, one); mpz_clear (one); - if (estimated_loop_iterations (loop, true, &nit)) + if (max_stmt_executions (loop, true, &nit)) add_upper_bounds_from_estimated_nit (scop, nit, dim, ub_expr); /* loop_i <= expr_nb_iters */ diff --git a/gcc/predict.c b/gcc/predict.c index 870526bc6b3..f5e2449f636 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -994,7 +994,7 @@ predict_loops (void) the loop, use it to predict this exit. */ else if (n_exits == 1) { - nitercst = estimated_loop_iterations_int (loop, false); + nitercst = max_stmt_executions_int (loop, false); if (nitercst < 0) continue; if (nitercst > max) diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 7b28b8d9914..ebabf61c4db 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1621,66 +1621,18 @@ analyze_ziv_subscript (tree chrec_a, fprintf (dump_file, ")\n"); } -/* Sets NIT to the estimated number of executions of the statements in - LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as - large as the number of iterations. If we have no reliable estimate, - the function returns false, otherwise returns true. */ - -bool -estimated_loop_iterations (struct loop *loop, bool conservative, - double_int *nit) -{ - estimate_numbers_of_iterations_loop (loop, true); - if (conservative) - { - if (!loop->any_upper_bound) - return false; - - *nit = loop->nb_iterations_upper_bound; - } - else - { - if (!loop->any_estimate) - return false; - - *nit = loop->nb_iterations_estimate; - } - - return true; -} - -/* Similar to estimated_loop_iterations, but returns the estimate only - if it fits to HOST_WIDE_INT. If this is not the case, or the estimate - on the number of iterations of LOOP could not be derived, returns -1. */ - -HOST_WIDE_INT -estimated_loop_iterations_int (struct loop *loop, bool conservative) -{ - double_int nit; - HOST_WIDE_INT hwi_nit; - - if (!estimated_loop_iterations (loop, conservative, &nit)) - return -1; - - if (!double_int_fits_in_shwi_p (nit)) - return -1; - hwi_nit = double_int_to_shwi (nit); - - return hwi_nit < 0 ? -1 : hwi_nit; -} - -/* Similar to estimated_loop_iterations, but returns the estimate as a tree, +/* Similar to max_stmt_executions_int, but returns the bound as a tree, and only if it fits to the int type. If this is not the case, or the - estimate on the number of iterations of LOOP could not be derived, returns + bound on the number of iterations of LOOP could not be derived, returns chrec_dont_know. */ static tree -estimated_loop_iterations_tree (struct loop *loop, bool conservative) +max_stmt_executions_tree (struct loop *loop) { double_int nit; tree type; - if (!estimated_loop_iterations (loop, conservative, &nit)) + if (!max_stmt_executions (loop, true, &nit)) return chrec_dont_know; type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true); @@ -1763,7 +1715,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, /* Perform weak-zero siv test to see if overlap is outside the loop bounds. */ - numiter = estimated_loop_iterations_int (loop, false); + numiter = max_stmt_executions_int (loop, true); if (numiter >= 0 && compare_tree_int (tmp, numiter) > 0) @@ -1841,7 +1793,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, /* Perform weak-zero siv test to see if overlap is outside the loop bounds. */ - numiter = estimated_loop_iterations_int (loop, false); + numiter = max_stmt_executions_int (loop, true); if (numiter >= 0 && compare_tree_int (tmp, numiter) > 0) @@ -2022,10 +1974,9 @@ compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b, step_z = int_cst_value (CHREC_RIGHT (chrec_b)); niter_x = - estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)), - false); - niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false); - niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false); + max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true); + niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true); + niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true); if (niter_x < 0 || niter_y < 0 || niter_z < 0) { @@ -2350,10 +2301,8 @@ analyze_subscript_affine_affine (tree chrec_a, HOST_WIDE_INT niter, niter_a, niter_b; affine_fn ova, ovb; - niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a), - false); - niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b), - false); + niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true); + niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true); niter = MIN (niter_a, niter_b); step_a = int_cst_value (CHREC_RIGHT (chrec_a)); step_b = int_cst_value (CHREC_RIGHT (chrec_b)); @@ -2460,10 +2409,10 @@ analyze_subscript_affine_affine (tree chrec_a, if (i1 > 0 && j1 > 0) { - HOST_WIDE_INT niter_a = estimated_loop_iterations_int - (get_chrec_loop (chrec_a), false); - HOST_WIDE_INT niter_b = estimated_loop_iterations_int - (get_chrec_loop (chrec_b), false); + HOST_WIDE_INT niter_a = max_stmt_executions_int + (get_chrec_loop (chrec_a), true); + HOST_WIDE_INT niter_b = max_stmt_executions_int + (get_chrec_loop (chrec_b), true); HOST_WIDE_INT niter = MIN (niter_a, niter_b); /* (X0, Y0) is a solution of the Diophantine equation: @@ -2740,8 +2689,7 @@ analyze_miv_subscript (tree chrec_a, in the same order. */ *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); - *last_conflicts = estimated_loop_iterations_tree - (get_chrec_loop (chrec_a), true); + *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a)); dependence_stats.num_miv_dependent++; } @@ -3754,7 +3702,7 @@ init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb, for (i = 0; i <= DDR_INNER_LOOP (ddr) && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++) { - HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false); + HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true); /* 0 <= loop_x */ ineq = omega_add_zero_geq (pb, omega_black); diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 9a11f80d4b0..9d2ea1a05f6 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -2134,7 +2134,7 @@ parallelize_loops (void) /* FIXME: the check for vector phi nodes could be removed. */ || loop_has_vector_phi_nodes (loop)) continue; - estimated = estimated_loop_iterations_int (loop, false); + estimated = max_stmt_executions_int (loop, false); /* FIXME: Bypass this check as graphite doesn't update the count and frequency correctly now. */ if (!flag_loop_parallelize_all diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 10fd7dc48c7..3717639f0d2 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -115,7 +115,7 @@ along with GCC; see the file COPYING3. If not see static inline HOST_WIDE_INT avg_loop_niter (struct loop *loop) { - HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false); + HOST_WIDE_INT niter = max_stmt_executions_int (loop, false); if (niter == -1) return AVG_LOOP_NITER (loop); diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 230593ad264..fa3ccc63f59 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -2568,18 +2568,17 @@ record_estimate (struct loop *loop, tree bound, double_int i_bound, } /* Update the number of iteration estimates according to the bound. - If at_stmt is an exit, then every statement in the loop is - executed at most BOUND + 1 times. If it is not an exit, then - some of the statements before it could be executed BOUND + 2 - times, if an exit of LOOP is before stmt. */ + If at_stmt is an exit or dominates the single exit from the loop, + then the loop latch is executed at most BOUND times, otherwise + it can be executed BOUND + 1 times. */ exit = single_exit (loop); if (is_exit || (exit != NULL && dominated_by_p (CDI_DOMINATORS, exit->src, gimple_bb (at_stmt)))) - delta = double_int_one; + delta = double_int_zero; else - delta = double_int_two; + delta = double_int_one; i_bound = double_int_add (i_bound, delta); /* If an overflow occurred, ignore the result. */ @@ -3042,6 +3041,93 @@ estimate_numbers_of_iterations_loop (struct loop *loop, bool use_undefined_p) loop->nb_iterations_estimate = loop->nb_iterations_upper_bound; } +/* Sets NIT to the estimated number of executions of the latch of the + LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as + large as the number of iterations. If we have no reliable estimate, + the function returns false, otherwise returns true. */ + +bool +estimated_loop_iterations (struct loop *loop, bool conservative, + double_int *nit) +{ + estimate_numbers_of_iterations_loop (loop, true); + if (conservative) + { + if (!loop->any_upper_bound) + return false; + + *nit = loop->nb_iterations_upper_bound; + } + else + { + if (!loop->any_estimate) + return false; + + *nit = loop->nb_iterations_estimate; + } + + return true; +} + +/* Similar to estimated_loop_iterations, but returns the estimate only + if it fits to HOST_WIDE_INT. If this is not the case, or the estimate + on the number of iterations of LOOP could not be derived, returns -1. */ + +HOST_WIDE_INT +estimated_loop_iterations_int (struct loop *loop, bool conservative) +{ + double_int nit; + HOST_WIDE_INT hwi_nit; + + if (!estimated_loop_iterations (loop, conservative, &nit)) + return -1; + + if (!double_int_fits_in_shwi_p (nit)) + return -1; + hwi_nit = double_int_to_shwi (nit); + + return hwi_nit < 0 ? -1 : hwi_nit; +} + +/* Returns an upper bound on the number of executions of statements + in the LOOP. For statements before the loop exit, this exceeds + the number of execution of the latch by one. */ + +HOST_WIDE_INT +max_stmt_executions_int (struct loop *loop, bool conservative) +{ + HOST_WIDE_INT nit = estimated_loop_iterations_int (loop, conservative); + HOST_WIDE_INT snit; + + if (nit == -1) + return -1; + + snit = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) nit + 1); + + /* If the computation overflows, return -1. */ + return snit < 0 ? -1 : snit; +} + +/* Sets NIT to the estimated number of executions of the latch of the + LOOP, plus one. If CONSERVATIVE is true, we must be sure that NIT is at + least as large as the number of iterations. If we have no reliable + estimate, the function returns false, otherwise returns true. */ + +bool +max_stmt_executions (struct loop *loop, bool conservative, double_int *nit) +{ + double_int nit_minus_one; + + if (!estimated_loop_iterations (loop, conservative, nit)) + return false; + + nit_minus_one = *nit; + + *nit = double_int_add (*nit, double_int_one); + + return double_int_ucmp (*nit, nit_minus_one) > 0; +} + /* Records estimates on numbers of iterations of loops. */ void diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index d920ec6eb07..9d9d249d2c2 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1549,7 +1549,7 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, continue; aloop = VEC_index (loop_p, vloops, i); - vol = estimated_loop_iterations_int (aloop, false); + vol = max_stmt_executions_int (aloop, false); if (vol < 0) vol = expected_loop_iterations (aloop); volume *= vol; @@ -1801,7 +1801,7 @@ loop_prefetch_arrays (struct loop *loop) return false; ahead = (PREFETCH_LATENCY + time - 1) / time; - est_niter = estimated_loop_iterations_int (loop, false); + est_niter = max_stmt_executions_int (loop, false); /* Prefetching is not likely to be profitable if the trip count to ahead ratio is too small. */ diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index f40d0d4c890..0ca27eff6e6 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -3403,44 +3403,42 @@ adjust_range_with_scev (value_range_t *vr, struct loop *loop, tmax = TYPE_MAX_VALUE (type); /* Try to use estimated number of iterations for the loop to constrain the - final value in the evolution. - We are interested in the number of executions of the latch, while - nb_iterations_upper_bound includes the last execution of the exit test. */ + final value in the evolution. */ if (TREE_CODE (step) == INTEGER_CST - && loop->any_upper_bound - && !double_int_zero_p (loop->nb_iterations_upper_bound) && is_gimple_val (init) && (TREE_CODE (init) != SSA_NAME || get_value_range (init)->type == VR_RANGE)) { - value_range_t maxvr = { VR_UNDEFINED, NULL_TREE, NULL_TREE, NULL }; - double_int dtmp; - bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (step)); - int overflow = 0; - - dtmp = double_int_mul_with_sign (tree_to_double_int (step), - double_int_sub ( - loop->nb_iterations_upper_bound, - double_int_one), - unsigned_p, &overflow); - /* If the multiplication overflowed we can't do a meaningful - adjustment. Likewise if the result doesn't fit in the type - of the induction variable. For a signed type we have to - check whether the result has the expected signedness which - is that of the step as nb_iterations_upper_bound is unsigned. */ - if (!overflow - && double_int_fits_to_tree_p (TREE_TYPE (init), dtmp) - && (unsigned_p - || ((dtmp.high ^ TREE_INT_CST_HIGH (step)) >= 0))) - { - tem = double_int_to_tree (TREE_TYPE (init), dtmp); - extract_range_from_binary_expr (&maxvr, PLUS_EXPR, - TREE_TYPE (init), init, tem); - /* Likewise if the addition did. */ - if (maxvr.type == VR_RANGE) + double_int nit; + + if (estimated_loop_iterations (loop, true, &nit)) + { + value_range_t maxvr = { VR_UNDEFINED, NULL_TREE, NULL_TREE, NULL }; + double_int dtmp; + bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (step)); + int overflow = 0; + + dtmp = double_int_mul_with_sign (tree_to_double_int (step), nit, + unsigned_p, &overflow); + /* If the multiplication overflowed we can't do a meaningful + adjustment. Likewise if the result doesn't fit in the type + of the induction variable. For a signed type we have to + check whether the result has the expected signedness which + is that of the step as number of iterations is unsigned. */ + if (!overflow + && double_int_fits_to_tree_p (TREE_TYPE (init), dtmp) + && (unsigned_p + || ((dtmp.high ^ TREE_INT_CST_HIGH (step)) >= 0))) { - tmin = maxvr.min; - tmax = maxvr.max; + tem = double_int_to_tree (TREE_TYPE (init), dtmp); + extract_range_from_binary_expr (&maxvr, PLUS_EXPR, + TREE_TYPE (init), init, tem); + /* Likewise if the addition did. */ + if (maxvr.type == VR_RANGE) + { + tmin = maxvr.min; + tmax = maxvr.max; + } } } } -- 2.30.2