From 98bdbb39a6676776c528f3b51ce740669c06d708 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Mon, 30 May 2016 12:40:33 +0200 Subject: [PATCH] predict.h (force_edge_cold): Declare. * predict.h (force_edge_cold): Declare. * predict.c (force_edge_cold): New function. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Fix profile updating. (canonicalize_loop_induction_variables): Fix formating. * gcc.dg/tree-ssa/cunroll-12.c: New testcase. * gcc.dg/tree-ssa/cunroll-13.c: New testcase. * gcc.dg/tree-ssa/cunroll-14.c: New testcase. From-SVN: r236874 --- gcc/ChangeLog | 8 ++ gcc/predict.c | 96 ++++++++++++++++++++++ gcc/predict.h | 1 + gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c | 11 +++ gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c | 16 ++++ gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c | 14 ++++ gcc/tree-ssa-loop-ivcanon.c | 29 +++++-- 8 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6488753958c..815a43da5ec 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2016-05-30 Jan Hubicka + + * predict.h (force_edge_cold): Declare. + * predict.c (force_edge_cold): New function. + * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Fix profile + updating. + (canonicalize_loop_induction_variables): Fix formating. + 2016-05-30 Eric Botcazou * config/visium/visium.c (visium_split_double_add): Minor tweaks. diff --git a/gcc/predict.c b/gcc/predict.c index 31c5565c309..396e1505395 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -3249,3 +3249,99 @@ report_predictor_hitrates (void) loop_optimizer_finalize (); } +/* Force edge E to be cold. + If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise + keep low probability to represent possible error in a guess. This is used + i.e. in case we predict loop to likely iterate given number of times but + we are not 100% sure. + + This function locally updates profile without attempt to keep global + consistency which can not be reached in full generality without full profile + rebuild from probabilities alone. Doing so is not necessarily a good idea + because frequencies and counts may be more realistic then probabilities. + + In some cases (such as for elimination of early exits during full loop + unrolling) the caller can ensure that profile will get consistent + afterwards. */ + +void +force_edge_cold (edge e, bool impossible) +{ + gcov_type count_sum = 0; + int prob_sum = 0; + edge_iterator ei; + edge e2; + gcov_type old_count = e->count; + int old_probability = e->probability; + gcov_type gcov_scale = REG_BR_PROB_BASE; + int prob_scale = REG_BR_PROB_BASE; + + /* If edge is already improbably or cold, just return. */ + if (e->probability <= impossible ? PROB_VERY_UNLIKELY : 0 + && (!impossible || !e->count)) + return; + FOR_EACH_EDGE (e2, ei, e->src->succs) + if (e2 != e) + { + count_sum += e2->count; + prob_sum += e2->probability; + } + + /* If there are other edges out of e->src, redistribute probabilitity + there. */ + if (prob_sum) + { + e->probability + = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY); + if (old_probability) + e->count = RDIV (e->count * e->probability, old_probability); + else + e->count = MIN (e->count, impossible ? 0 : 1); + + if (count_sum) + gcov_scale = RDIV ((count_sum + old_count - e->count) * REG_BR_PROB_BASE, + count_sum); + prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE, + prob_sum); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Making edge %i->%i %s by redistributing " + "probability to other edges.\n", + e->src->index, e->dest->index, + impossible ? "imposisble" : "cold"); + FOR_EACH_EDGE (e2, ei, e->src->succs) + if (e2 != e) + { + e2->count = RDIV (e2->count * gcov_scale, REG_BR_PROB_BASE); + e2->probability = RDIV (e2->probability * prob_scale, + REG_BR_PROB_BASE); + } + } + /* If all edges out of e->src are unlikely, the basic block itself + is unlikely. */ + else + { + e->probability = REG_BR_PROB_BASE; + + /* If we did not adjusting, the source basic block has no likely edeges + leaving other direction. In that case force that bb cold, too. + This in general is difficult task to do, but handle special case when + BB has only one predecestor. This is common case when we are updating + after loop transforms. */ + if (!prob_sum && !count_sum && single_pred_p (e->src) + && e->src->frequency > (impossible ? 0 : 1)) + { + int old_frequency = e->src->frequency; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Making bb %i %s.\n", e->src->index, + impossible ? "imposisble" : "cold"); + e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1); + e->src->count = e->count = RDIV (e->src->count * e->src->frequency, + old_frequency); + force_edge_cold (single_pred_edge (e->src), impossible); + } + else if (dump_file && (dump_flags & TDF_DETAILS) + && maybe_hot_bb_p (cfun, e->src)) + fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index, + impossible ? "imposisble" : "cold"); + } +} diff --git a/gcc/predict.h b/gcc/predict.h index 86cb3bd57ce..79942871bac 100644 --- a/gcc/predict.h +++ b/gcc/predict.h @@ -91,5 +91,6 @@ extern tree build_predict_expr (enum br_predictor, enum prediction); extern const char *predictor_name (enum br_predictor); extern void rebuild_frequencies (void); extern void report_predictor_hitrates (void); +extern void force_edge_cold (edge, bool); #endif /* GCC_PREDICT_H */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index be6710268cb..8b989b4e5d6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-05-30 Jan Hubicka + + * gcc.dg/tree-ssa/cunroll-12.c: New testcase. + * gcc.dg/tree-ssa/cunroll-13.c: New testcase. + * gcc.dg/tree-ssa/cunroll-14.c: New testcase. + 2016-05-30 Tom de Vries PR tree-optimization/69067 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c new file mode 100644 index 00000000000..0cc44f42fd8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll-blocks-details" } */ +struct a {int a[8];int b;}; +void +t(struct a *a) +{ + for (int i=0;a->a[i];i++) + a->a[i]++; +} +/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c new file mode 100644 index 00000000000..6e4417a7520 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdisable-tree-cunrolli -fdisable-tree-vrp1 -fdump-tree-cunroll-blocks-details" } */ +struct a {int a[8];int b;}; +void +t(struct a *a) +{ + for (int i=0;i<123456 && a->a[i];i++) + a->a[i]++; +} +/* This pass relies on the fact that we do not eliminate the redundant test for i early. + It is necessary to disable all passes that do so. At the moment it is vrp1 and cunrolli. */ +/* { dg-final { scan-tree-dump-times "Loop 1 iterates 123454 times" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c new file mode 100644 index 00000000000..4d27b185a86 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-cunroll-blocks-details" } */ +struct a {int a[100];}; +void +t(struct a *a) +{ + for (int i=0;i<5 && a->a[i];i++) + a->a[i]++; +} +/* { dg-final { scan-tree-dump-times "loop with 5 iterations completely unrolled" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "Loop 1 iterates 4 times" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */ diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index de447f40e78..4673d022753 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -730,8 +730,14 @@ try_unroll_loop_completely (struct loop *loop, if (ul == UL_SINGLE_ITER) return false; + /* EXIT can be removed only if we are sure it passes first N_UNROLL + iterations. */ + bool remove_exit = (exit && niter + && TREE_CODE (niter) == INTEGER_CST + && wi::leu_p (n_unroll, wi::to_widest (niter))); + large = tree_estimate_loop_size - (loop, exit, edge_to_cancel, &size, + (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); ninsns = size.overall; if (large) @@ -837,8 +843,20 @@ try_unroll_loop_completely (struct loop *loop, initialize_original_copy_tables (); wont_exit = sbitmap_alloc (n_unroll + 1); - bitmap_ones (wont_exit); - bitmap_clear_bit (wont_exit, 0); + if (exit && niter + && TREE_CODE (niter) == INTEGER_CST + && wi::leu_p (n_unroll, wi::to_widest (niter))) + { + bitmap_ones (wont_exit); + if (wi::eq_p (wi::to_widest (niter), n_unroll) + || edge_to_cancel) + bitmap_clear_bit (wont_exit, 0); + } + else + { + exit = NULL; + bitmap_clear (wont_exit); + } if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), n_unroll, wont_exit, @@ -869,6 +887,7 @@ try_unroll_loop_completely (struct loop *loop, if (edge_to_cancel) { gcond *cond = as_a (last_stmt (edge_to_cancel->src)); + force_edge_cold (edge_to_cancel, true); if (edge_to_cancel->flags & EDGE_TRUE_VALUE) gimple_cond_make_false (cond); else @@ -1112,8 +1131,8 @@ canonicalize_loop_induction_variables (struct loop *loop, if (dump_file && (dump_flags & TDF_DETAILS) && likely_max_loop_iterations_int (loop) >= 0) { - fprintf (dump_file, "Loop likely %d iterates at most %i times.\n", loop->num, - (int)likely_max_loop_iterations_int (loop)); + fprintf (dump_file, "Loop %d likely iterates at most %i times.\n", + loop->num, (int)likely_max_loop_iterations_int (loop)); } /* Remove exits that are known to be never taken based on loop bound. -- 2.30.2