From: Jan Hubicka Date: Mon, 30 May 2016 17:58:19 +0000 (+0200) Subject: invoke.texi (-fpeel-loops,-O3): Update documentation. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a448ff403b72f8e9d914a63d89ebca8246dad0ef;p=gcc.git invoke.texi (-fpeel-loops,-O3): Update documentation. * doc/invoke.texi (-fpeel-loops,-O3): Update documentation. * opts.c (default_options): Enable peel loops at -O3. * tree-ssa-loop-ivcanon.c (peeled_loops): New static var. (try_peel_loop): Do not re-peel already peeled loops; use likely upper bounds; fix profile updating. (pass_complete_unroll::execute): Initialize peeled_loops. * gcc.dg/tree-ssa/peel1.c: New testcase. * gcc.dg/tree-ssa/pr61743-1.c: Disable loop peeling. * gcc.dg/tree-ssa/pr61743-2.c: Disable loop peeling. From-SVN: r236892 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bc283630d13..e4816b34b14 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2016-05-30 Jan Hubicka + + * doc/invoke.texi (-fpeel-loops,-O3): Update documentation. + * opts.c (default_options): Enable peel loops at -O3. + * tree-ssa-loop-ivcanon.c (peeled_loops): New static var. + (try_peel_loop): Do not re-peel already peeled loops; + use likely upper bounds; fix profile updating. + (pass_complete_unroll::execute): Initialize peeled_loops. + 2016-05-30 Martin Liska * tree-ssa-loop-ivopts.c (get_computation_cost_at): Scale diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 9e921335c3e..aaafff2dbd8 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6338,7 +6338,8 @@ by @option{-O2} and also turns on the @option{-finline-functions}, @option{-fgcse-after-reload}, @option{-ftree-loop-vectorize}, @option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths} @option{-ftree-slp-vectorize}, @option{-fvect-cost-model}, -@option{-ftree-partial-pre} and @option{-fipa-cp-clone} options. +@option{-ftree-partial-pre}, @option{-fpeel-loops} +and @option{-fipa-cp-clone} options. 
@item -O0 @opindex O0 @@ -8661,10 +8662,11 @@ the loop is entered. This usually makes programs run more slowly. @item -fpeel-loops @opindex fpeel-loops Peels loops for which there is enough information that they do not -roll much (from profile feedback). It also turns on complete loop peeling -(i.e.@: complete removal of loops with small constant number of iterations). +roll much (from profile feedback or static analysis). It also turns on +complete loop peeling (i.e.@: complete removal of loops with small constant +number of iterations). -Enabled with @option{-fprofile-use}. +Enabled with @option{-O3} and/or @option{-fprofile-use}. @item -fmove-loop-invariants @opindex fmove-loop-invariants diff --git a/gcc/opts.c b/gcc/opts.c index 63d41ca9deb..e80331f4bf5 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -535,6 +535,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC }, { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, + { OPT_LEVELS_3_PLUS, OPT_fpeel_loops, NULL, 1 }, /* -Ofast adds optimizations to -O3. */ { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 }, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2593bcd0430..67cab5797a5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-05-30 Jan Hubicka + + * gcc.dg/tree-ssa/peel1.c: New testcase. + * gcc.dg/tree-ssa/pr61743-1.c: Disable loop peeling. + * gcc.dg/tree-ssa/pr61743-2.c: Disable loop peeling. 
+ 2016-05-30 Tom de Vries * gcc.dg/graphite/pr69067.c (main): Remove superfluous argument in call diff --git a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c new file mode 100644 index 00000000000..40a1f3a2132 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-cunroll-details" } */ +struct foo {int b; int a[3];} foo; +void add(struct foo *a,int l) +{ + int i; + for (i=0;i<l;i++) + a->a[i]++; +} +/* { dg-final { scan-tree-dump "Loop 1 likely iterates at most 3 times." "cunroll"} } */ +/* { dg-final { scan-tree-dump "Peeled loop 1, 4 times." "cunroll"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c index 2c8db1a01bb..8041c6a323e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -funroll-loops -fno-tree-vectorize -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -funroll-loops -fno-tree-vectorize -fdump-tree-cunroll-details -fno-peel-loops" } */ #define N 8 #define M 14 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-2.c index 92b02f5a98c..5bf38c5c743 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -funroll-loops -fno-tree-vectorize -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -funroll-loops -fno-tree-vectorize -fdump-tree-cunroll-details -fno-peel-loops" } */ #define N 8 #define M 14 diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index 4cfdb27bb67..319a4106930 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -594,6 +594,8 @@ remove_redundant_iv_tests (struct loop *loop) /* Stores loops that will be unlooped after we process whole loop tree. 
*/ static vec<loop_p> loops_to_unloop; static vec<int> loops_to_unloop_nunroll; +/* Stores loops that has been peeled. */ +static bitmap peeled_loops; /* Cancel all fully unrolled loops by putting __builtin_unreachable on the latch edge. @@ -962,14 +964,17 @@ try_peel_loop (struct loop *loop, vec<edge> to_remove = vNULL; edge e; - /* If the iteration bound is known and large, then we can safely eliminate - the check in peeled copies. */ - if (TREE_CODE (niter) != INTEGER_CST) - exit = NULL; - - if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0) + if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 + || !peeled_loops) return false; + if (bitmap_bit_p (peeled_loops, loop->num)) + { + if (dump_file) + fprintf (dump_file, "Not peeling: loop is already peeled\n"); + return false; + } + /* Peel only innermost loops. While the code is perfectly capable of peeling non-innermost loops, the heuristics would probably need some improvements. */ @@ -989,6 +994,8 @@ try_peel_loop (struct loop *loop, /* Check if there is an estimate on the number of iterations. 
*/ npeel = estimated_loop_iterations_int (loop); + if (npeel < 0) + npeel = likely_max_loop_iterations_int (loop); if (npeel < 0) { if (dump_file) @@ -1036,8 +1043,7 @@ try_peel_loop (struct loop *loop, && wi::leu_p (npeel, wi::to_widest (niter))) { bitmap_ones (wont_exit); - if (wi::eq_p (wi::to_widest (niter), npeel)) - bitmap_clear_bit (wont_exit, 0); + bitmap_clear_bit (wont_exit, 0); } else { @@ -1074,14 +1080,14 @@ try_peel_loop (struct loop *loop, } if (loop->any_upper_bound) { - if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) + if (wi::ltu_p (npeel, loop->nb_iterations_upper_bound)) loop->nb_iterations_upper_bound -= npeel; else loop->nb_iterations_upper_bound = 0; } if (loop->any_likely_upper_bound) { - if (wi::ltu_p (npeel, loop->nb_iterations_estimate)) + if (wi::ltu_p (npeel, loop->nb_iterations_likely_upper_bound)) loop->nb_iterations_likely_upper_bound -= npeel; else { @@ -1107,6 +1113,7 @@ try_peel_loop (struct loop *loop, else if (loop->header->frequency) scale = RDIV (entry_freq * REG_BR_PROB_BASE, loop->header->frequency); scale_loop_profile (loop, scale, 0); + bitmap_set_bit (peeled_loops, loop->num); return true; } /* Adds a canonical induction variable to LOOP if suitable. @@ -1519,9 +1526,20 @@ pass_complete_unroll::execute (function *fun) if (number_of_loops (fun) <= 1) return 0; - return tree_unroll_loops_completely (flag_unroll_loops - || flag_peel_loops - || optimize >= 3, true); + /* If we ever decide to run loop peeling more than once, we will need to + track loops already peeled in loop structures themselves to avoid + re-peeling the same loop multiple times. */ + if (flag_peel_loops) + peeled_loops = BITMAP_ALLOC (NULL); + int val = tree_unroll_loops_completely (flag_unroll_loops + || flag_peel_loops + || optimize >= 3, true); + if (peeled_loops) + { + BITMAP_FREE (peeled_loops); + peeled_loops = NULL; + } + return val; } } // anon namespace