From dc236397e4d96647c5d72fd9b6d503ca0b187bee Mon Sep 17 00:00:00 2001 From: Michael Matz Date: Fri, 8 Dec 2017 17:41:58 +0000 Subject: [PATCH] re PR tree-optimization/83323 (186.crafty miscompares) Fix PR83323 * gimple-loop-jam (unroll_jam_possible_p): Correct test for head-controlled loops and loop BBs. * common.opt (funroll-and-jam): Remove, instead ... (floop-unroll-and-jam): ... reuse this option. * opts.c (default_options_table): Use OPT_floop_unroll_and_jam. * doc/invoke.texi (-funroll-and-jam): Move docu to ... (-floop-unroll-and-jam): ... this option. testsuite/ * gcc.dg/pr83323.c: New test. * gcc.dg/unroll-and-jam.c: Use -floop-unroll-and-jam. From-SVN: r255514 --- gcc/ChangeLog | 11 +++++++ gcc/common.opt | 8 ++---- gcc/doc/invoke.texi | 8 ++---- gcc/gimple-loop-jam.c | 41 +++++++++++++-------------- gcc/opts.c | 2 +- gcc/testsuite/ChangeLog | 6 ++++ gcc/testsuite/gcc.dg/pr83323.c | 23 +++++++++++++++ gcc/testsuite/gcc.dg/unroll-and-jam.c | 2 +- 8 files changed, 67 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr83323.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c0d1662854e..35c27beeeb5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2017-12-08 Michael Matz + + Fix PR tree-optimization/83323 + * gimple-loop-jam (unroll_jam_possible_p): Correct test for + head-controlled loops and loop BBs. + * common.opt (funroll-and-jam): Remove, instead ... + (floop-unroll-and-jam): ... reuse this option. + * opts.c (default_options_table): Use OPT_floop_unroll_and_jam. + * doc/invoke.texi (-funroll-and-jam): Move docu to ... + (-floop-unroll-and-jam): ... this option. + 2017-12-08 Jakub Jelinek * ipa-polymorphic-call.c (noncall_stmt_may_be_vtbl_ptr_store): Fix diff --git a/gcc/common.opt b/gcc/common.opt index 6fab2abd931..57b3cd7304a 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1512,8 +1512,8 @@ Common Alias(floop-nest-optimize) Enable loop nest transforms. Same as -floop-nest-optimize. floop-unroll-and-jam -Common Alias(floop-nest-optimize) -Enable loop nest transforms. Same as -floop-nest-optimize. +Common Report Var(flag_unroll_jam) Optimization +Perform unroll-and-jam on loops. fgnu-tm Common Report Var(flag_tm) @@ -2695,10 +2695,6 @@ fsplit-loops Common Report Var(flag_split_loops) Optimization Perform loop splitting. -funroll-and-jam -Common Report Var(flag_unroll_jam) Optimization -Perform unroll-and-jam on loops. - funwind-tables Common Report Var(flag_unwind_tables) Optimization Just generate unwind tables for exception handling. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 50740c5e905..1413095dd6c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -437,7 +437,7 @@ Objective-C and Objective-C++ Dialects}. -ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol -ftree-switch-conversion -ftree-tail-merge @gol -ftree-ter -ftree-vectorize -ftree-vrp -funconstrained-commons @gol --funit-at-a-time -funroll-all-loops -funroll-loops -funroll-and-jam @gol +-funit-at-a-time -funroll-all-loops -funroll-loops @gol -funsafe-math-optimizations -funswitch-loops @gol -fipa-ra -fvariable-expansion-in-unroller -fvect-cost-model -fvpt @gol -fweb -fwhole-program -fwpa -fuse-linker-plugin @gol @@ -8511,11 +8511,9 @@ at @option{-O} and higher. @item -ftree-loop-linear @itemx -floop-strip-mine @itemx -floop-block -@itemx -floop-unroll-and-jam @opindex ftree-loop-linear @opindex floop-strip-mine @opindex floop-block -@opindex floop-unroll-and-jam Perform loop nest optimizations. Same as @option{-floop-nest-optimize}. To use this code transformation, GCC has to be configured with @option{--with-isl} to enable the Graphite loop @@ -9789,8 +9787,8 @@ for one side of the iteration space and false for the other. Move branches with loop invariant conditions out of the loop, with duplicates of the loop on both branches (modified according to result of the condition). -@item -funroll-and-jam -@opindex funroll-and-jam +@item -floop-unroll-and-jam +@opindex floop-unroll-and-jam Apply unroll and jam transoformations on feasible loops. In a loop nest this unrolls the outer loop by some factor and fuses the resulting multiple inner loops. diff --git a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c index 32f813b13e1..8ed1bef8f77 100644 --- a/gcc/gimple-loop-jam.c +++ b/gcc/gimple-loop-jam.c @@ -152,7 +152,7 @@ merge_loop_tree (struct loop *loop, struct loop *old) free (bbs); } -/* BB exits the outer loop of an unroll-and-jam situation. +/* BB is part of the outer loop of an unroll-and-jam situation. Check if any statements therein would prevent the transformation. */ static bool @@ -160,9 +160,10 @@ bb_prevents_fusion_p (basic_block bb) { gimple_stmt_iterator gsi; /* BB is duplicated by outer unrolling and then all N-1 first copies - move into the body of the fused inner loop. The last copy remains - the exit block of the outer loop and is still outside the inner loop - also after fusion. We can't allow this for some effects of BB: + move into the body of the fused inner loop. If BB exits the outer loop + the last copy still doess so, and the first N-1 copies are cancelled + by loop unrolling, so also after fusion it's the exit block. + But there might be other reasons that prevent fusion: * stores or unknown side-effects prevent fusion * loads don't * computations into SSA names: these aren't problematic. Their @@ -204,6 +205,19 @@ unroll_jam_possible_p (struct loop *outer, struct loop *loop) if (outer->inner != loop || loop->next) return false; + /* Prevent head-controlled inner loops, that we usually have. + The guard block would need to be accepted + (invariant condition either entering or skipping the loop), + without also accepting arbitrary control flow. When unswitching + ran before us (as with -O3) this won't be a problem because its + outer loop unswitching will have moved out the invariant condition. + + If we do that we need to extend fuse_loops() to cope with this + by threading through the (still invariant) copied condition + between the two loop copies. */ + if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header)) + return false; + /* The number of iterations of the inner loop must be loop invariant with respect to the outer loop. */ if (!number_of_iterations_exit (loop, single_exit (loop), &niter, @@ -218,23 +232,8 @@ unroll_jam_possible_p (struct loop *outer, struct loop *loop) n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun)); for (i = 0; i < n; i++) - { - if (bbs[i]->loop_father == outer - && bbs[i] != outer->latch && bbs[i] != outer->header - && (!loop_exits_from_bb_p (outer, bbs[i]) - || bb_prevents_fusion_p (bbs[i]))) - break; - /* XXX Note that the above disallows head-controlled inner loops, - that we usually have. The guard block would need to be accepted - (invariant condition either entering or skipping the loop), - without also accepting arbitrary control flow. When unswitching - ran before us (as with -O3) this won't be a problem because its - outer loop unswitching will have moved out the invariant condition. - - If we do that we need to extend fuse_loops() to cope with this - by threading through the (still invariant) copied condition - between the two loop copies. */ - } + if (bbs[i]->loop_father == outer && bb_prevents_fusion_p (bbs[i])) + break; free (bbs); if (i != n) return false; diff --git a/gcc/opts.c b/gcc/opts.c index 98fbf537caa..a157b5f8816 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -536,7 +536,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_funroll_and_jam, NULL, 1 }, + { OPT_LEVELS_3_PLUS, OPT_floop_unroll_and_jam, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 }, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6a68ea6e965..396e26a6136 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-12-08 Michael Matz + + Fix PR tree-optimization/83323 + * gcc.dg/pr83323.c: New test. + * gcc.dg/unroll-and-jam.c: Use -floop-unroll-and-jam. + 2017-12-08 Richard Biener PR middle-end/81782 diff --git a/gcc/testsuite/gcc.dg/pr83323.c b/gcc/testsuite/gcc.dg/pr83323.c new file mode 100644 index 00000000000..6111745550f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr83323.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0" } */ +int x[1024], y[1024]; + +void __attribute__((noipa)) foo () +{ + for (int i = 0; i < 1024; ++i) + { + x[i] = 0; + for (int j = 0; j < 1024; ++j) + if (!y[j]) + x[i] = 1; + } +} + +int main() +{ + y[1023] = 1; + foo (); + if (x[1] != 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/unroll-and-jam.c b/gcc/testsuite/gcc.dg/unroll-and-jam.c index 59d60abe1b6..70910d318cb 100644 --- a/gcc/testsuite/gcc.dg/unroll-and-jam.c +++ b/gcc/testsuite/gcc.dg/unroll-and-jam.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 -funroll-and-jam --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */ +/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */ /* { dg-require-effective-target int32plus } */ #include -- 2.30.2