From 85bc75f87b45903d46555d0e1c013242de5c7c48 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Thu, 13 Sep 2018 14:15:41 +0000
Subject: [PATCH] re PR tree-optimization/87263 (ICE on valid code at -O1:
 verify_ssa failed)

2018-09-13  Richard Biener

	PR tree-optimization/87263
	* tree-ssa-sccvn.c (visit_phi): Revert some earlier changes.
	(struct unwind_state): Add max_rpo field.
	(do_rpo_vn): Allow up-to-date loop state to be used when not
	iterating.  Compute max_rpo, the max RPO number a block can be
	backwards reached from.  Re-write non-iterating mode to a RPO
	ordered worklist approach, separating it from the iterating mode.

	* gcc.dg/torture/pr87263.c: New testcase.
	* gcc.dg/torture/ssa-fre-2.c: Likewise.
	* gcc.dg/torture/ssa-fre-3.c: Likewise.
	* gcc.dg/torture/ssa-fre-4.c: Likewise.

From-SVN: r264273
---
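Notes (illustration only, not part of the commit):

The non-iterating mode used to share one processing loop with the
iterating mode and, as the removed code in the tree-ssa-sccvn.c hunks
below shows, unconditionally marked every block with an incoming
backedge as executable, so blocks only reachable through a
never-executable backedge could be processed with inconsistent state.
The rewrite instead visits only blocks proven reachable, strictly in
RPO order, off a worklist.  A minimal self-contained sketch of that
worklist discipline; the CFG encoding (succs, on_worklist, visited)
is an illustrative stand-in, not GCC's data structures:

  #include <stdbool.h>
  #include <stdio.h>

  #define NBLK 5

  /* Successor RPO indices per block, -1 terminated; block 0 is the
     entry block.  */
  static const int succs[NBLK][3] = {
    {  1, -1, -1 },
    {  2,  3, -1 },
    {  4, -1, -1 },
    {  4, -1, -1 },
    { -1, -1, -1 },
  };

  int
  main (void)
  {
    bool on_worklist[NBLK] = { true };   /* Seed with the entry block.  */
    bool visited[NBLK] = { false };

    for (;;)
      {
        /* Pick the lowest RPO index on the worklist, mirroring
           bitmap_first_set_bit in the patch; this takes blocks off
           the worklist in RPO order.  */
        int idx = -1;
        for (int i = 0; i < NBLK; ++i)
          if (on_worklist[i])
            {
              idx = i;
              break;
            }
        if (idx == -1)
          break;
        on_worklist[idx] = false;

        /* Stand-in for process_bb: value-number the block here.  */
        printf ("Processing block %d\n", idx);
        visited[idx] = true;

        /* Queue reachable successors that were not visited yet.  */
        for (int j = 0; succs[idx][j] != -1; ++j)
          if (!visited[succs[idx][j]])
            on_worklist[succs[idx][j]] = true;
      }
    return 0;
  }

Unlike a plain RPO sweep, the worklist never touches a block that no
executable edge reaches, which is what the new ssa-fre-4.c testcase
relies on.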
 gcc/ChangeLog                            |  10 +
 gcc/testsuite/ChangeLog                  |   8 +
 gcc/testsuite/gcc.dg/torture/pr87263.c   |  24 ++
 gcc/testsuite/gcc.dg/torture/ssa-fre-2.c |  21 ++
 gcc/testsuite/gcc.dg/torture/ssa-fre-3.c |  23 ++
 gcc/testsuite/gcc.dg/torture/ssa-fre-4.c |  17 ++
 gcc/tree-ssa-sccvn.c                     | 279 +++++++++++++----------
 7 files changed, 266 insertions(+), 116 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr87263.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/ssa-fre-2.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/ssa-fre-3.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/ssa-fre-4.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b754f10ff66..da616e44817 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2018-09-13  Richard Biener
+
+	PR tree-optimization/87263
+	* tree-ssa-sccvn.c (visit_phi): Revert some earlier changes.
+	(struct unwind_state): Add max_rpo field.
+	(do_rpo_vn): Allow up-to-date loop state to be used when not iterating.
+	Compute max_rpo, the max RPO number a block can be backwards reached
+	from.  Re-write non-iterating mode to a RPO ordered worklist approach,
+	separating it from the iterating mode.
+
 2018-09-13  Vlad Lazar
 
 	* haifa-sched.c (rank_for_schedule): Schedule by INSN_COST.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index be1e8b559ff..f6b0e641333 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2018-09-13  Richard Biener
+
+	PR tree-optimization/87263
+	* gcc.dg/torture/pr87263.c: New testcase.
+	* gcc.dg/torture/ssa-fre-2.c: Likewise.
+	* gcc.dg/torture/ssa-fre-3.c: Likewise.
+	* gcc.dg/torture/ssa-fre-4.c: Likewise.
+
 2018-09-13  Omar Sandoval
 	    Tom de Vries
 
diff --git a/gcc/testsuite/gcc.dg/torture/pr87263.c b/gcc/testsuite/gcc.dg/torture/pr87263.c
new file mode 100644
index 00000000000..3d6a0d7d892
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr87263.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+
+int a, b, c;
+
+int main ()
+{
+  int g, *h[3] = {&g, &g, &g};
+  if (h[2] == 0)
+    ;
+  else
+    {
+      int i[1];
+      if (a)
+	while (a)
+	  L:;
+      else
+	{
+	  int k = b;
+	}
+    }
+  if ((b < c) > b)
+    goto L;
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/ssa-fre-2.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-2.c
new file mode 100644
index 00000000000..5ce47d21587
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-2.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+
+int x;
+int main()
+{
+  int i = 0;
+  x = 0;
+  if (x)
+    {
+      for (; i < 10; ++i)
+	{
+doit:
+	  x = i;
+	}
+    }
+  if (!x)
+    goto doit;
+  if (x != 9)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/ssa-fre-3.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-3.c
new file mode 100644
index 00000000000..2ec1c28375e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+/* { dg-additional-options "-fdump-tree-fre1" } */
+
+int x;
+int main()
+{
+  x = 0;
+  int z = x;
+  int w = 1;
+  for (int i = 0; i < 32; ++i)
+    {
+      if (z)
+	w = 2;
+      else
+	w = 1;
+      if (w == 2)
+	__builtin_abort ();
+    }
+  return w;
+}
+
+/* { dg-final { scan-tree-dump-not "abort" "fre1" } } */
diff --git a/gcc/testsuite/gcc.dg/torture/ssa-fre-4.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-4.c
new file mode 100644
index 00000000000..d09af7819a8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+/* { dg-additional-options "-fdump-tree-fre1" } */
+
+int x;
+int main()
+{
+  x = 0;
+  if (x)
+    {
+      for (int i = 0; i < 10; ++i)
+	x = i;
+    }
+  return x;
+}
+
+/* { dg-final { scan-tree-dump "return 0;" "fre1" } } */
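The first tree-ssa-sccvn.c hunk below trims visit_phi back to treating
only virtual operands specially when the common PHI value flows in over
a backedge.  As a rough sketch of the underlying merge rule (simplified;
merge_phi_args, the integer value encoding and VARYING are illustrative
stand-ins for GCC's value-numbering lattice):

  #include <stdbool.h>

  enum { VARYING = -1 };

  /* Merge the values flowing into a PHI over its executable edges
     (NARGS >= 1): if all arguments agree on one value the PHI gets
     that value, otherwise it drops to VARYING.  A value seen only on
     backedges is not trusted.  */
  static int
  merge_phi_args (const int *vals, const bool *on_backedge, int nargs)
  {
    int sameval = vals[0];
    bool seen_non_backedge = !on_backedge[0];

    for (int i = 1; i < nargs; ++i)
      {
        if (!on_backedge[i])
          seen_non_backedge = true;
        if (vals[i] != sameval)
          return VARYING;	/* Conflicting inputs: give up.  */
      }
    return seen_non_backedge ? sameval : VARYING;
  }

  int
  main (void)
  {
    int vals[2] = { 7, 7 };
    bool backedge[2] = { false, true };
    return merge_phi_args (vals, backedge, 2) == 7 ? 0 : 1;
  }

Note that visit_phi keeps a stricter rule for virtual operands: even an
agreeing backedge value is rejected there, since the alias-walking code
must not see values from the back of the loop (see
gcc.dg/torture/pr87176.c referenced in the hunk).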
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 7f2562fb4d3..1c95306fd82 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -4196,10 +4196,7 @@ visit_phi (gimple *phi, bool *inserted, bool backedges_varying_p)
 	}
     }
 
-  /* If the value we want to use is the backedge and that wasn't visited
-     yet or if we should take it as VARYING but it has a non-VARYING
-     value drop to VARYING.  This only happens when not iterating.
-     If we value-number a virtual operand never value-number to the
+  /* If we value-number a virtual operand never value-number to the
      value from the backedge as that confuses the alias-walking code.
      See gcc.dg/torture/pr87176.c.  If the value is the same on a
      non-backedge everything is OK though.  */
   if (backedge_val
       && !seen_non_backedge
       && TREE_CODE (backedge_val) == SSA_NAME
       && sameval == backedge_val
-      && (SSA_NAME_IS_VIRTUAL_OPERAND (backedge_val)
-	  || !SSA_VISITED (backedge_val)
-	  || SSA_VAL (backedge_val) != backedge_val))
+      && SSA_NAME_IS_VIRTUAL_OPERAND (backedge_val))
     /* Note this just drops to VARYING without inserting the
        PHI into the hashes.  */
     result = PHI_RESULT (phi);
@@ -6224,6 +6219,9 @@ struct unwind_state
   /* Whether to handle this as iteration point or whether to treat
      incoming backedge PHI values as varying.  */
   bool iterate;
+  /* Maximum RPO index this block is reachable from.  */
+  int max_rpo;
+
   /* Unwind state.  */
   void *ob_top;
   vn_reference_t ref_top;
   vn_phi_t phi_top;
@@ -6309,8 +6307,8 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
     }
 
   int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fn) - NUM_FIXED_BLOCKS);
-  int n = rev_post_order_and_mark_dfs_back_seme (fn, entry, exit_bbs,
-						 iterate, rpo);
+  int n = rev_post_order_and_mark_dfs_back_seme
+    (fn, entry, exit_bbs, !loops_state_satisfies_p (LOOPS_NEED_FIXUP), rpo);
   /* rev_post_order_and_mark_dfs_back_seme fills RPO in reverse order.  */
   for (int i = 0; i < n / 2; ++i)
     std::swap (rpo[i], rpo[n-i-1]);
@@ -6380,6 +6378,7 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
     {
       basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[i]);
       rpo_state[i].visited = 0;
+      rpo_state[i].max_rpo = i;
       bb->flags &= ~BB_EXECUTABLE;
       bool has_backedges = false;
       edge e;
@@ -6389,15 +6388,18 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
 	  if (e->flags & EDGE_DFS_BACK)
 	    has_backedges = true;
 	  if (! iterate && (e->flags & EDGE_DFS_BACK))
-	    {
-	      e->flags |= EDGE_EXECUTABLE;
-	      /* ??? Strictly speaking we only need to unconditionally
-		 process a block when it is in an irreducible region,
-		 thus when it may be reachable via the backedge only.  */
-	      bb->flags |= BB_EXECUTABLE;
-	    }
+	    e->flags |= EDGE_EXECUTABLE;
 	  else
 	    e->flags &= ~EDGE_EXECUTABLE;
+	  if (e == entry)
+	    continue;
+	  if (bb_to_rpo[e->src->index] > i)
+	    rpo_state[i].max_rpo = MAX (rpo_state[i].max_rpo,
+					bb_to_rpo[e->src->index]);
+	  else
+	    rpo_state[i].max_rpo
+	      = MAX (rpo_state[i].max_rpo,
+		     rpo_state[bb_to_rpo[e->src->index]].max_rpo);
 	}
       rpo_state[i].iterate = iterate && has_backedges;
     }
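The hunk above computes rpo_state[i].max_rpo in one forward pass over
the blocks in RPO order: a backedge predecessor (RPO index greater than
i) contributes its own index, a forward predecessor contributes its
already computed max_rpo, so max_rpo[i] ends up as the largest RPO index
from which block i can still be reached.  Extracted into a
self-contained sketch (the edge-list encoding is illustrative, not GCC
API):

  #include <stdio.h>

  #define MAX(a, b) ((a) > (b) ? (a) : (b))
  #define NBLK 4
  #define NEDGE 4

  /* Edges as (src, dest) pairs of RPO indices; (3, 1) is a backedge.  */
  static const int edge[NEDGE][2] = { {0, 1}, {1, 2}, {2, 3}, {3, 1} };

  int
  main (void)
  {
    int max_rpo[NBLK];
    for (int i = 0; i < NBLK; ++i)
      {
        max_rpo[i] = i;
        for (int k = 0; k < NEDGE; ++k)
          {
            if (edge[k][1] != i)
              continue;
            int src = edge[k][0];
            if (src > i)	/* Backedge: its max_rpo is not yet known.  */
              max_rpo[i] = MAX (max_rpo[i], src);
            else		/* Forward edge: fold in its reach.  */
              max_rpo[i] = MAX (max_rpo[i], max_rpo[src]);
          }
      }
    for (int i = 0; i < NBLK; ++i)
      printf ("max_rpo[%d] = %d\n", i, max_rpo[i]);
    return 0;
  }

For the loop 1 -> 2 -> 3 -> 1 encoded above this prints max_rpo[1] =
max_rpo[2] = max_rpo[3] = 3: every block in the loop can be re-reached
from the backedge source.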
@@ -6440,120 +6442,165 @@ do_rpo_vn (function *fn, edge entry, bitmap exit_bbs,
 	}
     }
 
-  /* Go and process all blocks, iterating as necessary.  */
-  int idx = 0;
   uint64_t nblk = 0;
-  do
+  int idx = 0;
+  if (iterate)
+    /* Go and process all blocks, iterating as necessary.  */
+    do
+      {
+	basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[idx]);
+
+	/* If the block has incoming backedges remember unwind state.  This
+	   is required even for non-executable blocks since in irreducible
+	   regions we might reach them via the backedge and re-start iterating
+	   from there.
+	   Note we can individually mark blocks with incoming backedges to
+	   not iterate where we then handle PHIs conservatively.  We do that
+	   heuristically to reduce compile-time for degenerate cases.  */
+	if (rpo_state[idx].iterate)
+	  {
+	    rpo_state[idx].ob_top = obstack_alloc (&vn_tables_obstack, 0);
+	    rpo_state[idx].ref_top = last_inserted_ref;
+	    rpo_state[idx].phi_top = last_inserted_phi;
+	    rpo_state[idx].nary_top = last_inserted_nary;
+	  }
+
+	if (!(bb->flags & BB_EXECUTABLE))
+	  {
+	    if (dump_file && (dump_flags & TDF_DETAILS))
+	      fprintf (dump_file, "Block %d: BB%d found not executable\n",
+		       idx, bb->index);
+	    idx++;
+	    continue;
+	  }
+
+	if (dump_file && (dump_flags & TDF_DETAILS))
+	  fprintf (dump_file, "Processing block %d: BB%d\n", idx, bb->index);
+	nblk++;
+	todo |= process_bb (avail, bb,
+			    rpo_state[idx].visited != 0,
+			    rpo_state[idx].iterate,
+			    iterate, eliminate, do_region, exit_bbs);
+	rpo_state[idx].visited++;
+
+	/* Verify if changed values flow over executable outgoing backedges
+	   and those change destination PHI values (that's the thing we
+	   can easily verify).  Reduce over all such edges to the farthest
+	   away PHI.  */
+	int iterate_to = -1;
+	edge_iterator ei;
+	edge e;
+	FOR_EACH_EDGE (e, ei, bb->succs)
+	  if ((e->flags & (EDGE_DFS_BACK|EDGE_EXECUTABLE))
+	      == (EDGE_DFS_BACK|EDGE_EXECUTABLE)
+	      && rpo_state[bb_to_rpo[e->dest->index]].iterate)
+	    {
+	      int destidx = bb_to_rpo[e->dest->index];
+	      if (!rpo_state[destidx].visited)
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    fprintf (dump_file, "Unvisited destination %d\n",
+			     e->dest->index);
+		  if (iterate_to == -1 || destidx < iterate_to)
+		    iterate_to = destidx;
+		  continue;
+		}
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Looking for changed values of backedge"
+			 " %d->%d destination PHIs\n",
+			 e->src->index, e->dest->index);
+	      vn_context_bb = e->dest;
+	      gphi_iterator gsi;
+	      for (gsi = gsi_start_phis (e->dest);
+		   !gsi_end_p (gsi); gsi_next (&gsi))
+		{
+		  bool inserted = false;
+		  /* While we'd ideally just iterate on value changes
+		     we CSE PHIs and do that even across basic-block
+		     boundaries.  So even hashtable state changes can
+		     be important (which is roughly equivalent to
+		     PHI argument value changes).  To not excessively
+		     iterate because of that we track whether a PHI
+		     was CSEd to with GF_PLF_1.  */
+		  bool phival_changed;
+		  if ((phival_changed = visit_phi (gsi.phi (),
+						   &inserted, false))
+		      || (inserted && gimple_plf (gsi.phi (), GF_PLF_1)))
+		    {
+		      if (!phival_changed
+			  && dump_file && (dump_flags & TDF_DETAILS))
+			fprintf (dump_file, "PHI was CSEd and hashtable "
+				 "state (changed)\n");
+		      if (iterate_to == -1 || destidx < iterate_to)
+			iterate_to = destidx;
+		      break;
+		    }
+		}
+	      vn_context_bb = NULL;
+	    }
+	if (iterate_to != -1)
+	  {
+	    do_unwind (&rpo_state[iterate_to], iterate_to, avail, bb_to_rpo);
+	    idx = iterate_to;
+	    if (dump_file && (dump_flags & TDF_DETAILS))
+	      fprintf (dump_file, "Iterating to %d BB%d\n",
+		       iterate_to, rpo[iterate_to]);
+	    continue;
+	  }
+
+	idx++;
+      }
+    while (idx < n);
+
+  else /* !iterate */
     {
-      basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[idx]);
-
-      /* If the block has incoming backedges remember unwind state.  This
-	 is required even for non-executable blocks since in irreducible
-	 regions we might reach them via the backedge and re-start iterating
-	 from there.
-	 Note we can individually mark blocks with incoming backedges to
-	 not iterate where we then handle PHIs conservatively.  We do that
-	 heuristically to reduce compile-time for degenerate cases.  */
-      if (rpo_state[idx].iterate)
+      /* Process all blocks greedily with a worklist that enforces RPO
+	 processing of reachable blocks.  */
+      auto_bitmap worklist;
+      bitmap_set_bit (worklist, 0);
+      while (!bitmap_empty_p (worklist))
 	{
-	  rpo_state[idx].ob_top = obstack_alloc (&vn_tables_obstack, 0);
-	  rpo_state[idx].ref_top = last_inserted_ref;
-	  rpo_state[idx].phi_top = last_inserted_phi;
-	  rpo_state[idx].nary_top = last_inserted_nary;
-	}
+	  int idx = bitmap_first_set_bit (worklist);
+	  bitmap_clear_bit (worklist, idx);
+	  basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[idx]);
+	  gcc_assert ((bb->flags & BB_EXECUTABLE)
+		      && !rpo_state[idx].visited);
 
-      if (!(bb->flags & BB_EXECUTABLE))
-	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Block %d: BB%d found not executable\n",
-		     idx, bb->index);
-	  idx++;
-	  continue;
-	}
-
-      if (dump_file && (dump_flags & TDF_DETAILS))
-	fprintf (dump_file, "Processing block %d: BB%d\n", idx, bb->index);
-      nblk++;
-      todo |= process_bb (avail, bb,
-			  rpo_state[idx].visited != 0,
-			  rpo_state[idx].iterate,
-			  iterate, eliminate, do_region, exit_bbs);
-      rpo_state[idx].visited++;
+	    fprintf (dump_file, "Processing block %d: BB%d\n", idx, bb->index);
 
-      if (iterate)
-	{
-	  /* Verify if changed values flow over executable outgoing backedges
-	     and those change destination PHI values (that's the thing we
-	     can easily verify).  Reduce over all such edges to the farthest
-	     away PHI.  */
-	  int iterate_to = -1;
+	  /* When we run into predecessor edges where we cannot trust its
+	     executable state mark them executable so PHI processing will
+	     be conservative.
+	     ??? Do we need to force arguments flowing over that edge
+	     to be varying or will they even always be?  */
 	  edge_iterator ei;
 	  edge e;
-	  FOR_EACH_EDGE (e, ei, bb->succs)
-	    if ((e->flags & (EDGE_DFS_BACK|EDGE_EXECUTABLE))
-		== (EDGE_DFS_BACK|EDGE_EXECUTABLE)
-		&& rpo_state[bb_to_rpo[e->dest->index]].iterate)
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    if (!(e->flags & EDGE_EXECUTABLE)
+		&& !rpo_state[bb_to_rpo[e->src->index]].visited
+		&& rpo_state[bb_to_rpo[e->src->index]].max_rpo >= (int)idx)
 	      {
-		int destidx = bb_to_rpo[e->dest->index];
-		if (!rpo_state[destidx].visited)
-		  {
-		    if (dump_file && (dump_flags & TDF_DETAILS))
-		      fprintf (dump_file, "Unvisited destination %d\n",
-			       e->dest->index);
-		    if (iterate_to == -1
-			|| destidx < iterate_to)
-		      iterate_to = destidx;
-		    continue;
-		  }
 		if (dump_file && (dump_flags & TDF_DETAILS))
-		  fprintf (dump_file, "Looking for changed values of backedge "
-			   "%d->%d destination PHIs\n",
+		  fprintf (dump_file, "Cannot trust state of predecessor "
+			   "edge %d -> %d, marking executable\n",
 			   e->src->index, e->dest->index);
-		vn_context_bb = e->dest;
-		gphi_iterator gsi;
-		for (gsi = gsi_start_phis (e->dest);
-		     !gsi_end_p (gsi); gsi_next (&gsi))
-		  {
-		    bool inserted = false;
-		    /* While we'd ideally just iterate on value changes
-		       we CSE PHIs and do that even across basic-block
-		       boundaries.  So even hashtable state changes can
-		       be important (which is roughly equivalent to
-		       PHI argument value changes).  To not excessively
-		       iterate because of that we track whether a PHI
-		       was CSEd to with GF_PLF_1.  */
-		    bool phival_changed;
-		    if ((phival_changed = visit_phi (gsi.phi (),
-						     &inserted, false))
-			|| (inserted && gimple_plf (gsi.phi (), GF_PLF_1)))
-		      {
-			if (!phival_changed
-			    && dump_file && (dump_flags & TDF_DETAILS))
-			  fprintf (dump_file, "PHI was CSEd and hashtable "
-				   "state (changed)\n");
-			if (iterate_to == -1
-			    || destidx < iterate_to)
-			  iterate_to = destidx;
-			break;
-		      }
-		  }
-		vn_context_bb = NULL;
+		e->flags |= EDGE_EXECUTABLE;
 	      }
-	  if (iterate_to != -1)
-	    {
-	      do_unwind (&rpo_state[iterate_to], iterate_to,
-			 avail, bb_to_rpo);
-	      idx = iterate_to;
-	      if (dump_file && (dump_flags & TDF_DETAILS))
-		fprintf (dump_file, "Iterating to %d BB%d\n",
-			 iterate_to, rpo[iterate_to]);
-	      continue;
-	    }
-	}
 
-      idx++;
+	  nblk++;
+	  todo |= process_bb (avail, bb, false, false, false, eliminate,
+			      do_region, exit_bbs);
+	  rpo_state[idx].visited++;
+
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    if ((e->flags & EDGE_EXECUTABLE)
+		&& e->dest->index != EXIT_BLOCK
+		&& (!do_region || !bitmap_bit_p (exit_bbs, e->dest->index))
+		&& !rpo_state[bb_to_rpo[e->dest->index]].visited)
+	      bitmap_set_bit (worklist, bb_to_rpo[e->dest->index]);
+	}
     }
-  while (idx < n);
 
   /* If statistics or dump file active.  */
   int nex = 0;
-- 
2.30.2
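A closing note on the worklist mode above: a predecessor edge that is
still not marked executable may only be trusted to stay that way if its
source block can no longer become reachable.  With the precomputed
max_rpo this is a constant-time test; as a predicate sketch (names are
illustrative, not GCC API):

  #include <stdbool.h>

  /* True if the not-yet-executable predecessor edge from block PRED (an
     RPO index) can be ignored while processing block IDX: its source was
     already processed, or no block at RPO index IDX or later can still
     reach it.  */
  static bool
  pred_state_trusted (int pred, int idx, const bool *visited,
                      const int *max_rpo)
  {
    return visited[pred] || max_rpo[pred] < idx;
  }

  int
  main (void)
  {
    /* Block 1 sits in a loop whose backedge source is block 3, so its
       state cannot be trusted before block 3 was processed.  */
    bool visited[4] = { true, false, false, false };
    int max_rpo[4] = { 0, 3, 3, 3 };
    return pred_state_trusted (1, 2, visited, max_rpo) ? 1 : 0;
  }

When the test fails, the patch conservatively marks the edge executable
before PHI processing, which replaces the removed blanket "mark every
backedge destination executable" behavior of the old non-iterating mode.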