From bd87cc14ebdb6789e067fb1828d5808407c308b3 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 11 Nov 2020 11:51:59 +0100 Subject: [PATCH] tree-optimization/97623 - Avoid PRE hoist insertion iteration The recent previous change in this area limited hoist insertion iteration via a param but the following is IMHO better since we are not really interested in PRE opportunities exposed by hoisting but only the other way around. So this moves hoist insertion after PRE iteration finished and removes hoist insertion iteration alltogether. 2020-11-11 Richard Biener PR tree-optimization/97623 * params.opt (-param=max-pre-hoist-insert-iterations): Remove again. * doc/invoke.texi (max-pre-hoist-insert-iterations): Likewise. * tree-ssa-pre.c (insert): Move hoist insertion after PRE insertion iteration and do not iterate it. * gcc.dg/tree-ssa/ssa-hoist-3.c: Adjust. * gcc.dg/tree-ssa/ssa-hoist-7.c: Likewise. * gcc.dg/tree-ssa/ssa-pre-30.c: Likewise. --- gcc/doc/invoke.texi | 5 --- gcc/params.opt | 4 --- gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-3.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-7.c | 4 +-- gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-30.c | 2 +- gcc/tree-ssa-pre.c | 34 +++++++++++++-------- 6 files changed, 26 insertions(+), 25 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 18ca759c8be..8d0d2136831 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -13446,11 +13446,6 @@ is aborted and the load or store is not considered redundant. The number of queries is algorithmically limited to the number of stores on all paths from the load to the function entry. -@item max-pre-hoist-insert-iterations -The maximum number of iterations doing insertion during code -hoisting which is done as part of the partial redundancy elimination -insertion phase. - @item ira-max-loops-num IRA uses regional register allocation by default. If a function contains more loops than the number given by this parameter, only at most diff --git a/gcc/params.opt b/gcc/params.opt index a33a371a395..7bac39a9d58 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -597,10 +597,6 @@ Maximum depth of sqrt chains to use when synthesizing exponentiation by a real c Common Joined UInteger Var(param_max_predicted_iterations) Init(100) IntegerRange(0, 65536) Param Optimization The maximum number of loop iterations we predict statically. --param=max-pre-hoist-insert-iterations= -Common Joined UInteger Var(param_max_pre_hoist_insert_iterations) Init(3) Param Optimization -The maximum number of insert iterations done for PRE code hoisting. - -param=max-reload-search-insns= Common Joined UInteger Var(param_max_reload_search_insns) Init(100) Param Optimization The maximum number of instructions to search backward when looking for equivalent reload. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-3.c index 51ba59c9ab6..de3051bfb50 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-3.c @@ -15,4 +15,4 @@ int test (int a, int b, int c, int g) /* We should hoist and CSE only the multiplication. */ /* { dg-final { scan-tree-dump-times " \\* " 1 "pre" } } */ -/* { dg-final { scan-tree-dump "Insertions: 1" "pre" } } */ +/* { dg-final { scan-tree-dump "HOIST inserted: 1" "pre" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-7.c index ce9cec61668..fdb6a3ed349 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-7.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-hoist-7.c @@ -49,6 +49,6 @@ void foo (int a, int b, int c, int d, int e, int x, int y, int z) /* Now inserting x + y five times is unnecessary but the cascading cannot be avoided with the simple-minded dataflow. But make sure - we do the insertions all in the first iteration. */ -/* { dg-final { scan-tree-dump "insert iterations == 2" "pre" } } */ + we do not iterate PRE insertion. */ +/* { dg-final { scan-tree-dump "insert iterations == 1" "pre" } } */ /* { dg-final { scan-tree-dump "HOIST inserted: 5" "pre" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-30.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-30.c index 59af63ad5ac..cf9317372d6 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-30.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-30.c @@ -24,4 +24,4 @@ bar (int b, int x) /* We should see the partial redundant loads of f even though they are using different types (of the same size). */ -/* { dg-final { scan-tree-dump-times "Replaced MEM" 2 "pre" } } */ +/* { dg-final { scan-tree-dump-times "Replaced MEM" 3 "pre" } } */ diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index da2b68909d9..d90249c0182 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -3695,18 +3695,6 @@ insert (void) fprintf (dump_file, "Starting insert iteration %d\n", num_iterations); changed = false; - /* Insert expressions for hoisting. Do a backward walk here since - inserting into BLOCK exposes new opportunities in its predecessors. - Since PRE and hoist insertions can cause back-to-back iteration - limit that on the hoist side. */ - if (flag_code_hoisting - && num_iterations <= param_max_pre_hoist_insert_iterations) - for (int idx = rpo_num - 1; idx >= 0; --idx) - { - basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[idx]); - if (EDGE_COUNT (block->succs) >= 2) - changed |= do_hoist_insertion (block); - } for (int idx = 0; idx < rpo_num; ++idx) { basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[idx]); @@ -3754,6 +3742,28 @@ insert (void) statistics_histogram_event (cfun, "insert iterations", num_iterations); + /* AVAIL_OUT is not needed after insertion so we don't have to + propagate NEW_SETS from hoist insertion. */ + FOR_ALL_BB_FN (bb, cfun) + { + bitmap_set_pool.remove (NEW_SETS (bb)); + NEW_SETS (bb) = NULL; + } + + /* Insert expressions for hoisting. Do a backward walk here since + inserting into BLOCK exposes new opportunities in its predecessors. + Since PRE and hoist insertions can cause back-to-back iteration + and we are interested in PRE insertion exposed hoisting opportunities + but not in hoisting exposed PRE ones do hoist insertion only after + PRE insertion iteration finished and do not iterate it. */ + if (flag_code_hoisting) + for (int idx = rpo_num - 1; idx >= 0; --idx) + { + basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[idx]); + if (EDGE_COUNT (block->succs) >= 2) + changed |= do_hoist_insertion (block); + } + free (rpo); } -- 2.30.2