From 08926e6f5bbf23d1eebc776d84d648f8b5836931 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 19 Dec 2018 11:10:08 +0000 Subject: [PATCH] re PR tree-optimization/88533 (Higher performance penalty of array-bounds checking for sparse-matrix vector multiply) 2018-12-19 Richard Biener PR tree-optimization/88533 Revert 2018-04-30 Richard Biener PR tree-optimization/28364 PR tree-optimization/85275 * tree-ssa-loop-ch.c (ch_base::copy_headers): Stop after copying first exit test. * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Adjust. * tree-ssa-loop-ch.c: Include tree-phinodes.h and ssa-iterators.h. (should_duplicate_loop_header_p): Track whether stmt compute loop invariants or values based on IVs. Apart from the original loop header only duplicate blocks with exit tests that are based on IVs or invariants. * gcc.dg/tree-ssa/copy-headers-6.c: New testcase. * gcc.dg/tree-ssa/copy-headers-7.c: Likewise. * gcc.dg/tree-ssa/ivopt_mult_1.c: Un-XFAIL. * gcc.dg/tree-ssa/ivopt_mult_2.c: Likewise. From-SVN: r267262 --- gcc/ChangeLog | 18 +++++ gcc/testsuite/ChangeLog | 15 ++++ .../gcc.dg/tree-ssa/copy-headers-6.c | 16 ++++ .../gcc.dg/tree-ssa/copy-headers-7.c | 16 ++++ gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c | 2 +- .../gcc.dg/tree-ssa/ssa-dom-thread-7.c | 2 +- gcc/tree-ssa-loop-ch.c | 75 ++++++++++++++++--- 8 files changed, 133 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-headers-6.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b850704c288..56e50e2cbfb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2018-12-19 Richard Biener + + PR tree-optimization/88533 + Revert + 2018-04-30 Richard Biener + + PR tree-optimization/28364 + PR tree-optimization/85275 + * tree-ssa-loop-ch.c (ch_base::copy_headers): Stop after + copying first exit test. + + * tree-ssa-loop-ch.c: Include tree-phinodes.h and + ssa-iterators.h. + (should_duplicate_loop_header_p): Track whether stmt compute + loop invariants or values based on IVs. Apart from the + original loop header only duplicate blocks with exit tests + that are based on IVs or invariants. + 2018-12-19 Tom de Vries * config/nvptx/nvptx.c (nvptx_gen_shared_bcast, shared_prop_gen) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a0c28eff44c..cb5d6195432 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2018-12-19 Richard Biener + + PR tree-optimization/88533 + Revert + 2018-04-30 Richard Biener + + PR tree-optimization/28364 + PR tree-optimization/85275 + * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Adjust. + + * gcc.dg/tree-ssa/copy-headers-6.c: New testcase. + * gcc.dg/tree-ssa/copy-headers-7.c: Likewise. + * gcc.dg/tree-ssa/ivopt_mult_1.c: Un-XFAIL. + * gcc.dg/tree-ssa/ivopt_mult_2.c: Likewise. + 2018-12-19 Jakub Jelinek PR target/88541 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-6.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-6.c new file mode 100644 index 00000000000..eeafba7972c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-6.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ch2-details" } */ + +int is_sorted(int *a, int n) +{ + for (int i = 0; i < n - 1; i++) + if (a[i] > 0) + return 0; + return 1; +} + +/* Verify we apply loop header copying but only copy the IV test and + not the alternate exit test. */ + +/* { dg-final { scan-tree-dump "is now do-while loop" "ch2" } } */ +/* { dg-final { scan-tree-dump-times " if " 3 "ch2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c new file mode 100644 index 00000000000..a0a6e6a9b57 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */ + +int is_sorted(int *a, int n, int m, int k) +{ + for (int i = 0; i < n - 1 && m && k > i; i++) + if (a[i] > a[i + 1]) + return 0; + return 1; +} + +/* Verify we apply loop header copying but only copy the IV tests and + the invariant test, not the alternate exit test. */ + +/* { dg-final { scan-tree-dump "is now do-while loop" "ch2" } } */ +/* { dg-final { scan-tree-dump-times "Will duplicate bb" 3 "ch2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c index eaf2c7e6229..adfe371c7ce 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c @@ -20,4 +20,4 @@ long foo(long* p, long* p2, int N1, int N2) return s; } -/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c index 106349778ed..50d0cc5d2ae 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c @@ -21,4 +21,4 @@ long foo(long* p, long* p2, int N1, int N2) return s; } -/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c index f833aa4351d..a3395578f78 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c @@ -2,7 +2,7 @@ /* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-thread2-stats -fdump-tree-dom2-stats -fdump-tree-thread3-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats -fno-guess-branch-probability" } */ /* { dg-final { scan-tree-dump "Jumps threaded: 16" "thread1" } } */ /* { dg-final { scan-tree-dump "Jumps threaded: 9" "thread2" } } */ -/* { dg-final { scan-tree-dump "Jumps threaded: 1" "dom2" } } */ +/* { dg-final { scan-tree-dump-not "Jumps threaded" "dom2" } } */ /* aarch64 has the highest CASE_VALUES_THRESHOLD in GCC. It's high enough to change decisions in switch expansion which in turn can expose new jump threading opportunities. Skip the later tests on aarch64. */ diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c index 4d4813df3c8..3a094103c2a 100644 --- a/gcc/tree-ssa-loop-ch.c +++ b/gcc/tree-ssa-loop-ch.c @@ -34,6 +34,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-scopedtables.h" #include "tree-ssa-threadedge.h" #include "tree-ssa-sccvn.h" +#include "tree-phinodes.h" +#include "ssa-iterators.h" #include "params.h" /* Duplicates headers of loops if they are small enough, so that the statements @@ -50,7 +52,6 @@ should_duplicate_loop_header_p (basic_block header, struct loop *loop, int *limit) { gimple_stmt_iterator bsi; - gimple *last; gcc_assert (!header->aux); @@ -99,8 +100,8 @@ should_duplicate_loop_header_p (basic_block header, struct loop *loop, return false; } - last = last_stmt (header); - if (gimple_code (last) != GIMPLE_COND) + gcond *last = dyn_cast (last_stmt (header)); + if (!last) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, @@ -109,10 +110,24 @@ should_duplicate_loop_header_p (basic_block header, struct loop *loop, return false; } - /* Count number of instructions and punt on calls. */ + for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi); + gsi_next (&psi)) + { + gphi *phi = psi.phi (); + tree res = gimple_phi_result (phi); + if (INTEGRAL_TYPE_P (TREE_TYPE (res)) + || POINTER_TYPE_P (TREE_TYPE (res))) + gimple_set_uid (phi, 1 /* IV */); + else + gimple_set_uid (phi, 0); + } + + /* Count number of instructions and punt on calls. + Populate stmts INV/IV flag to later apply heuristics to the + kind of conditions we want to copy. */ for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi)) { - last = gsi_stmt (bsi); + gimple *last = gsi_stmt (bsi); if (gimple_code (last) == GIMPLE_LABEL) continue; @@ -142,7 +157,52 @@ should_duplicate_loop_header_p (basic_block header, struct loop *loop, header->index); return false; } + + /* Classify the stmt based on whether its computation is based + on a IV or whether it is invariant in the loop. */ + gimple_set_uid (last, 0); + if (!gimple_vuse (last)) + { + bool inv = true; + bool iv = false; + ssa_op_iter i; + tree op; + FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE) + if (!SSA_NAME_IS_DEFAULT_DEF (op) + && flow_bb_inside_loop_p (loop, + gimple_bb (SSA_NAME_DEF_STMT (op)))) + { + if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */)) + inv = false; + if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */) + iv = true; + } + gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0)); + } } + + /* If the condition tests a non-IV loop variant we do not want to rotate + the loop further. Unless this is the original loop header. */ + tree lhs = gimple_cond_lhs (last); + tree rhs = gimple_cond_rhs (last); + if (header != loop->header + && ((TREE_CODE (lhs) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (lhs) + && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs))) + && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0) + || (TREE_CODE (rhs) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (rhs) + && flow_bb_inside_loop_p (loop, + gimple_bb (SSA_NAME_DEF_STMT (rhs))) + && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + " Not duplicating bb %i: condition based on non-IV loop" + "variant.\n", header->index); + return false; + } + if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Will duplicate bb %i\n", header->index); return true; @@ -343,11 +403,6 @@ ch_base::copy_headers (function *fun) bbs[n_bbs++] = header; gcc_assert (bbs_size > n_bbs); header = exit->dest; - /* Make sure to stop copying after we copied the first exit test. - Without further heuristics we do not want to rotate the loop - any further. */ - if (loop_exits_from_bb_p (loop, exit->src)) - break; } if (!exit) -- 2.30.2