From 1623d9f346086582c49cb747c3dabd062e730c42 Mon Sep 17 00:00:00 2001 From: Bin Cheng Date: Fri, 25 May 2018 11:09:42 +0000 Subject: [PATCH] re PR tree-optimization/85720 (bad codegen for looped assignment of primitives at -O2) PR tree-optimization/85720 * tree-loop-distribution.c (break_alias_scc_partitions): Don't merge SCC if all partitions are builtins. (version_loop_by_alias_check): New parameter. Generate cancelable runtime alias check if all partitions are builtins. (distribute_loop): Update call to above function. gcc/testsuite * gcc.dg/tree-ssa/pr85720.c: New test. * gcc.target/i386/avx256-unaligned-store-2.c: Disable loop pattern distribution. From-SVN: r260753 --- gcc/ChangeLog | 9 +++++ gcc/testsuite/ChangeLog | 7 ++++ gcc/testsuite/gcc.dg/tree-ssa/pr85720.c | 13 ++++++ .../i386/avx256-unaligned-store-2.c | 2 +- gcc/tree-loop-distribution.c | 40 ++++++++++++++----- 5 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr85720.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 06e6591aa21..68a4754ade1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2018-05-25 Bin Cheng + + PR tree-optimization/85720 + * tree-loop-distribution.c (break_alias_scc_partitions): Don't merge + SCC if all partitions are builtins. + (version_loop_by_alias_check): New parameter. Generate cancelable + runtime alias check if all partitions are builtins. + (distribute_loop): Update call to above function. + 2018-05-25 Bin Cheng * tree-outof-ssa.c (tree-ssa.h, tree-dfa.h): Include header files. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 92dc33446d2..7d7fc8cb421 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2018-05-25 Bin Cheng + + PR tree-optimization/85720 + * gcc.dg/tree-ssa/pr85720.c: New test. + * gcc.target/i386/avx256-unaligned-store-2.c: Disable loop pattern + distribution. 
+ 2018-05-25 Martin Liska PR testsuite/85911 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr85720.c b/gcc/testsuite/gcc.dg/tree-ssa/pr85720.c new file mode 100644 index 00000000000..18d8be92506 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr85720.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target size32plus } } */ +/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist" } */ + +void fill(char* A, char* B, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + { + A[i] = 0; + B[i] = A[i] + 1; + } +} + +/* { dg-final { scan-tree-dump-times "_builtin_memset" 2 "ldist" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c index 87285c680d3..1e7969bb47d 100644 --- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O3 -mtune-ctrl=sse_typeless_stores -dp -mavx -mavx256-split-unaligned-store -mno-prefer-avx128" } */ +/* { dg-options "-O3 -mtune-ctrl=sse_typeless_stores -dp -mavx -mavx256-split-unaligned-store -mno-prefer-avx128 -fno-tree-loop-distribute-patterns" } */ #define N 1024 diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 5e327f4bfd8..c6e0a60b01b 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -2268,21 +2268,26 @@ break_alias_scc_partitions (struct graph *rdg, for (j = 0; partitions->iterate (j, &first); ++j) if (pg->vertices[j].component == i) break; + + bool same_type = true, all_builtins = partition_builtin_p (first); for (++j; partitions->iterate (j, &partition); ++j) { if (pg->vertices[j].component != i) continue; - /* Note we Merge partitions of parallel type on purpose, though - the result partition is sequential. The reason is vectorizer - can do more accurate runtime alias check in this case. 
Also - it results in more conservative distribution. */ if (first->type != partition->type) { - bitmap_clear_bit (sccs_to_merge, i); + same_type = false; break; } + all_builtins &= partition_builtin_p (partition); } + /* Merge SCC if all partitions in SCC have the same type, though the + result partition is sequential, because vectorizer can do better + runtime alias check. One exception is all partitions in SCC are + builtins. */ + if (!same_type || all_builtins) + bitmap_clear_bit (sccs_to_merge, i); } /* Initialize callback data for traversing. */ @@ -2458,7 +2463,8 @@ compute_alias_check_pairs (struct loop *loop, vec *alias_ddrs, checks and version LOOP under condition of these runtime alias checks. */ static void -version_loop_by_alias_check (struct loop *loop, vec *alias_ddrs) +version_loop_by_alias_check (vec *partitions, + struct loop *loop, vec *alias_ddrs) { profile_probability prob; basic_block cond_bb; @@ -2481,9 +2487,25 @@ version_loop_by_alias_check (struct loop *loop, vec *alias_ddrs) is_gimple_val, NULL_TREE); /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS. */ - if (flag_tree_loop_vectorize) + bool cancelable_p = flag_tree_loop_vectorize; + if (cancelable_p) + { + unsigned i = 0; + struct partition *partition; + for (; partitions->iterate (i, &partition); ++i) + if (!partition_builtin_p (partition)) + break; + + /* If all partitions are builtins, distributing it would be profitable and + we don't want to cancel the runtime alias checks. */ + if (i == partitions->length ()) + cancelable_p = false; + } + + /* Generate internal function call for loop distribution alias check if the + runtime alias check should be cancelable. */ + if (cancelable_p) { - /* Generate internal function call for loop distribution alias check. 
*/ call_stmt = gimple_build_call_internal (IFN_LOOP_DIST_ALIAS, 2, NULL_TREE, cond_expr); lhs = make_ssa_name (boolean_type_node); @@ -2883,7 +2905,7 @@ distribute_loop (struct loop *loop, vec stmts, } if (version_for_distribution_p (&partitions, &alias_ddrs)) - version_loop_by_alias_check (loop, &alias_ddrs); + version_loop_by_alias_check (&partitions, loop, &alias_ddrs); if (dump_file && (dump_flags & TDF_DETAILS)) { -- 2.30.2