re PR tree-optimization/85720 (bad codegen for looped assignment of primitives at...
author: Bin Cheng <bin.cheng@arm.com>
Fri, 25 May 2018 11:09:42 +0000 (11:09 +0000)
committer: Bin Cheng <amker@gcc.gnu.org>
Fri, 25 May 2018 11:09:42 +0000 (11:09 +0000)
PR tree-optimization/85720

* tree-loop-distribution.c (break_alias_scc_partitions): Don't merge
SCC if all partitions are builtins.
(version_loop_by_alias_check): New parameter.  Generate non-cancelable
runtime alias check if all partitions are builtins.
(distribute_loop): Update call to above function.

gcc/testsuite
* gcc.dg/tree-ssa/pr85720.c: New test.
* gcc.target/i386/avx256-unaligned-store-2.c: Disable loop pattern
distribution.

From-SVN: r260753

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/pr85720.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
gcc/tree-loop-distribution.c

index 06e6591aa2199ace2056658c1b91f2a1cebe093c..68a4754ade19e1a20c820d749c6a683dcb53e500 100644 (file)
@@ -1,3 +1,12 @@
+2018-05-25  Bin Cheng  <bin.cheng@arm.com>
+
+       PR tree-optimization/85720
+       * tree-loop-distribution.c (break_alias_scc_partitions): Don't merge
+       SCC if all partitions are builtins.
+       (version_loop_by_alias_check): New parameter.  Generate non-cancelable
+       runtime alias check if all partitions are builtins.
+       (distribute_loop): Update call to above function.
+
 2018-05-25  Bin Cheng  <bin.cheng@arm.com>
 
        * tree-outof-ssa.c (tree-ssa.h, tree-dfa.h): Include header files.
index 92dc33446d28824b8b5b261dfa708d17c4e98323..7d7fc8cb421c441e5c8b486cf653bede6a54d4f9 100644 (file)
@@ -1,3 +1,10 @@
+2018-05-25  Bin Cheng  <bin.cheng@arm.com>
+
+       PR tree-optimization/85720
+       * gcc.dg/tree-ssa/pr85720.c: New test.
+       * gcc.target/i386/avx256-unaligned-store-2.c: Disable loop pattern
+       distribution.
+
 2018-05-25  Martin Liska  <mliska@suse.cz>
 
        PR testsuite/85911
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr85720.c b/gcc/testsuite/gcc.dg/tree-ssa/pr85720.c
new file mode 100644 (file)
index 0000000..18d8be9
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile { target size32plus } } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist" } */
+
+void fill(char* A, char* B, unsigned n)
+{
+    for (unsigned i = 0; i < n; i++)
+    {
+        A[i] = 0;
+        B[i] = A[i] + 1;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "_builtin_memset" 2 "ldist" } } */
index 87285c680d373100294a6f139899d311f8b2c553..1e7969bb47db98a7304010821836063d0ad93fab 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O3 -mtune-ctrl=sse_typeless_stores -dp -mavx -mavx256-split-unaligned-store -mno-prefer-avx128" } */
+/* { dg-options "-O3 -mtune-ctrl=sse_typeless_stores -dp -mavx -mavx256-split-unaligned-store -mno-prefer-avx128 -fno-tree-loop-distribute-patterns" } */
 
 #define N 1024
 
index 5e327f4bfd8e0f660a6400d4a7da809a48f9fe4f..c6e0a60b01b2200cddb6344fc33eaebfb621f5c4 100644 (file)
@@ -2268,21 +2268,26 @@ break_alias_scc_partitions (struct graph *rdg,
          for (j = 0; partitions->iterate (j, &first); ++j)
            if (pg->vertices[j].component == i)
              break;
+
+         bool same_type = true, all_builtins = partition_builtin_p (first);
          for (++j; partitions->iterate (j, &partition); ++j)
            {
              if (pg->vertices[j].component != i)
                continue;
 
-             /* Note we Merge partitions of parallel type on purpose, though
-                the result partition is sequential.  The reason is vectorizer
-                can do more accurate runtime alias check in this case.  Also
-                it results in more conservative distribution.  */
              if (first->type != partition->type)
                {
-                 bitmap_clear_bit (sccs_to_merge, i);
+                 same_type = false;
                  break;
                }
+             all_builtins &= partition_builtin_p (partition);
            }
+         /* Merge SCC if all partitions in SCC have the same type, though the
+            result partition is sequential, because vectorizer can do better
+            runtime alias check.  One exception is when all partitions in SCC
+            are builtins.  */
+         if (!same_type || all_builtins)
+           bitmap_clear_bit (sccs_to_merge, i);
        }
 
       /* Initialize callback data for traversing.  */
@@ -2458,7 +2463,8 @@ compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
    checks and version LOOP under condition of these runtime alias checks.  */
 
 static void
-version_loop_by_alias_check (struct loop *loop, vec<ddr_p> *alias_ddrs)
+version_loop_by_alias_check (vec<struct partition *> *partitions,
+                            struct loop *loop, vec<ddr_p> *alias_ddrs)
 {
   profile_probability prob;
   basic_block cond_bb;
@@ -2481,9 +2487,25 @@ version_loop_by_alias_check (struct loop *loop, vec<ddr_p> *alias_ddrs)
                                      is_gimple_val, NULL_TREE);
 
   /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS.  */
-  if (flag_tree_loop_vectorize)
+  bool cancelable_p = flag_tree_loop_vectorize;
+  if (cancelable_p)
+    {
+      unsigned i = 0;
+      struct partition *partition;
+      for (; partitions->iterate (i, &partition); ++i)
+       if (!partition_builtin_p (partition))
+         break;
+
+     /* If all partitions are builtins, distributing it would be profitable and
+       we don't want to cancel the runtime alias checks.  */
+      if (i == partitions->length ())
+       cancelable_p = false;
+    }
+
+  /* Generate internal function call for loop distribution alias check if the
+     runtime alias check should be cancelable.  */
+  if (cancelable_p)
     {
-      /* Generate internal function call for loop distribution alias check.  */
       call_stmt = gimple_build_call_internal (IFN_LOOP_DIST_ALIAS,
                                              2, NULL_TREE, cond_expr);
       lhs = make_ssa_name (boolean_type_node);
@@ -2883,7 +2905,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
     }
 
   if (version_for_distribution_p (&partitions, &alias_ddrs))
-    version_loop_by_alias_check (loop, &alias_ddrs);
+    version_loop_by_alias_check (&partitions, loop, &alias_ddrs);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {