Change the way we split stores in BB vectorization
author Richard Biener <rguenther@suse.de>
Wed, 28 Oct 2020 11:56:16 +0000 (12:56 +0100)
committer Richard Biener <rguenther@suse.de>
Wed, 28 Oct 2020 13:15:37 +0000 (14:15 +0100)
The following fixes missed optimizations caused by the strange way we
split store groups in BB vectorization.  The solution is to split at
the failure boundary instead of re-aligning the split point to the
vector size chosen for the initial piece.  Also re-analyze the
remaining matching part when it is large enough (at least two lanes).

2020-10-28  Richard Biener  <rguenther@suse.de>

* tree-vect-slp.c (vect_build_slp_instance): Split the store
group at the failure boundary and also re-analyze a large enough
matching rest.

* gcc.dg/vect/bb-slp-68.c: New testcase.

gcc/testsuite/gcc.dg/vect/bb-slp-68.c [new file with mode: 0644]
gcc/tree-vect-slp.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-68.c b/gcc/testsuite/gcc.dg/vect/bb-slp-68.c
new file mode 100644 (file)
index 0000000..8718031
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-mavx" { target avx } } */
+
+double x[10], y[6], z[4];  /* One destination array, two source arrays.  */
+
+void foo ()
+{
+  x[0] = y[0];  /* Lanes 0-5 are plain copies from y[].  */
+  x[1] = y[1];
+  x[2] = y[2];
+  x[3] = y[3];
+  x[4] = y[4];
+  x[5] = y[5];
+  x[6] = z[0] + 1.;  /* Lanes 6-9 add a constant to z[].  */
+  x[7] = z[1] + 1.;
+  x[8] = z[2] + 1.;
+  x[9] = z[3] + 1.;
+}
+
+/* We want to have the store group split into 4, 2, 4 when using 32-byte vectors.  */
+/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 470b67d76b526b0296226cb8d245edf464c3d640..50a2d37eb254c02e788ded0435f00ec0620302d1 100644 (file)
@@ -2412,15 +2412,21 @@ vect_build_slp_instance (vec_info *vinfo,
                                                               group1_size);
              bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
                                                    max_tree_size);
-             /* If the first non-match was in the middle of a vector,
-                skip the rest of that vector.  Do not bother to re-analyze
-                single stmt groups.  */
-             if (group1_size < i)
+             /* Split the rest at the failure point and possibly
+                re-analyze the remaining matching part if it has
+                at least two lanes.  */
+             if (group1_size < i
+                 && (i + 1 < group_size
+                     || i - group1_size > 1))
                {
-                 i = group1_size + const_nunits;
-                 if (i + 1 < group_size)
-                   rest = vect_split_slp_store_group (rest, const_nunits);
+                 stmt_vec_info rest2 = rest;
+                 rest = vect_split_slp_store_group (rest, i - group1_size);
+                 if (i - group1_size > 1)
+                   res |= vect_analyze_slp_instance (vinfo, bst_map,
+                                                     rest2, max_tree_size);
                }
+             /* Re-analyze the non-matching tail if it has at least
+                two lanes.  */
              if (i + 1 < group_size)
                res |= vect_analyze_slp_instance (vinfo, bst_map,
                                                  rest, max_tree_size);