tree-optimization/95866 - avoid vectorizing uniform SLP subgraphs
author Richard Biener <rguenther@suse.de>
Wed, 24 Jun 2020 13:49:00 +0000 (15:49 +0200)
committer Richard Biener <rguenther@suse.de>
Wed, 24 Jun 2020 17:49:16 +0000 (19:49 +0200)
This avoids vectorizing SLP subgraphs that just compute uniform
operations on all-same operands.  That fixes the less interesting
(but most embarrassing) part of the testcase in the PR.  On the
way it also fixes a missing matches[0] reset in the last
refactoring touching that place.

2020-06-24  Richard Biener  <rguenther@suse.de>

PR tree-optimization/95866
* tree-vect-slp.c (vect_slp_tree_uniform_p): New.
(vect_build_slp_tree_2): Properly reset matches[0],
ignore uniform constants.

* gcc.target/i386/pr95866-1.c: New testcase.

gcc/testsuite/gcc.target/i386/pr95866-1.c [new file with mode: 0644]
gcc/tree-vect-slp.c

diff --git a/gcc/testsuite/gcc.target/i386/pr95866-1.c b/gcc/testsuite/gcc.target/i386/pr95866-1.c
new file mode 100644 (file)
index 0000000..991370c
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-slp2-details -msse2" } */
+
+int x[4];
+void foo(int i)
+{
+  int j = (i+1) & 31;
+  x[0] = (x[0] << j) + j;
+  x[1] = (x[1] << j) + j;
+  x[2] = (x[2] << j) + j;
+  x[3] = (x[3] << j) + j;
+}
+
+/* We should not use vector operations for i + 1 and (i + 1) & 31 but
+   instead use { j, j, j, j }.  */ 
+/* { dg-final { scan-tree-dump-times "Building parent vector operands from scalars" 2 "slp2" } } */
+/* { dg-final { scan-tree-dump-not " = \{i_" "slp2" } } */
+/* { dg-final { scan-tree-dump-times " = \{j_" 1 "slp2" } } */
index e7a260877a9172e4ac8960e440aa13e1a1d75a55..b2792c76ad2e6b394b0097b65d036e311fd71d3a 100644 (file)
@@ -238,6 +238,26 @@ vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
   return false;
 }
 
+/* Return true when all lanes in the external or constant NODE have
+   the same value.  */
+
+static bool
+vect_slp_tree_uniform_p (slp_tree node)
+{
+  gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_constant_def
+             || SLP_TREE_DEF_TYPE (node) == vect_external_def);
+
+  unsigned i;
+  tree op, first = NULL_TREE;
+  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
+    if (!first)
+      first = op;
+    else if (!operand_equal_p (first, op, 0))
+      return false;
+
+  return true;
+}
+
 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
    that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
    of the chain.  */
@@ -1439,7 +1459,7 @@ fail:
   vect_free_oprnd_info (oprnds_info);
 
   /* If we have all children of a non-unary child built up from
-     scalars then just throw that away, causing it built up
+     uniform scalars then just throw that away, causing it built up
      from scalars.  */
   if (nops > 1
       && is_a <bb_vec_info> (vinfo)
@@ -1451,11 +1471,13 @@ fail:
       slp_tree child;
       unsigned j;
       FOR_EACH_VEC_ELT (children, j, child)
-       if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
+       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def
+           || !vect_slp_tree_uniform_p (child))
          break;
       if (!child)
        {
          /* Roll back.  */
+         matches[0] = false;
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child, false);