re PR tree-optimization/92645 (Hand written vector code is 450 times slower when...
author Richard Biener <rguenther@suse.de>
Tue, 26 Nov 2019 08:32:38 +0000 (08:32 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Tue, 26 Nov 2019 08:32:38 +0000 (08:32 +0000)
2019-11-26  Richard Biener  <rguenther@suse.de>

PR tree-optimization/92645
* tree-vect-slp.c (vect_build_slp_tree_2): For unary ops
do not build the operation from scalars if the operand is.

* gcc.target/i386/pr92645.c: New testcase.

From-SVN: r278719

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr92645.c [new file with mode: 0644]
gcc/tree-vect-slp.c

index 186299abab244df75de00253e28028d0c4520496..6ea6e5b592fa25608c49a73499a153dc6bff0444 100644 (file)
@@ -1,3 +1,9 @@
+2019-11-26  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/92645
+       * tree-vect-slp.c (vect_build_slp_tree_2): For unary ops
+       do not build the operation from scalars if the operand is.
+
 2019-11-25  Tobias Burnus  <tobias@codesourcery.com>
 
        * config/gcn/mkoffload.c (COMMENT_PREFIX, struct id_map,
index 0a284dae1ea9918fc08e6c2ab5e3ebf1ec881d1c..d105b609c84adad67d190d47c4204bc43fabbf3f 100644 (file)
@@ -1,3 +1,8 @@
+2019-11-26  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/92645
+       * gcc.target/i386/pr92645.c: New testcase.
+
 2019-11-26  Jakub Jelinek  <jakub@redhat.com>
 
        * gfortran.dg/dec-comparison.f90: Change dg-do from run to compile.
diff --git a/gcc/testsuite/gcc.target/i386/pr92645.c b/gcc/testsuite/gcc.target/i386/pr92645.c
new file mode 100644 (file)
index 0000000..467ed53
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized -msse2 -Wno-psabi" } */
+
+typedef unsigned short v8hi __attribute__((vector_size(16)));  /* 16-byte vector of unsigned short.  */
+typedef unsigned int v4si __attribute__((vector_size(16)));  /* 16-byte vector of unsigned int.  */
+
+void bar (v4si *dst, v8hi * __restrict src)  /* Variant reading the source vector through a pointer.  */
+{
+  unsigned int tem[8];  /* Scalar staging array for the widened lanes.  */
+  tem[0] = (*src)[0];  /* Each store converts one unsigned short lane to unsigned int.  */
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v4si *)tem;  /* Read tem[0..3] back as one v4si.  */
+  dst[1] = *(v4si *)&tem[4];  /* Read tem[4..7] back as one v4si.  */
+}
+void foo (v4si *dst, v8hi src)  /* Variant receiving the source vector by value.  */
+{
+  unsigned int tem[8];  /* Scalar staging array for the widened lanes.  */
+  tem[0] = src[0];  /* Each store converts one unsigned short lane to unsigned int.  */
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  dst[0] = *(v4si *)tem;  /* Read tem[0..3] back as one v4si.  */
+  dst[1] = *(v4si *)&tem[4];  /* Read tem[4..7] back as one v4si.  */
+}
+
+/* { dg-final { scan-tree-dump-times "vec_unpack_" 4 "optimized" } } */
index bedbe9ac978fa0468ba6a3350e655003f2210ae9..48aca3b48f66a7caaf9d86728bca71ed289a2d6c 100644 (file)
@@ -1410,10 +1410,11 @@ vect_build_slp_tree_2 (vec_info *vinfo,
                                        matches, npermutes,
                                        &this_tree_size, bst_map)) != NULL)
        {
-         /* If we have all children of child built up from scalars then just
-            throw that away and build it up this node from scalars.  */
+         /* If all children of a non-unary child were built up from
+            scalars then just throw that away and build up this node
+            from scalars.  */
          if (is_a <bb_vec_info> (vinfo)
-             && !SLP_TREE_CHILDREN (child).is_empty ()
+             && SLP_TREE_CHILDREN (child).length () > 1
              /* ???  Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
@@ -1549,10 +1550,11 @@ vect_build_slp_tree_2 (vec_info *vinfo,
                                            tem, npermutes,
                                            &this_tree_size, bst_map)) != NULL)
            {
-             /* If we have all children of child built up from scalars then
-                just throw that away and build it up this node from scalars.  */
+             /* If all children of a non-unary child were built up from
+                scalars then just throw that away and build up this node
+                from scalars.  */
              if (is_a <bb_vec_info> (vinfo)
-                 && !SLP_TREE_CHILDREN (child).is_empty ()
+                 && SLP_TREE_CHILDREN (child).length () > 1
                  /* ???  Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */