re PR tree-optimization/78205 (BB vectorization confused by too large load groups)
author Richard Biener <rguenther@suse.de>
Tue, 8 Nov 2016 08:06:42 +0000 (08:06 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Tue, 8 Nov 2016 08:06:42 +0000 (08:06 +0000)
2016-11-08  Richard Biener  <rguenther@suse.de>

PR tree-optimization/78205
* tree-vect-stmts.c (vectorizable_load): Move check whether
we may run into gaps when BB vectorizing SLP permutations ...
* tree-vect-slp.c (vect_supported_load_permutation_p): ...
here where we can do a more precise check.

* gcc.dg/vect/bb-slp-pr78205.c: New testcase.

From-SVN: r241956

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c [new file with mode: 0644]
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c

index 64b0b93ef476d974c5fe2252d9025c3a2a78f1b4..59d55154410b4867320b02a9653bc83733d405fb 100644 (file)
@@ -1,3 +1,11 @@
+2016-11-08  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/78205
+       * tree-vect-stmts.c (vectorizable_load): Move check whether
+       we may run into gaps when BB vectorizing SLP permutations ...
+       * tree-vect-slp.c (vect_supported_load_permutation_p): ...
+       here where we can do a more precise check.
+
 2016-11-08  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/78224
index 45a09a5e257e640ed12e10c81cfa5a077909eada..acc09f38367f952535aed644445f8da823f52dfe 100644 (file)
@@ -1,3 +1,8 @@
+2016-11-08  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/78205
+       * gcc.dg/vect/bb-slp-pr78205.c: New testcase.
+
 2016-11-08  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/78224
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c
new file mode 100644 (file)
index 0000000..e02502a
--- /dev/null
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+double x[2], a[4], b[4], c[5];
+
+void foo ()
+{
+  a[0] = c[0];
+  a[1] = c[1];
+  a[2] = c[0];
+  a[3] = c[1];
+  b[0] = c[2];
+  b[1] = c[3];
+  b[2] = c[2];
+  b[3] = c[3];
+  x[0] = c[4];
+  x[1] = c[4];
+}
+
+/* We may not vectorize the store to x[] as it accesses c out of bounds
+   but we do want to vectorize the other two store groups.  */
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "x\\\[\[0-1\]\\\] = " 2 "optimized" } } */
index 6694164effb4383ed9dd8a68e960808886b65740..8d547681913dff636474dc89951f3d6abce20b25 100644 (file)
@@ -1459,6 +1459,25 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
+             stmt_vec_info group_info
+               = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
+             group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
+             unsigned nunits
+               = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
+             unsigned k, maxk = 0;
+             FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
+               if (k > maxk)
+                 maxk = k;
+             /* In BB vectorization we may not actually use a loaded vector
+                accessing elements in excess of GROUP_SIZE.  */
+             if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
+               {
+                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                  "BB vectorization with gaps at the end of "
+                                  "a load is not supported\n");
+                 return false;
+               }
+
              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
index ab01defbe55102a9d9cfc71b0afccbe0bf85084f..15aec2197b33618f652c8c60a62cf48aa657ec2f 100644 (file)
@@ -6548,18 +6548,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
        slp_perm = true;
 
-      /* ???  The following is overly pessimistic (as well as the loop
-         case above) in the case we can statically determine the excess
-        elements loaded are within the bounds of a decl that is accessed.
-        Likewise for BB vectorizations using masked loads is a possibility.  */
-      if (bb_vinfo && slp_perm && group_size % nunits != 0)
-       {
-         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                          "BB vectorization with gaps at the end of a load "
-                          "is not supported\n");
-         return false;
-       }
-
       /* Invalidate assumptions made by dependence analysis when vectorization
         on the unrolled body effectively re-orders stmts.  */
       if (!PURE_SLP_STMT (stmt_info)