+2016-11-08 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/78205
+ * tree-vect-stmts.c (vectorizable_load): Move check whether
+ we may run into gaps when BB vectorizing SLP permutations ...
+ * tree-vect-slp.c (vect_supported_load_permutation_p): ...
+ here where we can do a more precise check.
+
2016-11-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/78224
+2016-11-08 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/78205
+ * gcc.dg/vect/bb-slp-pr78205.c: New testcase.
+
2016-11-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/78224
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+double x[2], a[4], b[4], c[5];
+
+void foo ()
+{
+ a[0] = c[0];  /* a[] group: the pair c[0], c[1] is stored twice.  */
+ a[1] = c[1];
+ a[2] = c[0];
+ a[3] = c[1];
+ b[0] = c[2];  /* b[] group: the pair c[2], c[3] is stored twice.  */
+ b[1] = c[3];
+ b[2] = c[2];
+ b[3] = c[3];
+ x[0] = c[4];  /* x[] group: reads only c[4], the last element of c.  */
+ x[1] = c[4];
+}
+
+/* We may not vectorize the store to x[] as it accesses c out of bounds
+ but we do want to vectorize the other two store groups. */
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
+/* { dg-final { scan-tree-dump-times "x\\\[\[0-1\]\\\] = " 2 "optimized" } } */
SLP_TREE_LOAD_PERMUTATION (node).release ();
else
{
+ stmt_vec_info group_info
+ = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
+ group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
+ unsigned nunits
+ = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
+ unsigned k, maxk = 0;
+ FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
+ if (k > maxk)
+ maxk = k;
+ /* In BB vectorization we may not actually use a loaded vector
+ accessing elements in excess of GROUP_SIZE. */
+ if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "BB vectorization with gaps at the end of "
+ "a load is not supported\n");
+ return false;
+ }
+
/* Verify the permutation can be generated. */
vec<tree> tem;
unsigned n_perms;
if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
slp_perm = true;
- /* ??? The following is overly pessimistic (as well as the loop
- case above) in the case we can statically determine the excess
- elements loaded are within the bounds of a decl that is accessed.
- Likewise for BB vectorizations using masked loads is a possibility. */
- if (bb_vinfo && slp_perm && group_size % nunits != 0)
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "BB vectorization with gaps at the end of a load "
- "is not supported\n");
- return false;
- }
-
/* Invalidate assumptions made by dependence analysis when vectorization
on the unrolled body effectively re-orders stmts. */
if (!PURE_SLP_STMT (stmt_info)