From: Richard Biener Date: Tue, 8 Nov 2016 08:06:42 +0000 (+0000) Subject: re PR tree-optimization/78205 (BB vectorization confused by too large load groups) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fe73a33284d23c5a7d8d8eb5b13e37454401b6c4;p=gcc.git re PR tree-optimization/78205 (BB vectorization confused by too large load groups) 2016-11-08 Richard Biener PR tree-optimization/78205 * tree-vect-stmts.c (vectorizable_load): Move check whether we may run into gaps when BB vectorizing SLP permutations ... * tree-vect-slp.c (vect_supported_load_permutation_p): ... here where we can do a more precise check. * gcc.dg/vect/bb-slp-pr78205.c: New testcase. From-SVN: r241956 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 64b0b93ef47..59d55154410 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2016-11-08 Richard Biener + + PR tree-optimization/78205 + * tree-vect-stmts.c (vectorizable_load): Move check whether + we may run into gaps when BB vectorizing SLP permutations ... + * tree-vect-slp.c (vect_supported_load_permutation_p): ... + here where we can do a more precise check. + 2016-11-08 Richard Biener PR tree-optimization/78224 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 45a09a5e257..acc09f38367 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-11-08 Richard Biener + + PR tree-optimization/78205 + * gcc.dg/vect/bb-slp-pr78205.c: New testcase. 
+ 2016-11-08 Richard Biener PR tree-optimization/78224 diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c new file mode 100644 index 00000000000..e02502a3fc1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr78205.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +double x[2], a[4], b[4], c[5]; + +void foo () +{ + a[0] = c[0]; + a[1] = c[1]; + a[2] = c[0]; + a[3] = c[1]; + b[0] = c[2]; + b[1] = c[3]; + b[2] = c[2]; + b[3] = c[3]; + x[0] = c[4]; + x[1] = c[4]; +} + +/* We may not vectorize the store to x[] as it accesses c out of bounds + but we do want to vectorize the other two store groups. */ + +/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */ +/* { dg-final { scan-tree-dump-times "x\\\[\[0-1\]\\\] = " 2 "optimized" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 6694164effb..8d547681913 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1459,6 +1459,25 @@ vect_supported_load_permutation_p (slp_instance slp_instn) SLP_TREE_LOAD_PERMUTATION (node).release (); else { + stmt_vec_info group_info + = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); + group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info)); + unsigned nunits + = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info)); + unsigned k, maxk = 0; + FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k) + if (k > maxk) + maxk = k; + /* In BB vectorization we may not actually use a loaded vector + accessing elements in excess of GROUP_SIZE. */ + if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1))) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "BB vectorization with gaps at the end of " + "a load is not supported\n"); + return false; + } + /* Verify the permutation can be generated. 
*/ vec<tree> tem; unsigned n_perms; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index ab01defbe55..15aec2197b3 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6548,18 +6548,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) slp_perm = true; - /* ??? The following is overly pessimistic (as well as the loop - case above) in the case we can statically determine the excess - elements loaded are within the bounds of a decl that is accessed. - Likewise for BB vectorizations using masked loads is a possibility. */ - if (bb_vinfo && slp_perm && group_size % nunits != 0) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "BB vectorization with gaps at the end of a load " - "is not supported\n"); - return false; - } - /* Invalidate assumptions made by dependence analysis when vectorization on the unrolled body effectively re-orders stmts. */ if (!PURE_SLP_STMT (stmt_info)