From 2acb33103c4cbc6766d8707ad58aa5cc9a804719 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Wed, 28 Oct 2020 12:56:16 +0100
Subject: [PATCH] Change the way we split stores in BB vectorization

The following fixes missed optimizations due to the strange way
we split stores in BB vectorization.  The solution is to split
at the failure boundary rather than re-aligning the split point
to the vector size chosen for the initial piece.  Also re-analyze
any larger matching rest.

2020-10-28  Richard Biener

	* tree-vect-slp.c (vect_build_slp_instance): Split the store
	group at the failure boundary and also re-analyze a large enough
	matching rest.

	* gcc.dg/vect/bb-slp-68.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-68.c | 22 ++++++++++++++++++++++
 gcc/tree-vect-slp.c                   | 20 +++++++++++++-------
 2 files changed, 35 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-68.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-68.c b/gcc/testsuite/gcc.dg/vect/bb-slp-68.c
new file mode 100644
index 00000000000..8718031cc71
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-68.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-mavx" { target avx } } */
+
+double x[10], y[6], z[4];
+
+void foo ()
+{
+  x[0] = y[0];
+  x[1] = y[1];
+  x[2] = y[2];
+  x[3] = y[3];
+  x[4] = y[4];
+  x[5] = y[5];
+  x[6] = z[0] + 1.;
+  x[7] = z[1] + 1.;
+  x[8] = z[2] + 1.;
+  x[9] = z[3] + 1.;
+}
+
+/* We want to have the store group split into 4, 2, 4 when using 32byte vectors.  */
+/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 470b67d76b5..50a2d37eb25 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2412,15 +2412,21 @@ vect_build_slp_instance (vec_info *vinfo,
 							       group1_size);
 	      bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
 						    max_tree_size);
-	      /* If the first non-match was in the middle of a vector,
-		 skip the rest of that vector.  Do not bother to re-analyze
-		 single stmt groups.  */
-	      if (group1_size < i)
+	      /* Split the rest at the failure point and possibly
+		 re-analyze the remaining matching part if it has
+		 at least two lanes.  */
+	      if (group1_size < i
+		  && (i + 1 < group_size
+		      || i - group1_size > 1))
 		{
-		  i = group1_size + const_nunits;
-		  if (i + 1 < group_size)
-		    rest = vect_split_slp_store_group (rest, const_nunits);
+		  stmt_vec_info rest2 = rest;
+		  rest = vect_split_slp_store_group (rest, i - group1_size);
+		  if (i - group1_size > 1)
+		    res |= vect_analyze_slp_instance (vinfo, bst_map,
+						      rest2, max_tree_size);
 		}
+	      /* Re-analyze the non-matching tail if it has at least
+		 two lanes.  */
 	      if (i + 1 < group_size)
 		res |= vect_analyze_slp_instance (vinfo, bst_map,
 						  rest, max_tree_size);
-- 
2.30.2