From 429ad0bb0d3dc77e44f95620341da4938d49168e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 15 Oct 2020 11:55:53 +0200 Subject: [PATCH] tree-optimization/97428 - split SLP groups for loop vectorization This enables SLP store group splitting also for loop vectorization. For the existing testcase gcc.dg/vect/vect-complex-5.c this then generates much better code, likewise for the PR97428 testcase. Both of those have a splitting opportunity splitting the group into two equal (vector-sized) halves, still the patch enables quite arbitrary splitting since generally the interleaving scheme results in quite awkward code for even small groups. If any problems surface with this it's easy to restrict the splitting to known-good cases. 2020-10-15 Richard Biener PR tree-optimization/97428 * tree-vect-slp.c (vect_analyze_slp_instance): Split store groups also for loop vectorization. * gcc.dg/vect/vect-complex-5.c: Expect to SLP. * gcc.dg/vect/pr97428.c: Likewise. --- gcc/testsuite/gcc.dg/vect/pr97428.c | 1 + gcc/testsuite/gcc.dg/vect/vect-complex-5.c | 2 +- gcc/tree-vect-slp.c | 46 ++++++++++++++++++---- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr97428.c b/gcc/testsuite/gcc.dg/vect/pr97428.c index b5b02dca9de..49d53738256 100644 --- a/gcc/testsuite/gcc.dg/vect/pr97428.c +++ b/gcc/testsuite/gcc.dg/vect/pr97428.c @@ -40,4 +40,5 @@ void foo_i2(dcmlx4_t dst[], const dcmlx_t src[], int n) load and store groups. 
*/ /* { dg-final { scan-tree-dump "Detected interleaving load of size 8" "vect" } } */ /* { dg-final { scan-tree-dump "Detected interleaving store of size 16" "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ /* { dg-final { scan-tree-dump-not "gap of 6 elements" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-complex-5.c b/gcc/testsuite/gcc.dg/vect/vect-complex-5.c index a2e3590ed98..06486375449 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-complex-5.c +++ b/gcc/testsuite/gcc.dg/vect/vect-complex-5.c @@ -40,4 +40,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 5e0a3608948..8037b27cddd 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2244,20 +2244,20 @@ vect_analyze_slp_instance (vec_info *vinfo, scalar_stmts.release (); } - /* For basic block SLP, try to break the group up into multiples of the - vector size. */ + /* Try to break the group up into pieces. */ unsigned HOST_WIDE_INT const_nunits; - if (is_a <bb_vec_info> (vinfo) - && STMT_VINFO_GROUPED_ACCESS (stmt_info) + if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)) && nunits.is_constant (&const_nunits)) { - /* We consider breaking the group only on VF boundaries from the existing - start. */ for (i = 0; i < group_size; i++) - if (!matches[i]) break; + if (!matches[i]) + break; - if (i >= const_nunits && i < group_size) + /* For basic block SLP, try to break the group up into multiples of the + vector size. */ + if (is_a <bb_vec_info> (vinfo) + && (i >= const_nunits && i < group_size)) { /* Split into two groups at the first vector boundary before i. 
*/ gcc_assert ((const_nunits & (const_nunits - 1)) == 0); @@ -2284,6 +2284,36 @@ vect_analyze_slp_instance (vec_info *vinfo, rest, max_tree_size); return res; } + + /* For loop vectorization split into arbitrary pieces of size > 1. */ + if (is_a <loop_vec_info> (vinfo) + && (i > 1 && i < group_size)) + { + gcc_assert ((const_nunits & (const_nunits - 1)) == 0); + unsigned group1_size = i; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Splitting SLP group at stmt %u\n", i); + + stmt_vec_info rest = vect_split_slp_store_group (stmt_info, + group1_size); + /* Loop vectorization cannot handle gaps in stores, make sure + the split group appears as strided. */ + STMT_VINFO_STRIDED_P (rest) = 1; + DR_GROUP_GAP (rest) = 0; + STMT_VINFO_STRIDED_P (stmt_info) = 1; + DR_GROUP_GAP (stmt_info) = 0; + + bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info, + max_tree_size); + if (i + 1 < group_size) + res |= vect_analyze_slp_instance (vinfo, bst_map, + rest, max_tree_size); + + return res; + } + /* Even though the first vector did not all match, we might be able to SLP (some) of the remainder. FORNOW ignore this possibility. */ } -- 2.30.2