From 16760e5bf7028dfa36b39af305d05cdf2c15b3a9 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 9 Oct 2020 12:24:46 +0200 Subject: [PATCH] tree-optimization/97334 - improve BB SLP discovery We're running into a multiplication with one unvectorizable operand we expect to build from scalars but SLP discovery fatally fails the build of both since one stmt is commutated: _60 = _58 * _59; _63 = _59 * _62; _66 = _59 * _65; ... where _59 is the "bad" operand. The following patch makes the case work where the first stmt has a good operand by not fatally failing the SLP build for the operand but communicating upwards how to commutate. 2020-10-09 Richard Biener PR tree-optimization/97334 * tree-vect-slp.c (vect_build_slp_tree_1): Do not fatally fail lanes other than zero when BB vectorizing. * gcc.dg/vect/bb-slp-pr65935.c: Amend. --- gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c | 3 +++ gcc/tree-vect-slp.c | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c index 4e3448eccd7..ea37e4e614c 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c @@ -60,3 +60,6 @@ int main() /* We should also be able to use 2-lane SLP to initialize the real and imaginary components in the first loop of main. */ /* { dg-final { scan-tree-dump-times "optimized: basic block" 10 "slp1" } } */ +/* We should see the s->phase[dir] operand and only that operand built + from scalars. See PR97334. */ +/* { dg-final { scan-tree-dump-times "Building vector operands from scalars" 1 "slp1" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 479c3eeaec7..495fb970e24 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -773,6 +773,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Build SLP failed: unvectorizable statement %G", stmt); + /* ??? 
For BB vectorization we want to commutate operands in a way + to shuffle all unvectorizable defs into one operand and have + the other still vectorized. The following doesn't reliably + work for this though but it's the easiest we can do here. */ + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; /* Fatal mismatch. */ matches[0] = false; return false; @@ -785,6 +791,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Build SLP failed: not GIMPLE_ASSIGN nor " "GIMPLE_CALL %G", stmt); + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; /* Fatal mismatch. */ matches[0] = false; return false; @@ -797,6 +805,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, && !vect_record_max_nunits (vinfo, stmt_info, group_size, nunits_vectype, max_nunits))) { + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; /* Fatal mismatch. */ matches[0] = false; return false; @@ -823,6 +833,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Build SLP failed: unsupported call type %G", call_stmt); + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; /* Fatal mismatch. */ matches[0] = false; return false; @@ -865,6 +877,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Build SLP failed: no optab.\n"); + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; /* Fatal mismatch. */ matches[0] = false; return false; @@ -876,6 +890,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Build SLP failed: " "op not supported by target.\n"); + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; /* Fatal mismatch. 
*/ matches[0] = false; return false; @@ -900,6 +916,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (TREE_CODE (vec) != SSA_NAME || !types_compatible_p (vectype, TREE_TYPE (vec))) { + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Build SLP failed: " @@ -1048,6 +1066,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, "Build SLP failed: not grouped load %G", stmt); /* FORNOW: Not grouped loads are not supported. */ + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; /* Fatal mismatch. */ matches[0] = false; return false; @@ -1066,6 +1086,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Build SLP failed: operation unsupported %G", stmt); + if (is_a <bb_vec_info> (vinfo) && i != 0) + continue; /* Fatal mismatch. */ matches[0] = false; return false; -- 2.30.2