From 6c6e0cafa38cee8309f37b846cb7db813a472a54 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 14 Oct 2020 15:37:51 +0200 Subject: [PATCH] Adjust BB vectorization SLP build heuristics This changes SLP def gathering to not fail due to mismatched def type but instead demote the def to external. This allows the new testcase to be vectorized in full (with GCC 10 it is not vectorized at all and with current trunk we vectorize only the store). This is important since with BB vectorization being applied to bigger pieces of code the chance that we mix internal and external defs for an operand that should end up treated as external (built from scalars) increases. 2020-10-16 Richard Biener * tree-vect-slp.c (vect_get_and_check_slp_defs): For BB vectorization swap operands only if it helps, demote mismatches to external. * gcc.dg/vect/bb-slp-53.c: New testcase. --- gcc/testsuite/gcc.dg/vect/bb-slp-53.c | 20 +++++++++++++++++ gcc/tree-vect-slp.c | 31 +++++++++++++++++++++------ 2 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-53.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-53.c b/gcc/testsuite/gcc.dg/vect/bb-slp-53.c new file mode 100644 index 00000000000..f3b5f317444 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-53.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ + +double a[2], b[2]; + +void foo(double x, double y) +{ + double breakme1 = y + 3.; + double a1 = b[1] + 2.; + double breakme0 = x; + double a0 = b[0] + 1.; + a[0] = a0 * breakme0; + a[1] = a1 * breakme1; +} + +/* We should vectorize the SLP opportunity starting from the + grouped store to a[] including the load from b[] at the + leaf even though the multiplication requires another + vector invariant to be built. 
*/ +/* { dg-final { scan-tree-dump "transform load" "slp2" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index cfb79e2651f..c3e6d67067c 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -564,8 +564,15 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, != (oprnd_info->first_dt != vect_reduction_def)))) { /* Try swapping operands if we got a mismatch. For BB - vectorization only in case that will improve things. */ - if (i == commutative_op && !swapped) + vectorization only in case it will clearly improve things. */ + if (i == commutative_op && !swapped + && (!is_a <bb_vec_info> (vinfo) + || (!vect_def_types_match ((*oprnds_info)[i+1]->first_dt, + dts[i+1]) + && (vect_def_types_match (oprnd_info->first_dt, + dts[i+1]) + || vect_def_types_match + ((*oprnds_info)[i+1]->first_dt, dts[i]))))) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -579,10 +586,22 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, continue; } - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Build SLP failed: different types\n"); - return 1; + if (is_a <bb_vec_info> (vinfo)) + { + /* Now for commutative ops we should see whether we can + make the other operand matching. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "treating operand as external\n"); + oprnd_info->first_dt = dt = vect_external_def; + } + else + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Build SLP failed: different types\n"); + return 1; + } } /* Make sure to demote the overall operand to external. */ -- 2.30.2