From 52a170b1a1818b7521c25e76271638a448b3f630 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 12 Jan 2021 11:17:33 +0100 Subject: [PATCH] tree-optimization/98550 - fix BB vect unrolling check This fixes the check that disqualifies BB vectorization because of required unrolling to match up with the later exact_div we do. To not disable the ability to split groups that do not match up exactly with a chosen vector type this also introduces a soft-fail mechanism to vect_build_slp_tree_1 which delays failing until after the matches[] array is populated from other checks and only then determines the split point according to the vector type. 2021-01-12 Richard Biener PR tree-optimization/98550 * tree-vect-slp.c (vect_record_max_nunits): Check whether the group size is a multiple of the vector element count. (vect_build_slp_tree_1): When we need to fail because the vector type chosen causes unrolling do so lazily without affecting matches only at the end to guide group splitting. * g++.dg/opt/pr98550.C: New testcase. 
--- gcc/testsuite/g++.dg/opt/pr98550.C | 96 ++++++++++++++++++++++++++++++ gcc/tree-vect-slp.c | 40 ++++++++++--- 2 files changed, 128 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/g++.dg/opt/pr98550.C diff --git a/gcc/testsuite/g++.dg/opt/pr98550.C b/gcc/testsuite/g++.dg/opt/pr98550.C new file mode 100644 index 00000000000..49102e6c1a1 --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/pr98550.C @@ -0,0 +1,96 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target c++11 } */ +/* { dg-additional-options "-O3" } */ +/* { dg-additional-options "-march=z13" { target s390x-*-* } } */ + +template struct k { static constexpr int c = a; }; +template struct o; +template struct o { + typedef decltype(0) h; +}; +template struct p : o::c, k::c, g...> {}; +class q; +class r { +public: + void ap(q); +}; +template void ax(aw ay) { ay(); } +template void ba(az bb) { + using bc = p; + using bd = typename bc::h; + ax(bb); +} +template class s; +class t { +public: + s<8> br(); + template void operator()() { ba(br()); } +}; +class q { +public: + template q(az) { H(); } + struct H { + t cc; + H() { cc(); } + }; +}; +template struct I {}; +template void cm(j cn, I) { + cm(cn, I()); + cn(cl); +} +template void cm(j, I<0>) {} +template struct u { + long cp[co]; + void cq(const u &); + void cs(int); + void operator<(u); +}; +template void u::cq(const u &l) { + cm([&](int i) { cp[i] &= l.cp[i]; }, I()); +} +template void u::cs(int m) { + cm([&](int i) { cp[i] >>= m; }, I()); +} +template class K; +template class v { + int cv; + friend K; + +public: + void cx(int, unsigned char *, unsigned long long); +}; +template class K { +public: + static void cx(v &); +}; +template +void v::cx(int, unsigned char *, unsigned long long) { + K::cx(*this); +} +template void K::cx(v &cz) { + u a, b, d; + int e, n = cz.cv; + for (; e;) + if (cz.cv) + a.cs(cz.cv); + a.cq(d); + a < b; +} +template class s { + v *dh; + +public: + void operator()(); +}; +template void s::operator()() { + int f; 
+ unsigned char g; + long h; + dh->cx(f, &g, h); +} +void d() { + r i; + t j; + i.ap(j); +} diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 877d44b2257..65b7a27e1e8 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -873,11 +873,8 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info, /* If populating the vector type requires unrolling then fail before adjusting *max_nunits for basic-block vectorization. */ - poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - unsigned HOST_WIDE_INT const_nunits; if (is_a (vinfo) - && (!nunits.is_constant (&const_nunits) - || const_nunits > group_size)) + && !multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -928,6 +925,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, stmt_vec_info first_load = NULL, prev_first_load = NULL; bool first_stmt_load_p = false, load_p = false; bool first_stmt_phi_p = false, phi_p = false; + bool maybe_soft_fail = false; + tree soft_fail_nunits_vectype = NULL_TREE; /* For every stmt in NODE find its def stmt/s. */ stmt_vec_info stmt_info; @@ -977,10 +976,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, tree nunits_vectype; if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype, - &nunits_vectype, group_size) - || (nunits_vectype - && !vect_record_max_nunits (vinfo, stmt_info, group_size, - nunits_vectype, max_nunits))) + &nunits_vectype, group_size)) { if (is_a (vinfo) && i != 0) continue; @@ -988,6 +984,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, matches[0] = false; return false; } + /* Record nunits required but continue analysis, producing matches[] + as if nunits was not an issue. This allows splitting of groups + to happen. 
*/ + if (nunits_vectype + && !vect_record_max_nunits (vinfo, stmt_info, group_size, + nunits_vectype, max_nunits)) + { + gcc_assert (is_a (vinfo)); + maybe_soft_fail = true; + soft_fail_nunits_vectype = nunits_vectype; + } gcc_assert (vectype); @@ -1340,6 +1347,23 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, *two_operators = true; } + if (maybe_soft_fail) + { + unsigned HOST_WIDE_INT const_nunits; + if (!TYPE_VECTOR_SUBPARTS + (soft_fail_nunits_vectype).is_constant (&const_nunits) + || const_nunits > group_size) + matches[0] = false; + else + { + /* With constant vector elements simulate a mismatch at the + point we need to split. */ + unsigned tail = group_size & (const_nunits - 1); + memset (&matches[group_size - tail], 0, sizeof (bool) * tail); + } + return false; + } + return true; } -- 2.30.2