From c78907d514d65483c7ddfb4cb1f5c57f23da73d9 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 24 Jun 2020 15:49:00 +0200 Subject: [PATCH] tree-optimization/95866 - avoid vectorizing uniform SLP subgraphs This avoids vectorizing SLP subgraphs that just compute uniform operations on all-same operands. That fixes the less interesting (but most embarrassing) part of the testcase in the PR. On the way it also fixed a missing matches[0] reset in the last refactoring touching that place. 2020-06-24 Richard Biener PR tree-optimization/95866 * tree-vect-slp.c (vect_slp_tree_uniform_p): New. (vect_build_slp_tree_2): Properly reset matches[0], ignore uniform constants. * gcc.target/i386/pr95866-1.c: New testcase. --- gcc/testsuite/gcc.target/i386/pr95866-1.c | 18 ++++++++++++++++ gcc/tree-vect-slp.c | 26 +++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr95866-1.c diff --git a/gcc/testsuite/gcc.target/i386/pr95866-1.c b/gcc/testsuite/gcc.target/i386/pr95866-1.c new file mode 100644 index 00000000000..991370cf669 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95866-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-slp2-details -msse2" } */ + +int x[4]; +void foo(int i) +{ + int j = (i+1) & 31; + x[0] = (x[0] << j) + j; + x[1] = (x[1] << j) + j; + x[2] = (x[2] << j) + j; + x[3] = (x[3] << j) + j; +} + +/* We should not use vector operations for i + 1 and (i + 1) & 31 but + instead use { j, j, j, j }. 
*/ +/* { dg-final { scan-tree-dump-times "Building parent vector operands from scalars" 2 "slp2" } } */ +/* { dg-final { scan-tree-dump-not " = \{i_" "slp2" } } */ +/* { dg-final { scan-tree-dump-times " = \{j_" 1 "slp2" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index e7a260877a9..b2792c76ad2 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -238,6 +238,26 @@ vect_contains_pattern_stmt_p (vec stmts) return false; } +/* Return true when all lanes in the external or constant NODE have + the same value. */ + +static bool +vect_slp_tree_uniform_p (slp_tree node) +{ + gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_constant_def + || SLP_TREE_DEF_TYPE (node) == vect_external_def); + + unsigned i; + tree op, first = NULL_TREE; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) + if (!first) + first = op; + else if (!operand_equal_p (first, op, 0)) + return false; + + return true; +} + /* Find the place of the data-ref in STMT_INFO in the interleaving chain that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part of the chain. */ @@ -1439,7 +1459,7 @@ fail: vect_free_oprnd_info (oprnds_info); /* If we have all children of a non-unary child built up from - scalars then just throw that away, causing it built up + uniform scalars then just throw that away, causing it built up from scalars. */ if (nops > 1 && is_a (vinfo) @@ -1451,11 +1471,13 @@ fail: slp_tree child; unsigned j; FOR_EACH_VEC_ELT (children, j, child) - if (SLP_TREE_DEF_TYPE (child) != vect_external_def) + if (SLP_TREE_DEF_TYPE (child) == vect_internal_def + || !vect_slp_tree_uniform_p (child)) break; if (!child) { /* Roll back. */ + matches[0] = false; FOR_EACH_VEC_ELT (children, j, child) vect_free_slp_tree (child, false); -- 2.30.2