From 4f8b89f092467f9550cb2aa873c2e30ac809c08a Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Tue, 21 May 2019 12:01:00 +0000
Subject: [PATCH] re PR tree-optimization/90510 (Unnecessary permutation)

2019-05-21  Richard Biener

	PR middle-end/90510
	* fold-const.c (fold_read_from_vector): New function.
	* fold-const.h (fold_read_from_vector): Declare.
	* match.pd (VEC_PERM_EXPR): Build BIT_INSERT_EXPRs for
	single-element insert permutations.  Canonicalize selector
	further and fix issue with last commit.

	* gcc.target/i386/pr90510.c: New testcase.

From-SVN: r271463
---
 gcc/ChangeLog                           |  9 ++++
 gcc/fold-const.c                        | 31 ++++++++++++++
 gcc/fold-const.h                        |  1 +
 gcc/match.pd                            | 56 ++++++++++++++++++++++---
 gcc/testsuite/ChangeLog                 |  5 +++
 gcc/testsuite/gcc.target/i386/pr90510.c | 22 ++++++++++
 6 files changed, 118 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr90510.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e9f7b1f4d38..cd55c4dffad 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2019-05-21  Richard Biener
+
+	PR middle-end/90510
+	* fold-const.c (fold_read_from_vector): New function.
+	* fold-const.h (fold_read_from_vector): Declare.
+	* match.pd (VEC_PERM_EXPR): Build BIT_INSERT_EXPRs for
+	single-element insert permutations.  Canonicalize selector
+	further and fix issue with last commit.
+
 2019-05-21  Vladislav Ivanishin
 
 	* tree-cfg.h (split_critical_edges): Add for_edge_insertion_p
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 34448524050..42289013cc1 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -13793,6 +13793,37 @@ fold_read_from_constant_string (tree exp)
   return NULL;
 }
 
+/* Folds a read from vector element at IDX of vector ARG.  Returns the
+   element if ARG is a VECTOR_CST or a CONSTRUCTOR of scalar elements
+   and IDX is a constant known to be within the vector; trailing
+   CONSTRUCTOR elements that are not present read as zero.  Returns
+   NULL_TREE if the read cannot be folded.  */
+
+tree
+fold_read_from_vector (tree arg, poly_uint64 idx)
+{
+  unsigned HOST_WIDE_INT i;
+  if (known_lt (idx, TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)))
+      && known_ge (idx, 0u)
+      && idx.is_constant (&i))
+    {
+      if (TREE_CODE (arg) == VECTOR_CST)
+	return VECTOR_CST_ELT (arg, i);
+      else if (TREE_CODE (arg) == CONSTRUCTOR)
+	{
+	  /* A CONSTRUCTOR may be built from sub-vectors, in which case
+	     its elements do not correspond to scalar lanes.  */
+	  if (CONSTRUCTOR_NELTS (arg)
+	      && VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (arg, 0)->value)))
+	    return NULL_TREE;
+	  if (i >= CONSTRUCTOR_NELTS (arg))
+	    return build_zero_cst (TREE_TYPE (TREE_TYPE (arg)));
+	  return CONSTRUCTOR_ELT (arg, i)->value;
+	}
+    }
+  return NULL_TREE;
+}
+
 /* Return the tree for neg (ARG0) when ARG0 is known to be either
    an integer constant, real, or fixed-point constant.
 
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index c4b7cbfbdba..2a69bf9163d 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -100,6 +100,7 @@ extern tree fold_bit_and_mask (tree, tree, enum tree_code,
 			       tree, enum tree_code, tree, tree,
 			       tree, enum tree_code, tree, tree, tree *);
 extern tree fold_read_from_constant_string (tree);
+extern tree fold_read_from_vector (tree, poly_uint64);
 #if GCC_VEC_PERN_INDICES_H
 extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
 #endif
diff --git a/gcc/match.pd b/gcc/match.pd
index 9ff52123cd9..f9bc097c491 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5406,6 +5406,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	       op0 = op1;
 	       sel.rotate_inputs (1);
 	     }
+	   else if (known_ge (poly_uint64 (sel[0]), nelts))
+	     {
+	       std::swap (op0, op1);
+	       sel.rotate_inputs (1);
+	     }
          }
        gassign *def;
        tree cop0 = op0, cop1 = op1;
@@ -5429,9 +5434,46 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
      (with
       {
 	bool changed = (op0 == op1 && !single_arg);
+	tree ins = NULL_TREE;
+	unsigned at = 0;
+
+	/* See if the permutation is performing a single element
+	   insert from a CONSTRUCTOR or constant and use a BIT_INSERT_EXPR
+	   in that case.  But only if the vector mode is supported,
+	   otherwise this is invalid GIMPLE.  */
+	if (TYPE_MODE (type) != BLKmode
+	    && (TREE_CODE (cop0) == VECTOR_CST
+		|| TREE_CODE (cop0) == CONSTRUCTOR
+		|| TREE_CODE (cop1) == VECTOR_CST
+		|| TREE_CODE (cop1) == CONSTRUCTOR))
+	  {
+	    if (sel.series_p (1, 1, nelts + 1, 1))
+	      {
+		/* After canonicalizing the first elt to come from the
+		   first vector, lane 0 is elt sel[0] of that vector.
+		   Only rebase onto OP1 when that elt actually folds.  */
+		at = 0;
+		if ((ins = fold_read_from_vector (cop0, sel[0])))
+		  op0 = op1;
+	      }
+	    else
+	      {
+		unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
+		for (at = 0; at < encoded_nelts; ++at)
+		  if (maybe_ne (sel[at], at))
+		    break;
+		if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
+		  {
+		    if (known_lt (poly_uint64 (sel[at]), nelts))
+		      ins = fold_read_from_vector (cop0, sel[at]);
+		    else
+		      ins = fold_read_from_vector (cop1, sel[at] - nelts);
+		  }
+	      }
+	  }
 
 	/* Generate a canonical form of the selector.  */
-	if (sel.encoding () != builder)
+	if (!ins && sel.encoding () != builder)
 	  {
 	    /* Some targets are deficient and fail to expand a single
 	       argument permutation while still allowing an equivalent
@@ -5450,10 +5492,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 		   so use the preferred form.  */
 		op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
 	      }
-	    /* Differences in the encoder do not necessarily mean
-	       differences in the resulting vector.  */
-	    changed = !operand_equal_p (op2, oldop2, 0);
+	    if (!operand_equal_p (op2, oldop2, 0))
+	      changed = true;
 	  }
       }
-      (if (changed)
-       (vec_perm { op0; } { op1; } { op2; })))))))))
+      (if (ins)
+       (bit_insert { op0; } { ins; }
+         { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
+       (if (changed)
+        (vec_perm { op0; } { op1; } { op2; }))))))))))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 66354eb1cc8..d68685ba9db 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-05-21  Richard Biener
+
+	PR middle-end/90510
+	* gcc.target/i386/pr90510.c: New testcase.
+
 2019-05-21  Martin Liska
 
 	* gcc.target/i386/pr90500-1.c: Add missing '""'.
diff --git a/gcc/testsuite/gcc.target/i386/pr90510.c b/gcc/testsuite/gcc.target/i386/pr90510.c new file mode 100644 index 00000000000..a3c11a45bf4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr90510.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */ + +typedef double __v2df __attribute__ ((__vector_size__ (16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); + +__v2df +_mm_add_sd_A (__v2df x, __v2df y) +{ + double tem = x[0] + y[0]; + return __builtin_shuffle ( x, (__v2df) { tem, tem }, (__v2di) { 2, 1 } ); +} + +__v2df +_mm_add_sd_B (__v2df x, __v2df y) +{ + __v2df z = { (x[0] + y[0]), x[1] }; + return z; +} + +/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "optimized" } } */ +/* { dg-final { scan-assembler-not "unpck" } } */ -- 2.30.2