From 2ef278569f60a2c1556f1752aeba39c586521371 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 6 Dec 2019 07:53:15 +0000 Subject: [PATCH] re PR tree-optimization/92819 (Worse code generated on avx2 due to simplify_vector_constructor) 2019-12-06 Richard Biener PR tree-optimization/92819 * match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts into the last lane. For two-element vectors try inserting into the last lane when inserting into the first fails. * gcc.target/i386/pr92819-1.c: New testcase. * gcc.target/i386/pr92803.c: Adjust. From-SVN: r279033 --- gcc/ChangeLog | 7 +++++++ gcc/match.pd | 13 ++++++++++--- gcc/testsuite/ChangeLog | 6 ++++++ gcc/testsuite/gcc.target/i386/pr92803.c | 8 +++++--- gcc/testsuite/gcc.target/i386/pr92819-1.c | 20 ++++++++++++++++++++ 5 files changed, 48 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr92819-1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b12636a581b..3e747a620c5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2019-12-06 Richard Biener + + PR tree-optimization/92819 + * match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts + into the last lane. For two-element vectors try inserting + into the last lane when inserting into the first fails. 
+ 2019-12-06 Jakub Jelinek * common.opt (fprofile-partial-training): Terminate description with diff --git a/gcc/match.pd b/gcc/match.pd index 68027f6757d..e32d8009647 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6032,7 +6032,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) || TREE_CODE (cop1) == VECTOR_CST || TREE_CODE (cop1) == CONSTRUCTOR)) { - if (sel.series_p (1, 1, nelts + 1, 1)) + bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1); + if (insert_first_p) { /* After canonicalizing the first elt to come from the first vector we only can insert the first elt from @@ -6041,13 +6042,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) if ((ins = fold_read_from_vector (cop0, sel[0]))) op0 = op1; } - else + /* The above can fail for two-element vectors which always + appear to insert the first element, so try inserting + into the second lane as well. For more than two + elements that's wasted time. */ + if (!insert_first_p || (!ins && maybe_eq (nelts, 2u))) { unsigned int encoded_nelts = sel.encoding ().encoded_nelts (); for (at = 0; at < encoded_nelts; ++at) if (maybe_ne (sel[at], at)) break; - if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1)) + if (at < encoded_nelts + && (known_eq (at + 1, nelts) + || sel.series_p (at + 1, 1, at + 1, 1))) { if (known_lt (poly_uint64 (sel[at]), nelts)) ins = fold_read_from_vector (cop0, sel[at]); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 753aa398ac9..bcc65f27336 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2019-12-06 Richard Biener + + PR tree-optimization/92819 + * gcc.target/i386/pr92819-1.c: New testcase. + * gcc.target/i386/pr92803.c: Adjust. 
+ 2019-12-05 Martin Sebor PR testsuite/92829 diff --git a/gcc/testsuite/gcc.target/i386/pr92803.c b/gcc/testsuite/gcc.target/i386/pr92803.c index fc8d64efb83..d533bae0c26 100644 --- a/gcc/testsuite/gcc.target/i386/pr92803.c +++ b/gcc/testsuite/gcc.target/i386/pr92803.c @@ -31,8 +31,10 @@ barf (v8sf x) return (v4sf) { x[4], x[5], 1.0f, 2.0f }; } -/* We expect all CTORs to turn into permutes, the FP converting ones +/* For bar we do two inserts, first zero, then convert, then insert *p. */ +/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */ +/* We expect all other CTORs to turn into permutes, the FP converting ones to two each with the one with constants possibly elided in the future by converting 3.0f and 1.0f "back" to integers. */ -/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 6 "forwprop1" } } */ -/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 5 "forwprop1" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 4 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "forwprop1" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr92819-1.c b/gcc/testsuite/gcc.target/i386/pr92819-1.c new file mode 100644 index 00000000000..0ec0ca52f21 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92819-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O -msse2 -fdump-tree-forwprop1" } */ + +typedef double v2df __attribute__((vector_size (16))); + +v2df +foo (v2df x, double *p) +{ + return (v2df) { x[0], *p }; +} + +v2df +bar (v2df x, double *p) +{ + return (v2df) { *p, x[1] }; +} + +/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */ +/* { dg-final { scan-assembler "movhpd" } } */ +/* { dg-final { scan-assembler "movlpd" } } */ -- 2.30.2