re PR tree-optimization/92819 (Worse code generated on avx2 due to simplify_vector_constructor)
author Richard Biener <rguenther@suse.de>
Fri, 6 Dec 2019 07:53:15 +0000 (07:53 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Fri, 6 Dec 2019 07:53:15 +0000 (07:53 +0000)
2019-12-06  Richard Biener  <rguenther@suse.de>

PR tree-optimization/92819
* match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
into the last lane.  For two-element vectors try inserting
into the last lane when inserting into the first fails.

* gcc.target/i386/pr92819-1.c: New testcase.
* gcc.target/i386/pr92803.c: Adjust.

From-SVN: r279033

gcc/ChangeLog
gcc/match.pd
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr92803.c
gcc/testsuite/gcc.target/i386/pr92819-1.c [new file with mode: 0644]

index b12636a581b1f3e3060ab659f582194c804ff7f1..3e747a620c5ef6cf2302b7591831a61904106034 100644 (file)
@@ -1,3 +1,10 @@
+2019-12-06  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/92819
+       * match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
+       into the last lane.  For two-element vectors try inserting
+       into the last lane when inserting into the first fails.
+
 2019-12-06  Jakub Jelinek  <jakub@redhat.com>
 
        * common.opt (fprofile-partial-training): Terminate description with
index 68027f6757d8a4f71bd5e69a98fbb141ecc66a1b..e32d800964748db1cbb2bb3324d1ccbc90e0a5db 100644 (file)
@@ -6032,7 +6032,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
                || TREE_CODE (cop1) == VECTOR_CST
                || TREE_CODE (cop1) == CONSTRUCTOR))
           {
-           if (sel.series_p (1, 1, nelts + 1, 1))
+           bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1);
+           if (insert_first_p)
              {
                /* After canonicalizing the first elt to come from the
                   first vector we only can insert the first elt from
@@ -6041,13 +6042,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
                if ((ins = fold_read_from_vector (cop0, sel[0])))
                  op0 = op1;
              }
-           else
+           /* The above can fail for two-element vectors which always
+              appear to insert the first element, so try inserting
+              into the second lane as well.  For more than two
+              elements that's wasted time.  */
+           if (!insert_first_p || (!ins && maybe_eq (nelts, 2u)))
              {
                unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
                for (at = 0; at < encoded_nelts; ++at)
                  if (maybe_ne (sel[at], at))
                    break;
-               if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
+               if (at < encoded_nelts
+                   && (known_eq (at + 1, nelts)
+                       || sel.series_p (at + 1, 1, at + 1, 1)))
                  {
                    if (known_lt (poly_uint64 (sel[at]), nelts))
                      ins = fold_read_from_vector (cop0, sel[at]);
index 753aa398ac93f65648ca5113cca60a1e3956a500..bcc65f27336290e18b2c5b4dec44d30626ec0be0 100644 (file)
@@ -1,3 +1,9 @@
+2019-12-06  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/92819
+       * gcc.target/i386/pr92819-1.c: New testcase.
+       * gcc.target/i386/pr92803.c: Adjust.
+
 2019-12-05  Martin Sebor  <msebor@redhat.com>
 
        PR testsuite/92829
index fc8d64efb83e34ac8e2c15c1ad5536b6c0768aeb..d533bae0c26816591a80fb05f1b2e7ccbf02d58a 100644 (file)
@@ -31,8 +31,10 @@ barf (v8sf x)
   return (v4sf) { x[4], x[5], 1.0f, 2.0f };
 }
 
-/* We expect all CTORs to turn into permutes, the FP converting ones
+/* For bar we do two inserts, first zero, then convert, then insert *p.  */
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
+/* We expect all other CTORs to turn into permutes, the FP converting ones
    to two each with the one with constants possibly elided in the future
    by converting 3.0f and 1.0f "back" to integers.  */
-/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 6 "forwprop1" } } */
-/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 5 "forwprop1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 4 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "forwprop1" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92819-1.c b/gcc/testsuite/gcc.target/i386/pr92819-1.c
new file mode 100644 (file)
index 0000000..0ec0ca5
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msse2 -fdump-tree-forwprop1" } */
+
+typedef double v2df __attribute__((vector_size (16)));
+
+v2df
+foo (v2df x, double *p)
+{
+  return (v2df) { x[0], *p };
+}
+
+v2df
+bar (v2df x, double *p)
+{
+  return (v2df) { *p, x[1] };
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
+/* { dg-final { scan-assembler "movhpd" } } */
+/* { dg-final { scan-assembler "movlpd" } } */