tree-optimization/92645 - avoid harmful early BIT_FIELD_REF canonicalization
author: Richard Biener <rguenther@suse.de>
Wed, 13 Jan 2021 12:48:31 +0000 (13:48 +0100)
committer: Richard Biener <rguenther@suse.de>
Wed, 13 Jan 2021 13:51:08 +0000 (14:51 +0100)
This avoids canonicalizing BIT_FIELD_REF <T1> (a, <sz>, 0) to
(T1)a on integer-typed a too early.  The early canonicalization
confuses the vectorizer's SLP matching.

With this delayed to after vector lowering the testcase in PR92645
from Skia is now finally optimized to reasonable assembly.

2021-01-13  Richard Biener  <rguenther@suse.de>

PR tree-optimization/92645
* match.pd (BIT_FIELD_REF to conversion): Delay canonicalization
until after vector lowering.

* gcc.target/i386/pr92645-7.c: New testcase.
* gcc.dg/tree-ssa/ssa-fre-54.c: Adjust.
* gcc.dg/pr69047.c: Likewise.

gcc/match.pd
gcc/testsuite/gcc.dg/pr69047.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c
gcc/testsuite/gcc.target/i386/pr92645-7.c [new file with mode: 0644]

diff --git a/gcc/match.pd b/gcc/match.pd
index c286a540c4ea9b0099e164009bf1c0a353a1f6ec..60c383da13bd748ee89d5073ba74d389c9017c53 100644 (file)
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6075,6 +6075,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
           /* Low-parts can be reduced to integral conversions.
              ???  The following doesn't work for PDP endian.  */
           || (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
+              /* But only do this after vectorization.  */
+              && canonicalize_math_after_vectorization_p ()
               /* Don't even think about BITS_BIG_ENDIAN.  */
               && TYPE_PRECISION (TREE_TYPE (@0)) % BITS_PER_UNIT == 0
               && TYPE_PRECISION (type) % BITS_PER_UNIT == 0
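diff --git a/gcc/testsuite/gcc.dg/pr69047.c b/gcc/testsuite/gcc.dg/pr69047.c
index 63d9fd90e83127b9a52b8cb238dd84ac64011253..d562663d86a3908aee6367d5bcb624bbbf20b993 100644 (file)
--- a/gcc/testsuite/gcc.dg/pr69047.c
+++ b/gcc/testsuite/gcc.dg/pr69047.c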
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-cddce1" } */
+/* { dg-options "-O -fdump-tree-forwprop4" } */
 
 __UINT8_TYPE__
 f(__UINT16_TYPE__ b)
@@ -15,4 +15,4 @@ f(__UINT16_TYPE__ b)
   return a;
 }
 
-/* { dg-final { scan-tree-dump "_\[0-9\]+ = \\(\[^)\]+\\) b" "cddce1" } } */
+/* { dg-final { scan-tree-dump "_\[0-9\]+ = \\(\[^)\]+\\) b" "forwprop4" } } */
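diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c
index be7537e80c1c2420a86ba8e395a18326b0fb708a..02ebf068a619033119cb9a84842afb98602b9c3b 100644 (file)
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-54.c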
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target int32plus } */
-/* { dg-options "-O -fdump-tree-fre1 -fdump-tree-dse1" } */
+/* { dg-options "-O -fdump-tree-forwprop4 -fdump-tree-dse1" } */
 
 extern void abort (void);
 
@@ -51,6 +51,6 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "\\(char\\) i_" "fre1" } } */
-/* { dg-final { scan-tree-dump "\\(short int\\) i_" "fre1" } } */
+/* { dg-final { scan-tree-dump "\\(char\\) i_" "forwprop4" } } */
+/* { dg-final { scan-tree-dump "\\(short int\\) i_" "forwprop4" } } */
 /* { dg-final { scan-tree-dump-not "u.i =" "dse1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92645-7.c b/gcc/testsuite/gcc.target/i386/pr92645-7.c
new file mode 100644 (file)
index 0000000..e4c04c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645-7.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -msse2" } */
+
+typedef long v2di __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+void bar (v4si *p, __int128_t *q)
+{
+  union { __int128_t a; v4si b; } u;
+  u.a = *q;
+  (*p)[0] = u.b[0];
+  (*p)[1] = u.b[2];
+  (*p)[2] = u.b[1];
+  (*p)[3] = u.b[3];
+}
+
+/* The function should end up with sth like
+     [v]pshufd $216, (%esi), %xmm0
+     [v]movdqa %xmm0, (%edi)
+     ret
+   recognized by SLP vectorization involving an existing "vector".  */
+/* { dg-final { scan-assembler-not "punpck" } } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */