re PR tree-optimization/88828 (Inefficient update of the first element of vector...
author: Richard Biener <rguenther@suse.de>
Tue, 14 May 2019 09:11:15 +0000 (09:11 +0000)
committer: Richard Biener <rguenth@gcc.gnu.org>
Tue, 14 May 2019 09:11:15 +0000 (09:11 +0000)
2019-05-14  Richard Biener  <rguenther@suse.de>
H.J. Lu  <hongjiu.lu@intel.com>

PR tree-optimization/88828
* tree-ssa-forwprop.c (simplify_vector_constructor): Handle
permuting in a single non-constant element not extracted
from a vector.

* gcc.target/i386/pr88828-1.c: New test.
* gcc.target/i386/pr88828-1a.c: Likewise.
* gcc.target/i386/pr88828-1b.c: Likewise.
* gcc.target/i386/pr88828-1c.c: Likewise.
* gcc.target/i386/pr88828-4a.c: Likewise.
* gcc.target/i386/pr88828-4b.c: Likewise.
* gcc.target/i386/pr88828-5a.c: Likewise.
* gcc.target/i386/pr88828-5b.c: Likewise.
* gcc.target/i386/pr88828-7.c: Likewise.
* gcc.target/i386/pr88828-7a.c: Likewise.
* gcc.target/i386/pr88828-7b.c: Likewise.
* gcc.target/i386/pr88828-8.c: Likewise.
* gcc.target/i386/pr88828-8a.c: Likewise.
* gcc.target/i386/pr88828-8b.c: Likewise.
* gcc.target/i386/pr88828-9.c: Likewise.
* gcc.target/i386/pr88828-9a.c: Likewise.
* gcc.target/i386/pr88828-9b.c: Likewise.

Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com>
From-SVN: r271153

20 files changed:
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr88828-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-1a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-1b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-1c.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-4a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-4b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-5a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-5b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-7a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-7b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-8a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-8b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-9.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-9a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr88828-9b.c [new file with mode: 0644]
gcc/tree-ssa-forwprop.c

index a4568fe165128a51036da9106040518b21f6e46b..1af0304076aa410813a50da4036c5532c05d3a12 100644 (file)
@@ -1,3 +1,11 @@
+2019-05-14  Richard Biener  <rguenther@suse.de>
+       H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR tree-optimization/88828
+       * tree-ssa-forwprop.c (simplify_vector_constructor): Handle
+       permuting in a single non-constant element not extracted
+       from a vector.
+
 2019-05-14  Przemyslaw Wirkus  <przemyslaw.wirkus@arm.com>
 
        * internal-fn.def (SIGNBIT): New.
index b140a5787fb7cfdda3e5f6029d60062ea8be2571..eebebfa9697647d0f8625afb826328974537b0c0 100644 (file)
@@ -1,3 +1,25 @@
+2019-05-14  Richard Biener  <rguenther@suse.de>
+       H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR tree-optimization/88828
+       * gcc.target/i386/pr88828-1.c: New test.
+       * gcc.target/i386/pr88828-1a.c: Likewise.
+       * gcc.target/i386/pr88828-1b.c: Likewise.
+       * gcc.target/i386/pr88828-1c.c: Likewise.
+       * gcc.target/i386/pr88828-4a.c: Likewise.
+       * gcc.target/i386/pr88828-4b.c: Likewise.
+       * gcc.target/i386/pr88828-5a.c: Likewise.
+       * gcc.target/i386/pr88828-5b.c: Likewise.
+       * gcc.target/i386/pr88828-7.c: Likewise.
+       * gcc.target/i386/pr88828-7a.c: Likewise.
+       * gcc.target/i386/pr88828-7b.c: Likewise.
+       * gcc.target/i386/pr88828-8.c: Likewise.
+       * gcc.target/i386/pr88828-8a.c: Likewise.
+       * gcc.target/i386/pr88828-8b.c: Likewise.
+       * gcc.target/i386/pr88828-9.c: Likewise.
+       * gcc.target/i386/pr88828-9a.c: Likewise.
+       * gcc.target/i386/pr88828-9b.c: Likewise.
+
 2019-05-14  Przemyslaw Wirkus  <przemyslaw.wirkus@arm.com>
 
        * gcc.target/aarch64/signbitv4sf.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1.c b/gcc/testsuite/gcc.target/i386/pr88828-1.c
new file mode 100644 (file)
index 0000000..a15d1fe
--- /dev/null
@@ -0,0 +1,49 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-1a.c"
+#include "pr88828-1b.c"
+#include "pr88828-1c.c"
+
+extern void abort ();
+
+void
+do_check (__v4sf y, float f[4], float z)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (i == 0)
+      {
+       if (y[i] != z)
+         abort ();
+      }
+    else
+      {
+       if (y[i] != f[i])
+         abort ();
+      }
+}
+
+int
+main (void)
+{
+  float f[4] = { -11, 2, 55553, -4 };
+  float z = 134567;
+  __v4sf x = { f[0], f[1], f[2], f[3] };
+  __v4sf y;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (x[i] != f[i])
+      abort ();
+
+  y = foo1 (x, z);
+  do_check (y, f, z);
+  y = foo2 (x, z);
+  do_check (y, f, z);
+  y = foo3 (x, z);
+  do_check (y, f, z);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1a.c b/gcc/testsuite/gcc.target/i386/pr88828-1a.c
new file mode 100644 (file)
index 0000000..d37b24c
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo1 (__v4sf x, float f)
+{
+  __v4sf y = { f, x[1], x[2], x[3] };
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1b.c b/gcc/testsuite/gcc.target/i386/pr88828-1b.c
new file mode 100644 (file)
index 0000000..af4aced
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+static __v4sf
+vector_init (float f0,float f1, float f2,float f3)
+{
+  __v4sf y = { f0, f1, f2, f3 };
+   return y;
+}
+
+__attribute__((noinline, noclone))
+__v4sf
+foo2 (__v4sf x, float f)
+{
+  return vector_init (f, x[1], x[2], x[3]) ;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1c.c b/gcc/testsuite/gcc.target/i386/pr88828-1c.c
new file mode 100644 (file)
index 0000000..a117f3e
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo3 (__v4sf x, float f)
+{
+  __v4sf y = x;
+  y[0] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4a.c b/gcc/testsuite/gcc.target/i386/pr88828-4a.c
new file mode 100644 (file)
index 0000000..64043b9
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], x[1] };
+  y[0] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4b.c b/gcc/testsuite/gcc.target/i386/pr88828-4b.c
new file mode 100644 (file)
index 0000000..ad8d2b9
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], x[1] };
+  y[0] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5a.c b/gcc/testsuite/gcc.target/i386/pr88828-5a.c
new file mode 100644 (file)
index 0000000..5e908fa
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 2 } } */
+/* { dg-final { scan-assembler-times "movaps" 1 } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], x[0] };
+  y[3] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5b.c b/gcc/testsuite/gcc.target/i386/pr88828-5b.c
new file mode 100644 (file)
index 0000000..988a488
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], x[0] };
+  y[3] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7.c b/gcc/testsuite/gcc.target/i386/pr88828-7.c
new file mode 100644 (file)
index 0000000..4302c26
--- /dev/null
@@ -0,0 +1,53 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2 -fexcess-precision=standard" } */
+
+#include "pr88828-7a.c"
+#include "pr88828-7b.c"
+
+extern void abort ();
+
+float
+bar (float x, float y)
+{
+  return x / y - y * x;
+}
+
+void
+do_check (__v4sf x, float f1[4], float f2[4])
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (i == 0)
+      {
+       if (x[i] != bar (f1[i], f2[i]))
+         abort ();
+      }
+    else
+      {
+       if (x[i] != f1[i])
+         abort ();
+      }
+}
+
+int
+main (void)
+{
+  float f1[4] = { -11, 2, 55553, -4 };
+  float f2[4] = { 111, 3.3, -55.553, 4.8 };
+  __v4sf x = { f1[0], f1[1], f1[2], f1[3] };
+  __v4sf y = { f2[0], f2[1], f2[2], f2[3] };
+  __v4sf z;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (x[i] != f1[i] || y[i] != f2[i] )
+      abort ();
+
+  z = foo1 (x, y);
+  do_check (z, f1, f2);
+  z = foo2 (x, y);  /* Store into Z so foo2's own result is checked.  */
+  do_check (z, f1, f2);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7a.c b/gcc/testsuite/gcc.target/i386/pr88828-7a.c
new file mode 100644 (file)
index 0000000..f1ae574
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+extern float bar (float, float);
+
+__v4sf
+foo1 (__v4sf x, __v4sf y)
+{
+  __v4sf z = { bar (x[0], y[0]), x[1], x[2], x[3] };
+  return z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7b.c b/gcc/testsuite/gcc.target/i386/pr88828-7b.c
new file mode 100644 (file)
index 0000000..c027c56
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+extern float bar (float, float);
+
+static __v4sf
+vector_init (float f0,float f1, float f2,float f3)
+{
+  __v4sf y = { f0, f1, f2, f3 };
+   return y;
+}
+
+__v4sf
+foo2 (__v4sf x, __v4sf y)
+{
+  return vector_init (bar (x[0], y[0]), x[1], x[2], x[3]) ;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8.c b/gcc/testsuite/gcc.target/i386/pr88828-8.c
new file mode 100644 (file)
index 0000000..3b8eabd
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-8a.c"
+#include "pr88828-8b.c"
+
+extern void abort ();
+
+void
+do_check (__v4sf y, float f[4], float z)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (i == 0)
+      {
+       if (y[i] != z)
+         abort ();
+      }
+    else
+      {
+       if (y[i] != f[i])
+         abort ();
+      }
+}
+
+int
+main (void)
+{
+  float f[4] = { -11, 2, 55553, -4 };
+  float z = 11.4;
+  __v4sf x = { f[0], f[1], f[2], f[3] };
+  __v4sf y;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (x[i] != f[i])
+      abort ();
+
+  y = foo1 (x);
+  do_check (y, f, z);
+  y = foo2 (x);
+  do_check (y, f, z);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8a.c b/gcc/testsuite/gcc.target/i386/pr88828-8a.c
new file mode 100644 (file)
index 0000000..5d383df
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo1 (__v4sf x)
+{
+  __v4sf z = { 11.4, x[1], x[2], x[3] };
+  return z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8b.c b/gcc/testsuite/gcc.target/i386/pr88828-8b.c
new file mode 100644 (file)
index 0000000..5ffbc9c
--- /dev/null
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+static __v4sf
+vector_init (float f0,float f1, float f2,float f3)
+{
+  __v4sf y = { f0, f1, f2, f3 };
+   return y;
+}
+
+__v4sf
+foo2 (__v4sf x)
+{
+  return vector_init (11.4, x[1], x[2], x[3]) ;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9.c b/gcc/testsuite/gcc.target/i386/pr88828-9.c
new file mode 100644 (file)
index 0000000..c33907b
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-9a.c"
+#include "pr88828-9b.c"
+
+extern void abort ();
+
+void
+do_check (__v4sf y, float f[4], float z)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (i == 0)
+      {
+       if (y[i] != z)
+         abort ();
+      }
+    else
+      {
+       if (y[i] != f[i])
+         abort ();
+      }
+}
+
+int
+main (void)
+{
+  float f[4] = { -11, 2, 55553, -4 };
+  float z = 11.4;
+  __m128 x = (__m128) (__v4sf) { f[0], f[1], f[2], f[3] };
+  __m128 y;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (x[i] != f[i])
+      abort ();
+
+  y = foo1 (x);
+  do_check (y, f, z);
+  y = foo2 (x);
+  do_check (y, f, z);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9a.c b/gcc/testsuite/gcc.target/i386/pr88828-9a.c
new file mode 100644 (file)
index 0000000..7f83065
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+__m128
+foo1 (__m128 x)
+{
+  __v4sf z = { 11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], ((__v4sf) x) [3] };
+  return (__m128) z;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9b.c b/gcc/testsuite/gcc.target/i386/pr88828-9b.c
new file mode 100644 (file)
index 0000000..6588ad1
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+static __m128
+vector_init (float f0,float f1, float f2,float f3)
+{
+  __v4sf y = { f0, f1, f2, f3 };
+   return (__m128) y;
+}
+
+__m128
+foo2 (__m128 x)
+{
+  return vector_init (11.4, ((__v4sf) x)[1], ((__v4sf) x)[2],
+                     ((__v4sf) x) [3]);
+}
index 32b5a15151c693337f00800925819f5761f04c76..b6b21f86f13ffc2146b7faf37b221cd09b866a6c 100644 (file)
@@ -2065,71 +2065,87 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   conv_code = ERROR_MARK;
   maybe_ident = true;
   tree one_constant = NULL_TREE;
+  tree one_nonconstant = NULL_TREE;
   auto_vec<tree> constants;
   constants.safe_grow_cleared (nelts);
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
     {
       tree ref, op1;
+      unsigned int elem;
 
       if (i >= nelts)
        return false;
 
+      /* Look for elements extracted and possibly converted from
+         another vector.  */
       op1 = get_bit_field_ref_def (elt->value, conv_code);
-      if (op1)
+      if (op1
+         && TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME
+         && VECTOR_TYPE_P (TREE_TYPE (ref))
+         && useless_type_conversion_p (TREE_TYPE (op1),
+                                       TREE_TYPE (TREE_TYPE (ref)))
+         && known_eq (bit_field_size (op1), elem_size)
+         && constant_multiple_p (bit_field_offset (op1),
+                                 elem_size, &elem))
        {
-         ref = TREE_OPERAND (op1, 0);
          unsigned int j;
          for (j = 0; j < 2; ++j)
            {
              if (!orig[j])
                {
-                 if (TREE_CODE (ref) != SSA_NAME)
-                   return false;
-                 if (! VECTOR_TYPE_P (TREE_TYPE (ref))
-                     || ! useless_type_conversion_p (TREE_TYPE (op1),
-                                                     TREE_TYPE (TREE_TYPE (ref))))
-                   return false;
-                 if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]),
-                                                      TREE_TYPE (ref)))
-                   return false;
-                 orig[j] = ref;
-                 break;
+                 if (j == 0
+                     || useless_type_conversion_p (TREE_TYPE (orig[0]),
+                                                   TREE_TYPE (ref)))
+                   break;
                }
              else if (ref == orig[j])
                break;
            }
-         if (j == 2)
-           return false;
-
-         unsigned int elt;
-         if (maybe_ne (bit_field_size (op1), elem_size)
-             || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt))
-           return false;
-         if (j)
-           elt += nelts;
-         if (elt != i)
-           maybe_ident = false;
-         sel.quick_push (elt);
+         /* Found a suitable vector element (J == 2: no slot matched).  */
+         if (j < 2)
+           {
+             orig[j] = ref;
+             if (j)
+               elem += nelts;
+             if (elem != i)
+               maybe_ident = false;
+             sel.quick_push (elem);
+             continue;
+           }
+         /* Else fallthru.  */
        }
-      else if (CONSTANT_CLASS_P (elt->value))
+      /* Handle elements not extracted from a vector.
+          1. constants by permuting with constant vector
+         2. a unique non-constant element by permuting with a splat vector  */
+      if (orig[1]
+         && orig[1] != error_mark_node)
+       return false;
+      orig[1] = error_mark_node;
+      if (CONSTANT_CLASS_P (elt->value))
        {
-         if (orig[1]
-             && orig[1] != error_mark_node)
+         if (one_nonconstant)
            return false;
-         orig[1] = error_mark_node;
          if (!one_constant)
            one_constant = elt->value;
          constants[i] = elt->value;
-         sel.quick_push (i + nelts);
-         maybe_ident = false;
        }
       else
-       return false;
+       {
+         if (one_constant)
+           return false;
+         if (!one_nonconstant)
+           one_nonconstant = elt->value;
+         else if (!operand_equal_p (one_nonconstant, elt->value, 0))
+           return false;
+       }
+      sel.quick_push (i + nelts);
+      maybe_ident = false;
     }
   if (i < nelts)
     return false;
 
-  if (! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
+  if (! orig[0]
+      || ! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
       || maybe_ne (TYPE_VECTOR_SUBPARTS (type),
                   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
     return false;
@@ -2165,9 +2181,19 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
                       GET_MODE_SIZE (TYPE_MODE (type))))
        return false;
       op2 = vec_perm_indices_to_tree (mask_type, indices);
+      bool convert_orig0 = false;
       if (!orig[1])
        orig[1] = orig[0];
-      if (orig[1] == error_mark_node)
+      else if (orig[1] == error_mark_node
+              && one_nonconstant)
+       {
+         gimple_seq seq = NULL;
+         orig[1] = gimple_build_vector_from_val (&seq, UNKNOWN_LOCATION,
+                                                 type, one_nonconstant);
+         gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+         convert_orig0 = true;
+       }
+      else if (orig[1] == error_mark_node)
        {
          tree_vector_builder vec (type, nelts, 1);
          for (unsigned i = 0; i < nelts; ++i)
@@ -2177,11 +2203,12 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
              /* ??? Push a don't-care value.  */
              vec.quick_push (one_constant);
          orig[1] = vec.build ();
+         convert_orig0 = true;
        }
       if (conv_code == ERROR_MARK)
        gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
                                        orig[1], op2);
-      else if (TREE_CODE (orig[1]) == VECTOR_CST)
+      else if (convert_orig0)
        {
          gimple *conv
            = gimple_build_assign (make_ssa_name (type), conv_code, orig[0]);