From 962372f9f853c582c879f11c0db14973cc8687e0 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 14 May 2019 09:11:15 +0000 Subject: [PATCH] re PR tree-optimization/88828 (Inefficient update of the first element of vector registers) 2019-05-14 Richard Biener H.J. Lu PR tree-optimization/88828 * tree-ssa-forwprop.c (simplify_vector_constructor): Handle permuting in a single non-constant element not extracted from a vector. * gcc.target/i386/pr88828-1.c: New test. * gcc.target/i386/pr88828-1a.c: Likewise. * gcc.target/i386/pr88828-1b.c: Likewise. * gcc.target/i386/pr88828-1c.c: Likewise. * gcc.target/i386/pr88828-4a.c: Likewise. * gcc.target/i386/pr88828-4b.c: Likewise. * gcc.target/i386/pr88828-5a.c: Likewise. * gcc.target/i386/pr88828-5b.c: Likewise. * gcc.target/i386/pr88828-7.c: Likewise. * gcc.target/i386/pr88828-7a.c: Likewise. * gcc.target/i386/pr88828-7b.c: Likewise. * gcc.target/i386/pr88828-8.c: Likewise. * gcc.target/i386/pr88828-8a.c: Likewise. * gcc.target/i386/pr88828-8b.c: Likewise. * gcc.target/i386/pr88828-9.c: Likewise. * gcc.target/i386/pr88828-9a.c: Likewise. * gcc.target/i386/pr88828-9b.c: Likewise. Co-Authored-By: H.J. 
Lu From-SVN: r271153 --- gcc/ChangeLog | 8 ++ gcc/testsuite/ChangeLog | 22 +++++ gcc/testsuite/gcc.target/i386/pr88828-1.c | 49 +++++++++++ gcc/testsuite/gcc.target/i386/pr88828-1a.c | 17 ++++ gcc/testsuite/gcc.target/i386/pr88828-1b.c | 23 +++++ gcc/testsuite/gcc.target/i386/pr88828-1c.c | 18 ++++ gcc/testsuite/gcc.target/i386/pr88828-4a.c | 18 ++++ gcc/testsuite/gcc.target/i386/pr88828-4b.c | 21 +++++ gcc/testsuite/gcc.target/i386/pr88828-5a.c | 18 ++++ gcc/testsuite/gcc.target/i386/pr88828-5b.c | 20 +++++ gcc/testsuite/gcc.target/i386/pr88828-7.c | 53 ++++++++++++ gcc/testsuite/gcc.target/i386/pr88828-7a.c | 16 ++++ gcc/testsuite/gcc.target/i386/pr88828-7b.c | 22 +++++ gcc/testsuite/gcc.target/i386/pr88828-8.c | 46 ++++++++++ gcc/testsuite/gcc.target/i386/pr88828-8a.c | 15 ++++ gcc/testsuite/gcc.target/i386/pr88828-8b.c | 21 +++++ gcc/testsuite/gcc.target/i386/pr88828-9.c | 46 ++++++++++ gcc/testsuite/gcc.target/i386/pr88828-9a.c | 16 ++++ gcc/testsuite/gcc.target/i386/pr88828-9b.c | 23 +++++ gcc/tree-ssa-forwprop.c | 97 ++++++++++++++-------- 20 files changed, 534 insertions(+), 35 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-4a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-4b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-5a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-5b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-7.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-7a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-7b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-8.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-8a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-8b.c create mode 100644 
gcc/testsuite/gcc.target/i386/pr88828-9.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-9a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-9b.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a4568fe1651..1af0304076a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-05-14 Richard Biener + H.J. Lu + + PR tree-optimization/88828 + * tree-ssa-forwprop.c (simplify_vector_constructor): Handle + permuting in a single non-constant element not extracted + from a vector. + 2019-05-14 Przemyslaw Wirkus * internal-fn.def (SIGNBIT): New. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b140a5787fb..eebebfa9697 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,25 @@ +2019-05-14 Richard Biener + H.J. Lu + + PR tree-optimization/88828 + * gcc.target/i386/pr88828-1.c: New test. + * gcc.target/i386/pr88828-1a.c: Likewise. + * gcc.target/i386/pr88828-1b.c: Likewise. + * gcc.target/i386/pr88828-1c.c: Likewise. + * gcc.target/i386/pr88828-4a.c: Likewise. + * gcc.target/i386/pr88828-4b.c: Likewise. + * gcc.target/i386/pr88828-5a.c: Likewise. + * gcc.target/i386/pr88828-5b.c: Likewise. + * gcc.target/i386/pr88828-7.c: Likewise. + * gcc.target/i386/pr88828-7a.c: Likewise. + * gcc.target/i386/pr88828-7b.c: Likewise. + * gcc.target/i386/pr88828-8.c: Likewise. + * gcc.target/i386/pr88828-8a.c: Likewise. + * gcc.target/i386/pr88828-8b.c: Likewise. + * gcc.target/i386/pr88828-9.c: Likewise. + * gcc.target/i386/pr88828-9a.c: Likewise. + * gcc.target/i386/pr88828-9b.c: Likewise. + 2019-05-14 Przemyslaw Wirkus * gcc.target/aarch64/signbitv4sf.c: New test. 
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1.c b/gcc/testsuite/gcc.target/i386/pr88828-1.c new file mode 100644 index 00000000000..a15d1fea3f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-1.c @@ -0,0 +1,49 @@ +/* { dg-do run { target sse2_runtime } } */ +/* { dg-options "-O2 -msse2" } */ + +#include "pr88828-1a.c" +#include "pr88828-1b.c" +#include "pr88828-1c.c" + +extern void abort (); + +void +do_check (__v4sf y, float f[4], float z) +{ + int i; + + for (i = 0; i < 4; i++) + if (i == 0) + { + if (y[i] != z) + abort (); + } + else + { + if (y[i] != f[i]) + abort (); + } +} + +int +main (void) +{ + float f[4] = { -11, 2, 55553, -4 }; + float z = 134567; + __v4sf x = { f[0], f[1], f[2], f[3] }; + __v4sf y; + int i; + + for (i = 0; i < 4; i++) + if (x[i] != f[i]) + abort (); + + y = foo1 (x, z); + do_check (y, f, z); + y = foo2 (x, z); + do_check (y, f, z); + y = foo3 (x, z); + do_check (y, f, z); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1a.c b/gcc/testsuite/gcc.target/i386/pr88828-1a.c new file mode 100644 index 00000000000..d37b24c6661 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-1a.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__attribute__((noinline, noclone)) +__v4sf +foo1 (__v4sf x, float f) +{ + __v4sf y = { f, x[1], x[2], x[3] }; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1b.c b/gcc/testsuite/gcc.target/i386/pr88828-1b.c new file mode 100644 index 00000000000..af4aced65f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-1b.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } 
*/ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +static __v4sf +vector_init (float f0,float f1, float f2,float f3) +{ + __v4sf y = { f0, f1, f2, f3 }; + return y; +} + +__attribute__((noinline, noclone)) +__v4sf +foo2 (__v4sf x, float f) +{ + return vector_init (f, x[1], x[2], x[3]) ; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1c.c b/gcc/testsuite/gcc.target/i386/pr88828-1c.c new file mode 100644 index 00000000000..a117f3ec7b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-1c.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__attribute__((noinline, noclone)) +__v4sf +foo3 (__v4sf x, float f) +{ + __v4sf y = x; + y[0] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4a.c b/gcc/testsuite/gcc.target/i386/pr88828-4a.c new file mode 100644 index 00000000000..64043b9855f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-4a.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-times "shufps" 1 } } */ +/* { dg-final { scan-assembler-not "movaps" } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__attribute__((noinline, noclone)) +__v4sf +foo (__v4sf x, 
float f) +{ + __v4sf y = { x[0], x[2], x[3], x[1] }; + y[0] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4b.c b/gcc/testsuite/gcc.target/i386/pr88828-4b.c new file mode 100644 index 00000000000..ad8d2b985d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-4b.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ +/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vshufps" } } */ +/* { dg-final { scan-assembler-not "vmovaps" } } */ +/* { dg-final { scan-assembler-not "vmovlhps" } } */ +/* { dg-final { scan-assembler-not "vunpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__attribute__((noinline, noclone)) +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], x[1] }; + y[0] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5a.c b/gcc/testsuite/gcc.target/i386/pr88828-5a.c new file mode 100644 index 00000000000..5e908faef5c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-5a.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler "movss" } } */ +/* { dg-final { scan-assembler-times "shufps" 2 } } */ +/* { dg-final { scan-assembler-times "movaps" 1 } } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__attribute__((noinline, noclone)) +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], x[0] }; + y[3] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5b.c b/gcc/testsuite/gcc.target/i386/pr88828-5b.c new file mode 100644 index 00000000000..988a48823e6 
--- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-5b.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ +/* { dg-final { scan-assembler-times "vinsertps" 1 } } */ +/* { dg-final { scan-assembler-not "vshufps" } } */ +/* { dg-final { scan-assembler-not "vmovss" } } */ +/* { dg-final { scan-assembler-not "vmovaps" } } */ +/* { dg-final { scan-assembler-not "vmovlhps" } } */ +/* { dg-final { scan-assembler-not "vunpcklps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__attribute__((noinline, noclone)) +__v4sf +foo (__v4sf x, float f) +{ + __v4sf y = { x[0], x[2], x[3], x[0] }; + y[3] = f; + return y; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7.c b/gcc/testsuite/gcc.target/i386/pr88828-7.c new file mode 100644 index 00000000000..4302c266478 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-7.c @@ -0,0 +1,53 @@ +/* { dg-do run { target sse2_runtime } } */ +/* { dg-options "-O2 -msse2 -fexcess-precision=standard" } */ + +#include "pr88828-7a.c" +#include "pr88828-7b.c" + +extern void abort (); + +float +bar (float x, float y) +{ + return x / y - y * x; +} + +void +do_check (__v4sf x, float f1[4], float f2[4]) +{ + int i; + + for (i = 0; i < 4; i++) + if (i == 0) + { + if (x[i] != bar (f1[i], f2[i])) + abort (); + } + else + { + if (x[i] != f1[i]) + abort (); + } +} + +int +main (void) +{ + float f1[4] = { -11, 2, 55553, -4 }; + float f2[4] = { 111, 3.3, -55.553, 4.8 }; + __v4sf x = { f1[0], f1[1], f1[2], f1[3] }; + __v4sf y = { f2[0], f2[1], f2[2], f2[3] }; + __v4sf z; + int i; + + for (i = 0; i < 4; i++) + if (x[i] != f1[i] || y[i] != f2[i] ) + abort (); + + z = foo1 (x, y); + do_check (z, f1, f2); + z = foo2 (x, y); + do_check (z, f1, f2); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7a.c b/gcc/testsuite/gcc.target/i386/pr88828-7a.c new file mode 100644 index 00000000000..f1ae57422d9 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/pr88828-7a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpckhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); +extern float bar (float, float); + +__v4sf +foo1 (__v4sf x, __v4sf y) +{ + __v4sf z = { bar (x[0], y[0]), x[1], x[2], x[3] }; + return z; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7b.c b/gcc/testsuite/gcc.target/i386/pr88828-7b.c new file mode 100644 index 00000000000..c027c56948d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-7b.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpckhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); +extern float bar (float, float); + +static __v4sf +vector_init (float f0,float f1, float f2,float f3) +{ + __v4sf y = { f0, f1, f2, f3 }; + return y; +} + +__v4sf +foo2 (__v4sf x, __v4sf y) +{ + return vector_init (bar (x[0], y[0]), x[1], x[2], x[3]) ; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8.c b/gcc/testsuite/gcc.target/i386/pr88828-8.c new file mode 100644 index 00000000000..3b8eabd225f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-8.c @@ -0,0 +1,46 @@ +/* { dg-do run { target sse2_runtime } } */ +/* { dg-options "-O2 -msse2" } */ + +#include "pr88828-8a.c" +#include "pr88828-8b.c" + +extern void abort (); + +void +do_check (__v4sf y, float f[4], float z) +{ + int i; + + for (i = 0; i < 4; i++) + if (i == 0) + { + if (y[i] != z) + abort (); + } + else + { + if (y[i] != f[i]) + abort (); + } +} + +int +main (void) +{ + float f[4] 
= { -11, 2, 55553, -4 }; + float z = 11.4; + __v4sf x = { f[0], f[1], f[2], f[3] }; + __v4sf y; + int i; + + for (i = 0; i < 4; i++) + if (x[i] != f[i]) + abort (); + + y = foo1 (x); + do_check (y, f, z); + y = foo2 (x); + do_check (y, f, z); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8a.c b/gcc/testsuite/gcc.target/i386/pr88828-8a.c new file mode 100644 index 00000000000..5d383dfd081 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-8a.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpckhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +__v4sf +foo1 (__v4sf x) +{ + __v4sf z = { 11.4, x[1], x[2], x[3] }; + return z; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8b.c b/gcc/testsuite/gcc.target/i386/pr88828-8b.c new file mode 100644 index 00000000000..5ffbc9c3103 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-8b.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpckhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +static __v4sf +vector_init (float f0,float f1, float f2,float f3) +{ + __v4sf y = { f0, f1, f2, f3 }; + return y; +} + +__v4sf +foo2 (__v4sf x) +{ + return vector_init (11.4, x[1], x[2], x[3]) ; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9.c b/gcc/testsuite/gcc.target/i386/pr88828-9.c new file mode 100644 index 00000000000..c33907b4a6f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-9.c @@ -0,0 +1,46 @@ +/* { dg-do run { target sse2_runtime } } */ +/* { dg-options 
"-O2 -msse2" } */ + +#include "pr88828-9a.c" +#include "pr88828-9b.c" + +extern void abort (); + +void +do_check (__v4sf y, float f[4], float z) +{ + int i; + + for (i = 0; i < 4; i++) + if (i == 0) + { + if (y[i] != z) + abort (); + } + else + { + if (y[i] != f[i]) + abort (); + } +} + +int +main (void) +{ + float f[4] = { -11, 2, 55553, -4 }; + float z = 11.4; + __m128 x = (__m128) (__v4sf) { f[0], f[1], f[2], f[3] }; + __m128 y; + int i; + + for (i = 0; i < 4; i++) + if (x[i] != f[i]) + abort (); + + y = foo1 (x); + do_check (y, f, z); + y = foo2 (x); + do_check (y, f, z); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9a.c b/gcc/testsuite/gcc.target/i386/pr88828-9a.c new file mode 100644 index 00000000000..7f830657732 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-9a.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpckhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); + +__m128 +foo1 (__m128 x) +{ + __v4sf z = { 11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], ((__v4sf) x) [3] }; + return (__m128) z; +} diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9b.c b/gcc/testsuite/gcc.target/i386/pr88828-9b.c new file mode 100644 index 00000000000..6588ad15a9b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88828-9b.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -mno-sse4" } */ +/* { dg-final { scan-assembler-not "movlhps" } } */ +/* { dg-final { scan-assembler-not "unpckhps" } } */ +/* { dg-final { scan-assembler-not "unpcklps" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ + +typedef float __v4sf __attribute__ ((__vector_size__ (16))); +typedef float __m128 __attribute__ 
((__vector_size__ (16), __may_alias__)); + +static __m128 +vector_init (float f0,float f1, float f2,float f3) +{ + __v4sf y = { f0, f1, f2, f3 }; + return (__m128) y; +} + +__m128 +foo2 (__m128 x) +{ + return vector_init (11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], + ((__v4sf) x) [3]); +} diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 32b5a15151c..b6b21f86f13 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -2065,71 +2065,87 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) conv_code = ERROR_MARK; maybe_ident = true; tree one_constant = NULL_TREE; + tree one_nonconstant = NULL_TREE; auto_vec constants; constants.safe_grow_cleared (nelts); FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) { tree ref, op1; + unsigned int elem; if (i >= nelts) return false; + /* Look for elements extracted and possibly converted from + another vector. */ op1 = get_bit_field_ref_def (elt->value, conv_code); - if (op1) + if (op1 + && TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME + && VECTOR_TYPE_P (TREE_TYPE (ref)) + && useless_type_conversion_p (TREE_TYPE (op1), + TREE_TYPE (TREE_TYPE (ref))) + && known_eq (bit_field_size (op1), elem_size) + && constant_multiple_p (bit_field_offset (op1), + elem_size, &elem)) { - ref = TREE_OPERAND (op1, 0); unsigned int j; for (j = 0; j < 2; ++j) { if (!orig[j]) { - if (TREE_CODE (ref) != SSA_NAME) - return false; - if (! VECTOR_TYPE_P (TREE_TYPE (ref)) - || ! 
useless_type_conversion_p (TREE_TYPE (op1), - TREE_TYPE (TREE_TYPE (ref)))) - return false; - if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]), - TREE_TYPE (ref))) - return false; - orig[j] = ref; - break; + if (j == 0 + || useless_type_conversion_p (TREE_TYPE (orig[0]), + TREE_TYPE (ref))) + break; } else if (ref == orig[j]) break; } - if (j == 2) - return false; - - unsigned int elt; - if (maybe_ne (bit_field_size (op1), elem_size) - || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) - return false; - if (j) - elt += nelts; - if (elt != i) - maybe_ident = false; - sel.quick_push (elt); + /* Found a suitable vector element (j == 2 means no orig[] slot matched). */ + if (j < 2) + { + orig[j] = ref; + if (j) + elem += nelts; + if (elem != i) + maybe_ident = false; + sel.quick_push (elem); + continue; + } + /* Else fallthru. */ } - else if (CONSTANT_CLASS_P (elt->value)) + /* Handle elements not extracted from a vector. + 1. constants by permuting with constant vector + 2. a unique non-constant element by permuting with a splat vector */ + if (orig[1] + && orig[1] != error_mark_node) + return false; + orig[1] = error_mark_node; + if (CONSTANT_CLASS_P (elt->value)) { - if (orig[1] - && orig[1] != error_mark_node) + if (one_nonconstant) return false; - orig[1] = error_mark_node; if (!one_constant) one_constant = elt->value; constants[i] = elt->value; - sel.quick_push (i + nelts); - maybe_ident = false; } else - return false; + { + if (one_constant) + return false; + if (!one_nonconstant) + one_nonconstant = elt->value; + else if (!operand_equal_p (one_nonconstant, elt->value, 0)) + return false; + } + sel.quick_push (i + nelts); + maybe_ident = false; } if (i < nelts) return false; - if (! VECTOR_TYPE_P (TREE_TYPE (orig[0])) + if (! orig[0] + || ! 
VECTOR_TYPE_P (TREE_TYPE (orig[0])) || maybe_ne (TYPE_VECTOR_SUBPARTS (type), TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0])))) return false; @@ -2165,9 +2181,19 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) GET_MODE_SIZE (TYPE_MODE (type)))) return false; op2 = vec_perm_indices_to_tree (mask_type, indices); + bool convert_orig0 = false; if (!orig[1]) orig[1] = orig[0]; - if (orig[1] == error_mark_node) + else if (orig[1] == error_mark_node + && one_nonconstant) + { + gimple_seq seq = NULL; + orig[1] = gimple_build_vector_from_val (&seq, UNKNOWN_LOCATION, + type, one_nonconstant); + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); + convert_orig0 = true; + } + else if (orig[1] == error_mark_node) { tree_vector_builder vec (type, nelts, 1); for (unsigned i = 0; i < nelts; ++i) @@ -2177,11 +2203,12 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) /* ??? Push a don't-care value. */ vec.quick_push (one_constant); orig[1] = vec.build (); + convert_orig0 = true; } if (conv_code == ERROR_MARK) gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0], orig[1], op2); - else if (TREE_CODE (orig[1]) == VECTOR_CST) + else if (convert_orig0) { gimple *conv = gimple_build_assign (make_ssa_name (type), conv_code, orig[0]); -- 2.30.2