From f66e6e2bea4bf5999c972974f4a13044fdeb02dd Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Fri, 2 Aug 2019 10:28:31 +0200
Subject: [PATCH] re PR tree-optimization/91201 (SIMD not generated for horizontal sum of bytes in array)

	PR tree-optimization/91201
	* config/i386/i386-expand.c (ix86_expand_vector_extract): For elt == 0
	V16QImode extraction without sse4.1 try to use V4SImode lowpart
	extraction.

	* gcc.target/i386/sse2-pr91201-3.c: New test.
	* gcc.target/i386/sse2-pr91201-4.c: New test.
	* gcc.target/i386/sse2-pr91201-5.c: New test.
	* gcc.target/i386/sse2-pr91201-6.c: New test.

From-SVN: r273998
---
 gcc/ChangeLog                                  |  7 +++++++
 gcc/config/i386/i386-expand.c                  | 11 +++++++++++
 gcc/testsuite/ChangeLog                        |  8 ++++++++
 gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c | 13 +++++++++++++
 gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c | 13 +++++++++++++
 7 files changed, 78 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e5c3a59cbb5..1909d87a330 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-08-02  Jakub Jelinek
+
+	PR tree-optimization/91201
+	* config/i386/i386-expand.c (ix86_expand_vector_extract): For elt == 0
+	V16QImode extraction without sse4.1 try to use V4SImode lowpart
+	extraction.
+
 2019-08-01  Martin Sebor
 
 	PR c++/90947
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 7e12a86a75f..6e4ae5077f5 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -14706,6 +14706,17 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
 
     case E_V16QImode:
       use_vec_extr = TARGET_SSE4_1;
+      if (!use_vec_extr
+	  && TARGET_SSE2
+	  && elt == 0
+	  && (optimize_insn_for_size_p () || TARGET_INTER_UNIT_MOVES_FROM_VEC))
+	{
+	  tmp = gen_reg_rtx (SImode);
+	  ix86_expand_vector_extract (false, tmp, gen_lowpart (V4SImode, vec),
+				      0);
+	  emit_insn (gen_rtx_SET (target, gen_lowpart (QImode, tmp)));
+	  return;
+	}
       break;
 
     case E_V8SFmode:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 72e258b3506..fdc8f0848d0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2019-08-02  Jakub Jelinek
+
+	PR tree-optimization/91201
+	* gcc.target/i386/sse2-pr91201-3.c: New test.
+	* gcc.target/i386/sse2-pr91201-4.c: New test.
+	* gcc.target/i386/sse2-pr91201-5.c: New test.
+	* gcc.target/i386/sse2-pr91201-6.c: New test.
+
 2019-08-02  Martin Liska
 
 	* g++.dg/cpp1y/new2.C: New test.
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c b/gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c
new file mode 100644
index 00000000000..1fc5834ffee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c b/gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c
new file mode 100644
index 00000000000..48044c6c832
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-Os -msse2 -mno-sse3 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c b/gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c
new file mode 100644
index 00000000000..13c1954e459
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=k8 -masm=att" } */
+/* { dg-final { scan-assembler-not "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler "\tmov(zbl|b)\t\[^\n\r]*\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c b/gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c
new file mode 100644
index 00000000000..2997bfd70b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-Os -msse2 -mno-sse3 -mtune=k8 -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
-- 
2.30.2