From bbcdf9bb3fd04adc59f41e4e1ff6293c84cbecc4 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 3 Apr 2020 11:49:10 -0700 Subject: [PATCH] x86: Mark scratch operand in ssse3_pshufbv8qi3 as earlyclobber commit 16ed2601ad0a4aa82f11e9df86ea92183f94f979 Author: H.J. Lu Date: Wed May 15 15:26:19 2019 +0000 i386: Emulate MMX pshufb with SSE version has +(define_insn_and_split "ssse3_pshufbv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")] + UNSPEC_PSHUFB)) + (clobber (match_scratch:V4SI 3 "=X,x,Yv"))] ^^^ These are not marked earlyclobber. + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + pshufb\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 3) + (and:V4SI (match_dup 3) (match_dup 2))) + (set (match_dup 0) + (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))] If input register operand 2 is dead after this insn, the register allocator (RA) may choose it as the scratch operand. Since the scratch operand isn't marked as earlyclobber, operand 2 becomes unused after the split and is then optimized out. Marking the scratch operand as earlyclobber fixes the issue. gcc/ PR target/94467 * config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand as earlyclobber. gcc/testsuite/ PR target/94467 * gcc.target/i386/pr94467-1.c: New test. * gcc.target/i386/pr94467-2.c: Likewise. 
--- gcc/ChangeLog | 6 +++ gcc/config/i386/sse.md | 2 +- gcc/testsuite/ChangeLog | 6 +++ gcc/testsuite/gcc.target/i386/pr94467-1.c | 40 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr94467-2.c | 48 +++++++++++++++++++++++ 5 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr94467-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr94467-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e9dfa71ec0e..6317e385cac 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2020-04-03 H.J. Lu + + PR target/94467 + * config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand + as earlyclobber. + 2020-04-03 Jeff Law PR rtl-optimization/92264 diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 24b3acd163e..fef6065b687 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -16695,7 +16695,7 @@ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")] UNSPEC_PSHUFB)) - (clobber (match_scratch:V4SI 3 "=X,x,Yv"))] + (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" "@ pshufb\t{%2, %0|%0, %2} diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 67a14db7930..aad627d84f3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-04-03 H.J. Lu + + PR target/94467 + * gcc.target/i386/pr94467-1.c: New test. + * gcc.target/i386/pr94467-2.c: Likewise. 
+ 2020-04-03 Jakub Jelinek PR target/94460 diff --git a/gcc/testsuite/gcc.target/i386/pr94467-1.c b/gcc/testsuite/gcc.target/i386/pr94467-1.c new file mode 100644 index 00000000000..a51c3a8f5fe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr94467-1.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O -mavx" } */ + +#include "avx-check.h" + +typedef char __attribute__ ((__vector_size__ (8))) v8qi; +typedef short __attribute__ ((__vector_size__ (8))) v4hi; +typedef int __attribute__ ((__vector_size__ (8))) v2si; +typedef long long __attribute__ ((__vector_size__ (8))) v1di; +typedef unsigned long long u64; +u64 k, c; + +v8qi g, h, p, q; +v4hi d, e, f, l, n, o; +v2si j; + +u64 +foo (v4hi r) +{ + v8qi s; + f = (v4hi) j; + e = __builtin_ia32_psrlwi ((v4hi) k, c); + s = __builtin_ia32_pavgb (h, h); + n = __builtin_ia32_pabsw (f); + o = __builtin_ia32_psubusw (n, l); + p = __builtin_ia32_packsswb (r, o); + q = __builtin_ia32_pshufb (p, s); + g = __builtin_ia32_punpcklbw (q, (v8qi) r); + d = r; + return (u64) g + (u64) h + (u64) j; +} + +static void +avx_test (void) +{ + u64 x = foo ((v4hi) { 5 }); + if (x != 0x0005000500050505) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr94467-2.c b/gcc/testsuite/gcc.target/i386/pr94467-2.c new file mode 100644 index 00000000000..8128be325e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr94467-2.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-require-effective-target ssse3 } */ +/* { dg-options "-O -mssse3" } */ + +#ifndef CHECK_H +#define CHECK_H "ssse3-check.h" +#endif + +#ifndef TEST +#define TEST ssse3_test +#endif + +#include CHECK_H + +typedef char __attribute__ ((__vector_size__ (8))) v8qi; +typedef short __attribute__ ((__vector_size__ (8))) v4hi; +typedef int __attribute__ ((__vector_size__ (8))) v2si; +typedef long long __attribute__ ((__vector_size__ (8))) v1di; +typedef unsigned long long u64; +u64 k, c; + +v8qi g, h, p, q; +v4hi d, e, f, 
l, n, o; +v2si j; + +u64 +foo (v4hi r) +{ + v8qi s; + f = (v4hi) j; + e = __builtin_ia32_psrlwi ((v4hi) k, c); + s = __builtin_ia32_pavgb (h, h); + n = __builtin_ia32_pabsw (f); + o = __builtin_ia32_psubusw (n, l); + p = __builtin_ia32_packsswb (r, o); + q = __builtin_ia32_pshufb (p, s); + g = __builtin_ia32_punpcklbw (q, (v8qi) r); + d = r; + return (u64) g + (u64) h + (u64) j; +} + +static void +ssse3_test (void) +{ + u64 x = foo ((v4hi) { 5 }); + if (x != 0x0005000500050505) + __builtin_abort (); +} -- 2.30.2