From bee27152f7e6651f38c25ac68db13370382147e0 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 7 Apr 2020 08:27:49 +0200 Subject: [PATCH] i386: Fix emit_reduc_half on V{64Q,32H}Imode [PR94500] The following testcase is miscompiled in 8.x, because emit_reduc_half is prepared to handle for 512-bit modes only i equal to 512, 256, 128 and 64. V32HImode also needs i equal to 32 and V64QImode i equal to 32 and 16, but emit_reduc_half in that case performs a redundant permutation exactly like i == 64. In 9+ the testcase works because Richard in r9-3393 changed the reduc_* expanders so that they actually don't call ix86_expand_reduc on 512-bit modes, but only 128-bit ones. The patch fixes emit_reduc_half to handle also i of 32 and 16 similarly to how V32QImode/V16HImode are handled for AVX2. I think it shouldn't hurt to fix the function even on the trunk and 9 branch even when nothing uses it ATM. 2020-04-07 Jakub Jelinek PR target/94500 * config/i386/i386-expand.c (emit_reduc_half): For V{64QI,32HI}mode handle i < 64 using avx512bw_lshrv4ti3. Formatting fixes. * gcc.target/i386/avx512bw-pr94500.c: New test. --- gcc/ChangeLog | 6 ++ gcc/config/i386/i386-expand.c | 66 +++++++++++-------- gcc/testsuite/ChangeLog | 5 ++ .../gcc.target/i386/avx512bw-pr94500.c | 28 ++++++++ 4 files changed, 76 insertions(+), 29 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 40707325765..f248688933d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2020-04-07 Jakub Jelinek + + PR target/94500 + * config/i386/i386-expand.c (emit_reduc_half): For V{64QI,32HI}mode + handle i < 64 using avx512bw_lshrv4ti3. Formatting fixes. 
+ 2020-04-06 Jakub Jelinek * cselib.c (cselib_subst_to_values): For SP_DERIVED_VALUE_P diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 8e623b3707f..066de99e660 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -14891,43 +14891,51 @@ emit_reduc_half (rtx dest, rtx src, int i) break; case E_V64QImode: case E_V32HImode: + if (i < 64) + { + d = gen_reg_rtx (V4TImode); + tem = gen_avx512bw_lshrv4ti3 (d, gen_lowpart (V4TImode, src), + GEN_INT (i / 2)); + break; + } + /* FALLTHRU */ case E_V16SImode: case E_V16SFmode: case E_V8DImode: case E_V8DFmode: if (i > 128) tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest), - gen_lowpart (V16SImode, src), - gen_lowpart (V16SImode, src), - GEN_INT (0x4 + (i == 512 ? 4 : 0)), - GEN_INT (0x5 + (i == 512 ? 4 : 0)), - GEN_INT (0x6 + (i == 512 ? 4 : 0)), - GEN_INT (0x7 + (i == 512 ? 4 : 0)), - GEN_INT (0xC), GEN_INT (0xD), - GEN_INT (0xE), GEN_INT (0xF), - GEN_INT (0x10), GEN_INT (0x11), - GEN_INT (0x12), GEN_INT (0x13), - GEN_INT (0x14), GEN_INT (0x15), - GEN_INT (0x16), GEN_INT (0x17)); + gen_lowpart (V16SImode, src), + gen_lowpart (V16SImode, src), + GEN_INT (0x4 + (i == 512 ? 4 : 0)), + GEN_INT (0x5 + (i == 512 ? 4 : 0)), + GEN_INT (0x6 + (i == 512 ? 4 : 0)), + GEN_INT (0x7 + (i == 512 ? 4 : 0)), + GEN_INT (0xC), GEN_INT (0xD), + GEN_INT (0xE), GEN_INT (0xF), + GEN_INT (0x10), GEN_INT (0x11), + GEN_INT (0x12), GEN_INT (0x13), + GEN_INT (0x14), GEN_INT (0x15), + GEN_INT (0x16), GEN_INT (0x17)); else tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest), - gen_lowpart (V16SImode, src), - GEN_INT (i == 128 ? 0x2 : 0x1), - GEN_INT (0x3), - GEN_INT (0x3), - GEN_INT (0x3), - GEN_INT (i == 128 ? 0x6 : 0x5), - GEN_INT (0x7), - GEN_INT (0x7), - GEN_INT (0x7), - GEN_INT (i == 128 ? 0xA : 0x9), - GEN_INT (0xB), - GEN_INT (0xB), - GEN_INT (0xB), - GEN_INT (i == 128 ? 
0xE : 0xD), - GEN_INT (0xF), - GEN_INT (0xF), - GEN_INT (0xF)); + gen_lowpart (V16SImode, src), + GEN_INT (i == 128 ? 0x2 : 0x1), + GEN_INT (0x3), + GEN_INT (0x3), + GEN_INT (0x3), + GEN_INT (i == 128 ? 0x6 : 0x5), + GEN_INT (0x7), + GEN_INT (0x7), + GEN_INT (0x7), + GEN_INT (i == 128 ? 0xA : 0x9), + GEN_INT (0xB), + GEN_INT (0xB), + GEN_INT (0xB), + GEN_INT (i == 128 ? 0xE : 0xD), + GEN_INT (0xF), + GEN_INT (0xF), + GEN_INT (0xF)); break; default: gcc_unreachable (); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ef5e0cf527c..d9209f920ae 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-04-07 Jakub Jelinek + + PR target/94500 + * gcc.target/i386/avx512bw-pr94500.c: New test. + 2020-04-06 Steven G. Kargl PR fortran/93686 diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c new file mode 100644 index 00000000000..7effdac5a17 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c @@ -0,0 +1,28 @@ +/* PR target/94500 */ +/* { dg-do run { target avx512bw } } */ +/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */ + +#define AVX512BW +#include "avx512f-helper.h" + +__attribute__((noipa)) signed char +foo (signed char *p) +{ + signed char r = 0; + int i; + for (i = 0; i < 256; i++) + if (p[i] > r) r = p[i]; + return r; +} + +signed char buf[256]; + +static void +TEST (void) +{ + int i; + for (i = 0; i < 256; i++) + buf[i] = i - 128; + if (foo (buf) != 127) + abort (); +} -- 2.30.2