i386: Fix emit_reduc_half on V{64Q,32H}Imode [PR94500]
author: Jakub Jelinek <jakub@redhat.com>
Tue, 7 Apr 2020 06:27:49 +0000 (08:27 +0200)
committer: Jakub Jelinek <jakub@redhat.com>
Tue, 7 Apr 2020 06:27:49 +0000 (08:27 +0200)
The following testcase is miscompiled in 8.x, because emit_reduc_half is
prepared to handle for 512-bit modes only i equal to 512, 256, 128 and 64.
V32HImode also needs i equal to 32 and V64QImode i equal to 32 and 16,
but emit_reduc_half in that case performs a redundant permutation exactly
like i == 64.  In 9+ the testcase works because Richard in r9-3393
changed the reduc_* expanders so that they actually don't call
ix86_expand_reduc on 512-bit modes, but only 128-bit ones.

The patch fixes emit_reduc_half to handle also i of 32 and 16 similarly to
how V32QImode/V16HImode are handled for AVX2.  I think it shouldn't hurt
to fix the function even on the trunk and 9 branch even when nothing uses
it ATM.

2020-04-07  Jakub Jelinek  <jakub@redhat.com>

PR target/94500
* config/i386/i386-expand.c (emit_reduc_half): For V{64QI,32HI}mode
handle i < 64 using avx512bw_lshrv4ti3.  Formatting fixes.

* gcc.target/i386/avx512bw-pr94500.c: New test.

gcc/ChangeLog
gcc/config/i386/i386-expand.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c [new file with mode: 0644]

index 40707325765086a52ecdb73f572ca1b970250d5d..f248688933d4e2790256e0add2f2edd3755e6d30 100644 (file)
@@ -1,3 +1,9 @@
+2020-04-07  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/94500
+       * config/i386/i386-expand.c (emit_reduc_half): For V{64QI,32HI}mode
+       handle i < 64 using avx512bw_lshrv4ti3.  Formatting fixes.
+
 2020-04-06  Jakub Jelinek  <jakub@redhat.com>
 
        * cselib.c (cselib_subst_to_values): For SP_DERIVED_VALUE_P
index 8e623b3707fbf11c4d63017d07ef9d38cb288646..066de99e66076d486338fcd2238ac4013f4a5e30 100644 (file)
@@ -14891,43 +14891,51 @@ emit_reduc_half (rtx dest, rtx src, int i)
       break;
     case E_V64QImode:
     case E_V32HImode:
+      if (i < 64)
+       {
+         d = gen_reg_rtx (V4TImode);
+         tem = gen_avx512bw_lshrv4ti3 (d, gen_lowpart (V4TImode, src),
+                                       GEN_INT (i / 2));
+         break;
+       }
+      /* FALLTHRU */
     case E_V16SImode:
     case E_V16SFmode:
     case E_V8DImode:
     case E_V8DFmode:
       if (i > 128)
        tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
-                                     gen_lowpart (V16SImode, src),
-                                     gen_lowpart (V16SImode, src),
-                                     GEN_INT (0x4 + (i == 512 ? 4 : 0)),
-                                     GEN_INT (0x5 + (i == 512 ? 4 : 0)),
-                                     GEN_INT (0x6 + (i == 512 ? 4 : 0)),
-                                     GEN_INT (0x7 + (i == 512 ? 4 : 0)),
-                                     GEN_INT (0xC), GEN_INT (0xD),
-                                     GEN_INT (0xE), GEN_INT (0xF),
-                                     GEN_INT (0x10), GEN_INT (0x11),
-                                     GEN_INT (0x12), GEN_INT (0x13),
-                                     GEN_INT (0x14), GEN_INT (0x15),
-                                     GEN_INT (0x16), GEN_INT (0x17));
+                                       gen_lowpart (V16SImode, src),
+                                       gen_lowpart (V16SImode, src),
+                                       GEN_INT (0x4 + (i == 512 ? 4 : 0)),
+                                       GEN_INT (0x5 + (i == 512 ? 4 : 0)),
+                                       GEN_INT (0x6 + (i == 512 ? 4 : 0)),
+                                       GEN_INT (0x7 + (i == 512 ? 4 : 0)),
+                                       GEN_INT (0xC), GEN_INT (0xD),
+                                       GEN_INT (0xE), GEN_INT (0xF),
+                                       GEN_INT (0x10), GEN_INT (0x11),
+                                       GEN_INT (0x12), GEN_INT (0x13),
+                                       GEN_INT (0x14), GEN_INT (0x15),
+                                       GEN_INT (0x16), GEN_INT (0x17));
       else
        tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
-                                  gen_lowpart (V16SImode, src),
-                                  GEN_INT (i == 128 ? 0x2 : 0x1),
-                                  GEN_INT (0x3),
-                                  GEN_INT (0x3),
-                                  GEN_INT (0x3),
-                                  GEN_INT (i == 128 ? 0x6 : 0x5),
-                                  GEN_INT (0x7),
-                                  GEN_INT (0x7),
-                                  GEN_INT (0x7),
-                                  GEN_INT (i == 128 ? 0xA : 0x9),
-                                  GEN_INT (0xB),
-                                  GEN_INT (0xB),
-                                  GEN_INT (0xB),
-                                  GEN_INT (i == 128 ? 0xE : 0xD),
-                                  GEN_INT (0xF),
-                                  GEN_INT (0xF),
-                                  GEN_INT (0xF));
+                                   gen_lowpart (V16SImode, src),
+                                   GEN_INT (i == 128 ? 0x2 : 0x1),
+                                   GEN_INT (0x3),
+                                   GEN_INT (0x3),
+                                   GEN_INT (0x3),
+                                   GEN_INT (i == 128 ? 0x6 : 0x5),
+                                   GEN_INT (0x7),
+                                   GEN_INT (0x7),
+                                   GEN_INT (0x7),
+                                   GEN_INT (i == 128 ? 0xA : 0x9),
+                                   GEN_INT (0xB),
+                                   GEN_INT (0xB),
+                                   GEN_INT (0xB),
+                                   GEN_INT (i == 128 ? 0xE : 0xD),
+                                   GEN_INT (0xF),
+                                   GEN_INT (0xF),
+                                   GEN_INT (0xF));
       break;
     default:
       gcc_unreachable ();
index ef5e0cf527ca6f35c7b721031bdc9558196a2e5b..d9209f920aed38123d8cfc29d3981d9cd20c3874 100644 (file)
@@ -1,3 +1,8 @@
+2020-04-07  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/94500
+       * gcc.target/i386/avx512bw-pr94500.c: New test.
+
 2020-04-06  Steven G. Kargl  <kargl@gcc.gnu.org>
 
        PR fortran/93686
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr94500.c
new file mode 100644 (file)
index 0000000..7effdac
--- /dev/null
@@ -0,0 +1,28 @@
+/* PR target/94500 */
+/* { dg-do run { target avx512bw } } */
+/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */
+
+#define AVX512BW
+#include "avx512f-helper.h"
+
+__attribute__((noipa)) signed char
+foo (signed char *p)
+{
+  signed char r = 0;
+  int i;
+  for (i = 0; i < 256; i++)
+    if (p[i] > r) r = p[i];
+  return r;
+}
+
+signed char buf[256];
+
+static void
+TEST (void)
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    buf[i] = i - 128;
+  if (foo (buf) != 127)
+    abort ();
+}