Merge two define_insn: <avx512>_blendm<mode>, <avx512>_load<mode>_mask.
author liuhongt <hongtao.liu@intel.com>
Tue, 21 Jul 2020 07:25:20 +0000 (15:25 +0800)
committer liuhongt <hongtao.liu@intel.com>
Thu, 13 Aug 2020 03:19:02 +0000 (11:19 +0800)
These two define_insns have the same pattern, and <avx512>_load<mode>_mask
would always be matched since it shows up earlier in the md file. This
may lose some opportunities in pass_reload, since
<avx512>_load<mode>_mask only has constraint "0C" for operand 2, so the
"v" constraint in <avx512>_blendm<mode> would never be matched.

2020-07-21  Hongtao Liu  <hongtao.liu@intel.com>

gcc/
PR target/96246
* config/i386/sse.md (<avx512>_load<mode>_mask,
<avx512>_load<mode>_mask): Extend to generate blendm
instructions.
(<avx512>_blendm<mode>, <avx512>_blendm<mode>): Change
define_insn to define_expand.

gcc/testsuite/
* gcc.target/i386/avx512bw-pr96246-1.c: New test.
* gcc.target/i386/avx512bw-pr96246-2.c: New test.
* gcc.target/i386/avx512vl-pr96246-1.c: New test.
* gcc.target/i386/avx512vl-pr96246-2.c: New test.
* gcc.target/i386/avx512bw-vmovdqu16-1.c: Adjust test.
* gcc.target/i386/avx512bw-vmovdqu8-1.c: Ditto.
* gcc.target/i386/avx512f-vmovapd-1.c: Ditto.
* gcc.target/i386/avx512f-vmovaps-1.c: Ditto.
* gcc.target/i386/avx512f-vmovdqa32-1.c: Ditto.
* gcc.target/i386/avx512f-vmovdqa64-1.c: Ditto.
* gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
* gcc.target/i386/avx512vl-pr96246-1.c: Ditto.
* gcc.target/i386/avx512vl-pr96246-2.c: Ditto.
* gcc.target/i386/avx512vl-vmovapd-1.c: Ditto.
* gcc.target/i386/avx512vl-vmovaps-1.c: Ditto.
* gcc.target/i386/avx512vl-vmovdqa32-1.c: Ditto.
* gcc.target/i386/avx512vl-vmovdqa64-1.c: Ditto.

16 files changed:
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx512bw-pr96246-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-pr96246-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
gcc/testsuite/gcc.target/i386/avx512f-vmovapd-1.c
gcc/testsuite/gcc.target/i386/avx512f-vmovaps-1.c
gcc/testsuite/gcc.target/i386/avx512f-vmovdqa32-1.c
gcc/testsuite/gcc.target/i386/avx512f-vmovdqa64-1.c
gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c
gcc/testsuite/gcc.target/i386/avx512vl-pr96246-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-pr96246-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c
gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c
gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c
gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c

index ad8169f6f08ef28d56c721ec2d524726f34aa863..41c6dbfa668e696f882874bda389f422bf45cf5c 100644 (file)
 (define_insn "<avx512>_load<mode>_mask"
   [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:V48_AVX512VL
-         (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
-         (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
+         (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "vm,vm")
+         (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,v")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
   "TARGET_AVX512F"
 {
+  if (REG_P (operands[2])
+     && REGNO (operands[2]) != REGNO (operands[0]))
+    return "v<sseintprefix>blendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}";
+
   if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
     {
       if (misaligned_operand (operands[1], <MODE>mode))
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
-   (set_attr "memory" "none,load")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<avx512>_load<mode>_mask"
   [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VI12_AVX512VL
-         (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
-         (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
+         (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm,vm")
+         (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,v")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
   "TARGET_AVX512BW"
-  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+  "@
+    vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
+    vpblendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
-   (set_attr "memory" "none,load")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
    (set_attr "memory" "store")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<avx512>_blendm<mode>"
+(define_expand "<avx512>_blendm<mode>"
   [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
          (match_operand:V48_AVX512VL 1 "register_operand" "v")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
-  "TARGET_AVX512F"
-  "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
+  "TARGET_AVX512F")
 
-(define_insn "<avx512>_blendm<mode>"
+(define_expand "<avx512>_blendm<mode>"
   [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
-  "TARGET_AVX512BW"
-  "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
+  "TARGET_AVX512BW")
 
 (define_insn "<avx512>_store<mode>_mask"
   [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-1.c
new file mode 100644 (file)
index 0000000..2bfcc84
--- /dev/null
@@ -0,0 +1,30 @@
+/* PR target/96246 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512bw" } */
+/* { dg-final { scan-assembler-times "vpblendm\[bwdq\]\[\t ]" 4 } } */
+/* { dg-final { scan-assembler-times "vblendmp\[sd\]\[\t ]" 2 } } */
+
+typedef char v64qi __attribute__((vector_size (64)));
+typedef short v32hi __attribute__((vector_size (64)));
+typedef int v16si __attribute__((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+typedef float v16sf __attribute__((vector_size (64)));
+typedef double v8df __attribute__((vector_size (64)));
+
+#define COMPILE_TEST(vtype, num)                       \
+  vtype                                                        \
+  __attribute__ ((noipa))                              \
+  foo_##vtype (vtype a, vtype b, vtype c, vtype d)     \
+  {                                                    \
+    vtype e;                                           \
+    for (int i = 0; i != num; i++)                     \
+      e[i] = a[i] > b[i] ? c[i] : d[i];                        \
+    return e;                                          \
+  }
+
+COMPILE_TEST (v64qi, 64);
+COMPILE_TEST (v32hi, 32);
+COMPILE_TEST (v16si, 16);
+COMPILE_TEST (v8di, 8);
+COMPILE_TEST (v16sf, 16);
+COMPILE_TEST (v8df, 8);
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96246-2.c
new file mode 100644 (file)
index 0000000..422fcfe
--- /dev/null
@@ -0,0 +1,47 @@
+/* PR target/96246 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-options "-Ofast -mavx512bw" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+#include "avx512bw-pr96246-1.c"
+
+#define RUNTIME_TEST(vtype, num)                       \
+  do                                                   \
+    {                                                  \
+      vtype a, b, c, d;                                        \
+      vtype res;                                       \
+      for (int i = 0; i != num; i++)                   \
+       {                                               \
+         a[i] = i * 2;                                 \
+         b[i] = i * i - 5;                             \
+         c[i] = 1;                                     \
+         d[i] = 0;                                     \
+       }                                               \
+      res = foo_##vtype (a, b, c, d);                  \
+      for (int i = 0; i != num; i++)                   \
+       if (res [i] != (a[i] > b[i] ? c[i] : d[i]))     \
+         __builtin_abort ();                           \
+    }                                                  \
+  while (0)
+
+static void
+__attribute__ ((optimize (0)))
+TEST (void)
+{
+  RUNTIME_TEST (v64qi, 64);
+  RUNTIME_TEST (v32hi, 32);
+  RUNTIME_TEST (v16si, 16);
+  RUNTIME_TEST (v8di, 8);
+  RUNTIME_TEST (v16sf, 16);
+  RUNTIME_TEST (v8df, 8);
+}
index 0655042036080a4096ccd9005ad3705e6f0a2bc5..a0d0e36389bbdeb7d879f5554f25ab5ac05f1c15 100644 (file)
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
index 7f2a964bf0bf17f2007fcfaab0e51888b289dd0f..6d24e79bf66638407ac3f7fa554162153ef83b92 100644 (file)
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
index e869f70665aaddba48cacc970cb0e5c8b8feabf1..7fc84b16e2b50fe49705315b9cb0f77b60ac1959 100644 (file)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
index a7635a3ebf257eb225a230f0ce100bd15e06b2f6..c2e2655fda6e96aefce1031fe59dba9bb7c23cec 100644 (file)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
index b93727d9ef2635efe614098280c5422e9ef0ae86..8fb816c13176f73640667af9cfa7fa4a97fd52d7 100644 (file)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
index 1c372c4f92a6cfba3f02a1b94fa6c42d04881d22..4352b12b6e7f4c7e3496b0de9f5cd2e8ae1f1bd6 100644 (file)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
index 1b9644a37902dc9f6133904b9a07f463c44c513d..8d0b2e94001869fa921330f6e2742db7917d70b6 100644 (file)
@@ -3,10 +3,10 @@
 /* { dg-options "-Ofast -mavx512bw -mavx512vl -mno-xop -mprefer-vector-width=256" } */
 /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]8|vpblendmb)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]16|vpblendmw)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]32|vpblendmd)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]64|vpblendmq)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
 
 __attribute__((noipa)) void
 f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-1.c
new file mode 100644 (file)
index 0000000..95357d6
--- /dev/null
@@ -0,0 +1,36 @@
+/* PR target/96246 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpblendm\[bwdq\]\[\t ]" 6 } } */
+/* { dg-final { scan-assembler-times "vblendmp\[sd\]\[\t ]" 3 } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef char v16hi __attribute__ ((vector_size (32)));
+typedef int v4si __attribute__((vector_size (16)));
+typedef int v8si __attribute__((vector_size (32)));
+typedef long long v4di __attribute__((vector_size (32)));
+typedef float v4sf __attribute__((vector_size (16)));
+typedef float v8sf __attribute__((vector_size (32)));
+typedef double v4df __attribute__((vector_size (32)));
+
+#define COMPILE_TEST(vtype, num)                       \
+  vtype                                                        \
+  __attribute__ ((noipa))                              \
+  foo_##vtype (vtype a, vtype b, vtype c, vtype d)     \
+  {                                                    \
+    vtype e;                                           \
+    for (int i = 0; i != num; i++)                     \
+      e[i] = a[i] > b[i] ? c[i] : d[i];                        \
+    return e;                                          \
+  }
+
+COMPILE_TEST (v16qi, 16);
+COMPILE_TEST (v32qi, 32);
+COMPILE_TEST (v16hi, 16);
+COMPILE_TEST (v4si, 4);
+COMPILE_TEST (v8si, 8);
+COMPILE_TEST (v4sf, 4);
+COMPILE_TEST (v8sf, 8);
+COMPILE_TEST (v4di, 4);
+COMPILE_TEST (v4df, 4);
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr96246-2.c
new file mode 100644 (file)
index 0000000..d219f7c
--- /dev/null
@@ -0,0 +1,51 @@
+/* PR target/96246 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-Ofast -mavx512bw -mavx512vl" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+#include "avx512vl-pr96246-1.c"
+
+#define RUNTIME_TEST(vtype, num)                       \
+  do                                                   \
+    {                                                  \
+      vtype a, b, c, d;                                        \
+      vtype res;                                       \
+      for (int i = 0; i != num; i++)                   \
+       {                                               \
+         a[i] = i * 2;                                 \
+         b[i] = i * i - 5;                             \
+         c[i] = 1;                                     \
+         d[i] = 0;                                     \
+       }                                               \
+      res = foo_##vtype (a, b, c, d);                  \
+      for (int i = 0; i != num; i++)                   \
+       if (res [i] != (a[i] > b[i] ? c[i] : d[i]))     \
+         __builtin_abort ();                           \
+    }                                                  \
+  while (0)
+
+static void
+__attribute__ ((optimize (0)))
+TEST (void)
+{
+  RUNTIME_TEST (v16qi, 16);
+  RUNTIME_TEST (v32qi, 32);
+  RUNTIME_TEST (v16hi, 16);
+  RUNTIME_TEST (v4si, 4);
+  RUNTIME_TEST (v8si, 8);
+  RUNTIME_TEST (v4sf, 4);
+  RUNTIME_TEST (v8sf, 8);
+  RUNTIME_TEST (v4di, 4);
+  RUNTIME_TEST (v4df, 4);
+}
index 89c3ebefe35bc3389351224acb1fab53c687608b..fd59660f93227f0ba5e8fee000b240aab112f2e0 100644 (file)
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
index 2196ebb55d903de1e595883a3a2e082bc8db8fc1..455b1a9dc37efa7b04bf6860f10f5186165c4a88 100644 (file)
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
index e391de5b4321a42928aaba905eadffeef5b35e5a..217afbc6904b61ad908baec649ac620ba395fde0 100644 (file)
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
index db4d9d1487561cf7c919a41066c4d2d0f72c7bfb..9dc794d6a80fdf27ee393f4b132fdd88a457f9b1 100644 (file)
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */