(define_insn "<avx512>_load<mode>_mask"
[(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:V48_AVX512VL
- (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
- (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
+ (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "vm,vm")
+ (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,v")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
"TARGET_AVX512F"
{
+ if (REG_P (operands[2])
+ && REGNO (operands[2]) != REGNO (operands[0]))
+ return "v<sseintprefix>blendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}";
+
if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
{
if (misaligned_operand (operands[1], <MODE>mode))
}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
- (set_attr "memory" "none,load")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx512>_load<mode>_mask"
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
+ (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm,vm")
+ (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,v")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
"TARGET_AVX512BW"
- "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ "@
+ vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
+ vpblendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
- (set_attr "memory" "none,load")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx512f_mov<ssescalarmodelower>_mask"
(set_attr "memory" "store")
(set_attr "mode" "<MODE>")])
-(define_insn "<avx512>_blendm<mode>"
+(define_expand "<avx512>_blendm<mode>"
[(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
(vec_merge:V48_AVX512VL
(match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
(match_operand:V48_AVX512VL 1 "register_operand" "v")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
- "TARGET_AVX512F"
- "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ "TARGET_AVX512F")
-(define_insn "<avx512>_blendm<mode>"
+(define_expand "<avx512>_blendm<mode>"
[(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI12_AVX512VL
(match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
(match_operand:VI12_AVX512VL 1 "register_operand" "v")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
- "TARGET_AVX512BW"
- "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ "TARGET_AVX512BW")
(define_insn "<avx512>_store<mode>_mask"
[(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
--- /dev/null
+/* PR target/96246 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512bw" } */
+/* { dg-final { scan-assembler-times "vpblendm\[bwdq\]\[\t ]" 4 } } */
+/* { dg-final { scan-assembler-times "vblendmp\[sd\]\[\t ]" 2 } } */
+
+typedef char v64qi __attribute__((vector_size (64)));
+typedef short v32hi __attribute__((vector_size (64)));
+typedef int v16si __attribute__((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+typedef float v16sf __attribute__((vector_size (64)));
+typedef double v8df __attribute__((vector_size (64)));
+
+#define COMPILE_TEST(vtype, num) \
+ vtype \
+ __attribute__ ((noipa)) \
+ foo_##vtype (vtype a, vtype b, vtype c, vtype d) \
+ { \
+ vtype e; \
+ for (int i = 0; i != num; i++) \
+ e[i] = a[i] > b[i] ? c[i] : d[i]; \
+ return e; \
+ }
+
+COMPILE_TEST (v64qi, 64);
+COMPILE_TEST (v32hi, 32);
+COMPILE_TEST (v16si, 16);
+COMPILE_TEST (v8di, 8);
+COMPILE_TEST (v16sf, 16);
+COMPILE_TEST (v8df, 8);
--- /dev/null
+/* PR target/96246 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-options "-Ofast -mavx512bw" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+#include "avx512bw-pr96246-1.c"
+
+#define RUNTIME_TEST(vtype, num) \
+ do \
+ { \
+ vtype a, b, c, d; \
+ vtype res; \
+ for (int i = 0; i != num; i++) \
+ { \
+ a[i] = i * 2; \
+ b[i] = i * i - 5; \
+ c[i] = 1; \
+ d[i] = 0; \
+ } \
+ res = foo_##vtype (a, b, c, d); \
+ for (int i = 0; i != num; i++) \
+ if (res [i] != (a[i] > b[i] ? c[i] : d[i])) \
+ __builtin_abort (); \
+ } \
+ while (0)
+
+static void
+__attribute__ ((optimize (0)))
+TEST (void)
+{
+ RUNTIME_TEST (v64qi, 64);
+ RUNTIME_TEST (v32hi, 32);
+ RUNTIME_TEST (v16si, 16);
+ RUNTIME_TEST (v8di, 8);
+ RUNTIME_TEST (v16sf, 16);
+ RUNTIME_TEST (v8df, 8);
+}
/* { dg-do compile } */
/* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-options "-Ofast -mavx512bw -mavx512vl -mno-xop -mprefer-vector-width=256" } */
/* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
/* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]16\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]32\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
-/* { dg-final { scan-assembler-times "vmovdq\[au\]64\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]8|vpblendmb)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]16|vpblendmw)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]32|vpblendmd)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdq\[au\]64|vpblendmq)\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
__attribute__((noipa)) void
f1 (char *__restrict dst, char *__restrict src1, char *__restrict src2)
--- /dev/null
+/* PR target/96246 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpblendm\[bwdq\]\[\t ]" 6 } } */
+/* { dg-final { scan-assembler-times "vblendmp\[sd\]\[\t ]" 3 } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v32qi __attribute__ ((vector_size (32)));
+typedef char v16hi __attribute__ ((vector_size (32)));
+typedef int v4si __attribute__((vector_size (16)));
+typedef int v8si __attribute__((vector_size (32)));
+typedef long long v4di __attribute__((vector_size (32)));
+typedef float v4sf __attribute__((vector_size (16)));
+typedef float v8sf __attribute__((vector_size (32)));
+typedef double v4df __attribute__((vector_size (32)));
+
+#define COMPILE_TEST(vtype, num) \
+ vtype \
+ __attribute__ ((noipa)) \
+ foo_##vtype (vtype a, vtype b, vtype c, vtype d) \
+ { \
+ vtype e; \
+ for (int i = 0; i != num; i++) \
+ e[i] = a[i] > b[i] ? c[i] : d[i]; \
+ return e; \
+ }
+
+COMPILE_TEST (v16qi, 16);
+COMPILE_TEST (v32qi, 32);
+COMPILE_TEST (v16hi, 16);
+COMPILE_TEST (v4si, 4);
+COMPILE_TEST (v8si, 8);
+COMPILE_TEST (v4sf, 4);
+COMPILE_TEST (v8sf, 8);
+COMPILE_TEST (v4di, 4);
+COMPILE_TEST (v4df, 4);
--- /dev/null
+/* PR target/96246 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512bw } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-Ofast -mavx512bw -mavx512vl" } */
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+
+#ifndef TEST
+#define TEST avx512bw_test
+#endif
+
+#include "avx512vl-pr96246-1.c"
+
+#define RUNTIME_TEST(vtype, num) \
+ do \
+ { \
+ vtype a, b, c, d; \
+ vtype res; \
+ for (int i = 0; i != num; i++) \
+ { \
+ a[i] = i * 2; \
+ b[i] = i * i - 5; \
+ c[i] = 1; \
+ d[i] = 0; \
+ } \
+ res = foo_##vtype (a, b, c, d); \
+ for (int i = 0; i != num; i++) \
+ if (res [i] != (a[i] > b[i] ? c[i] : d[i])) \
+ __builtin_abort (); \
+ } \
+ while (0)
+
+static void
+__attribute__ ((optimize (0)))
+TEST (void)
+{
+ RUNTIME_TEST (v16qi, 16);
+ RUNTIME_TEST (v32qi, 32);
+ RUNTIME_TEST (v16hi, 16);
+ RUNTIME_TEST (v4si, 4);
+ RUNTIME_TEST (v8si, 8);
+ RUNTIME_TEST (v4sf, 4);
+ RUNTIME_TEST (v8sf, 8);
+ RUNTIME_TEST (v4di, 4);
+ RUNTIME_TEST (v4df, 4);
+}
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */