op = fixup_modeless_constant (op, mode);
- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
+ /* NB: 3-operands load implied it's a mask load,
+ and that mask operand shoud be at the end.
+ Keep all-ones mask which would be simplified by the expander. */
+ if (nargs == 3 && i == 2 && klass == load
+ && constm1_operand (op, mode))
+ ;
+ else if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
op = copy_to_mode_reg (mode, op);
else
{
UNSPEC_MASKOP
UNSPEC_KORTEST
UNSPEC_KTEST
+ ;; Mask load
+ UNSPEC_MASKLOAD
;; For embed. rounding feature
UNSPEC_EMBEDDED_ROUNDING
]
(symbol_ref "true")))])
-(define_insn "<avx512>_load<mode>_mask"
- [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
+;; If mem_addr points to a memory region with less than whole vector size bytes
+;; of accessible memory and k is a mask that would prevent reading the inaccessible
+;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to vpblendd
+;; See pr97642.
+(define_expand "<avx512>_load<mode>_mask"
+ [(set (match_operand:V48_AVX512VL 0 "register_operand")
(vec_merge:V48_AVX512VL
- (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "vm,vm")
- (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,v")
- (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+ (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
+ (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
"TARGET_AVX512F"
{
- if (REG_P (operands[2])
- && REGNO (operands[2]) != REGNO (operands[0]))
- return "v<sseintprefix>blendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}";
+ if (CONST_INT_P (operands[3]))
+ {
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ DONE;
+ }
+ else if (MEM_P (operands[1]))
+ operands[1] = gen_rtx_UNSPEC (<MODE>mode,
+ gen_rtvec(1, operands[1]),
+ UNSPEC_MASKLOAD);
+})
+(define_insn "*<avx512>_load<mode>_mask"
+ [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:V48_AVX512VL
+ (unspec:V48_AVX512VL
+ [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+ (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
+ "TARGET_AVX512F"
+{
if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
{
if (misaligned_operand (operands[1], <MODE>mode))
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx512>_load<mode>_mask"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
+(define_insn_and_split "*<avx512>_load<mode>"
+ [(set (match_operand:V48_AVX512VL 0 "register_operand")
+ (unspec:V48_AVX512VL
+ [(match_operand:V48_AVX512VL 1 "memory_operand")]
+ UNSPEC_MASKLOAD))]
+ "TARGET_AVX512F"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (match_dup 1))])
+
+(define_expand "<avx512>_load<mode>_mask"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand")
(vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm,vm")
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,v")
- (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+ (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
+ (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
"TARGET_AVX512BW"
- "@
- vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
- vpblendm<ssemodesuffix>\t{%1, %2, %0%{%3%}|%0%{%3%}, %2, %1}"
+{
+ if (CONST_INT_P (operands[3]))
+ {
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ DONE;
+ }
+ else if (MEM_P (operands[1]))
+ operands[1] = gen_rtx_UNSPEC (<MODE>mode,
+ gen_rtvec(1, operands[1]),
+ UNSPEC_MASKLOAD);
+
+})
+
+(define_insn "*<avx512>_load<mode>_mask"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VI12_AVX512VL
+ (unspec:VI12_AVX512VL
+ [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+ (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
+ "TARGET_AVX512BW"
+ "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_load<mode>"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI12_AVX512VL
+ [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD))]
+ "TARGET_AVX512BW"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (match_dup 1))])
+
(define_insn "avx512f_mov<ssescalarmodelower>_mask"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(set_attr "memory" "store")
(set_attr "mode" "<MODE>")])
-(define_expand "<avx512>_blendm<mode>"
- [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
+(define_insn "<avx512>_blendm<mode>"
+ [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:V48_AVX512VL
- (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
- (match_operand:V48_AVX512VL 1 "register_operand" "v")
- (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
- "TARGET_AVX512F")
+ (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
+ (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+ "TARGET_AVX512F"
+{
+ if (REG_P (operands[1])
+ && REGNO (operands[1]) != REGNO (operands[0]))
+ return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
-(define_expand "<avx512>_blendm<mode>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
+ {
+ if (misaligned_operand (operands[2], <MODE>mode))
+ return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
+ else
+ return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
+ }
+ else
+ {
+ if (misaligned_operand (operands[2], <MODE>mode))
+ return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
+ else
+ return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_blendm<mode>"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
- (match_operand:VI12_AVX512VL 1 "register_operand" "v")
- (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
- "TARGET_AVX512BW")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
+ (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+ "TARGET_AVX512BW"
+ "@
+ vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
+ vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx512>_store<mode>_mask"
[(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
/* { dg-do compile } */
/* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vpblendmw)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vpblendmb)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovapd|vblendmpd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovaps|vblendmps)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa32|vpblendmd)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqa64|vpblendmq)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 { target nonpic } } } */
--- /dev/null
+/* PR target/97642 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-not { k[0-8] } } } */
+
+#include <immintrin.h>
+__m128i
+foo1 (__m128i src, void const* P)
+{
+ return _mm_mask_loadu_epi32 (src, 15, P);
+}
+
+__m256i
+foo2 (__m256i src, void const* P)
+{
+ return _mm256_mask_loadu_epi32 (src, 255, P);
+}
+
+__m512i
+foo3 (__m512i src, void const* P)
+{
+ return _mm512_mask_loadu_epi32 (src, 65535 , P);
+}
+
+__m128i
+foo4 (__m128i src, void const* P)
+{
+ return _mm_mask_loadu_epi32 (src, -1, P);
+}
+
+__m256i
+foo5 (__m256i src, void const* P)
+{
+ return _mm256_mask_loadu_epi32 (src, -1, P);
+}
+
+__m512i
+foo6 (__m512i src, void const* P)
+{
+ return _mm512_mask_loadu_epi32 (src, -1 , P);
+}
--- /dev/null
+/* PR target/97642 */
+/* { dg-do run { target *-*-linux* } } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-require-effective-target avx512bw } */
+
+#include <assert.h>
+#include <immintrin.h>
+#include <stdint.h>
+#include <sys/mman.h>
+
+#define N 5
+
+// Faults with GCC because usage of vpblendd
+__m256i __attribute__((noinline)) mask_load(uint32_t * arr) {
+ __m256i tmp;
+ return _mm256_mask_loadu_epi32(tmp, (1 << N) - 1, arr);
+}
+
+// Faults
+__m256i __attribute__((noinline)) blend_load_asm(uint32_t * arr) {
+ __m256i tmp = _mm256_set1_epi64x(0);
+ asm volatile("vpblendd %[m], (%[arr]), %[tmp], %[tmp]\n\t"
+ : [ tmp ] "+x"(tmp)
+ : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
+ :);
+ return tmp;
+}
+
+// Does not fault
+__m256i __attribute__((noinline)) mask_load_asm(uint32_t * arr) {
+ __m256i tmp;
+ asm volatile(
+ "movb %[m], %%al\n\t"
+ "kmovb %%eax, %%k1\n\t"
+ "vmovdqu32 (%[arr]), %[tmp] %{%%k1} %{z%}\n\t"
+ : [ tmp ] "+x"(tmp)
+ : [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
+ : "eax", "k1");
+ return tmp;
+}
+
+
+void __attribute__((noinline)) mask_store(uint32_t * arr, __m256i v) {
+ return _mm256_mask_storeu_epi32(arr, (1 << N) - 1, v);
+}
+
+
+#define NPAGES (2)
+#define END_OF_PAGE (1024 - N)
+
+#ifndef LOAD_METHOD
+#define LOAD_METHOD mask_load // mask_load_asm does not fault
+#endif
+
+
+int
+main() {
+ if (!(__builtin_cpu_supports ("avx512dq")
+ && __builtin_cpu_supports ("avx512vl")
+ && __builtin_cpu_supports ("avx512bw")))
+ return 0;
+
+ uint32_t * addr =
+ (uint32_t *)mmap(NULL, NPAGES * 4096, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+ for (uint32_t i = 0; i < NPAGES; i += 2) {
+
+ uint32_t page_offset = 1024 * i + END_OF_PAGE;
+ uint32_t next_page_offset = 1024 * (i + 1);
+
+ assert(!mprotect(addr + next_page_offset, 4096, PROT_NONE));
+ mask_store(addr + page_offset, LOAD_METHOD(addr + page_offset));
+ }
+}