2017-10-24 Jakub Jelinek <jakub@redhat.com>
+ PR target/82460
+ * config/i386/sse.md (UNSPEC_VPERMI2, UNSPEC_VPERMI2_MASK): Remove.
+ (VPERMI2, VPERMI2I): New mode iterators.
+ (<avx512>_vpermi2var<mode>3_maskz): Remove 3 define_expand patterns.
+ (<avx512>_vpermi2var<mode>3<sd_maskz_name>): Remove 3 define_insn
+ patterns.
+ (<avx512>_vpermi2var<mode>3_mask): New define_expand using VPERMI2
+ mode iterator. Remove 3 old define_insn patterns.
+ (*<avx512>_vpermi2var<mode>3_mask): 2 new define_insn patterns.
+ (<avx512>_vpermt2var<mode>3_maskz): Adjust 1 define_expand to use
+ VPERMI2 mode iterator, remove the other two expanders.
+ (<avx512>_vpermt2var<mode>3<sd_maskz_name>): Adjust 1 define_insn
+ to use VPERMI2 mode iterator, add another alternative for vpermi2*
+ instructions, remove the other two patterns.
+ (<avx512>_vpermt2var<mode>3_mask): Adjust 1 define_insn to use VPERMI2
+ mode iterator, remove the other two patterns.
+ * config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Renamed to ...
+ (ix86_expand_vec_perm_vpermt2): ... this. Swap mask and op0
+ arguments, use gen_*vpermt2* expanders instead of gen_*vpermi2*
+ and adjust argument order accordingly.
+ (ix86_expand_vec_perm): Adjust caller.
+ (expand_vec_perm_1): Likewise.
+ (expand_vec_perm_vpermi2_vpshub2): Rename to ...
+ (expand_vec_perm_vpermt2_vpshub2): ... this.
+ (ix86_expand_vec_perm_const_1): Adjust caller.
+ (ix86_vectorize_vec_perm_const_ok): Adjust comments.
+
PR target/82370
* config/i386/sse.md (VIMAX_AVX2): Remove V4TImode.
(VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators.
};
static bool
-ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
+ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
struct expand_vec_perm_d *d)
{
- /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
+ /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
expander, so args are either in d, or in op0, op1 etc. */
machine_mode mode = GET_MODE (d ? d->op0 : op0);
machine_mode maskmode = mode;
{
case E_V8HImode:
if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_vpermi2varv8hi3;
+ gen = gen_avx512vl_vpermt2varv8hi3;
break;
case E_V16HImode:
if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_vpermi2varv16hi3;
+ gen = gen_avx512vl_vpermt2varv16hi3;
break;
case E_V64QImode:
if (TARGET_AVX512VBMI)
- gen = gen_avx512bw_vpermi2varv64qi3;
+ gen = gen_avx512bw_vpermt2varv64qi3;
break;
case E_V32HImode:
if (TARGET_AVX512BW)
- gen = gen_avx512bw_vpermi2varv32hi3;
+ gen = gen_avx512bw_vpermt2varv32hi3;
break;
case E_V4SImode:
if (TARGET_AVX512VL)
- gen = gen_avx512vl_vpermi2varv4si3;
+ gen = gen_avx512vl_vpermt2varv4si3;
break;
case E_V8SImode:
if (TARGET_AVX512VL)
- gen = gen_avx512vl_vpermi2varv8si3;
+ gen = gen_avx512vl_vpermt2varv8si3;
break;
case E_V16SImode:
if (TARGET_AVX512F)
- gen = gen_avx512f_vpermi2varv16si3;
+ gen = gen_avx512f_vpermt2varv16si3;
break;
case E_V4SFmode:
if (TARGET_AVX512VL)
{
- gen = gen_avx512vl_vpermi2varv4sf3;
+ gen = gen_avx512vl_vpermt2varv4sf3;
maskmode = V4SImode;
}
break;
case E_V8SFmode:
if (TARGET_AVX512VL)
{
- gen = gen_avx512vl_vpermi2varv8sf3;
+ gen = gen_avx512vl_vpermt2varv8sf3;
maskmode = V8SImode;
}
break;
case E_V16SFmode:
if (TARGET_AVX512F)
{
- gen = gen_avx512f_vpermi2varv16sf3;
+ gen = gen_avx512f_vpermt2varv16sf3;
maskmode = V16SImode;
}
break;
case E_V2DImode:
if (TARGET_AVX512VL)
- gen = gen_avx512vl_vpermi2varv2di3;
+ gen = gen_avx512vl_vpermt2varv2di3;
break;
case E_V4DImode:
if (TARGET_AVX512VL)
- gen = gen_avx512vl_vpermi2varv4di3;
+ gen = gen_avx512vl_vpermt2varv4di3;
break;
case E_V8DImode:
if (TARGET_AVX512F)
- gen = gen_avx512f_vpermi2varv8di3;
+ gen = gen_avx512f_vpermt2varv8di3;
break;
case E_V2DFmode:
if (TARGET_AVX512VL)
{
- gen = gen_avx512vl_vpermi2varv2df3;
+ gen = gen_avx512vl_vpermt2varv2df3;
maskmode = V2DImode;
}
break;
case E_V4DFmode:
if (TARGET_AVX512VL)
{
- gen = gen_avx512vl_vpermi2varv4df3;
+ gen = gen_avx512vl_vpermt2varv4df3;
maskmode = V4DImode;
}
break;
case E_V8DFmode:
if (TARGET_AVX512F)
{
- gen = gen_avx512f_vpermi2varv8df3;
+ gen = gen_avx512f_vpermt2varv8df3;
maskmode = V8DImode;
}
break;
if (gen == NULL)
return false;
- /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
+ /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
expander, so args are either in d, or in op0, op1 etc. */
if (d)
{
mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
}
- emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
+ emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
return true;
}
}
}
- if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
+ if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
return;
if (TARGET_AVX2)
if (ix86_expand_vec_one_operand_perm_avx512 (d))
return true;
- /* Try the AVX512F vpermi2 instructions. */
- if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
+ /* Try the AVX512F vpermt2/vpermi2 instructions. */
+ if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
return true;
/* See if we can get the same permutation in different vector integer
}
/* Implement arbitrary permutations of two V64QImode operands
- will 2 vpermi2w, 2 vpshufb and one vpor instruction. */
+ with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */
static bool
-expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
+expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d)
{
if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
return false;
if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
return true;
- if (expand_vec_perm_vpermi2_vpshub2 (d))
+ if (expand_vec_perm_vpermt2_vpshub2 (d))
return true;
/* ??? Look for narrow permutations whose element orderings would
case E_V8DImode:
case E_V8DFmode:
if (TARGET_AVX512F)
- /* All implementable with a single vpermi2 insn. */
+ /* All implementable with a single vperm[it]2 insn. */
return true;
break;
case E_V32HImode:
if (TARGET_AVX512BW)
- /* All implementable with a single vpermi2 insn. */
+ /* All implementable with a single vperm[it]2 insn. */
return true;
break;
case E_V64QImode:
if (TARGET_AVX512BW)
- /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
+ /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */
return true;
break;
case E_V8SImode:
case E_V4DFmode:
case E_V4DImode:
if (TARGET_AVX512VL)
- /* All implementable with a single vpermi2 insn. */
+ /* All implementable with a single vperm[it]2 insn. */
return true;
break;
case E_V16HImode:
UNSPEC_VSIBADDR
;; For AVX512F support
- UNSPEC_VPERMI2
UNSPEC_VPERMT2
- UNSPEC_VPERMI2_MASK
UNSPEC_UNSIGNED_FIX_NOTRUNC
UNSPEC_UNSIGNED_PCMP
UNSPEC_TESTM
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "<avx512>_vpermi2var<mode>3_maskz"
- [(match_operand:VI48F 0 "register_operand")
- (match_operand:VI48F 1 "register_operand")
- (match_operand:<sseintvecmode> 2 "register_operand")
- (match_operand:VI48F 3 "nonimmediate_operand")
- (match_operand:<avx512fmaskmode> 4 "register_operand")]
- "TARGET_AVX512F"
-{
- emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
- operands[0], operands[1], operands[2], operands[3],
- CONST0_RTX (<MODE>mode), operands[4]));
- DONE;
-})
-
-(define_expand "<avx512>_vpermi2var<mode>3_maskz"
- [(match_operand:VI1_AVX512VL 0 "register_operand")
- (match_operand:VI1_AVX512VL 1 "register_operand")
- (match_operand:<sseintvecmode> 2 "register_operand")
- (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
- (match_operand:<avx512fmaskmode> 4 "register_operand")]
- "TARGET_AVX512VBMI"
-{
- emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
- operands[0], operands[1], operands[2], operands[3],
- CONST0_RTX (<MODE>mode), operands[4]));
- DONE;
-})
-
-(define_expand "<avx512>_vpermi2var<mode>3_maskz"
- [(match_operand:VI2_AVX512VL 0 "register_operand")
- (match_operand:VI2_AVX512VL 1 "register_operand")
- (match_operand:<sseintvecmode> 2 "register_operand")
- (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
- (match_operand:<avx512fmaskmode> 4 "register_operand")]
- "TARGET_AVX512BW"
-{
- emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
- operands[0], operands[1], operands[2], operands[3],
- CONST0_RTX (<MODE>mode), operands[4]));
- DONE;
-})
-
-(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI48F 0 "register_operand" "=v")
- (unspec:VI48F
- [(match_operand:VI48F 1 "register_operand" "v")
- (match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMI2))]
+(define_mode_iterator VPERMI2
+ [V16SI V16SF V8DI V8DF
+ (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
+ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
+ (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
+
+(define_mode_iterator VPERMI2I
+ [V16SI V8DI
+ (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
+ (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
+
+(define_expand "<avx512>_vpermi2var<mode>3_mask"
+ [(set (match_operand:VPERMI2 0 "register_operand")
+ (vec_merge:VPERMI2
+ (unspec:VPERMI2
+ [(match_operand:<sseintvecmode> 2 "register_operand")
+ (match_operand:VPERMI2 1 "register_operand")
+ (match_operand:VPERMI2 3 "nonimmediate_operand")]
+ UNSPEC_VPERMT2)
+ (match_dup 5)
+ (match_operand:<avx512fmaskmode> 4 "register_operand")))]
"TARGET_AVX512F"
- "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
- (unspec:VI1_AVX512VL
- [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
- (match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMI2))]
- "TARGET_AVX512VBMI"
- "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
- (unspec:VI2_AVX512VL
- [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
- (match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMI2))]
- "TARGET_AVX512BW"
- "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermi2var<mode>3_mask"
- [(set (match_operand:VI48F 0 "register_operand" "=v")
- (vec_merge:VI48F
- (unspec:VI48F
- [(match_operand:VI48F 1 "register_operand" "v")
- (match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMI2_MASK)
- (match_dup 0)
+ "operands[5] = gen_lowpart (<MODE>mode, operands[2]);")
+
+(define_insn "*<avx512>_vpermi2var<mode>3_mask"
+ [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
+ (vec_merge:VPERMI2I
+ (unspec:VPERMI2I
+ [(match_operand:<sseintvecmode> 2 "register_operand" "0")
+ (match_operand:VPERMI2I 1 "register_operand" "v")
+ (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPERMT2)
+ (match_dup 2)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F"
"vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx512>_vpermi2var<mode>3_mask"
- [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
- (vec_merge:VI1_AVX512VL
- (unspec:VI1_AVX512VL
- [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
- (match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMI2_MASK)
- (match_dup 0)
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512VBMI"
- "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermi2var<mode>3_mask"
- [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
- (vec_merge:VI2_AVX512VL
- (unspec:VI2_AVX512VL
- [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
- (match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMI2_MASK)
- (match_dup 0)
+(define_insn "*<avx512>_vpermi2var<mode>3_mask"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:<sseintvecmode> 2 "register_operand" "0")
+ (match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPERMT2)
+ (subreg:VF_AVX512VL (match_dup 2) 0)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512BW"
+ "TARGET_AVX512F"
"vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
- [(match_operand:VI48F 0 "register_operand")
+ [(match_operand:VPERMI2 0 "register_operand")
(match_operand:<sseintvecmode> 1 "register_operand")
- (match_operand:VI48F 2 "register_operand")
- (match_operand:VI48F 3 "nonimmediate_operand")
+ (match_operand:VPERMI2 2 "register_operand")
+ (match_operand:VPERMI2 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512F"
{
DONE;
})
-(define_expand "<avx512>_vpermt2var<mode>3_maskz"
- [(match_operand:VI1_AVX512VL 0 "register_operand")
- (match_operand:<sseintvecmode> 1 "register_operand")
- (match_operand:VI1_AVX512VL 2 "register_operand")
- (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
- (match_operand:<avx512fmaskmode> 4 "register_operand")]
- "TARGET_AVX512VBMI"
-{
- emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
- operands[0], operands[1], operands[2], operands[3],
- CONST0_RTX (<MODE>mode), operands[4]));
- DONE;
-})
-
-(define_expand "<avx512>_vpermt2var<mode>3_maskz"
- [(match_operand:VI2_AVX512VL 0 "register_operand")
- (match_operand:<sseintvecmode> 1 "register_operand")
- (match_operand:VI2_AVX512VL 2 "register_operand")
- (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
- (match_operand:<avx512fmaskmode> 4 "register_operand")]
- "TARGET_AVX512BW"
-{
- emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
- operands[0], operands[1], operands[2], operands[3],
- CONST0_RTX (<MODE>mode), operands[4]));
- DONE;
-})
-
(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI48F 0 "register_operand" "=v")
- (unspec:VI48F
- [(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F 2 "register_operand" "0")
- (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
+ (unspec:VPERMI2
+ [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
+ (match_operand:VPERMI2 2 "register_operand" "0,v")
+ (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
UNSPEC_VPERMT2))]
"TARGET_AVX512F"
- "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
- (unspec:VI1_AVX512VL
- [(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI1_AVX512VL 2 "register_operand" "0")
- (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMT2))]
- "TARGET_AVX512VBMI"
- "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
- (unspec:VI2_AVX512VL
- [(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI2_AVX512VL 2 "register_operand" "0")
- (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMT2))]
- "TARGET_AVX512BW"
- "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
+ "@
+ vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
+ vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<avx512>_vpermt2var<mode>3_mask"
- [(set (match_operand:VI48F 0 "register_operand" "=v")
- (vec_merge:VI48F
- (unspec:VI48F
+ [(set (match_operand:VPERMI2 0 "register_operand" "=v")
+ (vec_merge:VPERMI2
+ (unspec:VPERMI2
[(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F 2 "register_operand" "0")
- (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
+ (match_operand:VPERMI2 2 "register_operand" "0")
+ (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMT2)
(match_dup 2)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx512>_vpermt2var<mode>3_mask"
- [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
- (vec_merge:VI1_AVX512VL
- (unspec:VI1_AVX512VL
- [(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI1_AVX512VL 2 "register_operand" "0")
- (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMT2)
- (match_dup 2)
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512VBMI"
- "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<avx512>_vpermt2var<mode>3_mask"
- [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
- (vec_merge:VI2_AVX512VL
- (unspec:VI2_AVX512VL
- [(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI2_AVX512VL 2 "register_operand" "0")
- (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
- UNSPEC_VPERMT2)
- (match_dup 2)
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512BW"
- "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_expand "avx_vperm2f128<mode>3"
[(set (match_operand:AVX256MODE2P 0 "register_operand")
(unspec:AVX256MODE2P
2017-10-24 Jakub Jelinek <jakub@redhat.com>
+ PR target/82460
+ * gcc.target/i386/pr82460-1.c: New test.
+ * gcc.target/i386/pr82460-2.c: New test.
+ * gcc.target/i386/avx512f-vpermt2pd-1.c: Adjust scan-assembler*
+ regexps to allow vpermt2* to vpermi2* replacement or vice versa
+ where possible.
+ * gcc.target/i386/avx512vl-vpermt2pd-1.c: Likewise.
+ * gcc.target/i386/avx512f-vpermt2d-1.c: Likewise.
+ * gcc.target/i386/vect-pack-trunc-2.c: Likewise.
+ * gcc.target/i386/avx512vl-vpermt2ps-1.c: Likewise.
+ * gcc.target/i386/avx512vl-vpermt2q-1.c: Likewise.
+ * gcc.target/i386/avx512f-vpermt2ps-1.c: Likewise.
+ * gcc.target/i386/avx512vl-vpermt2d-1.c: Likewise.
+ * gcc.target/i386/avx512bw-vpermt2w-1.c: Likewise.
+ * gcc.target/i386/avx512vbmi-vpermt2b-1.c: Likewise.
+ * gcc.target/i386/avx512f-vpermt2q-1.c: Likewise.
+
PR target/82370
* gcc.target/i386/pr82370.c: New test.
/* { dg-do compile } */
/* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } *
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } *
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2w\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512vbmi -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" 3 } } *
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" 3 } } *
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2b\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2d\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2pd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[ti]2q\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
--- /dev/null
+/* PR target/82460 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vbmi" } */
+/* { dg-final { scan-assembler-not {\mvmovd} } } */
+
+#include <x86intrin.h>
+
+__m512i
+f1 (__m512i x, __m512i y, char *z)
+{
+ return _mm512_permutex2var_epi32 (y, x, _mm512_loadu_si512 (z));
+}
+
+__m512i
+f2 (__m512i x, __m512i y, char *z)
+{
+ return _mm512_permutex2var_epi32 (x, y, _mm512_loadu_si512 (z));
+}
+
+__m512i
+f3 (__m512i x, __m512i y, __m512i z)
+{
+ return _mm512_permutex2var_epi8 (y, x, z);
+}
+
+__m512i
+f4 (__m512i x, __m512i y, __m512i z)
+{
+ return _mm512_permutex2var_epi8 (x, y, z);
+}
--- /dev/null
+/* PR target/82460 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512vbmi -mno-prefer-avx256" } */
+/* We want to reuse the permutation mask in the loop, so use vpermt2b rather
+ than vpermi2b. */
+/* { dg-final { scan-assembler-not {\mvpermi2b\M} } } */
+/* { dg-final { scan-assembler {\mvpermt2b\M} } } */
+
+void
+foo (unsigned char *__restrict__ x, const unsigned short *__restrict__ y,
+ unsigned long z)
+{
+ unsigned char *w = x + z;
+ do
+ *x++ = *y++ >> 8;
+ while (x < w);
+}
abort ();
}
-/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vperm\[it]2w\[ \\t\]+\[^\n\]*%zmm" 1 } } */