IX86_BUILTIN_PBROADCASTMW512,
IX86_BUILTIN_PBROADCASTQ512,
IX86_BUILTIN_PBROADCASTQ512_GPR,
- IX86_BUILTIN_PBROADCASTQ512_MEM,
IX86_BUILTIN_PCMPEQD512_MASK,
IX86_BUILTIN_PCMPEQQ512_MASK,
IX86_BUILTIN_PCMPGTD512_MASK,
IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
IX86_BUILTIN_PBROADCASTQ256_MASK,
IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
- IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
IX86_BUILTIN_PBROADCASTQ128_MASK,
IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
- IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
IX86_BUILTIN_BROADCASTSS256,
IX86_BUILTIN_BROADCASTSS128,
IX86_BUILTIN_BROADCASTSD256,
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
- { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
- { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
- { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
- { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
- { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
- { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
- (vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
- "TARGET_SSE"
- "@
- vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
- vbroadcastss\t{%1, %0|%0, %1}
- shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "isa" "avx,avx,noavx")
- (set_attr "type" "sseshuf1,ssemov,sseshuf1")
- (set_attr "length_immediate" "1,0,1")
- (set_attr "prefix_extra" "0,1,*")
- (set_attr "prefix" "vex,vex,orig")
- (set_attr "mode" "V4SF")])
-
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
})
-(define_insn "*vec_dupv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
- (vec_duplicate:V4SI
- (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
- "TARGET_SSE"
- "@
- %vpshufd\t{$0, %1, %0|%0, %1, 0}
- vbroadcastss\t{%1, %0|%0, %1}
- shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "isa" "sse2,avx,noavx")
- (set_attr "type" "sselog1,ssemov,sselog1")
- (set_attr "length_immediate" "1,0,1")
- (set_attr "prefix_extra" "0,1,*")
- (set_attr "prefix" "maybe_vex,vex,orig")
- (set_attr "mode" "TI,V4SF,V4SF")])
-
-(define_insn "*vec_dupv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
- (vec_duplicate:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
- "TARGET_SSE"
- "@
- punpcklqdq\t%0, %0
- vpunpcklqdq\t{%d1, %0|%0, %d1}
- %vmovddup\t{%1, %0|%0, %1}
- movlhps\t%0, %0"
- [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
- (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
- (set_attr "prefix" "orig,vex,maybe_vex,orig")
- (set_attr "mode" "TI,TI,DF,V4SF")])
-
(define_insn "*vec_concatv2si_sse4_1"
[(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y")
(vec_concat:V2SI
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
(vec_duplicate:VI12_AVX512VL
- (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
"TARGET_AVX512BW"
- "vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
+ "@
+ vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
+ vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
- [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
- (vec_duplicate:VI48_AVX512VL
- (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
- "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)"
-{
- return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-}
- [(set_attr "type" "ssemov")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
- [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
+ [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
(vec_duplicate:V48_AVX512VL
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
"TARGET_AVX512F"
"v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set_attr "mode" "<sseinsnmode>")
+ (set (attr "enabled")
+ (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
+ && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
+ (const_int 1)))])
-(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
- (vec_duplicate:VI12_AVX512VL
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512BW"
- "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+(define_insn "vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
+ "TARGET_SSE"
+ "@
+ vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
+ vbroadcastss\t{%1, %0|%0, %1}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "isa" "avx,avx,noavx")
+ (set_attr "type" "sseshuf1,ssemov,sseshuf1")
+ (set_attr "length_immediate" "1,0,1")
+ (set_attr "prefix_extra" "0,1,*")
+ (set_attr "prefix" "vex,vex,orig")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_dupv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
+ "TARGET_SSE"
+ "@
+ %vpshufd\t{$0, %1, %0|%0, %1, 0}
+ vbroadcastss\t{%1, %0|%0, %1}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "isa" "sse2,avx,noavx")
+ (set_attr "type" "sselog1,ssemov,sselog1")
+ (set_attr "length_immediate" "1,0,1")
+ (set_attr "prefix_extra" "0,1,*")
+ (set_attr "prefix" "maybe_vex,vex,orig")
+ (set_attr "mode" "TI,V4SF,V4SF")])
+
+(define_insn "*vec_dupv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
+ "TARGET_SSE"
+ "@
+ punpcklqdq\t%0, %0
+ vpunpcklqdq\t{%d1, %0|%0, %d1}
+ %vmovddup\t{%1, %0|%0, %1}
+ movlhps\t%0, %0"
+ [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
+ (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
+ (set_attr "prefix" "orig,vex,maybe_vex,orig")
+ (set_attr "mode" "TI,TI,DF,V4SF")])
(define_insn "avx2_vbroadcasti128_<mode>"
[(set (match_operand:VI_256 0 "register_operand" "=x")
[(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
(vec_duplicate:AVX2_VEC_DUP_MODE
(match_operand:<ssescalarmode> 1 "register_operand")))]
- "TARGET_AVX2 && reload_completed && GENERAL_REG_P (operands[1])"
+ "TARGET_AVX2
+ /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
+ available, because then we can broadcast from GPRs directly.
+ For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
+ for V*SI mode it requires just -mavx512vl. */
+ && !(TARGET_AVX512VL
+ && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
+ && reload_completed && GENERAL_REG_P (operands[1])"
[(const_int 0)]
{
emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),