re PR target/63594 (ICE: in ix86_vector_duplicate_value, at config/i386/i386.c:39831...
authorJakub Jelinek <jakub@redhat.com>
Wed, 10 Dec 2014 09:00:50 +0000 (10:00 +0100)
committerJakub Jelinek <jakub@gcc.gnu.org>
Wed, 10 Dec 2014 09:00:50 +0000 (10:00 +0100)
PR target/63594
* config/i386/sse.md (vec_dupv4sf): Move after
<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name> pattern.
(*vec_dupv4si, *vec_dupv2di): Likewise.
(<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>): Merge into ...
(<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>): ... this
pattern.
(*vec_dup<mode> AVX2_VEC_DUP_MODE splitter): Disable for
TARGET_AVX512VL (for QI/HI scalar modes only if TARGET_AVX512BW
is set too).
* config/i386/i386.c (enum ix86_builtins): Remove
IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
IX86_BUILTIN_PBROADCASTQ128_MEM_MASK and
IX86_BUILTIN_PBROADCASTQ512_MEM.
(bdesc_args): Use __builtin_ia32_pbroadcastq512_gpr_mask,
__builtin_ia32_pbroadcastq256_gpr_mask and
__builtin_ia32_pbroadcastq128_gpr_mask instead of *_mem_mask
regardless of OPTION_MASK_ISA_64BIT.
* config/i386/avx512fintrin.h (_mm512_set1_epi64,
_mm512_mask_set1_epi64, _mm512_maskz_set1_epi64): Use *_gpr_mask
builtins regardless of whether TARGET_64BIT is defined or not.
* config/i386/avx512vlintrin.h (_mm256_mask_set1_epi64,
_mm256_maskz_set1_epi64, _mm_mask_set1_epi64, _mm_maskz_set1_epi64):
Likewise.

From-SVN: r218565

gcc/ChangeLog
gcc/config/i386/avx512fintrin.h
gcc/config/i386/avx512vlintrin.h
gcc/config/i386/i386.c
gcc/config/i386/sse.md

index 997e38e80ded5719e6f159b71dd305db2e32d45f..63008297238b016f3e5f9bb24af2ba62a93a1087 100644 (file)
@@ -1,5 +1,30 @@
 2014-12-10  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/63594
+       * config/i386/sse.md (vec_dupv4sf): Move after
+       <mask_codefor><avx512>_vec_dup_gpr<mode><mask_name> pattern.
+       (*vec_dupv4si, *vec_dupv2di): Likewise.
+       (<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>): Merge into ...
+       (<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>): ... this
+       pattern.
+       (*vec_dup<mode> AVX2_VEC_DUP_MODE splitter): Disable for
+       TARGET_AVX512VL (for QI/HI scalar modes only if TARGET_AVX512BW
+       is set too).
+       * config/i386/i386.c (enum ix86_builtins): Remove
+       IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
+       IX86_BUILTIN_PBROADCASTQ128_MEM_MASK and
+       IX86_BUILTIN_PBROADCASTQ512_MEM.
+       (bdesc_args): Use __builtin_ia32_pbroadcastq512_gpr_mask,
+       __builtin_ia32_pbroadcastq256_gpr_mask and
+       __builtin_ia32_pbroadcastq128_gpr_mask instead of *_mem_mask
+       regardless of OPTION_MASK_ISA_64BIT.
+       * config/i386/avx512fintrin.h (_mm512_set1_epi64,
+       _mm512_mask_set1_epi64, _mm512_maskz_set1_epi64): Use *_gpr_mask
+       builtins regardless of whether TARGET_64BIT is defined or not.
+       * config/i386/avx512vlintrin.h (_mm256_mask_set1_epi64,
+       _mm256_maskz_set1_epi64, _mm_mask_set1_epi64, _mm_maskz_set1_epi64):
+       Likewise.
+
        * config/i386/sse.md (*mov<mode>_internal, *avx512f_gatherdi<mode>_2):
        Use <MODE_SIZE> instead of GET_MODE_SIZE (<MODE>mode).
 
index 66f51999345a4a8a6097201d4d8629c5fa46b839..b4842b26d8b063c485ff8433c3403a95eeeaa67d 100644 (file)
@@ -3603,47 +3603,28 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_set1_epi64 (long long __A)
 {
-#ifdef TARGET_64BIT
   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
                                                           (__v8di)
                                                           _mm512_undefined_si512 (),
                                                           (__mmask8)(-1));
-#else
-  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
-                                                          (__v8di)
-                                                          _mm512_undefined_si512 (),
-                                                          (__mmask8)(-1));
-#endif
 }
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
 {
-#ifdef TARGET_64BIT
   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
                                                           __M);
-#else
-  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
-                                                          __M);
-#endif
 }
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
-#ifdef TARGET_64BIT
   return (__m512i)
         __builtin_ia32_pbroadcastq512_gpr_mask (__A,
                                                 (__v8di) _mm512_setzero_si512 (),
                                                 __M);
-#else
-  return (__m512i)
-        __builtin_ia32_pbroadcastq512_mem_mask (__A,
-                                                (__v8di) _mm512_setzero_si512 (),
-                                                __M);
-#endif
 }
 
 extern __inline __m512
index a042e8c6ca63739e0a0cc471d9e2dfa9c05d68ea..56aaa3ec68dd156827da41b78cb11f0ad2d14a36 100644 (file)
@@ -2642,30 +2642,18 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
 {
-#ifdef TARGET_64BIT
   return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
                                                           __M);
-#else
-  return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O,
-                                                          __M);
-#endif
 }
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
-#ifdef TARGET_64BIT
   return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
                                                           (__v4di)
                                                           _mm256_setzero_si256 (),
                                                           __M);
-#else
-  return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A,
-                                                          (__v4di)
-                                                          _mm256_setzero_si256 (),
-                                                          __M);
-#endif
 }
 
 extern __inline __m128i
@@ -2691,30 +2679,18 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
 {
-#ifdef TARGET_64BIT
   return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
                                                           __M);
-#else
-  return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O,
-                                                          __M);
-#endif
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
-#ifdef TARGET_64BIT
   return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
                                                           (__v2di)
                                                           _mm_setzero_si128 (),
                                                           __M);
-#else
-  return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A,
-                                                          (__v2di)
-                                                          _mm_setzero_si128 (),
-                                                          __M);
-#endif
 }
 
 extern __inline __m256
index 1e1716e70702d9af28aca8e29f1b274683d9f892..9fe69cc61f7bca7bfc6b2692550a4bb495556c76 100644 (file)
@@ -28823,7 +28823,6 @@ enum ix86_builtins
   IX86_BUILTIN_PBROADCASTMW512,
   IX86_BUILTIN_PBROADCASTQ512,
   IX86_BUILTIN_PBROADCASTQ512_GPR,
-  IX86_BUILTIN_PBROADCASTQ512_MEM,
   IX86_BUILTIN_PCMPEQD512_MASK,
   IX86_BUILTIN_PCMPEQQ512_MASK,
   IX86_BUILTIN_PCMPGTD512_MASK,
@@ -29261,10 +29260,8 @@ enum ix86_builtins
   IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
   IX86_BUILTIN_PBROADCASTQ256_MASK,
   IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
-  IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
   IX86_BUILTIN_PBROADCASTQ128_MASK,
   IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
-  IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
   IX86_BUILTIN_BROADCASTSS256,
   IX86_BUILTIN_BROADCASTSS128,
   IX86_BUILTIN_BROADCASTSD256,
@@ -31803,8 +31800,7 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
   { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
-  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
-  { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
@@ -32078,11 +32074,9 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
-  { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
-  { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
-  { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
-  { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
+  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
   { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
index 0075fe224cdb9bde12bf4e447ea9325cc98a4d48..8548e16e9165210215c499ae838fc692caccd1e8 100644 (file)
     (set_attr "prefix" "evex")
     (set_attr "mode" "<MODE>")])
 
-(define_insn "vec_dupv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
-       (vec_duplicate:V4SF
-         (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
-  "TARGET_SSE"
-  "@
-   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
-   vbroadcastss\t{%1, %0|%0, %1}
-   shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "avx,avx,noavx")
-   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "vex,vex,orig")
-   (set_attr "mode" "V4SF")])
-
 ;; Although insertps takes register source, we prefer
 ;; unpcklps with register source since it is shorter.
 (define_insn "*vec_concatv2sf_sse4_1"
   operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
 })
 
-(define_insn "*vec_dupv4si"
-  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
-       (vec_duplicate:V4SI
-         (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
-  "TARGET_SSE"
-  "@
-   %vpshufd\t{$0, %1, %0|%0, %1, 0}
-   vbroadcastss\t{%1, %0|%0, %1}
-   shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "sse2,avx,noavx")
-   (set_attr "type" "sselog1,ssemov,sselog1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_vex,vex,orig")
-   (set_attr "mode" "TI,V4SF,V4SF")])
-
-(define_insn "*vec_dupv2di"
-  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
-       (vec_duplicate:V2DI
-         (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
-  "TARGET_SSE"
-  "@
-   punpcklqdq\t%0, %0
-   vpunpcklqdq\t{%d1, %0|%0, %d1}
-   %vmovddup\t{%1, %0|%0, %1}
-   movlhps\t%0, %0"
-  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
-   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
-   (set_attr "prefix" "orig,vex,maybe_vex,orig")
-   (set_attr "mode" "TI,TI,DF,V4SF")])
-
 (define_insn "*vec_concatv2si_sse4_1"
   [(set (match_operand:V2SI 0 "register_operand"     "=Yr,*x,x, Yr,*x,x, x, *y,*y")
        (vec_concat:V2SI
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
        (vec_duplicate:VI12_AVX512VL
-         (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
+         (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
   "TARGET_AVX512BW"
-  "vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
+  "@
+   vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
+   vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
-  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
-       (vec_duplicate:VI48_AVX512VL
-         (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
-  "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)"
-{
-  return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-}
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
-
-(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
-  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
+  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
        (vec_duplicate:V48_AVX512VL
-         (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
+         (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
   "TARGET_AVX512F"
   "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
+   (set_attr "mode" "<sseinsnmode>")
+   (set (attr "enabled")
+     (if_then_else (eq_attr "alternative" "1")
+       (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
+                    && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
+       (const_int 1)))])
 
-(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
-       (vec_duplicate:VI12_AVX512VL
-         (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
-  "TARGET_AVX512BW"
-  "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
+(define_insn "vec_dupv4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
+       (vec_duplicate:V4SF
+         (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
+  "TARGET_SSE"
+  "@
+   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
+   vbroadcastss\t{%1, %0|%0, %1}
+   shufps\t{$0, %0, %0|%0, %0, 0}"
+  [(set_attr "isa" "avx,avx,noavx")
+   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
+   (set_attr "length_immediate" "1,0,1")
+   (set_attr "prefix_extra" "0,1,*")
+   (set_attr "prefix" "vex,vex,orig")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_dupv4si"
+  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
+       (vec_duplicate:V4SI
+         (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
+  "TARGET_SSE"
+  "@
+   %vpshufd\t{$0, %1, %0|%0, %1, 0}
+   vbroadcastss\t{%1, %0|%0, %1}
+   shufps\t{$0, %0, %0|%0, %0, 0}"
+  [(set_attr "isa" "sse2,avx,noavx")
+   (set_attr "type" "sselog1,ssemov,sselog1")
+   (set_attr "length_immediate" "1,0,1")
+   (set_attr "prefix_extra" "0,1,*")
+   (set_attr "prefix" "maybe_vex,vex,orig")
+   (set_attr "mode" "TI,V4SF,V4SF")])
+
+(define_insn "*vec_dupv2di"
+  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
+       (vec_duplicate:V2DI
+         (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
+  "TARGET_SSE"
+  "@
+   punpcklqdq\t%0, %0
+   vpunpcklqdq\t{%d1, %0|%0, %d1}
+   %vmovddup\t{%1, %0|%0, %1}
+   movlhps\t%0, %0"
+  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
+   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
+   (set_attr "prefix" "orig,vex,maybe_vex,orig")
+   (set_attr "mode" "TI,TI,DF,V4SF")])
 
 (define_insn "avx2_vbroadcasti128_<mode>"
   [(set (match_operand:VI_256 0 "register_operand" "=x")
   [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
        (vec_duplicate:AVX2_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "register_operand")))]
-  "TARGET_AVX2 && reload_completed && GENERAL_REG_P (operands[1])"
+  "TARGET_AVX2
+   /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
+      available, because then we can broadcast from GPRs directly.
+      For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
+      for V*SI mode it requires just -mavx512vl.  */
+   && !(TARGET_AVX512VL
+       && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
+   && reload_completed && GENERAL_REG_P (operands[1])"
   [(const_int 0)]
 {
   emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),