(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+;; Expander "<code>v16qiv16hi2": extend (any_extend) a V16QI
+;; nonimmediate operand into a V16HI register.  Enabled for TARGET_AVX2.
+;; There are no preparation statements, so the template is emitted as is.
+(define_expand "<code>v16qiv16hi2"
+ [(set (match_operand:V16HI 0 "register_operand")
+ (any_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand")))]
+ "TARGET_AVX2")
+
(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
[(set (match_operand:V32HI 0 "register_operand" "=v")
(any_extend:V32HI
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
+;; Expander "<code>v32qiv32hi2": extend (any_extend) a V32QI
+;; nonimmediate operand into a V32HI register.  Enabled for TARGET_AVX512BW.
+;; There are no preparation statements, so the template is emitted as is.
+(define_expand "<code>v32qiv32hi2"
+ [(set (match_operand:V32HI 0 "register_operand")
+ (any_extend:V32HI
+ (match_operand:V32QI 1 "nonimmediate_operand")))]
+ "TARGET_AVX512BW")
+
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
[(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
(any_extend:V8HI
(any_extend:V8HI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
+;; Expander "<code>v8qiv8hi2": extend (any_extend) a V8QI operand into a
+;; V8HI register.  Enabled for TARGET_SSE4_1.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V16QI through a subreg at byte offset 0, and handed to
+;; sse4_1_<code>v8qiv8hi2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v8qiv8hi2"
+  [(set (match_operand:V8HI 0 "register_operand")
+        (any_extend:V8HI
+          (match_operand:V8QI 1 "nonimmediate_operand")))]
+  "TARGET_SSE4_1"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V8QImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
+      emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
+;; Expander "<code>v16qiv16si2": extend (any_extend) a V16QI
+;; nonimmediate operand into a V16SI register.  Enabled for TARGET_AVX512F.
+;; There are no preparation statements, so the template is emitted as is.
+(define_expand "<code>v16qiv16si2"
+ [(set (match_operand:V16SI 0 "register_operand")
+ (any_extend:V16SI
+ (match_operand:V16QI 1 "nonimmediate_operand")))]
+ "TARGET_AVX512F")
+
(define_insn "avx2_<code>v8qiv8si2<mask_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
(any_extend:V8SI
(any_extend:V8SI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
+;; Expander "<code>v8qiv8si2": extend (any_extend) a V8QI operand into a
+;; V8SI register.  Enabled for TARGET_AVX2.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V16QI through a subreg at byte offset 0, and handed to
+;; avx2_<code>v8qiv8si2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v8qiv8si2"
+  [(set (match_operand:V8SI 0 "register_operand")
+        (any_extend:V8SI
+          (match_operand:V8QI 1 "nonimmediate_operand")))]
+  "TARGET_AVX2"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V8QImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
+      emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
[(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
(any_extend:V4SI
(any_extend:V4SI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
+;; Expander "<code>v4qiv4si2": extend (any_extend) a V4QI operand into a
+;; V4SI register.  Enabled for TARGET_SSE4_1.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V16QI through a subreg at byte offset 0, and handed to
+;; sse4_1_<code>v4qiv4si2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v4qiv4si2"
+  [(set (match_operand:V4SI 0 "register_operand")
+        (any_extend:V4SI
+          (match_operand:V4QI 1 "nonimmediate_operand")))]
+  "TARGET_SSE4_1"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V4QImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
+      emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
+;; Expander "<code>v16hiv16si2": extend (any_extend) a V16HI
+;; nonimmediate operand into a V16SI register.  Enabled for TARGET_AVX512F.
+;; There are no preparation statements, so the template is emitted as is.
+(define_expand "<code>v16hiv16si2"
+ [(set (match_operand:V16SI 0 "register_operand")
+ (any_extend:V16SI
+ (match_operand:V16HI 1 "nonimmediate_operand")))]
+ "TARGET_AVX512F")
+
(define_insn "avx2_<code>v8hiv8si2<mask_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
(any_extend:V8SI
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+;; Expander "<code>v8hiv8si2": extend (any_extend) a V8HI
+;; nonimmediate operand into a V8SI register.  Enabled for TARGET_AVX2.
+;; There are no preparation statements, so the template is emitted as is.
+(define_expand "<code>v8hiv8si2"
+ [(set (match_operand:V8SI 0 "register_operand")
+ (any_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand")))]
+ "TARGET_AVX2")
+
(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
[(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
(any_extend:V4SI
(any_extend:V4SI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
+;; Expander "<code>v4hiv4si2": extend (any_extend) a V4HI operand into a
+;; V4SI register.  Enabled for TARGET_SSE4_1.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V8HI through a subreg at byte offset 0, and handed to
+;; sse4_1_<code>v4hiv4si2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v4hiv4si2"
+  [(set (match_operand:V4SI 0 "register_operand")
+        (any_extend:V4SI
+          (match_operand:V4HI 1 "nonimmediate_operand")))]
+  "TARGET_SSE4_1"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V4HImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
+      emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(any_extend:V8DI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
+;; Expander "<code>v8qiv8di2": extend (any_extend) a V8QI operand into a
+;; V8DI register.  Enabled for TARGET_AVX512F.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V16QI through a subreg at byte offset 0, and handed to
+;; avx512f_<code>v8qiv8di2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v8qiv8di2"
+  [(set (match_operand:V8DI 0 "register_operand")
+        (any_extend:V8DI
+          (match_operand:V8QI 1 "nonimmediate_operand")))]
+  "TARGET_AVX512F"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V8QImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
+      emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
(define_insn "avx2_<code>v4qiv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
(any_extend:V4DI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
+;; Expander "<code>v4qiv4di2": extend (any_extend) a V4QI operand into a
+;; V4DI register.  Enabled for TARGET_AVX2.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V16QI through a subreg at byte offset 0, and handed to
+;; avx2_<code>v4qiv4di2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v4qiv4di2"
+  [(set (match_operand:V4DI 0 "register_operand")
+        (any_extend:V4DI
+          (match_operand:V4QI 1 "nonimmediate_operand")))]
+  "TARGET_AVX2"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* The inner mode must be the operand's own mode, V4QImode.
+         simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V4QImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
+      emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
(any_extend:V2DI
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+;; Expander "<code>v2qiv2di2": extend (any_extend) a V2QI register operand
+;; into a V2DI register.  Enabled for TARGET_SSE4_1.
+;; The operand is always viewed as V16QI through a subreg at byte offset 0
+;; and handed to sse4_1_<code>v2qiv2di2; the template itself is never
+;; emitted because the preparation statements end in DONE.
+(define_expand "<code>v2qiv2di2"
+  [(set (match_operand:V2DI 0 "register_operand")
+        (any_extend:V2DI
+          (match_operand:V2QI 1 "register_operand")))]
+  "TARGET_SSE4_1"
+{
+  /* simplify_subreg returns NULL_RTX when nothing can be simplified
+     (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+     register, which creates the SUBREG in that case.  */
+  operands[1] = force_reg (V2QImode, operands[1]);
+  operands[1] = simplify_gen_subreg (V16QImode, operands[1], V2QImode, 0);
+  emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], operands[1]));
+  DONE;
+})
+
(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
+;; Expander "<code>v8hiv8di2": extend (any_extend) a V8HI
+;; nonimmediate operand into a V8DI register.  Enabled for TARGET_AVX512F.
+;; There are no preparation statements, so the template is emitted as is.
+(define_expand "<code>v8hiv8di2"
+ [(set (match_operand:V8DI 0 "register_operand")
+ (any_extend:V8DI
+ (match_operand:V8HI 1 "nonimmediate_operand")))]
+ "TARGET_AVX512F")
+
(define_insn "avx2_<code>v4hiv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
(any_extend:V4DI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
+;; Expander "<code>v4hiv4di2": extend (any_extend) a V4HI operand into a
+;; V4DI register.  Enabled for TARGET_AVX2.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V8HI through a subreg at byte offset 0, and handed to
+;; avx2_<code>v4hiv4di2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v4hiv4di2"
+  [(set (match_operand:V4DI 0 "register_operand")
+        (any_extend:V4DI
+          (match_operand:V4HI 1 "nonimmediate_operand")))]
+  "TARGET_AVX2"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V4HImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
+      emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
(any_extend:V2DI
(any_extend:V2DI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
+;; Expander "<code>v2hiv2di2": extend (any_extend) a V2HI operand into a
+;; V2DI register.  Enabled for TARGET_SSE4_1.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V8HI through a subreg at byte offset 0, and handed to
+;; sse4_1_<code>v2hiv2di2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v2hiv2di2"
+  [(set (match_operand:V2DI 0 "register_operand")
+        (any_extend:V2DI
+          (match_operand:V2HI 1 "nonimmediate_operand")))]
+  "TARGET_SSE4_1"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V2HImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V8HImode, operands[1], V2HImode, 0);
+      emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
(define_insn "avx512f_<code>v8siv8di2<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
+;; Expander "<code>v8siv8di2": extend (any_extend) a V8SI
+;; nonimmediate operand into a V8DI register.  Enabled for TARGET_AVX512F.
+;; There are no preparation statements, so the template is emitted as is.
+;; No operand constraints are given, matching the other expanders here.
+(define_expand "<code>v8siv8di2"
+  [(set (match_operand:V8DI 0 "register_operand")
+        (any_extend:V8DI
+          (match_operand:V8SI 1 "nonimmediate_operand")))]
+  "TARGET_AVX512F")
+
(define_insn "avx2_<code>v4siv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
(set_attr "prefix_extra" "1")
(set_attr "mode" "OI")])
+;; Expander "<code>v4siv4di2": extend (any_extend) a V4SI
+;; nonimmediate operand into a V4DI register.  Enabled for TARGET_AVX2.
+;; There are no preparation statements, so the template is emitted as is.
+;; No operand constraints are given, matching the other expanders here.
+(define_expand "<code>v4siv4di2"
+  [(set (match_operand:V4DI 0 "register_operand")
+        (any_extend:V4DI
+          (match_operand:V4SI 1 "nonimmediate_operand")))]
+  "TARGET_AVX2")
+
(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
(any_extend:V2DI
(any_extend:V2DI (match_dup 1)))]
"operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
+;; Expander "<code>v2siv2di2": extend (any_extend) a V2SI operand into a
+;; V2DI register.  Enabled for TARGET_SSE4_1.
+;; A non-MEM operand is copied into a register if necessary, viewed as
+;; V4SI through a subreg at byte offset 0, and handed to
+;; sse4_1_<code>v2siv2di2.  A MEM operand is left alone and the template
+;; is emitted unchanged.
+(define_expand "<code>v2siv2di2"
+  [(set (match_operand:V2DI 0 "register_operand")
+        (any_extend:V2DI
+          (match_operand:V2SI 1 "nonimmediate_operand")))]
+  "TARGET_SSE4_1"
+{
+  if (!MEM_P (operands[1]))
+    {
+      /* simplify_subreg returns NULL_RTX when nothing can be simplified
+         (e.g. for a plain pseudo), so use simplify_gen_subreg on a
+         register, which creates the SUBREG in that case.  */
+      operands[1] = force_reg (V2SImode, operands[1]);
+      operands[1] = simplify_gen_subreg (V4SImode, operands[1], V2SImode, 0);
+      emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
;; ptestps/ptestpd are very similar to comiss and ucomiss when
;; setting FLAGS_REG. But it is not a really compare instruction.
(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
--- /dev/null
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx2" } */
+
+typedef unsigned char v32qi __attribute__((vector_size (32)));
+typedef unsigned short v16hi __attribute__((vector_size (32)));
+typedef unsigned int v8si __attribute__((vector_size (32)));
+typedef unsigned long long v4di __attribute__((vector_size (32)));
+
+/* Widen elements 0..15 of the unsigned char vector *SRC to unsigned
+   short and store them to DST[0] as one v16hi.  The source vector is
+   read through a pointer.  */
+void
+foo_u8_u16 (v16hi * dst, v32qi * __restrict src)
+{
+ unsigned short tem[16];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ tem[8] = (*src)[8];
+ tem[9] = (*src)[9];
+ tem[10] = (*src)[10];
+ tem[11] = (*src)[11];
+ tem[12] = (*src)[12];
+ tem[13] = (*src)[13];
+ tem[14] = (*src)[14];
+ tem[15] = (*src)[15];
+ dst[0] = *(v16hi *) tem;
+}
+
+/* Widen elements 0..15 of the unsigned char vector SRC to unsigned
+   short and store them to DST[0] as one v16hi.  The source vector is
+   passed by value.  */
+void
+bar_u8_u16 (v16hi * dst, v32qi src)
+{
+ unsigned short tem[16];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ tem[8] = src[8];
+ tem[9] = src[9];
+ tem[10] = src[10];
+ tem[11] = src[11];
+ tem[12] = src[12];
+ tem[13] = src[13];
+ tem[14] = src[14];
+ tem[15] = src[15];
+ dst[0] = *(v16hi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
+
+/* Widen elements 0..7 of the unsigned char vector *SRC to unsigned int
+   and store them to DST[0] as one v8si.  The source vector is read
+   through a pointer.  */
+void
+foo_u8_u32 (v8si * dst, v32qi * __restrict src)
+{
+ unsigned int tem[8];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ dst[0] = *(v8si *) tem;
+}
+
+/* Widen elements 0..7 of the unsigned char vector SRC to unsigned int
+   and store them to DST[0] as one v8si.  The source vector is passed
+   by value.  */
+void
+bar_u8_u32 (v8si * dst, v32qi src)
+{
+ unsigned int tem[8];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v8si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbd" 2 } } */
+
+/* Widen elements 0..3 of the unsigned char vector *SRC to unsigned
+   long long and store them to DST[0] as one v4di.  The source vector
+   is read through a pointer.  */
+void
+foo_u8_u64 (v4di * dst, v32qi * __restrict src)
+{
+ unsigned long long tem[4];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ dst[0] = *(v4di *) tem;
+}
+
+/* Widen elements 0..3 of the unsigned char vector SRC to unsigned long
+   long and store them to DST[0] as one v4di.  The source vector is
+   passed by value.  */
+void
+bar_u8_u64 (v4di * dst, v32qi src)
+{
+ unsigned long long tem[4];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
+
+/* Widen elements 0..7 of the unsigned short vector *SRC to unsigned
+   int and store them to DST[0] as one v8si.  The source vector is read
+   through a pointer.  */
+void
+foo_u16_u32 (v8si * dst, v16hi * __restrict src)
+{
+ unsigned int tem[8];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ dst[0] = *(v8si *) tem;
+}
+
+/* Widen elements 0..7 of the unsigned short vector SRC to unsigned int
+   and store them to DST[0] as one v8si.  The source vector is passed
+   by value.  */
+void
+bar_u16_u32 (v8si * dst, v16hi src)
+{
+ unsigned int tem[8];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v8si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxwd" 2 } } */
+
+/* Widen elements 0..3 of the unsigned short vector *SRC to unsigned
+   long long and store them to DST[0] as one v4di.  The source vector
+   is read through a pointer.  */
+void
+foo_u16_u64 (v4di * dst, v16hi * __restrict src)
+{
+ unsigned long long tem[4];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ dst[0] = *(v4di *) tem;
+}
+
+/* Widen elements 0..3 of the unsigned short vector SRC to unsigned
+   long long and store them to DST[0] as one v4di.  The source vector
+   is passed by value.  */
+void
+bar_u16_u64 (v4di * dst, v16hi src)
+{
+ unsigned long long tem[4];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxwq" 2 } } */
+
+/* Widen elements 0..3 of the unsigned int vector *SRC to unsigned long
+   long and store them to DST[0] as one v4di.  The source vector is
+   read through a pointer.  */
+void
+foo_u32_u64 (v4di * dst, v8si * __restrict src)
+{
+ unsigned long long tem[4];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ dst[0] = *(v4di *) tem;
+}
+
+/* Widen elements 0..3 of the unsigned int vector SRC to unsigned long
+   long and store them to DST[0] as one v4di.  The source vector is
+   passed by value.  */
+void
+bar_u32_u64 (v4di * dst, v8si src)
+{
+ unsigned long long tem[4];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxdq" 2 } } */
--- /dev/null
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512bw" } */
+
+typedef unsigned char v64qi __attribute__((vector_size (64)));
+typedef unsigned short v32hi __attribute__((vector_size (64)));
+typedef unsigned int v16si __attribute__((vector_size (64)));
+typedef unsigned long long v8di __attribute__((vector_size (64)));
+
+/* Widen elements 0..31 of the unsigned char vector *SRC to unsigned
+   short and store them to DST[0] as one v32hi.  The source vector is
+   read through a pointer.  */
+void
+foo_u8_u16 (v32hi * dst, v64qi * __restrict src)
+{
+ unsigned short tem[32];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ tem[8] = (*src)[8];
+ tem[9] = (*src)[9];
+ tem[10] = (*src)[10];
+ tem[11] = (*src)[11];
+ tem[12] = (*src)[12];
+ tem[13] = (*src)[13];
+ tem[14] = (*src)[14];
+ tem[15] = (*src)[15];
+ tem[16] = (*src)[16];
+ tem[17] = (*src)[17];
+ tem[18] = (*src)[18];
+ tem[19] = (*src)[19];
+ tem[20] = (*src)[20];
+ tem[21] = (*src)[21];
+ tem[22] = (*src)[22];
+ tem[23] = (*src)[23];
+ tem[24] = (*src)[24];
+ tem[25] = (*src)[25];
+ tem[26] = (*src)[26];
+ tem[27] = (*src)[27];
+ tem[28] = (*src)[28];
+ tem[29] = (*src)[29];
+ tem[30] = (*src)[30];
+ tem[31] = (*src)[31];
+ dst[0] = *(v32hi *) tem;
+}
+
+/* Widen elements 0..31 of the unsigned char vector SRC to unsigned
+   short and store them to DST[0] as one v32hi.  The source vector is
+   passed by value.  */
+void
+bar_u8_u16 (v32hi * dst, v64qi src)
+{
+ unsigned short tem[32];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ tem[8] = src[8];
+ tem[9] = src[9];
+ tem[10] = src[10];
+ tem[11] = src[11];
+ tem[12] = src[12];
+ tem[13] = src[13];
+ tem[14] = src[14];
+ tem[15] = src[15];
+ tem[16] = src[16];
+ tem[17] = src[17];
+ tem[18] = src[18];
+ tem[19] = src[19];
+ tem[20] = src[20];
+ tem[21] = src[21];
+ tem[22] = src[22];
+ tem[23] = src[23];
+ tem[24] = src[24];
+ tem[25] = src[25];
+ tem[26] = src[26];
+ tem[27] = src[27];
+ tem[28] = src[28];
+ tem[29] = src[29];
+ tem[30] = src[30];
+ tem[31] = src[31];
+ dst[0] = *(v32hi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
+
+/* Widen elements 0..15 of the unsigned char vector *SRC to unsigned
+   int and store them to DST[0] as one v16si.  The source vector is
+   read through a pointer.  */
+void
+foo_u8_u32 (v16si * dst, v64qi * __restrict src)
+{
+ unsigned int tem[16];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ tem[8] = (*src)[8];
+ tem[9] = (*src)[9];
+ tem[10] = (*src)[10];
+ tem[11] = (*src)[11];
+ tem[12] = (*src)[12];
+ tem[13] = (*src)[13];
+ tem[14] = (*src)[14];
+ tem[15] = (*src)[15];
+ dst[0] = *(v16si *) tem;
+}
+
+/* Widen elements 0..15 of the unsigned char vector SRC to unsigned int
+   and store them to DST[0] as one v16si.  The source vector is passed
+   by value.  */
+void
+bar_u8_u32 (v16si * dst, v64qi src)
+{
+ unsigned int tem[16];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ tem[8] = src[8];
+ tem[9] = src[9];
+ tem[10] = src[10];
+ tem[11] = src[11];
+ tem[12] = src[12];
+ tem[13] = src[13];
+ tem[14] = src[14];
+ tem[15] = src[15];
+ dst[0] = *(v16si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbd" 2 } } */
+
+/* Widen elements 0..7 of the unsigned char vector *SRC to unsigned
+   long long and store them to DST[0] as one v8di.  The source vector
+   is read through a pointer.  */
+void
+foo_u8_u64 (v8di * dst, v64qi * __restrict src)
+{
+ unsigned long long tem[8];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ dst[0] = *(v8di *) tem;
+}
+
+/* Widen elements 0..7 of the unsigned char vector SRC to unsigned long
+   long and store them to DST[0] as one v8di.  The source vector is
+   passed by value.  */
+void
+bar_u8_u64 (v8di * dst, v64qi src)
+{
+ unsigned long long tem[8];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v8di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbq" 2 } } */
+
+/* Widen elements 0..15 of the unsigned short vector *SRC to unsigned
+   int and store them to DST[0] as one v16si.  The source vector is
+   read through a pointer.  */
+void
+foo_u16_u32 (v16si * dst, v32hi * __restrict src)
+{
+ unsigned int tem[16];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ tem[8] = (*src)[8];
+ tem[9] = (*src)[9];
+ tem[10] = (*src)[10];
+ tem[11] = (*src)[11];
+ tem[12] = (*src)[12];
+ tem[13] = (*src)[13];
+ tem[14] = (*src)[14];
+ tem[15] = (*src)[15];
+ dst[0] = *(v16si *) tem;
+}
+
+/* Widen elements 0..15 of the unsigned short vector SRC to unsigned
+   int and store them to DST[0] as one v16si.  The source vector is
+   passed by value.  */
+void
+bar_u16_u32 (v16si * dst, v32hi src)
+{
+ unsigned int tem[16];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ tem[8] = src[8];
+ tem[9] = src[9];
+ tem[10] = src[10];
+ tem[11] = src[11];
+ tem[12] = src[12];
+ tem[13] = src[13];
+ tem[14] = src[14];
+ tem[15] = src[15];
+ dst[0] = *(v16si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxwd" 2 } } */
+
+/* Widen elements 0..7 of the unsigned short vector *SRC to unsigned
+   long long and store them to DST[0] as one v8di.  The source vector
+   is read through a pointer.  */
+void
+foo_u16_u64 (v8di * dst, v32hi * __restrict src)
+{
+ unsigned long long tem[8];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ dst[0] = *(v8di *) tem;
+}
+
+/* Widen elements 0..7 of the unsigned short vector SRC to unsigned
+   long long and store them to DST[0] as one v8di.  The source vector
+   is passed by value.  */
+void
+bar_u16_u64 (v8di * dst, v32hi src)
+{
+ unsigned long long tem[8];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v8di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxwq" 2 } } */
+
+/* Widen elements 0..7 of the unsigned int vector *SRC to unsigned long
+   long and store them to DST[0] as one v8di.  The source vector is
+   read through a pointer.  */
+void
+foo_u32_u64 (v8di * dst, v16si * __restrict src)
+{
+ unsigned long long tem[8];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ dst[0] = *(v8di *) tem;
+}
+
+/* Widen elements 0..7 of the unsigned int vector SRC to unsigned long
+   long and store them to DST[0] as one v8di.  The source vector is
+   passed by value.  */
+void
+bar_u32_u64 (v8di * dst, v16si src)
+{
+ unsigned long long tem[8];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v8di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxdq" 2 } } */
--- /dev/null
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse4.1" } */
+
+typedef unsigned char v16qi __attribute__((vector_size (16)));
+typedef unsigned short v8hi __attribute__((vector_size (16)));
+typedef unsigned int v4si __attribute__((vector_size (16)));
+typedef unsigned long long v2di __attribute__((vector_size (16)));
+
+/* Widen elements 0..7 of the unsigned char vector *SRC to unsigned
+   short and store them to DST[0] as one v8hi.  The source vector is
+   read through a pointer.  */
+void
+foo_u8_u16 (v8hi * dst, v16qi * __restrict src)
+{
+ unsigned short tem[8];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ tem[4] = (*src)[4];
+ tem[5] = (*src)[5];
+ tem[6] = (*src)[6];
+ tem[7] = (*src)[7];
+ dst[0] = *(v8hi *) tem;
+}
+
+/* Widen elements 0..7 of the unsigned char vector SRC to unsigned
+   short and store them to DST[0] as one v8hi.  The source vector is
+   passed by value.  */
+void
+bar_u8_u16 (v8hi * dst, v16qi src)
+{
+ unsigned short tem[8];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ tem[4] = src[4];
+ tem[5] = src[5];
+ tem[6] = src[6];
+ tem[7] = src[7];
+ dst[0] = *(v8hi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
+
+/* Widen elements 0..3 of the unsigned char vector *SRC to unsigned int
+   and store them to DST[0] as one v4si.  The source vector is read
+   through a pointer.  */
+void
+foo_u8_u32 (v4si * dst, v16qi * __restrict src)
+{
+ unsigned int tem[4];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ dst[0] = *(v4si *) tem;
+}
+
+/* Widen elements 0..3 of the unsigned char vector SRC to unsigned int
+   and store them to DST[0] as one v4si.  The source vector is passed
+   by value.  */
+void
+bar_u8_u32 (v4si * dst, v16qi src)
+{
+ unsigned int tem[4];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbd" 2 { xfail *-*-* } } } */
+
+/* Widen elements 0..1 of the unsigned char vector *SRC to unsigned
+   long long and store them to DST[0] as one v2di.  The source vector
+   is read through a pointer.  */
+void
+foo_u8_u64 (v2di * dst, v16qi * __restrict src)
+{
+ unsigned long long tem[2];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ dst[0] = *(v2di *) tem;
+}
+
+/* Widen elements 0..1 of the unsigned char vector SRC to unsigned long
+   long and store them to DST[0] as one v2di.  The source vector is
+   passed by value.  */
+void
+bar_u8_u64 (v2di * dst, v16qi src)
+{
+ unsigned long long tem[2];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ dst[0] = *(v2di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */
+
+/* Widen elements 0..3 of the unsigned short vector *SRC to unsigned
+   int and store them to DST[0] as one v4si.  The source vector is read
+   through a pointer.  */
+void
+foo_u16_u32 (v4si * dst, v8hi * __restrict src)
+{
+ unsigned int tem[4];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ dst[0] = *(v4si *) tem;
+}
+
+/* Widen elements 0..3 of the unsigned short vector SRC to unsigned int
+   and store them to DST[0] as one v4si.  The source vector is passed
+   by value.  */
+void
+bar_u16_u32 (v4si * dst, v8hi src)
+{
+ unsigned int tem[4];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxwd" 2 } } */
+
+/* Widen elements 0..1 of the unsigned short vector *SRC to unsigned
+   long long and store them to DST[0] as one v2di.  The source vector
+   is read through a pointer.  */
+void
+foo_u16_u64 (v2di * dst, v8hi * __restrict src)
+{
+ unsigned long long tem[2];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ dst[0] = *(v2di *) tem;
+}
+
+/* Widen elements 0..1 of the unsigned short vector SRC to unsigned
+   long long and store them to DST[0] as one v2di.  The source vector
+   is passed by value.  */
+void
+bar_u16_u64 (v2di * dst, v8hi src)
+{
+ unsigned long long tem[2];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ dst[0] = *(v2di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxwq" 2 { xfail *-*-* } } } */
+
+/* Widen elements 0..1 of the unsigned int vector *SRC to unsigned long
+   long and store them to DST[0] as one v2di.  The source vector is
+   read through a pointer.  */
+void
+foo_u32_u64 (v2di * dst, v4si * __restrict src)
+{
+ unsigned long long tem[2];
+ tem[0] = (*src)[0];
+ tem[1] = (*src)[1];
+ dst[0] = *(v2di *) tem;
+}
+
+/* Widen elements 0..1 of the unsigned int vector SRC to unsigned long
+   long and store them to DST[0] as one v2di.  The source vector is
+   passed by value.  */
+void
+bar_u32_u64 (v2di * dst, v4si src)
+{
+ unsigned long long tem[2];
+ tem[0] = src[0];
+ tem[1] = src[1];
+ dst[0] = *(v2di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovzxdq" 2 } } */