From e740f3d73144abbca1ad98a04825c6bd63314a0b Mon Sep 17 00:00:00 2001 From: liuhongt Date: Wed, 20 May 2020 15:53:14 +0800 Subject: [PATCH] Add missing vector truncmn2 expanders [PR92658] 2020-05-22 Hongtao.liu gcc/ChangeLog: PR target/92658 * config/i386/sse.md (trunc2): New expander (truncv32hiv32qi2): Ditto. (trunc2): Ditto. (trunc2): Ditto. (trunc2): Ditto. (truncv2div2si2): Ditto. (truncv8div8qi2): Ditto. (avx512f_v8div16qi2): Renaming from *avx512f_v8div16qi2. (avx512vl_v2div2si): Renaming from *avx512vl_v2div2si2. (avx512vl_v2qi2): Renaming from *avx512vl_vqi2. gcc/testsuite/ChangeLog: * gcc.target/i386/pr92658-avx512f.c: New test. * gcc.target/i386/pr92658-avx512vl.c: Ditto. * gcc.target/i386/pr92658-avx512bw-trunc.c: Ditto. --- gcc/ChangeLog | 15 ++ gcc/config/i386/sse.md | 77 ++++++++++- gcc/testsuite/ChangeLog | 6 + .../gcc.target/i386/pr92658-avx512bw-trunc.c | 91 ++++++++++++ .../gcc.target/i386/pr92658-avx512f.c | 106 ++++++++++++++ .../gcc.target/i386/pr92658-avx512vl.c | 129 ++++++++++++++++++ 6 files changed, 420 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c create mode 100644 gcc/testsuite/gcc.target/i386/pr92658-avx512f.c create mode 100644 gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9106cdb2e72..4049ac308e2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2020-05-22 Hongtao.liu + + PR target/92658 + * config/i386/sse.md (trunc2): New expander + (truncv32hiv32qi2): Ditto. + (trunc2): Ditto. + (trunc2): Ditto. + (trunc2): Ditto. + (truncv2div2si2): Ditto. + (truncv8div8qi2): Ditto. + (avx512f_v8div16qi2): Renaming from *avx512f_v8div16qi2. + (avx512vl_v2div2si): Renaming from *avx512vl_v2div2si2. + (avx512vl_v2qi2): Renaming from + *avx512vl_vqi2. + 2020-05-22 H.J. 
Lu PR target/95258 diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5071fb2895a..bb8ee19b64b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10513,6 +10513,12 @@ (define_mode_attr pmov_suff_1 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")]) +(define_expand "trunc2" + [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand") + (truncate:PMOV_DST_MODE_1 + (match_operand: 1 "register_operand")))] + "TARGET_AVX512F") + (define_insn "*avx512f_2" [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") (any_truncate:PMOV_DST_MODE_1 @@ -10547,6 +10553,12 @@ (match_operand: 2 "register_operand")))] "TARGET_AVX512F") +(define_expand "truncv32hiv32qi2" + [(set (match_operand:V32QI 0 "nonimmediate_operand") + (truncate:V32QI + (match_operand:V32HI 1 "register_operand")))] + "TARGET_AVX512BW") + (define_insn "avx512bw_v32hiv32qi2" [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") (any_truncate:V32QI @@ -10586,6 +10598,12 @@ (define_mode_attr pmov_suff_2 [(V16QI "wb") (V8HI "dw") (V4SI "qd")]) +(define_expand "trunc2" + [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand") + (truncate:PMOV_DST_MODE_2 + (match_operand: 1 "register_operand")))] + "TARGET_AVX512VL") + (define_insn "*avx512vl_2" [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m") (any_truncate:PMOV_DST_MODE_2 @@ -10628,7 +10646,20 @@ (define_mode_attr pmov_suff_3 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")]) -(define_insn "*avx512vl_vqi2" +(define_expand "trunc2" + [(set (match_operand: 0 "register_operand") + (truncate: + (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V16QImode, operands[0], mode, 0); + emit_insn (gen_avx512vl_truncatevqi2 (operands[0], + operands[1], + CONST0_RTX (mode))); + DONE; +}) + +(define_insn "avx512vl_vqi2" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI (any_truncate: @@ -10920,7 
+10951,21 @@ (define_mode_attr pmov_suff_4 [(V4DI "qw") (V2DI "qw") (V4SI "dw")]) -(define_insn "*avx512vl_vhi2" +(define_expand "trunc2" + [(set (match_operand: 0 "register_operand") + (truncate: + (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V8HImode, operands[0], mode, 0); + emit_insn (gen_avx512vl_truncatevhi2 (operands[0], + operands[1], + CONST0_RTX (mode))); + DONE; + +}) + +(define_insn "avx512vl_vhi2" [(set (match_operand:V8HI 0 "register_operand" "=v") (vec_concat:V8HI (any_truncate: @@ -11085,7 +11130,20 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512vl_v2div2si2" +(define_expand "truncv2div2si2" + [(set (match_operand:V2SI 0 "register_operand") + (truncate:V2SI + (match_operand:V2DI 1 "register_operand")))] + "TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0); + emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0], + operands[1], + CONST0_RTX (V2SImode))); + DONE; +}) + +(define_insn "avx512vl_v2div2si2" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (any_truncate:V2SI @@ -11164,7 +11222,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "*avx512f_v8div16qi2" +(define_expand "truncv8div8qi2" + [(set (match_operand:V8QI 0 "register_operand") + (truncate:V8QI + (match_operand:V8DI 1 "register_operand")))] + "TARGET_AVX512F" +{ + operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0); + emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "avx512f_v8div16qi2" [(set (match_operand:V16QI 0 "register_operand" "=v") (vec_concat:V16QI (any_truncate:V8QI diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5a628260925..7812e3f0f57 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-05-22 Hongtao.liu + + * gcc.target/i386/pr92658-avx512f.c: New test. 
+ * gcc.target/i386/pr92658-avx512vl.c: Ditto. + * gcc.target/i386/pr92658-avx512bw-trunc.c: Ditto. + 2020-05-22 Richard Biener PR tree-optimization/95268 diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c new file mode 100644 index 00000000000..bdfad7a4d18 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c @@ -0,0 +1,91 @@ +/* PR target/92658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512bw -mavx512vl" } */ + +typedef unsigned char v8qi __attribute__((vector_size (8))); +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned char v32qi __attribute__((vector_size (32))); +typedef unsigned short v8hi __attribute__((vector_size (16))); +typedef unsigned short v16hi __attribute__((vector_size (32))); +typedef unsigned short v32hi __attribute__((vector_size (64))); + + +void +truncwb_512 (v32qi * dst, v32hi * __restrict src) +{ + unsigned char tem[32]; /* One byte per v32hi lane stored below. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + tem[16] = (*src)[16]; + tem[17] = (*src)[17]; + tem[18] = (*src)[18]; + tem[19] = (*src)[19]; + tem[20] = (*src)[20]; + tem[21] = (*src)[21]; + tem[22] = (*src)[22]; + tem[23] = (*src)[23]; + tem[24] = (*src)[24]; + tem[25] = (*src)[25]; + tem[26] = (*src)[26]; + tem[27] = (*src)[27]; + tem[28] = (*src)[28]; + tem[29] = (*src)[29]; + tem[30] = (*src)[30]; + tem[31] = (*src)[31]; + dst[0] = *(v32qi *) tem; +} + +void +truncwb_256 (v16qi * dst, v16hi * __restrict src) +{ + unsigned char tem[16]; /* One byte per v16hi lane stored below. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] =
(*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16qi *) tem; +} + +void +truncwb_128 (v16qi * dst, v8hi * __restrict src) +{ + unsigned char tem[16]; /* Sized to the v16qi load below; only 8 bytes are set. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovwb" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovwb" 3 { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c new file mode 100644 index 00000000000..2ba29074a81 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c @@ -0,0 +1,106 @@ +/* PR target/92658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512f" } */ + +typedef unsigned char v8qi __attribute__((vector_size (8))); +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned short v8hi __attribute__((vector_size (16))); +typedef unsigned short v16hi __attribute__((vector_size (32))); +typedef unsigned int v8si __attribute__((vector_size (32))); +typedef unsigned int v16si __attribute__((vector_size (64))); +typedef unsigned long long v8di __attribute__((vector_size (64))); + +void +truncqd (v8si * dst, v8di * __restrict src) +{ + unsigned tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8si *) tem; +} + +void +truncqw (v8hi * dst, v8di * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6]
= (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb (v8qi * dst, v8di * __restrict src) +{ + unsigned char tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v8qi *) tem; +} + +void +truncdw (v16hi * dst, v16si * __restrict src) +{ + unsigned short tem[16]; /* One element per v16si lane stored below. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16hi *) tem; +} + + +void +truncdb (v16qi * dst, v16si * __restrict src) +{ + unsigned char tem[16]; /* One byte per v16si lane stored below. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + tem[8] = (*src)[8]; + tem[9] = (*src)[9]; + tem[10] = (*src)[10]; + tem[11] = (*src)[11]; + tem[12] = (*src)[12]; + tem[13] = (*src)[13]; + tem[14] = (*src)[14]; + tem[15] = (*src)[15]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovqd" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovqw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovqb" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c new file mode 100644 index 00000000000..50b32f968ac --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c @@ -0,0 +1,129 @@ +/* PR target/92658 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mavx512f -mavx512vl" } */ + +typedef unsigned char v16qi __attribute__((vector_size (16))); +typedef unsigned short v8hi
__attribute__((vector_size (16))); +typedef unsigned int v4si __attribute__((vector_size (16))); +typedef unsigned int v8si __attribute__((vector_size (32))); +typedef unsigned long long v2di __attribute__((vector_size (16))); +typedef unsigned long long v4di __attribute__((vector_size (32))); + +void +truncqd_256 (v4si * dst, v4di * __restrict src) +{ + unsigned tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v4si *) tem; +} + +void +truncqw_256 (v8hi * dst, v4di * __restrict src) +{ + unsigned short tem[8]; /* Sized to the v8hi load below; only 4 elements are set. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb_256 (v16qi * dst, v4di * __restrict src) +{ + unsigned char tem[16]; /* Sized to the v16qi load below; only 4 bytes are set. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v16qi *) tem; +} + +void +truncqd_128 (v4si * dst, v2di * __restrict src) +{ + unsigned tem[4]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v4si *) tem; +} + +void +truncqw_128 (v8hi * dst, v2di * __restrict src) +{ + unsigned short tem[8]; /* Sized to the v8hi load below; only 2 elements are set. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v8hi *) tem; +} + +void +truncqb_128 (v16qi * dst, v2di * __restrict src) +{ + unsigned char tem[16]; /* Sized to the v16qi load below; only 2 bytes are set. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + dst[0] = *(v16qi *) tem; +} + +void +truncdw_256 (v8hi * dst, v8si * __restrict src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v8hi *) tem; +} + +void +truncdb_256 (v16qi * dst, v8si * __restrict src) +{ + unsigned char tem[16]; /* Sized to the v16qi load below; only 8 bytes are set. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + tem[4] = (*src)[4]; + tem[5] = (*src)[5]; + tem[6] = (*src)[6]; + tem[7] = (*src)[7]; + dst[0] = *(v16qi *) tem; +} + +void +truncdw_128 (v8hi * dst, v4si * __restrict
src) +{ + unsigned short tem[8]; + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v8hi *) tem; +} + +void +truncdb_128 (v16qi * dst, v4si * __restrict src) +{ + unsigned char tem[16]; /* Sized to the v16qi load below; only 4 bytes are set. */ + tem[0] = (*src)[0]; + tem[1] = (*src)[1]; + tem[2] = (*src)[2]; + tem[3] = (*src)[3]; + dst[0] = *(v16qi *) tem; +} + +/* { dg-final { scan-assembler-times "vpmovqd" 2 } } */ +/* { dg-final { scan-assembler-times "vpmovqw" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovqb" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovdw" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovdb" 2 { xfail *-*-* } } } */ -- 2.30.2