From 9d0dc47de6ed9609fe7445ea0941a6a822c80f88 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Tue, 26 May 2020 11:14:31 +0800 Subject: [PATCH] Fix non-comforming expander for floatv2div2sf2,floatunsv2div2sf2,fix_truncv2sfv2di,fixuns_truncv2sfv2di. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit 2020-05-26 Hongtao Liu Uroš Bizjak gcc/ChangeLog: PR target/95211 PR target/95256 * config/i386/sse.md (v2div2sf2): New expander. (fix_truncv2sfv2di2): Ditto. (avx512dq_floatv2div2sf2): Renaming from floatv2div2sf2. (avx512dq_fix_truncv2sfv2di2): Renaming from fix_truncv2sfv2di2. (vec_pack_float_): Adjust icode name. (vec_unpack_fix_trunc_lo_): Ditto. (vec_unpack_fix_trunc_hi_): Ditto. * config/i386/i386-builtin.def: Ditto. * emit-rtl.c (validate_subreg): Allow use of *paradoxical* vector subregs when both omode and imode are vector mode and have the same inner mode. gcc/testsuite/ChangeLog * gcc.target/i386/pr95211.c: New test. --- gcc/config/i386/i386-builtin.def | 4 +- gcc/config/i386/sse.md | 51 +++++++++++++++++++++---- gcc/emit-rtl.c | 12 +++--- gcc/testsuite/gcc.target/i386/pr95211.c | 38 ++++++++++++++++++ 4 files changed, 90 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr95211.c diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index fa123788a8e..b873498f3ab 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1649,9 +1649,9 @@ BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_not BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI) -BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fixuns_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fixuns_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index cee2b2890a5..fde65391d7d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5795,7 +5795,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_expand "floatv2div2sf2" +(define_expand "avx512dq_floatv2div2sf2" [(set (match_operand:V4SF 0 "register_operand" "=v") (vec_concat:V4SF (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm")) @@ -5803,7 +5803,7 @@ "TARGET_AVX512DQ && TARGET_AVX512VL" "operands[2] = CONST0_RTX (V2SFmode);") -(define_insn "*floatv2div2sf2" +(define_insn "*avx512dq_floatv2div2sf2" [(set (match_operand:V4SF 0 "register_operand" "=v") (vec_concat:V4SF (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm")) @@ -5814,6 +5814,17 @@ (set_attr "prefix" "evex") (set_attr "mode" "V4SF")]) +(define_expand "floatv2div2sf2" + [(set (match_operand:V2SF 0 "register_operand") + (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))] + "TARGET_AVX512DQ && TARGET_AVX512VL" +{ + operands[0] = simplify_gen_subreg (V4SFmode, operands[0], V2SFmode, 0); + emit_insn (gen_avx512dq_floatv2div2sf2 + (operands[0], operands[1])); + DONE; +}) + (define_mode_attr vpckfloat_concat_mode [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")]) (define_mode_attr vpckfloat_temp_mode @@ -5830,7 +5841,12 @@ { rtx r1 = gen_reg_rtx (mode); rtx r2 = gen_reg_rtx (mode); - rtx (*gen) (rtx, rtx) = gen_float2; + rtx (*gen) (rtx, rtx); + + if (mode == V2DImode) + gen = gen_avx512dq_floatv2div2sf2; + else + gen = gen_float2; emit_insn (gen (r1, operands[1])); emit_insn (gen (r2, operands[2])); if (mode == V2DImode) @@ -6217,7 +6233,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn "fix_truncv2sfv2di2" +(define_insn "avx512dq_fix_truncv2sfv2di2" [(set (match_operand:V2DI 0 "register_operand" "=v") (any_fix:V2DI (vec_select:V2SF @@ -6229,6 +6245,18 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_expand "fix_truncv2sfv2di2" + [(set (match_operand:V2DI 0 "register_operand") + (any_fix:V2DI + (match_operand:V2SF 1 "register_operand")))] + "TARGET_AVX512DQ && TARGET_AVX512VL" +{ + operands[1] = simplify_gen_subreg (V4SFmode, operands[1], V2SFmode, 0); + emit_insn (gen_avx512dq_fix_truncv2sfv2di2 + (operands[0], operands[1])); + DONE; +}) + (define_mode_attr vunpckfixt_mode [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")]) (define_mode_attr vunpckfixt_model @@ -6243,14 +6271,18 @@ "TARGET_AVX512DQ" { rtx tem = operands[1]; + rtx (*gen) (rtx, rtx); + if (mode != V4SFmode) { tem = gen_reg_rtx (mode); emit_insn (gen_vec_extract_lo_ (tem, operands[1])); + gen = gen_fix_trunc2; } - rtx (*gen) (rtx, rtx) - = gen_fix_trunc2; + else + gen = gen_avx512dq_fix_truncv2sfv2di2; + emit_insn (gen (operands[0], tem)); DONE; }) @@ -6262,19 +6294,22 @@ "TARGET_AVX512DQ" { rtx tem; + rtx (*gen) (rtx, rtx); + if (mode != V4SFmode) { tem = gen_reg_rtx (mode); emit_insn (gen_vec_extract_hi_ (tem, operands[1])); + gen = gen_fix_trunc2; } else { tem = gen_reg_rtx (V4SFmode); emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e))); + gen = gen_avx512dq_fix_truncv2sfv2di2; } - rtx (*gen) (rtx, rtx) - = gen_fix_trunc2; + emit_insn (gen (operands[0], tem)); DONE; }) diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index c49628040f8..2b790636366 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -939,11 +939,13 @@ validate_subreg (machine_mode omode, machine_mode imode, && GET_MODE_INNER (imode) == omode) ; /* ??? x86 sse code makes heavy use of *paradoxical* vector subregs, - i.e. (subreg:V4SF (reg:SF) 0). This surely isn't the cleanest way to - represent this. It's questionable if this ought to be represented at - all -- why can't this all be hidden in post-reload splitters that make - arbitrarily mode changes to the registers themselves. */ - else if (VECTOR_MODE_P (omode) && GET_MODE_INNER (omode) == imode) + i.e. (subreg:V4SF (reg:SF) 0) or (subreg:V4SF (reg:V2SF) 0). This + surely isn't the cleanest way to represent this. It's questionable + if this ought to be represented at all -- why can't this all be hidden + in post-reload splitters that make arbitrarily mode changes to the + registers themselves. */ + else if (VECTOR_MODE_P (omode) + && GET_MODE_INNER (omode) == GET_MODE_INNER (imode)) ; /* Subregs involving floating point modes are not allowed to change size. Therefore (subreg:DI (reg:DF) 0) is fine, but diff --git a/gcc/testsuite/gcc.target/i386/pr95211.c b/gcc/testsuite/gcc.target/i386/pr95211.c new file mode 100644 index 00000000000..dc10f8f4659 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr95211.c @@ -0,0 +1,38 @@ +/* PR target/95211 target/95256 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -ftree-slp-vectorize -march=skylake-avx512" } */ + +extern float f[4]; +extern long long l[2]; +extern long long ul[2]; + +void +fix_128 (void) +{ + l[0] = f[0]; + l[1] = f[1]; +} + +void +fixuns_128 (void) +{ + ul[0] = f[0]; + ul[1] = f[1]; +} + +void +float_128 (void) +{ + f[0] = l[0]; + f[1] = l[1]; +} + +void +floatuns_128 (void) +{ + f[0] = ul[0]; + f[1] = ul[1]; +} + +/* { dg-final { scan-assembler-times "vcvttps2qq" 2 } } */ +/* { dg-final { scan-assembler-times "vcvtqq2ps" 2 } } */ -- 2.30.2