From 1d4b4f4979171ef0dacc452439e3a317795441db Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 31 Jan 2019 21:06:42 +0100 Subject: [PATCH] re PR target/89071 (AVX vcvtsd2ss lets us avoid PXOR dependency breaking for scalar float<->double and other scalar xmm,xmm instructions) PR target/89071 * config/i386/i386.md (*extendsfdf2): Split out reg->reg alternative to avoid partial SSE register stall for TARGET_AVX. (truncdfsf2): Ditto. (sse4_1_round<mode>2): Ditto. From-SVN: r268427 --- gcc/ChangeLog | 8 ++++++++ gcc/config/i386/i386.md | 39 ++++++++++++++++++++++----------------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 56e13e882c7..bf8892946f6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-01-31 Uroš Bizjak + + PR target/89071 + * config/i386/i386.md (*extendsfdf2): Split out reg->reg + alternative to avoid partial SSE register stall for TARGET_AVX. + (truncdfsf2): Ditto. + (sse4_1_round<mode>2): Ditto. + 2018-01-31 Bill Schmidt PR tree-optimization/89008 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d085e88bc61..744f155fca6 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4370,9 +4370,9 @@ }) (define_insn "*extendsfdf2" - [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") + [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v") (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] + (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))] "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" { switch (which_alternative) @@ -4382,15 +4382,17 @@ return output_387_reg_move (insn, operands); case 2: + return "%vcvtss2sd\t{%d1, %0|%0, %d1}"; + case 3: return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,ssecvt") - (set_attr "prefix" "orig,orig,maybe_vex") - (set_attr "mode" "SF,XF,DF") + [(set_attr "type" "fmov,fmov,ssecvt,ssecvt") + (set_attr "prefix" 
"orig,orig,maybe_vex,maybe_vex") + (set_attr "mode" "SF,XF,DF,DF") (set (attr "enabled") (if_then_else (match_test ("TARGET_SSE2 && TARGET_SSE_MATH")) @@ -4481,7 +4483,7 @@ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed && optimize_function_for_speed_p (cfun) && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1])) + || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1]))) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 0) @@ -4534,9 +4536,9 @@ ;; Conversion from DFmode to SFmode. (define_insn "truncdfsf2" - [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v") + [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v") (float_truncate:SF - (match_operand:DF 1 "register_ssemem_operand" "f,f,vm")))] + (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))] "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" { switch (which_alternative) @@ -4546,13 +4548,15 @@ return output_387_reg_move (insn, operands); case 2: + return "%vcvtsd2ss\t{%d1, %0|%0, %d1}"; + case 3: return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,ssecvt") + [(set_attr "type" "fmov,fmov,ssecvt,ssecvt") (set_attr "mode" "SF") (set (attr "enabled") (if_then_else @@ -4639,7 +4643,7 @@ "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed && optimize_function_for_speed_p (cfun) && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1])) + || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1]))) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 0) @@ -16171,19 +16175,20 @@ (define_insn "sse4_1_round<mode>2" - [(set (match_operand:MODEF 0 "register_operand" "=x,v") - (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm") - (match_operand:SI 2 "const_0_to_15_operand" "n,n")] + [(set (match_operand:MODEF 0 "register_operand" "=x,x,v") + (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "x,m,vm") + 
(match_operand:SI 2 "const_0_to_15_operand" "n,n,n")] UNSPEC_ROUND))] "TARGET_SSE4_1" "@ + %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2} %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2} vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}" [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1,*") - (set_attr "length_immediate" "*,1") - (set_attr "prefix" "maybe_vex,evex") - (set_attr "isa" "noavx512f,avx512f") + (set_attr "prefix_extra" "1,1,*") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "maybe_vex,maybe_vex,evex") + (set_attr "isa" "noavx512f,noavx512f,avx512f") (set_attr "mode" "<MODE>")]) (define_insn "rintxf2" -- 2.30.2