From 4801cc61b13b55b988c1bd808444cccd3663f033 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 9 Sep 2018 23:32:08 +0200 Subject: [PATCH] i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern. * config/i386/i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern. (float_extend partial SSE register stall splitter): Ditto. (float_truncate partial SSE register stall splitter): Ditto. From-SVN: r264185 --- gcc/ChangeLog | 7 ++ gcc/config/i386/i386.md | 216 ++++++++++++++++++++-------------------- 2 files changed, 115 insertions(+), 108 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a29b69ccfcf..6dbe8147b3e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2018-09-09 Uros Bizjak + + * config/i386/i386.md (float partial SSE register stall splitter): Move + splitter near its instruction pattern. + (float_extend partial SSE register stall splitter): Ditto. + (float_truncate partial SSE register stall splitter): Ditto. 
+ 2018-09-09 Hans-Peter Nilsson PR target/86794 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0ee2d91414a..059ddbd6c97 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4477,6 +4477,40 @@ } }) +(define_insn "*extendsfdf2" + [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") + (float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,ssecvt") + (set_attr "prefix" "orig,orig,maybe_vex") + (set_attr "mode" "SF,XF,DF") + (set (attr "enabled") + (if_then_else + (match_test ("TARGET_SSE2 && TARGET_SSE_MATH")) + (if_then_else + (eq_attr "alternative" "0,1") + (symbol_ref "TARGET_MIX_SSE_I387") + (symbol_ref "true")) + (if_then_else + (eq_attr "alternative" "0,1") + (symbol_ref "true") + (symbol_ref "false"))))]) + /* For converting SF(xmm2) to DF(xmm1), use the following code instead of cvtss2sd: unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs @@ -4544,39 +4578,31 @@ (set (match_dup 0) (float_extend:DF (match_dup 2)))] "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);") -(define_insn "*extendsfdf2" - [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") +;; Break partial reg stall for cvtss2sd. This splitter should split +;; late in the pass sequence (after register rename pass), +;; so allocated registers won't change anymore. 
+ +(define_split + [(set (match_operand:DF 0 "sse_reg_operand") (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + (match_operand:SF 1 "nonimmediate_operand")))] + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + && optimize_function_for_speed_p (cfun) + && (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1])) + && (!EXT_REX_SSE_REG_P (operands[0]) + || TARGET_AVX512VL)" + [(set (match_dup 0) + (vec_merge:V2DF + (vec_duplicate:V2DF + (float_extend:DF + (match_dup 1))) + (match_dup 0) + (const_int 1)))] { - switch (which_alternative) - { - case 0: - case 1: - return output_387_reg_move (insn, operands); - - case 2: - return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov,fmov,ssecvt") - (set_attr "prefix" "orig,orig,maybe_vex") - (set_attr "mode" "SF,XF,DF") - (set (attr "enabled") - (if_then_else - (match_test ("TARGET_SSE2 && TARGET_SSE_MATH")) - (if_then_else - (eq_attr "alternative" "0,1") - (symbol_ref "TARGET_MIX_SSE_I387") - (symbol_ref "true")) - (if_then_else - (eq_attr "alternative" "0,1") - (symbol_ref "true") - (symbol_ref "false"))))]) + operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode); + emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); +}) (define_expand "extendxf2" [(set (match_operand:XF 0 "nonimmediate_operand") @@ -4710,6 +4736,32 @@ (set (match_dup 0) (float_truncate:SF (match_dup 2)))] "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);") +;; Break partial reg stall for cvtsd2ss. This splitter should split +;; late in the pass sequence (after register rename pass), +;; so allocated registers won't change anymore. 
+ +(define_split + [(set (match_operand:SF 0 "sse_reg_operand") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand")))] + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + && optimize_function_for_speed_p (cfun) + && (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1])) + && (!EXT_REX_SSE_REG_P (operands[0]) + || TARGET_AVX512VL)" + [(set (match_dup 0) + (vec_merge:V4SF + (vec_duplicate:V4SF + (float_truncate:SF + (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode); + emit_move_insn (operands[0], CONST0_RTX (V4SFmode)); +}) + ;; Conversion from XFmode to {SF,DF}mode (define_insn "truncxf2" @@ -5152,83 +5204,6 @@ DONE; }) -;; Avoid partial SSE register dependency stalls. This splitter should split -;; late in the pass sequence (after register rename pass), so allocated -;; registers won't change anymore - -(define_split - [(set (match_operand:MODEF 0 "sse_reg_operand") - (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))] - "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed - && optimize_function_for_speed_p (cfun) - && (!EXT_REX_SSE_REG_P (operands[0]) - || TARGET_AVX512VL)" - [(set (match_dup 0) - (vec_merge:<MODEF:ssevecmode> - (vec_duplicate:<MODEF:ssevecmode> - (float:MODEF - (match_dup 1))) - (match_dup 0) - (const_int 1)))] -{ - const machine_mode vmode = <MODEF:ssevecmode>mode; - - operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode); - emit_move_insn (operands[0], CONST0_RTX (vmode)); -}) - -;; Break partial reg stall for cvtsd2ss. This splitter should split -;; late in the pass sequence (after register rename pass), -;; so allocated registers won't change anymore. 
- -(define_split - [(set (match_operand:SF 0 "sse_reg_operand") - (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand")))] - "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed - && optimize_function_for_speed_p (cfun) - && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1])) - && (!EXT_REX_SSE_REG_P (operands[0]) - || TARGET_AVX512VL)" - [(set (match_dup 0) - (vec_merge:V4SF - (vec_duplicate:V4SF - (float_truncate:SF - (match_dup 1))) - (match_dup 0) - (const_int 1)))] -{ - operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode); - emit_move_insn (operands[0], CONST0_RTX (V4SFmode)); -}) - -;; Break partial reg stall for cvtss2sd. This splitter should split -;; late in the pass sequence (after register rename pass), -;; so allocated registers won't change anymore. - -(define_split - [(set (match_operand:DF 0 "sse_reg_operand") - (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand")))] - "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed - && optimize_function_for_speed_p (cfun) - && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1])) - && (!EXT_REX_SSE_REG_P (operands[0]) - || TARGET_AVX512VL)" - [(set (match_dup 0) - (vec_merge:V2DF - (vec_duplicate:V2DF - (float_extend:DF - (match_dup 1))) - (match_dup 0) - (const_int 1)))] -{ - operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode); - emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); -}) - ;; Avoid store forwarding (partial memory) stall penalty ;; by passing DImode value through XMM registers. */ @@ -5279,6 +5254,31 @@ (set_attr "unit" "i387") (set_attr "fp_int_src" "true")]) +;; Avoid partial SSE register dependency stalls. 
This splitter should split +;; late in the pass sequence (after register rename pass), so allocated +;; registers won't change anymore + +(define_split + [(set (match_operand:MODEF 0 "sse_reg_operand") + (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))] + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed + && optimize_function_for_speed_p (cfun) + && (!EXT_REX_SSE_REG_P (operands[0]) + || TARGET_AVX512VL)" + [(set (match_dup 0) + (vec_merge:<MODEF:ssevecmode> + (vec_duplicate:<MODEF:ssevecmode> + (float:MODEF + (match_dup 1))) + (match_dup 0) + (const_int 1)))] +{ + const machine_mode vmode = <MODEF:ssevecmode>mode; + + operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode); + emit_move_insn (operands[0], CONST0_RTX (vmode)); +}) + (define_expand "floatuns2" [(set (match_operand:MODEF 0 "register_operand") (unsigned_float:MODEF -- 2.30.2