From d31fd1e8ab24f5264894d4f887bcaacc5bc6ed40 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 3 Feb 2019 17:48:41 +0100 Subject: [PATCH] re PR target/89071 (AVX vcvtsd2ss lets us avoid PXOR dependency breaking for scalar float<->double and other scalar xmm,xmm instructions) PR target/89071 * config/i386/i386.md (*sqrt<mode>2_sse): Add (v,0) alternative. Do not prefer (v,v) alternative for non-AVX targets and (m,v) alternative for speed when TARGET_SSE_PARTIAL_REG_DEPENDENCY is set. (*rcpsf2_sse): Ditto. (*rsqrtsf2_sse): Ditto. (sse4_1_round<mode>2): Ditto. + + PR target/89071 + * config/i386/i386.md (*sqrt<mode>2_sse): Add (v,0) alternative. + Do not prefer (v,v) alternative for non-AVX targets and (m,v) + alternative for speed when TARGET_SSE_PARTIAL_REG_DEPENDENCY is set. + (*rcpsf2_sse): Ditto. + (*rsqrtsf2_sse): Ditto. + (sse4_1_round<mode>2): Ditto. PR debug/87295 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 744f155fca6..9948f77fca5 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4472,9 +4472,9 @@ (set (match_dup 0) (float_extend:DF (match_dup 2)))] "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);") -;; Break partial reg stall for cvtss2sd. This splitter should split -;; late in the pass sequence (after register rename pass), -;; so allocated registers won't change anymore. +;; Break partial SSE register dependency stall. This splitter should split +;; late in the pass sequence (after register rename pass), so allocated +;; registers won't change anymore (define_split [(set (match_operand:DF 0 "sse_reg_operand") @@ -4632,9 +4632,9 @@ (set (match_dup 0) (float_truncate:SF (match_dup 2)))] "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);") -;; Break partial reg stall for cvtsd2ss. This splitter should split -;; late in the pass sequence (after register rename pass), -;; so allocated registers won't change anymore. +;; Break partial SSE register dependency stall.
This splitter should split +;; late in the pass sequence (after register rename pass), so allocated +;; registers won't change anymore (define_split [(set (match_operand:SF 0 "sse_reg_operand") @@ -5137,7 +5137,7 @@ (set_attr "unit" "i387") (set_attr "fp_int_src" "true")]) -;; Avoid partial SSE register dependency stalls. This splitter should split +;; Break partial SSE register dependency stall. This splitter should split ;; late in the pass sequence (after register rename pass), so allocated ;; registers won't change anymore @@ -14765,18 +14765,26 @@ (symbol_ref "false"))))]) (define_insn "*rcpsf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x,x") - (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")] + [(set (match_operand:SF 0 "register_operand" "=x,x,x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")] UNSPEC_RCP))] "TARGET_SSE && TARGET_SSE_MATH" "@ + %vrcpss\t{%d1, %0|%0, %d1} %vrcpss\t{%d1, %0|%0, %d1} %vrcpss\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "SF")]) + (set_attr "mode" "SF") + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (eq_attr "alternative" "2") + (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY") + ] + (symbol_ref "true")))]) (define_insn "*fop_xf_1_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") @@ -15003,18 +15011,26 @@ (set_attr "bdver1_decode" "direct")]) (define_insn "*rsqrtsf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x,x") - (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")] + [(set (match_operand:SF 0 "register_operand" "=x,x,x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")] UNSPEC_RSQRT))] "TARGET_SSE && TARGET_SSE_MATH" "@ + %vrsqrtss\t{%d1, %0|%0, %d1} %vrsqrtss\t{%d1, %0|%0, %d1} %vrsqrtss\t{%1, %d0|%d0, %1}" [(set_attr "type" 
"sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "SF")]) + (set_attr "mode" "SF") + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (eq_attr "alternative" "2") + (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY") + ] + (symbol_ref "true")))]) (define_expand "rsqrtsf2" [(set (match_operand:SF 0 "register_operand") @@ -15027,11 +15043,12 @@ }) (define_insn "*sqrt2_sse" - [(set (match_operand:MODEF 0 "register_operand" "=v,v") + [(set (match_operand:MODEF 0 "register_operand" "=v,v,v") (sqrt:MODEF - (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))] + (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "@ + %vsqrt\t{%d1, %0|%0, %d1} %vsqrt\t{%d1, %0|%0, %d1} %vsqrt\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") @@ -15039,9 +15056,13 @@ (set_attr "btver2_sse_attr" "sqrt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "") - (set_attr "athlon_decode" "*") - (set_attr "amdfam10_decode" "*") - (set_attr "bdver1_decode" "*")]) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (eq_attr "alternative" "2") + (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY") + ] + (symbol_ref "true")))]) (define_expand "sqrt2" [(set (match_operand:MODEF 0 "register_operand") @@ -16175,21 +16196,30 @@ (define_insn "sse4_1_round2" - [(set (match_operand:MODEF 0 "register_operand" "=x,x,v") - (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "x,m,vm") - (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")] - UNSPEC_ROUND))] + [(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v") + (unspec:MODEF + [(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,vm") + (match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n")] + UNSPEC_ROUND))] "TARGET_SSE4_1" "@ + %vround\t{%2, %d1, 
%0|%0, %d1, %2} %vround\t{%2, %d1, %0|%0, %d1, %2} %vround\t{%2, %1, %d0|%d0, %1, %2} vrndscale\t{%2, %1, %d0|%d0, %1, %2}" [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1,1,*") - (set_attr "length_immediate" "*,*,1") - (set_attr "prefix" "maybe_vex,maybe_vex,evex") - (set_attr "isa" "noavx512f,noavx512f,avx512f") - (set_attr "mode" "")]) + (set_attr "prefix_extra" "1,1,1,*") + (set_attr "length_immediate" "*,*,*,1") + (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex") + (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f") + (set_attr "mode" "") + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (eq_attr "alternative" "2") + (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY") + ] + (symbol_ref "true")))]) (define_insn "rintxf2" [(set (match_operand:XF 0 "register_operand" "=f") -- 2.30.2