From 375aa4bcc7c47df4c7892a19cdb4848d825503a6 Mon Sep 17 00:00:00 2001 From: Pat Haugen Date: Thu, 16 Nov 2017 20:14:22 +0000 Subject: [PATCH] power9.md (power9fpdiv): New automaton and cpu_unit defined for it. * rs6000/power9.md (power9fpdiv): New automaton and cpu_unit defined for it. (DU_C2_3_power9): Correct reservation combinations. (FP_DIV_power9, VEC_DIV_power9): New. (power9-alu): Split out rotate/shift... (power9-rot): ...to here, correct dispatch resource. (power9-cracked-alu, power9-mul, power9-mul-compare): Correct dispatch resource. (power9-fp): Correct latency. (power9-sdiv): Add div/sqrt resource. (power9-ddiv): Correct latency, add div/sqrt resource. (power9-sqrt, power9-dsqrt): Add div/sqrt resource. (power9-vecfdiv, power9-vecdiv): Correct latency, add div/sqrt resource. (power9-qpdiv, power9-qpmul): Adjust resource usage. From-SVN: r254844 --- gcc/ChangeLog | 18 ++++++++++ gcc/config/rs6000/power9.md | 65 ++++++++++++++++++++++++------------- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b5ef10ef28f..541ec6e73e0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2017-11-16 Pat Haugen + + * rs6000/power9.md (power9fpdiv): New automaton and cpu_unit defined + for it. + (DU_C2_3_power9): Correct reservation combinations. + (FP_DIV_power9, VEC_DIV_power9): New. + (power9-alu): Split out rotate/shift... + (power9-rot): ...to here, correct dispatch resource. + (power9-cracked-alu, power9-mul, power9-mul-compare): Correct dispatch + resource. + (power9-fp): Correct latency. + (power9-sdiv): Add div/sqrt resource. + (power9-ddiv): Correct latency, add div/sqrt resource. + (power9-sqrt, power9-dsqrt): Add div/sqrt resource. + (power9-vecfdiv, power9-vecdiv): Correct latency, add div/sqrt + resource. + (power9-qpdiv, power9-qpmul): Adjust resource usage. 
+ 2017-11-15 Michael Meissner * config/rs6000/rs6000.c (rs6000_expand_builtin): Do not do the diff --git a/gcc/config/rs6000/power9.md b/gcc/config/rs6000/power9.md index 82e4b1cf65c..e0f71fba153 100644 --- a/gcc/config/rs6000/power9.md +++ b/gcc/config/rs6000/power9.md @@ -19,7 +19,7 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. -(define_automaton "power9dsp,power9lsu,power9vsu,power9misc") +(define_automaton "power9dsp,power9lsu,power9vsu,power9fpdiv,power9misc") (define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu") (define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu") @@ -28,6 +28,10 @@ ; Two fixed point divide units, not pipelined (define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc") (define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc") +; Create a false unit for use by non-pipelined FP div/sqrt +(define_cpu_unit "fp_div0_power9,fp_div1_power9,fp_div2_power9,fp_div3_power9" + "power9fpdiv") + (define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9, x2_power9,x3_power9,xb0_power9,xb1_power9, @@ -79,8 +83,7 @@ ; 2-way cracked plus 3rd slot (define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9| - x1_power9+x2_power9+xa0_power9| - x1_power9+x2_power9+xb0_power9| + x1_power9+x2_power9+xa1_power9| x2_power9+x3_power9+xb0_power9") ; 3-way cracked (consumes whole decode/dispatch cycle) @@ -108,6 +111,18 @@ (define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9") +; Define the reservation to be used by FP div/sqrt which allows other insns +; to be issued to the VSU, but blocks other div/sqrt for a number of cycles. +; Note that the number of cycles blocked varies depending on insn, but we +; just use the same number for all in order to keep the number of DFA states +; reasonable. 
+(define_reservation "FP_DIV_power9" + "fp_div0_power9*8|fp_div1_power9*8|fp_div2_power9*8| + fp_div3_power9*8") +(define_reservation "VEC_DIV_power9" + "fp_div0_power9*8+fp_div1_power9*8| + fp_div2_power9*8+fp_div3_power9*8") + ; LS Unit (define_insn_reservation "power9-load" 4 @@ -243,21 +258,26 @@ ; Most ALU insns are simple 2 cycle, including record form (define_insn_reservation "power9-alu" 2 - (and (ior (eq_attr "type" "add,exts,integer,logical,isel") - (and (eq_attr "type" "insert,shift") - (eq_attr "dot" "no"))) + (and (eq_attr "type" "add,exts,integer,logical,isel") (eq_attr "cpu" "power9")) "DU_any_power9,VSU_power9") ; 5 cycle CR latency (define_bypass 5 "power9-alu" "power9-crlogical,power9-mfcr,power9-mfcrf") +; Rotate/shift prevent use of third slot +(define_insn_reservation "power9-rot" 2 + (and (eq_attr "type" "insert,shift") + (eq_attr "dot" "no") + (eq_attr "cpu" "power9")) + "DU_slice_3_power9,VSU_power9") + ; Record form rotate/shift are cracked (define_insn_reservation "power9-cracked-alu" 2 (and (eq_attr "type" "insert,shift") (eq_attr "dot" "yes") (eq_attr "cpu" "power9")) - "DU_C2_power9,VSU_power9") + "DU_C2_3_power9,VSU_power9") ; 7 cycle CR latency (define_bypass 7 "power9-cracked-alu" "power9-crlogical,power9-mfcr,power9-mfcrf") @@ -291,13 +311,13 @@ (and (eq_attr "type" "mul") (eq_attr "dot" "no") (eq_attr "cpu" "power9")) - "DU_any_power9,VSU_power9") + "DU_slice_3_power9,VSU_power9") (define_insn_reservation "power9-mul-compare" 5 (and (eq_attr "type" "mul") (eq_attr "dot" "yes") (eq_attr "cpu" "power9")) - "DU_C2_power9,VSU_power9") + "DU_C2_3_power9,VSU_power9") ; 10 cycle CR latency (define_bypass 10 "power9-mul-compare" "power9-crlogical,power9-mfcr,power9-mfcrf") @@ -349,7 +369,7 @@ (eq_attr "cpu" "power9")) "DU_slice_3_power9,VSU_power9") -(define_insn_reservation "power9-fp" 7 +(define_insn_reservation "power9-fp" 5 (and (eq_attr "type" "fp,dmul") (eq_attr "cpu" "power9")) "DU_slice_3_power9,VSU_power9") @@ -360,26 +380,26 @@ 
"DU_slice_3_power9,VSU_power9") ; FP div/sqrt are executed in the VSU slices. They are not pipelined wrt other -; divide insns, but for the most part do not block pipelined ops. +; div/sqrt insns, but for the most part do not block pipelined ops. (define_insn_reservation "power9-sdiv" 22 (and (eq_attr "type" "sdiv") (eq_attr "cpu" "power9")) - "DU_slice_3_power9,VSU_power9") + "DU_slice_3_power9,VSU_power9,FP_DIV_power9") -(define_insn_reservation "power9-ddiv" 33 +(define_insn_reservation "power9-ddiv" 27 (and (eq_attr "type" "ddiv") (eq_attr "cpu" "power9")) - "DU_slice_3_power9,VSU_power9") + "DU_slice_3_power9,VSU_power9,FP_DIV_power9") (define_insn_reservation "power9-sqrt" 26 (and (eq_attr "type" "ssqrt") (eq_attr "cpu" "power9")) - "DU_slice_3_power9,VSU_power9") + "DU_slice_3_power9,VSU_power9,FP_DIV_power9") (define_insn_reservation "power9-dsqrt" 36 (and (eq_attr "type" "dsqrt") (eq_attr "cpu" "power9")) - "DU_slice_3_power9,VSU_power9") + "DU_slice_3_power9,VSU_power9,FP_DIV_power9") (define_insn_reservation "power9-vec-2cyc" 2 (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx") @@ -419,28 +439,29 @@ (eq_attr "cpu" "power9")) "DU_super_power9,VSU_super_power9") -(define_insn_reservation "power9-vecfdiv" 28 +(define_insn_reservation "power9-vecfdiv" 24 (and (eq_attr "type" "vecfdiv") (eq_attr "cpu" "power9")) - "DU_super_power9,VSU_super_power9") + "DU_super_power9,VSU_super_power9,VEC_DIV_power9") -(define_insn_reservation "power9-vecdiv" 32 +(define_insn_reservation "power9-vecdiv" 27 (and (eq_attr "type" "vecdiv") (eq_attr "size" "!128") (eq_attr "cpu" "power9")) - "DU_super_power9,VSU_super_power9") + "DU_super_power9,VSU_super_power9,VEC_DIV_power9") +; Use 8 for DFU reservation on QP div/mul to limit DFA state size (define_insn_reservation "power9-qpdiv" 56 (and (eq_attr "type" "vecdiv") (eq_attr "size" "128") (eq_attr "cpu" "power9")) - "DU_super_power9,dfu_power9*44") + "DU_super_power9,dfu_power9*8") (define_insn_reservation 
"power9-qpmul" 24 (and (eq_attr "type" "qmul") (eq_attr "size" "128") (eq_attr "cpu" "power9")) - "DU_super_power9,dfu_power9*12") + "DU_super_power9,dfu_power9*8") (define_insn_reservation "power9-mffgpr" 2 (and (eq_attr "type" "mffgpr") -- 2.30.2