Improve Cortex-A53 FP scheduler
author Wilco Dijkstra <wdijkstr@arm.com>
Wed, 14 Jun 2017 14:51:46 +0000 (14:51 +0000)
committer Wilco Dijkstra <wilco@gcc.gnu.org>
Wed, 14 Jun 2017 14:51:46 +0000 (14:51 +0000)
The Cortex-A53 scheduler model of the FMAC bypass is not quite right
for FMAC to FMAC forwarding.  Experiments also show that the latencies
of FP operations are too high.  Rather than adding more bypasses,
adjust the latencies of FP instructions to get a better schedule on
average.  As a result SPECFP2006 is 1.1% faster.

    gcc/
* config/arm/cortex-a53.md (cortex_a53_fpalu): Adjust latency.
(cortex_a53_fconst): Likewise.
(cortex_a53_fpmul): Likewise.
(cortex_a53_f_load_64): Likewise.
(cortex_a53_f_load_many): Likewise.
(cortex_a53_advsimd_alu): Likewise.
(cortex_a53_advsimd_alu_q): Likewise.
(cortex_a53_advsimd_mul): Likewise.
(cortex_a53_advsimd_mul_q): Likewise.
(fpmac bypass): Add new bypass for fpmac-fpmac case.
Add missing fmul, r2f_cvt and fconst cases.

From-SVN: r249200

gcc/ChangeLog
gcc/config/arm/cortex-a53.md

index 09a1b983584b18c6aa9606648d36ba5ea673dc8e..59ca50606dedee3b72b42bb044637375219607ca 100644 (file)
@@ -1,3 +1,17 @@
+2017-06-14  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       * config/arm/cortex-a53.md (cortex_a53_fpalu) Adjust latency.
+       (cortex_a53_fconst): Likewise.
+       (cortex_a53_fpmul): Likewise.
+       (cortex_a53_f_load_64): Likewise.
+       (cortex_a53_f_load_many): Likewise.
+       (cortex_a53_advsimd_alu): Likewise.
+       (cortex_a53_advsimd_alu_q): Likewise.
+       (cortex_a53_advsimd_mul): Likewise.
+       (cortex_a53_advsimd_mul_q): Likewise.
+       (fpmac bypass): Add new bypass for fpmac-fpmac case.
+       Add missing fmul, r2f_cvt and fconst cases.
+
 2017-06-14  Richard Biener  <rguenther@suse.de>
 
        PR middle-end/81088
index b7e0c9257f76b915ddccbc3eff28780fef7fc784..ff16e360a66a0cc4393cd74c6c69a37ee2af9654 100644 (file)
 ;; Floating-point arithmetic.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn_reservation "cortex_a53_fpalu" 5
+(define_insn_reservation "cortex_a53_fpalu" 4
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,
                        f_cvt, fcmps, fcmpd, fccmps, fccmpd, fcsel,
                        f_rints, f_rintd, f_minmaxs, f_minmaxd"))
   "cortex_a53_slot_any,cortex_a53_fp_alu")
 
-(define_insn_reservation "cortex_a53_fconst" 3
+(define_insn_reservation "cortex_a53_fconst" 2
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "type" "fconsts,fconstd"))
   "cortex_a53_slot_any,cortex_a53_fp_alu")
 
-(define_insn_reservation "cortex_a53_fpmul" 5
+(define_insn_reservation "cortex_a53_fpmul" 4
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "type" "fmuls,fmuld"))
   "cortex_a53_slot_any,cortex_a53_fp_mul")
 ;; Floating-point load/store.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn_reservation "cortex_a53_f_load_64" 4
+(define_insn_reservation "cortex_a53_f_load_64" 3
   (and (eq_attr "tune" "cortexa53")
        (ior (eq_attr "type" "f_loads,f_loadd")
            (eq_attr "cortex_a53_advsimd_type"
   "cortex_a53_slot_any+cortex_a53_ls_agen,
    cortex_a53_load")
 
-(define_insn_reservation "cortex_a53_f_load_many" 5
+(define_insn_reservation "cortex_a53_f_load_many" 4
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "cortex_a53_advsimd_type"
                "advsimd_load_128,advsimd_load_lots"))
 ;; or a 128-bit operation in which case we require in our model that we
 ;; issue from slot 0.
 
-(define_insn_reservation "cortex_a53_advsimd_alu" 5
+(define_insn_reservation "cortex_a53_advsimd_alu" 4
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "cortex_a53_advsimd_type" "advsimd_alu"))
   "cortex_a53_slot_any,cortex_a53_fp_alu")
 
-(define_insn_reservation "cortex_a53_advsimd_alu_q" 5
+(define_insn_reservation "cortex_a53_advsimd_alu_q" 4
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "cortex_a53_advsimd_type" "advsimd_alu_q"))
   "cortex_a53_slot0,cortex_a53_fp_alu_q")
 
-(define_insn_reservation "cortex_a53_advsimd_mul" 5
+(define_insn_reservation "cortex_a53_advsimd_mul" 4
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "cortex_a53_advsimd_type" "advsimd_mul"))
   "cortex_a53_slot_any,cortex_a53_fp_mul")
 
-(define_insn_reservation "cortex_a53_advsimd_mul_q" 5
+(define_insn_reservation "cortex_a53_advsimd_mul_q" 4
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "cortex_a53_advsimd_type" "advsimd_mul_q"))
   "cortex_a53_slot0,cortex_a53_fp_mul_q")
 ;; multiply-accumulate operations as a bypass reducing the latency
 ;; of producing instructions to near zero.
 
-(define_bypass 1 "cortex_a53_fp*,
+(define_bypass 1 "cortex_a53_fpalu,
+                 cortex_a53_fpmul,
                  cortex_a53_r2f,
+                 cortex_a53_r2f_cvt,
+                 cortex_a53_fconst,
                  cortex_a53_f_load*"
                 "cortex_a53_fpmac"
                 "aarch_accumulator_forwarding")
 
-;; Model a bypass from the result of an FP operation to a use.
-
-(define_bypass 4 "cortex_a53_fpalu,
-                 cortex_a53_fpmul"
-                "cortex_a53_fpalu,
-                 cortex_a53_fpmul,
-                 cortex_a53_fpmac,
-                 cortex_a53_advsimd_div*")
+(define_bypass 4 "cortex_a53_fpmac"
+                "cortex_a53_fpmac"
+                "aarch_accumulator_forwarding")
 
 ;; We want AESE and AESMC to end up consecutive to one another.