--- /dev/null
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; ------------------------------------------------------------------------
+;; Define N10 pipeline settings.
+;; ------------------------------------------------------------------------
+
+(define_automaton "nds32_n10_machine")
+
+;; ------------------------------------------------------------------------
+;; Pipeline Stages
+;; ------------------------------------------------------------------------
+;; IF - Instruction Fetch
+;; II - Instruction Issue / Instruction Decode
+;; EX - Instruction Execution
+;; MM - Memory Execution
+;; WB - Instruction Retire / Result Write-Back
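+;;
+;; The FPU coprocessor is modeled with its own pipeline. Judging from the
+;; unit names defined below, its stages are assumed to be:
+;; IQ - Instruction Queue
+;; RF - Register Fetch
+;; E1-E4 - Floating-Point Execution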
+
+(define_cpu_unit "n10_ii" "nds32_n10_machine")
+(define_cpu_unit "n10_ex" "nds32_n10_machine")
+(define_cpu_unit "n10_mm" "nds32_n10_machine")
+(define_cpu_unit "n10_wb" "nds32_n10_machine")
+(define_cpu_unit "n10f_iq" "nds32_n10_machine")
+(define_cpu_unit "n10f_rf" "nds32_n10_machine")
+(define_cpu_unit "n10f_e1" "nds32_n10_machine")
+(define_cpu_unit "n10f_e2" "nds32_n10_machine")
+(define_cpu_unit "n10f_e3" "nds32_n10_machine")
+(define_cpu_unit "n10f_e4" "nds32_n10_machine")
+
+(define_insn_reservation "nds_n10_unknown" 1
+ (and (eq_attr "type" "unknown")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_misc" 1
+ (and (eq_attr "type" "misc")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_mmu" 1
+ (and (eq_attr "type" "mmu")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_alu" 1
+ (and (eq_attr "type" "alu")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_alu_shift" 1
+ (and (eq_attr "type" "alu_shift")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_pbsad" 1
+ (and (eq_attr "type" "pbsad")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex*3, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_pbsada" 1
+ (and (eq_attr "type" "pbsada")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex*3, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_load" 1
+ (and (match_test "nds32::load_single_p (insn)")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_store" 1
+ (and (match_test "nds32::store_single_p (insn)")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
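+;; A load/store-multiple of N words is modeled as N back-to-back
+;; micro-operations, each holding II/EX/MM/WB for one cycle; the "combo"
+;; attribute gives N, and combos of eight or more share the *_multiple_N
+;; reservations below.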
+(define_insn_reservation "nds_n10_load_multiple_1" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "1")))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_2" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (ior (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "2"))
+ (match_test "nds32::load_double_p (insn)")))
+ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_3" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "3")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_4" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "4")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_5" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "5")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_6" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "6")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_7" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "7")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_load_multiple_N" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "load_multiple")
+ (match_test "get_attr_combo (insn) >= 8")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_1" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "1")))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_2" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (ior (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "2"))
+ (match_test "nds32::store_double_p (insn)")))
+ "n10_ii, n10_ii+n10_ex, n10_ex+n10_mm, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_3" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "3")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_4" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "4")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, n10_ii+n10_ex+n10_mm+n10_wb, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_5" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "5")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*2, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_6" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "6")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*3, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_7" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "7")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*4, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_store_multiple_N" 1
+ (and (eq_attr "pipeline_model" "n10")
+ (and (eq_attr "type" "store_multiple")
+ (match_test "get_attr_combo (insn) >= 8")))
+ "n10_ii, n10_ii+n10_ex, n10_ii+n10_ex+n10_mm, (n10_ii+n10_ex+n10_mm+n10_wb)*5, n10_ex+n10_mm+n10_wb, n10_mm+n10_wb, n10_wb")
+
+(define_insn_reservation "nds_n10_mul" 1
+ (and (eq_attr "type" "mul")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_mac" 1
+ (and (eq_attr "type" "mac")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_div" 1
+ (and (eq_attr "type" "div")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex*34, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_branch" 1
+ (and (eq_attr "type" "branch")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_alu" 1
+ (and (eq_attr "type" "dalu")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_alu64" 1
+ (and (eq_attr "type" "dalu64")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_alu_round" 1
+ (and (eq_attr "type" "daluround")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_cmp" 1
+ (and (eq_attr "type" "dcmp")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_clip" 1
+ (and (eq_attr "type" "dclip")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_mul" 1
+ (and (eq_attr "type" "dmul")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_mac" 1
+ (and (eq_attr "type" "dmac")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_insb" 1
+ (and (eq_attr "type" "dinsb")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_pack" 1
+ (and (eq_attr "type" "dpack")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_bpick" 1
+ (and (eq_attr "type" "dbpick")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_dsp_wext" 1
+ (and (eq_attr "type" "dwext")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ex, n10_mm, n10_wb")
+
+(define_insn_reservation "nds_n10_fpu_alu" 4
+ (and (eq_attr "type" "falu")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_muls" 4
+ (and (eq_attr "type" "fmuls")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_muld" 4
+ (and (eq_attr "type" "fmuld")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_macs" 4
+ (and (eq_attr "type" "fmacs")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*3, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_macd" 4
+ (and (eq_attr "type" "fmacd")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*4, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_divs" 4
+ (and (ior (eq_attr "type" "fdivs")
+ (eq_attr "type" "fsqrts"))
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*14, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_divd" 4
+ (and (ior (eq_attr "type" "fdivd")
+ (eq_attr "type" "fsqrtd"))
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2*28, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fast_alu" 2
+ (and (ior (eq_attr "type" "fcmp")
+ (ior (eq_attr "type" "fabs")
+ (ior (eq_attr "type" "fcpy")
+ (eq_attr "type" "fcmov"))))
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fmtsr" 4
+ (and (eq_attr "type" "fmtsr")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fmtdr" 4
+ (and (eq_attr "type" "fmtdr")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fmfsr" 2
+ (and (eq_attr "type" "fmfsr")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_fmfdr" 2
+ (and (eq_attr "type" "fmfdr")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10_ii+n10f_iq, n10f_iq+n10f_rf, n10f_rf+n10f_e1, n10f_e1+n10f_e2, n10f_e2+n10f_e3, n10f_e3+n10f_e4, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_load" 3
+ (and (eq_attr "type" "fload")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+(define_insn_reservation "nds_n10_fpu_store" 1
+ (and (eq_attr "type" "fstore")
+ (eq_attr "pipeline_model" "n10"))
+ "n10_ii, n10f_iq, n10f_rf, n10f_e1, n10f_e2, n10f_e3, n10f_e4")
+
+;; ------------------------------------------------------------------------
+;; Comment Notations and Bypass Rules
+;; ------------------------------------------------------------------------
+;; Producers (LHS)
+;; LD
+;;   Load data from memory and produce the loaded data. The result is
+;;   ready at MM.
+;; LMW(N, M)
+;; There are N micro-operations within an instruction that loads multiple
+;; words. The result produced by the M-th micro-operation is sent to
+;; consumers. The result is ready at MM.
+;; MUL, MAC
+;; Compute data in the multiply-adder and produce the data. The result
+;; is ready at MM.
+;; DIV
+;; Compute data in the divider and produce the data. The result is ready
+;; at MM.
+;;
+;; Consumers (RHS)
+;; ALU, MOVD44, PBSAD, PBSADA_RaRb, MUL, MAC, DIV, MMU
+;; Require operands at EX.
+;; ALU_SHIFT_Rb
+;; An ALU-SHIFT instruction consists of a shift micro-operation followed
+;;   by an arithmetic micro-operation. The operand Rb is consumed by the
+;;   first micro-operation, so a data dependency on Rb incurs extra latency.
+;; MAC_RaRb
+;; A MAC instruction does multiplication at EX and does accumulation at MM,
+;; so the operand Rt is required at MM, and operands Ra and Rb are required
+;; at EX.
+;; ADDR_IN
+;; If an instruction requires an address as its input operand, the address
+;; is required at EX.
+;; ST
+;; A store instruction requires its data at MM.
+;; SMW(N, M)
+;; There are N micro-operations within an instruction that stores multiple
+;;   words. The M-th micro-operation requires its data at MM.
+;; BR
+;; If a branch instruction is conditional, its input data is required at EX.
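+;;
+;; For example, the bypass of latency 2 from LD to ADDR_IN below encodes a
+;; one-cycle stall: the loaded result is ready at MM, one stage after the
+;; EX stage in which a dependent address computation consumes it.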
+
+;; FPU_ADDR_OUT -> FPU_ADDR_IN
+;; The main pipeline does not need an equivalent rule because its default
+;; latency is already 1.
+(define_bypass 1
+ "nds_n10_fpu_load, nds_n10_fpu_store"
+ "nds_n10_fpu_load, nds_n10_fpu_store"
+ "nds32_n10_ex_to_ex_p"
+)
+
+;; LD, MUL, MAC, DIV, DALU64, DMUL, DMAC, DALUROUND, DBPICK, DWEXT
+;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU,
+;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
+(define_bypass 2
+ "nds_n10_load, nds_n10_mul, nds_n10_mac, nds_n10_div,\
+ nds_n10_dsp_alu64, nds_n10_dsp_mul, nds_n10_dsp_mac,\
+ nds_n10_dsp_alu_round, nds_n10_dsp_bpick, nds_n10_dsp_wext"
+ "nds_n10_alu, nds_n10_alu_shift,\
+ nds_n10_pbsad, nds_n10_pbsada,\
+ nds_n10_mul, nds_n10_mac, nds_n10_div,\
+ nds_n10_branch,\
+ nds_n10_load, nds_n10_store,\
+ nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
+ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
+ nds_n10_load_multiple_7, nds_n10_load_multiple_N,\
+ nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\
+ nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\
+ nds_n10_store_multiple_7, nds_n10_store_multiple_N,\
+ nds_n10_mmu,\
+ nds_n10_dsp_alu, nds_n10_dsp_alu_round,\
+ nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\
+ nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\
+ nds_n10_dsp_wext, nds_n10_dsp_bpick"
+ "nds32_n10_mm_to_ex_p"
+)
+
+;; LMW(N, N)
+;; -> ALU, ALU_SHIFT_Rb, PBSAD, PBSADA_RaRb, MOVD44, MUL, MAC_RaRb, DIV, ADDR_IN, BR, MMU
+;; DALU, DALUROUND, DMUL, DMAC_RaRb, DPACK, DINSB, DCMP, DCLIP, WEXT_O, BPICK_RaRb
+(define_bypass 2
+ "nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
+ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
+ nds_n10_load_multiple_7, nds_n10_load_multiple_N"
+ "nds_n10_alu, nds_n10_alu_shift,\
+ nds_n10_pbsad, nds_n10_pbsada,\
+ nds_n10_mul, nds_n10_mac, nds_n10_div,\
+ nds_n10_branch,\
+ nds_n10_load, nds_n10_store,\
+ nds_n10_load_multiple_1, nds_n10_load_multiple_2, nds_n10_load_multiple_3,\
+ nds_n10_load_multiple_4, nds_n10_load_multiple_5, nds_n10_load_multiple_6,\
+ nds_n10_load_multiple_7, nds_n10_load_multiple_N,\
+ nds_n10_store_multiple_1, nds_n10_store_multiple_2, nds_n10_store_multiple_3,\
+ nds_n10_store_multiple_4, nds_n10_store_multiple_5, nds_n10_store_multiple_6,\
+ nds_n10_store_multiple_7, nds_n10_store_multiple_N,\
+ nds_n10_mmu,\
+ nds_n10_dsp_alu, nds_n10_dsp_alu_round,\
+ nds_n10_dsp_mul, nds_n10_dsp_mac, nds_n10_dsp_pack,\
+ nds_n10_dsp_insb, nds_n10_dsp_cmp, nds_n10_dsp_clip,\
+ nds_n10_dsp_wext, nds_n10_dsp_bpick"
+ "nds32_n10_last_load_to_ex_p"
+)
return false;
}
+/* Check whether the wext insn INSN consumes DEF_REG, either through its
+   position operand or through the relevant word of its DImode source. */
+bool
+wext_odd_dep_p (rtx insn, rtx def_reg)
+{
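+ /* The wext pattern is expected to wrap a shift of a DImode source; dig
+ the shift expression out of the SET_SRC, then take its source and its
+ position operand. */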
+ rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0);
+ rtx use_reg = XEXP (shift_rtx, 0);
+ rtx pos_rtx = XEXP (shift_rtx, 1);
+
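+ /* A definition feeding the position operand is always a dependence. */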
+ if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx))
+ return true;
+
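+ /* A DImode definition covers both words of the source, so any overlap
+ counts. */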
+ if (GET_MODE (def_reg) == DImode)
+ return reg_overlap_p (def_reg, use_reg);
+
+ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
+ gcc_assert (REG_P (use_reg));
+
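+ /* Otherwise DEF_REG is a single word; only a definition of the most
+ significant word of the DImode source counts, and which hard register or
+ SUBREG_BYTE that is depends on endianness. */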
+ if (REG_P (def_reg))
+ {
+ if (!TARGET_BIG_ENDIAN)
+ return REGNO (def_reg) == REGNO (use_reg) + 1;
+ else
+ return REGNO (def_reg) == REGNO (use_reg);
+ }
+
+ if (GET_CODE (def_reg) == SUBREG)
+ {
+ if (!reg_overlap_p (def_reg, use_reg))
+ return false;
+
+ if (!TARGET_BIG_ENDIAN)
+ return SUBREG_BYTE (def_reg) == 4;
+ else
+ return SUBREG_BYTE (def_reg) == 0;
+ }
+
+ return false;
+}
+
+/* Check whether the bpick insn INSN consumes DEF_REG through one of its
+   data operands Ra/Rb; the NOT-wrapped control operand is excluded. */
+bool
+bpick_ra_rb_dep_p (rtx insn, rtx def_reg)
+{
+ rtx ior_rtx = SET_SRC (PATTERN (insn));
+ rtx and1_rtx = XEXP (ior_rtx, 0);
+ rtx and2_rtx = XEXP (ior_rtx, 1);
+ rtx reg1_0 = XEXP (and1_rtx, 0);
+ rtx reg1_1 = XEXP (and1_rtx, 1);
+ rtx reg2_0 = XEXP (and2_rtx, 0);
+ rtx reg2_1 = XEXP (and2_rtx, 1);
+
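+ /* The control operand of bpick is wrapped in a NOT; find it among the
+ four AND operands, pair it with the equal rtx in the other AND, and test
+ DEF_REG against the remaining two data operands. */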
+ if (GET_CODE (reg1_0) == NOT)
+ {
+ if (rtx_equal_p (reg1_0, reg2_0))
+ return reg_overlap_p (def_reg, reg1_1)
+ || reg_overlap_p (def_reg, reg2_1);
+
+ if (rtx_equal_p (reg1_0, reg2_1))
+ return reg_overlap_p (def_reg, reg1_1)
+ || reg_overlap_p (def_reg, reg2_0);
+ }
+
+ if (GET_CODE (reg1_1) == NOT)
+ {
+ if (rtx_equal_p (reg1_1, reg2_0))
+ return reg_overlap_p (def_reg, reg1_0)
+ || reg_overlap_p (def_reg, reg2_1);
+
+ if (rtx_equal_p (reg1_1, reg2_1))
+ return reg_overlap_p (def_reg, reg1_0)
+ || reg_overlap_p (def_reg, reg2_0);
+ }
+
+ if (GET_CODE (reg2_0) == NOT)
+ {
+ if (rtx_equal_p (reg2_0, reg1_0))
+ return reg_overlap_p (def_reg, reg2_1)
+ || reg_overlap_p (def_reg, reg1_1);
+
+ if (rtx_equal_p (reg2_0, reg1_1))
+ return reg_overlap_p (def_reg, reg2_1)
+ || reg_overlap_p (def_reg, reg1_0);
+ }
+
+ if (GET_CODE (reg2_1) == NOT)
+ {
+ if (rtx_equal_p (reg2_1, reg1_0))
+ return reg_overlap_p (def_reg, reg2_0)
+ || reg_overlap_p (def_reg, reg1_1);
+
+ if (rtx_equal_p (reg2_1, reg1_1))
+ return reg_overlap_p (def_reg, reg2_0)
+ || reg_overlap_p (def_reg, reg1_0);
+ }
+
+ gcc_unreachable ();
+}
} // namespace scheduling
} // namespace nds32
operations in order to write two registers. We have to check the
dependency from the producer to the first micro-operation. */
case TYPE_DIV:
- if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
- || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+ if (divmod_p (consumer))
use_rtx = SET_SRC (parallel_element (consumer, 0));
else
use_rtx = SET_SRC (PATTERN (consumer));
operations in order to write two registers. We have to check the
dependency from the producer to the first micro-operation. */
case TYPE_DIV:
- if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
- || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+ if (divmod_p (consumer))
use_rtx = SET_SRC (parallel_element (consumer, 0));
else
use_rtx = SET_SRC (PATTERN (consumer));
break;
case TYPE_DIV:
- if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
- || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+ if (divmod_p (consumer))
use_rtx = SET_SRC (parallel_element (consumer, 0));
else
use_rtx = SET_SRC (PATTERN (consumer));
We have to check the dependency from the producer to the first
micro-operation. */
case TYPE_DIV:
- if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
- || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+ if (divmod_p (consumer))
use_rtx = SET_SRC (parallel_element (consumer, 0));
else
use_rtx = SET_SRC (PATTERN (consumer));
return false;
}
+/* Check the dependency between the producer defining DEF_REG and CONSUMER
+ requiring input operand at EX. */
+bool
+n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
+{
+ rtx use_rtx;
+
+ switch (get_attr_type (consumer))
+ {
+ case TYPE_ALU:
+ case TYPE_PBSAD:
+ case TYPE_MUL:
+ case TYPE_DALU:
+ case TYPE_DALU64:
+ case TYPE_DMUL:
+ case TYPE_DPACK:
+ case TYPE_DINSB:
+ case TYPE_DCMP:
+ case TYPE_DCLIP:
+ case TYPE_DALUROUND:
+ use_rtx = SET_SRC (PATTERN (consumer));
+ break;
+
+ case TYPE_ALU_SHIFT:
+ use_rtx = extract_shift_reg (consumer);
+ break;
+
+ case TYPE_PBSADA:
+ return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
+
+ case TYPE_MAC:
+ case TYPE_DMAC:
+ use_rtx = extract_mac_non_acc_rtx (consumer);
+ break;
+
+  /* Some special instructions, divmodsi4 and udivmodsi4, produce two
+     results, the quotient and the remainder. It requires two micro-
+     operations in order to write two registers. We have to check the
+     dependency from the producer to the first micro-operation. */
+ case TYPE_DIV:
+ if (divmod_p (consumer))
+ use_rtx = SET_SRC (parallel_element (consumer, 0));
+ else
+ use_rtx = SET_SRC (PATTERN (consumer));
+ break;
+
+ case TYPE_DWEXT:
+ return wext_odd_dep_p (consumer, def_reg);
+
+ case TYPE_DBPICK:
+ return bpick_ra_rb_dep_p (consumer, def_reg);
+
+ case TYPE_MMU:
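+ /* If the MMU operation is not a simple SET, conservatively assume a
+ dependence. */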
+ if (GET_CODE (PATTERN (consumer)) == SET)
+ use_rtx = SET_SRC (PATTERN (consumer));
+ else
+ return true;
+ break;
+
+ case TYPE_LOAD:
+ case TYPE_STORE:
+ use_rtx = extract_mem_rtx (consumer);
+ break;
+
+ case TYPE_LOAD_MULTIPLE:
+ case TYPE_STORE_MULTIPLE:
+ use_rtx = extract_base_reg (consumer);
+ break;
+
+ case TYPE_BRANCH:
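+ /* The condition of a conditional branch is read at EX, so scan the
+ whole pattern. */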
+ use_rtx = PATTERN (consumer);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (reg_overlap_p (def_reg, use_rtx))
+ return true;
+
+ return false;
+}
} // anonymous namespace
break;
case TYPE_DIV:
- if (INSN_CODE (producer) == CODE_FOR_divmodsi4
- || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+ if (divmod_p (producer))
def_reg = SET_DEST (parallel_element (producer, 1));
else
def_reg = SET_DEST (PATTERN (producer));
break;
case TYPE_DIV:
- if (INSN_CODE (producer) == CODE_FOR_divmodsi4
- || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+ if (divmod_p (producer))
{
rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
results, the quotient and the remainder. We have to handle them
individually. */
case TYPE_DIV:
- if (INSN_CODE (producer) == CODE_FOR_divmodsi4
- || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+ if (divmod_p (producer))
{
rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg);
}
+/* Guard functions for N10 cores. */
+
+/* Check dependencies from EX to EX (FPU_ADDR_OUT -> FPU_ADDR_IN). */
+bool
+nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
+ gcc_assert (get_attr_type (producer) == TYPE_FLOAD
+ || get_attr_type (producer) == TYPE_FSTORE);
+ gcc_assert (get_attr_type (consumer) == TYPE_FLOAD
+ || get_attr_type (consumer) == TYPE_FSTORE);
+
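+ /* Only a post-update access produces an address result at EX; a plain
+ FPU load/store defines no address register. */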
+ if (!post_update_insn_p (producer))
+ return false;
+
+ return reg_overlap_p (extract_base_reg (producer),
+ extract_mem_rtx (consumer));
+}
+
+/* Check dependencies from MM to EX. */
+bool
+nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
+ rtx def_reg;
+
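+ /* All of these producers have their results ready at MM; pick out the
+ defined register(s) and check the consumer's EX-stage uses. */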
+ switch (get_attr_type (producer))
+ {
+ case TYPE_LOAD:
+ case TYPE_MUL:
+ case TYPE_MAC:
+ case TYPE_DALU64:
+ case TYPE_DMUL:
+ case TYPE_DMAC:
+ case TYPE_DALUROUND:
+ case TYPE_DBPICK:
+ case TYPE_DWEXT:
+ def_reg = SET_DEST (PATTERN (producer));
+ break;
+
+ /* Some special instructions, divmodsi4 and udivmodsi4, produce two
+ results, the quotient and the remainder. We have to handle them
+ individually. */
+ case TYPE_DIV:
+ if (divmod_p (producer))
+ {
+ rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
+ rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
+
+ return (n10_consumed_by_ex_dep_p (consumer, def_reg1)
+ || n10_consumed_by_ex_dep_p (consumer, def_reg2));
+ }
+
+ def_reg = SET_DEST (PATTERN (producer));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return n10_consumed_by_ex_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from LMW(N, N) to EX. */
+bool
+nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
+{
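+ /* The dependence that matters is on the register written by the last
+ micro-operation of the load-multiple. */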
+ rtx last_def_reg = extract_nth_access_reg (producer, -1);
+
+ return n10_consumed_by_ex_dep_p (consumer, last_def_reg);
+}
/* ------------------------------------------------------------------------ */