--- /dev/null
+;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
+;; Contributed by Andes Technology Corporation.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; ------------------------------------------------------------------------
+;; Define N13 pipeline settings.
+;; ------------------------------------------------------------------------
+
+(define_automaton "nds32_n13_machine")
+
+;; ------------------------------------------------------------------------
+;; Pipeline Stages
+;; ------------------------------------------------------------------------
+;; F1 - Instruction Fetch First
+;; Instruction Tag/Data Arrays
+;; ITLB Address Translation
+;; Branch Target Buffer Prediction
+;; F2 - Instruction Fetch Second
+;; Instruction Cache Hit Detection
+;; Cache Way Selection
+;; Inustruction Alignment
+;; I1 - Instruction Issue First / Instruction Decode
+;; Instruction Cache Replay Triggering
+;; 32/16-Bit Instruction Decode
+;; Return Address Stack Prediction
+;; I2 - Instruction Issue Second / Register File Access
+;; Instruction Issue Logic
+;; Register File Access
+;; E1 - Instruction Execute First / Address Generation / MAC First
+;; Data Access Address generation
+;; Multiply Operation
+;; E2 - Instruction Execute Second / Data Access First / MAC Second /
+;; ALU Execute
+;; Skewed ALU
+;; Branch/Jump/Return Resolution
+;; Data Tag/Data arrays
+;; DTLB address translation
+;; Accumulation Operation
+;; E3 - Instruction Execute Third / Data Access Second
+;; Data Cache Hit Detection
+;; Cache Way Selection
+;; Data Alignment
+;; E4 - Instruction Execute Fourth / Write Back
+;; Interruption Resolution
+;; Instruction Retire
+;; Register File Write Back
+
+(define_cpu_unit "n13_i1" "nds32_n13_machine")
+(define_cpu_unit "n13_i2" "nds32_n13_machine")
+(define_cpu_unit "n13_e1" "nds32_n13_machine")
+(define_cpu_unit "n13_e2" "nds32_n13_machine")
+(define_cpu_unit "n13_e3" "nds32_n13_machine")
+(define_cpu_unit "n13_e4" "nds32_n13_machine")
+
+(define_insn_reservation "nds_n13_unknown" 1
+ (and (eq_attr "type" "unknown")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_misc" 1
+ (and (eq_attr "type" "misc")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_mmu" 1
+ (and (eq_attr "type" "mmu")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_alu" 1
+ (and (eq_attr "type" "alu")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_alu_shift" 1
+ (and (eq_attr "type" "alu_shift")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_pbsad" 1
+ (and (eq_attr "type" "pbsad")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2*2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_pbsada" 1
+ (and (eq_attr "type" "pbsada")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2*3, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_load" 1
+ (and (match_test "nds32::load_single_p (insn)")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_store" 1
+ (and (match_test "nds32::store_single_p (insn)")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_1" 1
+ (and (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "1"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_2" 1
+ (and (ior (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "2"))
+ (match_test "nds32::load_double_p (insn)"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_3" 1
+ (and (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "3"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_4" 1
+ (and (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "4"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_5" 1
+ (and (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "5"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_6" 1
+ (and (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "6"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_7" 1
+ (and (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "7"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_8" 1
+ (and (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "8"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_load_multiple_12" 1
+ (and (and (eq_attr "type" "load_multiple")
+ (eq_attr "combo" "12"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_1" 1
+ (and (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "1"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_2" 1
+ (and (ior (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "2"))
+ (match_test "nds32::store_double_p (insn)"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i2+n13_e1, n13_e1+n13_e2, n13_e2+n13_e3, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_3" 1
+ (and (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "3"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2+n13_i2, n13_i1+n13_i2+n13_e1, n13_i2+n13_e1+n13_e2, n13_e1+n13_e2+n13_e3, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_4" 1
+ (and (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "4"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i2+n13_e1+n13_e2+n13_e3, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_5" 1
+ (and (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "5"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_6" 1
+ (and (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "6"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_7" 1
+ (and (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "7"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*2, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_8" 1
+ (and (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "8"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*3, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+(define_insn_reservation "nds_n13_store_multiple_12" 1
+ (and (and (eq_attr "type" "store_multiple")
+ (eq_attr "combo" "12"))
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i1+n13_i2, n13_i1+n13_i2+n13_e1, n13_i1+n13_i2+n13_e1+n13_e2, n13_i1+n13_i2+n13_e1+n13_e2+n13_e3, (n13_i1+n13_i2+n13_e1+n13_e2+n13_e3+n13_e4)*7, n13_i2+n13_e1+n13_e2+n13_e3+n13_e4, n13_e1+n13_e2+n13_e3+n13_e4, n13_e2+n13_e3+n13_e4, n13_e3+n13_e4, n13_e4")
+
+;; The multiplier at E1 takes two cycles.
+(define_insn_reservation "nds_n13_mul" 1
+ (and (eq_attr "type" "mul")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_mac" 1
+ (and (eq_attr "type" "mac")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1*2, n13_e2, n13_e3, n13_e4")
+
+;; The cycles consumed at E2 are 32 - CLZ(abs(Ra)) + 2,
+;; so the worst case is 34.
+(define_insn_reservation "nds_n13_div" 1
+ (and (eq_attr "type" "div")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2*34, n13_e3, n13_e4")
+
+(define_insn_reservation "nds_n13_branch" 1
+ (and (eq_attr "type" "branch")
+ (eq_attr "pipeline_model" "n13"))
+ "n13_i1, n13_i2, n13_e1, n13_e2, n13_e3, n13_e4")
+
+;; ------------------------------------------------------------------------
+;; Comment Notations and Bypass Rules
+;; ------------------------------------------------------------------------
+;; Producers (LHS)
+;; LD
+;; Load data from the memory and produce the loaded data. The result is
+;; ready at E3.
+;; LMW(N, M)
+;; There are N micro-operations within an instruction that loads multiple
+;; words. The result produced by the M-th micro-operation is sent to
+;; consumers. The result is ready at E3.
+;; ADDR_OUT
+;; Most load/store instructions can produce an address output if updating
+;; the base register is required. The result is ready at E2, which is
+;; produced by ALU.
+;; ALU, ALU_SHIFT, SIMD
+;; Compute data in ALU and produce the data. The result is ready at E2.
+;; MUL, MAC
+;; Compute data in the multiply-adder and produce the data. The result
+;; is ready at E2.
+;; DIV
+;; Compute data in the divider and produce the data. The result is ready
+;; at E2.
+;; BR
+;; Branch-with-link instructions produces a result containing the return
+;; address. The result is ready at E2.
+;;
+;; Consumers (RHS)
+;; ALU
+;; General ALU instructions require operands at E2.
+;; ALU_E1
+;; Some special ALU instructions, such as BSE, BSP and MOVD44, require
+;; operand at E1.
+;; MUL, DIV, PBSAD, MMU
+;; Operands are required at E1.
+;; PBSADA_Rt, PBSADA_RaRb
+;; Operands Ra and Rb are required at E1, and the operand Rt is required
+;; at E2.
+;; ALU_SHIFT_Rb
+;; An ALU-SHIFT instruction consists of a shift micro-operation followed
+;; by an arithmetic micro-operation. The operand Rb is used by the first
+;; micro-operation, and there are some latencies if data dependency occurs.
+;; MAC_RaRb
+;; A MAC instruction does multiplication at E1 and does accumulation at E2,
+;; so the operand Rt is required at E2, and operands Ra and Rb are required
+;; at E1.
+;; ADDR_IN
+;; If an instruction requires an address as its input operand, the address
+;; is required at E1.
+;; ST
+;; A store instruction requires its data at E2.
+;; SMW(N, M)
+;; There are N micro-operations within an instruction that stores multiple
+;; words. Each M-th micro-operation requires its data at E2.
+;; BR
+;; If a branch instruction is conditional, its input data is required at E2.
+
+;; LD -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
+(define_bypass 3
+ "nds_n13_load"
+ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
+ nds_n13_mul, nds_n13_mac, nds_n13_div,\
+ nds_n13_mmu,\
+ nds_n13_load, nds_n13_store,\
+ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+ "nds32_n13_load_to_e1_p"
+)
+
+;; LD -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1)
+(define_bypass 2
+ "nds_n13_load"
+ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\
+ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+ "nds32_n13_load_to_e2_p"
+)
+
+;; LMW(N, N) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
+(define_bypass 3
+ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
+ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
+ nds_n13_mul, nds_n13_mac, nds_n13_div,\
+ nds_n13_mmu,\
+ nds_n13_load, nds_n13_store,\
+ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+ "nds32_n13_last_load_to_e1_p")
+
+;; LMW(N, N) -> ALU, ALU_SHIFT_Rb, PBSADA_Rt, BR, ST, SMW(N, 1)
+(define_bypass 2
+ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
+ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsada, nds_n13_branch, nds_n13_store,\
+ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+ "nds32_n13_last_load_to_e2_p"
+)
+
+;; LMW(N, N - 1) -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
+(define_bypass 2
+ "nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12"
+ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
+ nds_n13_mul, nds_n13_mac, nds_n13_div,\
+ nds_n13_mmu,\
+ nds_n13_load, nds_n13_store,\
+ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+ "nds32_n13_last_two_load_to_e1_p")
+
+;; ALU, ALU_SHIFT, SIMD, BR, MUL, MAC, DIV, ADDR_OUT
+;; -> ALU_E1, PBSAD, PBSADA_RaRb, MUL, MAC_RaRb, DIV, MMU, ADDR_IN
+(define_bypass 2
+ "nds_n13_alu, nds_n13_alu_shift, nds_n13_pbsad, nds_n13_pbsada, nds_n13_branch,\
+ nds_n13_mul, nds_n13_mac, nds_n13_div,\
+ nds_n13_load, nds_n13_store,\
+ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+ "nds_n13_alu, nds_n13_pbsad, nds_n13_pbsada,\
+ nds_n13_mul, nds_n13_mac, nds_n13_div,\
+ nds_n13_mmu,\
+ nds_n13_load, nds_n13_store,\
+ nds_n13_load_multiple_1,nds_n13_load_multiple_2, nds_n13_load_multiple_3,\
+ nds_n13_load_multiple_4,nds_n13_load_multiple_5, nds_n13_load_multiple_6,\
+ nds_n13_load_multiple_7,nds_n13_load_multiple_8, nds_n13_load_multiple_12,\
+ nds_n13_store_multiple_1,nds_n13_store_multiple_2, nds_n13_store_multiple_3,\
+ nds_n13_store_multiple_4,nds_n13_store_multiple_5, nds_n13_store_multiple_6,\
+ nds_n13_store_multiple_7,nds_n13_store_multiple_8, nds_n13_store_multiple_12"
+ "nds32_n13_e2_to_e1_p")
return false;
}
+/* Determine if the latency is occured when the consumer PBSADA_INSN uses the
+ value of DEF_REG in its Rt field. */
+bool
+pbsada_insn_rt_dep_reg_p (rtx pbsada_insn, rtx def_reg)
+{
+ rtx pbsada_rt = SET_DEST (PATTERN (pbsada_insn));
+
+ if (rtx_equal_p (def_reg, pbsada_rt))
+ return true;
+
+ return false;
+}
+
/* Check if INSN is a movd44 insn consuming DEF_REG. */
bool
movd44_even_dep_p (rtx_insn *insn, rtx def_reg)
return false;
}
+/* Check dependencies from any stages to ALU_E1 (E1). This is a helper
+ function of n13_consumed_by_e1_dep_p (). */
+bool
+n13_alu_e1_insn_dep_reg_p (rtx_insn *alu_e1_insn, rtx def_reg)
+{
+ rtx unspec_rtx, operand_ra, operand_rb;
+ rtx src_rtx, dst_rtx;
+
+ switch (INSN_CODE (alu_e1_insn))
+ {
+ /* BSP and BSE are supported by built-in functions, the corresponding
+ patterns are formed by UNSPEC RTXs. We have to handle them
+ individually. */
+ case CODE_FOR_unspec_bsp:
+ case CODE_FOR_unspec_bse:
+ unspec_rtx = SET_SRC (parallel_element (alu_e1_insn, 0));
+ gcc_assert (GET_CODE (unspec_rtx) == UNSPEC);
+
+ operand_ra = XVECEXP (unspec_rtx, 0, 0);
+ operand_rb = XVECEXP (unspec_rtx, 0, 1);
+
+ if (rtx_equal_p (def_reg, operand_ra)
+ || rtx_equal_p (def_reg, operand_rb))
+ return true;
+
+ return false;
+
+ /* Unlink general ALU instructions, MOVD44 requires operands at E1. */
+ case CODE_FOR_move_di:
+ case CODE_FOR_move_df:
+ src_rtx = SET_SRC (PATTERN (alu_e1_insn));
+ dst_rtx = SET_DEST (PATTERN (alu_e1_insn));
+
+ if (REG_P (dst_rtx) && REG_P (src_rtx)
+ && rtx_equal_p (src_rtx, def_reg))
+ return true;
+
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+/* Check the dependency between the producer defining DEF_REG and CONSUMER
+ requiring input operand at E1. Because the address generation unti is
+ at E1, the address input should be ready at E1. Note that the branch
+ target is also a kind of addresses, so we have to check it. */
+bool
+n13_consumed_by_e1_dep_p (rtx_insn *consumer, rtx def_reg)
+{
+ rtx use_rtx;
+
+ switch (get_attr_type (consumer))
+ {
+ /* ALU_E1 */
+ case TYPE_ALU:
+ return n13_alu_e1_insn_dep_reg_p (consumer, def_reg);
+
+ case TYPE_PBSADA:
+ return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);
+
+ case TYPE_PBSAD:
+ case TYPE_MUL:
+ use_rtx = SET_SRC (PATTERN (consumer));
+ break;
+
+ case TYPE_MAC:
+ use_rtx = extract_mac_non_acc_rtx (consumer);
+ break;
+
+ case TYPE_DIV:
+ if (divmod_p (consumer))
+ use_rtx = SET_SRC (parallel_element (consumer, 0));
+ else
+ use_rtx = SET_SRC (PATTERN (consumer));
+ break;
+
+ case TYPE_MMU:
+ if (GET_CODE (PATTERN (consumer)) == SET)
+ use_rtx = SET_SRC (PATTERN (consumer));
+ else
+ return true;
+ break;
+
+ case TYPE_BRANCH:
+ use_rtx = extract_branch_target_rtx (consumer);
+ break;
+
+ case TYPE_LOAD:
+ case TYPE_STORE:
+ use_rtx = extract_mem_rtx (consumer);
+ break;
+
+ case TYPE_LOAD_MULTIPLE:
+ case TYPE_STORE_MULTIPLE:
+ use_rtx = extract_base_reg (consumer);
+ break;
+
+ default:
+ return false;
+ }
+
+ if (reg_overlap_p (def_reg, use_rtx))
+ return true;
+
+ return false;
+}
+
+/* Check the dependency between the producer defining DEF_REG and CONSUMER
+ requiring input operand at E2. */
+bool
+n13_consumed_by_e2_dep_p (rtx_insn *consumer, rtx def_reg)
+{
+ rtx use_rtx;
+
+ switch (get_attr_type (consumer))
+ {
+ case TYPE_ALU:
+ case TYPE_STORE:
+ use_rtx = SET_SRC (PATTERN (consumer));
+ break;
+
+ case TYPE_ALU_SHIFT:
+ use_rtx = extract_shift_reg (consumer);
+ break;
+
+ case TYPE_PBSADA:
+ return pbsada_insn_rt_dep_reg_p (consumer, def_reg);
+
+ case TYPE_STORE_MULTIPLE:
+ use_rtx = extract_nth_access_rtx (consumer, 0);
+ break;
+
+ case TYPE_BRANCH:
+ use_rtx = extract_branch_condition_rtx (consumer);
+ break;
+
+ default:
+ gcc_unreachable();
+ }
+
+ if (reg_overlap_p (def_reg, use_rtx))
+ return true;
+
+ return false;
+}
} // anonymous namespace
/* ------------------------------------------------------------------------ */
return n10_consumed_by_ex_dep_p (consumer, last_def_reg);
}
+
+/* Guard functions for N12/N13 cores. */
+
+/* Check dependencies from E2 to E1. */
+bool
+nds32_n13_e2_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
+{
+ rtx def_reg;
+
+ switch (get_attr_type (producer))
+ {
+ /* Only post-update load/store instructions are considered. These
+ instructions produces address output at E2. */
+ case TYPE_LOAD:
+ case TYPE_STORE:
+ case TYPE_LOAD_MULTIPLE:
+ case TYPE_STORE_MULTIPLE:
+ if (!post_update_insn_p (producer))
+ return false;
+
+ def_reg = extract_base_reg (producer);
+ break;
+
+ case TYPE_ALU:
+ case TYPE_ALU_SHIFT:
+ case TYPE_PBSAD:
+ case TYPE_PBSADA:
+ case TYPE_MUL:
+ case TYPE_MAC:
+ def_reg = SET_DEST (PATTERN (producer));
+ break;
+
+ case TYPE_BRANCH:
+ return true;
+
+ case TYPE_DIV:
+ /* Some special instructions, divmodsi4 and udivmodsi4, produce two
+ results, the quotient and the remainder. We have to handle them
+ individually. */
+ if (divmod_p (producer))
+ {
+ rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
+ rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
+
+ return (n13_consumed_by_e1_dep_p (consumer, def_reg1)
+ || n13_consumed_by_e1_dep_p (consumer, def_reg2));
+ }
+
+ def_reg = SET_DEST (PATTERN (producer));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return n13_consumed_by_e1_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from Load-Store Unit (E3) to E1. */
+bool
+nds32_n13_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
+{
+ rtx def_reg = SET_DEST (PATTERN (producer));
+
+ gcc_assert (get_attr_type (producer) == TYPE_LOAD);
+ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
+
+ return n13_consumed_by_e1_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from Load-Store Unit (E3) to E2. */
+bool
+nds32_n13_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
+{
+ rtx def_reg = SET_DEST (PATTERN (producer));
+
+ gcc_assert (get_attr_type (producer) == TYPE_LOAD);
+ gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
+
+ return n13_consumed_by_e2_dep_p (consumer, def_reg);
+}
+
+/* Check dependencies from LMW(N, N) to E1. */
+bool
+nds32_n13_last_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
+{
+ rtx last_def_reg = extract_nth_access_reg (producer, -1);
+
+ return n13_consumed_by_e1_dep_p (consumer, last_def_reg);
+}
+
+/* Check dependencies from LMW(N, N) to E2. */
+bool
+nds32_n13_last_load_to_e2_p (rtx_insn *producer, rtx_insn *consumer)
+{
+ rtx last_def_reg = extract_nth_access_reg (producer, -1);
+
+ return n13_consumed_by_e2_dep_p (consumer, last_def_reg);
+}
+
+/* Check dependencies from LMW(N, N-1) to E2. */
+bool
+nds32_n13_last_two_load_to_e1_p (rtx_insn *producer, rtx_insn *consumer)
+{
+ rtx last_two_def_reg = extract_nth_access_reg (producer, -2);
+
+ if (last_two_def_reg == NULL_RTX)
+ return false;
+
+ return n13_consumed_by_e1_dep_p (consumer, last_two_def_reg);
+}
/* ------------------------------------------------------------------------ */