arm.md (attribute "insn"): Delete.
[gcc.git] / gcc / config / arm / fa726te.md
1 ;; Faraday FA726TE Pipeline Description
2 ;; Copyright (C) 2010-2013 Free Software Foundation, Inc.
3 ;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it under
8 ;; the terms of the GNU General Public License as published by the Free
9 ;; Software Foundation; either version 3, or (at your option) any later
10 ;; version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 ;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 ;; for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>. */
20
21 ;; These descriptions are based on the information contained in the
22 ;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
23
24 ;; This automaton provides a pipeline description for the Faraday
25 ;; FA726TE core.
26 ;;
27 ;; The model given here assumes that the condition for all conditional
28 ;; instructions is "true", i.e., that all of the instructions are
29 ;; actually executed.
30
;; Declare the automaton named "fa726te"; every CPU unit declared in this
;; file is attached to it.
31 (define_automaton "fa726te")
32
33 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34 ;; Pipelines
35 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36
37 ;; The ALU pipeline has fetch, decode, execute, memory, and
38 ;; write stages. We only need to model the execute, memory and write
39 ;; stages.
40
41 ;; E1 E2 E3 E4 E5 WB
42 ;;______________________________________________________
43 ;;
44 ;; <-------------- LD/ST ----------->
45 ;; shifter + LU <-- AU -->
46 ;; <-- AU --> shifter + LU CPSR (Pipe 0)
47 ;;______________________________________________________
48 ;;
49 ;; <---------- MUL --------->
50 ;; shifter + LU <-- AU -->
51 ;; <-- AU --> shifter + LU CPSR (Pipe 1)
52
53
;; Functional units of the "fa726te" automaton: two ALU pipes, the pipe
;; reserved by the multiply reservation (fa726te_mac_pipe), and two units
;; for the load/store pipe.
;; NOTE(review): the _e/_w suffixes presumably stand for the E-stage and
;; W-stage mentioned in the load/store comments -- confirm.
54 (define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te")
55 (define_cpu_unit "fa726te_mac_pipe" "fa726te")
56 (define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te")
57
58 ;; Pretend we have 2 LSUs (the second is ONLY for LDR), which can possibly
59 ;; improve code quality.  The second LSU's units are declared with
;; define_query_cpu_unit rather than define_cpu_unit.
60 (define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te")
;; The two issue slots; they are combined by the fa726te_issue and
;; fa726te_blockage reservations.
61 (define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te")
62
;; Normal issue: reserve either one of the two issue slots ("|" is an
;; alternative).
63 (define_reservation "fa726te_issue" "(fa726te_is0|fa726te_is1)")
64 ;; Reservation to restrict issue to 1: reserves both issue slots in the
;; same cycle ("+" is a simultaneous reservation).
65 (define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)")
66
67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
68 ;; ALU Instructions
69 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
70
71 ;; ALU instructions require three cycles to execute, and use the ALU
72 ;; pipeline in each of the three stages. The results are available
73 ;; after the execute stage has finished.
74 ;;
75 ;; If the destination register is the PC, the pipelines are stalled
76 ;; for several cycles. That case is not modeled here.
77
78 ;; Move instructions (the mov_* and mvn_* types): default latency 1,
;; reserving one issue slot together with either ALU pipe.
79 (define_insn_reservation "726te_shift_op" 1
80 (and (eq_attr "tune" "fa726te")
81 (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\
82 mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg"))
83 "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
84
85 ;; ALU operations with no shifted operand finish in 1 cycle.
86 ;; Other ALU instructions take 2 cycles.
;; NOTE(review): the shifted-operand reservations in this file declare a
;; default latency of 3, not 2 -- confirm which figure is intended.
;; This reservation has default latency 1 and reserves one issue slot
;; together with either ALU pipe.
87 (define_insn_reservation "726te_alu_op" 1
88 (and (eq_attr "tune" "fa726te")
89 (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg"))
90 "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
91
92 ;; ALU operations with a shift-by-register operand.
93 ;; These really stall in the decoder, in order to read the shift value
94 ;; in the first cycle. If the instruction uses both shifter and AU,
95 ;; it takes 3 cycles.
;; This reservation matches the "extend" and "arlo_shift" types: default
;; latency 3, one issue slot together with either ALU pipe.  The
;; "arlo_shift_reg" type is matched by 726te_alu_shift_reg_op.
96 (define_insn_reservation "726te_alu_shift_op" 3
97 (and (eq_attr "tune" "fa726te")
98 (eq_attr "type" "extend,arlo_shift"))
99 "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
100
;; Insns of type "arlo_shift_reg": default latency 3, one issue slot
;; together with either ALU pipe.
101 (define_insn_reservation "726te_alu_shift_reg_op" 3
102 (and (eq_attr "tune" "fa726te")
103 (eq_attr "type" "arlo_shift_reg"))
104 "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
105 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
106 ;; Multiplication Instructions
107 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
108
109 ;; Multiplication instructions loop in the execute stage until the
110 ;; instruction has been passed through the multiplier array enough
111 ;; times. Multiply operations occur in both the execute and memory
112 ;; stages of the pipeline.
113
;; The multiply-family types listed below: default latency 3, one issue
;; slot together with fa726te_mac_pipe.
114 (define_insn_reservation "726te_mult_op" 3
115 (and (eq_attr "tune" "fa726te")
116 (eq_attr "type" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
117 umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
118 "fa726te_issue+fa726te_mac_pipe")
119
120 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
121 ;; Load/Store Instructions
122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
123
124 ;; The models for load/store instructions do not accurately describe
125 ;; the difference between operations with a base register writeback
126 ;; (such as "ldm!"). These models assume that all memory references
127 ;; hit in dcache.
128
129 ;; Loads with a shifted offset take 3 cycles, and are (a) probably the
130 ;; most common and (b) the pessimistic assumption will lead to fewer stalls.
131
132 ;; Scalar loads are pipelined in FA726TE LSU pipe.
133 ;; Here we model the resource conflict between Load@E3-stage & Store@W-stage.
134 ;; The 2nd LSU (lsu1) models the fact that if 2 loads are scheduled in the
135 ;; same "bundle", the 2nd load will introduce another ISSUE stall but is
136 ;; still ok to execute (and may be beneficial sometimes).
137
;; Insns of type "load1" or "load_byte": default latency 3.  Two
;; alternatives ("|"): either one issue slot plus both units of the first
;; LSU in a single cycle, or one issue slot plus both units of the second
;; LSU (lsu1) followed by fa726te_blockage in the next cycle ("," advances
;; one cycle).
138 (define_insn_reservation "726te_load1_op" 3
139 (and (eq_attr "tune" "fa726te")
140 (eq_attr "type" "load1,load_byte"))
141 "(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\
142 | (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")
143
;; Insns of type "store1": default latency 1; reserves both issue slots
;; (fa726te_blockage) for 2 consecutive cycles.
144 (define_insn_reservation "726te_store1_op" 1
145 (and (eq_attr "tune" "fa726te")
146 (eq_attr "type" "store1"))
147 "fa726te_blockage*2")
148
149 ;; Load/Store Multiple blocks all pipelines in EX stages until WB.
150 ;; No other instructions can be issued together. Since they essentially
151 ;; prevent all scheduling opportunities, we model them together here.
152
153 ;; The LDM is broken into multiple load instructions; a later instruction
154 ;; in pipe 1 is stalled.
;; Insns of type "load2" or "load3": default latency 4; reserves both
;; issue slots (fa726te_blockage) for 4 consecutive cycles.
155 (define_insn_reservation "726te_ldm2_op" 4
156 (and (eq_attr "tune" "fa726te")
157 (eq_attr "type" "load2,load3"))
158 "fa726te_blockage*4")
159
;; Insns of type "load4": default latency 5; reserves both issue slots
;; (fa726te_blockage) for 5 consecutive cycles.
160 (define_insn_reservation "726te_ldm3_op" 5
161 (and (eq_attr "tune" "fa726te")
162 (eq_attr "type" "load4"))
163 "fa726te_blockage*5")
164
;; Insns of type "store2" or "store3": default latency 2; reserves both
;; issue slots (fa726te_blockage) for 3 consecutive cycles.
165 (define_insn_reservation "726te_stm2_op" 2
166 (and (eq_attr "tune" "fa726te")
167 (eq_attr "type" "store2,store3"))
168 "fa726te_blockage*3")
169
;; Insns of type "store4": default latency 3; reserves both issue slots
;; (fa726te_blockage) for 4 consecutive cycles.
170 (define_insn_reservation "726te_stm3_op" 3
171 (and (eq_attr "tune" "fa726te")
172 (eq_attr "type" "store4"))
173 "fa726te_blockage*4")
174
;; Bypasses.  Each (define_bypass LATENCY PRODUCERS CONSUMERS [GUARD])
;; replaces the producer's default latency for the listed consumer
;; reservations.  The guard functions named here are defined outside this
;; file.
;; NOTE(review): the guard names suggest they test whether the consumer
;; needs the value early (store address, shift operand) -- confirm against
;; their definitions.

;; Any load feeding any store: latency 1, guarded by
;; arm_no_early_store_addr_dep.
175 (define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\
176 726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep")
;; Any ALU or multiply result feeding a single store: latency 0, same guard.
177 (define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\
178 726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op"
179 "arm_no_early_store_addr_dep")
;; Move / unshifted ALU result feeding a move / unshifted ALU insn: latency 0.
180 (define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op")
;; Shifted-operand ALU result feeding a move / unshifted ALU insn: latency 1.
181 (define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op"
182 "726te_shift_op,726te_alu_op")
;; Shifted-operand ALU or multiply result feeding 726te_alu_shift_op:
;; latency 1, guarded by arm_no_early_alu_shift_dep.
183 (define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
184 "726te_alu_shift_op" "arm_no_early_alu_shift_dep")
;; Same producers feeding 726te_alu_shift_reg_op: latency 1, guarded by
;; arm_no_early_alu_shift_value_dep.
185 (define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
186 "726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep")
;; Multiply result feeding a move / unshifted ALU insn: latency 1.
187 (define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op")
188
;; A load result feeding a multiply: each bypass latency here is one more
;; than the default latency declared by the corresponding load reservation
;; (3, 4 and 5 respectively).
189 (define_bypass 4 "726te_load1_op" "726te_mult_op")
190 (define_bypass 5 "726te_ldm2_op" "726te_mult_op")
191 (define_bypass 6 "726te_ldm3_op" "726te_mult_op")
192
193 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
194 ;; Branch and Call Instructions
195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
196
197 ;; Branch instructions are difficult to model accurately. The FA726TE
198 ;; core can predict most branches. If the branch is predicted
199 ;; correctly, and predicted early enough, the branch can be completely
200 ;; eliminated from the instruction stream. Some branches can
201 ;; therefore appear to require zero cycles to execute. We assume that
202 ;; all branches are predicted correctly, and that the latency is
203 ;; therefore the minimum value.
204
;; Insns of type "branch": default latency 0; reserves both issue slots
;; (fa726te_blockage) for one cycle.
205 (define_insn_reservation "726te_branch_op" 0
206 (and (eq_attr "tune" "fa726te")
207 (eq_attr "type" "branch"))
208 "fa726te_blockage")
209
210 ;; The latency for a call is actually the latency until the result is
211 ;; available, i.e. R0 is ready for an int return value.
;; Insns of type "call": default latency 1; reserves both issue slots
;; (fa726te_blockage) for one cycle.
212 (define_insn_reservation "726te_call_op" 1
213 (and (eq_attr "tune" "fa726te")
214 (eq_attr "type" "call"))
215 "fa726te_blockage")
216