2 * Copyright © 2014 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 * Validates the QPU instruction sequence after register allocation and
34 #include "v3d_compiler.h"
35 #include "qpu/qpu_disasm.h"
37 struct v3d_qpu_validate_state
{
38 struct v3d_compile
*c
;
39 const struct v3d_qpu_instr
*last
;
44 bool last_thrsw_found
;
49 fail_instr(struct v3d_qpu_validate_state
*state
, const char *msg
)
51 struct v3d_compile
*c
= state
->c
;
53 fprintf(stderr
, "v3d_qpu_validate at ip %d: %s:\n", state
->ip
, msg
);
56 vir_for_each_inst_inorder(inst
, c
) {
57 v3d_qpu_dump(c
->devinfo
, &inst
->qpu
);
59 if (dump_ip
++ == state
->ip
)
60 fprintf(stderr
, " *** ERROR ***");
62 fprintf(stderr
, "\n");
65 fprintf(stderr
, "\n");
70 in_branch_delay_slots(struct v3d_qpu_validate_state
*state
)
72 return (state
->ip
- state
->last_branch_ip
) < 3;
76 in_thrsw_delay_slots(struct v3d_qpu_validate_state
*state
)
78 return (state
->ip
- state
->last_thrsw_ip
) < 3;
82 qpu_magic_waddr_matches(const struct v3d_qpu_instr
*inst
,
83 bool (*predicate
)(enum v3d_qpu_waddr waddr
))
85 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
)
88 if (inst
->alu
.add
.op
!= V3D_QPU_A_NOP
&&
89 inst
->alu
.add
.magic_write
&&
90 predicate(inst
->alu
.add
.waddr
))
93 if (inst
->alu
.mul
.op
!= V3D_QPU_M_NOP
&&
94 inst
->alu
.mul
.magic_write
&&
95 predicate(inst
->alu
.mul
.waddr
))
102 qpu_validate_inst(struct v3d_qpu_validate_state
*state
, struct qinst
*qinst
)
104 const struct v3d_device_info
*devinfo
= state
->c
->devinfo
;
105 const struct v3d_qpu_instr
*inst
= &qinst
->qpu
;
107 if (inst
->type
!= V3D_QPU_INSTR_TYPE_ALU
)
110 /* LDVARY writes r5 two instructions later and LDUNIF writes
111 * r5 one instruction later, which is illegal to have
114 if (state
->last
&& state
->last
->sig
.ldvary
&&
115 (inst
->sig
.ldunif
|| inst
->sig
.ldunifa
)) {
116 fail_instr(state
, "LDUNIF after a LDVARY");
125 if (inst
->alu
.add
.op
!= V3D_QPU_A_NOP
) {
126 if (inst
->alu
.add
.magic_write
) {
127 if (v3d_qpu_magic_waddr_is_tmu(inst
->alu
.add
.waddr
))
129 if (v3d_qpu_magic_waddr_is_sfu(inst
->alu
.add
.waddr
))
131 if (v3d_qpu_magic_waddr_is_vpm(inst
->alu
.add
.waddr
))
133 if (v3d_qpu_magic_waddr_is_tlb(inst
->alu
.add
.waddr
))
135 if (v3d_qpu_magic_waddr_is_tsy(inst
->alu
.add
.waddr
))
140 if (inst
->alu
.mul
.op
!= V3D_QPU_M_NOP
) {
141 if (inst
->alu
.mul
.magic_write
) {
142 if (v3d_qpu_magic_waddr_is_tmu(inst
->alu
.mul
.waddr
))
144 if (v3d_qpu_magic_waddr_is_sfu(inst
->alu
.mul
.waddr
))
146 if (v3d_qpu_magic_waddr_is_vpm(inst
->alu
.mul
.waddr
))
148 if (v3d_qpu_magic_waddr_is_tlb(inst
->alu
.mul
.waddr
))
150 if (v3d_qpu_magic_waddr_is_tsy(inst
->alu
.mul
.waddr
))
155 if (in_thrsw_delay_slots(state
)) {
156 /* There's no way you want to start SFU during the THRSW delay
157 * slots, since the result would land in the other thread.
161 "SFU write started during THRSW delay slots ");
164 if (inst
->sig
.ldvary
)
165 fail_instr(state
, "LDVARY during THRSW delay slots");
168 (void)qpu_magic_waddr_matches
; /* XXX */
170 /* SFU r4 results come back two instructions later. No doing
171 * r4 read/writes or other SFU lookups until it's done.
173 if (state
->ip
- state
->last_sfu_write
< 2) {
174 if (v3d_qpu_uses_mux(inst
, V3D_QPU_MUX_R4
))
175 fail_instr(state
, "R4 read too soon after SFU");
177 if (v3d_qpu_writes_r4(devinfo
, inst
))
178 fail_instr(state
, "R4 write too soon after SFU");
181 fail_instr(state
, "SFU write too soon after SFU");
184 /* XXX: The docs say VPM can happen with the others, but the simulator
195 inst
->sig
.ldtlbu
> 1) {
197 "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
201 state
->last_sfu_write
= state
->ip
;
203 if (inst
->sig
.thrsw
) {
204 if (in_branch_delay_slots(state
))
205 fail_instr(state
, "THRSW in a branch delay slot.");
207 if (state
->last_thrsw_ip
== state
->ip
- 1) {
208 /* If it's the second THRSW in a row, then it's just a
211 if (state
->last_thrsw_found
)
212 fail_instr(state
, "Two last-THRSW signals");
213 state
->last_thrsw_found
= true;
215 if (in_thrsw_delay_slots(state
)) {
217 "THRSW too close to another THRSW.");
219 state
->thrsw_count
++;
220 state
->last_thrsw_ip
= state
->ip
;
224 if (inst
->type
== V3D_QPU_INSTR_TYPE_BRANCH
) {
225 if (in_branch_delay_slots(state
))
226 fail_instr(state
, "branch in a branch delay slot.");
227 if (in_thrsw_delay_slots(state
))
228 fail_instr(state
, "branch in a THRSW delay slot.");
229 state
->last_branch_ip
= state
->ip
;
234 qpu_validate_block(struct v3d_qpu_validate_state
*state
, struct qblock
*block
)
236 vir_for_each_inst(qinst
, block
) {
237 qpu_validate_inst(state
, qinst
);
239 state
->last
= &qinst
->qpu
;
245 * Checks for the instruction restrictions from page 37 ("Summary of
246 * Instruction Restrictions").
249 qpu_validate(struct v3d_compile
*c
)
251 /* We don't want to do validation in release builds, but we want to
252 * keep compiling the validation code to make sure it doesn't get
259 struct v3d_qpu_validate_state state
= {
261 .last_sfu_write
= -10,
262 .last_thrsw_ip
= -10,
263 .last_branch_ip
= -10,
267 vir_for_each_block(block
, c
) {
268 qpu_validate_block(&state
, block
);
271 if (state
.thrsw_count
> 1 && !state
.last_thrsw_found
) {
273 "thread switch found without last-THRSW in program");
276 if (state
.thrsw_count
== 0 ||
277 (state
.last_thrsw_found
&& state
.thrsw_count
== 1)) {
278 fail_instr(&state
, "No program-end THRSW found");