/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file
 *
 * Validates the QPU instruction sequence after register allocation and
 * scheduling.
 */
34 #include "v3d_compiler.h"
35 #include "qpu/qpu_disasm.h"
/**
 * Per-program state tracked while walking the instruction stream.
 *
 * The ip / last_*_ip fields are instruction indices used for the
 * delay-slot distance checks; they start at large negative values so the
 * first few instructions are never considered "in delay slots".
 */
struct v3d_qpu_validate_state {
        struct v3d_compile *c;
        const struct v3d_qpu_instr *last;

        int ip;
        int last_sfu_write;
        int last_branch_ip;
        int last_thrsw_ip;

        /* Set when we've found the last-THRSW signal, or if we were started
         * in single-segment mode.
         */
        bool last_thrsw_found;

        /* Set when we've found the THRSW after the last THRSW */
        bool thrend_found;

        int thrsw_count;
};
57 fail_instr(struct v3d_qpu_validate_state
*state
, const char *msg
)
59 struct v3d_compile
*c
= state
->c
;
61 fprintf(stderr
, "v3d_qpu_validate at ip %d: %s:\n", state
->ip
, msg
);
64 vir_for_each_inst_inorder(inst
, c
) {
65 v3d_qpu_dump(c
->devinfo
, &inst
->qpu
);
67 if (dump_ip
++ == state
->ip
)
68 fprintf(stderr
, " *** ERROR ***");
70 fprintf(stderr
, "\n");
73 fprintf(stderr
, "\n");
78 in_branch_delay_slots(struct v3d_qpu_validate_state
*state
)
80 return (state
->ip
- state
->last_branch_ip
) < 3;
84 in_thrsw_delay_slots(struct v3d_qpu_validate_state
*state
)
86 return (state
->ip
- state
->last_thrsw_ip
) < 3;
90 qpu_magic_waddr_matches(const struct v3d_qpu_instr
*inst
,
91 bool (*predicate
)(enum v3d_qpu_waddr waddr
))
93 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
)
96 if (inst
->alu
.add
.op
!= V3D_QPU_A_NOP
&&
97 inst
->alu
.add
.magic_write
&&
98 predicate(inst
->alu
.add
.waddr
))
101 if (inst
->alu
.mul
.op
!= V3D_QPU_M_NOP
&&
102 inst
->alu
.mul
.magic_write
&&
103 predicate(inst
->alu
.mul
.waddr
))
110 qpu_validate_inst(struct v3d_qpu_validate_state
*state
, struct qinst
*qinst
)
112 const struct v3d_device_info
*devinfo
= state
->c
->devinfo
;
113 const struct v3d_qpu_instr
*inst
= &qinst
->qpu
;
115 if (inst
->type
!= V3D_QPU_INSTR_TYPE_ALU
)
118 /* LDVARY writes r5 two instructions later and LDUNIF writes
119 * r5 one instruction later, which is illegal to have
122 if (state
->last
&& state
->last
->sig
.ldvary
&&
123 (inst
->sig
.ldunif
|| inst
->sig
.ldunifa
)) {
124 fail_instr(state
, "LDUNIF after a LDVARY");
133 if (inst
->alu
.add
.op
!= V3D_QPU_A_NOP
) {
134 if (inst
->alu
.add
.magic_write
) {
135 if (v3d_qpu_magic_waddr_is_tmu(inst
->alu
.add
.waddr
))
137 if (v3d_qpu_magic_waddr_is_sfu(inst
->alu
.add
.waddr
))
139 if (v3d_qpu_magic_waddr_is_vpm(inst
->alu
.add
.waddr
))
141 if (v3d_qpu_magic_waddr_is_tlb(inst
->alu
.add
.waddr
))
143 if (v3d_qpu_magic_waddr_is_tsy(inst
->alu
.add
.waddr
))
148 if (inst
->alu
.mul
.op
!= V3D_QPU_M_NOP
) {
149 if (inst
->alu
.mul
.magic_write
) {
150 if (v3d_qpu_magic_waddr_is_tmu(inst
->alu
.mul
.waddr
))
152 if (v3d_qpu_magic_waddr_is_sfu(inst
->alu
.mul
.waddr
))
154 if (v3d_qpu_magic_waddr_is_vpm(inst
->alu
.mul
.waddr
))
156 if (v3d_qpu_magic_waddr_is_tlb(inst
->alu
.mul
.waddr
))
158 if (v3d_qpu_magic_waddr_is_tsy(inst
->alu
.mul
.waddr
))
163 if (in_thrsw_delay_slots(state
)) {
164 /* There's no way you want to start SFU during the THRSW delay
165 * slots, since the result would land in the other thread.
169 "SFU write started during THRSW delay slots ");
172 if (inst
->sig
.ldvary
)
173 fail_instr(state
, "LDVARY during THRSW delay slots");
176 (void)qpu_magic_waddr_matches
; /* XXX */
178 /* SFU r4 results come back two instructions later. No doing
179 * r4 read/writes or other SFU lookups until it's done.
181 if (state
->ip
- state
->last_sfu_write
< 2) {
182 if (v3d_qpu_uses_mux(inst
, V3D_QPU_MUX_R4
))
183 fail_instr(state
, "R4 read too soon after SFU");
185 if (v3d_qpu_writes_r4(devinfo
, inst
))
186 fail_instr(state
, "R4 write too soon after SFU");
189 fail_instr(state
, "SFU write too soon after SFU");
192 /* XXX: The docs say VPM can happen with the others, but the simulator
203 inst
->sig
.ldtlbu
> 1) {
205 "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
209 state
->last_sfu_write
= state
->ip
;
211 if (inst
->sig
.thrsw
) {
212 if (in_branch_delay_slots(state
))
213 fail_instr(state
, "THRSW in a branch delay slot.");
215 if (state
->last_thrsw_found
)
216 state
->thrend_found
= true;
218 if (state
->last_thrsw_ip
== state
->ip
- 1) {
219 /* If it's the second THRSW in a row, then it's just a
222 if (state
->last_thrsw_found
)
223 fail_instr(state
, "Two last-THRSW signals");
224 state
->last_thrsw_found
= true;
226 if (in_thrsw_delay_slots(state
)) {
228 "THRSW too close to another THRSW.");
230 state
->thrsw_count
++;
231 state
->last_thrsw_ip
= state
->ip
;
235 if (state
->thrend_found
&&
236 state
->last_thrsw_ip
- state
->ip
<= 2 &&
237 inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
238 if ((inst
->alu
.add
.op
!= V3D_QPU_A_NOP
&&
239 !inst
->alu
.add
.magic_write
)) {
240 fail_instr(state
, "RF write after THREND");
243 if ((inst
->alu
.mul
.op
!= V3D_QPU_M_NOP
&&
244 !inst
->alu
.mul
.magic_write
)) {
245 fail_instr(state
, "RF write after THREND");
248 if (v3d_qpu_sig_writes_address(devinfo
, &inst
->sig
))
249 fail_instr(state
, "RF write after THREND");
252 if (inst
->type
== V3D_QPU_INSTR_TYPE_BRANCH
) {
253 if (in_branch_delay_slots(state
))
254 fail_instr(state
, "branch in a branch delay slot.");
255 if (in_thrsw_delay_slots(state
))
256 fail_instr(state
, "branch in a THRSW delay slot.");
257 state
->last_branch_ip
= state
->ip
;
262 qpu_validate_block(struct v3d_qpu_validate_state
*state
, struct qblock
*block
)
264 vir_for_each_inst(qinst
, block
) {
265 qpu_validate_inst(state
, qinst
);
267 state
->last
= &qinst
->qpu
;
273 * Checks for the instruction restrictions from page 37 ("Summary of
274 * Instruction Restrictions").
277 qpu_validate(struct v3d_compile
*c
)
279 /* We don't want to do validation in release builds, but we want to
280 * keep compiling the validation code to make sure it doesn't get
287 struct v3d_qpu_validate_state state
= {
289 .last_sfu_write
= -10,
290 .last_thrsw_ip
= -10,
291 .last_branch_ip
= -10,
294 .last_thrsw_found
= !c
->last_thrsw
,
297 vir_for_each_block(block
, c
) {
298 qpu_validate_block(&state
, block
);
301 if (state
.thrsw_count
> 1 && !state
.last_thrsw_found
) {
303 "thread switch found without last-THRSW in program");
306 if (!state
.thrend_found
)
307 fail_instr(&state
, "No program-end THRSW found");