vc4: Make vc4_qpu_validate() produce more verbose failures.
[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu_validate.c
1
2 /*
3 * Copyright © 2014 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "vc4_qpu.h"
26
/**
 * Reports a validation failure for a single instruction and terminates.
 *
 * Writes the "vc4_qpu_validate: <msg>: " prefix to stderr, passes the
 * offending instruction to vc4_qpu_disasm(), finishes the line on stderr
 * and then calls abort(), so this function never returns to its caller.
 */
static void
fail_instr(uint64_t inst, const char *msg)
{
        /* vc4_qpu_disasm() takes an array, so hand it a one-element copy. */
        uint64_t offending = inst;

        fprintf(stderr, "vc4_qpu_validate: %s: ", msg);
        vc4_qpu_disasm(&offending, 1);
        fputs("\n", stderr);

        abort();
}
35
36 static bool
37 writes_reg(uint64_t inst, uint32_t w)
38 {
39 return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w ||
40 QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w);
41 }
42
43 static bool
44 _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
45 {
46 struct {
47 uint32_t mux, addr;
48 } src_regs[] = {
49 { QPU_GET_FIELD(inst, QPU_ADD_A) },
50 { QPU_GET_FIELD(inst, QPU_ADD_B) },
51 { QPU_GET_FIELD(inst, QPU_MUL_A) },
52 { QPU_GET_FIELD(inst, QPU_MUL_B) },
53 };
54
55 for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
56 if (!ignore_a &&
57 src_regs[i].mux == QPU_MUX_A &&
58 (QPU_GET_FIELD(inst, QPU_RADDR_A) == r))
59 return true;
60
61 if (!ignore_b &&
62 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM &&
63 src_regs[i].mux == QPU_MUX_B &&
64 (QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
65 return true;
66 }
67
68 return false;
69 }
70
/**
 * Returns true when \p inst reads address \p r through either the A or the
 * B read-address field, as decided by _reads_reg().
 */
static bool
reads_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
76
/**
 * Returns true when \p inst reads address \p r through the A read-address
 * field; reads through the B field are ignored.
 */
static bool
reads_a_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = true;

        return _reads_reg(inst, r, skip_a, skip_b);
}
82
/**
 * Returns true when \p inst reads address \p r through the B read-address
 * field; reads through the A field are ignored.
 */
static bool
reads_b_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = true;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
88
89 static bool
90 writes_sfu(uint64_t inst)
91 {
92 return (writes_reg(inst, QPU_W_SFU_RECIP) ||
93 writes_reg(inst, QPU_W_SFU_RECIPSQRT) ||
94 writes_reg(inst, QPU_W_SFU_EXP) ||
95 writes_reg(inst, QPU_W_SFU_LOG));
96 }
97
98 /**
99 * Checks for the instruction restrictions from page 37 ("Summary of
100 * Instruction Restrictions").
101 */
102 void
103 vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
104 {
105 bool scoreboard_locked = false;
106
107 /* We don't want to do validation in release builds, but we want to
108 * keep compiling the validation code to make sure it doesn't get
109 * broken.
110 */
111 #ifndef DEBUG
112 return;
113 #endif
114
115 for (int i = 0; i < num_inst; i++) {
116 uint64_t inst = insts[i];
117
118 if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) {
119 if (qpu_inst_is_tlb(inst))
120 scoreboard_locked = true;
121
122 continue;
123 }
124
125 /* "The Thread End instruction must not write to either physical
126 * regfile A or B."
127 */
128 if (QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32 ||
129 QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32) {
130 fail_instr(inst, "write to phys reg in thread end");
131 }
132
133 /* Can't trigger an implicit wait on scoreboard in the program
134 * end instruction.
135 */
136 if (qpu_inst_is_tlb(inst) && !scoreboard_locked)
137 fail_instr(inst, "implicit sb wait in program end");
138
139 /* Two delay slots will be executed. */
140 assert(i + 2 <= num_inst);
141
142 for (int j = i; j < i + 2; j++) {
143 /* "The last three instructions of any program
144 * (Thread End plus the following two delay-slot
145 * instructions) must not do varyings read, uniforms
146 * read or any kind of VPM, VDR, or VDW read or
147 * write."
148 */
149 if (writes_reg(insts[j], QPU_W_VPM) ||
150 reads_reg(insts[j], QPU_R_VARY) ||
151 reads_reg(insts[j], QPU_R_UNIF) ||
152 reads_reg(insts[j], QPU_R_VPM)) {
153 fail_instr(insts[j], "last 3 instructions "
154 "using fixed functions");
155 }
156
157 /* "The Thread End instruction and the following two
158 * delay slot instructions must not write or read
159 * address 14 in either regfile A or B."
160 */
161 if (writes_reg(insts[j], 14) ||
162 reads_reg(insts[j], 14)) {
163 fail_instr(insts[j], "last 3 instructions "
164 "must not use r14");
165 }
166 }
167
168 /* "The final program instruction (the second delay slot
169 * instruction) must not do a TLB Z write."
170 */
171 if (writes_reg(insts[i + 2], QPU_W_TLB_Z)) {
172 fail_instr(insts[i + 2], "final instruction doing "
173 "Z write");
174 }
175 }
176
177 /* "A scoreboard wait must not occur in the first two instructions of
178 * a fragment shader. This is either the explicit Wait for Scoreboard
179 * signal or an implicit wait with the first tile-buffer read or
180 * write instruction."
181 */
182 for (int i = 0; i < 2; i++) {
183 uint64_t inst = insts[i];
184
185 if (qpu_inst_is_tlb(inst))
186 fail_instr(inst, "sb wait in first two insts");
187 }
188
189 /* "If TMU_NOSWAP is written, the write must be three instructions
190 * before the first TMU write instruction. For example, if
191 * TMU_NOSWAP is written in the first shader instruction, the first
192 * TMU write cannot occur before the 4th shader instruction."
193 */
194 int last_tmu_noswap = -10;
195 for (int i = 0; i < num_inst; i++) {
196 uint64_t inst = insts[i];
197
198 if ((i - last_tmu_noswap) <= 3 &&
199 (writes_reg(inst, QPU_W_TMU0_S) ||
200 writes_reg(inst, QPU_W_TMU1_S))) {
201 fail_instr(inst, "TMU write too soon after TMU_NOSWAP");
202 }
203
204 if (writes_reg(inst, QPU_W_TMU_NOSWAP))
205 last_tmu_noswap = i;
206 }
207
208 /* "An instruction must not read from a location in physical regfile A
209 * or B that was written to by the previous instruction."
210 */
211 for (int i = 0; i < num_inst - 1; i++) {
212 uint64_t inst = insts[i];
213 uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
214 uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
215 uint32_t waddr_a, waddr_b;
216
217 if (inst & QPU_WS) {
218 waddr_b = add_waddr;
219 waddr_a = mul_waddr;
220 } else {
221 waddr_a = add_waddr;
222 waddr_b = mul_waddr;
223 }
224
225 if ((waddr_a < 32 && reads_a_reg(insts[i + 1], waddr_a)) ||
226 (waddr_b < 32 && reads_b_reg(insts[i + 1], waddr_b))) {
227 fail_instr(insts[i + 1],
228 "Reads physical reg too soon after write");
229 }
230 }
231
232 /* "After an SFU lookup instruction, accumulator r4 must not be read
233 * in the following two instructions. Any other instruction that
234 * results in r4 being written (that is, TMU read, TLB read, SFU
235 * lookup) cannot occur in the two instructions following an SFU
236 * lookup."
237 */
238 int last_sfu_inst = -10;
239 for (int i = 0; i < num_inst - 1; i++) {
240 uint64_t inst = insts[i];
241 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
242
243 if (i - last_sfu_inst <= 2 &&
244 (writes_sfu(inst) ||
245 sig == QPU_SIG_LOAD_TMU0 ||
246 sig == QPU_SIG_LOAD_TMU1 ||
247 sig == QPU_SIG_COLOR_LOAD)) {
248 fail_instr(inst, "R4 write too soon after SFU write");
249 }
250
251 if (writes_sfu(inst))
252 last_sfu_inst = i;
253 }
254
255 int last_r5_write = -10;
256 for (int i = 0; i < num_inst - 1; i++) {
257 uint64_t inst = insts[i];
258
259 /* "An instruction that does a vector rotate by r5 must not
260 * immediately follow an instruction that writes to r5."
261 */
262 if (last_r5_write == i - 1 &&
263 QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM &&
264 QPU_GET_FIELD(inst, QPU_SMALL_IMM) == 48) {
265 fail_instr(inst,
266 "vector rotate by r5 immediately "
267 "after r5 write");
268 }
269 }
270
271 /* "An instruction that does a vector rotate must not immediately
272 * follow an instruction that writes to the accumulator that is being
273 * rotated.
274 *
275 * XXX: TODO.
276 */
277
278 /* "After an instruction that does a TLB Z write, the multisample mask
279 * must not be read as an instruction input argument in the following
280 * two instruction. The TLB Z write instruction can, however, be
281 * followed immediately by a TLB color write."
282 */
283 for (int i = 0; i < num_inst - 1; i++) {
284 uint64_t inst = insts[i];
285 if (writes_reg(inst, QPU_W_TLB_Z) &&
286 (reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS) ||
287 reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS))) {
288 fail_instr(inst, "TLB Z write followed by MS mask read");
289 }
290 }
291
292 /*
293 * "A single instruction can only perform a maximum of one of the
294 * following closely coupled peripheral accesses in a single
295 * instruction: TMU write, TMU read, TLB write, TLB read, TLB
296 * combined color read and write, SFU write, Mutex read or Semaphore
297 * access."
298 */
299 for (int i = 0; i < num_inst - 1; i++) {
300 uint64_t inst = insts[i];
301
302 if (qpu_num_sf_accesses(inst) > 1)
303 fail_instr(inst, "Single instruction writes SFU twice");
304 }
305 }