Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu_validate.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vc4_qpu.h"
25
26 #ifdef NDEBUG
27 /* Since most of our code is used in assert()s, don't warn about dead code. */
28 #pragma GCC diagnostic ignored "-Wunused-but-set-variable"
29 #pragma GCC diagnostic ignored "-Wunused-variable"
30 #pragma GCC diagnostic ignored "-Wunused-function"
31 #endif
32
33 static bool
34 writes_reg(uint64_t inst, uint32_t w)
35 {
36 return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w ||
37 QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w);
38 }
39
40 static bool
41 _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
42 {
43 struct {
44 uint32_t mux, addr;
45 } src_regs[] = {
46 { QPU_GET_FIELD(inst, QPU_ADD_A) },
47 { QPU_GET_FIELD(inst, QPU_ADD_B) },
48 { QPU_GET_FIELD(inst, QPU_MUL_A) },
49 { QPU_GET_FIELD(inst, QPU_MUL_B) },
50 };
51
52 for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
53 if (!ignore_a &&
54 src_regs[i].mux == QPU_MUX_A &&
55 (QPU_GET_FIELD(inst, QPU_RADDR_A) == r))
56 return true;
57
58 if (!ignore_b &&
59 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM &&
60 src_regs[i].mux == QPU_MUX_B &&
61 (QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
62 return true;
63 }
64
65 return false;
66 }
67
/**
 * Returns true if the instruction reads address r through either the
 * raddr_a or the raddr_b path (neither side is ignored).
 */
static bool
reads_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
73
/**
 * Returns true if the instruction reads address r through the raddr_a
 * path; the raddr_b path is ignored.
 */
static bool
reads_a_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = true;

        return _reads_reg(inst, r, skip_a, skip_b);
}
79
/**
 * Returns true if the instruction reads address r through the raddr_b
 * path; the raddr_a path is ignored.
 */
static bool
reads_b_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = true;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
85
86 static bool
87 writes_sfu(uint64_t inst)
88 {
89 return (writes_reg(inst, QPU_W_SFU_RECIP) ||
90 writes_reg(inst, QPU_W_SFU_RECIPSQRT) ||
91 writes_reg(inst, QPU_W_SFU_EXP) ||
92 writes_reg(inst, QPU_W_SFU_LOG));
93 }
94
95 /**
96 * Checks for the instruction restrictions from page 37 ("Summary of
97 * Instruction Restrictions").
98 */
99 void
100 vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
101 {
102 bool scoreboard_locked = false;
103
104 for (int i = 0; i < num_inst; i++) {
105 uint64_t inst = insts[i];
106
107 if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) {
108 if (qpu_inst_is_tlb(inst))
109 scoreboard_locked = true;
110
111 continue;
112 }
113
114 /* "The Thread End instruction must not write to either physical
115 * regfile A or B."
116 */
117 assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) >= 32);
118 assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) >= 32);
119
120 /* Can't trigger an implicit wait on scoreboard in the program
121 * end instruction.
122 */
123 assert(!qpu_inst_is_tlb(inst) || scoreboard_locked);
124
125 /* Two delay slots will be executed. */
126 assert(i + 2 <= num_inst);
127
128 for (int j = i; j < i + 2; j++) {
129 /* "The last three instructions of any program
130 * (Thread End plus the following two delay-slot
131 * instructions) must not do varyings read, uniforms
132 * read or any kind of VPM, VDR, or VDW read or
133 * write."
134 */
135 assert(!writes_reg(insts[j], QPU_W_VPM));
136 assert(!reads_reg(insts[j], QPU_R_VARY));
137 assert(!reads_reg(insts[j], QPU_R_UNIF));
138 assert(!reads_reg(insts[j], QPU_R_VPM));
139
140 /* "The Thread End instruction and the following two
141 * delay slot instructions must not write or read
142 * address 14 in either regfile A or B."
143 */
144 assert(!writes_reg(insts[j], 14));
145 assert(!reads_reg(insts[j], 14));
146
147 }
148
149 /* "The final program instruction (the second delay slot
150 * instruction) must not do a TLB Z write."
151 */
152 assert(!writes_reg(insts[i + 2], QPU_W_TLB_Z));
153 }
154
155 /* "A scoreboard wait must not occur in the first two instructions of
156 * a fragment shader. This is either the explicit Wait for Scoreboard
157 * signal or an implicit wait with the first tile-buffer read or
158 * write instruction."
159 */
160 for (int i = 0; i < 2; i++) {
161 uint64_t inst = insts[i];
162
163 assert(!qpu_inst_is_tlb(inst));
164 }
165
166 /* "If TMU_NOSWAP is written, the write must be three instructions
167 * before the first TMU write instruction. For example, if
168 * TMU_NOSWAP is written in the first shader instruction, the first
169 * TMU write cannot occur before the 4th shader instruction."
170 */
171 int last_tmu_noswap = -10;
172 for (int i = 0; i < num_inst; i++) {
173 uint64_t inst = insts[i];
174
175 assert((i - last_tmu_noswap) > 3 ||
176 (!writes_reg(inst, QPU_W_TMU0_S) &&
177 !writes_reg(inst, QPU_W_TMU1_S)));
178
179 if (writes_reg(inst, QPU_W_TMU_NOSWAP))
180 last_tmu_noswap = i;
181 }
182
183 /* "An instruction must not read from a location in physical regfile A
184 * or B that was written to by the previous instruction."
185 */
186 for (int i = 0; i < num_inst - 1; i++) {
187 uint64_t inst = insts[i];
188 uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
189 uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
190 uint32_t waddr_a, waddr_b;
191
192 if (inst & QPU_WS) {
193 waddr_b = add_waddr;
194 waddr_a = mul_waddr;
195 } else {
196 waddr_a = add_waddr;
197 waddr_b = mul_waddr;
198 }
199
200 assert(waddr_a >= 32 || !reads_a_reg(insts[i + 1], waddr_a));
201 assert(waddr_b >= 32 || !reads_b_reg(insts[i + 1], waddr_b));
202 }
203
204 /* "After an SFU lookup instruction, accumulator r4 must not be read
205 * in the following two instructions. Any other instruction that
206 * results in r4 being written (that is, TMU read, TLB read, SFU
207 * lookup) cannot occur in the two instructions following an SFU
208 * lookup."
209 */
210 int last_sfu_inst = -10;
211 for (int i = 0; i < num_inst - 1; i++) {
212 uint64_t inst = insts[i];
213 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
214
215 assert(i - last_sfu_inst > 2 ||
216 (!writes_sfu(inst) &&
217 sig != QPU_SIG_LOAD_TMU0 &&
218 sig != QPU_SIG_LOAD_TMU1 &&
219 sig != QPU_SIG_COLOR_LOAD));
220
221 if (writes_sfu(inst))
222 last_sfu_inst = i;
223 }
224
225 int last_r5_write = -10;
226 for (int i = 0; i < num_inst - 1; i++) {
227 uint64_t inst = insts[i];
228
229 /* "An instruction that does a vector rotate by r5 must not
230 * immediately follow an instruction that writes to r5."
231 */
232 assert(last_r5_write != i - 1 ||
233 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM ||
234 QPU_GET_FIELD(inst, QPU_SMALL_IMM) != 48);
235 }
236
237 /* "An instruction that does a vector rotate must not immediately
238 * follow an instruction that writes to the accumulator that is being
239 * rotated.
240 *
241 * XXX: TODO.
242 */
243
244 /* "After an instruction that does a TLB Z write, the multisample mask
245 * must not be read as an instruction input argument in the following
246 * two instruction. The TLB Z write instruction can, however, be
247 * followed immediately by a TLB color write."
248 */
249 for (int i = 0; i < num_inst - 1; i++) {
250 uint64_t inst = insts[i];
251 if (writes_reg(inst, QPU_W_TLB_Z)) {
252 assert(!reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS));
253 assert(!reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS));
254 }
255 }
256
257 /*
258 * "A single instruction can only perform a maximum of one of the
259 * following closely coupled peripheral accesses in a single
260 * instruction: TMU write, TMU read, TLB write, TLB read, TLB
261 * combined color read and write, SFU write, Mutex read or Semaphore
262 * access."
263 */
264 for (int i = 0; i < num_inst - 1; i++) {
265 uint64_t inst = insts[i];
266
267 assert(qpu_num_sf_accesses(inst) <= 1);
268 }
269 }