freedreno/ir3: drop instr_clone() stuff
[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu_validate.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vc4_qpu.h"
25
26 static bool
27 writes_reg(uint64_t inst, uint32_t w)
28 {
29 return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w ||
30 QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w);
31 }
32
33 static bool
34 _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
35 {
36 struct {
37 uint32_t mux, addr;
38 } src_regs[] = {
39 { QPU_GET_FIELD(inst, QPU_ADD_A) },
40 { QPU_GET_FIELD(inst, QPU_ADD_B) },
41 { QPU_GET_FIELD(inst, QPU_MUL_A) },
42 { QPU_GET_FIELD(inst, QPU_MUL_B) },
43 };
44
45 for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
46 if (!ignore_a &&
47 src_regs[i].mux == QPU_MUX_A &&
48 (QPU_GET_FIELD(inst, QPU_RADDR_A) == r))
49 return true;
50
51 if (!ignore_b &&
52 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM &&
53 src_regs[i].mux == QPU_MUX_B &&
54 (QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
55 return true;
56 }
57
58 return false;
59 }
60
/**
 * Returns true when the instruction reads address r through either the
 * QPU_MUX_A or the QPU_MUX_B input path (neither path is ignored).
 */
static bool
reads_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
66
/**
 * Returns true when the instruction reads address r through the QPU_MUX_A
 * input path; reads through QPU_MUX_B are ignored.
 */
static bool
reads_a_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = false;
        const bool skip_b = true;

        return _reads_reg(inst, r, skip_a, skip_b);
}
72
/**
 * Returns true when the instruction reads address r through the QPU_MUX_B
 * input path; reads through QPU_MUX_A are ignored.
 */
static bool
reads_b_reg(uint64_t inst, uint32_t r)
{
        const bool skip_a = true;
        const bool skip_b = false;

        return _reads_reg(inst, r, skip_a, skip_b);
}
78
79 static bool
80 writes_sfu(uint64_t inst)
81 {
82 return (writes_reg(inst, QPU_W_SFU_RECIP) ||
83 writes_reg(inst, QPU_W_SFU_RECIPSQRT) ||
84 writes_reg(inst, QPU_W_SFU_EXP) ||
85 writes_reg(inst, QPU_W_SFU_LOG));
86 }
87
88 /**
89 * Checks for the instruction restrictions from page 37 ("Summary of
90 * Instruction Restrictions").
91 */
92 void
93 vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
94 {
95 bool scoreboard_locked = false;
96
97 for (int i = 0; i < num_inst; i++) {
98 uint64_t inst = insts[i];
99
100 if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) {
101 if (qpu_inst_is_tlb(inst))
102 scoreboard_locked = true;
103
104 continue;
105 }
106
107 /* "The Thread End instruction must not write to either physical
108 * regfile A or B."
109 */
110 assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) >= 32);
111 assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) >= 32);
112
113 /* Can't trigger an implicit wait on scoreboard in the program
114 * end instruction.
115 */
116 assert(!qpu_inst_is_tlb(inst) || scoreboard_locked);
117
118 /* Two delay slots will be executed. */
119 assert(i + 2 <= num_inst);
120
121 for (int j = i; j < i + 2; j++) {
122 /* "The last three instructions of any program
123 * (Thread End plus the following two delay-slot
124 * instructions) must not do varyings read, uniforms
125 * read or any kind of VPM, VDR, or VDW read or
126 * write."
127 */
128 assert(!writes_reg(insts[j], QPU_W_VPM));
129 assert(!reads_reg(insts[j], QPU_R_VARY));
130 assert(!reads_reg(insts[j], QPU_R_UNIF));
131 assert(!reads_reg(insts[j], QPU_R_VPM));
132
133 /* "The Thread End instruction and the following two
134 * delay slot instructions must not write or read
135 * address 14 in either regfile A or B."
136 */
137 assert(!writes_reg(insts[j], 14));
138 assert(!reads_reg(insts[j], 14));
139
140 }
141
142 /* "The final program instruction (the second delay slot
143 * instruction) must not do a TLB Z write."
144 */
145 assert(!writes_reg(insts[i + 2], QPU_W_TLB_Z));
146 }
147
148 /* "A scoreboard wait must not occur in the first two instructions of
149 * a fragment shader. This is either the explicit Wait for Scoreboard
150 * signal or an implicit wait with the first tile-buffer read or
151 * write instruction."
152 */
153 for (int i = 0; i < 2; i++) {
154 uint64_t inst = insts[i];
155
156 assert(!qpu_inst_is_tlb(inst));
157 }
158
159 /* "If TMU_NOSWAP is written, the write must be three instructions
160 * before the first TMU write instruction. For example, if
161 * TMU_NOSWAP is written in the first shader instruction, the first
162 * TMU write cannot occur before the 4th shader instruction."
163 */
164 int last_tmu_noswap = -10;
165 for (int i = 0; i < num_inst; i++) {
166 uint64_t inst = insts[i];
167
168 assert((i - last_tmu_noswap) > 3 ||
169 (!writes_reg(inst, QPU_W_TMU0_S) &&
170 !writes_reg(inst, QPU_W_TMU1_S)));
171
172 if (writes_reg(inst, QPU_W_TMU_NOSWAP))
173 last_tmu_noswap = i;
174 }
175
176 /* "An instruction must not read from a location in physical regfile A
177 * or B that was written to by the previous instruction."
178 */
179 for (int i = 0; i < num_inst - 1; i++) {
180 uint64_t inst = insts[i];
181 uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
182 uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
183 uint32_t waddr_a, waddr_b;
184
185 if (inst & QPU_WS) {
186 waddr_b = add_waddr;
187 waddr_a = mul_waddr;
188 } else {
189 waddr_a = add_waddr;
190 waddr_b = mul_waddr;
191 }
192
193 assert(waddr_a >= 32 || !reads_a_reg(insts[i + 1], waddr_a));
194 assert(waddr_b >= 32 || !reads_b_reg(insts[i + 1], waddr_b));
195 }
196
197 /* "After an SFU lookup instruction, accumulator r4 must not be read
198 * in the following two instructions. Any other instruction that
199 * results in r4 being written (that is, TMU read, TLB read, SFU
200 * lookup) cannot occur in the two instructions following an SFU
201 * lookup."
202 */
203 int last_sfu_inst = -10;
204 for (int i = 0; i < num_inst - 1; i++) {
205 uint64_t inst = insts[i];
206 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
207
208 assert(i - last_sfu_inst > 2 ||
209 (!writes_sfu(inst) &&
210 sig != QPU_SIG_LOAD_TMU0 &&
211 sig != QPU_SIG_LOAD_TMU1 &&
212 sig != QPU_SIG_COLOR_LOAD));
213
214 if (writes_sfu(inst))
215 last_sfu_inst = i;
216 }
217
218 int last_r5_write = -10;
219 for (int i = 0; i < num_inst - 1; i++) {
220 uint64_t inst = insts[i];
221
222 /* "An instruction that does a vector rotate by r5 must not
223 * immediately follow an instruction that writes to r5."
224 */
225 assert(last_r5_write != i - 1 ||
226 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM ||
227 QPU_GET_FIELD(inst, QPU_SMALL_IMM) != 48);
228 }
229
230 /* "An instruction that does a vector rotate must not immediately
231 * follow an instruction that writes to the accumulator that is being
232 * rotated.
233 *
234 * XXX: TODO.
235 */
236
237 /* "After an instruction that does a TLB Z write, the multisample mask
238 * must not be read as an instruction input argument in the following
239 * two instruction. The TLB Z write instruction can, however, be
240 * followed immediately by a TLB color write."
241 */
242 for (int i = 0; i < num_inst - 1; i++) {
243 uint64_t inst = insts[i];
244 if (writes_reg(inst, QPU_W_TLB_Z)) {
245 assert(!reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS));
246 assert(!reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS));
247 }
248 }
249
250 /*
251 * "A single instruction can only perform a maximum of one of the
252 * following closely coupled peripheral accesses in a single
253 * instruction: TMU write, TMU read, TLB write, TLB read, TLB
254 * combined color read and write, SFU write, Mutex read or Semaphore
255 * access."
256 */
257 for (int i = 0; i < num_inst - 1; i++) {
258 uint64_t inst = insts[i];
259
260 assert(qpu_num_sf_accesses(inst) <= 1);
261 }
262 }