vc4: Fix leak of a copy of the scheduled QPU instructions.
[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdbool.h>
25 #include "util/ralloc.h"
26 #include "vc4_qir.h"
27 #include "vc4_qpu.h"
28
29 static uint64_t
30 set_src_raddr(uint64_t inst, struct qpu_reg src)
31 {
32 if (src.mux == QPU_MUX_A) {
33 assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
34 QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
35 return ((inst & ~QPU_RADDR_A_MASK) |
36 QPU_SET_FIELD(src.addr, QPU_RADDR_A));
37 }
38
39 if (src.mux == QPU_MUX_B) {
40 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
41 QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
42 return ((inst & ~QPU_RADDR_B_MASK) |
43 QPU_SET_FIELD(src.addr, QPU_RADDR_B));
44 }
45
46 return inst;
47 }
48
49 uint64_t
50 qpu_NOP()
51 {
52 uint64_t inst = 0;
53
54 inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
55 inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
56
57 /* Note: These field values are actually non-zero */
58 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
59 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
60 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
61 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
62 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
63
64 return inst;
65 }
66
67 static uint64_t
68 qpu_a_dst(struct qpu_reg dst)
69 {
70 uint64_t inst = 0;
71
72 if (dst.mux <= QPU_MUX_R5) {
73 /* Translate the mux to the ACCn values. */
74 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
75 } else {
76 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
77 if (dst.mux == QPU_MUX_B)
78 inst |= QPU_WS;
79 }
80
81 return inst;
82 }
83
84 static uint64_t
85 qpu_m_dst(struct qpu_reg dst)
86 {
87 uint64_t inst = 0;
88
89 if (dst.mux <= QPU_MUX_R5) {
90 /* Translate the mux to the ACCn values. */
91 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
92 } else {
93 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
94 if (dst.mux == QPU_MUX_A)
95 inst |= QPU_WS;
96 }
97
98 return inst;
99 }
100
101 uint64_t
102 qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
103 {
104 uint64_t inst = 0;
105
106 inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
107 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
108 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
109 inst |= qpu_a_dst(dst);
110 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
111 inst |= QPU_SET_FIELD(src.mux, QPU_ADD_A);
112 inst |= QPU_SET_FIELD(src.mux, QPU_ADD_B);
113 inst = set_src_raddr(inst, src);
114 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
115 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
116
117 return inst;
118 }
119
120 uint64_t
121 qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
122 {
123 uint64_t inst = 0;
124
125 inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
126 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
127 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
128 inst |= qpu_m_dst(dst);
129 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
130 inst |= QPU_SET_FIELD(src.mux, QPU_MUL_A);
131 inst |= QPU_SET_FIELD(src.mux, QPU_MUL_B);
132 inst = set_src_raddr(inst, src);
133 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
134 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
135
136 return inst;
137 }
138
139 uint64_t
140 qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
141 {
142 uint64_t inst = 0;
143
144 inst |= qpu_a_dst(dst);
145 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
146 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
147 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
148 inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
149 inst |= val;
150
151 return inst;
152 }
153
154 uint64_t
155 qpu_a_alu2(enum qpu_op_add op,
156 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
157 {
158 uint64_t inst = 0;
159
160 inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
161 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
162 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
163 inst |= qpu_a_dst(dst);
164 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
165 inst |= QPU_SET_FIELD(src0.mux, QPU_ADD_A);
166 inst = set_src_raddr(inst, src0);
167 inst |= QPU_SET_FIELD(src1.mux, QPU_ADD_B);
168 inst = set_src_raddr(inst, src1);
169 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
170 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
171
172 return inst;
173 }
174
175 uint64_t
176 qpu_m_alu2(enum qpu_op_mul op,
177 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
178 {
179 uint64_t inst = 0;
180
181 inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
182 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
183 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
184 inst |= qpu_m_dst(dst);
185 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
186 inst |= QPU_SET_FIELD(src0.mux, QPU_MUL_A);
187 inst = set_src_raddr(inst, src0);
188 inst |= QPU_SET_FIELD(src1.mux, QPU_MUL_B);
189 inst = set_src_raddr(inst, src1);
190 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
191 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
192
193 return inst;
194 }
195
/**
 * Reconciles one bit-field of two instruction words into *merge.
 *
 * merge   in/out combined word; only the bits under mask may be rewritten.
 * a, b    the two words being combined.
 * mask    the bits that make up the field.
 * ignore  the field value (already positioned under mask) that means
 *         "don't care".
 *
 * If a's field equals ignore, b's field is written to *merge; otherwise if
 * b's field equals ignore, a's field is written.  When neither side is
 * ignorable, *merge is left untouched and the fields must be equal.
 *
 * Returns true on success, false when the two fields conflict.
 */
static bool
merge_fields(uint64_t *merge,
             uint64_t a, uint64_t b,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t field_a = a & mask;
        const uint64_t field_b = b & mask;

        /* Neither side is a don't-care: they have to agree as-is. */
        if (field_a != ignore && field_b != ignore)
                return field_a == field_b;

        /* Take the field from whichever side is not the don't-care one
         * (b's when a is ignorable, else a's).
         */
        const uint64_t chosen = (field_a == ignore) ? field_b : field_a;

        *merge = (*merge & ~mask) | chosen;
        return true;
}
212
213 int
214 qpu_num_sf_accesses(uint64_t inst)
215 {
216 int accesses = 0;
217 static const uint32_t specials[] = {
218 QPU_W_TLB_COLOR_MS,
219 QPU_W_TLB_COLOR_ALL,
220 QPU_W_TLB_Z,
221 QPU_W_TMU0_S,
222 QPU_W_TMU0_T,
223 QPU_W_TMU0_R,
224 QPU_W_TMU0_B,
225 QPU_W_TMU1_S,
226 QPU_W_TMU1_T,
227 QPU_W_TMU1_R,
228 QPU_W_TMU1_B,
229 QPU_W_SFU_RECIP,
230 QPU_W_SFU_RECIPSQRT,
231 QPU_W_SFU_EXP,
232 QPU_W_SFU_LOG,
233 };
234 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
235 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
236 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
237 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
238
239 for (int j = 0; j < ARRAY_SIZE(specials); j++) {
240 if (waddr_add == specials[j])
241 accesses++;
242 if (waddr_mul == specials[j])
243 accesses++;
244 }
245
246 if (raddr_a == QPU_R_MUTEX_ACQUIRE)
247 accesses++;
248 if (raddr_b == QPU_R_MUTEX_ACQUIRE)
249 accesses++;
250
251 /* XXX: semaphore, combined color read/write? */
252 switch (QPU_GET_FIELD(inst, QPU_SIG)) {
253 case QPU_SIG_COLOR_LOAD:
254 case QPU_SIG_COLOR_LOAD_END:
255 case QPU_SIG_LOAD_TMU0:
256 case QPU_SIG_LOAD_TMU1:
257 accesses++;
258 }
259
260 return accesses;
261 }
262
263 static bool
264 qpu_waddr_ignores_pm(uint32_t waddr)
265 {
266 switch(waddr) {
267 case QPU_W_ACC0:
268 case QPU_W_ACC1:
269 case QPU_W_ACC2:
270 case QPU_W_ACC3:
271 case QPU_W_TLB_Z:
272 case QPU_W_TLB_COLOR_MS:
273 case QPU_W_TLB_COLOR_ALL:
274 case QPU_W_TLB_ALPHA_MASK:
275 case QPU_W_VPM:
276 case QPU_W_SFU_RECIP:
277 case QPU_W_SFU_RECIPSQRT:
278 case QPU_W_SFU_EXP:
279 case QPU_W_SFU_LOG:
280 case QPU_W_TMU0_S:
281 case QPU_W_TMU0_T:
282 case QPU_W_TMU0_R:
283 case QPU_W_TMU0_B:
284 case QPU_W_TMU1_S:
285 case QPU_W_TMU1_T:
286 case QPU_W_TMU1_R:
287 case QPU_W_TMU1_B:
288 return true;
289 }
290
291 return false;
292 }
293
294 static void
295 swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
296 {
297 uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
298 uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
299 uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
300
301 if ((*a & mux_mask) == mux_a_val) {
302 *a = (*a & ~mux_mask) | mux_b_val;
303 *merge = (*merge & ~mux_mask) | mux_b_val;
304 }
305 }
306
307 static bool
308 try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
309 {
310 uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
311 uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
312 uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
313 uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
314
315 if (raddr_a_b != QPU_R_NOP)
316 return false;
317
318 switch (raddr_a_a) {
319 case QPU_R_UNIF:
320 case QPU_R_VARY:
321 break;
322 default:
323 return false;
324 }
325
326 if (raddr_b_b != QPU_R_NOP &&
327 raddr_b_b != raddr_a_a)
328 return false;
329
330 /* Move raddr A to B in instruction a. */
331 *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
332 *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
333 *merge = ((*merge & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(raddr_b_a, QPU_RADDR_A));
334 *merge = ((*merge & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B));
335 swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
336 swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
337 swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
338 swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
339
340 return true;
341 }
342
343 uint64_t
344 qpu_merge_inst(uint64_t a, uint64_t b)
345 {
346 uint64_t merge = a | b;
347 bool ok = true;
348
349 if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
350 QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP)
351 return 0;
352
353 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
354 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
355 return 0;
356
357 if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
358 return 0;
359
360 if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM ||
361 QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) {
362 return 0;
363 }
364
365 ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
366 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
367
368 /* Misc fields that have to match exactly. */
369 ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM,
370 ~0);
371
372 if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
373 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
374 /* Since we tend to use regfile A by default both for register
375 * allocation and for our special values (uniforms and
376 * varyings), try swapping uniforms and varyings to regfile B
377 * to resolve raddr A conflicts.
378 */
379 if (!try_swap_ra_file(&merge, &a, &b) &&
380 !try_swap_ra_file(&merge, &b, &a)) {
381 return 0;
382 }
383 }
384
385 ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
386 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
387
388 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
389 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
390 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
391 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
392
393 /* Allow disagreement on WS (swapping A vs B physical reg file as the
394 * destination for ADD/MUL) if one of the original instructions
395 * ignores it (probably because it's just writing to accumulators).
396 */
397 if (qpu_waddr_ignores_pm(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
398 qpu_waddr_ignores_pm(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
399 merge = (merge & ~QPU_WS) | (b & QPU_WS);
400 } else if (qpu_waddr_ignores_pm(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
401 qpu_waddr_ignores_pm(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
402 merge = (merge & ~QPU_WS) | (a & QPU_WS);
403 } else {
404 if ((a & QPU_WS) != (b & QPU_WS))
405 return 0;
406 }
407
408 if (ok)
409 return merge;
410 else
411 return 0;
412 }
413
414 uint64_t
415 qpu_set_sig(uint64_t inst, uint32_t sig)
416 {
417 assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
418 return (inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(sig, QPU_SIG);
419 }
420
421 uint64_t
422 qpu_set_cond_add(uint64_t inst, uint32_t sig)
423 {
424 assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
425 return (inst & ~QPU_COND_ADD_MASK) | QPU_SET_FIELD(sig, QPU_COND_ADD);
426 }
427
428 uint64_t
429 qpu_set_cond_mul(uint64_t inst, uint32_t sig)
430 {
431 assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
432 return (inst & ~QPU_COND_MUL_MASK) | QPU_SET_FIELD(sig, QPU_COND_MUL);
433 }
434
435 bool
436 qpu_waddr_is_tlb(uint32_t waddr)
437 {
438 switch (waddr) {
439 case QPU_W_TLB_COLOR_ALL:
440 case QPU_W_TLB_COLOR_MS:
441 case QPU_W_TLB_Z:
442 return true;
443 default:
444 return false;
445 }
446 }
447
448 bool
449 qpu_inst_is_tlb(uint64_t inst)
450 {
451 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
452
453 return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
454 qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
455 sig == QPU_SIG_COLOR_LOAD ||
456 sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
457 }
458
459 void
460 qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
461 {
462 if (c->qpu_inst_count >= c->qpu_inst_size) {
463 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
464 c->qpu_insts = reralloc(c, c->qpu_insts,
465 uint64_t, c->qpu_inst_size);
466 }
467 c->qpu_insts[c->qpu_inst_count++] = inst;
468 }