52c06ae551744a9b3aabba72a0601de94a49329b
[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdbool.h>
25 #include "util/ralloc.h"
26 #include "vc4_qir.h"
27 #include "vc4_qpu.h"
28
29 static uint64_t
30 set_src_raddr(uint64_t inst, struct qpu_reg src)
31 {
32 if (src.mux == QPU_MUX_A) {
33 assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
34 QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
35 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);
36 }
37
38 if (src.mux == QPU_MUX_B) {
39 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
40 QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
41 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
42 }
43
44 return inst;
45 }
46
47 uint64_t
48 qpu_NOP()
49 {
50 uint64_t inst = 0;
51
52 inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
53 inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
54
55 /* Note: These field values are actually non-zero */
56 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
57 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
58 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
59 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
60 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
61
62 return inst;
63 }
64
65 static uint64_t
66 qpu_a_dst(struct qpu_reg dst)
67 {
68 uint64_t inst = 0;
69
70 if (dst.mux <= QPU_MUX_R5) {
71 /* Translate the mux to the ACCn values. */
72 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
73 } else {
74 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
75 if (dst.mux == QPU_MUX_B)
76 inst |= QPU_WS;
77 }
78
79 return inst;
80 }
81
82 static uint64_t
83 qpu_m_dst(struct qpu_reg dst)
84 {
85 uint64_t inst = 0;
86
87 if (dst.mux <= QPU_MUX_R5) {
88 /* Translate the mux to the ACCn values. */
89 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
90 } else {
91 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
92 if (dst.mux == QPU_MUX_A)
93 inst |= QPU_WS;
94 }
95
96 return inst;
97 }
98
99 uint64_t
100 qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
101 {
102 uint64_t inst = 0;
103
104 inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
105 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
106 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
107 inst |= qpu_a_dst(dst);
108 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
109 inst |= QPU_SET_FIELD(src.mux, QPU_ADD_A);
110 inst |= QPU_SET_FIELD(src.mux, QPU_ADD_B);
111 inst = set_src_raddr(inst, src);
112 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
113 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
114
115 return inst;
116 }
117
118 uint64_t
119 qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
120 {
121 uint64_t inst = 0;
122
123 inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
124 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
125 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
126 inst |= qpu_m_dst(dst);
127 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
128 inst |= QPU_SET_FIELD(src.mux, QPU_MUL_A);
129 inst |= QPU_SET_FIELD(src.mux, QPU_MUL_B);
130 inst = set_src_raddr(inst, src);
131 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
132 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
133
134 return inst;
135 }
136
137 uint64_t
138 qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
139 {
140 uint64_t inst = 0;
141
142 inst |= qpu_a_dst(dst);
143 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
144 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
145 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
146 inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
147 inst |= val;
148
149 return inst;
150 }
151
152 uint64_t
153 qpu_a_alu2(enum qpu_op_add op,
154 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
155 {
156 uint64_t inst = 0;
157
158 inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
159 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
160 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
161 inst |= qpu_a_dst(dst);
162 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
163 inst |= QPU_SET_FIELD(src0.mux, QPU_ADD_A);
164 inst = set_src_raddr(inst, src0);
165 inst |= QPU_SET_FIELD(src1.mux, QPU_ADD_B);
166 inst = set_src_raddr(inst, src1);
167 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
168 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
169
170 return inst;
171 }
172
173 uint64_t
174 qpu_m_alu2(enum qpu_op_mul op,
175 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
176 {
177 uint64_t inst = 0;
178
179 inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
180 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
181 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
182 inst |= qpu_m_dst(dst);
183 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
184 inst |= QPU_SET_FIELD(src0.mux, QPU_MUL_A);
185 inst = set_src_raddr(inst, src0);
186 inst |= QPU_SET_FIELD(src1.mux, QPU_MUL_B);
187 inst = set_src_raddr(inst, src1);
188 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
189 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
190
191 return inst;
192 }
193
/**
 * Merges one bitfield of two instruction words into *merge.
 *
 * If either instruction holds the "ignore" value in the field, the other
 * instruction's value is written into *merge (a is checked first).  If
 * neither holds the ignore value, the two must agree; in that case *merge
 * is left as it was.
 *
 * \param merge   merged instruction word, updated in place
 * \param a       first instruction word
 * \param b       second instruction word
 * \param mask    bit mask selecting the field
 * \param ignore  in-place (already shifted) field value meaning "unused"
 * \return false if both instructions use the field with different values,
 *         true otherwise
 */
static bool
merge_fields(uint64_t *merge,
             uint64_t a, uint64_t b,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t field_a = a & mask;
        const uint64_t field_b = b & mask;

        if (field_a == ignore) {
                /* a does not care: take whatever b has. */
                *merge = (*merge & ~mask) | field_b;
                return true;
        }

        if (field_b == ignore) {
                /* b does not care: take a's value. */
                *merge = (*merge & ~mask) | field_a;
                return true;
        }

        /* Both care, so they have to agree. */
        return field_a == field_b;
}
210
211 int
212 qpu_num_sf_accesses(uint64_t inst)
213 {
214 int accesses = 0;
215 static const uint32_t specials[] = {
216 QPU_W_TLB_COLOR_MS,
217 QPU_W_TLB_COLOR_ALL,
218 QPU_W_TLB_Z,
219 QPU_W_TMU0_S,
220 QPU_W_TMU0_T,
221 QPU_W_TMU0_R,
222 QPU_W_TMU0_B,
223 QPU_W_TMU1_S,
224 QPU_W_TMU1_T,
225 QPU_W_TMU1_R,
226 QPU_W_TMU1_B,
227 QPU_W_SFU_RECIP,
228 QPU_W_SFU_RECIPSQRT,
229 QPU_W_SFU_EXP,
230 QPU_W_SFU_LOG,
231 };
232 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
233 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
234 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
235 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
236
237 for (int j = 0; j < ARRAY_SIZE(specials); j++) {
238 if (waddr_add == specials[j])
239 accesses++;
240 if (waddr_mul == specials[j])
241 accesses++;
242 }
243
244 if (raddr_a == QPU_R_MUTEX_ACQUIRE)
245 accesses++;
246 if (raddr_b == QPU_R_MUTEX_ACQUIRE)
247 accesses++;
248
249 /* XXX: semaphore, combined color read/write? */
250 switch (QPU_GET_FIELD(inst, QPU_SIG)) {
251 case QPU_SIG_COLOR_LOAD:
252 case QPU_SIG_COLOR_LOAD_END:
253 case QPU_SIG_LOAD_TMU0:
254 case QPU_SIG_LOAD_TMU1:
255 accesses++;
256 }
257
258 return accesses;
259 }
260
261 static bool
262 qpu_waddr_ignores_ws(uint32_t waddr)
263 {
264 switch(waddr) {
265 case QPU_W_ACC0:
266 case QPU_W_ACC1:
267 case QPU_W_ACC2:
268 case QPU_W_ACC3:
269 case QPU_W_TLB_Z:
270 case QPU_W_TLB_COLOR_MS:
271 case QPU_W_TLB_COLOR_ALL:
272 case QPU_W_TLB_ALPHA_MASK:
273 case QPU_W_VPM:
274 case QPU_W_SFU_RECIP:
275 case QPU_W_SFU_RECIPSQRT:
276 case QPU_W_SFU_EXP:
277 case QPU_W_SFU_LOG:
278 case QPU_W_TMU0_S:
279 case QPU_W_TMU0_T:
280 case QPU_W_TMU0_R:
281 case QPU_W_TMU0_B:
282 case QPU_W_TMU1_S:
283 case QPU_W_TMU1_T:
284 case QPU_W_TMU1_R:
285 case QPU_W_TMU1_B:
286 return true;
287 }
288
289 return false;
290 }
291
292 static void
293 swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
294 {
295 uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
296 uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
297 uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
298
299 if ((*a & mux_mask) == mux_a_val) {
300 *a = (*a & ~mux_mask) | mux_b_val;
301 *merge = (*merge & ~mux_mask) | mux_b_val;
302 }
303 }
304
305 static bool
306 try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
307 {
308 uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
309 uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
310 uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
311 uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
312
313 if (raddr_a_b != QPU_R_NOP)
314 return false;
315
316 switch (raddr_a_a) {
317 case QPU_R_UNIF:
318 case QPU_R_VARY:
319 break;
320 default:
321 return false;
322 }
323
324 if (raddr_b_b != QPU_R_NOP &&
325 raddr_b_b != raddr_a_a)
326 return false;
327
328 /* Move raddr A to B in instruction a. */
329 *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
330 *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
331 *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);
332 *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);
333 swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
334 swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
335 swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
336 swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
337
338 return true;
339 }
340
341 static bool
342 convert_mov(uint64_t *inst)
343 {
344 uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);
345 uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);
346 uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);
347
348 /* Is it a MOV? */
349 if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
350 (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
351 return false;
352 }
353
354 if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)
355 return false;
356
357 /* We could maybe support this in the .8888 and .8a-.8d cases. */
358 if (*inst & QPU_PM)
359 return false;
360
361 *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);
362 *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);
363
364 *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);
365 *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
366 *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);
367 *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);
368
369 *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);
370 *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);
371
372 *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);
373 *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);
374
375 if (!qpu_waddr_ignores_ws(waddr_add))
376 *inst ^= QPU_WS;
377
378 return true;
379 }
380
381 uint64_t
382 qpu_merge_inst(uint64_t a, uint64_t b)
383 {
384 uint64_t merge = a | b;
385 bool ok = true;
386
387 if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
388 QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
389 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
390 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
391 !(convert_mov(&a) || convert_mov(&b))) {
392 return 0;
393 } else {
394 merge = a | b;
395 }
396 }
397
398 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
399 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
400 return 0;
401
402 if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
403 return 0;
404
405 if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM ||
406 QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) {
407 return 0;
408 }
409
410 ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
411 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
412
413 /* Misc fields that have to match exactly. */
414 ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_PM,
415 ~0);
416
417 if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
418 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
419 /* Since we tend to use regfile A by default both for register
420 * allocation and for our special values (uniforms and
421 * varyings), try swapping uniforms and varyings to regfile B
422 * to resolve raddr A conflicts.
423 */
424 if (!try_swap_ra_file(&merge, &a, &b) &&
425 !try_swap_ra_file(&merge, &b, &a)) {
426 return 0;
427 }
428 }
429
430 ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
431 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
432
433 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
434 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
435 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
436 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
437
438 /* Allow disagreement on WS (swapping A vs B physical reg file as the
439 * destination for ADD/MUL) if one of the original instructions
440 * ignores it (probably because it's just writing to accumulators).
441 */
442 if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
443 qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
444 merge = (merge & ~QPU_WS) | (b & QPU_WS);
445 } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
446 qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
447 merge = (merge & ~QPU_WS) | (a & QPU_WS);
448 } else {
449 if ((a & QPU_WS) != (b & QPU_WS))
450 return 0;
451 }
452
453 if (ok)
454 return merge;
455 else
456 return 0;
457 }
458
459 uint64_t
460 qpu_set_sig(uint64_t inst, uint32_t sig)
461 {
462 assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
463 return QPU_UPDATE_FIELD(inst, sig, QPU_SIG);
464 }
465
466 uint64_t
467 qpu_set_cond_add(uint64_t inst, uint32_t cond)
468 {
469 assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
470 return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD);
471 }
472
473 uint64_t
474 qpu_set_cond_mul(uint64_t inst, uint32_t cond)
475 {
476 assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
477 return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);
478 }
479
480 bool
481 qpu_waddr_is_tlb(uint32_t waddr)
482 {
483 switch (waddr) {
484 case QPU_W_TLB_COLOR_ALL:
485 case QPU_W_TLB_COLOR_MS:
486 case QPU_W_TLB_Z:
487 return true;
488 default:
489 return false;
490 }
491 }
492
493 bool
494 qpu_inst_is_tlb(uint64_t inst)
495 {
496 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
497
498 return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
499 qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
500 sig == QPU_SIG_COLOR_LOAD ||
501 sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
502 }
503
504 void
505 qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
506 {
507 if (c->qpu_inst_count >= c->qpu_inst_size) {
508 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
509 c->qpu_insts = reralloc(c, c->qpu_insts,
510 uint64_t, c->qpu_inst_size);
511 }
512 c->qpu_insts[c->qpu_inst_count++] = inst;
513 }