radeonsi: interpolate colors after interpolation weight shuffling
[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdbool.h>
25 #include "util/ralloc.h"
26 #include "vc4_qir.h"
27 #include "vc4_qpu.h"
28
/* Encodes the mux selector `mux` into the instruction field `muxfield`.
 *
 * QPU_MUX_SMALL_IMM is written as QPU_MUX_B: small immediates are carried in
 * the raddr B field (see set_src_raddr()), so the ALU input still selects B.
 *
 * `mux` is parenthesized so that arbitrary expressions expand correctly.
 * Note that it is evaluated twice, so it must not have side effects.
 * `muxfield` is passed through untouched because QPU_SET_FIELD() expects a
 * bare field name.
 */
#define QPU_MUX(mux, muxfield)                                          \
        QPU_SET_FIELD((mux) != QPU_MUX_SMALL_IMM ? (mux) : QPU_MUX_B,   \
                      muxfield)
31
32 static uint64_t
33 set_src_raddr(uint64_t inst, struct qpu_reg src)
34 {
35 if (src.mux == QPU_MUX_A) {
36 assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
37 QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
38 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);
39 }
40
41 if (src.mux == QPU_MUX_B) {
42 assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
43 QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
44 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
45 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
46 }
47
48 if (src.mux == QPU_MUX_SMALL_IMM) {
49 if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
50 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
51 } else {
52 inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
53 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
54 }
55 return ((inst & ~QPU_RADDR_B_MASK) |
56 QPU_SET_FIELD(src.addr, QPU_RADDR_B));
57 }
58
59 return inst;
60 }
61
62 uint64_t
63 qpu_NOP()
64 {
65 uint64_t inst = 0;
66
67 inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
68 inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
69
70 /* Note: These field values are actually non-zero */
71 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
72 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
73 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
74 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
75 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
76
77 return inst;
78 }
79
80 static uint64_t
81 qpu_a_dst(struct qpu_reg dst)
82 {
83 uint64_t inst = 0;
84
85 if (dst.mux <= QPU_MUX_R5) {
86 /* Translate the mux to the ACCn values. */
87 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
88 } else {
89 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
90 if (dst.mux == QPU_MUX_B)
91 inst |= QPU_WS;
92 }
93
94 return inst;
95 }
96
97 static uint64_t
98 qpu_m_dst(struct qpu_reg dst)
99 {
100 uint64_t inst = 0;
101
102 if (dst.mux <= QPU_MUX_R5) {
103 /* Translate the mux to the ACCn values. */
104 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
105 } else {
106 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
107 if (dst.mux == QPU_MUX_A)
108 inst |= QPU_WS;
109 }
110
111 return inst;
112 }
113
114 uint64_t
115 qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
116 {
117 uint64_t inst = 0;
118
119 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
120 inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
121 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
122 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
123 inst |= qpu_a_dst(dst);
124 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
125 inst |= QPU_MUX(src.mux, QPU_ADD_A);
126 inst |= QPU_MUX(src.mux, QPU_ADD_B);
127 inst = set_src_raddr(inst, src);
128 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
129
130 return inst;
131 }
132
133 uint64_t
134 qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
135 {
136 uint64_t inst = 0;
137
138 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
139 inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
140 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
141 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
142 inst |= qpu_m_dst(dst);
143 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
144 inst |= QPU_MUX(src.mux, QPU_MUL_A);
145 inst |= QPU_MUX(src.mux, QPU_MUL_B);
146 inst = set_src_raddr(inst, src);
147 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
148
149 return inst;
150 }
151
152 uint64_t
153 qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
154 {
155 uint64_t inst = 0;
156
157 inst |= qpu_a_dst(dst);
158 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
159 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
160 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
161 inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
162 inst |= val;
163
164 return inst;
165 }
166
167 uint64_t
168 qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
169 {
170 return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,
171 QPU_LOAD_IMM_MODE);
172 }
173
174 uint64_t
175 qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
176 {
177 return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,
178 QPU_LOAD_IMM_MODE);
179 }
180
181 uint64_t
182 qpu_branch(uint32_t cond, uint32_t target)
183 {
184 uint64_t inst = 0;
185
186 inst |= qpu_a_dst(qpu_ra(QPU_W_NOP));
187 inst |= qpu_m_dst(qpu_rb(QPU_W_NOP));
188 inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND);
189 inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG);
190 inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET);
191
192 return inst;
193 }
194
195 uint64_t
196 qpu_a_alu2(enum qpu_op_add op,
197 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
198 {
199 uint64_t inst = 0;
200
201 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
202 inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
203 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
204 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
205 inst |= qpu_a_dst(dst);
206 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
207 inst |= QPU_MUX(src0.mux, QPU_ADD_A);
208 inst = set_src_raddr(inst, src0);
209 inst |= QPU_MUX(src1.mux, QPU_ADD_B);
210 inst = set_src_raddr(inst, src1);
211 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
212
213 return inst;
214 }
215
216 uint64_t
217 qpu_m_alu2(enum qpu_op_mul op,
218 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
219 {
220 uint64_t inst = 0;
221
222 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
223 inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
224 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
225 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
226 inst |= qpu_m_dst(dst);
227 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
228 inst |= QPU_MUX(src0.mux, QPU_MUL_A);
229 inst = set_src_raddr(inst, src0);
230 inst |= QPU_MUX(src1.mux, QPU_MUL_B);
231 inst = set_src_raddr(inst, src1);
232 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
233
234 return inst;
235 }
236
237 uint64_t
238 qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)
239 {
240 uint64_t inst = 0;
241 inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);
242
243 inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG);
244 inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot,
245 QPU_SMALL_IMM);
246
247 return inst;
248 }
249
/* Reconciles one bitfield (selected by `mask`) of instructions `a` and `b`.
 *
 * `ignore` is the already-shifted field value meaning "don't care":
 *  - if a's field equals `ignore`, b's field is copied into *merge;
 *  - otherwise, if b's field equals `ignore`, a's field is copied into
 *    *merge;
 *  - otherwise *merge is left alone and the fields must be identical.
 *
 * Returns false only when both fields are significant and differ.
 */
static bool
merge_fields(uint64_t *merge,
             uint64_t a, uint64_t b,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t field_a = a & mask;
        const uint64_t field_b = b & mask;

        if (field_a == ignore) {
                *merge = (*merge & ~mask) | field_b;
                return true;
        }

        if (field_b == ignore) {
                *merge = (*merge & ~mask) | field_a;
                return true;
        }

        return field_a == field_b;
}
266
267 int
268 qpu_num_sf_accesses(uint64_t inst)
269 {
270 int accesses = 0;
271 static const uint32_t specials[] = {
272 QPU_W_TLB_COLOR_MS,
273 QPU_W_TLB_COLOR_ALL,
274 QPU_W_TLB_Z,
275 QPU_W_TMU0_S,
276 QPU_W_TMU0_T,
277 QPU_W_TMU0_R,
278 QPU_W_TMU0_B,
279 QPU_W_TMU1_S,
280 QPU_W_TMU1_T,
281 QPU_W_TMU1_R,
282 QPU_W_TMU1_B,
283 QPU_W_SFU_RECIP,
284 QPU_W_SFU_RECIPSQRT,
285 QPU_W_SFU_EXP,
286 QPU_W_SFU_LOG,
287 };
288 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
289 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
290 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
291 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
292
293 for (int j = 0; j < ARRAY_SIZE(specials); j++) {
294 if (waddr_add == specials[j])
295 accesses++;
296 if (waddr_mul == specials[j])
297 accesses++;
298 }
299
300 if (raddr_a == QPU_R_MUTEX_ACQUIRE)
301 accesses++;
302 if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
303 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
304 accesses++;
305
306 /* XXX: semaphore, combined color read/write? */
307 switch (QPU_GET_FIELD(inst, QPU_SIG)) {
308 case QPU_SIG_COLOR_LOAD:
309 case QPU_SIG_COLOR_LOAD_END:
310 case QPU_SIG_LOAD_TMU0:
311 case QPU_SIG_LOAD_TMU1:
312 accesses++;
313 }
314
315 return accesses;
316 }
317
318 static bool
319 qpu_waddr_ignores_ws(uint32_t waddr)
320 {
321 switch(waddr) {
322 case QPU_W_ACC0:
323 case QPU_W_ACC1:
324 case QPU_W_ACC2:
325 case QPU_W_ACC3:
326 case QPU_W_TLB_Z:
327 case QPU_W_TLB_COLOR_MS:
328 case QPU_W_TLB_COLOR_ALL:
329 case QPU_W_TLB_ALPHA_MASK:
330 case QPU_W_VPM:
331 case QPU_W_SFU_RECIP:
332 case QPU_W_SFU_RECIPSQRT:
333 case QPU_W_SFU_EXP:
334 case QPU_W_SFU_LOG:
335 case QPU_W_TMU0_S:
336 case QPU_W_TMU0_T:
337 case QPU_W_TMU0_R:
338 case QPU_W_TMU0_B:
339 case QPU_W_TMU1_S:
340 case QPU_W_TMU1_T:
341 case QPU_W_TMU1_R:
342 case QPU_W_TMU1_B:
343 return true;
344 }
345
346 return false;
347 }
348
349 static void
350 swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
351 {
352 uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
353 uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
354 uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
355
356 if ((*a & mux_mask) == mux_a_val) {
357 *a = (*a & ~mux_mask) | mux_b_val;
358 *merge = (*merge & ~mux_mask) | mux_b_val;
359 }
360 }
361
362 static bool
363 try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
364 {
365 uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
366 uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
367 uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
368 uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
369
370 if (raddr_a_b != QPU_R_NOP)
371 return false;
372
373 switch (raddr_a_a) {
374 case QPU_R_UNIF:
375 case QPU_R_VARY:
376 break;
377 default:
378 return false;
379 }
380
381 if (!(*merge & QPU_PM) &&
382 QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
383 return false;
384 }
385
386 if (raddr_b_b != QPU_R_NOP &&
387 raddr_b_b != raddr_a_a)
388 return false;
389
390 /* Move raddr A to B in instruction a. */
391 *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
392 *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
393 *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);
394 *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);
395 swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
396 swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
397 swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
398 swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
399
400 return true;
401 }
402
403 static bool
404 convert_mov(uint64_t *inst)
405 {
406 uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);
407 uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);
408 uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);
409
410 /* Is it a MOV? */
411 if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
412 (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
413 return false;
414 }
415
416 if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)
417 return false;
418
419 /* We could maybe support this in the .8888 and .8a-.8d cases. */
420 if (*inst & QPU_PM)
421 return false;
422
423 *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);
424 *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);
425
426 *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);
427 *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
428 *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);
429 *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);
430
431 *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);
432 *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);
433
434 *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);
435 *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);
436
437 if (!qpu_waddr_ignores_ws(waddr_add))
438 *inst ^= QPU_WS;
439
440 return true;
441 }
442
443 static bool
444 writes_a_file(uint64_t inst)
445 {
446 if (!(inst & QPU_WS))
447 return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
448 else
449 return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;
450 }
451
452 static bool
453 reads_r4(uint64_t inst)
454 {
455 return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 ||
456 QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 ||
457 QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 ||
458 QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4);
459 }
460
461 uint64_t
462 qpu_merge_inst(uint64_t a, uint64_t b)
463 {
464 uint64_t merge = a | b;
465 bool ok = true;
466 uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
467 uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);
468
469 if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
470 QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
471 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
472 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
473 !(convert_mov(&a) || convert_mov(&b))) {
474 return 0;
475 } else {
476 merge = a | b;
477 }
478 }
479
480 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
481 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
482 return 0;
483
484 if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
485 return 0;
486
487 if (a_sig == QPU_SIG_LOAD_IMM ||
488 b_sig == QPU_SIG_LOAD_IMM ||
489 a_sig == QPU_SIG_SMALL_IMM ||
490 b_sig == QPU_SIG_SMALL_IMM ||
491 a_sig == QPU_SIG_BRANCH ||
492 b_sig == QPU_SIG_BRANCH) {
493 return 0;
494 }
495
496 ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
497 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
498
499 /* Misc fields that have to match exactly. */
500 ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
501
502 if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
503 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
504 /* Since we tend to use regfile A by default both for register
505 * allocation and for our special values (uniforms and
506 * varyings), try swapping uniforms and varyings to regfile B
507 * to resolve raddr A conflicts.
508 */
509 if (!try_swap_ra_file(&merge, &a, &b) &&
510 !try_swap_ra_file(&merge, &b, &a)) {
511 return 0;
512 }
513 }
514
515 ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
516 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
517
518 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
519 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
520 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
521 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
522
523 /* Allow disagreement on WS (swapping A vs B physical reg file as the
524 * destination for ADD/MUL) if one of the original instructions
525 * ignores it (probably because it's just writing to accumulators).
526 */
527 if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
528 qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
529 merge = (merge & ~QPU_WS) | (b & QPU_WS);
530 } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
531 qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
532 merge = (merge & ~QPU_WS) | (a & QPU_WS);
533 } else {
534 if ((a & QPU_WS) != (b & QPU_WS))
535 return 0;
536 }
537
538 if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
539 /* If one instruction has PM bit set and the other not, the
540 * one without PM shouldn't do packing/unpacking, and we
541 * have to make sure non-NOP packing/unpacking from PM
542 * instruction aren't added to it.
543 */
544 uint64_t temp;
545
546 /* Let a be the one with PM bit */
547 if (!(a & QPU_PM)) {
548 temp = a;
549 a = b;
550 b = temp;
551 }
552
553 if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
554 return 0;
555
556 if ((a & QPU_PACK_MASK) != 0 &&
557 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
558 return 0;
559
560 if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
561 return 0;
562 } else {
563 /* packing: Make sure that non-NOP packs agree, then deal with
564 * special-case failing of adding a non-NOP pack to something
565 * with a NOP pack.
566 */
567 if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
568 return 0;
569 bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
570 QPU_GET_FIELD(merge, QPU_PACK));
571 bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
572 QPU_GET_FIELD(merge, QPU_PACK));
573 if (!(merge & QPU_PM)) {
574 /* Make sure we're not going to be putting a new
575 * a-file packing on either half.
576 */
577 if (new_a_pack && writes_a_file(a))
578 return 0;
579
580 if (new_b_pack && writes_a_file(b))
581 return 0;
582 } else {
583 /* Make sure we're not going to be putting new MUL
584 * packing oneither half.
585 */
586 if (new_a_pack &&
587 QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
588 return 0;
589
590 if (new_b_pack &&
591 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
592 return 0;
593 }
594
595 /* unpacking: Make sure that non-NOP unpacks agree, then deal
596 * with special-case failing of adding a non-NOP unpack to
597 * something with a NOP unpack.
598 */
599 if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
600 return 0;
601 bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
602 QPU_GET_FIELD(merge, QPU_UNPACK));
603 bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
604 QPU_GET_FIELD(merge, QPU_UNPACK));
605 if (!(merge & QPU_PM)) {
606 /* Make sure we're not going to be putting a new
607 * a-file packing on either half.
608 */
609 if (new_a_unpack &&
610 QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
611 return 0;
612
613 if (new_b_unpack &&
614 QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
615 return 0;
616 } else {
617 /* Make sure we're not going to be putting new r4
618 * unpack on either half.
619 */
620 if (new_a_unpack && reads_r4(a))
621 return 0;
622
623 if (new_b_unpack && reads_r4(b))
624 return 0;
625 }
626 }
627
628 if (ok)
629 return merge;
630 else
631 return 0;
632 }
633
634 uint64_t
635 qpu_set_sig(uint64_t inst, uint32_t sig)
636 {
637 assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
638 return QPU_UPDATE_FIELD(inst, sig, QPU_SIG);
639 }
640
641 uint64_t
642 qpu_set_cond_add(uint64_t inst, uint32_t cond)
643 {
644 assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
645 return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD);
646 }
647
648 uint64_t
649 qpu_set_cond_mul(uint64_t inst, uint32_t cond)
650 {
651 assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
652 return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);
653 }
654
655 bool
656 qpu_waddr_is_tlb(uint32_t waddr)
657 {
658 switch (waddr) {
659 case QPU_W_TLB_COLOR_ALL:
660 case QPU_W_TLB_COLOR_MS:
661 case QPU_W_TLB_Z:
662 return true;
663 default:
664 return false;
665 }
666 }
667
668 bool
669 qpu_inst_is_tlb(uint64_t inst)
670 {
671 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
672
673 return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
674 qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
675 sig == QPU_SIG_COLOR_LOAD ||
676 sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
677 }
678
/**
 * Looks up the small-immediate encoding of the 32-bit value \p i.
 *
 * Returns the code to store in the raddr b field, or ~0 when \p i has no
 * small-immediate representation:
 *  - 0..15 encode as themselves,
 *  - -16..-1 (as two's complement bit patterns) encode as 16..31,
 *  - the single-precision bit patterns of 1.0, 2.0, ... 128.0 encode as
 *    32..39, and those of 1/256, 1/128, ... 1/2 encode as 40..47.
 */
uint32_t
qpu_encode_small_immediate(uint32_t i)
{
        /* Entry n of this table encodes as 32 + n. */
        static const uint32_t float_bits[] = {
                0x3f800000, 0x40000000, 0x40800000, 0x41000000,
                0x41800000, 0x42000000, 0x42800000, 0x43000000,
                0x3b800000, 0x3c000000, 0x3c800000, 0x3d000000,
                0x3d800000, 0x3e000000, 0x3e800000, 0x3f000000,
        };
        const uint32_t num_floats = sizeof(float_bits) / sizeof(float_bits[0]);

        /* Small non-negative integers. */
        if (i <= 15)
                return i;

        /* -16..-1: the unsigned addition wraps around to 16..31. */
        if (i >= 0xfffffff0u)
                return i + 32;

        for (uint32_t n = 0; n < num_floats; n++) {
                if (float_bits[n] == i)
                        return 32 + n;
        }

        return ~0;
}
728
729 void
730 qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
731 {
732 if (c->qpu_inst_count >= c->qpu_inst_size) {
733 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
734 c->qpu_insts = reralloc(c, c->qpu_insts,
735 uint64_t, c->qpu_inst_size);
736 }
737 c->qpu_insts[c->qpu_inst_count++] = inst;
738 }