vc4: Make sure that we don't overwrite the signal for PROG_END.
[mesa.git] / src / gallium / drivers / vc4 / vc4_qpu.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdbool.h>
25 #include "util/ralloc.h"
26 #include "vc4_qir.h"
27 #include "vc4_qpu.h"
28
/* Encodes a source mux selector into the given mux field.
 *
 * QPU_MUX_SMALL_IMM is emitted as QPU_MUX_B; every other selector is encoded
 * unchanged.  The argument is parenthesized so that an expression argument
 * cannot re-associate with the comparison or the conditional operator.  Note
 * that mux is still expanded twice, so it must be free of side effects.
 */
#define QPU_MUX(mux, muxfield)                                          \
        QPU_SET_FIELD((mux) != QPU_MUX_SMALL_IMM ? (mux) : QPU_MUX_B,   \
                      muxfield)
31
32 static uint64_t
33 set_src_raddr(uint64_t inst, struct qpu_reg src)
34 {
35 if (src.mux == QPU_MUX_A) {
36 assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
37 QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
38 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);
39 }
40
41 if (src.mux == QPU_MUX_B) {
42 assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
43 QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
44 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
45 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
46 }
47
48 if (src.mux == QPU_MUX_SMALL_IMM) {
49 if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
50 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
51 } else {
52 inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
53 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
54 }
55 return ((inst & ~QPU_RADDR_B_MASK) |
56 QPU_SET_FIELD(src.addr, QPU_RADDR_B));
57 }
58
59 return inst;
60 }
61
62 uint64_t
63 qpu_NOP()
64 {
65 uint64_t inst = 0;
66
67 inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
68 inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
69
70 /* Note: These field values are actually non-zero */
71 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
72 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
73 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
74 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
75 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
76
77 return inst;
78 }
79
80 static uint64_t
81 qpu_a_dst(struct qpu_reg dst)
82 {
83 uint64_t inst = 0;
84
85 if (dst.mux <= QPU_MUX_R5) {
86 /* Translate the mux to the ACCn values. */
87 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
88 } else {
89 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
90 if (dst.mux == QPU_MUX_B)
91 inst |= QPU_WS;
92 }
93
94 return inst;
95 }
96
97 static uint64_t
98 qpu_m_dst(struct qpu_reg dst)
99 {
100 uint64_t inst = 0;
101
102 if (dst.mux <= QPU_MUX_R5) {
103 /* Translate the mux to the ACCn values. */
104 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
105 } else {
106 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
107 if (dst.mux == QPU_MUX_A)
108 inst |= QPU_WS;
109 }
110
111 return inst;
112 }
113
114 uint64_t
115 qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
116 {
117 uint64_t inst = 0;
118
119 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
120 inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
121 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
122 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
123 inst |= qpu_a_dst(dst);
124 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
125 inst |= QPU_MUX(src.mux, QPU_ADD_A);
126 inst |= QPU_MUX(src.mux, QPU_ADD_B);
127 inst = set_src_raddr(inst, src);
128 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
129
130 return inst;
131 }
132
133 uint64_t
134 qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
135 {
136 uint64_t inst = 0;
137
138 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
139 inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
140 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
141 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
142 inst |= qpu_m_dst(dst);
143 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
144 inst |= QPU_MUX(src.mux, QPU_MUL_A);
145 inst |= QPU_MUX(src.mux, QPU_MUL_B);
146 inst = set_src_raddr(inst, src);
147 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
148
149 return inst;
150 }
151
152 uint64_t
153 qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
154 {
155 uint64_t inst = 0;
156
157 inst |= qpu_a_dst(dst);
158 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
159 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
160 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
161 inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
162 inst |= val;
163
164 return inst;
165 }
166
167 uint64_t
168 qpu_a_alu2(enum qpu_op_add op,
169 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
170 {
171 uint64_t inst = 0;
172
173 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
174 inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
175 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
176 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
177 inst |= qpu_a_dst(dst);
178 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
179 inst |= QPU_MUX(src0.mux, QPU_ADD_A);
180 inst = set_src_raddr(inst, src0);
181 inst |= QPU_MUX(src1.mux, QPU_ADD_B);
182 inst = set_src_raddr(inst, src1);
183 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
184
185 return inst;
186 }
187
188 uint64_t
189 qpu_m_alu2(enum qpu_op_mul op,
190 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
191 {
192 uint64_t inst = 0;
193
194 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
195 inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
196 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
197 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
198 inst |= qpu_m_dst(dst);
199 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
200 inst |= QPU_MUX(src0.mux, QPU_MUL_A);
201 inst = set_src_raddr(inst, src0);
202 inst |= QPU_MUX(src1.mux, QPU_MUL_B);
203 inst = set_src_raddr(inst, src1);
204 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
205
206 return inst;
207 }
208
/**
 * Reconciles one bitfield of two instructions into *merge.
 *
 * mask selects the field and ignore is the in-place (already shifted) field
 * value meaning "don't care".  If a's field is the don't-care value, b's
 * field is written to *merge; otherwise, if b's is, a's field is written.
 * When neither is a don't-care, *merge is left alone and the fields must be
 * equal.
 *
 * Returns false only when both fields are significant and differ.
 */
static bool
merge_fields(uint64_t *merge,
             uint64_t a, uint64_t b,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t field_a = a & mask;
        const uint64_t field_b = b & mask;

        if (field_a == ignore) {
                *merge = (*merge & ~mask) | field_b;
                return true;
        }

        if (field_b == ignore) {
                *merge = (*merge & ~mask) | field_a;
                return true;
        }

        return field_a == field_b;
}
225
226 int
227 qpu_num_sf_accesses(uint64_t inst)
228 {
229 int accesses = 0;
230 static const uint32_t specials[] = {
231 QPU_W_TLB_COLOR_MS,
232 QPU_W_TLB_COLOR_ALL,
233 QPU_W_TLB_Z,
234 QPU_W_TMU0_S,
235 QPU_W_TMU0_T,
236 QPU_W_TMU0_R,
237 QPU_W_TMU0_B,
238 QPU_W_TMU1_S,
239 QPU_W_TMU1_T,
240 QPU_W_TMU1_R,
241 QPU_W_TMU1_B,
242 QPU_W_SFU_RECIP,
243 QPU_W_SFU_RECIPSQRT,
244 QPU_W_SFU_EXP,
245 QPU_W_SFU_LOG,
246 };
247 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
248 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
249 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
250 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
251
252 for (int j = 0; j < ARRAY_SIZE(specials); j++) {
253 if (waddr_add == specials[j])
254 accesses++;
255 if (waddr_mul == specials[j])
256 accesses++;
257 }
258
259 if (raddr_a == QPU_R_MUTEX_ACQUIRE)
260 accesses++;
261 if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
262 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
263 accesses++;
264
265 /* XXX: semaphore, combined color read/write? */
266 switch (QPU_GET_FIELD(inst, QPU_SIG)) {
267 case QPU_SIG_COLOR_LOAD:
268 case QPU_SIG_COLOR_LOAD_END:
269 case QPU_SIG_LOAD_TMU0:
270 case QPU_SIG_LOAD_TMU1:
271 accesses++;
272 }
273
274 return accesses;
275 }
276
277 static bool
278 qpu_waddr_ignores_ws(uint32_t waddr)
279 {
280 switch(waddr) {
281 case QPU_W_ACC0:
282 case QPU_W_ACC1:
283 case QPU_W_ACC2:
284 case QPU_W_ACC3:
285 case QPU_W_TLB_Z:
286 case QPU_W_TLB_COLOR_MS:
287 case QPU_W_TLB_COLOR_ALL:
288 case QPU_W_TLB_ALPHA_MASK:
289 case QPU_W_VPM:
290 case QPU_W_SFU_RECIP:
291 case QPU_W_SFU_RECIPSQRT:
292 case QPU_W_SFU_EXP:
293 case QPU_W_SFU_LOG:
294 case QPU_W_TMU0_S:
295 case QPU_W_TMU0_T:
296 case QPU_W_TMU0_R:
297 case QPU_W_TMU0_B:
298 case QPU_W_TMU1_S:
299 case QPU_W_TMU1_T:
300 case QPU_W_TMU1_R:
301 case QPU_W_TMU1_B:
302 return true;
303 }
304
305 return false;
306 }
307
308 static void
309 swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
310 {
311 uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
312 uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
313 uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
314
315 if ((*a & mux_mask) == mux_a_val) {
316 *a = (*a & ~mux_mask) | mux_b_val;
317 *merge = (*merge & ~mux_mask) | mux_b_val;
318 }
319 }
320
321 static bool
322 try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
323 {
324 uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
325 uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
326 uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
327 uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
328
329 if (raddr_a_b != QPU_R_NOP)
330 return false;
331
332 switch (raddr_a_a) {
333 case QPU_R_UNIF:
334 case QPU_R_VARY:
335 break;
336 default:
337 return false;
338 }
339
340 if (!(*merge & QPU_PM) &&
341 QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
342 return false;
343 }
344
345 if (raddr_b_b != QPU_R_NOP &&
346 raddr_b_b != raddr_a_a)
347 return false;
348
349 /* Move raddr A to B in instruction a. */
350 *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
351 *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
352 *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);
353 *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);
354 swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
355 swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
356 swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
357 swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
358
359 return true;
360 }
361
362 static bool
363 convert_mov(uint64_t *inst)
364 {
365 uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);
366 uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);
367 uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);
368
369 /* Is it a MOV? */
370 if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
371 (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
372 return false;
373 }
374
375 if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)
376 return false;
377
378 /* We could maybe support this in the .8888 and .8a-.8d cases. */
379 if (*inst & QPU_PM)
380 return false;
381
382 *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);
383 *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);
384
385 *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);
386 *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
387 *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);
388 *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);
389
390 *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);
391 *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);
392
393 *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);
394 *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);
395
396 if (!qpu_waddr_ignores_ws(waddr_add))
397 *inst ^= QPU_WS;
398
399 return true;
400 }
401
402 static bool
403 writes_a_file(uint64_t inst)
404 {
405 if (!(inst & QPU_WS))
406 return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
407 else
408 return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;
409 }
410
411 static bool
412 reads_r4(uint64_t inst)
413 {
414 return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 ||
415 QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 ||
416 QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 ||
417 QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4);
418 }
419
420 uint64_t
421 qpu_merge_inst(uint64_t a, uint64_t b)
422 {
423 uint64_t merge = a | b;
424 bool ok = true;
425 uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
426 uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);
427
428 if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
429 QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
430 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
431 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
432 !(convert_mov(&a) || convert_mov(&b))) {
433 return 0;
434 } else {
435 merge = a | b;
436 }
437 }
438
439 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
440 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
441 return 0;
442
443 if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
444 return 0;
445
446 if (a_sig == QPU_SIG_LOAD_IMM ||
447 b_sig == QPU_SIG_LOAD_IMM ||
448 a_sig == QPU_SIG_SMALL_IMM ||
449 b_sig == QPU_SIG_SMALL_IMM) {
450 return 0;
451 }
452
453 ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
454 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
455
456 /* Misc fields that have to match exactly. */
457 ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
458
459 if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
460 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
461 /* Since we tend to use regfile A by default both for register
462 * allocation and for our special values (uniforms and
463 * varyings), try swapping uniforms and varyings to regfile B
464 * to resolve raddr A conflicts.
465 */
466 if (!try_swap_ra_file(&merge, &a, &b) &&
467 !try_swap_ra_file(&merge, &b, &a)) {
468 return 0;
469 }
470 }
471
472 ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
473 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
474
475 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
476 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
477 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
478 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
479
480 /* Allow disagreement on WS (swapping A vs B physical reg file as the
481 * destination for ADD/MUL) if one of the original instructions
482 * ignores it (probably because it's just writing to accumulators).
483 */
484 if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
485 qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
486 merge = (merge & ~QPU_WS) | (b & QPU_WS);
487 } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
488 qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
489 merge = (merge & ~QPU_WS) | (a & QPU_WS);
490 } else {
491 if ((a & QPU_WS) != (b & QPU_WS))
492 return 0;
493 }
494
495 if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
496 /* If one instruction has PM bit set and the other not, the
497 * one without PM shouldn't do packing/unpacking, and we
498 * have to make sure non-NOP packing/unpacking from PM
499 * instruction aren't added to it.
500 */
501 uint64_t temp;
502
503 /* Let a be the one with PM bit */
504 if (!(a & QPU_PM)) {
505 temp = a;
506 a = b;
507 b = temp;
508 }
509
510 if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
511 return 0;
512
513 if ((a & QPU_PACK_MASK) != 0 &&
514 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
515 return 0;
516
517 if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
518 return 0;
519 } else {
520 /* packing: Make sure that non-NOP packs agree, then deal with
521 * special-case failing of adding a non-NOP pack to something
522 * with a NOP pack.
523 */
524 if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
525 return 0;
526 bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
527 QPU_GET_FIELD(merge, QPU_PACK));
528 bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
529 QPU_GET_FIELD(merge, QPU_PACK));
530 if (!(merge & QPU_PM)) {
531 /* Make sure we're not going to be putting a new
532 * a-file packing on either half.
533 */
534 if (new_a_pack && writes_a_file(a))
535 return 0;
536
537 if (new_b_pack && writes_a_file(b))
538 return 0;
539 } else {
540 /* Make sure we're not going to be putting new MUL
541 * packing oneither half.
542 */
543 if (new_a_pack &&
544 QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
545 return 0;
546
547 if (new_b_pack &&
548 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
549 return 0;
550 }
551
552 /* unpacking: Make sure that non-NOP unpacks agree, then deal
553 * with special-case failing of adding a non-NOP unpack to
554 * something with a NOP unpack.
555 */
556 if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
557 return 0;
558 bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
559 QPU_GET_FIELD(merge, QPU_UNPACK));
560 bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
561 QPU_GET_FIELD(merge, QPU_UNPACK));
562 if (!(merge & QPU_PM)) {
563 /* Make sure we're not going to be putting a new
564 * a-file packing on either half.
565 */
566 if (new_a_unpack &&
567 QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
568 return 0;
569
570 if (new_b_unpack &&
571 QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
572 return 0;
573 } else {
574 /* Make sure we're not going to be putting new r4
575 * unpack on either half.
576 */
577 if (new_a_unpack && reads_r4(a))
578 return 0;
579
580 if (new_b_unpack && reads_r4(b))
581 return 0;
582 }
583 }
584
585 if (ok)
586 return merge;
587 else
588 return 0;
589 }
590
591 uint64_t
592 qpu_set_sig(uint64_t inst, uint32_t sig)
593 {
594 assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
595 return QPU_UPDATE_FIELD(inst, sig, QPU_SIG);
596 }
597
598 uint64_t
599 qpu_set_cond_add(uint64_t inst, uint32_t cond)
600 {
601 assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
602 return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD);
603 }
604
605 uint64_t
606 qpu_set_cond_mul(uint64_t inst, uint32_t cond)
607 {
608 assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
609 return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);
610 }
611
612 bool
613 qpu_waddr_is_tlb(uint32_t waddr)
614 {
615 switch (waddr) {
616 case QPU_W_TLB_COLOR_ALL:
617 case QPU_W_TLB_COLOR_MS:
618 case QPU_W_TLB_Z:
619 return true;
620 default:
621 return false;
622 }
623 }
624
625 bool
626 qpu_inst_is_tlb(uint64_t inst)
627 {
628 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
629
630 return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
631 qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
632 sig == QPU_SIG_COLOR_LOAD ||
633 sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
634 }
635
/**
 * Returns the small immediate value to be encoded in to the raddr b field if
 * the argument can be represented as one, or ~0 otherwise.
 *
 * Integers 0..15 encode as themselves and -16..-1 encode as 16..31.  The
 * bit patterns of the floats 1.0 .. 128.0 encode as 32..39 and those of
 * 1/256 .. 1/2 encode as 40..47.
 */
uint32_t
qpu_encode_small_immediate(uint32_t i)
{
        /* Float bit patterns, ordered so that entry k encodes as 32 + k. */
        static const uint32_t float_bits[16] = {
                0x3f800000, /* 1.0 */
                0x40000000, /* 2.0 */
                0x40800000, /* 4.0 */
                0x41000000, /* 8.0 */
                0x41800000, /* 16.0 */
                0x42000000, /* 32.0 */
                0x42800000, /* 64.0 */
                0x43000000, /* 128.0 */
                0x3b800000, /* 1.0 / 256 */
                0x3c000000, /* 1.0 / 128 */
                0x3c800000, /* 1.0 / 64 */
                0x3d000000, /* 1.0 / 32 */
                0x3d800000, /* 1.0 / 16 */
                0x3e000000, /* 1.0 / 8 */
                0x3e800000, /* 1.0 / 4 */
                0x3f000000, /* 1.0 / 2 */
        };

        if (i <= 15)
                return i;

        /* Small negative integers wrap around into 16..31. */
        if ((int)i < 0 && (int)i >= -16)
                return i + 32;

        for (uint32_t k = 0; k < 16; k++) {
                if (float_bits[k] == i)
                        return 32 + k;
        }

        return ~0;
}
685
686 void
687 qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
688 {
689 if (c->qpu_inst_count >= c->qpu_inst_size) {
690 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
691 c->qpu_insts = reralloc(c, c->qpu_insts,
692 uint64_t, c->qpu_inst_size);
693 }
694 c->qpu_insts[c->qpu_inst_count++] = inst;
695 }