[mesa.git] / src / broadcom / compiler / vir_to_qpu.c
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

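/* Returns a reference to physical register file entry "index". */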
static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

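/* Returns a reference to the magic register or accumulator named by "waddr"
 * (r0-r5, NOP, VPM, TLB, etc.).
 */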
static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

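/* Returns a reference to accumulator r0 + "acc". */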
static inline struct qpu_reg
qpu_acc(int acc)
{
        return qpu_magic(V3D_QPU_WADDR_R0 + acc);
}

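/* Returns a QPU instruction with a NOP on both the add and mul ALU pipes,
 * writing nothing.
 */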
struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

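/* Returns a new VIR-level NOP instruction (an add-pipe NOP with undefined
 * destination and sources).
 */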
static struct qinst *
vir_nop(void)
{
        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

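/* Inserts a fresh NOP before "inst", used below to give a signal (ldvpm) an
 * instruction slot of its own.
 */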
static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 */
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                assert(instr->sig.small_imm);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

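        /* If nothing in the instruction reads raddr_a yet, claim it.
         * Otherwise, reuse raddr_a if it already holds our register, or fall
         * back to raddr_b.
         */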
        if (instr->alu.add.a != V3D_QPU_MUX_A &&
            instr->alu.add.b != V3D_QPU_MUX_A &&
            instr->alu.mul.a != V3D_QPU_MUX_A &&
            instr->alu.mul.b != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
                                 instr->alu.add.b == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}

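/* Returns true if "qinst" is a bare MOV from a register to itself, with no
 * signals, packing, or condition/flags updates, so it can be dropped.
 */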
static bool
is_no_op_mov(struct qinst *qinst)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        /* Check if it's a MOV from a register to itself. */
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                if (qinst->qpu.alu.mul.a !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                switch (qinst->qpu.alu.mul.a) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        /* Make sure there are no packing or flags updates; otherwise we
         * still need to execute the instruction.
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

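/* Lowers each VIR instruction in the block to its QPU form: resolves the
 * allocated source and destination registers into the instruction's
 * raddr/mux/waddr fields, fixes up signal write addresses, and deletes MOVs
 * that became no-ops after register allocation.
 */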
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        int last_vpm_read_index = -1;

        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                        case QFILE_LOAD_IMM:
                                src[i] = qpu_acc(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_acc(3);
                                break;

                        case QFILE_TLB:
                        case QFILE_TLBU:
                                unreachable("bad vir src file");
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_TLB:
                        dst = qpu_magic(V3D_QPU_WADDR_TLB);
                        break;

                case QFILE_TLBU:
                        dst = qpu_magic(V3D_QPU_WADDR_TLBU);
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

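                                /* ldunif implicitly writes r5.  If the
                                 * register allocator put the result anywhere
                                 * else, switch to the 4.x-only ldunifrf
                                 * signal, which takes an explicit write
                                 * address.
                                 */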
                                if (!dst.magic ||
                                    dst.index != V3D_QPU_WADDR_R5) {
                                        assert(c->devinfo->ver >= 40);

                                        qinst->qpu.sig.ldunif = false;
                                        qinst->qpu.sig.ldunifrf = true;
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                              &qinst->qpu.sig)) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

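/* Returns true if the packed instruction consumes an entry from the uniform
 * stream: a uniform-loading signal (ldunif/ldunifrf/wrtmuc), a branch, or a
 * magic waddr write that implicitly loads a uniform.
 */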
static bool
reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
{
        struct v3d_qpu_instr qpu;
        MAYBE_UNUSED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
        assert(ok);

        if (qpu.sig.ldunif ||
            qpu.sig.ldunifrf ||
            qpu.sig.wrtmuc) {
                return true;
        }

        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
                return true;

        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                if (qpu.alu.add.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
                        return true;
                }

                if (qpu.alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

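/* Disassembles the packed QPU instructions to stderr, annotating each
 * uniform-consuming instruction with the uniform's contents (on 4.x only,
 * since implicit TMU uniforms aren't tracked here on 3.x).
 */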
static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        int next_uniform = 0;
        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

                /* We can only do this on 4.x, because we're not tracking TMU
                 * implicit uniforms here on 3.x.
                 */
                if (c->devinfo->ver >= 40 &&
                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
                        fprintf(stderr, ")");
                        next_uniform++;
                }
                fprintf(stderr, "\n");
                ralloc_free((void *)str);
        }

        /* Make sure our dumping lined up. */
        if (c->devinfo->ver >= 40)
                assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
}

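/* Top-level entry point: lowers the compiled VIR to QPU instructions,
 * schedules them, and packs the final 64-bit instruction words into
 * c->qpu_insts.
 */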
void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will actually be loaded by
         * the generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        uint32_t cycles = v3d_qpu_schedule_instructions(c);

        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction:\n");
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->failed = true;
                        return;
                }
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d instructions\n",
                        vir_get_stage_name(c),
                        c->program_id, c->variant_id,
                        c->qpu_inst_count);
        }

        /* The QPU cycle estimates are pretty broken (see waddr_latency()), so
         * don't report them for now.
         */
        if (false) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
                        vir_get_stage_name(c),
                        c->program_id, c->variant_id,
                        cycles);
        }

        if (V3D_DEBUG & (V3D_DEBUG_QPU |
                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        free(temp_registers);
}