v3d: we always have at least one output segment
[mesa.git] / src / broadcom / compiler / vir_to_qpu.c
1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "compiler/v3d_compiler.h"
25 #include "qpu/qpu_instr.h"
26 #include "qpu/qpu_disasm.h"
27
28 static inline struct qpu_reg
29 qpu_reg(int index)
30 {
31 struct qpu_reg reg = {
32 .magic = false,
33 .index = index,
34 };
35 return reg;
36 }
37
38 static inline struct qpu_reg
39 qpu_magic(enum v3d_qpu_waddr waddr)
40 {
41 struct qpu_reg reg = {
42 .magic = true,
43 .index = waddr,
44 };
45 return reg;
46 }
47
48 static inline struct qpu_reg
49 qpu_acc(int acc)
50 {
51 return qpu_magic(V3D_QPU_WADDR_R0 + acc);
52 }
53
54 struct v3d_qpu_instr
55 v3d_qpu_nop(void)
56 {
57 struct v3d_qpu_instr instr = {
58 .type = V3D_QPU_INSTR_TYPE_ALU,
59 .alu = {
60 .add = {
61 .op = V3D_QPU_A_NOP,
62 .waddr = V3D_QPU_WADDR_NOP,
63 .magic_write = true,
64 },
65 .mul = {
66 .op = V3D_QPU_M_NOP,
67 .waddr = V3D_QPU_WADDR_NOP,
68 .magic_write = true,
69 },
70 }
71 };
72
73 return instr;
74 }
75
76 static struct qinst *
77 vir_nop(void)
78 {
79 struct qreg undef = vir_nop_reg();
80 struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
81
82 return qinst;
83 }
84
85 static struct qinst *
86 new_qpu_nop_before(struct qinst *inst)
87 {
88 struct qinst *q = vir_nop();
89
90 list_addtail(&q->link, &inst->link);
91
92 return q;
93 }
94
95 /**
96 * Allocates the src register (accumulator or register file) into the RADDR
97 * fields of the instruction.
98 */
99 static void
100 set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
101 {
102 if (src.smimm) {
103 assert(instr->sig.small_imm);
104 *mux = V3D_QPU_MUX_B;
105 return;
106 }
107
108 if (src.magic) {
109 assert(src.index >= V3D_QPU_WADDR_R0 &&
110 src.index <= V3D_QPU_WADDR_R5);
111 *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
112 return;
113 }
114
115 if (instr->alu.add.a != V3D_QPU_MUX_A &&
116 instr->alu.add.b != V3D_QPU_MUX_A &&
117 instr->alu.mul.a != V3D_QPU_MUX_A &&
118 instr->alu.mul.b != V3D_QPU_MUX_A) {
119 instr->raddr_a = src.index;
120 *mux = V3D_QPU_MUX_A;
121 } else {
122 if (instr->raddr_a == src.index) {
123 *mux = V3D_QPU_MUX_A;
124 } else {
125 assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
126 instr->alu.add.b == V3D_QPU_MUX_B &&
127 instr->alu.mul.a == V3D_QPU_MUX_B &&
128 instr->alu.mul.b == V3D_QPU_MUX_B) ||
129 src.index == instr->raddr_b);
130
131 instr->raddr_b = src.index;
132 *mux = V3D_QPU_MUX_B;
133 }
134 }
135 }
136
137 static bool
138 is_no_op_mov(struct qinst *qinst)
139 {
140 static const struct v3d_qpu_sig no_sig = {0};
141
142 /* Make sure it's just a lone MOV. */
143 if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
144 qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
145 qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
146 memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
147 return false;
148 }
149
150 /* Check if it's a MOV from a register to itself. */
151 enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
152 if (qinst->qpu.alu.mul.magic_write) {
153 if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
154 return false;
155
156 if (qinst->qpu.alu.mul.a !=
157 V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
158 return false;
159 }
160 } else {
161 int raddr;
162
163 switch (qinst->qpu.alu.mul.a) {
164 case V3D_QPU_MUX_A:
165 raddr = qinst->qpu.raddr_a;
166 break;
167 case V3D_QPU_MUX_B:
168 raddr = qinst->qpu.raddr_b;
169 break;
170 default:
171 return false;
172 }
173 if (raddr != waddr)
174 return false;
175 }
176
177 /* No packing or flags updates, or we need to execute the
178 * instruction.
179 */
180 if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
181 qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
182 qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
183 qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
184 qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
185 return false;
186 }
187
188 return true;
189 }
190
191 static void
192 v3d_generate_code_block(struct v3d_compile *c,
193 struct qblock *block,
194 struct qpu_reg *temp_registers)
195 {
196 int last_vpm_read_index = -1;
197
198 vir_for_each_inst_safe(qinst, block) {
199 #if 0
200 fprintf(stderr, "translating qinst to qpu: ");
201 vir_dump_inst(c, qinst);
202 fprintf(stderr, "\n");
203 #endif
204
205 struct qinst *temp;
206
207 if (vir_has_uniform(qinst))
208 c->num_uniforms++;
209
210 int nsrc = vir_get_nsrc(qinst);
211 struct qpu_reg src[ARRAY_SIZE(qinst->src)];
212 for (int i = 0; i < nsrc; i++) {
213 int index = qinst->src[i].index;
214 switch (qinst->src[i].file) {
215 case QFILE_REG:
216 src[i] = qpu_reg(qinst->src[i].index);
217 break;
218 case QFILE_MAGIC:
219 src[i] = qpu_magic(qinst->src[i].index);
220 break;
221 case QFILE_NULL:
222 case QFILE_LOAD_IMM:
223 src[i] = qpu_acc(0);
224 break;
225 case QFILE_TEMP:
226 src[i] = temp_registers[index];
227 break;
228 case QFILE_SMALL_IMM:
229 src[i].smimm = true;
230 break;
231
232 case QFILE_VPM:
233 assert((int)qinst->src[i].index >=
234 last_vpm_read_index);
235 (void)last_vpm_read_index;
236 last_vpm_read_index = qinst->src[i].index;
237
238 temp = new_qpu_nop_before(qinst);
239 temp->qpu.sig.ldvpm = true;
240
241 src[i] = qpu_acc(3);
242 break;
243 }
244 }
245
246 struct qpu_reg dst;
247 switch (qinst->dst.file) {
248 case QFILE_NULL:
249 dst = qpu_magic(V3D_QPU_WADDR_NOP);
250 break;
251
252 case QFILE_REG:
253 dst = qpu_reg(qinst->dst.index);
254 break;
255
256 case QFILE_MAGIC:
257 dst = qpu_magic(qinst->dst.index);
258 break;
259
260 case QFILE_TEMP:
261 dst = temp_registers[qinst->dst.index];
262 break;
263
264 case QFILE_VPM:
265 dst = qpu_magic(V3D_QPU_WADDR_VPM);
266 break;
267
268 case QFILE_SMALL_IMM:
269 case QFILE_LOAD_IMM:
270 assert(!"not reached");
271 break;
272 }
273
274 if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
275 if (qinst->qpu.sig.ldunif) {
276 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
277 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
278
279 if (!dst.magic ||
280 dst.index != V3D_QPU_WADDR_R5) {
281 assert(c->devinfo->ver >= 40);
282
283 qinst->qpu.sig.ldunif = false;
284 qinst->qpu.sig.ldunifrf = true;
285 qinst->qpu.sig_addr = dst.index;
286 qinst->qpu.sig_magic = dst.magic;
287 }
288 } else if (v3d_qpu_sig_writes_address(c->devinfo,
289 &qinst->qpu.sig)) {
290 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
291 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
292
293 qinst->qpu.sig_addr = dst.index;
294 qinst->qpu.sig_magic = dst.magic;
295 } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
296 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
297 if (nsrc >= 1) {
298 set_src(&qinst->qpu,
299 &qinst->qpu.alu.add.a, src[0]);
300 }
301 if (nsrc >= 2) {
302 set_src(&qinst->qpu,
303 &qinst->qpu.alu.add.b, src[1]);
304 }
305
306 qinst->qpu.alu.add.waddr = dst.index;
307 qinst->qpu.alu.add.magic_write = dst.magic;
308 } else {
309 if (nsrc >= 1) {
310 set_src(&qinst->qpu,
311 &qinst->qpu.alu.mul.a, src[0]);
312 }
313 if (nsrc >= 2) {
314 set_src(&qinst->qpu,
315 &qinst->qpu.alu.mul.b, src[1]);
316 }
317
318 qinst->qpu.alu.mul.waddr = dst.index;
319 qinst->qpu.alu.mul.magic_write = dst.magic;
320
321 if (is_no_op_mov(qinst)) {
322 vir_remove_instruction(c, qinst);
323 continue;
324 }
325 }
326 } else {
327 assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
328 }
329 }
330 }
331
332 static bool
333 reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
334 {
335 struct v3d_qpu_instr qpu;
336 ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
337 assert(ok);
338
339 if (qpu.sig.ldunif ||
340 qpu.sig.ldunifrf ||
341 qpu.sig.ldtlbu ||
342 qpu.sig.wrtmuc) {
343 return true;
344 }
345
346 if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
347 return true;
348
349 if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
350 if (qpu.alu.add.magic_write &&
351 v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
352 return true;
353 }
354
355 if (qpu.alu.mul.magic_write &&
356 v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
357 return true;
358 }
359 }
360
361 return false;
362 }
363
364 static void
365 v3d_dump_qpu(struct v3d_compile *c)
366 {
367 fprintf(stderr, "%s prog %d/%d QPU:\n",
368 vir_get_stage_name(c),
369 c->program_id, c->variant_id);
370
371 int next_uniform = 0;
372 for (int i = 0; i < c->qpu_inst_count; i++) {
373 const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
374 fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);
375
376 /* We can only do this on 4.x, because we're not tracking TMU
377 * implicit uniforms here on 3.x.
378 */
379 if (c->devinfo->ver >= 40 &&
380 reads_uniform(c->devinfo, c->qpu_insts[i])) {
381 fprintf(stderr, " (");
382 vir_dump_uniform(c->uniform_contents[next_uniform],
383 c->uniform_data[next_uniform]);
384 fprintf(stderr, ")");
385 next_uniform++;
386 }
387 fprintf(stderr, "\n");
388 ralloc_free((void *)str);
389 }
390
391 /* Make sure our dumping lined up. */
392 if (c->devinfo->ver >= 40)
393 assert(next_uniform == c->num_uniforms);
394
395 fprintf(stderr, "\n");
396 }
397
398 void
399 v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
400 {
401 /* Reset the uniform count to how many will be actually loaded by the
402 * generated QPU code.
403 */
404 c->num_uniforms = 0;
405
406 vir_for_each_block(block, c)
407 v3d_generate_code_block(c, block, temp_registers);
408
409 v3d_qpu_schedule_instructions(c);
410
411 c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
412 int i = 0;
413 vir_for_each_inst_inorder(inst, c) {
414 bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
415 &c->qpu_insts[i++]);
416 if (!ok) {
417 fprintf(stderr, "Failed to pack instruction:\n");
418 vir_dump_inst(c, inst);
419 fprintf(stderr, "\n");
420 c->failed = true;
421 return;
422 }
423 }
424 assert(i == c->qpu_inst_count);
425
426 if (V3D_DEBUG & (V3D_DEBUG_QPU |
427 v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
428 v3d_dump_qpu(c);
429 }
430
431 qpu_validate(c);
432
433 free(temp_registers);
434 }