/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"

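/* Helpers for constructing qpu_reg operands: a physical register file
 * index, a magic write address, or one of the r0-r5 accumulators.
 */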
static inline struct qpu_reg
qpu_reg(int index)
{
        struct qpu_reg reg = {
                .magic = false,
                .index = index,
        };
        return reg;
}

static inline struct qpu_reg
qpu_magic(enum v3d_qpu_waddr waddr)
{
        struct qpu_reg reg = {
                .magic = true,
                .index = waddr,
        };
        return reg;
}

static inline struct qpu_reg
qpu_acc(int acc)
{
        return qpu_magic(V3D_QPU_WADDR_R0 + acc);
}

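/* Returns a fully-formed QPU NOP: an ALU instruction with NOP on both the
 * add and mul pipes, writing to the magic NOP address and raising no
 * signals.
 */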
struct v3d_qpu_instr
v3d_qpu_nop(void)
{
        struct v3d_qpu_instr instr = {
                .type = V3D_QPU_INSTR_TYPE_ALU,
                .alu = {
                        .add = {
                                .op = V3D_QPU_A_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                        .mul = {
                                .op = V3D_QPU_M_NOP,
                                .waddr = V3D_QPU_WADDR_NOP,
                                .magic_write = true,
                        },
                }
        };

        return instr;
}

static struct qinst *
vir_nop(void)
{
        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
}

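/* Allocates a fresh VIR NOP and links it into the instruction list just
 * before @inst, so it can carry a signal (e.g. ldvpm) that needs its own
 * instruction.
 */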
static struct qinst *
new_qpu_nop_before(struct qinst *inst)
{
        struct qinst *q = vir_nop();

        list_addtail(&q->link, &inst->link);

        return q;
}

/**
 * Allocates the src register (accumulator or register file) into the RADDR
 * fields of the instruction.
 */
static void
set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
{
        if (src.smimm) {
                assert(instr->sig.small_imm);
                *mux = V3D_QPU_MUX_B;
                return;
        }

        if (src.magic) {
                assert(src.index >= V3D_QPU_WADDR_R0 &&
                       src.index <= V3D_QPU_WADDR_R5);
                *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
                return;
        }

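        /* An instruction can read at most one register file row on each of
         * the A and B read ports.  Use port A if nothing in this
         * instruction has claimed it yet; otherwise reuse A when it
         * already holds our row, and fall back to B (asserting that B is
         * either unused or already loaded with the same row).
         */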
        if (instr->alu.add.a != V3D_QPU_MUX_A &&
            instr->alu.add.b != V3D_QPU_MUX_A &&
            instr->alu.mul.a != V3D_QPU_MUX_A &&
            instr->alu.mul.b != V3D_QPU_MUX_A) {
                instr->raddr_a = src.index;
                *mux = V3D_QPU_MUX_A;
        } else {
                if (instr->raddr_a == src.index) {
                        *mux = V3D_QPU_MUX_A;
                } else {
                        assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
                                 instr->alu.add.b == V3D_QPU_MUX_B &&
                                 instr->alu.mul.a == V3D_QPU_MUX_B &&
                                 instr->alu.mul.b == V3D_QPU_MUX_B) ||
                               src.index == instr->raddr_b);

                        instr->raddr_b = src.index;
                        *mux = V3D_QPU_MUX_B;
                }
        }
}

static bool
is_no_op_mov(struct qinst *qinst)
{
        static const struct v3d_qpu_sig no_sig = {0};

        /* Make sure it's just a lone MOV. */
        if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||
            qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||
            memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {
                return false;
        }

        /* Check if it's a MOV from a register to itself. */
        enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
        if (qinst->qpu.alu.mul.magic_write) {
                if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)
                        return false;

                if (qinst->qpu.alu.mul.a !=
                    V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {
                        return false;
                }
        } else {
                int raddr;

                switch (qinst->qpu.alu.mul.a) {
                case V3D_QPU_MUX_A:
                        raddr = qinst->qpu.raddr_a;
                        break;
                case V3D_QPU_MUX_B:
                        raddr = qinst->qpu.raddr_b;
                        break;
                default:
                        return false;
                }
                if (raddr != waddr)
                        return false;
        }

        /* There must be no packing or flag updates, either of which would
         * mean the instruction still has an effect.
         */
        if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
            qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||
            qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||
            qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||
            qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {
                return false;
        }

        return true;
}

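/* Lowers a block's VIR instructions to their QPU encodings: physical
 * registers are written into the instruction fields, signals with implicit
 * destinations get explicit ones, and MOVs that became no-ops after
 * register allocation are removed.
 */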
static void
v3d_generate_code_block(struct v3d_compile *c,
                        struct qblock *block,
                        struct qpu_reg *temp_registers)
{
        int last_vpm_read_index = -1;

        vir_for_each_inst_safe(qinst, block) {
#if 0
                fprintf(stderr, "translating qinst to qpu: ");
                vir_dump_inst(c, qinst);
                fprintf(stderr, "\n");
#endif

                struct qinst *temp;

                if (vir_has_uniform(qinst))
                        c->num_uniforms++;

                int nsrc = vir_get_nsrc(qinst);
                struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                for (int i = 0; i < nsrc; i++) {
                        int index = qinst->src[i].index;
                        switch (qinst->src[i].file) {
                        case QFILE_REG:
                                src[i] = qpu_reg(qinst->src[i].index);
                                break;
                        case QFILE_MAGIC:
                                src[i] = qpu_magic(qinst->src[i].index);
                                break;
                        case QFILE_NULL:
                        case QFILE_LOAD_IMM:
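                                /* These sources are never actually read by
                                 * the operation, so any mux setting works;
                                 * arbitrarily use r0.
                                 */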
                                src[i] = qpu_acc(0);
                                break;
                        case QFILE_TEMP:
                                src[i] = temp_registers[index];
                                break;
                        case QFILE_SMALL_IMM:
                                src[i].smimm = true;
                                break;

                        case QFILE_VPM:
                                assert((int)qinst->src[i].index >=
                                       last_vpm_read_index);
                                (void)last_vpm_read_index;
                                last_vpm_read_index = qinst->src[i].index;

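                                /* A VPM read is requested by the ldvpm
                                 * signal on a separate instruction ahead of
                                 * this one, and the loaded value is then
                                 * consumed from r3.
                                 */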
                                temp = new_qpu_nop_before(qinst);
                                temp->qpu.sig.ldvpm = true;

                                src[i] = qpu_acc(3);
                                break;
                        }
                }

                struct qpu_reg dst;
                switch (qinst->dst.file) {
                case QFILE_NULL:
                        dst = qpu_magic(V3D_QPU_WADDR_NOP);
                        break;

                case QFILE_REG:
                        dst = qpu_reg(qinst->dst.index);
                        break;

                case QFILE_MAGIC:
                        dst = qpu_magic(qinst->dst.index);
                        break;

                case QFILE_TEMP:
                        dst = temp_registers[qinst->dst.index];
                        break;

                case QFILE_VPM:
                        dst = qpu_magic(V3D_QPU_WADDR_VPM);
                        break;

                case QFILE_SMALL_IMM:
                case QFILE_LOAD_IMM:
                        assert(!"not reached");
                        break;
                }

                if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                        if (qinst->qpu.sig.ldunif) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

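                                /* ldunif's implicit destination is r5.  If
                                 * register allocation put this value
                                 * anywhere else, switch to ldunifrf (4.x
                                 * and later), which takes an explicit
                                 * destination in sig_addr.
                                 */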
                                if (!dst.magic ||
                                    dst.index != V3D_QPU_WADDR_R5) {
                                        assert(c->devinfo->ver >= 40);

                                        qinst->qpu.sig.ldunif = false;
                                        qinst->qpu.sig.ldunifrf = true;
                                        qinst->qpu.sig_addr = dst.index;
                                        qinst->qpu.sig_magic = dst.magic;
                                }
                        } else if (v3d_qpu_sig_writes_address(c->devinfo,
                                                              &qinst->qpu.sig)) {
                                assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);

                                qinst->qpu.sig_addr = dst.index;
                                qinst->qpu.sig_magic = dst.magic;
                        } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
                                assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.add.b, src[1]);
                                }

                                qinst->qpu.alu.add.waddr = dst.index;
                                qinst->qpu.alu.add.magic_write = dst.magic;
                        } else {
                                if (nsrc >= 1) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.a, src[0]);
                                }
                                if (nsrc >= 2) {
                                        set_src(&qinst->qpu,
                                                &qinst->qpu.alu.mul.b, src[1]);
                                }

                                qinst->qpu.alu.mul.waddr = dst.index;
                                qinst->qpu.alu.mul.magic_write = dst.magic;

                                if (is_no_op_mov(qinst)) {
                                        vir_remove_instruction(c, qinst);
                                        continue;
                                }
                        }
                } else {
                        assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
                }
        }
}

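/* Returns whether executing this packed instruction will consume an entry
 * from the uniform stream.
 */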
static bool
reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
{
        struct v3d_qpu_instr qpu;
        MAYBE_UNUSED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
        assert(ok);

        if (qpu.sig.ldunif ||
            qpu.sig.ldunifrf ||
            qpu.sig.wrtmuc) {
                return true;
        }

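        /* The branches this compiler emits take their target from the
         * uniform stream.
         */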
        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
                return true;

        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                if (qpu.alu.add.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
                        return true;
                }

                if (qpu.alu.mul.magic_write &&
                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
                        return true;
                }
        }

        return false;
}

static void
v3d_dump_qpu(struct v3d_compile *c)
{
        fprintf(stderr, "%s prog %d/%d QPU:\n",
                vir_get_stage_name(c),
                c->program_id, c->variant_id);

        int next_uniform = 0;
        for (int i = 0; i < c->qpu_inst_count; i++) {
                const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);

                /* We can only do this on 4.x, because we're not tracking TMU
                 * implicit uniforms here on 3.x.
                 */
                if (c->devinfo->ver >= 40 &&
                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
                        fprintf(stderr, " (");
                        vir_dump_uniform(c->uniform_contents[next_uniform],
                                         c->uniform_data[next_uniform]);
                        fprintf(stderr, ")");
                        next_uniform++;
                }
                fprintf(stderr, "\n");
                ralloc_free((void *)str);
        }

        /* Make sure our dumping lined up. */
        if (c->devinfo->ver >= 40)
                assert(next_uniform == c->num_uniforms);

        fprintf(stderr, "\n");
}

void
v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
{
        /* Reset the uniform count to how many will actually be loaded by
         * the generated QPU code.
         */
        c->num_uniforms = 0;

        vir_for_each_block(block, c)
                v3d_generate_code_block(c, block, temp_registers);

        v3d_qpu_schedule_instructions(c);

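        /* Pack each scheduled instruction down to its 64-bit QPU encoding. */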
        c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
        int i = 0;
        vir_for_each_inst_inorder(inst, c) {
                bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
                                             &c->qpu_insts[i++]);
                if (!ok) {
                        fprintf(stderr, "Failed to pack instruction:\n");
                        vir_dump_inst(c, inst);
                        fprintf(stderr, "\n");
                        c->failed = true;
                        return;
                }
        }
        assert(i == c->qpu_inst_count);

        if (V3D_DEBUG & (V3D_DEBUG_QPU |
                         v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
                v3d_dump_qpu(c);
        }

        qpu_validate(c);

        free(temp_registers);
}