broadcom/vc5: Add support for loading varyings in V3D 4.1.
[mesa.git] / src / broadcom / compiler / vir_to_qpu.c
1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "compiler/v3d_compiler.h"
25 #include "qpu/qpu_instr.h"
26 #include "qpu/qpu_disasm.h"
27
28 static inline struct qpu_reg
29 qpu_reg(int index)
30 {
31 struct qpu_reg reg = {
32 .magic = false,
33 .index = index,
34 };
35 return reg;
36 }
37
38 static inline struct qpu_reg
39 qpu_magic(enum v3d_qpu_waddr waddr)
40 {
41 struct qpu_reg reg = {
42 .magic = true,
43 .index = waddr,
44 };
45 return reg;
46 }
47
48 static inline struct qpu_reg
49 qpu_acc(int acc)
50 {
51 return qpu_magic(V3D_QPU_WADDR_R0 + acc);
52 }
53
54 struct v3d_qpu_instr
55 v3d_qpu_nop(void)
56 {
57 struct v3d_qpu_instr instr = {
58 .type = V3D_QPU_INSTR_TYPE_ALU,
59 .alu = {
60 .add = {
61 .op = V3D_QPU_A_NOP,
62 .waddr = V3D_QPU_WADDR_NOP,
63 .magic_write = true,
64 },
65 .mul = {
66 .op = V3D_QPU_M_NOP,
67 .waddr = V3D_QPU_WADDR_NOP,
68 .magic_write = true,
69 },
70 }
71 };
72
73 return instr;
74 }
75
76 static struct qinst *
77 vir_nop(void)
78 {
79 struct qreg undef = { QFILE_NULL, 0 };
80 struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
81
82 return qinst;
83 }
84
85 static struct qinst *
86 new_qpu_nop_before(struct qinst *inst)
87 {
88 struct qinst *q = vir_nop();
89
90 list_addtail(&q->link, &inst->link);
91
92 return q;
93 }
94
95 static void
96 new_ldunif_instr(struct qinst *inst, int i)
97 {
98 struct qinst *ldunif = new_qpu_nop_before(inst);
99
100 ldunif->qpu.sig.ldunif = true;
101 assert(inst->src[i].file == QFILE_UNIF);
102 ldunif->uniform = inst->src[i].index;
103 }
104
105 /**
106 * Allocates the src register (accumulator or register file) into the RADDR
107 * fields of the instruction.
108 */
109 static void
110 set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
111 {
112 if (src.magic) {
113 assert(src.index >= V3D_QPU_WADDR_R0 &&
114 src.index <= V3D_QPU_WADDR_R5);
115 *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
116 return;
117 }
118
119 if (instr->alu.add.a != V3D_QPU_MUX_A &&
120 instr->alu.add.b != V3D_QPU_MUX_A &&
121 instr->alu.mul.a != V3D_QPU_MUX_A &&
122 instr->alu.mul.b != V3D_QPU_MUX_A) {
123 instr->raddr_a = src.index;
124 *mux = V3D_QPU_MUX_A;
125 } else {
126 if (instr->raddr_a == src.index) {
127 *mux = V3D_QPU_MUX_A;
128 } else {
129 assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
130 instr->alu.add.b == V3D_QPU_MUX_B &&
131 instr->alu.mul.a == V3D_QPU_MUX_B &&
132 instr->alu.mul.b == V3D_QPU_MUX_B) ||
133 src.index == instr->raddr_b);
134
135 instr->raddr_b = src.index;
136 *mux = V3D_QPU_MUX_B;
137 }
138 }
139 }
140
141 static void
142 v3d_generate_code_block(struct v3d_compile *c,
143 struct qblock *block,
144 struct qpu_reg *temp_registers)
145 {
146 int last_vpm_read_index = -1;
147
148 vir_for_each_inst(qinst, block) {
149 #if 0
150 fprintf(stderr, "translating qinst to qpu: ");
151 vir_dump_inst(c, qinst);
152 fprintf(stderr, "\n");
153 #endif
154
155 struct qinst *temp;
156
157 if (vir_has_implicit_uniform(qinst)) {
158 int src = vir_get_implicit_uniform_src(qinst);
159 assert(qinst->src[src].file == QFILE_UNIF);
160 qinst->uniform = qinst->src[src].index;
161 c->num_uniforms++;
162 }
163
164 int nsrc = vir_get_non_sideband_nsrc(qinst);
165 struct qpu_reg src[ARRAY_SIZE(qinst->src)];
166 bool emitted_ldunif = false;
167 for (int i = 0; i < nsrc; i++) {
168 int index = qinst->src[i].index;
169 switch (qinst->src[i].file) {
170 case QFILE_REG:
171 src[i] = qpu_reg(qinst->src[i].index);
172 break;
173 case QFILE_MAGIC:
174 src[i] = qpu_magic(qinst->src[i].index);
175 break;
176 case QFILE_NULL:
177 case QFILE_LOAD_IMM:
178 src[i] = qpu_acc(0);
179 break;
180 case QFILE_TEMP:
181 src[i] = temp_registers[index];
182 break;
183 case QFILE_UNIF:
184 if (!emitted_ldunif) {
185 new_ldunif_instr(qinst, i);
186 c->num_uniforms++;
187 emitted_ldunif = true;
188 }
189
190 src[i] = qpu_acc(5);
191 break;
192 case QFILE_SMALL_IMM:
193 abort(); /* XXX */
194 #if 0
195 src[i].mux = QPU_MUX_SMALL_IMM;
196 src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
197 /* This should only have returned a valid
198 * small immediate field, not ~0 for failure.
199 */
200 assert(src[i].addr <= 47);
201 #endif
202 break;
203
204 case QFILE_VPM:
205 assert((int)qinst->src[i].index >=
206 last_vpm_read_index);
207 (void)last_vpm_read_index;
208 last_vpm_read_index = qinst->src[i].index;
209
210 temp = new_qpu_nop_before(qinst);
211 temp->qpu.sig.ldvpm = true;
212
213 src[i] = qpu_acc(3);
214 break;
215
216 case QFILE_TLB:
217 case QFILE_TLBU:
218 unreachable("bad vir src file");
219 }
220 }
221
222 struct qpu_reg dst;
223 switch (qinst->dst.file) {
224 case QFILE_NULL:
225 dst = qpu_magic(V3D_QPU_WADDR_NOP);
226 break;
227
228 case QFILE_REG:
229 dst = qpu_reg(qinst->dst.index);
230 break;
231
232 case QFILE_MAGIC:
233 dst = qpu_magic(qinst->dst.index);
234 break;
235
236 case QFILE_TEMP:
237 dst = temp_registers[qinst->dst.index];
238 break;
239
240 case QFILE_VPM:
241 dst = qpu_magic(V3D_QPU_WADDR_VPM);
242 break;
243
244 case QFILE_TLB:
245 dst = qpu_magic(V3D_QPU_WADDR_TLB);
246 break;
247
248 case QFILE_TLBU:
249 dst = qpu_magic(V3D_QPU_WADDR_TLBU);
250 break;
251
252 case QFILE_UNIF:
253 case QFILE_SMALL_IMM:
254 case QFILE_LOAD_IMM:
255 assert(!"not reached");
256 break;
257 }
258
259 if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
260 if (v3d_qpu_sig_writes_address(c->devinfo,
261 &qinst->qpu.sig)) {
262 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
263 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
264
265 qinst->qpu.sig_addr = dst.index;
266 qinst->qpu.sig_magic = dst.magic;
267 } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
268 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
269 if (nsrc >= 1) {
270 set_src(&qinst->qpu,
271 &qinst->qpu.alu.add.a, src[0]);
272 }
273 if (nsrc >= 2) {
274 set_src(&qinst->qpu,
275 &qinst->qpu.alu.add.b, src[1]);
276 }
277
278 qinst->qpu.alu.add.waddr = dst.index;
279 qinst->qpu.alu.add.magic_write = dst.magic;
280 } else {
281 if (nsrc >= 1) {
282 set_src(&qinst->qpu,
283 &qinst->qpu.alu.mul.a, src[0]);
284 }
285 if (nsrc >= 2) {
286 set_src(&qinst->qpu,
287 &qinst->qpu.alu.mul.b, src[1]);
288 }
289
290 qinst->qpu.alu.mul.waddr = dst.index;
291 qinst->qpu.alu.mul.magic_write = dst.magic;
292 }
293 } else {
294 assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
295 }
296 }
297 }
298
299
300 static void
301 v3d_dump_qpu(struct v3d_compile *c)
302 {
303 fprintf(stderr, "%s prog %d/%d QPU:\n",
304 vir_get_stage_name(c),
305 c->program_id, c->variant_id);
306
307 for (int i = 0; i < c->qpu_inst_count; i++) {
308 const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
309 fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
310 }
311 fprintf(stderr, "\n");
312 }
313
314 void
315 v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
316 {
317 /* Reset the uniform count to how many will be actually loaded by the
318 * generated QPU code.
319 */
320 c->num_uniforms = 0;
321
322 vir_for_each_block(block, c)
323 v3d_generate_code_block(c, block, temp_registers);
324
325 uint32_t cycles = v3d_qpu_schedule_instructions(c);
326
327 c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
328 int i = 0;
329 vir_for_each_inst_inorder(inst, c) {
330 bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
331 &c->qpu_insts[i++]);
332 assert(ok); (void) ok;
333 }
334 assert(i == c->qpu_inst_count);
335
336 if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
337 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
338 vir_get_stage_name(c),
339 c->program_id, c->variant_id,
340 cycles);
341 }
342
343 if (V3D_DEBUG & (V3D_DEBUG_QPU |
344 v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
345 v3d_dump_qpu(c);
346 }
347
348 qpu_validate(c);
349
350 free(temp_registers);
351 }