broadcom/vc5: Use THRSW to enable multi-threaded shaders.
[mesa.git] / src / broadcom / compiler / vir_to_qpu.c
1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "compiler/v3d_compiler.h"
25 #include "qpu/qpu_instr.h"
26 #include "qpu/qpu_disasm.h"
27
28 static inline struct qpu_reg
29 qpu_reg(int index)
30 {
31 struct qpu_reg reg = {
32 .magic = false,
33 .index = index,
34 };
35 return reg;
36 }
37
38 static inline struct qpu_reg
39 qpu_magic(enum v3d_qpu_waddr waddr)
40 {
41 struct qpu_reg reg = {
42 .magic = true,
43 .index = waddr,
44 };
45 return reg;
46 }
47
48 static inline struct qpu_reg
49 qpu_acc(int acc)
50 {
51 return qpu_magic(V3D_QPU_WADDR_R0 + acc);
52 }
53
54 struct v3d_qpu_instr
55 v3d_qpu_nop(void)
56 {
57 struct v3d_qpu_instr instr = {
58 .type = V3D_QPU_INSTR_TYPE_ALU,
59 .alu = {
60 .add = {
61 .op = V3D_QPU_A_NOP,
62 .waddr = V3D_QPU_WADDR_NOP,
63 .magic_write = true,
64 },
65 .mul = {
66 .op = V3D_QPU_M_NOP,
67 .waddr = V3D_QPU_WADDR_NOP,
68 .magic_write = true,
69 },
70 }
71 };
72
73 return instr;
74 }
75
76 static struct qinst *
77 vir_nop(void)
78 {
79 struct qreg undef = { QFILE_NULL, 0 };
80 struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
81
82 return qinst;
83 }
84
85 static struct qinst *
86 new_qpu_nop_before(struct qinst *inst)
87 {
88 struct qinst *q = vir_nop();
89
90 list_addtail(&q->link, &inst->link);
91
92 return q;
93 }
94
95 static void
96 new_ldunif_instr(struct qinst *inst, int i)
97 {
98 struct qinst *ldunif = new_qpu_nop_before(inst);
99
100 ldunif->qpu.sig.ldunif = true;
101 assert(inst->src[i].file == QFILE_UNIF);
102 ldunif->uniform = inst->src[i].index;
103 }
104
105 /**
106 * Allocates the src register (accumulator or register file) into the RADDR
107 * fields of the instruction.
108 */
109 static void
110 set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
111 {
112 if (src.magic) {
113 assert(src.index >= V3D_QPU_WADDR_R0 &&
114 src.index <= V3D_QPU_WADDR_R5);
115 *mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;
116 return;
117 }
118
119 if (instr->alu.add.a != V3D_QPU_MUX_A &&
120 instr->alu.add.b != V3D_QPU_MUX_A &&
121 instr->alu.mul.a != V3D_QPU_MUX_A &&
122 instr->alu.mul.b != V3D_QPU_MUX_A) {
123 instr->raddr_a = src.index;
124 *mux = V3D_QPU_MUX_A;
125 } else {
126 if (instr->raddr_a == src.index) {
127 *mux = V3D_QPU_MUX_A;
128 } else {
129 assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&
130 instr->alu.add.b == V3D_QPU_MUX_B &&
131 instr->alu.mul.a == V3D_QPU_MUX_B &&
132 instr->alu.mul.b == V3D_QPU_MUX_B) ||
133 src.index == instr->raddr_b);
134
135 instr->raddr_b = src.index;
136 *mux = V3D_QPU_MUX_B;
137 }
138 }
139 }
140
141 static void
142 v3d_generate_code_block(struct v3d_compile *c,
143 struct qblock *block,
144 struct qpu_reg *temp_registers)
145 {
146 int last_vpm_read_index = -1;
147
148 vir_for_each_inst(qinst, block) {
149 #if 0
150 fprintf(stderr, "translating qinst to qpu: ");
151 vir_dump_inst(c, qinst);
152 fprintf(stderr, "\n");
153 #endif
154
155 struct qinst *temp;
156
157 if (vir_has_implicit_uniform(qinst)) {
158 int src = vir_get_implicit_uniform_src(qinst);
159 assert(qinst->src[src].file == QFILE_UNIF);
160 qinst->uniform = qinst->src[src].index;
161 c->num_uniforms++;
162 }
163
164 int nsrc = vir_get_non_sideband_nsrc(qinst);
165 struct qpu_reg src[ARRAY_SIZE(qinst->src)];
166 bool emitted_ldunif = false;
167 for (int i = 0; i < nsrc; i++) {
168 int index = qinst->src[i].index;
169 switch (qinst->src[i].file) {
170 case QFILE_REG:
171 src[i] = qpu_reg(qinst->src[i].index);
172 break;
173 case QFILE_MAGIC:
174 src[i] = qpu_magic(qinst->src[i].index);
175 break;
176 case QFILE_NULL:
177 case QFILE_LOAD_IMM:
178 src[i] = qpu_acc(0);
179 break;
180 case QFILE_TEMP:
181 src[i] = temp_registers[index];
182 break;
183 case QFILE_UNIF:
184 if (!emitted_ldunif) {
185 new_ldunif_instr(qinst, i);
186 c->num_uniforms++;
187 emitted_ldunif = true;
188 }
189
190 src[i] = qpu_acc(5);
191 break;
192 case QFILE_VARY:
193 temp = new_qpu_nop_before(qinst);
194 temp->qpu.sig.ldvary = true;
195
196 src[i] = qpu_acc(3);
197 break;
198 case QFILE_SMALL_IMM:
199 abort(); /* XXX */
200 #if 0
201 src[i].mux = QPU_MUX_SMALL_IMM;
202 src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
203 /* This should only have returned a valid
204 * small immediate field, not ~0 for failure.
205 */
206 assert(src[i].addr <= 47);
207 #endif
208 break;
209
210 case QFILE_VPM:
211 assert((int)qinst->src[i].index >=
212 last_vpm_read_index);
213 (void)last_vpm_read_index;
214 last_vpm_read_index = qinst->src[i].index;
215
216 temp = new_qpu_nop_before(qinst);
217 temp->qpu.sig.ldvpm = true;
218
219 src[i] = qpu_acc(3);
220 break;
221
222 case QFILE_TLB:
223 case QFILE_TLBU:
224 unreachable("bad vir src file");
225 }
226 }
227
228 struct qpu_reg dst;
229 switch (qinst->dst.file) {
230 case QFILE_NULL:
231 dst = qpu_magic(V3D_QPU_WADDR_NOP);
232 break;
233
234 case QFILE_REG:
235 dst = qpu_reg(qinst->dst.index);
236 break;
237
238 case QFILE_MAGIC:
239 dst = qpu_magic(qinst->dst.index);
240 break;
241
242 case QFILE_TEMP:
243 dst = temp_registers[qinst->dst.index];
244 break;
245
246 case QFILE_VPM:
247 dst = qpu_magic(V3D_QPU_WADDR_VPM);
248 break;
249
250 case QFILE_TLB:
251 dst = qpu_magic(V3D_QPU_WADDR_TLB);
252 break;
253
254 case QFILE_TLBU:
255 dst = qpu_magic(V3D_QPU_WADDR_TLBU);
256 break;
257
258 case QFILE_VARY:
259 case QFILE_UNIF:
260 case QFILE_SMALL_IMM:
261 case QFILE_LOAD_IMM:
262 assert(!"not reached");
263 break;
264 }
265
266 if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
267 if (v3d_qpu_sig_writes_address(c->devinfo,
268 &qinst->qpu.sig)) {
269 assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
270 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
271
272 qinst->qpu.sig_addr = dst.index;
273 qinst->qpu.sig_magic = dst.magic;
274 } else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
275 assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
276 if (nsrc >= 1) {
277 set_src(&qinst->qpu,
278 &qinst->qpu.alu.add.a, src[0]);
279 }
280 if (nsrc >= 2) {
281 set_src(&qinst->qpu,
282 &qinst->qpu.alu.add.b, src[1]);
283 }
284
285 qinst->qpu.alu.add.waddr = dst.index;
286 qinst->qpu.alu.add.magic_write = dst.magic;
287 } else {
288 if (nsrc >= 1) {
289 set_src(&qinst->qpu,
290 &qinst->qpu.alu.mul.a, src[0]);
291 }
292 if (nsrc >= 2) {
293 set_src(&qinst->qpu,
294 &qinst->qpu.alu.mul.b, src[1]);
295 }
296
297 qinst->qpu.alu.mul.waddr = dst.index;
298 qinst->qpu.alu.mul.magic_write = dst.magic;
299 }
300 } else {
301 assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);
302 }
303 }
304 }
305
306
307 static void
308 v3d_dump_qpu(struct v3d_compile *c)
309 {
310 fprintf(stderr, "%s prog %d/%d QPU:\n",
311 vir_get_stage_name(c),
312 c->program_id, c->variant_id);
313
314 for (int i = 0; i < c->qpu_inst_count; i++) {
315 const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
316 fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
317 }
318 fprintf(stderr, "\n");
319 }
320
321 void
322 v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
323 {
324 /* Reset the uniform count to how many will be actually loaded by the
325 * generated QPU code.
326 */
327 c->num_uniforms = 0;
328
329 vir_for_each_block(block, c)
330 v3d_generate_code_block(c, block, temp_registers);
331
332 uint32_t cycles = v3d_qpu_schedule_instructions(c);
333
334 c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
335 int i = 0;
336 vir_for_each_inst_inorder(inst, c) {
337 bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,
338 &c->qpu_insts[i++]);
339 assert(ok); (void) ok;
340 }
341 assert(i == c->qpu_inst_count);
342
343 if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
344 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
345 vir_get_stage_name(c),
346 c->program_id, c->variant_id,
347 cycles);
348 }
349
350 if (V3D_DEBUG & (V3D_DEBUG_QPU |
351 v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
352 v3d_dump_qpu(c);
353 }
354
355 qpu_validate(c);
356
357 free(temp_registers);
358 }