broadcom/vc5: Add support for register spilling.
[mesa.git] / src / broadcom / compiler / vir.c
/*
 * Copyright © 2016-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"

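/* Returns the number of explicit ALU sources, not counting the implicit
 * (sideband) uniform that vir_get_nsrc() adds on for instructions that
 * carry one.
 */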
int
vir_get_non_sideband_nsrc(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return 0;
        case V3D_QPU_INSTR_TYPE_ALU:
                if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
                        return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
                else
                        return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
        }

        return 0;
}

int
vir_get_nsrc(struct qinst *inst)
{
        int nsrc = vir_get_non_sideband_nsrc(inst);

        if (vir_has_implicit_uniform(inst))
                nsrc++;

        return nsrc;
}

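/* Branches always read an implicit uniform for their target, and TLBU
 * writes carry their configuration in one; other ALU instructions flag it
 * explicitly.
 */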
bool
vir_has_implicit_uniform(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->dst.file) {
                case QFILE_TLBU:
                        return true;
                default:
                        return inst->has_implicit_uniform;
                }
        }
        return false;
}

/* The sideband uniform for textures gets stored after the normal ALU
 * arguments.
 */
int
vir_get_implicit_uniform_src(struct qinst *inst)
{
        return vir_get_nsrc(inst) - 1;
}

/**
 * Returns whether the instruction has any side effects that must be
 * preserved.
 */
bool
vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->qpu.alu.add.op) {
                case V3D_QPU_A_SETREVF:
                case V3D_QPU_A_SETMSF:
                case V3D_QPU_A_VPMSETUP:
                case V3D_QPU_A_STVPMV:
                case V3D_QPU_A_STVPMD:
                case V3D_QPU_A_STVPMP:
                case V3D_QPU_A_VPMWT:
                        return true;
                default:
                        break;
                }

                switch (inst->qpu.alu.mul.op) {
                case V3D_QPU_M_MULTOP:
                        return true;
                default:
                        break;
                }
        }

        if (inst->qpu.sig.ldtmu ||
            inst->qpu.sig.ldvary ||
            inst->qpu.sig.wrtmuc ||
            inst->qpu.sig.thrsw) {
                return true;
        }

        return false;
}

bool
vir_is_float_input(struct qinst *inst)
{
        /* XXX: More instrs */
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return false;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->qpu.alu.add.op) {
                case V3D_QPU_A_FADD:
                case V3D_QPU_A_FSUB:
                case V3D_QPU_A_FMIN:
                case V3D_QPU_A_FMAX:
                case V3D_QPU_A_FTOIN:
                        return true;
                default:
                        break;
                }

                switch (inst->qpu.alu.mul.op) {
                case V3D_QPU_M_FMOV:
                case V3D_QPU_M_VFMUL:
                case V3D_QPU_M_FMUL:
                        return true;
                default:
                        break;
                }
        }

        return false;
}

bool
vir_is_raw_mov(struct qinst *inst)
{
        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
                return false;
        }

        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
                return false;
        }

        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
                return false;

        return true;
}

bool
vir_is_add(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.add.op != V3D_QPU_A_NOP);
}

bool
vir_is_mul(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
}

bool
vir_is_tex(struct qinst *inst)
{
        if (inst->dst.file == QFILE_MAGIC)
                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);

        return false;
}

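/* Returns whether the instruction's execution is conditional on the flags.
 * A VIR instruction packs a single add or mul op, so at most one of ac/mc
 * will be set.
 */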
bool
vir_depends_on_flags(struct qinst *inst)
{
        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
                return (inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS);
        } else {
                return (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
                        inst->qpu.flags.mc != V3D_QPU_COND_NONE);
        }
}

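/* r3 and r4 are accumulators that various operations write implicitly, so
 * the register allocator needs to know when they get clobbered.  VPM reads
 * land in r3 (as do the ldvary/ldtlb/ldtlbu/ldvpm signals before V3D 4.1),
 * while SFU results (and ldtmu results before 4.1) land in r4.
 */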
bool
vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        for (int i = 0; i < vir_get_nsrc(inst); i++) {
                switch (inst->src[i].file) {
                case QFILE_VPM:
                        return true;
                default:
                        break;
                }
        }

        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
                                  inst->qpu.sig.ldtlb ||
                                  inst->qpu.sig.ldtlbu ||
                                  inst->qpu.sig.ldvpm)) {
                return true;
        }

        return false;
}

bool
vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        switch (inst->dst.file) {
        case QFILE_MAGIC:
                switch (inst->dst.index) {
                case V3D_QPU_WADDR_RECIP:
                case V3D_QPU_WADDR_RSQRT:
                case V3D_QPU_WADDR_EXP:
                case V3D_QPU_WADDR_LOG:
                case V3D_QPU_WADDR_SIN:
                        return true;
                }
                break;
        default:
                break;
        }

        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
                return true;

        return false;
}

void
vir_set_unpack(struct qinst *inst, int src,
               enum v3d_qpu_input_unpack unpack)
{
        assert(src == 0 || src == 1);

        if (vir_is_add(inst)) {
                if (src == 0)
                        inst->qpu.alu.add.a_unpack = unpack;
                else
                        inst->qpu.alu.add.b_unpack = unpack;
        } else {
                assert(vir_is_mul(inst));
                if (src == 0)
                        inst->qpu.alu.mul.a_unpack = unpack;
                else
                        inst->qpu.alu.mul.b_unpack = unpack;
        }
}

void
vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.ac = cond;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mc = cond;
        }
}

void
vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.apf = pf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mpf = pf;
        }
}

#if 0
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif

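/* Allocates a new temporary, growing the defs array and the spillable
 * bitset along with it.  New temps default to being considered spillable.
 */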
struct qreg
vir_get_temp(struct v3d_compile *c)
{
        struct qreg reg;

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;

        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
                c->defs_array_size = MAX2(old_size * 2, 16);

                c->defs = reralloc(c, c->defs, struct qinst *,
                                   c->defs_array_size);
                memset(&c->defs[old_size], 0,
                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));

                c->spillable = reralloc(c, c->spillable,
                                        BITSET_WORD,
                                        BITSET_WORDS(c->defs_array_size));
                for (int i = old_size; i < c->defs_array_size; i++)
                        BITSET_SET(c->spillable, i);
        }

        return reg;
}

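/* The vir_*_inst() constructors wrap a QPU NOP encoding and fill in just
 * the one op.  inst->uniform is set to ~0 as a "no uniform stream entry"
 * sentinel.
 */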
struct qinst *
vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.add.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.mul.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
        inst->qpu.branch.cond = cond;
        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
        inst->qpu.branch.ub = true;
        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;

        inst->dst = vir_reg(QFILE_NULL, 0);
        inst->src[0] = src;
        inst->uniform = ~0;

        return inst;
}

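/* Inserts the instruction at the current cursor position and advances the
 * cursor, so that subsequent emits land right after it.
 */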
static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        switch (c->cursor.mode) {
        case vir_cursor_add:
                list_add(&inst->link, c->cursor.link);
                break;
        case vir_cursor_addtail:
                list_addtail(&inst->link, c->cursor.link);
                break;
        }

        c->cursor = vir_after_inst(inst);
}

/* Updates inst to write to a new temporary, emits it, and notes the def. */
struct qreg
vir_emit_def(struct v3d_compile *c, struct qinst *inst)
{
        assert(inst->dst.file == QFILE_NULL);

        inst->dst = vir_get_temp(c);

        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = inst;

        vir_emit(c, inst);

        return inst->dst;
}

struct qinst *
vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
{
        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = NULL;

        vir_emit(c, inst);

        return inst;
}

struct qblock *
vir_new_block(struct v3d_compile *c)
{
        struct qblock *block = rzalloc(c, struct qblock);

        list_inithead(&block->instructions);

        block->predecessors = _mesa_set_create(block,
                                               _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);

        block->index = c->next_block_index++;

        return block;
}

void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
        c->cur_block = block;
        c->cursor = vir_after_block(block);
        list_addtail(&block->link, &c->blocks);
}

struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}

struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}

void
vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
{
        _mesa_set_add(successor->predecessors, predecessor);
        if (predecessor->successors[0]) {
                assert(!predecessor->successors[1]);
                predecessor->successors[1] = successor;
        } else {
                predecessor->successors[0] = successor;
        }
}

const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info *devinfo)
{
        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
        if (!compiler)
                return NULL;

        compiler->devinfo = devinfo;

        if (!vir_init_reg_sets(compiler)) {
                ralloc_free(compiler);
                return NULL;
        }

        return compiler;
}

void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        ralloc_free((void *)compiler);
}

static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
                 struct v3d_key *key,
                 nir_shader *s,
                 int program_id, int variant_id)
{
        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);

        c->compiler = compiler;
        c->devinfo = compiler->devinfo;
        c->key = key;
        c->program_id = program_id;
        c->variant_id = variant_id;
        c->threads = 4;

        s = nir_shader_clone(c, s);
        c->s = s;

        list_inithead(&c->blocks);
        vir_set_emit_block(c, vir_new_block(c));

        c->output_position_index = -1;
        c->output_point_size_index = -1;
        c->output_sample_mask_index = -1;

        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        return c;
}

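/* NIR lowering that depends on the shader key's texture state, run before
 * the stage-specific lowering and the late lowering passes.
 */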
static void
v3d_lower_nir(struct v3d_compile *c)
{
        struct nir_lower_tex_options tex_options = {
                .lower_txd = true,
                .lower_rect = false, /* XXX */
                .lower_txp = ~0,
                /* Apply swizzles to all samplers. */
                .swizzle_result = ~0,
        };

        /* Lower the format swizzle and (for 32-bit returns)
         * ARB_texture_swizzle-style swizzle.
         */
        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
                for (int j = 0; j < 4; j++)
                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];

                if (c->key->tex[i].clamp_s)
                        tex_options.saturate_s |= 1 << i;
                if (c->key->tex[i].clamp_t)
                        tex_options.saturate_t |= 1 << i;
                if (c->key->tex[i].clamp_r)
                        tex_options.saturate_r |= 1 << i;
        }

        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
}

static void
v3d_lower_nir_late(struct v3d_compile *c)
{
        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, nir_lower_idiv);
}

static void
v3d_set_prog_data_uniforms(struct v3d_compile *c,
                           struct v3d_prog_data *prog_data)
{
        int count = c->num_uniforms;
        struct v3d_uniform_list *ulist = &prog_data->uniforms;

        ulist->count = count;
        ulist->data = ralloc_array(prog_data, uint32_t, count);
        memcpy(ulist->data, c->uniform_data,
               count * sizeof(*ulist->data));
        ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
        memcpy(ulist->contents, c->uniform_contents,
               count * sizeof(*ulist->contents));
}

/* Copy the compiler UBO range state to the compiled shader, dropping out
 * arrays that were never referenced by an indirect load.
 *
 * (Note that VIR dead code elimination of an array access still leaves that
 * array alive, though.)
 */
static void
v3d_set_prog_data_ubo(struct v3d_compile *c,
                      struct v3d_prog_data *prog_data)
{
        if (!c->num_ubo_ranges)
                return;

        prog_data->num_ubo_ranges = 0;
        prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
                                             c->num_ubo_ranges);
        for (int i = 0; i < c->num_ubo_ranges; i++) {
                if (!c->ubo_range_used[i])
                        continue;

                struct v3d_ubo_range *range = &c->ubo_ranges[i];
                prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
                prog_data->ubo_size += range->size;
        }

        if (prog_data->ubo_size) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
                        fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
                                vir_get_stage_name(c),
                                c->program_id, c->variant_id,
                                prog_data->ubo_size / 4);
                }
        }
}

static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        prog_data->threads = c->threads;
        prog_data->single_seg = !c->last_thrsw;
        prog_data->spill_size = c->spill_size;

        v3d_set_prog_data_uniforms(c, prog_data);
        v3d_set_prog_data_ubo(c, prog_data);
}

static uint64_t *
v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
{
        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);

        uint64_t *qpu_insts = malloc(*final_assembly_size);
        if (!qpu_insts)
                return NULL;

        memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);

        vir_compile_destroy(c);

        return qpu_insts;
}

uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
                         struct v3d_vs_key *key,
                         struct v3d_vs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->vs_key = key;

        v3d_lower_nir(c);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (key->base.ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables);
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);

        prog_data->base.num_inputs = c->num_inputs;

        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column.  Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        /* Input/output segment sizes are in 8x32-bit multiples. */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;

        prog_data->uses_vid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        return v3d_return_qpu_insts(c, final_assembly_size);
}

static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->base.num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
                      (V3D_MAX_FS_INPUTS - 1) / 24);
        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                if (BITSET_TEST(c->flat_shade_flags, i))
                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
        }
}

uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
                         struct v3d_fs_key *key,
                         struct v3d_fs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->fs_key = key;

        v3d_lower_nir(c);

        if (key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
                           false);
        }

        if (key->base.ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);
        v3d_set_fs_prog_data_inputs(c, prog_data);
        prog_data->writes_z = (c->s->info.outputs_written &
                               (1 << FRAG_RESULT_DEPTH));
        prog_data->discard = c->s->info.fs.uses_discard;

        return v3d_return_qpu_insts(c, final_assembly_size);
}

void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        assert(&qinst->link != c->cursor.link);

        list_del(&qinst->link);
        free(qinst);
}

struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}

void
vir_compile_destroy(struct v3d_compile *c)
{
        /* Defuse the assert that we aren't removing the cursor's
         * instruction.
         */
        c->cursor.link = NULL;

        vir_for_each_block(block, c) {
                while (!list_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        ralloc_free(c);
}

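/* Returns a reference to the uniform stream entry with the given contents,
 * reusing an existing entry if one matches and growing the arrays
 * otherwise.
 */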
struct qreg
vir_uniform(struct v3d_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data) {
                        return vir_reg(QFILE_UNIF, i);
                }
        }

        uint32_t uniform = c->num_uniforms++;

        if (uniform >= c->uniform_array_size) {
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return vir_reg(QFILE_UNIF, uniform);
}

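/* Sets the flags based on src.  If src was written by the instruction the
 * cursor is pointing after, the PF is folded into that instruction;
 * otherwise a MOV to the null register is emitted to generate the flags.
 */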
void
vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
{
        struct qinst *last_inst = NULL;

        if (!list_empty(&c->cur_block->instructions)) {
                last_inst = (struct qinst *)c->cur_block->instructions.prev;

                /* Can't stuff the PF into the last inst if our cursor
                 * isn't pointing after it.
                 */
                struct vir_cursor after_inst = vir_after_inst(last_inst);
                if (c->cursor.mode != after_inst.mode ||
                    c->cursor.link != after_inst.link)
                        last_inst = NULL;
        }

        if (src.file != QFILE_TEMP ||
            !c->defs[src.index] ||
            last_inst != c->defs[src.index]) {
                /* XXX: Make the MOV be the appropriate type */
                last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
        }

        vir_set_pf(last_inst, pf);
}

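/* Runs a single optimization pass, recording whether it made progress so
 * that vir_optimize() keeps looping until the passes reach a fixed point.
 */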
#define OPTPASS(func)                                                  \
        do {                                                           \
                bool stage_progress = func(c);                         \
                if (stage_progress) {                                  \
                        progress = true;                               \
                        if (print_opt_debug) {                         \
                                fprintf(stderr,                        \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                  \
                        }                                              \
                        /*XXX vir_validate(c);*/                       \
                }                                                      \
        } while (0)

void
vir_optimize(struct v3d_compile *c)
{
        bool print_opt_debug = false;
        int pass = 1;

        while (true) {
                bool progress = false;

                OPTPASS(vir_opt_copy_propagate);
                OPTPASS(vir_opt_dead_code);

                if (!progress)
                        break;

                pass++;
        }
}

const char *
vir_get_stage_name(struct v3d_compile *c)
{
        if (c->vs_key && c->vs_key->is_coord)
                return "MESA_SHADER_COORD";
        else
                return gl_shader_stage_name(c->s->info.stage);
}