4e78a477bd7d430cc86b24304125056f094dc4b3
[mesa.git] / src / broadcom / compiler / vir.c
1 /*
2 * Copyright © 2016-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3d_compiler.h"
25
26 int
27 vir_get_non_sideband_nsrc(struct qinst *inst)
28 {
29 switch (inst->qpu.type) {
30 case V3D_QPU_INSTR_TYPE_BRANCH:
31 return 0;
32 case V3D_QPU_INSTR_TYPE_ALU:
33 if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
34 return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
35 else
36 return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
37 }
38
39 return 0;
40 }
41
/* Returns the total source count, including the implicit-uniform sideband
 * source when the instruction carries one.
 */
int
vir_get_nsrc(struct qinst *inst)
{
        int count = vir_get_non_sideband_nsrc(inst);

        return vir_has_implicit_uniform(inst) ? count + 1 : count;
}
52
53 bool
54 vir_has_implicit_uniform(struct qinst *inst)
55 {
56 switch (inst->qpu.type) {
57 case V3D_QPU_INSTR_TYPE_BRANCH:
58 return true;
59 case V3D_QPU_INSTR_TYPE_ALU:
60 switch (inst->dst.file) {
61 case QFILE_TLBU:
62 return true;
63 default:
64 return inst->has_implicit_uniform;
65 }
66 }
67 return false;
68 }
69
/* The sideband uniform for textures gets stored after the normal ALU
 * arguments.
 */
int
vir_get_implicit_uniform_src(struct qinst *inst)
{
        /* vir_get_nsrc() counts the implicit uniform as the last source,
         * so its index is always nsrc - 1.
         */
        return vir_get_nsrc(inst) - 1;
}
78
79 /**
80 * Returns whether the instruction has any side effects that must be
81 * preserved.
82 */
83 bool
84 vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
85 {
86 switch (inst->qpu.type) {
87 case V3D_QPU_INSTR_TYPE_BRANCH:
88 return true;
89 case V3D_QPU_INSTR_TYPE_ALU:
90 switch (inst->qpu.alu.add.op) {
91 case V3D_QPU_A_SETREVF:
92 case V3D_QPU_A_SETMSF:
93 case V3D_QPU_A_VPMSETUP:
94 return true;
95 default:
96 break;
97 }
98
99 switch (inst->qpu.alu.mul.op) {
100 case V3D_QPU_M_MULTOP:
101 return true;
102 default:
103 break;
104 }
105 }
106
107 if (inst->qpu.sig.ldtmu)
108 return true;
109
110 return false;
111 }
112
113 bool
114 vir_is_float_input(struct qinst *inst)
115 {
116 /* XXX: More instrs */
117 switch (inst->qpu.type) {
118 case V3D_QPU_INSTR_TYPE_BRANCH:
119 return false;
120 case V3D_QPU_INSTR_TYPE_ALU:
121 switch (inst->qpu.alu.add.op) {
122 case V3D_QPU_A_FADD:
123 case V3D_QPU_A_FSUB:
124 case V3D_QPU_A_FMIN:
125 case V3D_QPU_A_FMAX:
126 case V3D_QPU_A_FTOIN:
127 return true;
128 default:
129 break;
130 }
131
132 switch (inst->qpu.alu.mul.op) {
133 case V3D_QPU_M_FMOV:
134 case V3D_QPU_M_VFMUL:
135 case V3D_QPU_M_FMUL:
136 return true;
137 default:
138 break;
139 }
140 }
141
142 return false;
143 }
144
145 bool
146 vir_is_raw_mov(struct qinst *inst)
147 {
148 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
149 (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
150 inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
151 return false;
152 }
153
154 if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
155 inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
156 return false;
157 }
158
159 if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
160 inst->qpu.flags.mc != V3D_QPU_COND_NONE)
161 return false;
162
163 return true;
164 }
165
166 bool
167 vir_is_add(struct qinst *inst)
168 {
169 return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
170 inst->qpu.alu.add.op != V3D_QPU_A_NOP);
171 }
172
173 bool
174 vir_is_mul(struct qinst *inst)
175 {
176 return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
177 inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
178 }
179
180 bool
181 vir_is_tex(struct qinst *inst)
182 {
183 if (inst->dst.file == QFILE_MAGIC)
184 return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
185
186 return false;
187 }
188
189 bool
190 vir_depends_on_flags(struct qinst *inst)
191 {
192 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
193 return (inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS);
194 } else {
195 return (inst->qpu.flags.ac != V3D_QPU_COND_NONE &&
196 inst->qpu.flags.mc != V3D_QPU_COND_NONE);
197 }
198 }
199
200 bool
201 vir_writes_r3(struct qinst *inst)
202 {
203 for (int i = 0; i < vir_get_nsrc(inst); i++) {
204 switch (inst->src[i].file) {
205 case QFILE_VARY:
206 case QFILE_VPM:
207 return true;
208 default:
209 break;
210 }
211 }
212
213 return false;
214 }
215
216 bool
217 vir_writes_r4(struct qinst *inst)
218 {
219 switch (inst->dst.file) {
220 case QFILE_MAGIC:
221 switch (inst->dst.index) {
222 case V3D_QPU_WADDR_RECIP:
223 case V3D_QPU_WADDR_RSQRT:
224 case V3D_QPU_WADDR_EXP:
225 case V3D_QPU_WADDR_LOG:
226 case V3D_QPU_WADDR_SIN:
227 return true;
228 }
229 break;
230 default:
231 break;
232 }
233
234 if (inst->qpu.sig.ldtmu)
235 return true;
236
237 return false;
238 }
239
240 void
241 vir_set_unpack(struct qinst *inst, int src,
242 enum v3d_qpu_input_unpack unpack)
243 {
244 assert(src == 0 || src == 1);
245
246 if (vir_is_add(inst)) {
247 if (src == 0)
248 inst->qpu.alu.add.a_unpack = unpack;
249 else
250 inst->qpu.alu.add.b_unpack = unpack;
251 } else {
252 assert(vir_is_mul(inst));
253 if (src == 0)
254 inst->qpu.alu.mul.a_unpack = unpack;
255 else
256 inst->qpu.alu.mul.b_unpack = unpack;
257 }
258 }
259
260 void
261 vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
262 {
263 if (vir_is_add(inst)) {
264 inst->qpu.flags.ac = cond;
265 } else {
266 assert(vir_is_mul(inst));
267 inst->qpu.flags.mc = cond;
268 }
269 }
270
271 void
272 vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
273 {
274 if (vir_is_add(inst)) {
275 inst->qpu.flags.apf = pf;
276 } else {
277 assert(vir_is_mul(inst));
278 inst->qpu.flags.mpf = pf;
279 }
280 }
281
#if 0
/* NOTE(review): dead code kept under #if 0 — it references QPU_PACK_* enums
 * from the vc4 compiler that don't exist in v3d.  Returns a bitmask of which
 * byte/halfword channels of the destination a packed write touches.
 * Either port it to the v3d pack enums or delete it.
 */
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif
330
331 struct qreg
332 vir_get_temp(struct v3d_compile *c)
333 {
334 struct qreg reg;
335
336 reg.file = QFILE_TEMP;
337 reg.index = c->num_temps++;
338
339 if (c->num_temps > c->defs_array_size) {
340 uint32_t old_size = c->defs_array_size;
341 c->defs_array_size = MAX2(old_size * 2, 16);
342 c->defs = reralloc(c, c->defs, struct qinst *,
343 c->defs_array_size);
344 memset(&c->defs[old_size], 0,
345 sizeof(c->defs[0]) * (c->defs_array_size - old_size));
346 }
347
348 return reg;
349 }
350
351 struct qinst *
352 vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
353 {
354 struct qinst *inst = calloc(1, sizeof(*inst));
355
356 inst->qpu = v3d_qpu_nop();
357 inst->qpu.alu.add.op = op;
358
359 inst->dst = dst;
360 inst->src[0] = src0;
361 inst->src[1] = src1;
362 inst->uniform = ~0;
363
364 return inst;
365 }
366
367 struct qinst *
368 vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
369 {
370 struct qinst *inst = calloc(1, sizeof(*inst));
371
372 inst->qpu = v3d_qpu_nop();
373 inst->qpu.alu.mul.op = op;
374
375 inst->dst = dst;
376 inst->src[0] = src0;
377 inst->src[1] = src1;
378 inst->uniform = ~0;
379
380 return inst;
381 }
382
383 struct qinst *
384 vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
385 {
386 struct qinst *inst = calloc(1, sizeof(*inst));
387
388 inst->qpu = v3d_qpu_nop();
389 inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
390 inst->qpu.branch.cond = cond;
391 inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
392 inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
393 inst->qpu.branch.ub = true;
394 inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
395
396 inst->dst = vir_reg(QFILE_NULL, 0);
397 inst->src[0] = src;
398 inst->uniform = ~0;
399
400 return inst;
401 }
402
/* Appends the instruction to the current block, and counts VPM magic-register
 * writes (used later to size the VS's VPM output segment).
 */
static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        list_addtail(&inst->link, &c->cur_block->instructions);

        if (inst->dst.file == QFILE_MAGIC &&
            inst->dst.index == V3D_QPU_WADDR_VPM)
                c->num_vpm_writes++;
}
412
413 /* Updates inst to write to a new temporary, emits it, and notes the def. */
414 struct qreg
415 vir_emit_def(struct v3d_compile *c, struct qinst *inst)
416 {
417 assert(inst->dst.file == QFILE_NULL);
418
419 inst->dst = vir_get_temp(c);
420
421 if (inst->dst.file == QFILE_TEMP)
422 c->defs[inst->dst.index] = inst;
423
424 vir_emit(c, inst);
425
426 return inst->dst;
427 }
428
429 struct qinst *
430 vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
431 {
432 if (inst->dst.file == QFILE_TEMP)
433 c->defs[inst->dst.index] = NULL;
434
435 vir_emit(c, inst);
436
437 return inst;
438 }
439
440 struct qblock *
441 vir_new_block(struct v3d_compile *c)
442 {
443 struct qblock *block = rzalloc(c, struct qblock);
444
445 list_inithead(&block->instructions);
446
447 block->predecessors = _mesa_set_create(block,
448 _mesa_hash_pointer,
449 _mesa_key_pointer_equal);
450
451 block->index = c->next_block_index++;
452
453 return block;
454 }
455
/* Appends the block to the shader's block list and makes it the target of
 * subsequent vir_emit() calls.
 */
void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
        c->cur_block = block;
        list_addtail(&block->link, &c->blocks);
}
462
/* Returns the shader's entry block (the first block ever emitted). */
struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}
468
/* Returns the shader's exit block (the last block in the list). */
struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}
474
475 void
476 vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
477 {
478 _mesa_set_add(successor->predecessors, predecessor);
479 if (predecessor->successors[0]) {
480 assert(!predecessor->successors[1]);
481 predecessor->successors[1] = successor;
482 } else {
483 predecessor->successors[0] = successor;
484 }
485 }
486
487 const struct v3d_compiler *
488 v3d_compiler_init(const struct v3d_device_info *devinfo)
489 {
490 struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
491 if (!compiler)
492 return NULL;
493
494 compiler->devinfo = devinfo;
495
496 if (!vir_init_reg_sets(compiler)) {
497 ralloc_free(compiler);
498 return NULL;
499 }
500
501 return compiler;
502 }
503
/* Frees the compiler object and everything ralloc'd under it. */
void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        /* Cast away const: the object is only const to API users. */
        ralloc_free((void *)compiler);
}
509
510 static struct v3d_compile *
511 vir_compile_init(const struct v3d_compiler *compiler,
512 struct v3d_key *key,
513 nir_shader *s,
514 int program_id, int variant_id)
515 {
516 struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);
517
518 c->compiler = compiler;
519 c->devinfo = compiler->devinfo;
520 c->key = key;
521 c->program_id = program_id;
522 c->variant_id = variant_id;
523
524 s = nir_shader_clone(c, s);
525 c->s = s;
526
527 list_inithead(&c->blocks);
528 vir_set_emit_block(c, vir_new_block(c));
529
530 c->output_position_index = -1;
531 c->output_point_size_index = -1;
532 c->output_sample_mask_index = -1;
533
534 c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
535 _mesa_key_pointer_equal);
536
537 return c;
538 }
539
540 static void
541 v3d_lower_nir(struct v3d_compile *c)
542 {
543 struct nir_lower_tex_options tex_options = {
544 .lower_txd = true,
545 .lower_rect = false, /* XXX */
546 .lower_txp = ~0,
547 /* Apply swizzles to all samplers. */
548 .swizzle_result = ~0,
549 };
550
551 /* Lower the format swizzle and (for 32-bit returns)
552 * ARB_texture_swizzle-style swizzle.
553 */
554 for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
555 for (int j = 0; j < 4; j++)
556 tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];
557
558 if (c->key->tex[i].clamp_s)
559 tex_options.saturate_s |= 1 << i;
560 if (c->key->tex[i].clamp_t)
561 tex_options.saturate_t |= 1 << i;
562 if (c->key->tex[i].clamp_r)
563 tex_options.saturate_r |= 1 << i;
564 }
565
566 NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
567 }
568
/* Runs the late lowering passes that must happen after the stage-specific
 * scalarization: driver I/O lowering, multisample txf lowering, and integer
 * division lowering.
 */
static void
v3d_lower_nir_late(struct v3d_compile *c)
{
        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, nir_lower_idiv);
}
576
577 static void
578 v3d_set_prog_data_uniforms(struct v3d_compile *c,
579 struct v3d_prog_data *prog_data)
580 {
581 int count = c->num_uniforms;
582 struct v3d_uniform_list *ulist = &prog_data->uniforms;
583
584 ulist->count = count;
585 ulist->data = ralloc_array(prog_data, uint32_t, count);
586 memcpy(ulist->data, c->uniform_data,
587 count * sizeof(*ulist->data));
588 ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
589 memcpy(ulist->contents, c->uniform_contents,
590 count * sizeof(*ulist->contents));
591 }
592
593 /* Copy the compiler UBO range state to the compiled shader, dropping out
594 * arrays that were never referenced by an indirect load.
595 *
596 * (Note that QIR dead code elimination of an array access still leaves that
597 * array alive, though)
598 */
599 static void
600 v3d_set_prog_data_ubo(struct v3d_compile *c,
601 struct v3d_prog_data *prog_data)
602 {
603 if (!c->num_ubo_ranges)
604 return;
605
606 prog_data->num_ubo_ranges = 0;
607 prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
608 c->num_ubo_ranges);
609 for (int i = 0; i < c->num_ubo_ranges; i++) {
610 if (!c->ubo_range_used[i])
611 continue;
612
613 struct v3d_ubo_range *range = &c->ubo_ranges[i];
614 prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
615 prog_data->ubo_size += range->size;
616 }
617
618 if (prog_data->ubo_size) {
619 if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
620 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
621 vir_get_stage_name(c),
622 c->program_id, c->variant_id,
623 prog_data->ubo_size / 4);
624 }
625 }
626 }
627
/* Fills in the stage-independent parts of prog_data from the finished
 * compile: the uniform stream and the referenced UBO ranges.
 */
static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        v3d_set_prog_data_uniforms(c, prog_data);
        v3d_set_prog_data_ubo(c, prog_data);
}
635
636 static uint64_t *
637 v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
638 {
639 *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
640
641 uint64_t *qpu_insts = malloc(*final_assembly_size);
642 if (!qpu_insts)
643 return NULL;
644
645 memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
646
647 vir_compile_destroy(c);
648
649 return qpu_insts;
650 }
651
/* Compiles a vertex shader variant: NIR lowering, VIR generation, and
 * prog_data setup.  Returns the malloc'd QPU assembly (caller frees) and its
 * size via final_assembly_size; NULL on failure.
 */
uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
                         struct v3d_vs_key *key,
                         struct v3d_vs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->vs_key = key;

        v3d_lower_nir(c);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        /* User clip planes: lower clipping, then re-scalarize the outputs
         * the clip lowering added.
         */
        if (key->base.ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables);
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);

        prog_data->base.num_inputs = c->num_inputs;

        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column. Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        /* Input/output segment size are in 8x32-bit multiples. */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;

        /* Record whether the shader reads gl_VertexID/gl_InstanceID so the
         * driver can set up the VPM fetch accordingly.
         */
        prog_data->uses_vid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        return v3d_return_qpu_insts(c, final_assembly_size);
}
708
/* Copies the FS input slot table and flat-shade flags into prog_data.  The
 * flat-shade flags are repacked from a bitset into 24-bit groups (the
 * hardware's per-24-varyings flag words).
 */
static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->base.num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        /* Make sure the flag-word array covers every possible FS input. */
        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
                      (V3D_MAX_FS_INPUTS - 1) / 24);
        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                if (BITSET_TEST(c->flat_shade_flags, i))
                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
        }
}
724
/* Compiles a fragment shader variant: NIR lowering, VIR generation, and
 * prog_data setup.  Returns the malloc'd QPU assembly (caller frees) and its
 * size via final_assembly_size; NULL on failure.
 */
uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
                         struct v3d_fs_key *key,
                         struct v3d_fs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->fs_key = key;

        v3d_lower_nir(c);

        if (key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
                           false);
        }

        if (key->base.ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);
        v3d_set_fs_prog_data_inputs(c, prog_data);
        /* Record depth writes / discard so the driver can configure early-z
         * and the TLB accordingly.
         */
        prog_data->writes_z = (c->s->info.outputs_written &
                               (1 << FRAG_RESULT_DEPTH));
        prog_data->discard = c->s->info.fs.uses_discard;

        return v3d_return_qpu_insts(c, final_assembly_size);
}
772
773 void
774 vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
775 {
776 if (qinst->dst.file == QFILE_TEMP)
777 c->defs[qinst->dst.index] = NULL;
778
779 list_del(&qinst->link);
780 free(qinst);
781 }
782
struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* Currently a no-op: the vc4-era implementation below (chasing a
         * temp back through unpacked MOV/FMOV defs) hasn't been ported to
         * VIR, so callers just get their register back unchanged.
         */
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}
802
803 void
804 vir_compile_destroy(struct v3d_compile *c)
805 {
806 vir_for_each_block(block, c) {
807 while (!list_empty(&block->instructions)) {
808 struct qinst *qinst =
809 list_first_entry(&block->instructions,
810 struct qinst, link);
811 vir_remove_instruction(c, qinst);
812 }
813 }
814
815 ralloc_free(c);
816 }
817
818 struct qreg
819 vir_uniform(struct v3d_compile *c,
820 enum quniform_contents contents,
821 uint32_t data)
822 {
823 for (int i = 0; i < c->num_uniforms; i++) {
824 if (c->uniform_contents[i] == contents &&
825 c->uniform_data[i] == data) {
826 return vir_reg(QFILE_UNIF, i);
827 }
828 }
829
830 uint32_t uniform = c->num_uniforms++;
831
832 if (uniform >= c->uniform_array_size) {
833 c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
834 c->uniform_array_size * 2);
835
836 c->uniform_data = reralloc(c, c->uniform_data,
837 uint32_t,
838 c->uniform_array_size);
839 c->uniform_contents = reralloc(c, c->uniform_contents,
840 enum quniform_contents,
841 c->uniform_array_size);
842 }
843
844 c->uniform_contents[uniform] = contents;
845 c->uniform_data[uniform] = data;
846
847 return vir_reg(QFILE_UNIF, uniform);
848 }
849
850 void
851 vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
852 {
853 struct qinst *last_inst = NULL;
854
855 if (!list_empty(&c->cur_block->instructions))
856 last_inst = (struct qinst *)c->cur_block->instructions.prev;
857
858 if (src.file != QFILE_TEMP ||
859 !c->defs[src.index] ||
860 last_inst != c->defs[src.index]) {
861 /* XXX: Make the MOV be the appropriate type */
862 last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
863 last_inst = (struct qinst *)c->cur_block->instructions.prev;
864 }
865
866 vir_set_pf(last_inst, pf);
867 }
868
/* Runs one optimization pass and folds its progress into the enclosing
 * loop's `progress` flag; expects `c`, `progress`, `pass`, and
 * `print_opt_debug` in scope (see vir_optimize()).
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)
882
883 void
884 vir_optimize(struct v3d_compile *c)
885 {
886 bool print_opt_debug = false;
887 int pass = 1;
888
889 while (true) {
890 bool progress = false;
891
892 OPTPASS(vir_opt_copy_propagate);
893 OPTPASS(vir_opt_dead_code);
894
895 if (!progress)
896 break;
897
898 pass++;
899 }
900 }
901
902 const char *
903 vir_get_stage_name(struct v3d_compile *c)
904 {
905 if (c->vs_key && c->vs_key->is_coord)
906 return "MESA_SHADER_COORD";
907 else
908 return gl_shader_stage_name(c->s->info.stage);
909 }