da4ece2cffe73ecaee65140ddfc7670f56633f26
[mesa.git] / src / broadcom / compiler / vir.c
1 /*
2 * Copyright © 2016-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "broadcom/common/v3d_device_info.h"
25 #include "v3d_compiler.h"
26
27 int
28 vir_get_non_sideband_nsrc(struct qinst *inst)
29 {
30 switch (inst->qpu.type) {
31 case V3D_QPU_INSTR_TYPE_BRANCH:
32 return 0;
33 case V3D_QPU_INSTR_TYPE_ALU:
34 if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
35 return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
36 else
37 return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
38 }
39
40 return 0;
41 }
42
/* Total source count: the explicit ALU sources, plus one slot when the
 * instruction carries an implicit uniform in its sideband.
 */
int
vir_get_nsrc(struct qinst *inst)
{
        return (vir_get_non_sideband_nsrc(inst) +
                (vir_has_implicit_uniform(inst) ? 1 : 0));
}
53
54 bool
55 vir_has_implicit_uniform(struct qinst *inst)
56 {
57 switch (inst->qpu.type) {
58 case V3D_QPU_INSTR_TYPE_BRANCH:
59 return true;
60 case V3D_QPU_INSTR_TYPE_ALU:
61 switch (inst->dst.file) {
62 case QFILE_TLBU:
63 return true;
64 default:
65 return inst->has_implicit_uniform;
66 }
67 }
68 return false;
69 }
70
/* The sideband uniform for textures gets stored after the normal ALU
 * arguments, so the implicit uniform always occupies the last source slot.
 */
int
vir_get_implicit_uniform_src(struct qinst *inst)
{
        int last_slot = vir_get_nsrc(inst) - 1;
        return last_slot;
}
79
80 /**
81 * Returns whether the instruction has any side effects that must be
82 * preserved.
83 */
84 bool
85 vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
86 {
87 switch (inst->qpu.type) {
88 case V3D_QPU_INSTR_TYPE_BRANCH:
89 return true;
90 case V3D_QPU_INSTR_TYPE_ALU:
91 switch (inst->qpu.alu.add.op) {
92 case V3D_QPU_A_SETREVF:
93 case V3D_QPU_A_SETMSF:
94 case V3D_QPU_A_VPMSETUP:
95 case V3D_QPU_A_STVPMV:
96 case V3D_QPU_A_STVPMD:
97 case V3D_QPU_A_STVPMP:
98 case V3D_QPU_A_VPMWT:
99 return true;
100 default:
101 break;
102 }
103
104 switch (inst->qpu.alu.mul.op) {
105 case V3D_QPU_M_MULTOP:
106 return true;
107 default:
108 break;
109 }
110 }
111
112 if (inst->qpu.sig.ldtmu)
113 return true;
114
115 return false;
116 }
117
118 bool
119 vir_is_float_input(struct qinst *inst)
120 {
121 /* XXX: More instrs */
122 switch (inst->qpu.type) {
123 case V3D_QPU_INSTR_TYPE_BRANCH:
124 return false;
125 case V3D_QPU_INSTR_TYPE_ALU:
126 switch (inst->qpu.alu.add.op) {
127 case V3D_QPU_A_FADD:
128 case V3D_QPU_A_FSUB:
129 case V3D_QPU_A_FMIN:
130 case V3D_QPU_A_FMAX:
131 case V3D_QPU_A_FTOIN:
132 return true;
133 default:
134 break;
135 }
136
137 switch (inst->qpu.alu.mul.op) {
138 case V3D_QPU_M_FMOV:
139 case V3D_QPU_M_VFMUL:
140 case V3D_QPU_M_FMUL:
141 return true;
142 default:
143 break;
144 }
145 }
146
147 return false;
148 }
149
150 bool
151 vir_is_raw_mov(struct qinst *inst)
152 {
153 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
154 (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
155 inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
156 return false;
157 }
158
159 if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
160 inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
161 return false;
162 }
163
164 if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
165 inst->qpu.flags.mc != V3D_QPU_COND_NONE)
166 return false;
167
168 return true;
169 }
170
171 bool
172 vir_is_add(struct qinst *inst)
173 {
174 return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
175 inst->qpu.alu.add.op != V3D_QPU_A_NOP);
176 }
177
178 bool
179 vir_is_mul(struct qinst *inst)
180 {
181 return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
182 inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
183 }
184
185 bool
186 vir_is_tex(struct qinst *inst)
187 {
188 if (inst->dst.file == QFILE_MAGIC)
189 return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
190
191 return false;
192 }
193
194 bool
195 vir_depends_on_flags(struct qinst *inst)
196 {
197 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH) {
198 return (inst->qpu.branch.cond != V3D_QPU_BRANCH_COND_ALWAYS);
199 } else {
200 return (inst->qpu.flags.ac != V3D_QPU_COND_NONE &&
201 inst->qpu.flags.mc != V3D_QPU_COND_NONE);
202 }
203 }
204
205 bool
206 vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
207 {
208 for (int i = 0; i < vir_get_nsrc(inst); i++) {
209 switch (inst->src[i].file) {
210 case QFILE_VARY:
211 case QFILE_VPM:
212 return true;
213 default:
214 break;
215 }
216 }
217
218 if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
219 inst->qpu.sig.ldtlb ||
220 inst->qpu.sig.ldtlbu ||
221 inst->qpu.sig.ldvpm)) {
222 return true;
223 }
224
225 return false;
226 }
227
228 bool
229 vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
230 {
231 switch (inst->dst.file) {
232 case QFILE_MAGIC:
233 switch (inst->dst.index) {
234 case V3D_QPU_WADDR_RECIP:
235 case V3D_QPU_WADDR_RSQRT:
236 case V3D_QPU_WADDR_EXP:
237 case V3D_QPU_WADDR_LOG:
238 case V3D_QPU_WADDR_SIN:
239 return true;
240 }
241 break;
242 default:
243 break;
244 }
245
246 if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
247 return true;
248
249 return false;
250 }
251
252 void
253 vir_set_unpack(struct qinst *inst, int src,
254 enum v3d_qpu_input_unpack unpack)
255 {
256 assert(src == 0 || src == 1);
257
258 if (vir_is_add(inst)) {
259 if (src == 0)
260 inst->qpu.alu.add.a_unpack = unpack;
261 else
262 inst->qpu.alu.add.b_unpack = unpack;
263 } else {
264 assert(vir_is_mul(inst));
265 if (src == 0)
266 inst->qpu.alu.mul.a_unpack = unpack;
267 else
268 inst->qpu.alu.mul.b_unpack = unpack;
269 }
270 }
271
272 void
273 vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
274 {
275 if (vir_is_add(inst)) {
276 inst->qpu.flags.ac = cond;
277 } else {
278 assert(vir_is_mul(inst));
279 inst->qpu.flags.mc = cond;
280 }
281 }
282
283 void
284 vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
285 {
286 if (vir_is_add(inst)) {
287 inst->qpu.flags.apf = pf;
288 } else {
289 assert(vir_is_mul(inst));
290 inst->qpu.flags.mpf = pf;
291 }
292 }
293
#if 0
/* Disabled: appears to be a VC4-era helper reporting which byte/halfword
 * lanes a destination pack mode writes.  The QPU_PACK_* enums it references
 * are not defined in the v3d compiler, so it cannot compile as-is; kept for
 * reference — TODO confirm whether a VIR port is still wanted.
 */
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif
342
343 struct qreg
344 vir_get_temp(struct v3d_compile *c)
345 {
346 struct qreg reg;
347
348 reg.file = QFILE_TEMP;
349 reg.index = c->num_temps++;
350
351 if (c->num_temps > c->defs_array_size) {
352 uint32_t old_size = c->defs_array_size;
353 c->defs_array_size = MAX2(old_size * 2, 16);
354 c->defs = reralloc(c, c->defs, struct qinst *,
355 c->defs_array_size);
356 memset(&c->defs[old_size], 0,
357 sizeof(c->defs[0]) * (c->defs_array_size - old_size));
358 }
359
360 return reg;
361 }
362
363 struct qinst *
364 vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
365 {
366 struct qinst *inst = calloc(1, sizeof(*inst));
367
368 inst->qpu = v3d_qpu_nop();
369 inst->qpu.alu.add.op = op;
370
371 inst->dst = dst;
372 inst->src[0] = src0;
373 inst->src[1] = src1;
374 inst->uniform = ~0;
375
376 return inst;
377 }
378
379 struct qinst *
380 vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
381 {
382 struct qinst *inst = calloc(1, sizeof(*inst));
383
384 inst->qpu = v3d_qpu_nop();
385 inst->qpu.alu.mul.op = op;
386
387 inst->dst = dst;
388 inst->src[0] = src0;
389 inst->src[1] = src1;
390 inst->uniform = ~0;
391
392 return inst;
393 }
394
395 struct qinst *
396 vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
397 {
398 struct qinst *inst = calloc(1, sizeof(*inst));
399
400 inst->qpu = v3d_qpu_nop();
401 inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
402 inst->qpu.branch.cond = cond;
403 inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
404 inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
405 inst->qpu.branch.ub = true;
406 inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
407
408 inst->dst = vir_reg(QFILE_NULL, 0);
409 inst->src[0] = src;
410 inst->uniform = ~0;
411
412 return inst;
413 }
414
/* Appends the instruction to the end of the current block. */
static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        list_addtail(&inst->link, &c->cur_block->instructions);
}
420
421 /* Updates inst to write to a new temporary, emits it, and notes the def. */
422 struct qreg
423 vir_emit_def(struct v3d_compile *c, struct qinst *inst)
424 {
425 assert(inst->dst.file == QFILE_NULL);
426
427 inst->dst = vir_get_temp(c);
428
429 if (inst->dst.file == QFILE_TEMP)
430 c->defs[inst->dst.index] = inst;
431
432 vir_emit(c, inst);
433
434 return inst->dst;
435 }
436
437 struct qinst *
438 vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
439 {
440 if (inst->dst.file == QFILE_TEMP)
441 c->defs[inst->dst.index] = NULL;
442
443 vir_emit(c, inst);
444
445 return inst;
446 }
447
448 struct qblock *
449 vir_new_block(struct v3d_compile *c)
450 {
451 struct qblock *block = rzalloc(c, struct qblock);
452
453 list_inithead(&block->instructions);
454
455 block->predecessors = _mesa_set_create(block,
456 _mesa_hash_pointer,
457 _mesa_key_pointer_equal);
458
459 block->index = c->next_block_index++;
460
461 return block;
462 }
463
/* Makes the block the current emission target and appends it to the
 * shader's block list.
 */
void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
        c->cur_block = block;
        list_addtail(&block->link, &c->blocks);
}
470
/* Returns the first block in the shader (the entry point). */
struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}
476
/* Returns the last block in the shader (the exit block). */
struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}
482
483 void
484 vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
485 {
486 _mesa_set_add(successor->predecessors, predecessor);
487 if (predecessor->successors[0]) {
488 assert(!predecessor->successors[1]);
489 predecessor->successors[1] = successor;
490 } else {
491 predecessor->successors[0] = successor;
492 }
493 }
494
495 const struct v3d_compiler *
496 v3d_compiler_init(const struct v3d_device_info *devinfo)
497 {
498 struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
499 if (!compiler)
500 return NULL;
501
502 compiler->devinfo = devinfo;
503
504 if (!vir_init_reg_sets(compiler)) {
505 ralloc_free(compiler);
506 return NULL;
507 }
508
509 return compiler;
510 }
511
/* Frees a compiler created by v3d_compiler_init().  The cast drops the
 * const that the public API carries; the object itself is ralloc-owned.
 */
void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        ralloc_free((void *)compiler);
}
517
518 static struct v3d_compile *
519 vir_compile_init(const struct v3d_compiler *compiler,
520 struct v3d_key *key,
521 nir_shader *s,
522 int program_id, int variant_id)
523 {
524 struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);
525
526 c->compiler = compiler;
527 c->devinfo = compiler->devinfo;
528 c->key = key;
529 c->program_id = program_id;
530 c->variant_id = variant_id;
531
532 s = nir_shader_clone(c, s);
533 c->s = s;
534
535 list_inithead(&c->blocks);
536 vir_set_emit_block(c, vir_new_block(c));
537
538 c->output_position_index = -1;
539 c->output_point_size_index = -1;
540 c->output_sample_mask_index = -1;
541
542 c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
543 _mesa_key_pointer_equal);
544
545 return c;
546 }
547
548 static void
549 v3d_lower_nir(struct v3d_compile *c)
550 {
551 struct nir_lower_tex_options tex_options = {
552 .lower_txd = true,
553 .lower_rect = false, /* XXX */
554 .lower_txp = ~0,
555 /* Apply swizzles to all samplers. */
556 .swizzle_result = ~0,
557 };
558
559 /* Lower the format swizzle and (for 32-bit returns)
560 * ARB_texture_swizzle-style swizzle.
561 */
562 for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
563 for (int j = 0; j < 4; j++)
564 tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];
565
566 if (c->key->tex[i].clamp_s)
567 tex_options.saturate_s |= 1 << i;
568 if (c->key->tex[i].clamp_t)
569 tex_options.saturate_t |= 1 << i;
570 if (c->key->tex[i].clamp_r)
571 tex_options.saturate_r |= 1 << i;
572 }
573
574 NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
575 }
576
/* Late NIR lowering run after the key-based passes: V3D-specific IO and
 * multisample-texel-fetch lowering, then integer division lowering.
 */
static void
v3d_lower_nir_late(struct v3d_compile *c)
{
        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, nir_lower_idiv);
}
584
585 static void
586 v3d_set_prog_data_uniforms(struct v3d_compile *c,
587 struct v3d_prog_data *prog_data)
588 {
589 int count = c->num_uniforms;
590 struct v3d_uniform_list *ulist = &prog_data->uniforms;
591
592 ulist->count = count;
593 ulist->data = ralloc_array(prog_data, uint32_t, count);
594 memcpy(ulist->data, c->uniform_data,
595 count * sizeof(*ulist->data));
596 ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
597 memcpy(ulist->contents, c->uniform_contents,
598 count * sizeof(*ulist->contents));
599 }
600
601 /* Copy the compiler UBO range state to the compiled shader, dropping out
602 * arrays that were never referenced by an indirect load.
603 *
604 * (Note that QIR dead code elimination of an array access still leaves that
605 * array alive, though)
606 */
607 static void
608 v3d_set_prog_data_ubo(struct v3d_compile *c,
609 struct v3d_prog_data *prog_data)
610 {
611 if (!c->num_ubo_ranges)
612 return;
613
614 prog_data->num_ubo_ranges = 0;
615 prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
616 c->num_ubo_ranges);
617 for (int i = 0; i < c->num_ubo_ranges; i++) {
618 if (!c->ubo_range_used[i])
619 continue;
620
621 struct v3d_ubo_range *range = &c->ubo_ranges[i];
622 prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
623 prog_data->ubo_size += range->size;
624 }
625
626 if (prog_data->ubo_size) {
627 if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
628 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
629 vir_get_stage_name(c),
630 c->program_id, c->variant_id,
631 prog_data->ubo_size / 4);
632 }
633 }
634 }
635
/* Fills in the parts of prog_data common to all shader stages. */
static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        v3d_set_prog_data_uniforms(c, prog_data);
        v3d_set_prog_data_ubo(c, prog_data);
}
643
644 static uint64_t *
645 v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
646 {
647 *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
648
649 uint64_t *qpu_insts = malloc(*final_assembly_size);
650 if (!qpu_insts)
651 return NULL;
652
653 memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
654
655 vir_compile_destroy(c);
656
657 return qpu_insts;
658 }
659
/* Compiles a NIR vertex shader to QPU instructions.  Returns a malloc'd
 * buffer of instructions (size via *final_assembly_size) and fills in
 * prog_data; the pass ordering below is significant.
 */
uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
                         struct v3d_vs_key *key,
                         struct v3d_vs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->vs_key = key;

        v3d_lower_nir(c);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        /* User clip planes need the clip-distance lowering, which in turn
         * requires scalar outputs.
         */
        if (key->base.ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, key->base.ucp_enables);
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);

        prog_data->base.num_inputs = c->num_inputs;

        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column. Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        /* Input/output segment size are in 8x32-bit multiples. */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;

        prog_data->uses_vid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        return v3d_return_qpu_insts(c, final_assembly_size);
}
716
717 static void
718 v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
719 struct v3d_fs_prog_data *prog_data)
720 {
721 prog_data->base.num_inputs = c->num_inputs;
722 memcpy(prog_data->input_slots, c->input_slots,
723 c->num_inputs * sizeof(*c->input_slots));
724
725 STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
726 (V3D_MAX_FS_INPUTS - 1) / 24);
727 for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
728 if (BITSET_TEST(c->flat_shade_flags, i))
729 prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
730 }
731 }
732
/* Compiles a NIR fragment shader to QPU instructions.  Returns a malloc'd
 * buffer of instructions (size via *final_assembly_size) and fills in
 * prog_data; the pass ordering below is significant.
 */
uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
                         struct v3d_fs_key *key,
                         struct v3d_fs_prog_data *prog_data,
                         nir_shader *s,
                         int program_id, int variant_id,
                         uint32_t *final_assembly_size)
{
        struct v3d_compile *c = vir_compile_init(compiler, &key->base, s,
                                                 program_id, variant_id);

        c->fs_key = key;

        v3d_lower_nir(c);

        if (key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test, key->alpha_test_func,
                           false);
        }

        if (key->base.ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, key->base.ucp_enables);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);

        v3d_lower_nir_late(c);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, &prog_data->base);
        v3d_set_fs_prog_data_inputs(c, prog_data);
        prog_data->writes_z = (c->s->info.outputs_written &
                               (1 << FRAG_RESULT_DEPTH));
        prog_data->discard = c->s->info.fs.uses_discard;

        return v3d_return_qpu_insts(c, final_assembly_size);
}
780
781 void
782 vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
783 {
784 if (qinst->dst.file == QFILE_TEMP)
785 c->defs[qinst->dst.index] = NULL;
786
787 list_del(&qinst->link);
788 free(qinst);
789 }
790
/* Presumably intended to chase a register back through raw MOVs to its
 * original def; currently a no-op placeholder.  The VC4-era implementation
 * is commented out below (XXX) pending a VIR port, so the register is
 * returned unchanged.
 */
struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}
810
811 void
812 vir_compile_destroy(struct v3d_compile *c)
813 {
814 vir_for_each_block(block, c) {
815 while (!list_empty(&block->instructions)) {
816 struct qinst *qinst =
817 list_first_entry(&block->instructions,
818 struct qinst, link);
819 vir_remove_instruction(c, qinst);
820 }
821 }
822
823 ralloc_free(c);
824 }
825
826 struct qreg
827 vir_uniform(struct v3d_compile *c,
828 enum quniform_contents contents,
829 uint32_t data)
830 {
831 for (int i = 0; i < c->num_uniforms; i++) {
832 if (c->uniform_contents[i] == contents &&
833 c->uniform_data[i] == data) {
834 return vir_reg(QFILE_UNIF, i);
835 }
836 }
837
838 uint32_t uniform = c->num_uniforms++;
839
840 if (uniform >= c->uniform_array_size) {
841 c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
842 c->uniform_array_size * 2);
843
844 c->uniform_data = reralloc(c, c->uniform_data,
845 uint32_t,
846 c->uniform_array_size);
847 c->uniform_contents = reralloc(c, c->uniform_contents,
848 enum quniform_contents,
849 c->uniform_array_size);
850 }
851
852 c->uniform_contents[uniform] = contents;
853 c->uniform_data[uniform] = data;
854
855 return vir_reg(QFILE_UNIF, uniform);
856 }
857
/* Sets the "push flags" field so the flags get updated from src.
 *
 * If src was just written by the last instruction in the current block, we
 * can set PF directly on that instruction; otherwise we emit a MOV of src
 * to a null destination and put the PF on the MOV.
 */
void
vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
{
        struct qinst *last_inst = NULL;

        if (!list_empty(&c->cur_block->instructions))
                last_inst = (struct qinst *)c->cur_block->instructions.prev;

        /* Only reuse the last instruction when it is the unique def of
         * src; otherwise its flags result wouldn't reflect src's value.
         */
        if (src.file != QFILE_TEMP ||
            !c->defs[src.index] ||
            last_inst != c->defs[src.index]) {
                /* XXX: Make the MOV be the appropriate type */
                last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
                last_inst = (struct qinst *)c->cur_block->instructions.prev;
        }

        vir_set_pf(last_inst, pf);
}
876
/* Runs one optimization pass over the shader, recording whether it made
 * progress (used by vir_optimize()'s fixed-point loop) and optionally
 * printing per-pass debug output.
 */
#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)
890
891 void
892 vir_optimize(struct v3d_compile *c)
893 {
894 bool print_opt_debug = false;
895 int pass = 1;
896
897 while (true) {
898 bool progress = false;
899
900 OPTPASS(vir_opt_copy_propagate);
901 OPTPASS(vir_opt_dead_code);
902
903 if (!progress)
904 break;
905
906 pass++;
907 }
908 }
909
910 const char *
911 vir_get_stage_name(struct v3d_compile *c)
912 {
913 if (c->vs_key && c->vs_key->is_coord)
914 return "MESA_SHADER_COORD";
915 else
916 return gl_shader_stage_name(c->s->info.stage);
917 }