v3d: Use the early_fragment_tests flag for the shader's disable-EZ field.
[mesa.git] / src/broadcom/compiler/vir.c
1 /*
2 * Copyright © 2016-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "broadcom/common/v3d_device_info.h"
25 #include "v3d_compiler.h"
26
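/* Returns the number of explicit ALU sources the instruction reads
 * (branches read none), not counting any implicit sideband uniform.
 */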
27 int
28 vir_get_non_sideband_nsrc(struct qinst *inst)
29 {
30 switch (inst->qpu.type) {
31 case V3D_QPU_INSTR_TYPE_BRANCH:
32 return 0;
33 case V3D_QPU_INSTR_TYPE_ALU:
34 if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
35 return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
36 else
37 return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
38 }
39
40 return 0;
41 }
42
43 int
44 vir_get_nsrc(struct qinst *inst)
45 {
46 int nsrc = vir_get_non_sideband_nsrc(inst);
47
48 if (vir_has_implicit_uniform(inst))
49 nsrc++;
50
51 return nsrc;
52 }
53
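/* Returns whether the instruction consumes an implicit uniform from the
 * uniform stream: branches always do, as do writes to the TLBU config
 * address and the TMUAU/SYNCU magic registers; other instructions may be
 * explicitly flagged.
 */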
54 bool
55 vir_has_implicit_uniform(struct qinst *inst)
56 {
57 switch (inst->qpu.type) {
58 case V3D_QPU_INSTR_TYPE_BRANCH:
59 return true;
60 case V3D_QPU_INSTR_TYPE_ALU:
61 switch (inst->dst.file) {
62 case QFILE_TLBU:
63 return true;
64 case QFILE_MAGIC:
65 switch (inst->dst.index) {
66 case V3D_QPU_WADDR_TLBU:
67 case V3D_QPU_WADDR_TMUAU:
68 case V3D_QPU_WADDR_SYNCU:
69 return true;
70 default:
71 break;
72 }
73 break;
74 default:
75 return inst->has_implicit_uniform;
76 }
77 }
78 return false;
79 }
80
81 /* The sideband uniform for textures gets stored after the normal ALU
82 * arguments.
83 */
84 int
85 vir_get_implicit_uniform_src(struct qinst *inst)
86 {
87 if (!vir_has_implicit_uniform(inst))
88 return -1;
89 return vir_get_nsrc(inst) - 1;
90 }
91
92 /**
93 * Returns whether the instruction has any side effects that must be
94 * preserved.
95 */
96 bool
97 vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
98 {
99 switch (inst->qpu.type) {
100 case V3D_QPU_INSTR_TYPE_BRANCH:
101 return true;
102 case V3D_QPU_INSTR_TYPE_ALU:
103 switch (inst->qpu.alu.add.op) {
104 case V3D_QPU_A_SETREVF:
105 case V3D_QPU_A_SETMSF:
106 case V3D_QPU_A_VPMSETUP:
107 case V3D_QPU_A_STVPMV:
108 case V3D_QPU_A_STVPMD:
109 case V3D_QPU_A_STVPMP:
110 case V3D_QPU_A_VPMWT:
111 case V3D_QPU_A_TMUWT:
112 return true;
113 default:
114 break;
115 }
116
117 switch (inst->qpu.alu.mul.op) {
118 case V3D_QPU_M_MULTOP:
119 return true;
120 default:
121 break;
122 }
123 }
124
125 if (inst->qpu.sig.ldtmu ||
126 inst->qpu.sig.ldvary ||
127 inst->qpu.sig.wrtmuc ||
128 inst->qpu.sig.thrsw) {
129 return true;
130 }
131
132 return false;
133 }
134
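/* A "raw" MOV is an FMOV/MOV in the mul pipe with no output packing and no
 * condition, so it can be treated as a plain register-to-register copy.
 */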
135 bool
136 vir_is_raw_mov(struct qinst *inst)
137 {
138 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
139 (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
140 inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
141 return false;
142 }
143
144 if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
145 inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
146 return false;
147 }
148
149 if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
150 inst->qpu.flags.mc != V3D_QPU_COND_NONE)
151 return false;
152
153 return true;
154 }
155
156 bool
157 vir_is_add(struct qinst *inst)
158 {
159 return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
160 inst->qpu.alu.add.op != V3D_QPU_A_NOP);
161 }
162
163 bool
164 vir_is_mul(struct qinst *inst)
165 {
166 return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
167 inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
168 }
169
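/* Returns whether the instruction is part of a texture/TMU operation: a
 * write to one of the TMU magic register addresses, or a TMUWT.
 */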
170 bool
171 vir_is_tex(struct qinst *inst)
172 {
173 if (inst->dst.file == QFILE_MAGIC)
174 return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
175
176 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
177 inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
178 return true;
179 }
180
181 return false;
182 }
183
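/* Returns whether the instruction implicitly writes the r3 accumulator:
 * VPM reads do, as do the ldvary/ldtlb/ldtlbu/ldvpm signals before V3D 4.1.
 */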
184 bool
185 vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
186 {
187 for (int i = 0; i < vir_get_nsrc(inst); i++) {
188 switch (inst->src[i].file) {
189 case QFILE_VPM:
190 return true;
191 default:
192 break;
193 }
194 }
195
196 if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
197 inst->qpu.sig.ldtlb ||
198 inst->qpu.sig.ldtlbu ||
199 inst->qpu.sig.ldvpm)) {
200 return true;
201 }
202
203 return false;
204 }
205
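/* Returns whether the instruction implicitly writes the r4 accumulator:
 * SFU writes (recip/rsqrt/exp/log/sin) do, as does ldtmu before V3D 4.1.
 */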
206 bool
207 vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
208 {
209 switch (inst->dst.file) {
210 case QFILE_MAGIC:
211 switch (inst->dst.index) {
212 case V3D_QPU_WADDR_RECIP:
213 case V3D_QPU_WADDR_RSQRT:
214 case V3D_QPU_WADDR_EXP:
215 case V3D_QPU_WADDR_LOG:
216 case V3D_QPU_WADDR_SIN:
217 return true;
218 }
219 break;
220 default:
221 break;
222 }
223
224 if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
225 return true;
226
227 return false;
228 }
229
230 void
231 vir_set_unpack(struct qinst *inst, int src,
232 enum v3d_qpu_input_unpack unpack)
233 {
234 assert(src == 0 || src == 1);
235
236 if (vir_is_add(inst)) {
237 if (src == 0)
238 inst->qpu.alu.add.a_unpack = unpack;
239 else
240 inst->qpu.alu.add.b_unpack = unpack;
241 } else {
242 assert(vir_is_mul(inst));
243 if (src == 0)
244 inst->qpu.alu.mul.a_unpack = unpack;
245 else
246 inst->qpu.alu.mul.b_unpack = unpack;
247 }
248 }
249
250 void
251 vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
252 {
253 if (vir_is_add(inst)) {
254 inst->qpu.flags.ac = cond;
255 } else {
256 assert(vir_is_mul(inst));
257 inst->qpu.flags.mc = cond;
258 }
259 }
260
261 void
262 vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
263 {
264 if (vir_is_add(inst)) {
265 inst->qpu.flags.apf = pf;
266 } else {
267 assert(vir_is_mul(inst));
268 inst->qpu.flags.mpf = pf;
269 }
270 }
271
272 void
273 vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
274 {
275 if (vir_is_add(inst)) {
276 inst->qpu.flags.auf = uf;
277 } else {
278 assert(vir_is_mul(inst));
279 inst->qpu.flags.muf = uf;
280 }
281 }
282
283 #if 0
284 uint8_t
285 vir_channels_written(struct qinst *inst)
286 {
287 if (vir_is_mul(inst)) {
288 switch (inst->dst.pack) {
289 case QPU_PACK_MUL_NOP:
290 case QPU_PACK_MUL_8888:
291 return 0xf;
292 case QPU_PACK_MUL_8A:
293 return 0x1;
294 case QPU_PACK_MUL_8B:
295 return 0x2;
296 case QPU_PACK_MUL_8C:
297 return 0x4;
298 case QPU_PACK_MUL_8D:
299 return 0x8;
300 }
301 } else {
302 switch (inst->dst.pack) {
303 case QPU_PACK_A_NOP:
304 case QPU_PACK_A_8888:
305 case QPU_PACK_A_8888_SAT:
306 case QPU_PACK_A_32_SAT:
307 return 0xf;
308 case QPU_PACK_A_8A:
309 case QPU_PACK_A_8A_SAT:
310 return 0x1;
311 case QPU_PACK_A_8B:
312 case QPU_PACK_A_8B_SAT:
313 return 0x2;
314 case QPU_PACK_A_8C:
315 case QPU_PACK_A_8C_SAT:
316 return 0x4;
317 case QPU_PACK_A_8D:
318 case QPU_PACK_A_8D_SAT:
319 return 0x8;
320 case QPU_PACK_A_16A:
321 case QPU_PACK_A_16A_SAT:
322 return 0x3;
323 case QPU_PACK_A_16B:
324 case QPU_PACK_A_16B_SAT:
325 return 0xc;
326 }
327 }
328 unreachable("Bad pack field");
329 }
330 #endif
331
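/* Allocates a fresh temporary register, growing the defs[] array and the
 * spillable bitset as needed; new temps start out marked as spillable.
 */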
332 struct qreg
333 vir_get_temp(struct v3d_compile *c)
334 {
335 struct qreg reg;
336
337 reg.file = QFILE_TEMP;
338 reg.index = c->num_temps++;
339
340 if (c->num_temps > c->defs_array_size) {
341 uint32_t old_size = c->defs_array_size;
342 c->defs_array_size = MAX2(old_size * 2, 16);
343
344 c->defs = reralloc(c, c->defs, struct qinst *,
345 c->defs_array_size);
346 memset(&c->defs[old_size], 0,
347 sizeof(c->defs[0]) * (c->defs_array_size - old_size));
348
349 c->spillable = reralloc(c, c->spillable,
350 BITSET_WORD,
351 BITSET_WORDS(c->defs_array_size));
352 for (int i = old_size; i < c->defs_array_size; i++)
353 BITSET_SET(c->spillable, i);
354 }
355
356 return reg;
357 }
358
359 struct qinst *
360 vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
361 {
362 struct qinst *inst = calloc(1, sizeof(*inst));
363
364 inst->qpu = v3d_qpu_nop();
365 inst->qpu.alu.add.op = op;
366
367 inst->dst = dst;
368 inst->src[0] = src0;
369 inst->src[1] = src1;
370 inst->uniform = ~0;
371
372 return inst;
373 }
374
375 struct qinst *
376 vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
377 {
378 struct qinst *inst = calloc(1, sizeof(*inst));
379
380 inst->qpu = v3d_qpu_nop();
381 inst->qpu.alu.mul.op = op;
382
383 inst->dst = dst;
384 inst->src[0] = src0;
385 inst->src[1] = src1;
386 inst->uniform = ~0;
387
388 return inst;
389 }
390
391 struct qinst *
392 vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
393 {
394 struct qinst *inst = calloc(1, sizeof(*inst));
395
396 inst->qpu = v3d_qpu_nop();
397 inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
398 inst->qpu.branch.cond = cond;
399 inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
400 inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
401 inst->qpu.branch.ub = true;
402 inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
403
404 inst->dst = vir_reg(QFILE_NULL, 0);
405 inst->src[0] = src;
406 inst->uniform = ~0;
407
408 return inst;
409 }
410
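/* Inserts the instruction at the current cursor position (before or after
 * the cursor's link depending on its mode), leaves the cursor pointing just
 * past it, and invalidates the cached live intervals.
 */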
411 static void
412 vir_emit(struct v3d_compile *c, struct qinst *inst)
413 {
414 switch (c->cursor.mode) {
415 case vir_cursor_add:
416 list_add(&inst->link, c->cursor.link);
417 break;
418 case vir_cursor_addtail:
419 list_addtail(&inst->link, c->cursor.link);
420 break;
421 }
422
423 c->cursor = vir_after_inst(inst);
424 c->live_intervals_valid = false;
425 }
426
427 /* Updates inst to write to a new temporary, emits it, and notes the def. */
428 struct qreg
429 vir_emit_def(struct v3d_compile *c, struct qinst *inst)
430 {
431 assert(inst->dst.file == QFILE_NULL);
432
433 /* If we're emitting an instruction that's a def, it had better be
434 * writing a register.
435 */
436 if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
437 assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
438 v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
439 assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
440 v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
441 }
442
443 inst->dst = vir_get_temp(c);
444
445 if (inst->dst.file == QFILE_TEMP)
446 c->defs[inst->dst.index] = inst;
447
448 vir_emit(c, inst);
449
450 return inst->dst;
451 }
452
453 struct qinst *
454 vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
455 {
456 if (inst->dst.file == QFILE_TEMP)
457 c->defs[inst->dst.index] = NULL;
458
459 vir_emit(c, inst);
460
461 return inst;
462 }
463
464 struct qblock *
465 vir_new_block(struct v3d_compile *c)
466 {
467 struct qblock *block = rzalloc(c, struct qblock);
468
469 list_inithead(&block->instructions);
470
471 block->predecessors = _mesa_set_create(block,
472 _mesa_hash_pointer,
473 _mesa_key_pointer_equal);
474
475 block->index = c->next_block_index++;
476
477 return block;
478 }
479
480 void
481 vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
482 {
483 c->cur_block = block;
484 c->cursor = vir_after_block(block);
485 list_addtail(&block->link, &c->blocks);
486 }
487
488 struct qblock *
489 vir_entry_block(struct v3d_compile *c)
490 {
491 return list_first_entry(&c->blocks, struct qblock, link);
492 }
493
494 struct qblock *
495 vir_exit_block(struct v3d_compile *c)
496 {
497 return list_last_entry(&c->blocks, struct qblock, link);
498 }
499
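/* Records a CFG edge: adds the predecessor to the successor's predecessor
 * set and fills the first free of the predecessor's two successor slots.
 */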
500 void
501 vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
502 {
503 _mesa_set_add(successor->predecessors, predecessor);
504 if (predecessor->successors[0]) {
505 assert(!predecessor->successors[1]);
506 predecessor->successors[1] = successor;
507 } else {
508 predecessor->successors[0] = successor;
509 }
510 }
511
512 const struct v3d_compiler *
513 v3d_compiler_init(const struct v3d_device_info *devinfo)
514 {
515 struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
516 if (!compiler)
517 return NULL;
518
519 compiler->devinfo = devinfo;
520
521 if (!vir_init_reg_sets(compiler)) {
522 ralloc_free(compiler);
523 return NULL;
524 }
525
526 return compiler;
527 }
528
529 void
530 v3d_compiler_free(const struct v3d_compiler *compiler)
531 {
532 ralloc_free((void *)compiler);
533 }
534
535 static struct v3d_compile *
536 vir_compile_init(const struct v3d_compiler *compiler,
537 struct v3d_key *key,
538 nir_shader *s,
539 void (*debug_output)(const char *msg,
540 void *debug_output_data),
541 void *debug_output_data,
542 int program_id, int variant_id)
543 {
544 struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);
545
546 c->compiler = compiler;
547 c->devinfo = compiler->devinfo;
548 c->key = key;
549 c->program_id = program_id;
550 c->variant_id = variant_id;
551 c->threads = 4;
552 c->debug_output = debug_output;
553 c->debug_output_data = debug_output_data;
554
555 s = nir_shader_clone(c, s);
556 c->s = s;
557
558 list_inithead(&c->blocks);
559 vir_set_emit_block(c, vir_new_block(c));
560
561 c->output_position_index = -1;
562 c->output_point_size_index = -1;
563 c->output_sample_mask_index = -1;
564
565 c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
566 _mesa_key_pointer_equal);
567
568 return c;
569 }
570
571 static int
572 type_size_vec4(const struct glsl_type *type)
573 {
574 return glsl_count_attribute_slots(type, false);
575 }
576
577 static void
578 v3d_lower_nir(struct v3d_compile *c)
579 {
580 struct nir_lower_tex_options tex_options = {
581 .lower_txd = true,
582 .lower_tg4_broadcom_swizzle = true,
583
584 .lower_rect = false, /* XXX: Use this on V3D 3.x */
585 .lower_txp = ~0,
586 /* Apply swizzles to all samplers. */
587 .swizzle_result = ~0,
588 };
589
590 /* Lower the format swizzle and (for 32-bit returns)
591 * ARB_texture_swizzle-style swizzle.
592 */
593 for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
594 for (int j = 0; j < 4; j++)
595 tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];
596
597 if (c->key->tex[i].clamp_s)
598 tex_options.saturate_s |= 1 << i;
599 if (c->key->tex[i].clamp_t)
600 tex_options.saturate_t |= 1 << i;
601 if (c->key->tex[i].clamp_r)
602 tex_options.saturate_r |= 1 << i;
603 if (c->key->tex[i].return_size == 16) {
604 tex_options.lower_tex_packing[i] =
605 nir_lower_tex_packing_16;
606 }
607 }
608
609 NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
610 NIR_PASS_V(c->s, nir_lower_system_values);
611 }
612
613 static void
614 v3d_set_prog_data_uniforms(struct v3d_compile *c,
615 struct v3d_prog_data *prog_data)
616 {
617 int count = c->num_uniforms;
618 struct v3d_uniform_list *ulist = &prog_data->uniforms;
619
620 ulist->count = count;
621 ulist->data = ralloc_array(prog_data, uint32_t, count);
622 memcpy(ulist->data, c->uniform_data,
623 count * sizeof(*ulist->data));
624 ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
625 memcpy(ulist->contents, c->uniform_contents,
626 count * sizeof(*ulist->contents));
627 }
628
629 /* Copy the compiler UBO range state to the compiled shader, dropping out
630 * arrays that were never referenced by an indirect load.
631 *
632 * (Note that VIR dead code elimination of an array access still leaves that
633 * array alive, though)
634 */
635 static void
636 v3d_set_prog_data_ubo(struct v3d_compile *c,
637 struct v3d_prog_data *prog_data)
638 {
639 if (!c->num_ubo_ranges)
640 return;
641
642 prog_data->num_ubo_ranges = 0;
643 prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
644 c->num_ubo_ranges);
645 for (int i = 0; i < c->num_ubo_ranges; i++) {
646 if (!c->ubo_range_used[i])
647 continue;
648
649 struct v3d_ubo_range *range = &c->ubo_ranges[i];
650 prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
651 prog_data->ubo_size += range->size;
652 }
653
654 if (prog_data->ubo_size) {
655 if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
656 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
657 vir_get_stage_name(c),
658 c->program_id, c->variant_id,
659 prog_data->ubo_size / 4);
660 }
661 }
662 }
663
664 static void
665 v3d_vs_set_prog_data(struct v3d_compile *c,
666 struct v3d_vs_prog_data *prog_data)
667 {
668 prog_data->base.num_inputs = c->num_inputs;
669
670 /* The vertex data gets format converted by the VPM so that
671 * each attribute channel takes up a VPM column. Precompute
672 * the sizes for the shader record.
673 */
674 for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
675 prog_data->vattr_sizes[i] = c->vattr_sizes[i];
676 prog_data->vpm_input_size += c->vattr_sizes[i];
677 }
678
679 prog_data->uses_vid = (c->s->info.system_values_read &
680 (1ull << SYSTEM_VALUE_VERTEX_ID));
681 prog_data->uses_iid = (c->s->info.system_values_read &
682 (1ull << SYSTEM_VALUE_INSTANCE_ID));
683
684 if (prog_data->uses_vid)
685 prog_data->vpm_input_size++;
686 if (prog_data->uses_iid)
687 prog_data->vpm_input_size++;
688
689 /* Input/output segment sizes are in sectors (8 rows of 32 bits per
690 * channel).
691 */
692 prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
693 prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
694
695 /* Set us up for shared input/output segments. This is apparently
696 * necessary for our VCM setup to avoid varying corruption.
697 */
698 prog_data->separate_segments = false;
699 prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
700 prog_data->vpm_input_size);
701 prog_data->vpm_input_size = 0;
702
703 /* Compute VCM cache size. We set up our program to take up less than
704 * half of the VPM, so that any set of bin and render programs won't
705 * run out of space. We need space for at least one input segment,
706 * and then allocate the rest to output segments (one for the current
707 * program, the rest to VCM). The valid range of the VCM cache size
708 * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
709 * batches.
710 */
711 assert(c->devinfo->vpm_size);
712 int sector_size = 16 * sizeof(uint32_t) * 8;
713 int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
714 int half_vpm = vpm_size_in_sectors / 2;
715 int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
716 int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
717 assert(vpm_output_batches >= 2);
718 prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
719 }
720
721 static void
722 v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
723 struct v3d_fs_prog_data *prog_data)
724 {
725 prog_data->base.num_inputs = c->num_inputs;
726 memcpy(prog_data->input_slots, c->input_slots,
727 c->num_inputs * sizeof(*c->input_slots));
728
729 STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
730 (V3D_MAX_FS_INPUTS - 1) / 24);
731 for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
732 if (BITSET_TEST(c->flat_shade_flags, i))
733 prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
734
735 if (BITSET_TEST(c->noperspective_flags, i))
736 prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);
737
738 if (BITSET_TEST(c->centroid_flags, i))
739 prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
740 }
741 }
742
743 static void
744 v3d_fs_set_prog_data(struct v3d_compile *c,
745 struct v3d_fs_prog_data *prog_data)
746 {
747 v3d_set_fs_prog_data_inputs(c, prog_data);
748 prog_data->writes_z = c->writes_z;
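/* Early-Z ("EZ") is left enabled only when early_fragment_tests is set,
 * either declared by the shader or promoted by v3d_nir_lower_fs_early()
 * when the shader has no image/SSBO/atomic side effects.
 */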
749 prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
750 prog_data->uses_center_w = c->uses_center_w;
751 }
752
753 static void
754 v3d_set_prog_data(struct v3d_compile *c,
755 struct v3d_prog_data *prog_data)
756 {
757 prog_data->threads = c->threads;
758 prog_data->single_seg = !c->last_thrsw;
759 prog_data->spill_size = c->spill_size;
760
761 v3d_set_prog_data_uniforms(c, prog_data);
762 v3d_set_prog_data_ubo(c, prog_data);
763
764 if (c->s->info.stage == MESA_SHADER_VERTEX) {
765 v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
766 } else {
767 assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
768 v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
769 }
770 }
771
772 static uint64_t *
773 v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
774 {
775 *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
776
777 uint64_t *qpu_insts = malloc(*final_assembly_size);
778 if (!qpu_insts)
779 return NULL;
780
781 memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
782
783 vir_compile_destroy(c);
784
785 return qpu_insts;
786 }
787
788 static void
789 v3d_nir_lower_vs_early(struct v3d_compile *c)
790 {
791 /* Split our I/O vars and dead code eliminate the unused
792 * components.
793 */
794 NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
795 nir_var_shader_in | nir_var_shader_out);
796 uint64_t used_outputs[4] = {0};
797 for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
798 int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]);
799 int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]);
800 used_outputs[comp] |= 1ull << slot;
801 }
802 NIR_PASS_V(c->s, nir_remove_unused_io_vars,
803 &c->s->outputs, used_outputs, NULL); /* demotes to globals */
804 NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
805 v3d_optimize_nir(c->s);
806 NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in);
807 NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
808 type_size_vec4,
809 (nir_lower_io_options)0);
810 }
811
812 static void
813 v3d_fixup_fs_output_types(struct v3d_compile *c)
814 {
815 nir_foreach_variable(var, &c->s->outputs) {
816 uint32_t mask = 0;
817
818 switch (var->data.location) {
819 case FRAG_RESULT_COLOR:
820 mask = ~0;
821 break;
822 case FRAG_RESULT_DATA0:
823 case FRAG_RESULT_DATA1:
824 case FRAG_RESULT_DATA2:
825 case FRAG_RESULT_DATA3:
826 mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
827 break;
828 }
829
830 if (c->fs_key->int_color_rb & mask) {
831 var->type =
832 glsl_vector_type(GLSL_TYPE_INT,
833 glsl_get_components(var->type));
834 } else if (c->fs_key->uint_color_rb & mask) {
835 var->type =
836 glsl_vector_type(GLSL_TYPE_UINT,
837 glsl_get_components(var->type));
838 }
839 }
840 }
841
842 static void
843 v3d_nir_lower_fs_early(struct v3d_compile *c)
844 {
845 if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
846 v3d_fixup_fs_output_types(c);
847
848 /* If the shader has no non-TLB side effects, we can promote it to
849 * enabling early_fragment_tests even if the user didn't.
850 */
851 if (!(c->s->info.num_images ||
852 c->s->info.num_ssbos ||
853 c->s->info.num_abos)) {
854 c->s->info.fs.early_fragment_tests = true;
855 }
856 }
857
858 static void
859 v3d_nir_lower_vs_late(struct v3d_compile *c)
860 {
861 if (c->vs_key->clamp_color)
862 NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
863
864 if (c->key->ucp_enables) {
865 NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables,
866 false);
867 NIR_PASS_V(c->s, nir_lower_io_to_scalar,
868 nir_var_shader_out);
869 }
870
871 /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
872 NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
873 }
874
875 static void
876 v3d_nir_lower_fs_late(struct v3d_compile *c)
877 {
878 if (c->fs_key->light_twoside)
879 NIR_PASS_V(c->s, nir_lower_two_sided_color);
880
881 if (c->fs_key->clamp_color)
882 NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);
883
884 if (c->fs_key->alpha_test) {
885 NIR_PASS_V(c->s, nir_lower_alpha_test,
886 c->fs_key->alpha_test_func,
887 false);
888 }
889
890 if (c->key->ucp_enables)
891 NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
892
893 /* Note: FS input scalarizing must happen after
894 * nir_lower_two_sided_color, which only handles a vec4 at a time.
895 */
896 NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
897 }
898
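/* Top-level compiler entry point: clones the incoming NIR shader, runs the
 * stage-specific and shared lowering passes, translates to VIR and on to QPU
 * instructions, fills in the stage's prog_data, reports shader-db stats
 * through the debug callback, and returns a malloc'ed copy of the QPU code.
 */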
899 uint64_t *v3d_compile(const struct v3d_compiler *compiler,
900 struct v3d_key *key,
901 struct v3d_prog_data **out_prog_data,
902 nir_shader *s,
903 void (*debug_output)(const char *msg,
904 void *debug_output_data),
905 void *debug_output_data,
906 int program_id, int variant_id,
907 uint32_t *final_assembly_size)
908 {
909 struct v3d_prog_data *prog_data;
910 struct v3d_compile *c = vir_compile_init(compiler, key, s,
911 debug_output, debug_output_data,
912 program_id, variant_id);
913
914 switch (c->s->info.stage) {
915 case MESA_SHADER_VERTEX:
916 c->vs_key = (struct v3d_vs_key *)key;
917 prog_data = rzalloc_size(NULL, sizeof(struct v3d_vs_prog_data));
918 break;
919 case MESA_SHADER_FRAGMENT:
920 c->fs_key = (struct v3d_fs_key *)key;
921 prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
922 break;
923 default:
924 unreachable("unsupported shader stage");
925 }
926
927 if (c->s->info.stage == MESA_SHADER_VERTEX) {
928 v3d_nir_lower_vs_early(c);
929 } else {
930 assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
931 v3d_nir_lower_fs_early(c);
932 }
933
934 v3d_lower_nir(c);
935
936 if (c->s->info.stage == MESA_SHADER_VERTEX) {
937 v3d_nir_lower_vs_late(c);
938 } else {
939 assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
940 v3d_nir_lower_fs_late(c);
941 }
942
943 NIR_PASS_V(c->s, v3d_nir_lower_io, c);
944 NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
945 NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
946 NIR_PASS_V(c->s, nir_lower_idiv);
947
948 v3d_optimize_nir(c->s);
949 NIR_PASS_V(c->s, nir_lower_bool_to_int32);
950 NIR_PASS_V(c->s, nir_convert_from_ssa, true);
951
952 v3d_nir_to_vir(c);
953
954 v3d_set_prog_data(c, prog_data);
955
956 *out_prog_data = prog_data;
957
958 char *shaderdb;
959 int ret = asprintf(&shaderdb,
960 "%s shader: %d inst, %d threads, %d loops, "
961 "%d uniforms, %d:%d spills:fills",
962 vir_get_stage_name(c),
963 c->qpu_inst_count,
964 c->threads,
965 c->loops,
966 c->num_uniforms,
967 c->spills,
968 c->fills);
969 if (ret >= 0) {
970 c->debug_output(shaderdb, c->debug_output_data);
971 free(shaderdb);
972 }
973
974 return v3d_return_qpu_insts(c, final_assembly_size);
975 }
976
977 void
978 vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
979 {
980 if (qinst->dst.file == QFILE_TEMP)
981 c->defs[qinst->dst.index] = NULL;
982
983 assert(&qinst->link != c->cursor.link);
984
985 list_del(&qinst->link);
986 free(qinst);
987
988 c->live_intervals_valid = false;
989 }
990
991 struct qreg
992 vir_follow_movs(struct v3d_compile *c, struct qreg reg)
993 {
994 /* XXX
995 int pack = reg.pack;
996
997 while (reg.file == QFILE_TEMP &&
998 c->defs[reg.index] &&
999 (c->defs[reg.index]->op == QOP_MOV ||
1000 c->defs[reg.index]->op == QOP_FMOV) &&
1001 !c->defs[reg.index]->dst.pack &&
1002 !c->defs[reg.index]->src[0].pack) {
1003 reg = c->defs[reg.index]->src[0];
1004 }
1005
1006 reg.pack = pack;
1007 */
1008 return reg;
1009 }
1010
1011 void
1012 vir_compile_destroy(struct v3d_compile *c)
1013 {
1014 /* Defuse the assert that we aren't removing the cursor's instruction.
1015 */
1016 c->cursor.link = NULL;
1017
1018 vir_for_each_block(block, c) {
1019 while (!list_empty(&block->instructions)) {
1020 struct qinst *qinst =
1021 list_first_entry(&block->instructions,
1022 struct qinst, link);
1023 vir_remove_instruction(c, qinst);
1024 }
1025 }
1026
1027 ralloc_free(c);
1028 }
1029
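/* Returns a QFILE_UNIF register for (contents, data), reusing an existing
 * uniform slot if an identical one was already emitted and growing the
 * uniform arrays otherwise.
 */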
1030 struct qreg
1031 vir_uniform(struct v3d_compile *c,
1032 enum quniform_contents contents,
1033 uint32_t data)
1034 {
1035 for (int i = 0; i < c->num_uniforms; i++) {
1036 if (c->uniform_contents[i] == contents &&
1037 c->uniform_data[i] == data) {
1038 return vir_reg(QFILE_UNIF, i);
1039 }
1040 }
1041
1042 uint32_t uniform = c->num_uniforms++;
1043
1044 if (uniform >= c->uniform_array_size) {
1045 c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
1046 c->uniform_array_size * 2);
1047
1048 c->uniform_data = reralloc(c, c->uniform_data,
1049 uint32_t,
1050 c->uniform_array_size);
1051 c->uniform_contents = reralloc(c, c->uniform_contents,
1052 enum quniform_contents,
1053 c->uniform_array_size);
1054 }
1055
1056 c->uniform_contents[uniform] = contents;
1057 c->uniform_data[uniform] = data;
1058
1059 return vir_reg(QFILE_UNIF, uniform);
1060 }
1061
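/* Returns whether flag updates can be attached to this instruction: it must
 * be a real ALU op (not all NOPs), and on V3D 4.x it must not read the VPM
 * or use the SFU.
 */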
1062 static bool
1063 vir_can_set_flags(struct v3d_compile *c, struct qinst *inst)
1064 {
1065 if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) ||
1066 v3d_qpu_uses_sfu(&inst->qpu))) {
1067 return false;
1068 }
1069
1070 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
1071 (inst->qpu.alu.add.op == V3D_QPU_A_NOP &&
1072 inst->qpu.alu.mul.op == V3D_QPU_M_NOP)) {
1073 return false;
1074 }
1075
1076 return true;
1077 }
1078
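/* Sets the pushed flags ("PF") from src: if the instruction that defined src
 * is the last one emitted at the cursor and is allowed to set flags, the
 * flags are folded into it; otherwise a MOV to a NULL destination is emitted
 * just to set them.
 */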
1079 void
1080 vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
1081 {
1082 struct qinst *last_inst = NULL;
1083
1084 if (!list_empty(&c->cur_block->instructions)) {
1085 last_inst = (struct qinst *)c->cur_block->instructions.prev;
1086
1087 /* Can't stuff the PF into the last inst if our cursor
1088 * isn't pointing after it.
1089 */
1090 struct vir_cursor after_inst = vir_after_inst(last_inst);
1091 if (c->cursor.mode != after_inst.mode ||
1092 c->cursor.link != after_inst.link)
1093 last_inst = NULL;
1094 }
1095
1096 if (src.file != QFILE_TEMP ||
1097 !c->defs[src.index] ||
1098 last_inst != c->defs[src.index] ||
1099 !vir_can_set_flags(c, last_inst)) {
1100 /* XXX: Make the MOV be the appropriate type */
1101 last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
1102 }
1103
1104 vir_set_pf(last_inst, pf);
1105 }
1106
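/* Runs a single optimization pass, accumulating whether any pass in this
 * iteration made progress and optionally printing per-pass debug output.
 */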
1107 #define OPTPASS(func) \
1108 do { \
1109 bool stage_progress = func(c); \
1110 if (stage_progress) { \
1111 progress = true; \
1112 if (print_opt_debug) { \
1113 fprintf(stderr, \
1114 "VIR opt pass %2d: %s progress\n", \
1115 pass, #func); \
1116 } \
1117 /*XXX vir_validate(c);*/ \
1118 } \
1119 } while (0)
1120
1121 void
1122 vir_optimize(struct v3d_compile *c)
1123 {
1124 bool print_opt_debug = false;
1125 int pass = 1;
1126
1127 while (true) {
1128 bool progress = false;
1129
1130 OPTPASS(vir_opt_copy_propagate);
1131 OPTPASS(vir_opt_dead_code);
1132 OPTPASS(vir_opt_small_immediates);
1133
1134 if (!progress)
1135 break;
1136
1137 pass++;
1138 }
1139 }
1140
1141 const char *
1142 vir_get_stage_name(struct v3d_compile *c)
1143 {
1144 if (c->vs_key && c->vs_key->is_coord)
1145 return "MESA_SHADER_COORD";
1146 else
1147 return gl_shader_stage_name(c->s->info.stage);
1148 }