src/broadcom/compiler/vir.c
/*
 * Copyright © 2016-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"

int
vir_get_non_sideband_nsrc(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return 0;
        case V3D_QPU_INSTR_TYPE_ALU:
                if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
                        return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
                else
                        return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
        }

        return 0;
}

int
vir_get_nsrc(struct qinst *inst)
{
        int nsrc = vir_get_non_sideband_nsrc(inst);

        if (vir_has_implicit_uniform(inst))
                nsrc++;

        return nsrc;
}

bool
vir_has_implicit_uniform(struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->dst.file) {
                case QFILE_TLBU:
                        return true;
                case QFILE_MAGIC:
                        switch (inst->dst.index) {
                        case V3D_QPU_WADDR_TLBU:
                        case V3D_QPU_WADDR_TMUAU:
                        case V3D_QPU_WADDR_SYNCU:
                                return true;
                        default:
                                break;
                        }
                        break;
                default:
                        return inst->has_implicit_uniform;
                }
        }
        return false;
}

/* The sideband uniform for textures gets stored after the normal ALU
 * arguments.
 */
int
vir_get_implicit_uniform_src(struct qinst *inst)
{
        if (!vir_has_implicit_uniform(inst))
                return -1;
        return vir_get_nsrc(inst) - 1;
}
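
/* Example (illustrative): a write to TMUAU with one normal ALU source has
 * nsrc == 2 once the sideband uniform is counted, so the uniform occupies
 * src[1] and this function returns 1.  The exact operand counts come from
 * v3d_qpu_add_op_num_src()/v3d_qpu_mul_op_num_src().
 */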

/**
 * Returns whether the instruction has any side effects that must be
 * preserved.
 */
bool
vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
{
        switch (inst->qpu.type) {
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return true;
        case V3D_QPU_INSTR_TYPE_ALU:
                switch (inst->qpu.alu.add.op) {
                case V3D_QPU_A_SETREVF:
                case V3D_QPU_A_SETMSF:
                case V3D_QPU_A_VPMSETUP:
                case V3D_QPU_A_STVPMV:
                case V3D_QPU_A_STVPMD:
                case V3D_QPU_A_STVPMP:
                case V3D_QPU_A_VPMWT:
                case V3D_QPU_A_TMUWT:
                        return true;
                default:
                        break;
                }

                switch (inst->qpu.alu.mul.op) {
                case V3D_QPU_M_MULTOP:
                        return true;
                default:
                        break;
                }
        }

        if (inst->qpu.sig.ldtmu ||
            inst->qpu.sig.ldvary ||
            inst->qpu.sig.wrtmuc ||
            inst->qpu.sig.thrsw) {
                return true;
        }

        return false;
}

bool
vir_is_raw_mov(struct qinst *inst)
{
        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
            (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
             inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
                return false;
        }

        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
                return false;
        }

        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
                return false;

        return true;
}

bool
vir_is_add(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.add.op != V3D_QPU_A_NOP);
}

bool
vir_is_mul(struct qinst *inst)
{
        return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
}

bool
vir_is_tex(struct qinst *inst)
{
        if (inst->dst.file == QFILE_MAGIC)
                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);

        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
                return true;
        }

        return false;
}

bool
vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        for (int i = 0; i < vir_get_nsrc(inst); i++) {
                switch (inst->src[i].file) {
                case QFILE_VPM:
                        return true;
                default:
                        break;
                }
        }

        if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
                                  inst->qpu.sig.ldtlb ||
                                  inst->qpu.sig.ldtlbu ||
                                  inst->qpu.sig.ldvpm)) {
                return true;
        }

        return false;
}

bool
vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
{
        switch (inst->dst.file) {
        case QFILE_MAGIC:
                switch (inst->dst.index) {
                case V3D_QPU_WADDR_RECIP:
                case V3D_QPU_WADDR_RSQRT:
                case V3D_QPU_WADDR_EXP:
                case V3D_QPU_WADDR_LOG:
                case V3D_QPU_WADDR_SIN:
                        return true;
                }
                break;
        default:
                break;
        }

        if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
                return true;

        return false;
}

void
vir_set_unpack(struct qinst *inst, int src,
               enum v3d_qpu_input_unpack unpack)
{
        assert(src == 0 || src == 1);

        if (vir_is_add(inst)) {
                if (src == 0)
                        inst->qpu.alu.add.a_unpack = unpack;
                else
                        inst->qpu.alu.add.b_unpack = unpack;
        } else {
                assert(vir_is_mul(inst));
                if (src == 0)
                        inst->qpu.alu.mul.a_unpack = unpack;
                else
                        inst->qpu.alu.mul.b_unpack = unpack;
        }
}

void
vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.ac = cond;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mc = cond;
        }
}

void
vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.apf = pf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.mpf = pf;
        }
}

void
vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
{
        if (vir_is_add(inst)) {
                inst->qpu.flags.auf = uf;
        } else {
                assert(vir_is_mul(inst));
                inst->qpu.flags.muf = uf;
        }
}
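
/* Usage sketch (illustrative, not a caller in this file): condition and flag
 * updates are routed to whichever ALU half the instruction occupies, so a
 * conditional add built by hand would look like:
 *
 *    struct qinst *add = vir_add_inst(V3D_QPU_A_ADD, dst, a, b);
 *    vir_set_cond(add, V3D_QPU_COND_IFA);
 *    vir_emit_nondef(c, add);
 *
 * where dst/a/b are hypothetical qregs set up by the caller.
 */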

#if 0
uint8_t
vir_channels_written(struct qinst *inst)
{
        if (vir_is_mul(inst)) {
                switch (inst->dst.pack) {
                case QPU_PACK_MUL_NOP:
                case QPU_PACK_MUL_8888:
                        return 0xf;
                case QPU_PACK_MUL_8A:
                        return 0x1;
                case QPU_PACK_MUL_8B:
                        return 0x2;
                case QPU_PACK_MUL_8C:
                        return 0x4;
                case QPU_PACK_MUL_8D:
                        return 0x8;
                }
        } else {
                switch (inst->dst.pack) {
                case QPU_PACK_A_NOP:
                case QPU_PACK_A_8888:
                case QPU_PACK_A_8888_SAT:
                case QPU_PACK_A_32_SAT:
                        return 0xf;
                case QPU_PACK_A_8A:
                case QPU_PACK_A_8A_SAT:
                        return 0x1;
                case QPU_PACK_A_8B:
                case QPU_PACK_A_8B_SAT:
                        return 0x2;
                case QPU_PACK_A_8C:
                case QPU_PACK_A_8C_SAT:
                        return 0x4;
                case QPU_PACK_A_8D:
                case QPU_PACK_A_8D_SAT:
                        return 0x8;
                case QPU_PACK_A_16A:
                case QPU_PACK_A_16A_SAT:
                        return 0x3;
                case QPU_PACK_A_16B:
                case QPU_PACK_A_16B_SAT:
                        return 0xc;
                }
        }
        unreachable("Bad pack field");
}
#endif

struct qreg
vir_get_temp(struct v3d_compile *c)
{
        struct qreg reg;

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;

        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
                c->defs_array_size = MAX2(old_size * 2, 16);

                c->defs = reralloc(c, c->defs, struct qinst *,
                                   c->defs_array_size);
                memset(&c->defs[old_size], 0,
                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));

                c->spillable = reralloc(c, c->spillable,
                                        BITSET_WORD,
                                        BITSET_WORDS(c->defs_array_size));
                for (int i = old_size; i < c->defs_array_size; i++)
                        BITSET_SET(c->spillable, i);
        }

        return reg;
}
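
/* Growth note: defs_array_size doubles from a floor of 16 (16, 32, 64, ...),
 * so allocating N temps costs O(N) amortized reralloc work.  Every new temp
 * starts out marked spillable in c->spillable; passes elsewhere can clear a
 * bit for temps that must not be spilled.
 */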

struct qinst *
vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.add.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.alu.mul.op = op;

        inst->dst = dst;
        inst->src[0] = src0;
        inst->src[1] = src1;
        inst->uniform = ~0;

        return inst;
}

struct qinst *
vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
{
        struct qinst *inst = calloc(1, sizeof(*inst));

        inst->qpu = v3d_qpu_nop();
        inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
        inst->qpu.branch.cond = cond;
        inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
        inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
        inst->qpu.branch.ub = true;
        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;

        inst->dst = vir_nop_reg();
        inst->src[0] = src;
        inst->uniform = ~0;

        return inst;
}
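
/* Usage sketch (illustrative): branches take their relative target from the
 * uniform stream, so a caller typically pairs the instruction with a
 * constant uniform and emits it as a non-def:
 *
 *    struct qinst *branch =
 *            vir_branch_inst(V3D_QPU_BRANCH_COND_ALWAYS,
 *                            vir_uniform(c, QUNIFORM_CONSTANT, 0));
 *    vir_emit_nondef(c, branch);
 *
 * The 0 is a placeholder offset, filled in once the destination block's
 * position is known.
 */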

static void
vir_emit(struct v3d_compile *c, struct qinst *inst)
{
        switch (c->cursor.mode) {
        case vir_cursor_add:
                list_add(&inst->link, c->cursor.link);
                break;
        case vir_cursor_addtail:
                list_addtail(&inst->link, c->cursor.link);
                break;
        }

        c->cursor = vir_after_inst(inst);
        c->live_intervals_valid = false;
}

/* Updates inst to write to a new temporary, emits it, and notes the def. */
struct qreg
vir_emit_def(struct v3d_compile *c, struct qinst *inst)
{
        assert(inst->dst.file == QFILE_NULL);

        /* If we're emitting an instruction that's a def, it had better be
         * writing a register.
         */
        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
                       v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
                assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
                       v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
        }

        inst->dst = vir_get_temp(c);

        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = inst;

        vir_emit(c, inst);

        return inst->dst;
}

struct qinst *
vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
{
        if (inst->dst.file == QFILE_TEMP)
                c->defs[inst->dst.index] = NULL;

        vir_emit(c, inst);

        return inst;
}
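
/* Usage sketch (illustrative): vir_emit_def() is for SSA-style values, where
 * the instruction receives a fresh temp as its destination and is recorded
 * in c->defs; vir_emit_nondef() is for instructions whose destination was
 * already chosen (magic waddrs, conditional writes), which therefore cannot
 * serve as the single def of a temp:
 *
 *    struct qreg sum =
 *            vir_emit_def(c, vir_add_inst(V3D_QPU_A_FADD,
 *                                         vir_nop_reg(), a, b));
 *
 * with a/b hypothetical source qregs.
 */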

struct qblock *
vir_new_block(struct v3d_compile *c)
{
        struct qblock *block = rzalloc(c, struct qblock);

        list_inithead(&block->instructions);

        block->predecessors = _mesa_set_create(block,
                                               _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);

        block->index = c->next_block_index++;

        return block;
}

void
vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
{
        c->cur_block = block;
        c->cursor = vir_after_block(block);
        list_addtail(&block->link, &c->blocks);
}

struct qblock *
vir_entry_block(struct v3d_compile *c)
{
        return list_first_entry(&c->blocks, struct qblock, link);
}

struct qblock *
vir_exit_block(struct v3d_compile *c)
{
        return list_last_entry(&c->blocks, struct qblock, link);
}

void
vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
{
        _mesa_set_add(successor->predecessors, predecessor);
        if (predecessor->successors[0]) {
                assert(!predecessor->successors[1]);
                predecessor->successors[1] = successor;
        } else {
                predecessor->successors[0] = successor;
        }
}
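
/* CFG note: a block has at most two successors, filled in link order, while
 * predecessors are an unordered set.  For example (illustrative), lowering
 * an if statement links the header block to both the then and else blocks,
 * and each of those to the merge block.
 */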

const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info *devinfo)
{
        struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
        if (!compiler)
                return NULL;

        compiler->devinfo = devinfo;

        if (!vir_init_reg_sets(compiler)) {
                ralloc_free(compiler);
                return NULL;
        }

        return compiler;
}

void
v3d_compiler_free(const struct v3d_compiler *compiler)
{
        ralloc_free((void *)compiler);
}

static struct v3d_compile *
vir_compile_init(const struct v3d_compiler *compiler,
                 struct v3d_key *key,
                 nir_shader *s,
                 void (*debug_output)(const char *msg,
                                      void *debug_output_data),
                 void *debug_output_data,
                 int program_id, int variant_id)
{
        struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);

        c->compiler = compiler;
        c->devinfo = compiler->devinfo;
        c->key = key;
        c->program_id = program_id;
        c->variant_id = variant_id;
        c->threads = 4;
        c->debug_output = debug_output;
        c->debug_output_data = debug_output_data;

        s = nir_shader_clone(c, s);
        c->s = s;

        list_inithead(&c->blocks);
        vir_set_emit_block(c, vir_new_block(c));

        c->output_position_index = -1;
        c->output_point_size_index = -1;
        c->output_sample_mask_index = -1;

        c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

        return c;
}

static int
type_size_vec4(const struct glsl_type *type)
{
        return glsl_count_attribute_slots(type, false);
}

static void
v3d_lower_nir(struct v3d_compile *c)
{
        struct nir_lower_tex_options tex_options = {
                .lower_txd = true,
                .lower_tg4_broadcom_swizzle = true,

                .lower_rect = false, /* XXX: Use this on V3D 3.x */
                .lower_txp = ~0,
                /* Apply swizzles to all samplers. */
                .swizzle_result = ~0,
        };

        /* Lower the format swizzle and (for 32-bit returns)
         * ARB_texture_swizzle-style swizzle.
         */
        for (int i = 0; i < ARRAY_SIZE(c->key->tex); i++) {
                for (int j = 0; j < 4; j++)
                        tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];

                if (c->key->tex[i].clamp_s)
                        tex_options.saturate_s |= 1 << i;
                if (c->key->tex[i].clamp_t)
                        tex_options.saturate_t |= 1 << i;
                if (c->key->tex[i].clamp_r)
                        tex_options.saturate_r |= 1 << i;
                if (c->key->tex[i].return_size == 16) {
                        tex_options.lower_tex_packing[i] =
                                nir_lower_tex_packing_16;
                }
        }

        NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
        NIR_PASS_V(c->s, nir_lower_system_values);
}

static void
v3d_set_prog_data_uniforms(struct v3d_compile *c,
                           struct v3d_prog_data *prog_data)
{
        int count = c->num_uniforms;
        struct v3d_uniform_list *ulist = &prog_data->uniforms;

        ulist->count = count;
        ulist->data = ralloc_array(prog_data, uint32_t, count);
        memcpy(ulist->data, c->uniform_data,
               count * sizeof(*ulist->data));
        ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
        memcpy(ulist->contents, c->uniform_contents,
               count * sizeof(*ulist->contents));
}

/* Copy the compiler UBO range state to the compiled shader, dropping out
 * arrays that were never referenced by an indirect load.
 *
 * (Note that QIR dead code elimination of an array access still leaves that
 * array alive, though)
 */
static void
v3d_set_prog_data_ubo(struct v3d_compile *c,
                      struct v3d_prog_data *prog_data)
{
        if (!c->num_ubo_ranges)
                return;

        prog_data->num_ubo_ranges = 0;
        prog_data->ubo_ranges = ralloc_array(prog_data, struct v3d_ubo_range,
                                             c->num_ubo_ranges);
        for (int i = 0; i < c->num_ubo_ranges; i++) {
                if (!c->ubo_range_used[i])
                        continue;

                struct v3d_ubo_range *range = &c->ubo_ranges[i];
                prog_data->ubo_ranges[prog_data->num_ubo_ranges++] = *range;
                prog_data->ubo_size += range->size;
        }

        if (prog_data->ubo_size) {
                if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
                        fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
                                vir_get_stage_name(c),
                                c->program_id, c->variant_id,
                                prog_data->ubo_size / 4);
                }
        }
}

static void
v3d_vs_set_prog_data(struct v3d_compile *c,
                     struct v3d_vs_prog_data *prog_data)
{
        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column.  Precompute
         * the sizes for the shader record.
         */
        for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
                prog_data->vattr_sizes[i] = c->vattr_sizes[i];
                prog_data->vpm_input_size += c->vattr_sizes[i];
        }

        prog_data->uses_vid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_VERTEX_ID));
        prog_data->uses_iid = (c->s->info.system_values_read &
                               (1ull << SYSTEM_VALUE_INSTANCE_ID));

        if (prog_data->uses_vid)
                prog_data->vpm_input_size++;
        if (prog_data->uses_iid)
                prog_data->vpm_input_size++;

        /* Input/output segment sizes are in sectors (8 rows of 32 bits per
         * channel).
         */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;

        /* Set us up for shared input/output segments.  This is apparently
         * necessary for our VCM setup to avoid varying corruption.
         */
        prog_data->separate_segments = false;
        prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
                                          prog_data->vpm_input_size);
        prog_data->vpm_input_size = 0;

        /* Compute VCM cache size.  We set up our program to take up less
         * than half of the VPM, so that any set of bin and render programs
         * won't run out of space.  We need space for at least one input
         * segment, and then allocate the rest to output segments (one for
         * the current program, the rest to VCM).  The valid range of the
         * VCM cache size field is 1-4 16-vertex batches, but GFXH-1744
         * limits us to 2-4 batches.
         */
        assert(c->devinfo->vpm_size);
        int sector_size = 16 * sizeof(uint32_t) * 8;
        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
        int half_vpm = vpm_size_in_sectors / 2;
        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
        assert(vpm_output_batches >= 2);
        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
}
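
/* Worked example (illustrative numbers only): a sector is
 * 16 lanes * 4 bytes * 8 rows = 512 bytes.  With a hypothetical 8 KB VPM,
 * vpm_size_in_sectors = 16 and half_vpm = 8.  Since shared segments force
 * vpm_input_size to 0, a shader with vpm_output_size = 2 gets
 * vpm_output_batches = 8 / 2 = 4 and vcm_cache_size = CLAMP(4 - 1, 2, 4) = 3.
 */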

static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
{
        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

        STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
                      (V3D_MAX_FS_INPUTS - 1) / 24);
        for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                if (BITSET_TEST(c->flat_shade_flags, i))
                        prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->noperspective_flags, i))
                        prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);

                if (BITSET_TEST(c->centroid_flags, i))
                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
        }
}
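
/* Packing example: the flags are stored 24 inputs per array element (hence
 * the / 24 and % 24), so a flat-shaded input at slot 30 sets bit 6 of
 * flat_shade_flags[1] (30 / 24 = 1, 30 % 24 = 6).
 */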

static void
v3d_fs_set_prog_data(struct v3d_compile *c,
                     struct v3d_fs_prog_data *prog_data)
{
        v3d_set_fs_prog_data_inputs(c, prog_data);
        prog_data->writes_z = c->writes_z;
        prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
        prog_data->uses_center_w = c->uses_center_w;
}

static void
v3d_set_prog_data(struct v3d_compile *c,
                  struct v3d_prog_data *prog_data)
{
        prog_data->threads = c->threads;
        prog_data->single_seg = !c->last_thrsw;
        prog_data->spill_size = c->spill_size;

        v3d_set_prog_data_uniforms(c, prog_data);
        v3d_set_prog_data_ubo(c, prog_data);

        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
        } else {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
        }
}

static uint64_t *
v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
{
        *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);

        uint64_t *qpu_insts = malloc(*final_assembly_size);
        if (!qpu_insts)
                return NULL;

        memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);

        vir_compile_destroy(c);

        return qpu_insts;
}

static void
v3d_nir_lower_vs_early(struct v3d_compile *c)
{
        /* Split our I/O vars and dead code eliminate the unused
         * components.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
                   nir_var_shader_in | nir_var_shader_out);
        uint64_t used_outputs[4] = {0};
        for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
                int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]);
                int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]);
                used_outputs[comp] |= 1ull << slot;
        }
        NIR_PASS_V(c->s, nir_remove_unused_io_vars,
                   &c->s->outputs, used_outputs, NULL); /* demotes to globals */
        NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in);
        NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
                   type_size_vec4,
                   (nir_lower_io_options)0);
}
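
/* used_outputs is indexed by component, one bit per varying slot.  For
 * example (illustrative), if the FS consumes component 2 of
 * VARYING_SLOT_VAR0, the loop sets bit VARYING_SLOT_VAR0 in used_outputs[2];
 * any VS output not marked this way is demoted to a global and then
 * dead-code eliminated by the passes that follow.
 */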

static void
v3d_fixup_fs_output_types(struct v3d_compile *c)
{
        nir_foreach_variable(var, &c->s->outputs) {
                uint32_t mask = 0;

                switch (var->data.location) {
                case FRAG_RESULT_COLOR:
                        mask = ~0;
                        break;
                case FRAG_RESULT_DATA0:
                case FRAG_RESULT_DATA1:
                case FRAG_RESULT_DATA2:
                case FRAG_RESULT_DATA3:
                        mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
                        break;
                }

                if (c->fs_key->int_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_INT,
                                                 glsl_get_components(var->type));
                } else if (c->fs_key->uint_color_rb & mask) {
                        var->type =
                                glsl_vector_type(GLSL_TYPE_UINT,
                                                 glsl_get_components(var->type));
                }
        }
}

static void
v3d_nir_lower_fs_early(struct v3d_compile *c)
{
        if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
                v3d_fixup_fs_output_types(c);

        /* If the shader has no non-TLB side effects, we can promote it to
         * enabling early_fragment_tests even if the user didn't.
         */
        if (!(c->s->info.num_images ||
              c->s->info.num_ssbos ||
              c->s->info.num_abos)) {
                c->s->info.fs.early_fragment_tests = true;
        }
}

static void
v3d_nir_lower_vs_late(struct v3d_compile *c)
{
        if (c->vs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (c->key->ucp_enables) {
                NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables,
                           false);
                NIR_PASS_V(c->s, nir_lower_io_to_scalar,
                           nir_var_shader_out);
        }

        /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
}

static void
v3d_nir_lower_fs_late(struct v3d_compile *c)
{
        if (c->fs_key->light_twoside)
                NIR_PASS_V(c->s, nir_lower_two_sided_color);

        if (c->fs_key->clamp_color)
                NIR_PASS_V(c->s, nir_lower_clamp_color_outputs);

        if (c->fs_key->alpha_test) {
                NIR_PASS_V(c->s, nir_lower_alpha_test,
                           c->fs_key->alpha_test_func,
                           false);
        }

        if (c->key->ucp_enables)
                NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);

        /* Note: FS input scalarizing must happen after
         * nir_lower_two_sided_color, which only handles a vec4 at a time.
         */
        NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in);
}

uint64_t *v3d_compile(const struct v3d_compiler *compiler,
                      struct v3d_key *key,
                      struct v3d_prog_data **out_prog_data,
                      nir_shader *s,
                      void (*debug_output)(const char *msg,
                                           void *debug_output_data),
                      void *debug_output_data,
                      int program_id, int variant_id,
                      uint32_t *final_assembly_size)
{
        struct v3d_prog_data *prog_data;
        struct v3d_compile *c = vir_compile_init(compiler, key, s,
                                                 debug_output, debug_output_data,
                                                 program_id, variant_id);

        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                c->vs_key = (struct v3d_vs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_vs_prog_data));
                break;
        case MESA_SHADER_FRAGMENT:
                c->fs_key = (struct v3d_fs_key *)key;
                prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
                break;
        default:
                unreachable("unsupported shader stage");
        }

        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_nir_lower_vs_early(c);
        } else {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_nir_lower_fs_early(c);
        }

        v3d_lower_nir(c);

        if (c->s->info.stage == MESA_SHADER_VERTEX) {
                v3d_nir_lower_vs_late(c);
        } else {
                assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
                v3d_nir_lower_fs_late(c);
        }

        NIR_PASS_V(c->s, v3d_nir_lower_io, c);
        NIR_PASS_V(c->s, v3d_nir_lower_txf_ms, c);
        NIR_PASS_V(c->s, v3d_nir_lower_image_load_store);
        NIR_PASS_V(c->s, nir_lower_idiv);

        v3d_optimize_nir(c->s);
        NIR_PASS_V(c->s, nir_lower_bool_to_int32);
        NIR_PASS_V(c->s, nir_convert_from_ssa, true);

        v3d_nir_to_vir(c);

        v3d_set_prog_data(c, prog_data);

        *out_prog_data = prog_data;

        char *shaderdb;
        int ret = asprintf(&shaderdb,
                           "%s shader: %d inst, %d threads, %d loops, "
                           "%d uniforms, %d:%d spills:fills",
                           vir_get_stage_name(c),
                           c->qpu_inst_count,
                           c->threads,
                           c->loops,
                           c->num_uniforms,
                           c->spills,
                           c->fills);
        if (ret >= 0) {
                c->debug_output(shaderdb, c->debug_output_data);
                free(shaderdb);
        }
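
        /* The emitted line reads like the following (numbers illustrative,
         * wrapped here for comment width):
         *
         *    MESA_SHADER_FRAGMENT shader: 130 inst, 4 threads, 0 loops,
         *    20 uniforms, 0:0 spills:fills
         */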

        return v3d_return_qpu_insts(c, final_assembly_size);
}

void
vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
{
        if (qinst->dst.file == QFILE_TEMP)
                c->defs[qinst->dst.index] = NULL;

        assert(&qinst->link != c->cursor.link);

        list_del(&qinst->link);
        free(qinst);

        c->live_intervals_valid = false;
}

struct qreg
vir_follow_movs(struct v3d_compile *c, struct qreg reg)
{
        /* XXX
        int pack = reg.pack;

        while (reg.file == QFILE_TEMP &&
               c->defs[reg.index] &&
               (c->defs[reg.index]->op == QOP_MOV ||
                c->defs[reg.index]->op == QOP_FMOV) &&
               !c->defs[reg.index]->dst.pack &&
               !c->defs[reg.index]->src[0].pack) {
                reg = c->defs[reg.index]->src[0];
        }

        reg.pack = pack;
        */
        return reg;
}

void
vir_compile_destroy(struct v3d_compile *c)
{
        /* Defuse the assert that we aren't removing the cursor's
         * instruction.
         */
        c->cursor.link = NULL;

        vir_for_each_block(block, c) {
                while (!list_empty(&block->instructions)) {
                        struct qinst *qinst =
                                list_first_entry(&block->instructions,
                                                 struct qinst, link);
                        vir_remove_instruction(c, qinst);
                }
        }

        ralloc_free(c);
}

struct qreg
vir_uniform(struct v3d_compile *c,
            enum quniform_contents contents,
            uint32_t data)
{
        for (int i = 0; i < c->num_uniforms; i++) {
                if (c->uniform_contents[i] == contents &&
                    c->uniform_data[i] == data) {
                        return vir_reg(QFILE_UNIF, i);
                }
        }

        uint32_t uniform = c->num_uniforms++;

        if (uniform >= c->uniform_array_size) {
                c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
                                             c->uniform_array_size * 2);

                c->uniform_data = reralloc(c, c->uniform_data,
                                           uint32_t,
                                           c->uniform_array_size);
                c->uniform_contents = reralloc(c, c->uniform_contents,
                                               enum quniform_contents,
                                               c->uniform_array_size);
        }

        c->uniform_contents[uniform] = contents;
        c->uniform_data[uniform] = data;

        return vir_reg(QFILE_UNIF, uniform);
}
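
/* Usage sketch (illustrative): identical (contents, data) pairs are
 * deduplicated by the linear scan above, so asking for the same constant
 * twice yields the same QFILE_UNIF index:
 *
 *    struct qreg four = vir_uniform(c, QUNIFORM_CONSTANT, 4);
 *    struct qreg same = vir_uniform(c, QUNIFORM_CONSTANT, 4);
 *    assert(four.index == same.index);
 */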

#define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
                if (stage_progress) {                                   \
                        progress = true;                                \
                        if (print_opt_debug) {                          \
                                fprintf(stderr,                         \
                                        "VIR opt pass %2d: %s progress\n", \
                                        pass, #func);                   \
                        }                                               \
                        /*XXX vir_validate(c);*/                        \
                }                                                       \
        } while (0)
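
/* The do { } while (0) wrapper makes OPTPASS(...) expand to a single
 * statement, so it composes safely with if/else.  Each expansion reads and
 * updates the caller's `progress`, `pass`, and `print_opt_debug` locals,
 * which is why the macro is only usable inside vir_optimize() below.
 */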

void
vir_optimize(struct v3d_compile *c)
{
        bool print_opt_debug = false;
        int pass = 1;

        while (true) {
                bool progress = false;

                OPTPASS(vir_opt_copy_propagate);
                OPTPASS(vir_opt_dead_code);
                OPTPASS(vir_opt_small_immediates);

                if (!progress)
                        break;

                pass++;
        }
}

const char *
vir_get_stage_name(struct v3d_compile *c)
{
        if (c->vs_key && c->vs_key->is_coord)
                return "MESA_SHADER_COORD";
        else
                return gl_shader_stage_name(c->s->info.stage);
}