lima/ppir: fix ssa undef emit
[mesa.git] src/gallium/drivers/lima/ir/pp/nir.c
/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"


#include "ppir.h"

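/* Create a node that writes the given NIR SSA def. Destinations of
 * load/store nodes are flagged as register heads so that register
 * allocation places them at the start of a register.
 */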
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

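/* Create a node that writes (part of) a NIR register, looking up the
 * matching ppir_reg in the compiler's reg_list by index.
 */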
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

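/* Create a node for a NIR destination: dispatch to the SSA or register
 * variant, or create a destination-less node (index -1) when dest is
 * NULL, e.g. for stores.
 */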
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, &dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.reg, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

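/* Resolve a NIR source to the ppir node producing it, cloning consts and
 * loads where needed (see the per-op comments below), add a dependency
 * edge from the consumer to it, and assign it as the target of ps.
 */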
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      /* Clone consts for each successor */
      switch (child->op) {
      case ppir_op_const:
         child = ppir_node_clone(node->block, child);
         break;
      case ppir_op_load_varying: {
         bool is_load_coords = false;
         if (node->op == ppir_op_load_texture) {
            nir_tex_src *nts = (nir_tex_src *)ns;
            if (nts->src_type == nir_tex_src_coord)
               is_load_coords = true;
         }

         if (!is_load_coords) {
            /* Clone varying loads for each block */
            if (child->block != node->block) {
               ppir_node *new = ppir_node_clone(node->block, child);
               /* If we clone it for every block and there is no user of
                * the original load left, delete the original one. */
               ppir_delete_if_orphan(node->block, child);
               child = new;
               comp->var_nodes[ns->ssa->index] = child;
            }
            break;
         }
         /* At least one successor is load_texture, promote it to load_coords
          * to ensure that it has exactly one successor */
         child->op = ppir_op_load_coords;
      }
      /* Fallthrough */
      case ppir_op_load_uniform:
      case ppir_op_load_coords:
      case ppir_op_load_coords_reg:
         /* Clone uniform and texture coord loads for each block.
          * Also ensure that each load has a single successor.
          * Let's do a fetch each time and hope for a cache hit instead
          * of increasing reg pressure.
          */
         if (child->block != node->block || !ppir_node_is_root(child)) {
            child = ppir_node_clone(node->block, child);
            comp->var_nodes[ns->ssa->index] = child;
         }
         break;
      default:
         break;
      }

      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it is written: create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_undef, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_undef)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   ppir_node_target_assign(ps, child);
}

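/* Map NIR ALU opcodes to ppir ops; anything left at -1 is unsupported. */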
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

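/* Translate a nir_alu_instr into a ppir ALU node: map the opcode through
 * the table above, turn saturate into an output modifier, and wire up each
 * source with its swizzle and abs/negate flags. sum3/sum4 read more
 * components than they write, hence the widened source mask.
 */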
static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return NULL;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return NULL;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   return &node->node;
}

static ppir_block *ppir_block_create(ppir_compiler *comp);

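/* Create the block that conditional discards branch to. It holds a single
 * discard node; ppir_compile_nir appends it to the very end of the block
 * list.
 */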
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}

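/* Emit discard_if as a conditional branch to the shared discard block,
 * creating that block on first use.
 */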
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

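/* Translate NIR intrinsics: input/uniform loads become ppir load nodes
 * (folding a constant offset into the index, otherwise passing the offset
 * as a source), frag_coord/point_coord/front_face map to dedicated loads,
 * store_output becomes store_color, and discards are handled above.
 */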
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      return &lnode->node;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      return &lnode->node;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      return &lnode->node;

   case nir_intrinsic_store_output: {
      alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
      if (!alu_node)
         return NULL;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      return &alu_node->node;
   }

   case nir_intrinsic_discard:
      return ppir_emit_discard(block, ni);

   case nir_intrinsic_discard_if:
      return ppir_emit_discard_if(block, ni);

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return NULL;
   }
}

static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return NULL;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   return &node->node;
}

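/* Emit an undef as an ALU node whose SSA destination is flagged undef, so
 * later passes know not to expect a real value (and ppir_node_add_src adds
 * no dependency edge to it).
 */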
static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return NULL;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   return node;
}

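/* Translate texture instructions (tex/txb/txl on the supported sampler
 * dims) into a load_texture node: src[0] carries the coordinates, src[1]
 * an optional explicit lod or bias.
 */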
static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return NULL;
   }

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return NULL;

   node->sampler = instr->texture_index;

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return NULL;
   }

   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return NULL;
      }
   }

   return &node->node;
}

static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);

   return block;
}

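/* Emit break/continue as an unconditional branch: break targets the
 * current block's sole successor, continue targets the loop's first block
 * saved in comp->loop_cont_block.
 */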
static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
   }
   break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("unsupported nir_jump_instr\n");
      return NULL;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   return node;
}

static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}

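/* Emit every instruction of a NIR block into the matching ppir block.
 * Phis (and later instruction types) must have been lowered away by now,
 * hence the assert.
 */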
static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      ppir_node *node = ppir_emit_instr[instr->type](block, instr);
      if (!node)
         return false;

      list_addtail(&node->list, &block->node_list);
   }

   return true;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate the condition to minimize branching. We're generating the
    * following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or, if the else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->then_list);
   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add the empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will be fixed up later */
   list_addtail(&after_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->else_list);

   return true;
}

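/* Emit a loop body and close it with an unconditional back-edge from the
 * last block to the first; break/continue inside the body are handled by
 * ppir_emit_jump.
 */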
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   ppir_emit_cf_list(comp, &nloop->body);

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node is not supported\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}

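/* Allocate the compiler together with its var_nodes array: one slot per
 * SSA def followed by four slots per NIR register (one per component);
 * reg_base is the offset where the register slots start.
 */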
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;
   comp->prog = prog;
   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and store_output as an
    * example: if we don't add a fake dependency from discard_if to
    * store_output, the scheduler may put store_output first, and since
    * store_output terminates the shader on Utgard PP, the rest of it
    * will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies
    * can be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->op == ppir_op_discard ||
             node->op == ppir_op_store_color ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}

static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

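/* For every register, walk each block backwards tracking the latest write
 * and add a write-after-read dependency from that write to every earlier
 * read of the same register, so the write cannot be scheduled before them.
 */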
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}

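/* Compiler entry point: build ppir blocks for every NIR block, record
 * their successors, validate the outputs, emit the CFG, then run the
 * lowering, node-to-instruction, scheduling, register allocation and
 * codegen passes in order.
 */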
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct pipe_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;

   /* 1st pass: create ppir blocks */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   /* Validate outputs: we only support gl_FragColor */
   nir_foreach_variable(var, &nir->outputs) {
      switch (var->data.location) {
      case FRAG_RESULT_COLOR:
      case FRAG_RESULT_DATA0:
         break;
      default:
         ppir_error("unsupported output type\n");
         goto err_out0;
         break;
      }
   }

   foreach_list_typed(nir_register, reg, node, &func->registers) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = reg->index;
      r->num_components = reg->num_components;
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have a discard block, add it at the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks, NULL);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks, NULL);
   ralloc_free(comp);
   return false;
}