9af1a8e65b0a5bd7c97d8c515f8c81a612b72b36
[mesa.git] / src / gallium / drivers / lima / ir / pp / nir.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include <string.h>
26
27 #include "util/hash_table.h"
28 #include "util/ralloc.h"
29 #include "util/bitscan.h"
30 #include "compiler/nir/nir.h"
31 #include "pipe/p_state.h"
32
33
34 #include "ppir.h"
35
36 static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
37 {
38 ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
39 if (!node)
40 return NULL;
41
42 ppir_dest *dest = ppir_node_get_dest(node);
43 dest->type = ppir_target_ssa;
44 dest->ssa.num_components = ssa->num_components;
45 dest->ssa.live_in = INT_MAX;
46 dest->ssa.live_out = 0;
47 dest->write_mask = u_bit_consecutive(0, ssa->num_components);
48
49 if (node->type == ppir_node_type_load ||
50 node->type == ppir_node_type_store)
51 dest->ssa.is_head = true;
52
53 return node;
54 }
55
/* Create a ppir node whose destination is the NIR register 'reg', writing
 * the components in 'mask'.  Returns NULL on allocation failure. */
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   /* Look up the ppir_reg mirrored from this NIR register (created in
    * ppir_compile_nir).  NOTE(review): if no entry matches, dest->reg
    * stays NULL and the is_head store below would crash — presumably
    * every NIR register is mirrored before nodes are emitted; confirm. */
   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   /* Same head-marking as in ppir_node_create_ssa for loads/stores. */
   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}
81
82 static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
83 nir_dest *dest, unsigned mask)
84 {
85 unsigned index = -1;
86
87 if (dest) {
88 if (dest->is_ssa)
89 return ppir_node_create_ssa(block, op, &dest->ssa);
90 else
91 return ppir_node_create_reg(block, op, dest->reg.reg, mask);
92 }
93
94 return ppir_node_create(block, op, index, 0);
95 }
96
/* Resolve the NIR source 'ns' to the ppir node that produces it, add a
 * dependency from 'node' to that producer, and assign the producer as the
 * target of 'ps'.  'mask' selects which components of a register source
 * are actually read (ignored for SSA sources).
 *
 * Some SSA producers are cloned instead of shared: consts per successor,
 * and the various loads per block (and per successor for uniform/coords)
 * to keep loads single-successor for later lowering/scheduling. */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      /* Clone consts for each successor */
      switch (child->op) {
      case ppir_op_const:
         child = ppir_node_clone(node->block, child);
         break;
      case ppir_op_load_texture:
         /* Clone texture loads for each block */
         if (child->block != node->block) {
            child = ppir_node_clone(node->block, child);
            comp->var_nodes[ns->ssa->index] = child;
         }
         break;
      case ppir_op_load_varying:
         if ((node->op != ppir_op_load_texture)) {
            /* Clone varying loads for each block */
            if (child->block != node->block) {
               child = ppir_node_clone(node->block, child);
               comp->var_nodes[ns->ssa->index] = child;
            }
            break;
         }
         /* At least one successor is load_texture, promote it to load_coords
          * to ensure that is has exactly one successor */
         child->op = ppir_op_load_coords;
         /* Fallthrough */
      case ppir_op_load_uniform:
      case ppir_op_load_coords:
         /* Clone uniform and texture coord loads for each block.
          * Also ensure that each load has a single successor.
          * Let's do a fetch each time and hope for a cache hit instead
          * of increasing reg pressure.
          */
         if (child->block != node->block || !ppir_node_is_root(child)) {
            child = ppir_node_clone(node->block, child);
            comp->var_nodes[ns->ssa->index] = child;
         }
         break;
      default:
         break;
      }

      ppir_node_add_dep(node, child);
   }
   else {
      nir_register *reg = ns->reg.reg;
      /* Register sources are tracked per component: var_nodes (after
       * reg_base) presumably holds the last writer of each
       * (reg, component) pair — confirm against the register-write path. */
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child);
      }
   }

   /* NOTE(review): for register sources 'child' is the producer of the
    * last scanned component; presumably target assignment only needs the
    * register, not the exact producing node — confirm. */
   ppir_node_target_assign(ps, child);
}
166
/* Map from NIR ALU opcode to ppir opcode.  Entries left at -1 are not
 * supported by the PP backend and are rejected in ppir_emit_alu.  Uses
 * the GCC/Clang "[first ... last] =" designated-range initializer
 * extension to default the whole table to -1 first. */
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   /* Both the SET (s*) and comparison (f*) forms map to the same ppir
    * comparison ops. */
   [nir_op_sge] = ppir_op_ge,
   [nir_op_fge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_flt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_feq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};
205
206 static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
207 {
208 nir_alu_instr *instr = nir_instr_as_alu(ni);
209 int op = nir_to_ppir_opcodes[instr->op];
210
211 if (op < 0) {
212 ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
213 return NULL;
214 }
215
216 ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
217 instr->dest.write_mask);
218 if (!node)
219 return NULL;
220
221 ppir_dest *pd = &node->dest;
222 nir_alu_dest *nd = &instr->dest;
223 if (nd->saturate)
224 pd->modifier = ppir_outmod_clamp_fraction;
225
226 unsigned src_mask;
227 switch (op) {
228 case ppir_op_sum3:
229 src_mask = 0b0111;
230 break;
231 case ppir_op_sum4:
232 src_mask = 0b1111;
233 break;
234 default:
235 src_mask = pd->write_mask;
236 break;
237 }
238
239 unsigned num_child = nir_op_infos[instr->op].num_inputs;
240 node->num_src = num_child;
241
242 for (int i = 0; i < num_child; i++) {
243 nir_alu_src *ns = instr->src + i;
244 ppir_src *ps = node->src + i;
245 memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
246 ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
247
248 ps->absolute = ns->abs;
249 ps->negate = ns->negate;
250 }
251
252 return &node->node;
253 }
254
255 static ppir_block *ppir_block_create(ppir_compiler *comp);
256
257 static bool ppir_emit_discard_block(ppir_compiler *comp)
258 {
259 ppir_block *block = ppir_block_create(comp);
260 ppir_discard_node *discard;
261 if (!block)
262 return false;
263
264 comp->discard_block = block;
265 block->comp = comp;
266
267 discard = ppir_node_create(block, ppir_op_discard, -1, 0);
268 if (discard)
269 list_addtail(&discard->node.list, &block->node_list);
270 else
271 return false;
272
273 return true;
274 }
275
276 static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
277 {
278 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
279 ppir_node *node;
280 ppir_compiler *comp = block->comp;
281 ppir_branch_node *branch;
282
283 if (!comp->discard_block && !ppir_emit_discard_block(comp))
284 return NULL;
285
286 node = ppir_node_create(block, ppir_op_branch, -1, 0);
287 if (!node)
288 return NULL;
289 branch = ppir_node_to_branch(node);
290
291 /* second src and condition will be updated during lowering */
292 ppir_node_add_src(block->comp, node, &branch->src[0],
293 &instr->src[0], u_bit_consecutive(0, instr->num_components));
294 branch->num_src = 1;
295 branch->target = comp->discard_block;
296
297 return node;
298 }
299
300 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
301 {
302 ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
303
304 return node;
305 }
306
/* Translate a NIR intrinsic into a ppir node.  Handles varying, sysval
 * and uniform loads, color output stores and discard; anything else is
 * rejected with an error. */
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      /* SSA dests derive their write mask from num_components inside
       * ppir_node_create_ssa; only register dests need 'mask' here. */
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      /* Varyings are addressed per component: 4 slots per location. */
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      return &lnode->node;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      /* Pick the ppir sysval-load op matching the intrinsic. */
      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         /* Unreachable: the outer case list matches the inner one. */
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      return &lnode->node;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      /* NOTE(review): the offset src is read as a float and truncated —
       * presumably lima's lowering produces float uniform offsets;
       * confirm against the lowering passes. */
      lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);

      return &lnode->node;

   case nir_intrinsic_store_output: {
      /* The color store is modeled as an ALU node with a synthetic SSA
       * dest at index 0; the real store happens at codegen. */
      alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
      if (!alu_node)
         return NULL;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.live_in = INT_MAX;
      dest->ssa.live_out = 0;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      /* Identity swizzle for the stored value. */
      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      return &alu_node->node;
   }

   case nir_intrinsic_discard:
      return ppir_emit_discard(block, ni);

   case nir_intrinsic_discard_if:
      return ppir_emit_discard_if(block, ni);

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return NULL;
   }
}
406
407 static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
408 {
409 nir_load_const_instr *instr = nir_instr_as_load_const(ni);
410 ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
411 if (!node)
412 return NULL;
413
414 assert(instr->def.bit_size == 32);
415
416 for (int i = 0; i < instr->def.num_components; i++)
417 node->constant.value[i].i = instr->value[i].i32;
418 node->constant.num = instr->def.num_components;
419
420 return &node->node;
421 }
422
423 static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
424 {
425 ppir_error("nir_ssa_undef_instr not support\n");
426 return NULL;
427 }
428
/* Translate nir_texop_tex (plain texture sample) into a load_texture
 * node.  Other texops, sampler dims and tex source types are rejected.
 * The failed node on the error paths is not freed explicitly —
 * presumably it is ralloc'ed under the compiler and released with it;
 * confirm against ppir_node_create. */
static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   if (instr->op != nir_texop_tex) {
      ppir_error("unsupported texop %d\n", instr->op);
      return NULL;
   }

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return NULL;

   node->sampler = instr->texture_index;

   /* Only plain 2D-style samplers are supported by this backend. */
   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return NULL;
   }

   node->sampler_dim = instr->sampler_dim;

   /* Identity swizzle for the coordinate components. */
   for (int i = 0; i < instr->coord_components; i++)
      node->src_coords.swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         break;
      default:
         ppir_error("unsupported texture source type\n");
         assert(0);
         return NULL;
      }
   }

   return &node->node;
}
479
480 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
481 {
482 ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);
483
484 return block;
485 }
486
487 static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
488 {
489 ppir_node *node;
490 ppir_compiler *comp = block->comp;
491 ppir_branch_node *branch;
492 ppir_block *jump_block;
493 nir_jump_instr *jump = nir_instr_as_jump(ni);
494
495 switch (jump->type) {
496 case nir_jump_break: {
497 assert(comp->current_block->successors[0]);
498 assert(!comp->current_block->successors[1]);
499 jump_block = comp->current_block->successors[0];
500 }
501 break;
502 case nir_jump_continue:
503 jump_block = comp->loop_cont_block;
504 break;
505 default:
506 ppir_error("nir_jump_instr not support\n");
507 return NULL;
508 }
509
510 assert(jump_block != NULL);
511
512 node = ppir_node_create(block, ppir_op_branch, -1, 0);
513 if (!node)
514 return NULL;
515 branch = ppir_node_to_branch(node);
516
517 /* Unconditional */
518 branch->num_src = 0;
519 branch->target = jump_block;
520
521 return node;
522 }
523
/* Dispatch table of emit handlers indexed by nir_instr_type.  Sized
 * nir_instr_type_phi because phis (and later types) must never reach the
 * backend — asserted in ppir_emit_block. */
static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};
532
533 static ppir_block *ppir_block_create(ppir_compiler *comp)
534 {
535 ppir_block *block = rzalloc(comp, ppir_block);
536 if (!block)
537 return NULL;
538
539 list_inithead(&block->node_list);
540 list_inithead(&block->instr_list);
541
542 block->comp = comp;
543
544 return block;
545 }
546
547 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
548 {
549 ppir_block *block = ppir_get_block(comp, nblock);
550
551 comp->current_block = block;
552
553 list_addtail(&block->list, &comp->block_list);
554
555 nir_foreach_instr(instr, nblock) {
556 assert(instr->type < nir_instr_type_phi);
557 ppir_node *node = ppir_emit_instr[instr->type](block, instr);
558 if (!node)
559 return false;
560
561 list_addtail(&node->list, &block->node_list);
562 }
563
564 return true;
565 }
566
567 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
568
/* Emit a NIR if: a negated conditional branch to the else (or merge)
 * block, the then-list, an unconditional branch over the else-list
 * (target fixed up later), then the else-list. */
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   /* The else side is "empty" when it is a single block with no
    * instructions (NIR always materializes an else block). */
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->then_list);
   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      /* NOTE(review): assumes successors[1] of the current block is the
       * (empty) else block — confirm against the successor wiring in
       * ppir_compile_nir's 2nd pass. */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   /* End the then side with a jump over the else side. */
   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->else_list);

   return true;
}
631
632 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
633 {
634 ppir_block *save_loop_cont_block = comp->loop_cont_block;
635 ppir_block *block;
636 ppir_branch_node *loop_branch;
637 nir_block *loop_last_block;
638 ppir_node *node;
639
640 comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));
641
642 ppir_emit_cf_list(comp, &nloop->body);
643
644 loop_last_block = nir_loop_last_block(nloop);
645 block = ppir_get_block(comp, loop_last_block);
646 node = ppir_node_create(block, ppir_op_branch, -1, 0);
647 if (!node)
648 return false;
649 loop_branch = ppir_node_to_branch(node);
650 /* Unconditional */
651 loop_branch->num_src = 0;
652 loop_branch->target = comp->loop_cont_block;
653 list_addtail(&loop_branch->node.list, &block->node_list);
654
655 comp->loop_cont_block = save_loop_cont_block;
656
657 comp->num_loops++;
658
659 return true;
660 }
661
662 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
663 {
664 ppir_error("function nir_cf_node not support\n");
665 return false;
666 }
667
668 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
669 {
670 foreach_list_typed(nir_cf_node, node, node, list) {
671 bool ret;
672
673 switch (node->type) {
674 case nir_cf_node_block:
675 ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
676 break;
677 case nir_cf_node_if:
678 ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
679 break;
680 case nir_cf_node_loop:
681 ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
682 break;
683 case nir_cf_node_function:
684 ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
685 break;
686 default:
687 ppir_error("unknown NIR node type %d\n", node->type);
688 return false;
689 }
690
691 if (!ret)
692 return false;
693 }
694
695 return true;
696 }
697
698 static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
699 {
700 ppir_compiler *comp = rzalloc_size(
701 prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
702 if (!comp)
703 return NULL;
704
705 list_inithead(&comp->block_list);
706 list_inithead(&comp->reg_list);
707 comp->blocks = _mesa_hash_table_u64_create(prog);
708
709 comp->var_nodes = (ppir_node **)(comp + 1);
710 comp->reg_base = num_ssa;
711 comp->prog = prog;
712 return comp;
713 }
714
/* Some intrinsics do not have explicit dependencies and thus depend
 * on instructions order. Consider discard_if and store_output as
 * example. If we don't add fake dependency of discard_if to store_output
 * scheduler may put store_output first and since store_output terminates
 * shader on Utgard PP, rest of it will never be executed.
 * Add fake dependencies for discard/branch/store to preserve
 * instruction order.
 *
 * TODO: scheduler should schedule discard_if as early as possible otherwise
 * we may end up with suboptimal code for cases like this:
 *
 * s3 = s1 < s2
 * discard_if s3
 * s4 = s1 + s2
 * store s4
 *
 * In this case store depends on discard_if and s4, but since dependencies can
 * be scheduled in any order it can result in code like this:
 *
 * instr1: s3 = s1 < s3
 * instr2: s4 = s1 + s2
 * instr3: discard_if s3
 * instr4: store s4
 */
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      /* Walk each block backwards; 'prev_node' is the nearest
       * order-sensitive node (discard/store/branch) seen so far, i.e.
       * the next one in program order. */
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         /* Make every root node (no existing consumers) except consts
          * a dependency of the following order-sensitive node. */
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node);
         }
         if (node->op == ppir_op_discard ||
             node->op == ppir_op_store_color ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}
756
757 static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
758 struct pipe_debug_callback *debug)
759 {
760 const struct shader_info *info = &nir->info;
761 char *shaderdb;
762 int ret = asprintf(&shaderdb,
763 "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
764 gl_shader_stage_name(info->stage),
765 comp->cur_instr_index,
766 comp->num_loops,
767 comp->num_spills,
768 comp->num_fills);
769 assert(ret >= 0);
770
771 if (lima_debug & LIMA_DEBUG_SHADERDB)
772 fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
773
774 pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
775 free(shaderdb);
776 }
777
/* Add write-after-read (anti) dependencies: for every register and every
 * block, walking nodes from last to first, make each register write
 * depend on all reads of that register occurring earlier in program
 * order, so the scheduler cannot hoist a write above a preceding read. */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         /* 'write' is the nearest write to 'reg' that follows the
          * current node in program order (reverse walk). */
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write)
                  ppir_node_add_dep(write, node);
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}
799
800 bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
801 struct ra_regs *ra,
802 struct pipe_debug_callback *debug)
803 {
804 nir_function_impl *func = nir_shader_get_entrypoint(nir);
805 ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
806 if (!comp)
807 return false;
808
809 comp->ra = ra;
810
811 /* 1st pass: create ppir blocks */
812 nir_foreach_function(function, nir) {
813 if (!function->impl)
814 continue;
815
816 nir_foreach_block(nblock, function->impl) {
817 ppir_block *block = ppir_block_create(comp);
818 if (!block)
819 return false;
820 block->index = nblock->index;
821 _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
822 }
823 }
824
825 /* 2nd pass: populate successors */
826 nir_foreach_function(function, nir) {
827 if (!function->impl)
828 continue;
829
830 nir_foreach_block(nblock, function->impl) {
831 ppir_block *block = ppir_get_block(comp, nblock);
832 assert(block);
833
834 for (int i = 0; i < 2; i++) {
835 if (nblock->successors[i])
836 block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
837 }
838 }
839 }
840
841 /* Validate outputs, we support only gl_FragColor */
842 nir_foreach_variable(var, &nir->outputs) {
843 switch (var->data.location) {
844 case FRAG_RESULT_COLOR:
845 case FRAG_RESULT_DATA0:
846 break;
847 default:
848 ppir_error("unsupported output type\n");
849 goto err_out0;
850 break;
851 }
852 }
853
854 foreach_list_typed(nir_register, reg, node, &func->registers) {
855 ppir_reg *r = rzalloc(comp, ppir_reg);
856 if (!r)
857 return false;
858
859 r->index = reg->index;
860 r->num_components = reg->num_components;
861 r->live_in = INT_MAX;
862 r->live_out = 0;
863 r->is_head = false;
864 list_addtail(&r->list, &comp->reg_list);
865 }
866
867 if (!ppir_emit_cf_list(comp, &func->body))
868 goto err_out0;
869
870 /* If we have discard block add it to the very end */
871 if (comp->discard_block)
872 list_addtail(&comp->discard_block->list, &comp->block_list);
873
874 ppir_node_print_prog(comp);
875
876 if (!ppir_lower_prog(comp))
877 goto err_out0;
878
879 ppir_add_ordering_deps(comp);
880 ppir_add_write_after_read_deps(comp);
881
882 ppir_node_print_prog(comp);
883
884 if (!ppir_node_to_instr(comp))
885 goto err_out0;
886
887 if (!ppir_schedule_prog(comp))
888 goto err_out0;
889
890 if (!ppir_regalloc_prog(comp))
891 goto err_out0;
892
893 if (!ppir_codegen_prog(comp))
894 goto err_out0;
895
896 ppir_print_shader_db(nir, comp, debug);
897
898 _mesa_hash_table_u64_destroy(comp->blocks, NULL);
899 ralloc_free(comp);
900 return true;
901
902 err_out0:
903 _mesa_hash_table_u64_destroy(comp->blocks, NULL);
904 ralloc_free(comp);
905 return false;
906 }
907