lima/ppir: add control flow support
[mesa.git] / src / gallium / drivers / lima / ir / pp / nir.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include <string.h>
26
27 #include "util/hash_table.h"
28 #include "util/ralloc.h"
29 #include "util/bitscan.h"
30 #include "compiler/nir/nir.h"
31 #include "pipe/p_state.h"
32
33
34 #include "ppir.h"
35
36 static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
37 {
38 ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
39 if (!node)
40 return NULL;
41
42 ppir_dest *dest = ppir_node_get_dest(node);
43 dest->type = ppir_target_ssa;
44 dest->ssa.num_components = ssa->num_components;
45 dest->ssa.live_in = INT_MAX;
46 dest->ssa.live_out = 0;
47 dest->write_mask = u_bit_consecutive(0, ssa->num_components);
48
49 if (node->type == ppir_node_type_load ||
50 node->type == ppir_node_type_store)
51 dest->ssa.is_head = true;
52
53 return node;
54 }
55
56 static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
57 nir_register *reg, unsigned mask)
58 {
59 ppir_node *node = ppir_node_create(block, op, reg->index, mask);
60 if (!node)
61 return NULL;
62
63 ppir_dest *dest = ppir_node_get_dest(node);
64
65 list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
66 if (r->index == reg->index) {
67 dest->reg = r;
68 break;
69 }
70 }
71
72 dest->type = ppir_target_register;
73 dest->write_mask = mask;
74
75 if (node->type == ppir_node_type_load ||
76 node->type == ppir_node_type_store)
77 dest->reg->is_head = true;
78
79 return node;
80 }
81
82 static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
83 nir_dest *dest, unsigned mask)
84 {
85 unsigned index = -1;
86
87 if (dest) {
88 if (dest->is_ssa)
89 return ppir_node_create_ssa(block, op, &dest->ssa);
90 else
91 return ppir_node_create_reg(block, op, dest->reg.reg, mask);
92 }
93
94 return ppir_node_create(block, op, index, 0);
95 }
96
/* Hook up one ppir source to the node that produces it and record the
 * dependency edge.  For SSA sources, some producers are cloned: consts
 * once per successor, and uniform/varying/texture loads once per block,
 * so the scheduler can place each copy freely.  For register sources,
 * every component selected by mask is looked up separately in
 * var_nodes (4 slots per register, after the SSA slots).
 */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      /* Clone consts for each successor */
      switch (child->op) {
      case ppir_op_const:
         child = ppir_node_clone(node->block, child);
         break;
      /* Clone uniforms and load textures for each block */
      case ppir_op_load_texture:
      case ppir_op_load_uniform:
      case ppir_op_load_varying:
         if (child->block != node->block) {
            child = ppir_node_clone(node->block, child);
            /* later uses in this block reuse the clone */
            comp->var_nodes[ns->ssa->index] = child;
         }
         break;
      default:
         break;
      }

      ppir_node_add_dep(node, child);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child);
      }
   }

   /* point ps at the last child found (all components share one reg) */
   ppir_node_target_assign(ps, child);
}
143
/* Map from NIR ALU opcode to ppir opcode; -1 marks unsupported ops
 * (checked in ppir_emit_alu).  Uses a GNU range designator to default
 * every entry to -1.
 */
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_fge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_flt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_feq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};
182
183 static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
184 {
185 nir_alu_instr *instr = nir_instr_as_alu(ni);
186 int op = nir_to_ppir_opcodes[instr->op];
187
188 if (op < 0) {
189 ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
190 return NULL;
191 }
192
193 ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
194 instr->dest.write_mask);
195 if (!node)
196 return NULL;
197
198 ppir_dest *pd = &node->dest;
199 nir_alu_dest *nd = &instr->dest;
200 if (nd->saturate)
201 pd->modifier = ppir_outmod_clamp_fraction;
202
203 unsigned src_mask;
204 switch (op) {
205 case ppir_op_sum3:
206 src_mask = 0b0111;
207 break;
208 case ppir_op_sum4:
209 src_mask = 0b1111;
210 break;
211 default:
212 src_mask = pd->write_mask;
213 break;
214 }
215
216 unsigned num_child = nir_op_infos[instr->op].num_inputs;
217 node->num_src = num_child;
218
219 for (int i = 0; i < num_child; i++) {
220 nir_alu_src *ns = instr->src + i;
221 ppir_src *ps = node->src + i;
222 memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
223 ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
224
225 ps->absolute = ns->abs;
226 ps->negate = ns->negate;
227 }
228
229 return &node->node;
230 }
231
232 static ppir_block *ppir_block_create(ppir_compiler *comp);
233
234 static bool ppir_emit_discard_block(ppir_compiler *comp)
235 {
236 ppir_block *block = ppir_block_create(comp);
237 ppir_discard_node *discard;
238 if (!block)
239 return false;
240
241 comp->discard_block = block;
242 block->comp = comp;
243
244 discard = ppir_node_create(block, ppir_op_discard, -1, 0);
245 if (discard)
246 list_addtail(&discard->node.list, &block->node_list);
247 else
248 return false;
249
250 return true;
251 }
252
253 static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
254 {
255 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
256 ppir_node *node;
257 ppir_compiler *comp = block->comp;
258 ppir_branch_node *branch;
259
260 if (!comp->discard_block && !ppir_emit_discard_block(comp))
261 return NULL;
262
263 node = ppir_node_create(block, ppir_op_branch, -1, 0);
264 if (!node)
265 return NULL;
266 branch = ppir_node_to_branch(node);
267
268 /* second src and condition will be updated during lowering */
269 ppir_node_add_src(block->comp, node, &branch->src[0],
270 &instr->src[0], u_bit_consecutive(0, instr->num_components));
271 branch->num_src = 1;
272 branch->target = comp->discard_block;
273
274 return node;
275 }
276
277 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
278 {
279 ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
280
281 return node;
282 }
283
/* Translate a NIR intrinsic into a ppir node: varying and sysval
 * loads, uniform loads, the color store, and (conditional) discard.
 * Returns NULL for unsupported intrinsics or on allocation failure.
 */
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      /* register dests need an explicit write mask; SSA dests derive
       * theirs inside ppir_node_create_ssa */
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      /* varying slots are addressed per component */
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      return &lnode->node;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      return &lnode->node;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      /* NOTE(review): the offset src is read as a float constant —
       * assumes it is never indirect; confirm against lowering passes */
      lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);

      return &lnode->node;

   case nir_intrinsic_store_output: {
      alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
      if (!alu_node)
         return NULL;

      /* fabricate an SSA dest (index 0) for the color value */
      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.live_in = INT_MAX;
      dest->ssa.live_out = 0;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      /* identity swizzle for the stored value */
      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      return &alu_node->node;
   }

   case nir_intrinsic_discard:
      return ppir_emit_discard(block, ni);

   case nir_intrinsic_discard_if:
      return ppir_emit_discard_if(block, ni);

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return NULL;
   }
}
383
384 static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
385 {
386 nir_load_const_instr *instr = nir_instr_as_load_const(ni);
387 ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
388 if (!node)
389 return NULL;
390
391 assert(instr->def.bit_size == 32);
392
393 for (int i = 0; i < instr->def.num_components; i++)
394 node->constant.value[i].i = instr->value[i].i32;
395 node->constant.num = instr->def.num_components;
396
397 return &node->node;
398 }
399
400 static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
401 {
402 ppir_error("nir_ssa_undef_instr not support\n");
403 return NULL;
404 }
405
/* Translate a NIR texture instruction.  Only plain nir_texop_tex on
 * 2D / rect / external samplers with a coordinate source is supported;
 * anything else fails compilation.
 */
static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   if (instr->op != nir_texop_tex) {
      ppir_error("unsupported texop %d\n", instr->op);
      return NULL;
   }

   /* register dests need an explicit mask; SSA dests derive theirs */
   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return NULL;

   node->sampler = instr->texture_index;

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return NULL;
   }

   node->sampler_dim = instr->sampler_dim;

   /* identity swizzle for the coordinate source */
   for (int i = 0; i < instr->coord_components; i++)
      node->src_coords.swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         break;
      default:
         ppir_error("unsupported texture source type\n");
         assert(0);
         return NULL;
      }
   }

   return &node->node;
}
456
457 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
458 {
459 ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);
460
461 return block;
462 }
463
464 static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
465 {
466 ppir_node *node;
467 ppir_compiler *comp = block->comp;
468 ppir_branch_node *branch;
469 ppir_block *jump_block;
470 nir_jump_instr *jump = nir_instr_as_jump(ni);
471
472 switch (jump->type) {
473 case nir_jump_break: {
474 assert(comp->current_block->successors[0]);
475 assert(!comp->current_block->successors[1]);
476 jump_block = comp->current_block->successors[0];
477 }
478 break;
479 case nir_jump_continue:
480 jump_block = comp->loop_cont_block;
481 break;
482 default:
483 ppir_error("nir_jump_instr not support\n");
484 return NULL;
485 }
486
487 assert(jump_block != NULL);
488
489 node = ppir_node_create(block, ppir_op_branch, -1, 0);
490 if (!node)
491 return NULL;
492 branch = ppir_node_to_branch(node);
493
494 /* Unconditional */
495 branch->num_src = 0;
496 branch->target = jump_block;
497
498 return node;
499 }
500
/* Dispatch table indexed by nir_instr_type.  Sized to exclude phi and
 * later instruction types, which ppir_emit_block asserts never appear.
 */
static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};
509
510 static ppir_block *ppir_block_create(ppir_compiler *comp)
511 {
512 ppir_block *block = rzalloc(comp, ppir_block);
513 if (!block)
514 return NULL;
515
516 list_inithead(&block->node_list);
517 list_inithead(&block->instr_list);
518
519 block->comp = comp;
520
521 return block;
522 }
523
524 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
525 {
526 ppir_block *block = ppir_get_block(comp, nblock);
527
528 comp->current_block = block;
529
530 list_addtail(&block->list, &comp->block_list);
531
532 nir_foreach_instr(instr, nblock) {
533 assert(instr->type < nir_instr_type_phi);
534 ppir_node *node = ppir_emit_instr[instr->type](block, instr);
535 if (!node)
536 return false;
537
538 list_addtail(&node->list, &block->node_list);
539 }
540
541 return true;
542 }
543
static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

/* Emit a NIR if/else: a negated conditional branch over the then-list
 * plus, when the else-list is non-empty, an unconditional branch from
 * the end of the then-list past the else-list.
 */
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->then_list);
   if (empty_else_block) {
      /* no else code: branch straight to the block after the if */
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   /* append an unconditional branch at the end of the then-list that
    * skips the else-list */
   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->else_list);

   return true;
}
608
609 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
610 {
611 ppir_block *save_loop_cont_block = comp->loop_cont_block;
612 ppir_block *block;
613 ppir_branch_node *loop_branch;
614 nir_block *loop_last_block;
615 ppir_node *node;
616
617 comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));
618
619 ppir_emit_cf_list(comp, &nloop->body);
620
621 loop_last_block = nir_loop_last_block(nloop);
622 block = ppir_get_block(comp, loop_last_block);
623 node = ppir_node_create(block, ppir_op_branch, -1, 0);
624 if (!node)
625 return false;
626 loop_branch = ppir_node_to_branch(node);
627 /* Unconditional */
628 loop_branch->num_src = 0;
629 loop_branch->target = comp->loop_cont_block;
630 list_addtail(&loop_branch->node.list, &block->node_list);
631
632 comp->loop_cont_block = save_loop_cont_block;
633
634 comp->num_loops++;
635
636 return true;
637 }
638
639 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
640 {
641 ppir_error("function nir_cf_node not support\n");
642 return false;
643 }
644
645 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
646 {
647 foreach_list_typed(nir_cf_node, node, node, list) {
648 bool ret;
649
650 switch (node->type) {
651 case nir_cf_node_block:
652 ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
653 break;
654 case nir_cf_node_if:
655 ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
656 break;
657 case nir_cf_node_loop:
658 ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
659 break;
660 case nir_cf_node_function:
661 ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
662 break;
663 default:
664 ppir_error("unknown NIR node type %d\n", node->type);
665 return false;
666 }
667
668 if (!ret)
669 return false;
670 }
671
672 return true;
673 }
674
675 static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
676 {
677 ppir_compiler *comp = rzalloc_size(
678 prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
679 if (!comp)
680 return NULL;
681
682 list_inithead(&comp->block_list);
683 list_inithead(&comp->reg_list);
684 comp->blocks = _mesa_hash_table_u64_create(prog);
685
686 comp->var_nodes = (ppir_node **)(comp + 1);
687 comp->reg_base = num_ssa;
688 comp->prog = prog;
689 return comp;
690 }
691
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instructions order. Consider discard_if and store_output as
    * example. If we don't add fake dependency of discard_if to store_output
    * scheduler may put store_output first and since store_output terminates
    * shader on Utgard PP, rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: scheduler should schedule discard_if as early as possible otherwise
    * we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies can
    * be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      /* walk backwards so prev_node is the nearest LATER ordering op */
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node);
         }
         if (node->op == ppir_op_discard ||
             node->op == ppir_op_store_color ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}
733
734 static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
735 struct pipe_debug_callback *debug)
736 {
737 const struct shader_info *info = &nir->info;
738 char *shaderdb;
739 int ret = asprintf(&shaderdb,
740 "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
741 gl_shader_stage_name(info->stage),
742 comp->cur_instr_index,
743 comp->num_loops,
744 comp->num_spills,
745 comp->num_fills);
746 assert(ret >= 0);
747
748 if (lima_debug & LIMA_DEBUG_SHADERDB)
749 fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
750
751 pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
752 free(shaderdb);
753 }
754
/* For every block and register, make each later write of the register
 * depend on every earlier read, so the scheduler cannot hoist the
 * write above the read (write-after-read hazard).
 */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         /* walk backwards; `write` tracks the nearest LATER write */
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write)
                  ppir_node_add_dep(write, node);
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}
776
777 bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
778 struct ra_regs *ra,
779 struct pipe_debug_callback *debug)
780 {
781 nir_function_impl *func = nir_shader_get_entrypoint(nir);
782 ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
783 if (!comp)
784 return false;
785
786 comp->ra = ra;
787
788 /* 1st pass: create ppir blocks */
789 nir_foreach_function(function, nir) {
790 if (!function->impl)
791 continue;
792
793 nir_foreach_block(nblock, function->impl) {
794 ppir_block *block = ppir_block_create(comp);
795 if (!block)
796 return false;
797 block->index = nblock->index;
798 _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
799 }
800 }
801
802 /* 2nd pass: populate successors */
803 nir_foreach_function(function, nir) {
804 if (!function->impl)
805 continue;
806
807 nir_foreach_block(nblock, function->impl) {
808 ppir_block *block = ppir_get_block(comp, nblock);
809 assert(block);
810
811 for (int i = 0; i < 2; i++) {
812 if (nblock->successors[i])
813 block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
814 }
815 }
816 }
817
818 /* Validate outputs, we support only gl_FragColor */
819 nir_foreach_variable(var, &nir->outputs) {
820 switch (var->data.location) {
821 case FRAG_RESULT_COLOR:
822 case FRAG_RESULT_DATA0:
823 break;
824 default:
825 ppir_error("unsupported output type\n");
826 goto err_out0;
827 break;
828 }
829 }
830
831 foreach_list_typed(nir_register, reg, node, &func->registers) {
832 ppir_reg *r = rzalloc(comp, ppir_reg);
833 if (!r)
834 return false;
835
836 r->index = reg->index;
837 r->num_components = reg->num_components;
838 r->live_in = INT_MAX;
839 r->live_out = 0;
840 r->is_head = false;
841 list_addtail(&r->list, &comp->reg_list);
842 }
843
844 if (!ppir_emit_cf_list(comp, &func->body))
845 goto err_out0;
846
847 /* If we have discard block add it to the very end */
848 if (comp->discard_block)
849 list_addtail(&comp->discard_block->list, &comp->block_list);
850
851 ppir_node_print_prog(comp);
852
853 if (!ppir_lower_prog(comp))
854 goto err_out0;
855
856 ppir_add_ordering_deps(comp);
857 ppir_add_write_after_read_deps(comp);
858
859 ppir_node_print_prog(comp);
860
861 if (!ppir_node_to_instr(comp))
862 goto err_out0;
863
864 if (!ppir_schedule_prog(comp))
865 goto err_out0;
866
867 if (!ppir_regalloc_prog(comp))
868 goto err_out0;
869
870 if (!ppir_codegen_prog(comp))
871 goto err_out0;
872
873 ppir_print_shader_db(nir, comp, debug);
874
875 _mesa_hash_table_u64_destroy(comp->blocks, NULL);
876 ralloc_free(comp);
877 return true;
878
879 err_out0:
880 _mesa_hash_table_u64_destroy(comp->blocks, NULL);
881 ralloc_free(comp);
882 return false;
883 }
884