lima/ppir: duplicate consts in nir
src/gallium/drivers/lima/ir/pp/nir.c
/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"

#include "ppir.h"

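/* Create a node whose destination is an SSA value. The write mask covers
 * every component of the def. Load/store nodes are flagged as "head"
 * (presumably because the PP needs their value to start at the first
 * component of a register). */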
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

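/* Create a node that writes (a masked part of) a NIR register. The backing
 * ppir_reg is looked up by index in the compiler's reg_list, which
 * ppir_compile_nir fills in up front from func->registers. */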
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, &dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.reg, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

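/* Wire up one source of a node and add the dependency edges that drive
 * scheduling. SSA sources map 1:1 to the producing node; register sources
 * may have a different producer per component, so every component selected
 * by the mask gets its own dependency (with a dummy node standing in for
 * reads that happen before any write). */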
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      switch (child->op) {
      case ppir_op_load_varying:
         /* If at least one successor is load_texture, promote it to
          * load_coords to ensure that it has exactly one successor */
         if (node->op == ppir_op_load_texture) {
            nir_tex_src *nts = (nir_tex_src *)ns;
            if (nts->src_type == nir_tex_src_coord)
               child->op = ppir_op_load_coords;
         }
         break;
      default:
         break;
      }

      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it is written; create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   ppir_node_target_assign(ps, child);
}

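/* Map from NIR ALU opcodes to ppir opcodes. Entries left at -1 are
 * unsupported and make ppir_emit_alu fail the compile. */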
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

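/* Translate a nir_alu_instr into a ppir ALU node, carrying over the write
 * mask, saturate modifier and per-source swizzle/abs/negate. sum3/sum4
 * read more components than they write, so they need a wider source mask
 * than the destination write mask. */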
static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return NULL;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return NULL;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   return &node->node;
}

static ppir_block *ppir_block_create(ppir_compiler *comp);

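/* Discard handling: a dedicated block holding a single discard node is
 * created lazily, and discard_if is emitted as a conditional branch to it.
 * ppir_compile_nir appends this block at the very end of the program. */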
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}

static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

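/* Translate NIR intrinsics. Varying and uniform loads with a constant
 * offset fold the offset into the load index; an indirect offset becomes an
 * extra source instead. store_output is emitted as store_color with a
 * synthesized SSA destination covering all written components. */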
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      return &lnode->node;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      return &lnode->node;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      return &lnode->node;

   case nir_intrinsic_store_output: {
      alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
      if (!alu_node)
         return NULL;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      return &alu_node->node;
   }

   case nir_intrinsic_discard:
      return ppir_emit_discard(block, ni);

   case nir_intrinsic_discard_if:
      return ppir_emit_discard_if(block, ni);

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return NULL;
   }
}

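/* Load-constant: only 32-bit values are expected here; the component
 * values are copied into the node for later passes to materialize. */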
static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return NULL;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   return &node->node;
}

static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return NULL;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   return node;
}

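/* Translate a texture op. Only plain tex/txb/txl on 2D, cube, rect and
 * external samplers are handled; src[0] carries the coordinates, src[1] an
 * optional explicit lod or bias. */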
static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return NULL;
   }

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return NULL;

   node->sampler = instr->texture_index;

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return NULL;
   }

   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return NULL;
      }
   }

   return &node->node;
}

static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);

   return block;
}

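/* break/continue become unconditional branches: break targets the single
 * successor of the current block, continue targets the loop header saved
 * in comp->loop_cont_block. */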
static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
   }
   break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("nir_jump_instr not supported\n");
      return NULL;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   return node;
}

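/* Per-instruction-type emit dispatch. Phis (and every instruction type
 * after them) are expected to have been lowered out of the NIR before we
 * get here; ppir_emit_block asserts this. */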
static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}

static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      ppir_node *node = ppir_emit_instr[instr->type](block, instr);
      if (!node)
         return false;

      list_addtail(&node->list, &block->node_list);
   }

   return true;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

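/* Emit an if statement as two branches: a negated conditional branch from
 * the current block that skips the then-side, and, when the else-side is
 * non-empty, an unconditional branch at the end of the then-side that
 * skips the else-side. */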
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate the condition to minimize branching. We're generating the
    * following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if the else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->then_list);
   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add the empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fix up later */
   list_addtail(&after_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->else_list);

   return true;
}

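/* Emit a loop body followed by an unconditional back-edge to the loop
 * header; NIR loops are infinite, with the exit expressed as a break
 * inside the body. */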
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   ppir_emit_cf_list(comp, &nloop->body);

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not supported\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}

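/* The compiler and its var_nodes map are allocated as one chunk: num_ssa
 * slots for SSA defs, then four slots (one per component) for each NIR
 * register, with reg_base marking where the register slots begin. */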
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;
   comp->prog = prog;
   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and store_output as an
    * example. If we don't make store_output artificially depend on
    * discard_if, the scheduler may put store_output first, and since
    * store_output terminates the shader on Utgard PP, the rest of it
    * will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since
    * dependencies can be scheduled in any order it can result in code
    * like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->op == ppir_op_discard ||
             node->op == ppir_op_store_color ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}

static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

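/* For every register, make each read that precedes a write in source order
 * an explicit dependency of that write, so scheduling cannot move the
 * write above the read. */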
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}

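/* Top-level NIR -> PP compile entry point: build the block map and
 * successor links, validate the outputs, emit all control flow, then run
 * lowering, ordering/WAR deps, node-to-instruction conversion, scheduling,
 * register allocation and codegen. */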
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct pipe_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;

   /* 1st pass: create ppir blocks */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            goto err_out0;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   /* Validate outputs: only gl_FragColor is supported */
   nir_foreach_variable(var, &nir->outputs) {
      switch (var->data.location) {
      case FRAG_RESULT_COLOR:
      case FRAG_RESULT_DATA0:
         break;
      default:
         ppir_error("unsupported output type\n");
         goto err_out0;
      }
   }

   foreach_list_typed(nir_register, reg, node, &func->registers) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         goto err_out0;

      r->index = reg->index;
      r->num_components = reg->num_components;
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have a discard block, add it at the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks, NULL);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks, NULL);
   ralloc_free(comp);
   return false;
}