lima: add cubemap support
[mesa.git] / src / gallium / drivers / lima / ir / pp / nir.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
#include <limits.h>
#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"


#include "ppir.h"
35
36 static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
37 {
38 ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
39 if (!node)
40 return NULL;
41
42 ppir_dest *dest = ppir_node_get_dest(node);
43 dest->type = ppir_target_ssa;
44 dest->ssa.num_components = ssa->num_components;
45 dest->ssa.live_in = INT_MAX;
46 dest->ssa.live_out = 0;
47 dest->write_mask = u_bit_consecutive(0, ssa->num_components);
48
49 if (node->type == ppir_node_type_load ||
50 node->type == ppir_node_type_store)
51 dest->ssa.is_head = true;
52
53 return node;
54 }
55
/* Create a ppir node whose destination is the given NIR register.
 * mask selects which components of the register this node writes.
 * Returns the new node, or NULL on allocation failure.
 */
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   /* Find the ppir_reg created for this nir_register during compile setup. */
   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }
   /* NOTE(review): if no matching entry exists, dest->reg stays unset and
    * the is_head store below would dereference it — presumably every
    * nir_register is pre-registered in reg_list; confirm. */

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}
81
82 static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
83 nir_dest *dest, unsigned mask)
84 {
85 unsigned index = -1;
86
87 if (dest) {
88 if (dest->is_ssa)
89 return ppir_node_create_ssa(block, op, &dest->ssa);
90 else
91 return ppir_node_create_reg(block, op, dest->reg.reg, mask);
92 }
93
94 return ppir_node_create(block, op, index, 0);
95 }
96
/* Resolve the NIR source ns to a producing ppir child node, record a
 * dependency from node to that child, and assign the child as the target of
 * ppir_src ps. mask selects the components read when ns is a register.
 */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      /* Clone consts for each successor */
      switch (child->op) {
      case ppir_op_const:
         child = ppir_node_clone(node->block, child);
         break;
      case ppir_op_load_varying:
         if ((node->op != ppir_op_load_texture)) {
            /* Clone varying loads for each block */
            if (child->block != node->block) {
               child = ppir_node_clone(node->block, child);
               comp->var_nodes[ns->ssa->index] = child;
            }
            break;
         }
         /* At least one successor is load_texture, promote it to load_coords
          * to ensure that is has exactly one successor */
         child->op = ppir_op_load_coords;
         /* Fallthrough */
      case ppir_op_load_uniform:
      case ppir_op_load_coords:
      case ppir_op_load_coords_reg:
         /* Clone uniform and texture coord loads for each block.
          * Also ensure that each load has a single successor.
          * Let's do a fetch each time and hope for a cache hit instead
          * of increasing reg pressure.
          */
         if (child->block != node->block || !ppir_node_is_root(child)) {
            child = ppir_node_clone(node->block, child);
            comp->var_nodes[ns->ssa->index] = child;
         }
         break;
      default:
         break;
      }

      /* Undefs have no real producer, so no dependency edge is needed. */
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_register *reg = ns->reg.reg;
      /* A register may be written componentwise by different nodes; add a
       * dependency on the writer of each component actually read. */
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         /* var_nodes layout: SSA slots first, then 4 slots (one per
          * component) per register, starting at comp->reg_base. */
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_undef, reg,
               u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_undef)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   /* The last resolved child becomes the value read through ps. */
   ppir_node_target_assign(ps, child);
}
161
/* Map from NIR ALU opcodes to PP opcodes; -1 marks unsupported ops
 * (rejected in ppir_emit_alu()). */
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};
196
/* Translate a NIR ALU instruction into a ppir ALU node.
 * Returns the new node, or NULL for unsupported ops / allocation failure.
 */
static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return NULL;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return NULL;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   /* NIR saturate maps to the PP clamp-to-[0,1] output modifier. */
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   /* Horizontal sums read more components than they write, so the source
    * read mask can be wider than the destination write mask. */
   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   return &node->node;
}
245
246 static ppir_block *ppir_block_create(ppir_compiler *comp);
247
248 static bool ppir_emit_discard_block(ppir_compiler *comp)
249 {
250 ppir_block *block = ppir_block_create(comp);
251 ppir_discard_node *discard;
252 if (!block)
253 return false;
254
255 comp->discard_block = block;
256 block->comp = comp;
257
258 discard = ppir_node_create(block, ppir_op_discard, -1, 0);
259 if (discard)
260 list_addtail(&discard->node.list, &block->node_list);
261 else
262 return false;
263
264 return true;
265 }
266
/* Emit discard_if as a conditional branch to the shared discard block,
 * creating that block on first use.
 */
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}
290
291 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
292 {
293 ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
294
295 return node;
296 }
297
/* Translate a NIR intrinsic into a ppir node.
 * Returns NULL for unsupported intrinsics or on allocation failure.
 */
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      /* Register dests need an explicit write mask; SSA dests derive it
       * from num_components in ppir_node_create_ssa(). */
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      /* Varyings are addressed per component: 4 slots per vec4 base. */
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         /* NOTE(review): the constant offset is read as a float here —
          * confirm offsets are float-typed at this stage of lowering. */
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         /* Indirect addressing: keep the offset as a run-time source. */
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      return &lnode->node;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      /* These system values map 1:1 to dedicated PP load ops. */
      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      return &lnode->node;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         /* Indirect uniform access: offset becomes a run-time source. */
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      return &lnode->node;

   case nir_intrinsic_store_output: {
      /* The color store has no NIR dest; a fixed SSA dest (index 0) is
       * synthesized for it here. */
      alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
      if (!alu_node)
         return NULL;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.live_in = INT_MAX;
      dest->ssa.live_out = 0;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      /* Identity swizzle for the stored components. */
      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      return &alu_node->node;
   }

   case nir_intrinsic_discard:
      return ppir_emit_discard(block, ni);

   case nir_intrinsic_discard_if:
      return ppir_emit_discard_if(block, ni);

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return NULL;
   }
}
408
409 static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
410 {
411 nir_load_const_instr *instr = nir_instr_as_load_const(ni);
412 ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
413 if (!node)
414 return NULL;
415
416 assert(instr->def.bit_size == 32);
417
418 for (int i = 0; i < instr->def.num_components; i++)
419 node->constant.value[i].i = instr->value[i].i32;
420 node->constant.num = instr->def.num_components;
421
422 return &node->node;
423 }
424
/* Emit a node for nir_ssa_undef. Undef values never become real dependency
 * edges (see ppir_node_add_src()).
 */
static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return NULL;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;
   ppir_reg *ssa = &dest->ssa;

   /* Put the undef ssa on reg_list — presumably so later passes (regalloc)
    * still see it; confirm against ppir_regalloc_prog(). */
   list_add(&ssa->list, &block->comp->reg_list);

   return node;
}
441
/* Translate a texture sampling instruction. Only plain nir_texop_tex with
 * 2D/cube/rect/external sampler dims and a coordinate source is supported.
 */
static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   if (instr->op != nir_texop_tex) {
      ppir_error("unsupported texop %d\n", instr->op);
      return NULL;
   }

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return NULL;

   node->sampler = instr->texture_index;

   /* Reject sampler dims the PP cannot handle before doing more work. */
   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return NULL;
   }

   node->sampler_dim = instr->sampler_dim;

   /* Identity swizzle for the coordinate components. */
   for (int i = 0; i < instr->coord_components; i++)
      node->src_coords.swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         break;
      default:
         /* bias/lod/projector/etc. sources are not supported */
         ppir_error("unsupported texture source type\n");
         assert(0);
         return NULL;
      }
   }

   return &node->node;
}
493
494 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
495 {
496 ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);
497
498 return block;
499 }
500
501 static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
502 {
503 ppir_node *node;
504 ppir_compiler *comp = block->comp;
505 ppir_branch_node *branch;
506 ppir_block *jump_block;
507 nir_jump_instr *jump = nir_instr_as_jump(ni);
508
509 switch (jump->type) {
510 case nir_jump_break: {
511 assert(comp->current_block->successors[0]);
512 assert(!comp->current_block->successors[1]);
513 jump_block = comp->current_block->successors[0];
514 }
515 break;
516 case nir_jump_continue:
517 jump_block = comp->loop_cont_block;
518 break;
519 default:
520 ppir_error("nir_jump_instr not support\n");
521 return NULL;
522 }
523
524 assert(jump_block != NULL);
525
526 node = ppir_node_create(block, ppir_op_branch, -1, 0);
527 if (!node)
528 return NULL;
529 branch = ppir_node_to_branch(node);
530
531 /* Unconditional */
532 branch->num_src = 0;
533 branch->target = jump_block;
534
535 return node;
536 }
537
/* Instruction emitters indexed by nir_instr_type. Phis (and anything after
 * them in the enum) must be lowered away before this pass — asserted in
 * ppir_emit_block(). */
static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu]        = ppir_emit_alu,
   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
   [nir_instr_type_tex]        = ppir_emit_tex,
   [nir_instr_type_jump]       = ppir_emit_jump,
};
546
547 static ppir_block *ppir_block_create(ppir_compiler *comp)
548 {
549 ppir_block *block = rzalloc(comp, ppir_block);
550 if (!block)
551 return NULL;
552
553 list_inithead(&block->node_list);
554 list_inithead(&block->instr_list);
555
556 block->comp = comp;
557
558 return block;
559 }
560
561 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
562 {
563 ppir_block *block = ppir_get_block(comp, nblock);
564
565 comp->current_block = block;
566
567 list_addtail(&block->list, &comp->block_list);
568
569 nir_foreach_instr(instr, nblock) {
570 assert(instr->type < nir_instr_type_phi);
571 ppir_node *node = ppir_emit_instr[instr->type](block, instr);
572 if (!node)
573 return false;
574
575 list_addtail(&node->list, &block->node_list);
576 }
577
578 return true;
579 }
580
581 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
582
/* Emit a NIR if statement as ppir blocks and branches.
 * Returns false on allocation failure.
 */
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
      exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->then_list);
   if (empty_else_block) {
      /* With no else body, the "else" branch falls through directly to the
       * block after the if. */
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   /* End of the then body: unconditional jump over the else body. */
   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->else_list);

   return true;
}
645
/* Emit a NIR loop: emit the body, then add an unconditional back-edge branch
 * from the last body block to the loop header. loop_cont_block is
 * saved/restored to support nested loops; continue jumps use it (see
 * ppir_emit_jump()). Returns false on allocation failure.
 */
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   ppir_emit_cf_list(comp, &nloop->body);

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}
675
676 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
677 {
678 ppir_error("function nir_cf_node not support\n");
679 return false;
680 }
681
682 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
683 {
684 foreach_list_typed(nir_cf_node, node, node, list) {
685 bool ret;
686
687 switch (node->type) {
688 case nir_cf_node_block:
689 ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
690 break;
691 case nir_cf_node_if:
692 ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
693 break;
694 case nir_cf_node_loop:
695 ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
696 break;
697 case nir_cf_node_function:
698 ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
699 break;
700 default:
701 ppir_error("unknown NIR node type %d\n", node->type);
702 return false;
703 }
704
705 if (!ret)
706 return false;
707 }
708
709 return true;
710 }
711
/* Allocate the compiler context plus the var_nodes array in one ralloc:
 * one slot per SSA def followed by four slots (one per vec4 component) per
 * NIR register. Returns NULL on allocation failure.
 */
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->blocks = _mesa_hash_table_u64_create(prog);

   /* var_nodes lives in the tail of the comp allocation */
   comp->var_nodes = (ppir_node **)(comp + 1);
   /* register slots start right after the SSA slots */
   comp->reg_base = num_ssa;
   comp->prog = prog;
   return comp;
}
728
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instructions order. Consider discard_if and store_ouput as
    * example. If we don't add fake dependency of discard_if to store_output
    * scheduler may put store_output first and since store_output terminates
    * shader on Utgard PP, rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: scheduler should schedule discard_if as early as possible otherwise
    * we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies can
    * be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s3
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      /* Reverse walk: prev_node is the next discard/store/branch in program
       * order, so every root node before it gets sequenced ahead of it. */
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->op == ppir_op_discard ||
             node->op == ppir_op_store_color ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}
770
771 static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
772 struct pipe_debug_callback *debug)
773 {
774 const struct shader_info *info = &nir->info;
775 char *shaderdb;
776 int ret = asprintf(&shaderdb,
777 "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
778 gl_shader_stage_name(info->stage),
779 comp->cur_instr_index,
780 comp->num_loops,
781 comp->num_spills,
782 comp->num_fills);
783 assert(ret >= 0);
784
785 if (lima_debug & LIMA_DEBUG_SHADERDB)
786 fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
787
788 pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
789 free(shaderdb);
790 }
791
/* For every register, make each read of the previous value a dependency of
 * the next write, so the scheduler cannot move the write before the reads
 * (write-after-read hazard).
 */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         /* Reverse walk: 'write' tracks the closest later write of reg. */
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}
815
816 bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
817 struct ra_regs *ra,
818 struct pipe_debug_callback *debug)
819 {
820 nir_function_impl *func = nir_shader_get_entrypoint(nir);
821 ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
822 if (!comp)
823 return false;
824
825 comp->ra = ra;
826
827 /* 1st pass: create ppir blocks */
828 nir_foreach_function(function, nir) {
829 if (!function->impl)
830 continue;
831
832 nir_foreach_block(nblock, function->impl) {
833 ppir_block *block = ppir_block_create(comp);
834 if (!block)
835 return false;
836 block->index = nblock->index;
837 _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
838 }
839 }
840
841 /* 2nd pass: populate successors */
842 nir_foreach_function(function, nir) {
843 if (!function->impl)
844 continue;
845
846 nir_foreach_block(nblock, function->impl) {
847 ppir_block *block = ppir_get_block(comp, nblock);
848 assert(block);
849
850 for (int i = 0; i < 2; i++) {
851 if (nblock->successors[i])
852 block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
853 }
854 }
855 }
856
857 /* Validate outputs, we support only gl_FragColor */
858 nir_foreach_variable(var, &nir->outputs) {
859 switch (var->data.location) {
860 case FRAG_RESULT_COLOR:
861 case FRAG_RESULT_DATA0:
862 break;
863 default:
864 ppir_error("unsupported output type\n");
865 goto err_out0;
866 break;
867 }
868 }
869
870 foreach_list_typed(nir_register, reg, node, &func->registers) {
871 ppir_reg *r = rzalloc(comp, ppir_reg);
872 if (!r)
873 return false;
874
875 r->index = reg->index;
876 r->num_components = reg->num_components;
877 r->live_in = INT_MAX;
878 r->live_out = 0;
879 r->is_head = false;
880 list_addtail(&r->list, &comp->reg_list);
881 }
882
883 if (!ppir_emit_cf_list(comp, &func->body))
884 goto err_out0;
885
886 /* If we have discard block add it to the very end */
887 if (comp->discard_block)
888 list_addtail(&comp->discard_block->list, &comp->block_list);
889
890 ppir_node_print_prog(comp);
891
892 if (!ppir_lower_prog(comp))
893 goto err_out0;
894
895 ppir_add_ordering_deps(comp);
896 ppir_add_write_after_read_deps(comp);
897
898 ppir_node_print_prog(comp);
899
900 if (!ppir_node_to_instr(comp))
901 goto err_out0;
902
903 if (!ppir_schedule_prog(comp))
904 goto err_out0;
905
906 if (!ppir_regalloc_prog(comp))
907 goto err_out0;
908
909 if (!ppir_codegen_prog(comp))
910 goto err_out0;
911
912 ppir_print_shader_db(nir, comp, debug);
913
914 _mesa_hash_table_u64_destroy(comp->blocks, NULL);
915 ralloc_free(comp);
916 return true;
917
918 err_out0:
919 _mesa_hash_table_u64_destroy(comp->blocks, NULL);
920 ralloc_free(comp);
921 return false;
922 }
923