lima/ppir: add lod-bias support
src/gallium/drivers/lima/ir/pp/nir.c (mesa.git)
/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"

#include "ppir.h"

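/* Node-creation helpers. They return void * so the result can be assigned
 * to the concrete node type (ppir_alu_node, ppir_load_node, ...) without a
 * cast. The ppir destination mirrors the NIR dest: an SSA value or a
 * component-masked register.
 */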
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->ssa.live_in = INT_MAX;
   dest->ssa.live_out = 0;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, &dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.reg, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

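/* Resolve one NIR source into a ppir source and add the dependency edge
 * from the consuming node to its producer. Constants and most load ops
 * are cloned here so that each block (and for some ops each successor)
 * gets its own copy; the per-case comments below explain why.
 */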
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      /* Clone consts for each successor */
      switch (child->op) {
      case ppir_op_const:
         child = ppir_node_clone(node->block, child);
         break;
      case ppir_op_load_varying: {
         bool is_load_coords = false;
         if (node->op == ppir_op_load_texture) {
            nir_tex_src *nts = (nir_tex_src *)ns;
            if (nts->src_type == nir_tex_src_coord)
               is_load_coords = true;
         }

         if (!is_load_coords) {
            /* Clone varying loads for each block */
            if (child->block != node->block) {
               child = ppir_node_clone(node->block, child);
               comp->var_nodes[ns->ssa->index] = child;
            }
            break;
         }
         /* At least one successor is load_texture, promote it to
          * load_coords to ensure that it has exactly one successor */
         child->op = ppir_op_load_coords;
      }
      /* Fallthrough */
      case ppir_op_load_uniform:
      case ppir_op_load_coords:
      case ppir_op_load_coords_reg:
         /* Clone uniform and texture coord loads for each block.
          * Also ensure that each load has a single successor.
          * Let's do a fetch each time and hope for a cache hit instead
          * of increasing reg pressure.
          */
         if (child->block != node->block || !ppir_node_is_root(child)) {
            child = ppir_node_clone(node->block, child);
            comp->var_nodes[ns->ssa->index] = child;
         }
         break;
      default:
         break;
      }

      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it is written; create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_undef, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_undef)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   ppir_node_target_assign(ps, child);
}

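/* Map NIR ALU opcodes to ppir opcodes. The [0 ... nir_last_opcode] entry
 * uses the GCC/Clang designated-initializer range extension to mark every
 * opcode as unsupported (-1) by default; supported ones are overridden
 * below.
 */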
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return NULL;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return NULL;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   return &node->node;
}

static ppir_block *ppir_block_create(ppir_compiler *comp);

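/* discard_if becomes a conditional branch to a dedicated block holding a
 * single discard op. The block is created lazily on first use and is
 * appended at the very end of the program by ppir_compile_nir().
 */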
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}

static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

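/* Note on indexing: varyings are addressed per scalar component
 * (base * 4 + component), while uniforms are addressed per vec4 slot.
 * A non-constant offset cannot be folded into the index, so it is kept
 * as an extra source and resolved at run time instead.
 */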
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      return &lnode->node;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      return &lnode->node;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      return &lnode->node;

   case nir_intrinsic_store_output: {
      alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
      if (!alu_node)
         return NULL;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.live_in = INT_MAX;
      dest->ssa.live_out = 0;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      return &alu_node->node;
   }

   case nir_intrinsic_discard:
      return ppir_emit_discard(block, ni);

   case nir_intrinsic_discard_if:
      return ppir_emit_discard_if(block, ni);

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return NULL;
   }
}

static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return NULL;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   return &node->node;
}

static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return NULL;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;
   ppir_reg *ssa = &dest->ssa;

   list_add(&ssa->list, &block->comp->reg_list);

   return node;
}

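/* Texture sampling. Only tex, txb and txl are supported. Bias and
 * explicit LOD share the single lod_bias source; explicit_lod records
 * which meaning it carries so later stages can encode it accordingly.
 * E.g. GLSL texture2D(s, uv, b) typically reaches here as nir_texop_txb.
 */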
static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return NULL;
   }

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return NULL;

   node->sampler = instr->texture_index;

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return NULL;
   }

   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src_coords.swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->lod_bias, &instr->src[i].src, 1);
         break;
      default:
         ppir_error("unsupported texture source type\n");
         assert(0);
         return NULL;
      }
   }

   return &node->node;
}

static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);

   return block;
}

static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
   }
   break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("nir_jump_instr not supported\n");
      return NULL;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   return node;
}

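/* Emission dispatch table, indexed by nir_instr_type. It deliberately
 * stops at nir_instr_type_phi: ppir_emit_block() asserts that no phi (or
 * later) instruction type reaches it, so phis must be lowered before
 * calling into this backend.
 */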
static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}

static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      ppir_node *node = ppir_emit_instr[instr->type](block, instr);
      if (!node)
         return false;

      list_addtail(&node->list, &block->node_list);
   }

   return true;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate the condition to minimize branching. We're generating the
    * following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or, if the else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->then_list);
   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add the empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block; will be fixed up later */
   list_addtail(&after_branch->node.list, &block->node_list);

   ppir_emit_cf_list(comp, &if_stmt->else_list);

   return true;
}

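/* Loops need no condition here: any exit is a nir_jump_break emitted
 * inside the body, so the loop itself just gets an unconditional branch
 * from its last block back to the first. loop_cont_block is saved and
 * restored so nested loops keep the correct continue target.
 */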
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   ppir_emit_cf_list(comp, &nloop->body);

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not supported\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}

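/* var_nodes holds one slot per SSA definition followed by four slots (one
 * per component) for every NIR register, hence the num_reg << 2 below and
 * reg_base = num_ssa: component c of register r lives at
 * (r << 2) + reg_base + c.
 */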
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;
   comp->prog = prog;
   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and store_output as an
    * example. If we don't add a fake dependency from discard_if to
    * store_output, the scheduler may put store_output first, and since
    * store_output terminates the shader on Utgard PP, the rest of it
    * will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies
    * can be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->op == ppir_op_discard ||
             node->op == ppir_op_store_color ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}

static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

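/* Walk each block backwards: for every register, once a write has been
 * seen, every earlier read of that register gets a write-after-read dep
 * on the write, so the scheduler cannot hoist the write above the read.
 */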
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}

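/* Compiler entry point. The pass order below: build a ppir block per NIR
 * block, wire up successors, emit nodes, lower, add ordering/WAR deps,
 * pack nodes into instructions, schedule, allocate registers, and emit
 * the final machine code.
 */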
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct pipe_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;

   /* 1st pass: create ppir blocks */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   /* Validate outputs; we only support gl_FragColor */
   nir_foreach_variable(var, &nir->outputs) {
      switch (var->data.location) {
      case FRAG_RESULT_COLOR:
      case FRAG_RESULT_DATA0:
         break;
      default:
         ppir_error("unsupported output type\n");
         goto err_out0;
      }
   }

   foreach_list_typed(nir_register, reg, node, &func->registers) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = reg->index;
      r->num_components = reg->num_components;
      r->live_in = INT_MAX;
      r->live_out = 0;
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have a discard block, add it at the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks, NULL);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks, NULL);
   ralloc_free(comp);
   return false;
}