a2f3406e41410ff0bed9e82fae5f5c8fc01e475d
[mesa.git] / src / gallium / drivers / lima / ir / pp / nir.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include <string.h>
26
27 #include "util/hash_table.h"
28 #include "util/ralloc.h"
29 #include "util/bitscan.h"
30 #include "compiler/nir/nir.h"
31 #include "pipe/p_state.h"
32
33
34 #include "ppir.h"
35
36 static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
37 {
38 ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
39 if (!node)
40 return NULL;
41
42 ppir_dest *dest = ppir_node_get_dest(node);
43 dest->type = ppir_target_ssa;
44 dest->ssa.num_components = ssa->num_components;
45 dest->write_mask = u_bit_consecutive(0, ssa->num_components);
46
47 if (node->type == ppir_node_type_load ||
48 node->type == ppir_node_type_store)
49 dest->ssa.is_head = true;
50
51 return node;
52 }
53
54 static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
55 nir_register *reg, unsigned mask)
56 {
57 ppir_node *node = ppir_node_create(block, op, reg->index, mask);
58 if (!node)
59 return NULL;
60
61 ppir_dest *dest = ppir_node_get_dest(node);
62
63 list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
64 if (r->index == reg->index) {
65 dest->reg = r;
66 break;
67 }
68 }
69
70 dest->type = ppir_target_register;
71 dest->write_mask = mask;
72
73 if (node->type == ppir_node_type_load ||
74 node->type == ppir_node_type_store)
75 dest->reg->is_head = true;
76
77 return node;
78 }
79
80 static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
81 nir_dest *dest, unsigned mask)
82 {
83 unsigned index = -1;
84
85 if (dest) {
86 if (dest->is_ssa)
87 return ppir_node_create_ssa(block, op, &dest->ssa);
88 else
89 return ppir_node_create_reg(block, op, dest->reg.reg, mask);
90 }
91
92 return ppir_node_create(block, op, index, 0);
93 }
94
/* Resolve a NIR source to the ppir node(s) that produced it and record
 * source dependencies on those producers.
 *
 * comp: compiler context holding the var_nodes producer table
 * node: consumer node that gains the dependencies
 * ps:   ppir source to point at the producer (its swizzle is consulted
 *       for register reads, so it must be filled in first)
 * ns:   NIR source being resolved
 * mask: component mask the consumer actually reads
 */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      /* SSA defs have exactly one producer, indexed by SSA index. */
      child = comp->var_nodes[ns->ssa->index];
      /* Undefs carry no real value; don't order against them. */
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      /* Registers are tracked per component: var_nodes holds the last
       * writer of each of a register's 4 components at
       * reg_base + (reg->index << 2) + component. */
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   /* NOTE(review): for register sources this assigns the writer of the
    * last scanned component — presumably all component writers share
    * the same register target, so any of them identifies it. */
   ppir_node_target_assign(ps, child);
}
124
/* Translation table from NIR ALU opcodes to ppir opcodes.  Entries
 * left at -1 have no PP equivalent; ppir_emit_alu rejects them. */
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};
159
/* Translate a nir_alu_instr into a ppir ALU node and append it to the
 * block.  Returns false on unsupported opcode or allocation failure. */
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   /* -1 marks opcodes with no PP equivalent (see table above). */
   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   /* NIR saturate maps to the clamp_fraction output modifier. */
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   /* Horizontal sums read more source components than they write, so
    * widen the mask used for source dependency tracking. */
   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      /* Copy the swizzle before resolving the source:
       * ppir_node_add_src reads ps->swizzle for register sources. */
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}
209
210 static ppir_block *ppir_block_create(ppir_compiler *comp);
211
212 static bool ppir_emit_discard_block(ppir_compiler *comp)
213 {
214 ppir_block *block = ppir_block_create(comp);
215 ppir_discard_node *discard;
216 if (!block)
217 return false;
218
219 comp->discard_block = block;
220 block->comp = comp;
221
222 discard = ppir_node_create(block, ppir_op_discard, -1, 0);
223 if (discard)
224 list_addtail(&discard->node.list, &block->node_list);
225 else
226 return false;
227
228 return true;
229 }
230
/* Emit a conditional branch to the shared discard block for
 * nir_intrinsic_discard_if.  Returns the branch node, or NULL on
 * allocation failure. */
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   /* The discard block is created once and shared by all discard_ifs. */
   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}
254
255 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
256 {
257 ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
258
259 return node;
260 }
261
/* Translate a NIR intrinsic into ppir nodes.  Returns false for
 * unsupported intrinsics or on allocation failure. */
static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      /* Register destinations need an explicit write mask; SSA dests
       * derive theirs from the def's component count. */
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      /* NOTE(review): constant offsets are read as floats here — verify
       * against the lowering pass that produces this source. */
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         /* Indirect load: keep the offset as a source operand. */
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      /* Map the system-value intrinsic to the matching ppir load op. */
      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      /* NOTE(review): like load_input, the constant offset is read as a
       * float — confirm against the producing pass. */
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         /* Indirect access: keep the offset as a source operand. */
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output is ssa, that register
       * can be directly marked as the output.
       * If discard is used or the source is not ssa, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      if (!block->comp->uses_discard && instr->src->is_ssa) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_const:
            break;
         default:
            node->is_end = 1;
            return true;
         }
      }

      /* Fallback: insert a mov with an identity swizzle and mark it as
       * the shader end node. */
      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_end = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      /* NOTE(review): node may be NULL on allocation failure — consider
       * checking before list_addtail. */
      node = ppir_emit_discard(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      /* NOTE(review): same missing NULL check as discard above. */
      node = ppir_emit_discard_if(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}
401
402 static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
403 {
404 nir_load_const_instr *instr = nir_instr_as_load_const(ni);
405 ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
406 if (!node)
407 return false;
408
409 assert(instr->def.bit_size == 32);
410
411 for (int i = 0; i < instr->def.num_components; i++)
412 node->constant.value[i].i = instr->value[i].i32;
413 node->constant.num = instr->def.num_components;
414
415 list_addtail(&node->node.list, &block->node_list);
416 return true;
417 }
418
419 static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
420 {
421 nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
422 ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
423 if (!node)
424 return false;
425 ppir_alu_node *alu = ppir_node_to_alu(node);
426
427 ppir_dest *dest = &alu->dest;
428 dest->ssa.undef = true;
429
430 list_addtail(&node->list, &block->node_list);
431 return true;
432 }
433
/* Translate a nir_tex_instr into a ppir load_texture node plus a
 * coordinate loader feeding it through a pipeline register. */
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   /* Identity swizzle for the coordinate source. */
   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         if (ns->is_ssa) {
            ppir_node *child = block->comp->var_nodes[ns->ssa->index];
            if (child->op == ppir_op_load_varying) {
               /* If the successor is load_texture, promote it to load_coords */
               nir_tex_src *nts = (nir_tex_src *)ns;
               if (nts->src_type == nir_tex_src_coord)
                  child->op = ppir_op_load_coords;
            }
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         /* explicit_lod distinguishes txl (true) from txb (false). */
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   /* Reuse the producer as the coordinate loader only when it already
    * is a load_coords with this ld_tex as its single successor. */
   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      if (node->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
         load->num_components = 3;
      else
         load->num_components = 2;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __FUNCTION__, load->index, node->node.index);

      /* Re-route the ld_tex's predecessors through the new loader so
       * dependency order is preserved. */
      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);
   /* Coordinates flow from the loader to ld_tex through the discard
    * pipeline register rather than a regular register. */
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}
551
552 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
553 {
554 ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);
555
556 return block;
557 }
558
559 static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
560 {
561 ppir_node *node;
562 ppir_compiler *comp = block->comp;
563 ppir_branch_node *branch;
564 ppir_block *jump_block;
565 nir_jump_instr *jump = nir_instr_as_jump(ni);
566
567 switch (jump->type) {
568 case nir_jump_break: {
569 assert(comp->current_block->successors[0]);
570 assert(!comp->current_block->successors[1]);
571 jump_block = comp->current_block->successors[0];
572 }
573 break;
574 case nir_jump_continue:
575 jump_block = comp->loop_cont_block;
576 break;
577 default:
578 ppir_error("nir_jump_instr not support\n");
579 return false;
580 }
581
582 assert(jump_block != NULL);
583
584 node = ppir_node_create(block, ppir_op_branch, -1, 0);
585 if (!node)
586 return false;
587 branch = ppir_node_to_branch(node);
588
589 /* Unconditional */
590 branch->num_src = 0;
591 branch->target = jump_block;
592
593 list_addtail(&node->list, &block->node_list);
594 return true;
595 }
596
/* Per-instruction emit handlers indexed by nir_instr_type.  Phi and
 * later instruction types have no handler; ppir_emit_block asserts
 * instr->type < nir_instr_type_phi before dispatching. */
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};
605
606 static ppir_block *ppir_block_create(ppir_compiler *comp)
607 {
608 ppir_block *block = rzalloc(comp, ppir_block);
609 if (!block)
610 return NULL;
611
612 list_inithead(&block->node_list);
613 list_inithead(&block->instr_list);
614
615 block->comp = comp;
616
617 return block;
618 }
619
620 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
621 {
622 ppir_block *block = ppir_get_block(comp, nblock);
623
624 comp->current_block = block;
625
626 list_addtail(&block->list, &comp->block_list);
627
628 nir_foreach_instr(instr, nblock) {
629 assert(instr->type < nir_instr_type_phi);
630 if (!ppir_emit_instr[instr->type](block, instr))
631 return false;
632 }
633
634 return true;
635 }
636
static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

/* Emit a NIR if-statement as ppir branches.  The condition is negated
 * so the then-path falls through and only the else side is jumped to. */
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   /* An "empty" else is a single else block with no instructions. */
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      /* Branch straight to the block following the if. */
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   /* Terminate the then side with an unconditional jump over the else. */
   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}
704
705 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
706 {
707 ppir_block *save_loop_cont_block = comp->loop_cont_block;
708 ppir_block *block;
709 ppir_branch_node *loop_branch;
710 nir_block *loop_last_block;
711 ppir_node *node;
712
713 comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));
714
715 if (!ppir_emit_cf_list(comp, &nloop->body))
716 return false;
717
718 loop_last_block = nir_loop_last_block(nloop);
719 block = ppir_get_block(comp, loop_last_block);
720 node = ppir_node_create(block, ppir_op_branch, -1, 0);
721 if (!node)
722 return false;
723 loop_branch = ppir_node_to_branch(node);
724 /* Unconditional */
725 loop_branch->num_src = 0;
726 loop_branch->target = comp->loop_cont_block;
727 list_addtail(&loop_branch->node.list, &block->node_list);
728
729 comp->loop_cont_block = save_loop_cont_block;
730
731 comp->num_loops++;
732
733 return true;
734 }
735
/* Nested function control-flow nodes are not supported; NIR is
 * expected to be fully inlined before reaching ppir. */
static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not support\n");
   return false;
}
741
742 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
743 {
744 foreach_list_typed(nir_cf_node, node, node, list) {
745 bool ret;
746
747 switch (node->type) {
748 case nir_cf_node_block:
749 ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
750 break;
751 case nir_cf_node_if:
752 ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
753 break;
754 case nir_cf_node_loop:
755 ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
756 break;
757 case nir_cf_node_function:
758 ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
759 break;
760 default:
761 ppir_error("unknown NIR node type %d\n", node->type);
762 return false;
763 }
764
765 if (!ret)
766 return false;
767 }
768
769 return true;
770 }
771
/* Allocate the compiler context plus its var_nodes producer table:
 * num_ssa slots for SSA defs, followed by 4 slots (one per register
 * component) for each NIR register, starting at reg_base. */
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->blocks = _mesa_hash_table_u64_create(prog);

   /* var_nodes lives in the trailing space of the same allocation. */
   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;
   comp->prog = prog;
   return comp;
}
788
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instructions order. Consider discard_if and the is_end node as
    * example. If we don't add fake dependency of discard_if to is_end,
    * scheduler may put the is_end first and since is_end terminates
    * shader on Utgard PP, rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: scheduler should schedule discard_if as early as possible otherwise
    * we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies can
    * be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s3
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      /* Walk backwards so prev_node always refers to the closest
       * following ordering-sensitive node in program order. */
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            /* Tie otherwise-unordered root nodes to the next
             * discard/branch/store/end node. */
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->is_end ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}
830
831 static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
832 struct pipe_debug_callback *debug)
833 {
834 const struct shader_info *info = &nir->info;
835 char *shaderdb;
836 int ret = asprintf(&shaderdb,
837 "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
838 gl_shader_stage_name(info->stage),
839 comp->cur_instr_index,
840 comp->num_loops,
841 comp->num_spills,
842 comp->num_fills);
843 assert(ret >= 0);
844
845 if (lima_debug & LIMA_DEBUG_SHADERDB)
846 fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
847
848 pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
849 free(shaderdb);
850 }
851
/* For every register, add write-after-read dependencies so a write
 * cannot be scheduled before earlier reads of the same register.
 * Each block is walked backwards, remembering the most recently seen
 * writer (i.e. the next writer in program order) and making it depend
 * on every read that precedes it. */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            /* Any read of 'reg' that appears before 'write' constrains
             * that write. */
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            /* Track the latest writer seen so far in the backward walk. */
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}
875
876 bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
877 struct ra_regs *ra,
878 struct pipe_debug_callback *debug)
879 {
880 nir_function_impl *func = nir_shader_get_entrypoint(nir);
881 ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
882 if (!comp)
883 return false;
884
885 comp->ra = ra;
886 comp->uses_discard = nir->info.fs.uses_discard;
887
888 /* 1st pass: create ppir blocks */
889 nir_foreach_function(function, nir) {
890 if (!function->impl)
891 continue;
892
893 nir_foreach_block(nblock, function->impl) {
894 ppir_block *block = ppir_block_create(comp);
895 if (!block)
896 return false;
897 block->index = nblock->index;
898 _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
899 }
900 }
901
902 /* 2nd pass: populate successors */
903 nir_foreach_function(function, nir) {
904 if (!function->impl)
905 continue;
906
907 nir_foreach_block(nblock, function->impl) {
908 ppir_block *block = ppir_get_block(comp, nblock);
909 assert(block);
910
911 for (int i = 0; i < 2; i++) {
912 if (nblock->successors[i])
913 block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
914 }
915 }
916 }
917
918 /* Validate outputs, we support only gl_FragColor */
919 nir_foreach_variable(var, &nir->outputs) {
920 switch (var->data.location) {
921 case FRAG_RESULT_COLOR:
922 case FRAG_RESULT_DATA0:
923 break;
924 default:
925 ppir_error("unsupported output type\n");
926 goto err_out0;
927 break;
928 }
929 }
930
931 foreach_list_typed(nir_register, reg, node, &func->registers) {
932 ppir_reg *r = rzalloc(comp, ppir_reg);
933 if (!r)
934 return false;
935
936 r->index = reg->index;
937 r->num_components = reg->num_components;
938 r->is_head = false;
939 list_addtail(&r->list, &comp->reg_list);
940 }
941
942 if (!ppir_emit_cf_list(comp, &func->body))
943 goto err_out0;
944
945 /* If we have discard block add it to the very end */
946 if (comp->discard_block)
947 list_addtail(&comp->discard_block->list, &comp->block_list);
948
949 ppir_node_print_prog(comp);
950
951 if (!ppir_lower_prog(comp))
952 goto err_out0;
953
954 ppir_add_ordering_deps(comp);
955 ppir_add_write_after_read_deps(comp);
956
957 ppir_node_print_prog(comp);
958
959 if (!ppir_node_to_instr(comp))
960 goto err_out0;
961
962 if (!ppir_schedule_prog(comp))
963 goto err_out0;
964
965 if (!ppir_regalloc_prog(comp))
966 goto err_out0;
967
968 if (!ppir_codegen_prog(comp))
969 goto err_out0;
970
971 ppir_print_shader_db(nir, comp, debug);
972
973 _mesa_hash_table_u64_destroy(comp->blocks, NULL);
974 ralloc_free(comp);
975 return true;
976
977 err_out0:
978 _mesa_hash_table_u64_destroy(comp->blocks, NULL);
979 ralloc_free(comp);
980 return false;
981 }
982