lima/ppir: add write after read deps for registers
[mesa.git] / src / gallium / drivers / lima / ir / pp / nir.c
/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"


#include "ppir.h"

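/* Create a node whose destination is the given NIR SSA def. Load and store
 * nodes additionally get is_head set on the destination, presumably so the
 * value is pinned to the start of a physical register. */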
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->ssa.live_in = INT_MAX;
   dest->ssa.live_out = 0;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

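/* Create a node that writes (part of) the given NIR register. The matching
 * ppir_reg is looked up by index in the compiler's reg_list, which
 * ppir_compile_nir() populated from func->registers. */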
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_reg_dest *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

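/* Dispatch on the destination kind: SSA def, register, or no destination
 * at all (dest == NULL, as for stores). */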
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, &dest->ssa);
      else
         return ppir_node_create_reg(block, op, &dest->reg, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

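/* Hook up one source of a node: add a dependency on the node(s) producing
 * the value and assign the ppir_src target. Consts are cloned for every
 * user and uniform/varying/texture loads are cloned per block; for
 * register sources a dependency is added on the writer of each component
 * selected by mask. */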
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      /* Clone consts for each successor */
      switch (child->op) {
      case ppir_op_const:
         child = ppir_node_clone(node->block, child);
         break;
      /* Clone uniform, varying and texture loads for each block */
      case ppir_op_load_texture:
      case ppir_op_load_uniform:
      case ppir_op_load_varying:
         if (child->block != node->block) {
            child = ppir_node_clone(node->block, child);
            comp->var_nodes[ns->ssa->index] = child;
         }
         break;
      default:
         break;
      }

      ppir_node_add_dep(node, child);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         ppir_node_add_dep(node, child);
      }
   }

   ppir_node_target_assign(ps, child);
}

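/* NIR ALU opcode -> ppir opcode table; entries left at -1 are rejected in
 * ppir_emit_alu() below. */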
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_fge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_flt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_feq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

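/* Translate one NIR ALU instruction. The source mask normally mirrors the
 * write mask, but horizontal ops (sum3/sum4) read more components than
 * they write, so it is widened for them. */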
static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return NULL;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return NULL;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   return &node->node;
}

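/* Lazily create the block holding a single discard node; branches emitted
 * for discard_if jump here. ppir_compile_nir() appends it after all other
 * blocks. */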
static ppir_block *ppir_block_create(ppir_compiler *comp);

static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}

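/* discard_if is emitted as a conditional branch to the discard block; only
 * the first source is set here, the rest is filled in when the branch is
 * lowered. */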
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

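/* Translate load/store intrinsics. For non-SSA destinations the write mask
 * is widened to cover every component the intrinsic produces before
 * creating the node. */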
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_store_node *snode;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      return &lnode->node;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      return &lnode->node;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return NULL;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);

      return &lnode->node;

   case nir_intrinsic_store_output:
      snode = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
      if (!snode)
         return NULL;

      snode->index = nir_intrinsic_base(instr);

      for (int i = 0; i < instr->num_components; i++)
         snode->src.swizzle[i] = i;

      ppir_node_add_src(block->comp, &snode->node, &snode->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      return &snode->node;

   case nir_intrinsic_discard:
      return ppir_emit_discard(block, ni);

   case nir_intrinsic_discard_if:
      return ppir_emit_discard_if(block, ni);

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return NULL;
   }
}

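/* Constants become ppir const nodes; only 32-bit values are expected. */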
static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return NULL;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   return &node->node;
}

static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   ppir_error("nir_ssa_undef_instr not supported\n");
   return NULL;
}

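/* Texture sampling: only plain nir_texop_tex on 2D-like sampler dims is
 * handled, with the coordinate as the single supported source. */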
static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   if (instr->op != nir_texop_tex) {
      ppir_error("unsupported texop %d\n", instr->op);
      return NULL;
   }

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return NULL;

   node->sampler = instr->texture_index;

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return NULL;
   }

   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src_coords.swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         break;
      default:
         ppir_error("unsupported texture source type\n");
         assert(0);
         return NULL;
      }
   }

   return &node->node;
}

static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_error("nir_jump_instr not supported\n");
   return NULL;
}

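/* Emit handler per NIR instruction type; phis and anything beyond are not
 * supported and are caught by the assert in ppir_emit_block(). */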
static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   return block;
}

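/* Translate one NIR block: emit every instruction and append the resulting
 * nodes to the new ppir block's node list. */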
static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_block_create(comp);
   if (!block)
      return false;

   list_addtail(&block->list, &comp->block_list);
   block->comp = comp;

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      ppir_node *node = ppir_emit_instr[instr->type](block, instr);
      if (!node)
         return false;

      list_addtail(&node->list, &block->node_list);
   }

   return true;
}

static bool ppir_emit_if(ppir_compiler *comp, nir_if *nif)
{
   ppir_error("if nir_cf_node not supported\n");
   return false;
}

static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_error("loop nir_cf_node not supported\n");
   return false;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not supported\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}

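/* The var_nodes map is allocated right behind the compiler struct: num_ssa
 * slots for SSA defs followed by four slots (one per component) for each
 * NIR register, which is why reg_base equals num_ssa and register lookups
 * use (index << 2) + reg_base + component. */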
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;
   comp->prog = prog;
   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and store_output as an
    * example. If we don't add a fake dependency of store_output on
    * discard_if, the scheduler may put store_output first, and since
    * store_output terminates the shader on Utgard PP, the rest of it
    * will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: the scheduler should schedule discard_if as early as possible,
    * otherwise we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies
    * can be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node);
         }
         if (node->op == ppir_op_discard ||
             node->op == ppir_op_store_color ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}

static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

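/* Registers, unlike SSA defs, can be written more than once. Walking each
 * block backwards, make every write to a register depend on the earlier
 * reads of the value it overwrites, so the scheduler cannot hoist the
 * write above those reads (write-after-read dependencies). */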
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write)
                  ppir_node_add_dep(write, node);
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}

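/* Compiler entry point: translate NIR into ppir, lower it, add ordering
 * and write-after-read dependencies, group nodes into instructions,
 * schedule, allocate registers and finally generate machine code. */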
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct pipe_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;

   foreach_list_typed(nir_register, reg, node, &func->registers) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = reg->index;
      r->num_components = reg->num_components;
      r->live_in = INT_MAX;
      r->live_out = 0;
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have a discard block, add it to the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   ralloc_free(comp);
   return true;

err_out0:
   ralloc_free(comp);
   return false;
}