lima/ppir: add ppir_node to ppir_src
[mesa.git] / src / gallium / drivers / lima / ir / pp / nir.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include <string.h>
26
27 #include "util/ralloc.h"
28 #include "util/bitscan.h"
29 #include "compiler/nir/nir.h"
30 #include "pipe/p_state.h"
31
32
33 #include "ppir.h"
34
35 static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
36 {
37 ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
38 if (!node)
39 return NULL;
40
41 ppir_dest *dest = ppir_node_get_dest(node);
42 dest->type = ppir_target_ssa;
43 dest->ssa.num_components = ssa->num_components;
44 dest->ssa.live_in = INT_MAX;
45 dest->ssa.live_out = 0;
46 dest->write_mask = u_bit_consecutive(0, ssa->num_components);
47
48 if (node->type == ppir_node_type_load ||
49 node->type == ppir_node_type_store)
50 dest->ssa.is_head = true;
51
52 return node;
53 }
54
55 static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
56 nir_reg_dest *reg, unsigned mask)
57 {
58 ppir_node *node = ppir_node_create(block, op, reg->reg->index, mask);
59 if (!node)
60 return NULL;
61
62 ppir_dest *dest = ppir_node_get_dest(node);
63
64 list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
65 if (r->index == reg->reg->index) {
66 dest->reg = r;
67 break;
68 }
69 }
70
71 dest->type = ppir_target_register;
72 dest->write_mask = mask;
73
74 if (node->type == ppir_node_type_load ||
75 node->type == ppir_node_type_store)
76 dest->reg->is_head = true;
77
78 return node;
79 }
80
81 static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
82 nir_dest *dest, unsigned mask)
83 {
84 unsigned index = -1;
85
86 if (dest) {
87 if (dest->is_ssa)
88 return ppir_node_create_ssa(block, op, &dest->ssa);
89 else
90 return ppir_node_create_reg(block, op, &dest->reg, mask);
91 }
92
93 return ppir_node_create(block, op, index, 0);
94 }
95
96 static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
97 ppir_src *ps, nir_src *ns, unsigned mask)
98 {
99 ppir_node *child = NULL;
100
101 if (ns->is_ssa) {
102 child = comp->var_nodes[ns->ssa->index];
103 ppir_node_add_dep(node, child);
104 }
105 else {
106 nir_register *reg = ns->reg.reg;
107 while (mask) {
108 int swizzle = ps->swizzle[u_bit_scan(&mask)];
109 child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
110 ppir_node_add_dep(node, child);
111 }
112 }
113
114 ppir_node_target_assign(ps, child);
115 }
116
117 static int nir_to_ppir_opcodes[nir_num_opcodes] = {
118 /* not supported */
119 [0 ... nir_last_opcode] = -1,
120
121 [nir_op_mov] = ppir_op_mov,
122 [nir_op_fmul] = ppir_op_mul,
123 [nir_op_fabs] = ppir_op_abs,
124 [nir_op_fneg] = ppir_op_neg,
125 [nir_op_fadd] = ppir_op_add,
126 [nir_op_fsum3] = ppir_op_sum3,
127 [nir_op_fsum4] = ppir_op_sum4,
128 [nir_op_frsq] = ppir_op_rsqrt,
129 [nir_op_flog2] = ppir_op_log2,
130 [nir_op_fexp2] = ppir_op_exp2,
131 [nir_op_fsqrt] = ppir_op_sqrt,
132 [nir_op_fsin] = ppir_op_sin,
133 [nir_op_fcos] = ppir_op_cos,
134 [nir_op_fmax] = ppir_op_max,
135 [nir_op_fmin] = ppir_op_min,
136 [nir_op_frcp] = ppir_op_rcp,
137 [nir_op_ffloor] = ppir_op_floor,
138 [nir_op_fceil] = ppir_op_ceil,
139 [nir_op_ffract] = ppir_op_fract,
140 [nir_op_sge] = ppir_op_ge,
141 [nir_op_fge] = ppir_op_ge,
142 [nir_op_slt] = ppir_op_lt,
143 [nir_op_flt] = ppir_op_lt,
144 [nir_op_seq] = ppir_op_eq,
145 [nir_op_feq] = ppir_op_eq,
146 [nir_op_sne] = ppir_op_ne,
147 [nir_op_fne] = ppir_op_ne,
148 [nir_op_fcsel] = ppir_op_select,
149 [nir_op_inot] = ppir_op_not,
150 [nir_op_ftrunc] = ppir_op_trunc,
151 [nir_op_fsat] = ppir_op_sat,
152 [nir_op_fddx] = ppir_op_ddx,
153 [nir_op_fddy] = ppir_op_ddy,
154 };
155
156 static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
157 {
158 nir_alu_instr *instr = nir_instr_as_alu(ni);
159 int op = nir_to_ppir_opcodes[instr->op];
160
161 if (op < 0) {
162 ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
163 return NULL;
164 }
165
166 ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
167 instr->dest.write_mask);
168 if (!node)
169 return NULL;
170
171 ppir_dest *pd = &node->dest;
172 nir_alu_dest *nd = &instr->dest;
173 if (nd->saturate)
174 pd->modifier = ppir_outmod_clamp_fraction;
175
176 unsigned src_mask;
177 switch (op) {
178 case ppir_op_sum3:
179 src_mask = 0b0111;
180 break;
181 case ppir_op_sum4:
182 src_mask = 0b1111;
183 break;
184 default:
185 src_mask = pd->write_mask;
186 break;
187 }
188
189 unsigned num_child = nir_op_infos[instr->op].num_inputs;
190 node->num_src = num_child;
191
192 for (int i = 0; i < num_child; i++) {
193 nir_alu_src *ns = instr->src + i;
194 ppir_src *ps = node->src + i;
195 memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
196 ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
197
198 ps->absolute = ns->abs;
199 ps->negate = ns->negate;
200 }
201
202 return &node->node;
203 }
204
205 static ppir_block *ppir_block_create(ppir_compiler *comp);
206
207 static bool ppir_emit_discard_block(ppir_compiler *comp)
208 {
209 ppir_block *block = ppir_block_create(comp);
210 ppir_discard_node *discard;
211 if (!block)
212 return false;
213
214 comp->discard_block = block;
215 block->comp = comp;
216
217 discard = ppir_node_create(block, ppir_op_discard, -1, 0);
218 if (discard)
219 list_addtail(&discard->node.list, &block->node_list);
220 else
221 return false;
222
223 return true;
224 }
225
226 static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
227 {
228 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
229 ppir_node *node;
230 ppir_compiler *comp = block->comp;
231 ppir_branch_node *branch;
232
233 if (!comp->discard_block && !ppir_emit_discard_block(comp))
234 return NULL;
235
236 node = ppir_node_create(block, ppir_op_branch, -1, 0);
237 if (!node)
238 return NULL;
239 branch = ppir_node_to_branch(node);
240
241 /* second src and condition will be updated during lowering */
242 ppir_node_add_src(block->comp, node, &branch->src[0],
243 &instr->src[0], u_bit_consecutive(0, instr->num_components));
244 branch->target = comp->discard_block;
245
246 return node;
247 }
248
249 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
250 {
251 ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
252
253 return node;
254 }
255
256 static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
257 {
258 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
259 unsigned mask = 0;
260 ppir_load_node *lnode;
261 ppir_store_node *snode;
262
263 switch (instr->intrinsic) {
264 case nir_intrinsic_load_input:
265 if (!instr->dest.is_ssa)
266 mask = u_bit_consecutive(0, instr->num_components);
267
268 lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
269 if (!lnode)
270 return NULL;
271
272 lnode->num_components = instr->num_components;
273 lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
274 return &lnode->node;
275
276 case nir_intrinsic_load_frag_coord:
277 case nir_intrinsic_load_point_coord:
278 case nir_intrinsic_load_front_face:
279 if (!instr->dest.is_ssa)
280 mask = u_bit_consecutive(0, instr->num_components);
281
282 ppir_op op;
283 switch (instr->intrinsic) {
284 case nir_intrinsic_load_frag_coord:
285 op = ppir_op_load_fragcoord;
286 break;
287 case nir_intrinsic_load_point_coord:
288 op = ppir_op_load_pointcoord;
289 break;
290 case nir_intrinsic_load_front_face:
291 op = ppir_op_load_frontface;
292 break;
293 default:
294 assert(0);
295 break;
296 }
297
298 lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
299 if (!lnode)
300 return NULL;
301
302 lnode->num_components = instr->num_components;
303 return &lnode->node;
304
305 case nir_intrinsic_load_uniform:
306 if (!instr->dest.is_ssa)
307 mask = u_bit_consecutive(0, instr->num_components);
308
309 lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
310 if (!lnode)
311 return NULL;
312
313 lnode->num_components = instr->num_components;
314 lnode->index = nir_intrinsic_base(instr);
315 lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
316
317 return &lnode->node;
318
319 case nir_intrinsic_store_output:
320 snode = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
321 if (!snode)
322 return NULL;
323
324 snode->index = nir_intrinsic_base(instr);
325
326 for (int i = 0; i < instr->num_components; i++)
327 snode->src.swizzle[i] = i;
328
329 ppir_node_add_src(block->comp, &snode->node, &snode->src, instr->src,
330 u_bit_consecutive(0, instr->num_components));
331
332 return &snode->node;
333
334 case nir_intrinsic_discard:
335 return ppir_emit_discard(block, ni);
336
337 case nir_intrinsic_discard_if:
338 return ppir_emit_discard_if(block, ni);
339
340 default:
341 ppir_error("unsupported nir_intrinsic_instr %s\n",
342 nir_intrinsic_infos[instr->intrinsic].name);
343 return NULL;
344 }
345 }
346
347 static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
348 {
349 nir_load_const_instr *instr = nir_instr_as_load_const(ni);
350 ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
351 if (!node)
352 return NULL;
353
354 assert(instr->def.bit_size == 32);
355
356 for (int i = 0; i < instr->def.num_components; i++)
357 node->constant.value[i].i = instr->value[i].i32;
358 node->constant.num = instr->def.num_components;
359
360 return &node->node;
361 }
362
363 static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
364 {
365 ppir_error("nir_ssa_undef_instr not support\n");
366 return NULL;
367 }
368
369 static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
370 {
371 nir_tex_instr *instr = nir_instr_as_tex(ni);
372 ppir_load_texture_node *node;
373
374 if (instr->op != nir_texop_tex) {
375 ppir_error("unsupported texop %d\n", instr->op);
376 return NULL;
377 }
378
379 node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, 0);
380 if (!node)
381 return NULL;
382
383 node->sampler = instr->texture_index;
384
385 switch (instr->sampler_dim) {
386 case GLSL_SAMPLER_DIM_2D:
387 case GLSL_SAMPLER_DIM_RECT:
388 case GLSL_SAMPLER_DIM_EXTERNAL:
389 break;
390 default:
391 ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
392 return NULL;
393 }
394
395 node->sampler_dim = instr->sampler_dim;
396
397 for (int i = 0; i < instr->coord_components; i++)
398 node->src_coords.swizzle[i] = i;
399
400 for (int i = 0; i < instr->num_srcs; i++) {
401 switch (instr->src[i].src_type) {
402 case nir_tex_src_coord:
403 ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
404 u_bit_consecutive(0, instr->coord_components));
405 break;
406 default:
407 ppir_error("unsupported texture source type\n");
408 assert(0);
409 return NULL;
410 }
411 }
412
413 return &node->node;
414 }
415
416 static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
417 {
418 ppir_error("nir_jump_instr not support\n");
419 return NULL;
420 }
421
422 static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
423 [nir_instr_type_alu] = ppir_emit_alu,
424 [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
425 [nir_instr_type_load_const] = ppir_emit_load_const,
426 [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
427 [nir_instr_type_tex] = ppir_emit_tex,
428 [nir_instr_type_jump] = ppir_emit_jump,
429 };
430
431 static ppir_block *ppir_block_create(ppir_compiler *comp)
432 {
433 ppir_block *block = rzalloc(comp, ppir_block);
434 if (!block)
435 return NULL;
436
437 list_inithead(&block->node_list);
438 list_inithead(&block->instr_list);
439
440 return block;
441 }
442
443 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
444 {
445 ppir_block *block = ppir_block_create(comp);
446 if (!block)
447 return false;
448
449 list_addtail(&block->list, &comp->block_list);
450 block->comp = comp;
451
452 nir_foreach_instr(instr, nblock) {
453 assert(instr->type < nir_instr_type_phi);
454 ppir_node *node = ppir_emit_instr[instr->type](block, instr);
455 if (!node)
456 return false;
457
458 list_addtail(&node->list, &block->node_list);
459 }
460
461 return true;
462 }
463
464 static bool ppir_emit_if(ppir_compiler *comp, nir_if *nif)
465 {
466 ppir_error("if nir_cf_node not support\n");
467 return false;
468 }
469
470 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
471 {
472 ppir_error("loop nir_cf_node not support\n");
473 return false;
474 }
475
476 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
477 {
478 ppir_error("function nir_cf_node not support\n");
479 return false;
480 }
481
482 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
483 {
484 foreach_list_typed(nir_cf_node, node, node, list) {
485 bool ret;
486
487 switch (node->type) {
488 case nir_cf_node_block:
489 ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
490 break;
491 case nir_cf_node_if:
492 ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
493 break;
494 case nir_cf_node_loop:
495 ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
496 break;
497 case nir_cf_node_function:
498 ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
499 break;
500 default:
501 ppir_error("unknown NIR node type %d\n", node->type);
502 return false;
503 }
504
505 if (!ret)
506 return false;
507 }
508
509 return true;
510 }
511
512 static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
513 {
514 ppir_compiler *comp = rzalloc_size(
515 prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
516 if (!comp)
517 return NULL;
518
519 list_inithead(&comp->block_list);
520 list_inithead(&comp->reg_list);
521
522 comp->var_nodes = (ppir_node **)(comp + 1);
523 comp->reg_base = num_ssa;
524 comp->prog = prog;
525 return comp;
526 }
527
528 static void ppir_add_ordering_deps(ppir_compiler *comp)
529 {
530 /* Some intrinsics do not have explicit dependencies and thus depend
531 * on instructions order. Consider discard_if and store_ouput as
532 * example. If we don't add fake dependency of discard_if to store_output
533 * scheduler may put store_output first and since store_output terminates
534 * shader on Utgard PP, rest of it will never be executed.
535 * Add fake dependencies for discard/branch/store to preserve
536 * instruction order.
537 *
538 * TODO: scheduler should schedule discard_if as early as possible otherwise
539 * we may end up with suboptimal code for cases like this:
540 *
541 * s3 = s1 < s2
542 * discard_if s3
543 * s4 = s1 + s2
544 * store s4
545 *
546 * In this case store depends on discard_if and s4, but since dependencies can
547 * be scheduled in any order it can result in code like this:
548 *
549 * instr1: s3 = s1 < s3
550 * instr2: s4 = s1 + s2
551 * instr3: discard_if s3
552 * instr4: store s4
553 */
554 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
555 ppir_node *prev_node = NULL;
556 list_for_each_entry(ppir_node, node, &block->node_list, list) {
557 if (node->type == ppir_node_type_discard ||
558 node->type == ppir_node_type_store ||
559 node->type == ppir_node_type_branch) {
560 if (prev_node)
561 ppir_node_add_dep(node, prev_node);
562 prev_node = node;
563 }
564 }
565 }
566 }
567
568 static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
569 struct pipe_debug_callback *debug)
570 {
571 const struct shader_info *info = &nir->info;
572 char *shaderdb;
573 int ret = asprintf(&shaderdb,
574 "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
575 gl_shader_stage_name(info->stage),
576 comp->cur_instr_index,
577 comp->num_loops,
578 comp->num_spills,
579 comp->num_fills);
580 assert(ret >= 0);
581
582 if (lima_debug & LIMA_DEBUG_SHADERDB)
583 fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
584
585 pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
586 free(shaderdb);
587 }
588
589 bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
590 struct ra_regs *ra,
591 struct pipe_debug_callback *debug)
592 {
593 nir_function_impl *func = nir_shader_get_entrypoint(nir);
594 ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
595 if (!comp)
596 return false;
597
598 comp->ra = ra;
599
600 foreach_list_typed(nir_register, reg, node, &func->registers) {
601 ppir_reg *r = rzalloc(comp, ppir_reg);
602 if (!r)
603 return false;
604
605 r->index = reg->index;
606 r->num_components = reg->num_components;
607 r->live_in = INT_MAX;
608 r->live_out = 0;
609 r->is_head = false;
610 list_addtail(&r->list, &comp->reg_list);
611 }
612
613 if (!ppir_emit_cf_list(comp, &func->body))
614 goto err_out0;
615
616 /* If we have discard block add it to the very end */
617 if (comp->discard_block)
618 list_addtail(&comp->discard_block->list, &comp->block_list);
619
620 ppir_add_ordering_deps(comp);
621
622 ppir_node_print_prog(comp);
623
624 if (!ppir_lower_prog(comp))
625 goto err_out0;
626
627 if (!ppir_node_to_instr(comp))
628 goto err_out0;
629
630 if (!ppir_schedule_prog(comp))
631 goto err_out0;
632
633 if (!ppir_regalloc_prog(comp))
634 goto err_out0;
635
636 if (!ppir_codegen_prog(comp))
637 goto err_out0;
638
639 ppir_print_shader_db(nir, comp, debug);
640
641 ralloc_free(comp);
642 return true;
643
644 err_out0:
645 ralloc_free(comp);
646 return false;
647 }
648