lima/ppir: Add fsat op
[mesa.git] / src / gallium / drivers / lima / ir / pp / nir.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include <string.h>
26
27 #include "util/ralloc.h"
28 #include "util/bitscan.h"
29 #include "compiler/nir/nir.h"
30
31 #include "ppir.h"
32
33 static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
34 {
35 ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
36 if (!node)
37 return NULL;
38
39 ppir_dest *dest = ppir_node_get_dest(node);
40 dest->type = ppir_target_ssa;
41 dest->ssa.num_components = ssa->num_components;
42 dest->ssa.live_in = INT_MAX;
43 dest->ssa.live_out = 0;
44 dest->write_mask = u_bit_consecutive(0, ssa->num_components);
45
46 if (node->type == ppir_node_type_load ||
47 node->type == ppir_node_type_store)
48 dest->ssa.is_head = true;
49
50 return node;
51 }
52
53 static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
54 nir_reg_dest *reg, unsigned mask)
55 {
56 ppir_node *node = ppir_node_create(block, op, reg->reg->index, mask);
57 if (!node)
58 return NULL;
59
60 ppir_dest *dest = ppir_node_get_dest(node);
61
62 list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
63 if (r->index == reg->reg->index) {
64 dest->reg = r;
65 break;
66 }
67 }
68
69 dest->type = ppir_target_register;
70 dest->write_mask = mask;
71
72 if (node->type == ppir_node_type_load ||
73 node->type == ppir_node_type_store)
74 dest->reg->is_head = true;
75
76 return node;
77 }
78
79 static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
80 nir_dest *dest, unsigned mask)
81 {
82 unsigned index = -1;
83
84 if (dest) {
85 if (dest->is_ssa)
86 return ppir_node_create_ssa(block, op, &dest->ssa);
87 else
88 return ppir_node_create_reg(block, op, &dest->reg, mask);
89 }
90
91 return ppir_node_create(block, op, index, 0);
92 }
93
94 static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
95 ppir_src *ps, nir_src *ns, unsigned mask)
96 {
97 ppir_node *child = NULL;
98
99 if (ns->is_ssa) {
100 child = comp->var_nodes[ns->ssa->index];
101 ppir_node_add_dep(node, child);
102 }
103 else {
104 nir_register *reg = ns->reg.reg;
105 while (mask) {
106 int swizzle = ps->swizzle[u_bit_scan(&mask)];
107 child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
108 ppir_node_add_dep(node, child);
109 }
110 }
111
112 ppir_dest *dest = ppir_node_get_dest(child);
113 ppir_node_target_assign(ps, dest);
114 }
115
116 static int nir_to_ppir_opcodes[nir_num_opcodes] = {
117 /* not supported */
118 [0 ... nir_last_opcode] = -1,
119
120 [nir_op_mov] = ppir_op_mov,
121 [nir_op_fmul] = ppir_op_mul,
122 [nir_op_fabs] = ppir_op_abs,
123 [nir_op_fneg] = ppir_op_neg,
124 [nir_op_fadd] = ppir_op_add,
125 [nir_op_fdot2] = ppir_op_dot2,
126 [nir_op_fdot3] = ppir_op_dot3,
127 [nir_op_fdot4] = ppir_op_dot4,
128 [nir_op_frsq] = ppir_op_rsqrt,
129 [nir_op_flog2] = ppir_op_log2,
130 [nir_op_fexp2] = ppir_op_exp2,
131 [nir_op_fsqrt] = ppir_op_sqrt,
132 [nir_op_fsin] = ppir_op_sin,
133 [nir_op_fcos] = ppir_op_cos,
134 [nir_op_fmax] = ppir_op_max,
135 [nir_op_fmin] = ppir_op_min,
136 [nir_op_frcp] = ppir_op_rcp,
137 [nir_op_ffloor] = ppir_op_floor,
138 [nir_op_fceil] = ppir_op_ceil,
139 [nir_op_ffract] = ppir_op_fract,
140 [nir_op_fand] = ppir_op_and,
141 [nir_op_for] = ppir_op_or,
142 [nir_op_fxor] = ppir_op_xor,
143 [nir_op_sge] = ppir_op_ge,
144 [nir_op_fge] = ppir_op_ge,
145 [nir_op_slt] = ppir_op_lt,
146 [nir_op_flt] = ppir_op_lt,
147 [nir_op_seq] = ppir_op_eq,
148 [nir_op_feq] = ppir_op_eq,
149 [nir_op_sne] = ppir_op_ne,
150 [nir_op_fne] = ppir_op_ne,
151 [nir_op_fnot] = ppir_op_not,
152 [nir_op_fcsel] = ppir_op_select,
153 [nir_op_inot] = ppir_op_not,
154 [nir_op_ftrunc] = ppir_op_trunc,
155 [nir_op_fsat] = ppir_op_sat,
156 };
157
158 static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
159 {
160 nir_alu_instr *instr = nir_instr_as_alu(ni);
161 int op = nir_to_ppir_opcodes[instr->op];
162
163 if (op < 0) {
164 ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
165 return NULL;
166 }
167
168 ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
169 instr->dest.write_mask);
170 if (!node)
171 return NULL;
172
173 ppir_dest *pd = &node->dest;
174 nir_alu_dest *nd = &instr->dest;
175 if (nd->saturate)
176 pd->modifier = ppir_outmod_clamp_fraction;
177
178 unsigned src_mask;
179 switch (op) {
180 case ppir_op_dot2:
181 src_mask = 0b0011;
182 break;
183 case ppir_op_dot3:
184 src_mask = 0b0111;
185 break;
186 case ppir_op_dot4:
187 src_mask = 0b1111;
188 break;
189 default:
190 src_mask = pd->write_mask;
191 break;
192 }
193
194 unsigned num_child = nir_op_infos[instr->op].num_inputs;
195 node->num_src = num_child;
196
197 for (int i = 0; i < num_child; i++) {
198 nir_alu_src *ns = instr->src + i;
199 ppir_src *ps = node->src + i;
200 memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
201 ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);
202
203 ps->absolute = ns->abs;
204 ps->negate = ns->negate;
205 }
206
207 return &node->node;
208 }
209
210 static ppir_block *ppir_block_create(ppir_compiler *comp);
211
212 static bool ppir_emit_discard_block(ppir_compiler *comp)
213 {
214 ppir_block *block = ppir_block_create(comp);
215 ppir_discard_node *discard;
216 if (!block)
217 return false;
218
219 comp->discard_block = block;
220 block->comp = comp;
221
222 discard = ppir_node_create(block, ppir_op_discard, -1, 0);
223 if (discard)
224 list_addtail(&discard->node.list, &block->node_list);
225 else
226 return false;
227
228 return true;
229 }
230
231 static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
232 {
233 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
234 ppir_node *node;
235 ppir_compiler *comp = block->comp;
236 ppir_branch_node *branch;
237
238 if (!comp->discard_block && !ppir_emit_discard_block(comp))
239 return NULL;
240
241 node = ppir_node_create(block, ppir_op_branch, -1, 0);
242 if (!node)
243 return NULL;
244 branch = ppir_node_to_branch(node);
245
246 /* second src and condition will be updated during lowering */
247 ppir_node_add_src(block->comp, node, &branch->src[0],
248 &instr->src[0], u_bit_consecutive(0, instr->num_components));
249 branch->target = comp->discard_block;
250
251 return node;
252 }
253
254 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
255 {
256 ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
257
258 return node;
259 }
260
261 static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
262 {
263 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
264 unsigned mask = 0;
265 ppir_load_node *lnode;
266 ppir_store_node *snode;
267
268 switch (instr->intrinsic) {
269 case nir_intrinsic_load_input:
270 if (!instr->dest.is_ssa)
271 mask = u_bit_consecutive(0, instr->num_components);
272
273 lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
274 if (!lnode)
275 return NULL;
276
277 lnode->num_components = instr->num_components;
278 lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
279 return &lnode->node;
280
281 case nir_intrinsic_load_frag_coord:
282 if (!instr->dest.is_ssa)
283 mask = u_bit_consecutive(0, instr->num_components);
284
285 lnode = ppir_node_create_dest(block, ppir_op_load_fragcoord, &instr->dest, mask);
286 if (!lnode)
287 return NULL;
288
289 lnode->num_components = instr->num_components;
290 return &lnode->node;
291
292 case nir_intrinsic_load_uniform:
293 if (!instr->dest.is_ssa)
294 mask = u_bit_consecutive(0, instr->num_components);
295
296 lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
297 if (!lnode)
298 return NULL;
299
300 lnode->num_components = instr->num_components;
301 lnode->index = nir_intrinsic_base(instr);
302 lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
303
304 return &lnode->node;
305
306 case nir_intrinsic_store_output:
307 snode = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
308 if (!snode)
309 return NULL;
310
311 snode->index = nir_intrinsic_base(instr);
312
313 for (int i = 0; i < instr->num_components; i++)
314 snode->src.swizzle[i] = i;
315
316 ppir_node_add_src(block->comp, &snode->node, &snode->src, instr->src,
317 u_bit_consecutive(0, instr->num_components));
318
319 return &snode->node;
320
321 case nir_intrinsic_discard:
322 return ppir_emit_discard(block, ni);
323
324 case nir_intrinsic_discard_if:
325 return ppir_emit_discard_if(block, ni);
326
327 default:
328 ppir_error("unsupported nir_intrinsic_instr %s\n",
329 nir_intrinsic_infos[instr->intrinsic].name);
330 return NULL;
331 }
332 }
333
334 static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni)
335 {
336 nir_load_const_instr *instr = nir_instr_as_load_const(ni);
337 ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
338 if (!node)
339 return NULL;
340
341 assert(instr->def.bit_size == 32);
342
343 for (int i = 0; i < instr->def.num_components; i++)
344 node->constant.value[i].i = instr->value[i].i32;
345 node->constant.num = instr->def.num_components;
346
347 return &node->node;
348 }
349
350 static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
351 {
352 ppir_error("nir_ssa_undef_instr not support\n");
353 return NULL;
354 }
355
356 static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni)
357 {
358 nir_tex_instr *instr = nir_instr_as_tex(ni);
359 ppir_load_texture_node *node;
360
361 if (instr->op != nir_texop_tex) {
362 ppir_error("unsupported texop %d\n", instr->op);
363 return NULL;
364 }
365
366 node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, 0);
367 if (!node)
368 return NULL;
369
370 node->sampler = instr->texture_index;
371
372 switch (instr->sampler_dim) {
373 case GLSL_SAMPLER_DIM_2D:
374 case GLSL_SAMPLER_DIM_RECT:
375 case GLSL_SAMPLER_DIM_EXTERNAL:
376 break;
377 default:
378 ppir_debug("unsupported sampler dim: %d\n", instr->sampler_dim);
379 return NULL;
380 }
381
382 node->sampler_dim = instr->sampler_dim;
383
384 for (int i = 0; i < instr->coord_components; i++)
385 node->src_coords.swizzle[i] = i;
386
387 assert(instr->num_srcs == 1);
388 for (int i = 0; i < instr->num_srcs; i++) {
389 switch (instr->src[i].src_type) {
390 case nir_tex_src_coord:
391 ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src,
392 u_bit_consecutive(0, instr->coord_components));
393 break;
394 default:
395 ppir_debug("unknown texture source");
396 return NULL;
397 }
398 }
399
400 return &node->node;
401 }
402
403 static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni)
404 {
405 ppir_error("nir_jump_instr not support\n");
406 return NULL;
407 }
408
409 static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
410 [nir_instr_type_alu] = ppir_emit_alu,
411 [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
412 [nir_instr_type_load_const] = ppir_emit_load_const,
413 [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
414 [nir_instr_type_tex] = ppir_emit_tex,
415 [nir_instr_type_jump] = ppir_emit_jump,
416 };
417
418 static ppir_block *ppir_block_create(ppir_compiler *comp)
419 {
420 ppir_block *block = rzalloc(comp, ppir_block);
421 if (!block)
422 return NULL;
423
424 list_inithead(&block->node_list);
425 list_inithead(&block->instr_list);
426
427 return block;
428 }
429
430 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
431 {
432 ppir_block *block = ppir_block_create(comp);
433 if (!block)
434 return false;
435
436 list_addtail(&block->list, &comp->block_list);
437 block->comp = comp;
438
439 nir_foreach_instr(instr, nblock) {
440 assert(instr->type < nir_instr_type_phi);
441 ppir_node *node = ppir_emit_instr[instr->type](block, instr);
442 if (!node)
443 return false;
444
445 list_addtail(&node->list, &block->node_list);
446 }
447
448 return true;
449 }
450
451 static bool ppir_emit_if(ppir_compiler *comp, nir_if *nif)
452 {
453 ppir_error("if nir_cf_node not support\n");
454 return false;
455 }
456
457 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
458 {
459 ppir_error("loop nir_cf_node not support\n");
460 return false;
461 }
462
463 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
464 {
465 ppir_error("function nir_cf_node not support\n");
466 return false;
467 }
468
469 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
470 {
471 foreach_list_typed(nir_cf_node, node, node, list) {
472 bool ret;
473
474 switch (node->type) {
475 case nir_cf_node_block:
476 ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
477 break;
478 case nir_cf_node_if:
479 ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
480 break;
481 case nir_cf_node_loop:
482 ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
483 break;
484 case nir_cf_node_function:
485 ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
486 break;
487 default:
488 ppir_error("unknown NIR node type %d\n", node->type);
489 return false;
490 }
491
492 if (!ret)
493 return false;
494 }
495
496 return true;
497 }
498
499 static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
500 {
501 ppir_compiler *comp = rzalloc_size(
502 prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
503 if (!comp)
504 return NULL;
505
506 list_inithead(&comp->block_list);
507 list_inithead(&comp->reg_list);
508
509 comp->var_nodes = (ppir_node **)(comp + 1);
510 comp->reg_base = num_ssa;
511 comp->prog = prog;
512 return comp;
513 }
514
515 static void ppir_add_ordering_deps(ppir_compiler *comp)
516 {
517 /* Some intrinsics do not have explicit dependencies and thus depend
518 * on instructions order. Consider discard_if and store_ouput as
519 * example. If we don't add fake dependency of discard_if to store_output
520 * scheduler may put store_output first and since store_output terminates
521 * shader on Utgard PP, rest of it will never be executed.
522 * Add fake dependencies for discard/branch/store to preserve
523 * instruction order.
524 *
525 * TODO: scheduler should schedule discard_if as early as possible otherwise
526 * we may end up with suboptimal code for cases like this:
527 *
528 * s3 = s1 < s2
529 * discard_if s3
530 * s4 = s1 + s2
531 * store s4
532 *
533 * In this case store depends on discard_if and s4, but since dependencies can
534 * be scheduled in any order it can result in code like this:
535 *
536 * instr1: s3 = s1 < s3
537 * instr2: s4 = s1 + s2
538 * instr3: discard_if s3
539 * instr4: store s4
540 */
541 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
542 ppir_node *prev_node = NULL;
543 list_for_each_entry(ppir_node, node, &block->node_list, list) {
544 if (node->type == ppir_node_type_discard ||
545 node->type == ppir_node_type_store ||
546 node->type == ppir_node_type_branch) {
547 if (prev_node)
548 ppir_node_add_dep(node, prev_node);
549 prev_node = node;
550 }
551 }
552 }
553 }
554
555 bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
556 struct ra_regs *ra)
557 {
558 nir_function_impl *func = nir_shader_get_entrypoint(nir);
559 ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
560 if (!comp)
561 return false;
562
563 comp->ra = ra;
564
565 foreach_list_typed(nir_register, reg, node, &func->registers) {
566 ppir_reg *r = rzalloc(comp, ppir_reg);
567 if (!r)
568 return false;
569
570 r->index = reg->index;
571 r->num_components = reg->num_components;
572 r->live_in = INT_MAX;
573 r->live_out = 0;
574 r->is_head = false;
575 list_addtail(&r->list, &comp->reg_list);
576 }
577
578 if (!ppir_emit_cf_list(comp, &func->body))
579 goto err_out0;
580
581 /* If we have discard block add it to the very end */
582 if (comp->discard_block)
583 list_addtail(&comp->discard_block->list, &comp->block_list);
584
585 ppir_add_ordering_deps(comp);
586
587 ppir_node_print_prog(comp);
588
589 if (!ppir_lower_prog(comp))
590 goto err_out0;
591
592 if (!ppir_node_to_instr(comp))
593 goto err_out0;
594
595 if (!ppir_schedule_prog(comp))
596 goto err_out0;
597
598 if (!ppir_regalloc_prog(comp))
599 goto err_out0;
600
601 if (!ppir_codegen_prog(comp))
602 goto err_out0;
603
604 ralloc_free(comp);
605 return true;
606
607 err_out0:
608 ralloc_free(comp);
609 return false;
610 }
611