/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <string.h>

#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"

#include "ppir.h"
33 static void *ppir_node_create_ssa(ppir_block
*block
, ppir_op op
, nir_ssa_def
*ssa
)
35 ppir_node
*node
= ppir_node_create(block
, op
, ssa
->index
, 0);
39 ppir_dest
*dest
= ppir_node_get_dest(node
);
40 dest
->type
= ppir_target_ssa
;
41 dest
->ssa
.num_components
= ssa
->num_components
;
42 dest
->ssa
.live_in
= INT_MAX
;
43 dest
->ssa
.live_out
= 0;
44 dest
->write_mask
= u_bit_consecutive(0, ssa
->num_components
);
46 if (node
->type
== ppir_node_type_load
||
47 node
->type
== ppir_node_type_store
)
48 dest
->ssa
.is_head
= true;
53 static void *ppir_node_create_reg(ppir_block
*block
, ppir_op op
,
54 nir_reg_dest
*reg
, unsigned mask
)
56 ppir_node
*node
= ppir_node_create(block
, op
, reg
->reg
->index
, mask
);
60 ppir_dest
*dest
= ppir_node_get_dest(node
);
62 list_for_each_entry(ppir_reg
, r
, &block
->comp
->reg_list
, list
) {
63 if (r
->index
== reg
->reg
->index
) {
69 dest
->type
= ppir_target_register
;
70 dest
->write_mask
= mask
;
72 if (node
->type
== ppir_node_type_load
||
73 node
->type
== ppir_node_type_store
)
74 dest
->reg
->is_head
= true;
79 static void *ppir_node_create_dest(ppir_block
*block
, ppir_op op
,
80 nir_dest
*dest
, unsigned mask
)
86 return ppir_node_create_ssa(block
, op
, &dest
->ssa
);
88 return ppir_node_create_reg(block
, op
, &dest
->reg
, mask
);
91 return ppir_node_create(block
, op
, index
, 0);
94 static void ppir_node_add_src(ppir_compiler
*comp
, ppir_node
*node
,
95 ppir_src
*ps
, nir_src
*ns
, unsigned mask
)
97 ppir_node
*child
= NULL
;
100 child
= comp
->var_nodes
[ns
->ssa
->index
];
101 ppir_node_add_dep(node
, child
);
104 nir_register
*reg
= ns
->reg
.reg
;
106 int swizzle
= ps
->swizzle
[u_bit_scan(&mask
)];
107 child
= comp
->var_nodes
[(reg
->index
<< 2) + comp
->reg_base
+ swizzle
];
108 ppir_node_add_dep(node
, child
);
112 ppir_dest
*dest
= ppir_node_get_dest(child
);
113 ppir_node_target_assign(ps
, dest
);
116 static int nir_to_ppir_opcodes
[nir_num_opcodes
] = {
118 [0 ... nir_last_opcode
] = -1,
120 [nir_op_mov
] = ppir_op_mov
,
121 [nir_op_fmul
] = ppir_op_mul
,
122 [nir_op_fabs
] = ppir_op_abs
,
123 [nir_op_fneg
] = ppir_op_neg
,
124 [nir_op_fadd
] = ppir_op_add
,
125 [nir_op_fdot2
] = ppir_op_dot2
,
126 [nir_op_fdot3
] = ppir_op_dot3
,
127 [nir_op_fdot4
] = ppir_op_dot4
,
128 [nir_op_frsq
] = ppir_op_rsqrt
,
129 [nir_op_flog2
] = ppir_op_log2
,
130 [nir_op_fexp2
] = ppir_op_exp2
,
131 [nir_op_fsqrt
] = ppir_op_sqrt
,
132 [nir_op_fsin
] = ppir_op_sin
,
133 [nir_op_fcos
] = ppir_op_cos
,
134 [nir_op_fmax
] = ppir_op_max
,
135 [nir_op_fmin
] = ppir_op_min
,
136 [nir_op_frcp
] = ppir_op_rcp
,
137 [nir_op_ffloor
] = ppir_op_floor
,
138 [nir_op_fceil
] = ppir_op_ceil
,
139 [nir_op_ffract
] = ppir_op_fract
,
140 [nir_op_sge
] = ppir_op_ge
,
141 [nir_op_fge
] = ppir_op_ge
,
142 [nir_op_slt
] = ppir_op_lt
,
143 [nir_op_flt
] = ppir_op_lt
,
144 [nir_op_seq
] = ppir_op_eq
,
145 [nir_op_feq
] = ppir_op_eq
,
146 [nir_op_sne
] = ppir_op_ne
,
147 [nir_op_fne
] = ppir_op_ne
,
148 [nir_op_fcsel
] = ppir_op_select
,
149 [nir_op_inot
] = ppir_op_not
,
150 [nir_op_ftrunc
] = ppir_op_trunc
,
151 [nir_op_fsat
] = ppir_op_sat
,
154 static ppir_node
*ppir_emit_alu(ppir_block
*block
, nir_instr
*ni
)
156 nir_alu_instr
*instr
= nir_instr_as_alu(ni
);
157 int op
= nir_to_ppir_opcodes
[instr
->op
];
160 ppir_error("unsupported nir_op: %s\n", nir_op_infos
[instr
->op
].name
);
164 ppir_alu_node
*node
= ppir_node_create_dest(block
, op
, &instr
->dest
.dest
,
165 instr
->dest
.write_mask
);
169 ppir_dest
*pd
= &node
->dest
;
170 nir_alu_dest
*nd
= &instr
->dest
;
172 pd
->modifier
= ppir_outmod_clamp_fraction
;
186 src_mask
= pd
->write_mask
;
190 unsigned num_child
= nir_op_infos
[instr
->op
].num_inputs
;
191 node
->num_src
= num_child
;
193 for (int i
= 0; i
< num_child
; i
++) {
194 nir_alu_src
*ns
= instr
->src
+ i
;
195 ppir_src
*ps
= node
->src
+ i
;
196 memcpy(ps
->swizzle
, ns
->swizzle
, sizeof(ps
->swizzle
));
197 ppir_node_add_src(block
->comp
, &node
->node
, ps
, &ns
->src
, src_mask
);
199 ps
->absolute
= ns
->abs
;
200 ps
->negate
= ns
->negate
;
206 static ppir_block
*ppir_block_create(ppir_compiler
*comp
);
208 static bool ppir_emit_discard_block(ppir_compiler
*comp
)
210 ppir_block
*block
= ppir_block_create(comp
);
211 ppir_discard_node
*discard
;
215 comp
->discard_block
= block
;
218 discard
= ppir_node_create(block
, ppir_op_discard
, -1, 0);
220 list_addtail(&discard
->node
.list
, &block
->node_list
);
227 static ppir_node
*ppir_emit_discard_if(ppir_block
*block
, nir_instr
*ni
)
229 nir_intrinsic_instr
*instr
= nir_instr_as_intrinsic(ni
);
231 ppir_compiler
*comp
= block
->comp
;
232 ppir_branch_node
*branch
;
234 if (!comp
->discard_block
&& !ppir_emit_discard_block(comp
))
237 node
= ppir_node_create(block
, ppir_op_branch
, -1, 0);
240 branch
= ppir_node_to_branch(node
);
242 /* second src and condition will be updated during lowering */
243 ppir_node_add_src(block
->comp
, node
, &branch
->src
[0],
244 &instr
->src
[0], u_bit_consecutive(0, instr
->num_components
));
245 branch
->target
= comp
->discard_block
;
250 static ppir_node
*ppir_emit_discard(ppir_block
*block
, nir_instr
*ni
)
252 ppir_node
*node
= ppir_node_create(block
, ppir_op_discard
, -1, 0);
257 static ppir_node
*ppir_emit_intrinsic(ppir_block
*block
, nir_instr
*ni
)
259 nir_intrinsic_instr
*instr
= nir_instr_as_intrinsic(ni
);
261 ppir_load_node
*lnode
;
262 ppir_store_node
*snode
;
264 switch (instr
->intrinsic
) {
265 case nir_intrinsic_load_input
:
266 if (!instr
->dest
.is_ssa
)
267 mask
= u_bit_consecutive(0, instr
->num_components
);
269 lnode
= ppir_node_create_dest(block
, ppir_op_load_varying
, &instr
->dest
, mask
);
273 lnode
->num_components
= instr
->num_components
;
274 lnode
->index
= nir_intrinsic_base(instr
) * 4 + nir_intrinsic_component(instr
);
277 case nir_intrinsic_load_frag_coord
:
278 if (!instr
->dest
.is_ssa
)
279 mask
= u_bit_consecutive(0, instr
->num_components
);
281 lnode
= ppir_node_create_dest(block
, ppir_op_load_fragcoord
, &instr
->dest
, mask
);
285 lnode
->num_components
= instr
->num_components
;
288 case nir_intrinsic_load_point_coord
:
289 if (!instr
->dest
.is_ssa
)
290 mask
= u_bit_consecutive(0, instr
->num_components
);
292 lnode
= ppir_node_create_dest(block
, ppir_op_load_pointcoord
, &instr
->dest
, mask
);
296 lnode
->num_components
= instr
->num_components
;
299 case nir_intrinsic_load_uniform
:
300 if (!instr
->dest
.is_ssa
)
301 mask
= u_bit_consecutive(0, instr
->num_components
);
303 lnode
= ppir_node_create_dest(block
, ppir_op_load_uniform
, &instr
->dest
, mask
);
307 lnode
->num_components
= instr
->num_components
;
308 lnode
->index
= nir_intrinsic_base(instr
);
309 lnode
->index
+= (uint32_t)nir_src_as_float(instr
->src
[0]);
313 case nir_intrinsic_store_output
:
314 snode
= ppir_node_create_dest(block
, ppir_op_store_color
, NULL
, 0);
318 snode
->index
= nir_intrinsic_base(instr
);
320 for (int i
= 0; i
< instr
->num_components
; i
++)
321 snode
->src
.swizzle
[i
] = i
;
323 ppir_node_add_src(block
->comp
, &snode
->node
, &snode
->src
, instr
->src
,
324 u_bit_consecutive(0, instr
->num_components
));
328 case nir_intrinsic_discard
:
329 return ppir_emit_discard(block
, ni
);
331 case nir_intrinsic_discard_if
:
332 return ppir_emit_discard_if(block
, ni
);
335 ppir_error("unsupported nir_intrinsic_instr %s\n",
336 nir_intrinsic_infos
[instr
->intrinsic
].name
);
341 static ppir_node
*ppir_emit_load_const(ppir_block
*block
, nir_instr
*ni
)
343 nir_load_const_instr
*instr
= nir_instr_as_load_const(ni
);
344 ppir_const_node
*node
= ppir_node_create_ssa(block
, ppir_op_const
, &instr
->def
);
348 assert(instr
->def
.bit_size
== 32);
350 for (int i
= 0; i
< instr
->def
.num_components
; i
++)
351 node
->constant
.value
[i
].i
= instr
->value
[i
].i32
;
352 node
->constant
.num
= instr
->def
.num_components
;
357 static ppir_node
*ppir_emit_ssa_undef(ppir_block
*block
, nir_instr
*ni
)
359 ppir_error("nir_ssa_undef_instr not support\n");
363 static ppir_node
*ppir_emit_tex(ppir_block
*block
, nir_instr
*ni
)
365 nir_tex_instr
*instr
= nir_instr_as_tex(ni
);
366 ppir_load_texture_node
*node
;
368 if (instr
->op
!= nir_texop_tex
) {
369 ppir_error("unsupported texop %d\n", instr
->op
);
373 node
= ppir_node_create_dest(block
, ppir_op_load_texture
, &instr
->dest
, 0);
377 node
->sampler
= instr
->texture_index
;
379 switch (instr
->sampler_dim
) {
380 case GLSL_SAMPLER_DIM_2D
:
381 case GLSL_SAMPLER_DIM_RECT
:
382 case GLSL_SAMPLER_DIM_EXTERNAL
:
385 ppir_debug("unsupported sampler dim: %d\n", instr
->sampler_dim
);
389 node
->sampler_dim
= instr
->sampler_dim
;
391 for (int i
= 0; i
< instr
->coord_components
; i
++)
392 node
->src_coords
.swizzle
[i
] = i
;
394 assert(instr
->num_srcs
== 1);
395 for (int i
= 0; i
< instr
->num_srcs
; i
++) {
396 switch (instr
->src
[i
].src_type
) {
397 case nir_tex_src_coord
:
398 ppir_node_add_src(block
->comp
, &node
->node
, &node
->src_coords
, &instr
->src
[i
].src
,
399 u_bit_consecutive(0, instr
->coord_components
));
402 ppir_debug("unknown texture source");
410 static ppir_node
*ppir_emit_jump(ppir_block
*block
, nir_instr
*ni
)
412 ppir_error("nir_jump_instr not support\n");
416 static ppir_node
*(*ppir_emit_instr
[nir_instr_type_phi
])(ppir_block
*, nir_instr
*) = {
417 [nir_instr_type_alu
] = ppir_emit_alu
,
418 [nir_instr_type_intrinsic
] = ppir_emit_intrinsic
,
419 [nir_instr_type_load_const
] = ppir_emit_load_const
,
420 [nir_instr_type_ssa_undef
] = ppir_emit_ssa_undef
,
421 [nir_instr_type_tex
] = ppir_emit_tex
,
422 [nir_instr_type_jump
] = ppir_emit_jump
,
425 static ppir_block
*ppir_block_create(ppir_compiler
*comp
)
427 ppir_block
*block
= rzalloc(comp
, ppir_block
);
431 list_inithead(&block
->node_list
);
432 list_inithead(&block
->instr_list
);
437 static bool ppir_emit_block(ppir_compiler
*comp
, nir_block
*nblock
)
439 ppir_block
*block
= ppir_block_create(comp
);
443 list_addtail(&block
->list
, &comp
->block_list
);
446 nir_foreach_instr(instr
, nblock
) {
447 assert(instr
->type
< nir_instr_type_phi
);
448 ppir_node
*node
= ppir_emit_instr
[instr
->type
](block
, instr
);
452 list_addtail(&node
->list
, &block
->node_list
);
458 static bool ppir_emit_if(ppir_compiler
*comp
, nir_if
*nif
)
460 ppir_error("if nir_cf_node not support\n");
464 static bool ppir_emit_loop(ppir_compiler
*comp
, nir_loop
*nloop
)
466 ppir_error("loop nir_cf_node not support\n");
470 static bool ppir_emit_function(ppir_compiler
*comp
, nir_function_impl
*nfunc
)
472 ppir_error("function nir_cf_node not support\n");
476 static bool ppir_emit_cf_list(ppir_compiler
*comp
, struct exec_list
*list
)
478 foreach_list_typed(nir_cf_node
, node
, node
, list
) {
481 switch (node
->type
) {
482 case nir_cf_node_block
:
483 ret
= ppir_emit_block(comp
, nir_cf_node_as_block(node
));
486 ret
= ppir_emit_if(comp
, nir_cf_node_as_if(node
));
488 case nir_cf_node_loop
:
489 ret
= ppir_emit_loop(comp
, nir_cf_node_as_loop(node
));
491 case nir_cf_node_function
:
492 ret
= ppir_emit_function(comp
, nir_cf_node_as_function(node
));
495 ppir_error("unknown NIR node type %d\n", node
->type
);
506 static ppir_compiler
*ppir_compiler_create(void *prog
, unsigned num_reg
, unsigned num_ssa
)
508 ppir_compiler
*comp
= rzalloc_size(
509 prog
, sizeof(*comp
) + ((num_reg
<< 2) + num_ssa
) * sizeof(ppir_node
*));
513 list_inithead(&comp
->block_list
);
514 list_inithead(&comp
->reg_list
);
516 comp
->var_nodes
= (ppir_node
**)(comp
+ 1);
517 comp
->reg_base
= num_ssa
;
522 static void ppir_add_ordering_deps(ppir_compiler
*comp
)
524 /* Some intrinsics do not have explicit dependencies and thus depend
525 * on instructions order. Consider discard_if and store_ouput as
526 * example. If we don't add fake dependency of discard_if to store_output
527 * scheduler may put store_output first and since store_output terminates
528 * shader on Utgard PP, rest of it will never be executed.
529 * Add fake dependencies for discard/branch/store to preserve
532 * TODO: scheduler should schedule discard_if as early as possible otherwise
533 * we may end up with suboptimal code for cases like this:
540 * In this case store depends on discard_if and s4, but since dependencies can
541 * be scheduled in any order it can result in code like this:
543 * instr1: s3 = s1 < s3
544 * instr2: s4 = s1 + s2
545 * instr3: discard_if s3
548 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
549 ppir_node
*prev_node
= NULL
;
550 list_for_each_entry(ppir_node
, node
, &block
->node_list
, list
) {
551 if (node
->type
== ppir_node_type_discard
||
552 node
->type
== ppir_node_type_store
||
553 node
->type
== ppir_node_type_branch
) {
555 ppir_node_add_dep(node
, prev_node
);
562 bool ppir_compile_nir(struct lima_fs_shader_state
*prog
, struct nir_shader
*nir
,
565 nir_function_impl
*func
= nir_shader_get_entrypoint(nir
);
566 ppir_compiler
*comp
= ppir_compiler_create(prog
, func
->reg_alloc
, func
->ssa_alloc
);
572 foreach_list_typed(nir_register
, reg
, node
, &func
->registers
) {
573 ppir_reg
*r
= rzalloc(comp
, ppir_reg
);
577 r
->index
= reg
->index
;
578 r
->num_components
= reg
->num_components
;
579 r
->live_in
= INT_MAX
;
582 list_addtail(&r
->list
, &comp
->reg_list
);
585 if (!ppir_emit_cf_list(comp
, &func
->body
))
588 /* If we have discard block add it to the very end */
589 if (comp
->discard_block
)
590 list_addtail(&comp
->discard_block
->list
, &comp
->block_list
);
592 ppir_add_ordering_deps(comp
);
594 ppir_node_print_prog(comp
);
596 if (!ppir_lower_prog(comp
))
599 if (!ppir_node_to_instr(comp
))
602 if (!ppir_schedule_prog(comp
))
605 if (!ppir_regalloc_prog(comp
))
608 if (!ppir_codegen_prog(comp
))