/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"

#include "ppir.h"
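
/* Translation of a NIR fragment shader into PP IR (ppir) for the Mali
 * Utgard pixel processor.  The helpers directly below create ppir nodes
 * for SSA defs, NIR registers and instruction destinations. */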
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}
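
/* Create a node whose destination is a NIR register: the matching ppir_reg
 * is looked up by index in the compiler's reg_list. */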
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, &dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.reg, mask);
   }

   return ppir_node_create(block, op, index, 0);
}
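
/* Hook up one source of a node: find the node that produced the value
 * (an SSA def or a register component), add a dependency edge to it and
 * record the producer in the ppir_src.  Registers that are read before
 * any write get a dummy producer node so later passes have something to
 * reference. */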
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   ppir_node_target_assign(ps, child);
}
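
/* Mapping from NIR ALU opcodes to ppir opcodes; entries left at -1 are not
 * supported and make ppir_emit_alu() report an error. */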
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};
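
/* Translate one NIR ALU instruction into a ppir_alu_node: map the opcode
 * through the table above, copy swizzles and abs/neg modifiers, and wire
 * up the sources. */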
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask = pd->write_mask;

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}
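
/* discard is implemented by branching to a dedicated block that holds a
 * single discard node; that block is created lazily on the first
 * discard_if and appended to the program at the very end. */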
static ppir_block *ppir_block_create(ppir_compiler *comp);

static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (!discard)
      return false;

   list_addtail(&discard->node.list, &block->node_list);

   return true;
}
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;

   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}
static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}
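
/* Lower NIR intrinsics: varying/uniform/sysval loads become ppir load
 * nodes, store_output either marks its source node as the shader end or
 * inserts a final mov, and discard/discard_if are forwarded to the
 * helpers above. */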
static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;
   ppir_op op;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         assert(0);
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output is ssa, that register
       * can be directly marked as the output.
       * If discard is used or the source is not ssa, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      if (!block->comp->uses_discard && instr->src->is_ssa) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
            break;
         default:
            node->is_end = 1;
            return true;
         }
      }

      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_end = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      node = ppir_emit_discard(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      node = ppir_emit_discard_if(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}
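
/* load_const becomes a ppir const node; only 32-bit values are expected. */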
static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return false;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   list_addtail(&node->node.list, &block->node_list);
   return true;
}
static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return false;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   list_addtail(&node->list, &block->node_list);
   return true;
}
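
/* Texture sampling: emit a ld_tex node and make sure its coordinates reach
 * it through the pipeline register, either by promoting the varying load
 * that feeds it to load_coords or by inserting an explicit load_coords_reg
 * node in front of it. */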
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;
   unsigned mask = 0;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         if (ns->is_ssa) {
            ppir_node *child = block->comp->var_nodes[ns->ssa->index];
            if (child->op == ppir_op_load_varying) {
               /* If the successor is load_texture, promote it to load_coords */
               nir_tex_src *nts = (nir_tex_src *)ns;
               if (nts->src_type == nir_tex_src_coord)
                  child->op = ppir_op_load_coords;
            }
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];

      if (node->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
         load->num_components = 3;
      else
         load->num_components = 2;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __FUNCTION__, load->index, node->node.index);

      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}
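
/* Control flow: NIR blocks are mapped to ppir blocks through the
 * comp->blocks hash table, and NIR jumps (break/continue) become
 * unconditional branches to the corresponding target block. */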
static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uint64_t)nblock);

   return block;
}
static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
      break;
   }
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("nir_jump_instr not supported\n");
      return false;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   list_addtail(&node->list, &block->node_list);
   return true;
}
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu]        = ppir_emit_alu,
   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
   [nir_instr_type_tex]        = ppir_emit_tex,
   [nir_instr_type_jump]       = ppir_emit_jump,
};
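
/* Per-block emission: allocate the ppir block, then walk the NIR block and
 * dispatch each instruction through the ppir_emit_instr table above. */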
static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}
static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      if (!ppir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}
static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating the following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if the else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);

   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}
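
/* Loops: remember the loop header so that continue knows where to jump,
 * emit the body, then close the loop with an unconditional back-edge
 * branch from the last block. */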
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);

   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   return true;
}
static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not supported\n");
   return false;
}
static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}
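
/* The compiler struct is allocated together with the var_nodes array:
 * num_ssa slots for SSA defs followed by four slots (one per component)
 * for each NIR register, so reg_base marks where the register slots
 * start. */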
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;

   return comp;
}
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instruction order. Consider discard_if and the is_end node as an
    * example. If we don't add a fake dependency between discard_if and
    * is_end, the scheduler may put is_end first, and since is_end
    * terminates the shader on Utgard PP, the rest of it will never be
    * executed. Add fake dependencies for discard/branch/store to preserve
    * the ordering.
    *
    * TODO: scheduler should schedule discard_if as early as possible otherwise
    * we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies can
    * be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->is_end ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}
static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   ASSERTED int ret = asprintf(&shaderdb,
                               "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                               gl_shader_stage_name(info->stage),
                               comp->cur_instr_index,
                               comp->num_loops,
                               comp->num_spills,
                               comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}
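
/* For register (non-SSA) accesses, make every write of a register depend
 * on the reads that precede it in the same block, so the scheduler cannot
 * move the write above a read of the old value. */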
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}
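
/* Top-level entry point: build ppir blocks and nodes from the NIR shader,
 * then run lowering, node-to-instruction pairing, scheduling, register
 * allocation and code generation. */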
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
                      struct pipe_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
   if (!comp)
      return false;

   comp->uses_discard = nir->info.fs.uses_discard;

   /* 1st pass: create ppir blocks */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uint64_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   /* Validate outputs, we support only gl_FragColor */
   nir_foreach_shader_out_variable(var, nir) {
      switch (var->data.location) {
      case FRAG_RESULT_COLOR:
      case FRAG_RESULT_DATA0:
         break;
      default:
         ppir_error("unsupported output type\n");
         goto err_out0;
      }
   }

   foreach_list_typed(nir_register, reg, node, &func->registers) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = reg->index;
      r->num_components = reg->num_components;
      list_addtail(&r->list, &comp->reg_list);
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have discard block add it to the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks, NULL);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks, NULL);
   ralloc_free(comp);
   return false;
}