2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "nir/nir_builder.h"
26 #include "nir_constant_expressions.h"
27 #include "nir_control_flow.h"
28 #include "nir_loop_analyze.h"
/**
 * Gets the single block that jumps back to the loop header.  Already assumes
 * there is exactly one such block.
 */
static nir_block *
find_continue_block(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* The header must have exactly two predecessors: the block before the
    * loop and the single continue block inside it.
    */
   assert(header_block->predecessors->entries == 2);

   struct set_entry *pred_entry;
   set_foreach(header_block->predecessors, pred_entry) {
      /* Whichever predecessor is not the block before the loop must be the
       * continue block.
       */
      if (pred_entry->key != prev_block)
         return (nir_block *)pred_entry->key;
   }

   unreachable("Continue block not found!");
}
/**
 * This optimization detects if statements at the tops of loops where the
 * condition is a phi node of two constants and moves half of the if to above
 * the loop and the other half of the if to the end of the loop.  A simple for
 * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
 * ends up looking something like this:
 *
 * vec1 32 ssa_0 = load_const (0x00000000)
 * vec1 32 ssa_1 = load_const (0xffffffff)
 * loop {
 *    block block_1:
 *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *    if ssa_3 {
 *       block block_2:
 *       vec1 32 ssa_4 = load_const (0x00000001)
 *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
 *    } else {
 *       block block_3:
 *       break
 *    }
 *    block block_4:
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
 * }
 *
 * This turns it into something like this:
 *
 * // Stuff from block 1
 * // Stuff from block 3
 * loop {
 *    block block_1:
 *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
 *    vec1 32 ssa_6 = load_const (0x00000004)
 *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
 *    ...
 *    // Stuff from block 1
 *    // Stuff from block 2
 *    vec1 32 ssa_4 = load_const (0x00000001)
 *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
 * }
 */
static bool
opt_peel_loop_initial_if(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   /* Only used by assertions below on release builds. */
   MAYBE_UNUSED nir_block *prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   nir_block *continue_block = find_continue_block(loop);

   /* The if we want to peel must be the very first thing in the loop. */
   nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
   if (!if_node || if_node->type != nir_cf_node_if)
      return false;

   nir_if *nif = nir_cf_node_as_if(if_node);
   assert(nif->condition.is_ssa);

   /* The condition must be a phi in the loop header so that it can take
    * different constant values on loop entry vs. on continue.
    */
   nir_ssa_def *cond = nif->condition.ssa;
   if (cond->parent_instr->type != nir_instr_type_phi)
      return false;

   nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
   if (cond->parent_instr->block != header_block)
      return false;

   /* We already know we have exactly one continue */
   assert(exec_list_length(&cond_phi->srcs) == 2);

   /* Both phi sources must be constants; record the value seen on entry and
    * the value seen when coming around via the continue edge.
    */
   uint32_t entry_val = 0, continue_val = 0;
   nir_foreach_phi_src(src, cond_phi) {
      assert(src->src.is_ssa);
      nir_const_value *const_src = nir_src_as_const_value(src->src);
      if (!const_src)
         return false;

      if (src->pred == continue_block) {
         continue_val = const_src->u32[0];
      } else {
         assert(src->pred == prev_block);
         entry_val = const_src->u32[0];
      }
   }

   /* If they both execute or both don't execute, this is a job for
    * nir_dead_cf, not this pass.
    */
   if ((entry_val && continue_val) || (!entry_val && !continue_val))
      return false;

   /* The side taken on the continue edge stays in the loop; the side taken
    * on entry gets hoisted above the loop.
    */
   struct exec_list *continue_list, *entry_list;
   if (continue_val) {
      continue_list = &nif->then_list;
      entry_list = &nif->else_list;
   } else {
      continue_list = &nif->else_list;
      entry_list = &nif->then_list;
   }

   /* We want to be moving the contents of entry_list to above the loop so it
    * can't contain any break or continue instructions.
    */
   foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
      nir_foreach_block_in_cf_node(block, cf_node) {
         nir_instr *last_instr = nir_block_last_instr(block);
         if (last_instr && last_instr->type == nir_instr_type_jump)
            return false;
      }
   }

   /* We're about to re-arrange a bunch of blocks so make sure that we don't
    * have deref uses which cross block boundaries.  We don't want a deref
    * accidentally ending up in a phi.
    */
   nir_rematerialize_derefs_in_use_blocks_impl(
      nir_cf_node_get_function(&loop->cf_node));

   /* Before we do anything, convert the loop to LCSSA.  We're about to
    * replace a bunch of SSA defs with registers and this will prevent any of
    * it from leaking outside the loop.
    */
   nir_convert_loop_to_lcssa(loop);

   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   /* Get rid of phis in the header block since we will be duplicating it */
   nir_lower_phis_to_regs_block(header_block);
   /* Get rid of phis after the if since dominance will change */
   nir_lower_phis_to_regs_block(after_if_block);

   /* Get rid of SSA defs in the pieces we're about to move around */
   nir_lower_ssa_defs_to_regs_block(header_block);
   nir_foreach_block_in_cf_node(block, &nif->cf_node)
      nir_lower_ssa_defs_to_regs_block(block);

   /* Pull the header out of the loop, clone a copy above the loop (so the
    * first-iteration condition is computed before entry), and put the
    * original header back at the bottom of the loop.
    */
   nir_cf_list header, tmp;
   nir_cf_extract(&header, nir_before_block(header_block),
                           nir_after_block(header_block));

   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
   nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
                        nir_after_cf_list(entry_list));
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));

   nir_cf_reinsert(&header, nir_after_block_before_jump(continue_block));

   /* Get continue block again as the previous reinsert might have removed the block. */
   continue_block = find_continue_block(loop);

   /* Move the continue side of the if to the end of the loop body. */
   nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
                        nir_after_cf_list(continue_list));
   nir_cf_reinsert(&tmp, nir_after_block_before_jump(continue_block));

   /* Both branches have been emptied out; drop the now-useless if. */
   nir_cf_node_remove(&nif->cf_node);

   return true;
}
235 is_block_empty(nir_block
*block
)
237 return nir_cf_node_is_last(&block
->cf_node
) &&
238 exec_list_is_empty(&block
->instr_list
);
/**
 * This optimization turns:
 *
 *     if (cond) {
 *     } else {
 *         do_work();
 *     }
 *
 * into:
 *
 *     if (!cond) {
 *         do_work();
 *     } else {
 *     }
 */
static bool
opt_if_simplification(nir_builder *b, nir_if *nif)
{
   /* Only simplify if the then block is empty and the else block is not. */
   if (!is_block_empty(nir_if_first_then_block(nif)) ||
       is_block_empty(nir_if_first_else_block(nif)))
      return false;

   /* Make sure the condition is a comparison operation. */
   nir_instr *src_instr = nif->condition.ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
   if (!nir_alu_instr_is_comparison(alu_instr))
      return false;

   /* Insert the inverted instruction and rewrite the condition. */
   b->cursor = nir_after_instr(&alu_instr->instr);

   nir_ssa_def *new_condition =
      nir_inot(b, &alu_instr->dest.dest.ssa);

   nir_if_rewrite_condition(nif, nir_src_for_ssa(new_condition));

   /* Grab pointers to the last then/else blocks for fixing up the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   nir_block *else_block = nir_if_last_else_block(nif);

   /* Walk all the phis in the block immediately following the if statement
    * and swap their predecessors, since the then/else branches are about to
    * trade places.
    */
   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   nir_foreach_instr(instr, after_if_block) {
      if (instr->type != nir_instr_type_phi)
         continue;

      nir_phi_instr *phi = nir_instr_as_phi(instr);

      foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
         if (src->pred == else_block) {
            src->pred = then_block;
         } else if (src->pred == then_block) {
            src->pred = else_block;
         }
      }
   }

   /* Finally, move the else block to the then block. */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list),
                        nir_after_cf_list(&nif->else_list));
   nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list));

   return true;
}
/**
 * This optimization simplifies potential loop terminators which then allows
 * other passes such as opt_if_simplification() and loop unrolling to progress
 * further:
 *
 *     if (cond) {
 *        ... then block instructions ...
 *     } else {
 *         break;
 *     }
 *
 * into:
 *
 *     if (cond) {
 *     } else {
 *         break;
 *     }
 *     ... then block instructions ...
 */
static bool
opt_if_loop_terminator(nir_if *nif)
{
   nir_block *break_blk = NULL;
   nir_block *continue_from_blk = NULL;
   bool continue_from_then = true;

   nir_block *last_then = nir_if_last_then_block(nif);
   nir_block *last_else = nir_if_last_else_block(nif);

   /* Figure out which branch breaks out of the loop; the other branch is the
    * one whose instructions can be moved after the if.
    */
   if (nir_block_ends_in_break(last_then)) {
      break_blk = last_then;
      continue_from_blk = last_else;
      continue_from_then = false;
   } else if (nir_block_ends_in_break(last_else)) {
      break_blk = last_else;
      continue_from_blk = last_then;
   }

   /* Continue if the if-statement contained no jumps at all */
   if (!break_blk)
      return false;

   /* If the continue from block is empty then return as there is nothing to
    * move.
    */
   nir_block *first_continue_from_blk = continue_from_then ?
      nir_if_first_then_block(nif) :
      nir_if_first_else_block(nif);
   if (is_block_empty(first_continue_from_blk))
      return false;

   /* Only valid when the if is a "trivial" loop terminator (no other jumps,
    * nothing live out of the branch, etc.).
    */
   if (!nir_is_trivial_loop_if(nif, break_blk))
      return false;

   /* Finally, move the continue from branch after the if-statement. */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk),
                        nir_after_block(continue_from_blk));
   nir_cf_reinsert(&tmp, nir_after_cf_node(&nif->cf_node));

   return true;
}
/**
 * Replaces a use of an if-condition with an immediate boolean constant.
 *
 * \param b             builder whose cursor has already been positioned
 * \param use           the source to rewrite
 * \param nir_boolean   the constant value (NIR_TRUE / NIR_FALSE) to use
 * \param if_condition  true if \p use is the condition of an if rather than
 *                      an ordinary instruction source
 */
static void
replace_if_condition_use_with_const(nir_builder *b, nir_src *use,
                                    nir_const_value nir_boolean,
                                    bool if_condition)
{
   /* Create const */
   nir_ssa_def *const_def = nir_build_imm(b, 1, 32, nir_boolean);

   /* Rewrite use to use const */
   nir_src new_src = nir_src_for_ssa(const_def);
   if (if_condition)
      nir_if_rewrite_condition(use->parent_if, new_src);
   else
      nir_instr_rewrite_src(use->parent_instr, use, new_src);
}
/**
 * Tries to evaluate an if-condition at the given cursor position.
 *
 * If the cursor's block is dominated by the then branch the condition must
 * be true there; if dominated by the else branch it must be false.  Returns
 * true and stores the known value in \p value on success, false when the
 * condition cannot be determined at that location.
 */
static bool
evaluate_if_condition(nir_if *nif, nir_cursor cursor, uint32_t *value)
{
   nir_block *use_block = nir_cursor_current_block(cursor);
   if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
      *value = NIR_TRUE;
      return true;
   } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
      *value = NIR_FALSE;
      return true;
   } else {
      return false;
   }
}
/**
 * This propagates if condition evaluation down the chain of some alu
 * instructions. For example by checking the use of some of the following alu
 * instruction we can eventually replace ssa_107 with NIR_TRUE.
 *
 *   vec1 32 ssa_85 = load_const (0x00000002)
 *   vec1 32 ssa_86 = ieq ssa_48, ssa_85
 *   vec1 32 ssa_87 = load_const (0x00000001)
 *   vec1 32 ssa_88 = ieq ssa_48, ssa_87
 *   vec1 32 ssa_89 = ior ssa_86, ssa_88
 *   vec1 32 ssa_90 = ieq ssa_48, ssa_0
 *   vec1 32 ssa_91 = ior ssa_89, ssa_90
 *   ...
 *   vec1 32 ssa_107 = inot ssa_91
 */
static bool
propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
                         nir_src *alu_use, nir_alu_instr *alu,
                         bool is_if_condition)
{
   bool progress = false;

   nir_const_value bool_value;
   b->cursor = nir_before_src(alu_use, is_if_condition);
   if (nir_op_infos[alu->op].num_inputs == 1) {
      /* Unary case: the whole ALU result folds to a constant. */
      assert(alu->op == nir_op_inot || alu->op == nir_op_b2i);

      if (evaluate_if_condition(nif, b->cursor, &bool_value.u32[0])) {
         assert(nir_src_bit_size(alu->src[0].src) == 32);

         nir_const_value result =
            nir_eval_const_opcode(alu->op, 1, 32, &bool_value);

         replace_if_condition_use_with_const(b, alu_use, result,
                                             is_if_condition);
         progress = true;
      }
   } else {
      /* Binary case: only the operand fed by the if-condition becomes a
       * constant; rebuild the ALU with that operand replaced.
       */
      assert(alu->op == nir_op_ior || alu->op == nir_op_iand);

      if (evaluate_if_condition(nif, b->cursor, &bool_value.u32[0])) {
         nir_ssa_def *def[2];
         for (unsigned i = 0; i < 2; i++) {
            if (alu->src[i].src.ssa == use_src->ssa) {
               def[i] = nir_build_imm(b, 1, 32, bool_value);
            } else {
               def[i] = alu->src[i].src.ssa;
            }
         }

         nir_ssa_def *nalu =
            nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);

         /* Rewrite use to use new alu instruction */
         nir_src new_src = nir_src_for_ssa(nalu);

         if (is_if_condition)
            nir_if_rewrite_condition(alu_use->parent_if, new_src);
         else
            nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src);

         progress = true;
      }
   }

   return progress;
}
512 can_propagate_through_alu(nir_src
*src
)
514 if (src
->parent_instr
->type
== nir_instr_type_alu
&&
515 (nir_instr_as_alu(src
->parent_instr
)->op
== nir_op_ior
||
516 nir_instr_as_alu(src
->parent_instr
)->op
== nir_op_iand
||
517 nir_instr_as_alu(src
->parent_instr
)->op
== nir_op_inot
||
518 nir_instr_as_alu(src
->parent_instr
)->op
== nir_op_b2i
))
/**
 * Tries to replace \p use_src (a use of an if-condition) with a constant,
 * and, when the use feeds a propagatable ALU instruction, recurses through
 * all uses of that ALU result as well.
 *
 * \param is_if_condition  true when \p use_src is itself the condition of an
 *                         if statement rather than an instruction source
 */
static bool
evaluate_condition_use(nir_builder *b, nir_if *nif, nir_src *use_src,
                       bool is_if_condition)
{
   bool progress = false;

   nir_const_value value;
   b->cursor = nir_before_src(use_src, is_if_condition);

   if (evaluate_if_condition(nif, b->cursor, &value.u32[0])) {
      replace_if_condition_use_with_const(b, use_src, value, is_if_condition);
      progress = true;
   }

   /* Only chase through ALU chains for ordinary uses, not if-conditions. */
   if (!is_if_condition && can_propagate_through_alu(use_src)) {
      nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr);

      /* Safe variants: propagation rewrites sources while we iterate. */
      nir_foreach_use_safe(alu_use, &alu->dest.dest.ssa) {
         progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu,
                                              false);
      }

      nir_foreach_if_use_safe(alu_use, &alu->dest.dest.ssa) {
         progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu,
                                              true);
      }
   }

   return progress;
}
/**
 * Evaluates every use of an if's condition inside its own branches, where
 * the condition's value is statically known.
 */
static bool
opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif)
{
   bool progress = false;

   /* Evaluate any uses of the if condition inside the if branches */
   assert(nif->condition.is_ssa);
   nir_foreach_use_safe(use_src, nif->condition.ssa) {
      progress |= evaluate_condition_use(b, nif, use_src, false);
   }

   nir_foreach_if_use_safe(use_src, nif->condition.ssa) {
      /* Skip this if itself: rewriting its own condition with a constant
       * would be nonsense; only nested ifs can benefit.
       */
      if (use_src->parent_if != nif)
         progress |= evaluate_condition_use(b, nif, use_src, true);
   }

   return progress;
}
/**
 * Recursively runs the metadata-invalidating if-optimizations over a CF
 * list: loop-terminator simplification, then-branch inversion and
 * initial-if peeling.  Children are optimized before their parents.
 */
static bool
opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_cf_list(b, &nif->then_list);
         progress |= opt_if_cf_list(b, &nif->else_list);
         progress |= opt_if_loop_terminator(nif);
         progress |= opt_if_simplification(b, nif);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_cf_list(b, &loop->body);
         progress |= opt_peel_loop_initial_if(loop);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}
/**
 * These optimisations depend on nir_metadata_block_index and therefore must
 * not do anything to cause the metadata to become invalid.
 */
static bool
opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         progress |= opt_if_safe_cf_list(b, &nif->then_list);
         progress |= opt_if_safe_cf_list(b, &nif->else_list);
         progress |= opt_if_evaluate_condition_use(b, nif);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         progress |= opt_if_safe_cf_list(b, &loop->body);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}
643 nir_opt_if(nir_shader
*shader
)
645 bool progress
= false;
647 nir_foreach_function(function
, shader
) {
648 if (function
->impl
== NULL
)
652 nir_builder_init(&b
, function
->impl
);
654 nir_metadata_require(function
->impl
, nir_metadata_block_index
|
655 nir_metadata_dominance
);
656 progress
= opt_if_safe_cf_list(&b
, &function
->impl
->body
);
657 nir_metadata_preserve(function
->impl
, nir_metadata_block_index
|
658 nir_metadata_dominance
);
660 if (opt_if_cf_list(&b
, &function
->impl
->body
)) {
661 nir_metadata_preserve(function
->impl
, nir_metadata_none
);
663 /* If that made progress, we're no longer really in SSA form. We
664 * need to convert registers back into SSA defs and clean up SSA defs
665 * that don't dominate their uses.
667 nir_lower_regs_to_ssa_impl(function
->impl
);