2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "nir_control_flow.h"
28 * This optimization detects if statements at the tops of loops where the
29 * condition is a phi node of two constants and moves half of the if to above
30 * the loop and the other half of the if to the end of the loop. A simple for
31 * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
32 * ends up looking something like this:
34 * vec1 32 ssa_0 = load_const (0x00000000)
35 * vec1 32 ssa_1 = load_const (0xffffffff)
38 * vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
39 * vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
42 * vec1 32 ssa_4 = load_const (0x00000001)
43 * vec1 32 ssa_5 = iadd ssa_2, ssa_4
48 * vec1 32 ssa_6 = load_const (0x00000004)
49 * vec1 32 ssa_7 = ilt ssa_5, ssa_6
59 * This turns it into something like this:
61 * // Stuff from block 1
62 * // Stuff from block 3
65 * vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
66 * vec1 32 ssa_6 = load_const (0x00000004)
67 * vec1 32 ssa_7 = ilt ssa_5, ssa_6
75 * // Stuff from block 1
76 * // Stuff from block 2
77 * vec1 32 ssa_4 = load_const (0x00000001)
78 * vec1 32 ssa_5 = iadd ssa_2, ssa_4
82 opt_peel_loop_initial_if(nir_loop
*loop
)
84 nir_block
*header_block
= nir_loop_first_block(loop
);
85 nir_block
*prev_block
=
86 nir_cf_node_as_block(nir_cf_node_prev(&loop
->cf_node
));
88 /* It would be insane if this were not true */
89 assert(_mesa_set_search(header_block
->predecessors
, prev_block
));
91 /* The loop must have exactly one continue block which could be a block
92 * ending in a continue instruction or the "natural" continue from the
93 * last block in the loop back to the top.
95 if (header_block
->predecessors
->entries
!= 2)
98 nir_block
*continue_block
= NULL
;
99 struct set_entry
*pred_entry
;
100 set_foreach(header_block
->predecessors
, pred_entry
) {
101 if (pred_entry
->key
!= prev_block
)
102 continue_block
= (void *)pred_entry
->key
;
105 nir_cf_node
*if_node
= nir_cf_node_next(&header_block
->cf_node
);
106 if (!if_node
|| if_node
->type
!= nir_cf_node_if
)
109 nir_if
*nif
= nir_cf_node_as_if(if_node
);
110 assert(nif
->condition
.is_ssa
);
112 nir_ssa_def
*cond
= nif
->condition
.ssa
;
113 if (cond
->parent_instr
->type
!= nir_instr_type_phi
)
116 nir_phi_instr
*cond_phi
= nir_instr_as_phi(cond
->parent_instr
);
117 if (cond
->parent_instr
->block
!= header_block
)
120 /* We already know we have exactly one continue */
121 assert(exec_list_length(&cond_phi
->srcs
) == 2);
123 uint32_t entry_val
= 0, continue_val
= 0;
124 nir_foreach_phi_src(src
, cond_phi
) {
125 assert(src
->src
.is_ssa
);
126 nir_const_value
*const_src
= nir_src_as_const_value(src
->src
);
130 if (src
->pred
== continue_block
) {
131 continue_val
= const_src
->u32
[0];
133 assert(src
->pred
== prev_block
);
134 entry_val
= const_src
->u32
[0];
138 /* If they both execute or both don't execute, this is a job for
139 * nir_dead_cf, not this pass.
141 if ((entry_val
&& continue_val
) || (!entry_val
&& !continue_val
))
144 struct exec_list
*continue_list
, *entry_list
;
146 continue_list
= &nif
->then_list
;
147 entry_list
= &nif
->else_list
;
149 continue_list
= &nif
->else_list
;
150 entry_list
= &nif
->then_list
;
153 /* We want to be moving the contents of entry_list to above the loop so it
154 * can't contain any break or continue instructions.
156 foreach_list_typed(nir_cf_node
, cf_node
, node
, entry_list
) {
157 nir_foreach_block_in_cf_node(block
, cf_node
) {
158 nir_instr
*last_instr
= nir_block_last_instr(block
);
159 if (last_instr
&& last_instr
->type
== nir_instr_type_jump
)
164 /* Before we do anything, convert the loop to LCSSA. We're about to
165 * replace a bunch of SSA defs with registers and this will prevent any of
166 * it from leaking outside the loop.
168 nir_convert_loop_to_lcssa(loop
);
170 nir_block
*after_if_block
=
171 nir_cf_node_as_block(nir_cf_node_next(&nif
->cf_node
));
173 /* Get rid of phis in the header block since we will be duplicating it */
174 nir_lower_phis_to_regs_block(header_block
);
175 /* Get rid of phis after the if since dominance will change */
176 nir_lower_phis_to_regs_block(after_if_block
);
178 /* Get rid of SSA defs in the pieces we're about to move around */
179 nir_lower_ssa_defs_to_regs_block(header_block
);
180 nir_foreach_block_in_cf_node(block
, &nif
->cf_node
)
181 nir_lower_ssa_defs_to_regs_block(block
);
183 nir_cf_list header
, tmp
;
184 nir_cf_extract(&header
, nir_before_block(header_block
),
185 nir_after_block(header_block
));
187 nir_cf_list_clone(&tmp
, &header
, &loop
->cf_node
, NULL
);
188 nir_cf_reinsert(&tmp
, nir_before_cf_node(&loop
->cf_node
));
189 nir_cf_extract(&tmp
, nir_before_cf_list(entry_list
),
190 nir_after_cf_list(entry_list
));
191 nir_cf_reinsert(&tmp
, nir_before_cf_node(&loop
->cf_node
));
193 nir_cf_reinsert(&header
, nir_after_block_before_jump(continue_block
));
194 nir_cf_extract(&tmp
, nir_before_cf_list(continue_list
),
195 nir_after_cf_list(continue_list
));
196 nir_cf_reinsert(&tmp
, nir_after_block_before_jump(continue_block
));
198 nir_cf_node_remove(&nif
->cf_node
);
204 opt_if_cf_list(struct exec_list
*cf_list
)
206 bool progress
= false;
207 foreach_list_typed(nir_cf_node
, cf_node
, node
, cf_list
) {
208 switch (cf_node
->type
) {
209 case nir_cf_node_block
:
212 case nir_cf_node_if
: {
213 nir_if
*nif
= nir_cf_node_as_if(cf_node
);
214 progress
|= opt_if_cf_list(&nif
->then_list
);
215 progress
|= opt_if_cf_list(&nif
->else_list
);
219 case nir_cf_node_loop
: {
220 nir_loop
*loop
= nir_cf_node_as_loop(cf_node
);
221 progress
|= opt_if_cf_list(&loop
->body
);
222 progress
|= opt_peel_loop_initial_if(loop
);
226 case nir_cf_node_function
:
227 unreachable("Invalid cf type");
235 nir_opt_if(nir_shader
*shader
)
237 bool progress
= false;
239 nir_foreach_function(function
, shader
) {
240 if (function
->impl
== NULL
)
243 if (opt_if_cf_list(&function
->impl
->body
)) {
244 nir_metadata_preserve(function
->impl
, nir_metadata_none
);
246 /* If that made progress, we're no longer really in SSA form. We
247 * need to convert registers back into SSA defs and clean up SSA defs
248 * that don't dominate their uses.
250 nir_convert_to_ssa_impl(function
->impl
);