nir: Do not use continue block after removing it.
[mesa.git] / src / compiler / nir / nir_opt_if.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir/nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
28
29 /**
30 * Gets the single block that jumps back to the loop header. Already assumes
31 * there is exactly one such block.
32 */
33 static nir_block*
34 find_continue_block(nir_loop *loop)
35 {
36 nir_block *header_block = nir_loop_first_block(loop);
37 nir_block *prev_block =
38 nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
39
40 assert(header_block->predecessors->entries == 2);
41
42 struct set_entry *pred_entry;
43 set_foreach(header_block->predecessors, pred_entry) {
44 if (pred_entry->key != prev_block)
45 return (nir_block*)pred_entry->key;
46 }
47 }
48
49 /**
50 * This optimization detects if statements at the tops of loops where the
51 * condition is a phi node of two constants and moves half of the if to above
52 * the loop and the other half of the if to the end of the loop. A simple for
53 * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
54 * ends up looking something like this:
55 *
56 * vec1 32 ssa_0 = load_const (0x00000000)
57 * vec1 32 ssa_1 = load_const (0xffffffff)
58 * loop {
59 * block block_1:
60 * vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
61 * vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
62 * if ssa_2 {
63 * block block_2:
64 * vec1 32 ssa_4 = load_const (0x00000001)
65 * vec1 32 ssa_5 = iadd ssa_2, ssa_4
66 * } else {
67 * block block_3:
68 * }
69 * block block_4:
70 * vec1 32 ssa_6 = load_const (0x00000004)
71 * vec1 32 ssa_7 = ilt ssa_5, ssa_6
72 * if ssa_7 {
73 * block block_5:
74 * } else {
75 * block block_6:
76 * break
77 * }
78 * block block_7:
79 * }
80 *
81 * This turns it into something like this:
82 *
83 * // Stuff from block 1
84 * // Stuff from block 3
85 * loop {
86 * block block_1:
87 * vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
88 * vec1 32 ssa_6 = load_const (0x00000004)
89 * vec1 32 ssa_7 = ilt ssa_5, ssa_6
90 * if ssa_7 {
91 * block block_5:
92 * } else {
93 * block block_6:
94 * break
95 * }
96 * block block_7:
97 * // Stuff from block 1
98 * // Stuff from block 2
99 * vec1 32 ssa_4 = load_const (0x00000001)
100 * vec1 32 ssa_5 = iadd ssa_2, ssa_4
101 * }
102 */
103 static bool
104 opt_peel_loop_initial_if(nir_loop *loop)
105 {
106 nir_block *header_block = nir_loop_first_block(loop);
107 nir_block *prev_block =
108 nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
109
110 /* It would be insane if this were not true */
111 assert(_mesa_set_search(header_block->predecessors, prev_block));
112
113 /* The loop must have exactly one continue block which could be a block
114 * ending in a continue instruction or the "natural" continue from the
115 * last block in the loop back to the top.
116 */
117 if (header_block->predecessors->entries != 2)
118 return false;
119
120 nir_block *continue_block = find_continue_block(loop);
121
122 nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
123 if (!if_node || if_node->type != nir_cf_node_if)
124 return false;
125
126 nir_if *nif = nir_cf_node_as_if(if_node);
127 assert(nif->condition.is_ssa);
128
129 nir_ssa_def *cond = nif->condition.ssa;
130 if (cond->parent_instr->type != nir_instr_type_phi)
131 return false;
132
133 nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
134 if (cond->parent_instr->block != header_block)
135 return false;
136
137 /* We already know we have exactly one continue */
138 assert(exec_list_length(&cond_phi->srcs) == 2);
139
140 uint32_t entry_val = 0, continue_val = 0;
141 nir_foreach_phi_src(src, cond_phi) {
142 assert(src->src.is_ssa);
143 nir_const_value *const_src = nir_src_as_const_value(src->src);
144 if (!const_src)
145 return false;
146
147 if (src->pred == continue_block) {
148 continue_val = const_src->u32[0];
149 } else {
150 assert(src->pred == prev_block);
151 entry_val = const_src->u32[0];
152 }
153 }
154
155 /* If they both execute or both don't execute, this is a job for
156 * nir_dead_cf, not this pass.
157 */
158 if ((entry_val && continue_val) || (!entry_val && !continue_val))
159 return false;
160
161 struct exec_list *continue_list, *entry_list;
162 if (continue_val) {
163 continue_list = &nif->then_list;
164 entry_list = &nif->else_list;
165 } else {
166 continue_list = &nif->else_list;
167 entry_list = &nif->then_list;
168 }
169
170 /* We want to be moving the contents of entry_list to above the loop so it
171 * can't contain any break or continue instructions.
172 */
173 foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
174 nir_foreach_block_in_cf_node(block, cf_node) {
175 nir_instr *last_instr = nir_block_last_instr(block);
176 if (last_instr && last_instr->type == nir_instr_type_jump)
177 return false;
178 }
179 }
180
181 /* Before we do anything, convert the loop to LCSSA. We're about to
182 * replace a bunch of SSA defs with registers and this will prevent any of
183 * it from leaking outside the loop.
184 */
185 nir_convert_loop_to_lcssa(loop);
186
187 nir_block *after_if_block =
188 nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
189
190 /* Get rid of phis in the header block since we will be duplicating it */
191 nir_lower_phis_to_regs_block(header_block);
192 /* Get rid of phis after the if since dominance will change */
193 nir_lower_phis_to_regs_block(after_if_block);
194
195 /* Get rid of SSA defs in the pieces we're about to move around */
196 nir_lower_ssa_defs_to_regs_block(header_block);
197 nir_foreach_block_in_cf_node(block, &nif->cf_node)
198 nir_lower_ssa_defs_to_regs_block(block);
199
200 nir_cf_list header, tmp;
201 nir_cf_extract(&header, nir_before_block(header_block),
202 nir_after_block(header_block));
203
204 nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
205 nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
206 nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
207 nir_after_cf_list(entry_list));
208 nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
209
210 nir_cf_reinsert(&header, nir_after_block_before_jump(continue_block));
211
212 /* Get continue block again as the previous reinsert might have removed the block. */
213 continue_block = find_continue_block(loop);
214
215 nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
216 nir_after_cf_list(continue_list));
217 nir_cf_reinsert(&tmp, nir_after_block_before_jump(continue_block));
218
219 nir_cf_node_remove(&nif->cf_node);
220
221 return true;
222 }
223
224 static bool
225 is_block_empty(nir_block *block)
226 {
227 return nir_cf_node_is_last(&block->cf_node) &&
228 exec_list_is_empty(&block->instr_list);
229 }
230
231 /**
232 * This optimization turns:
233 *
234 * if (cond) {
235 * } else {
236 * do_work();
237 * }
238 *
239 * into:
240 *
241 * if (!cond) {
242 * do_work();
243 * } else {
244 * }
245 */
246 static bool
247 opt_if_simplification(nir_builder *b, nir_if *nif)
248 {
249 /* Only simplify if the then block is empty and the else block is not. */
250 if (!is_block_empty(nir_if_first_then_block(nif)) ||
251 is_block_empty(nir_if_first_else_block(nif)))
252 return false;
253
254 /* Make sure the condition is a comparison operation. */
255 nir_instr *src_instr = nif->condition.ssa->parent_instr;
256 if (src_instr->type != nir_instr_type_alu)
257 return false;
258
259 nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
260 if (!nir_alu_instr_is_comparison(alu_instr))
261 return false;
262
263 /* Insert the inverted instruction and rewrite the condition. */
264 b->cursor = nir_after_instr(&alu_instr->instr);
265
266 nir_ssa_def *new_condition =
267 nir_inot(b, &alu_instr->dest.dest.ssa);
268
269 nir_if_rewrite_condition(nif, nir_src_for_ssa(new_condition));
270
271 /* Grab pointers to the last then/else blocks for fixing up the phis. */
272 nir_block *then_block = nir_if_last_then_block(nif);
273 nir_block *else_block = nir_if_last_else_block(nif);
274
275 /* Walk all the phis in the block immediately following the if statement and
276 * swap the blocks.
277 */
278 nir_block *after_if_block =
279 nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
280
281 nir_foreach_instr(instr, after_if_block) {
282 if (instr->type != nir_instr_type_phi)
283 continue;
284
285 nir_phi_instr *phi = nir_instr_as_phi(instr);
286
287 foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
288 if (src->pred == else_block) {
289 src->pred = then_block;
290 } else if (src->pred == then_block) {
291 src->pred = else_block;
292 }
293 }
294 }
295
296 /* Finally, move the else block to the then block. */
297 nir_cf_list tmp;
298 nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list),
299 nir_after_cf_list(&nif->else_list));
300 nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list));
301
302 return true;
303 }
304
305 /**
306 * This optimization simplifies potential loop terminators which then allows
307 * other passes such as opt_if_simplification() and loop unrolling to progress
308 * further:
309 *
310 * if (cond) {
311 * ... then block instructions ...
312 * } else {
313 * ...
314 * break;
315 * }
316 *
317 * into:
318 *
319 * if (cond) {
320 * } else {
321 * ...
322 * break;
323 * }
324 * ... then block instructions ...
325 */
326 static bool
327 opt_if_loop_terminator(nir_if *nif)
328 {
329 nir_block *break_blk = NULL;
330 nir_block *continue_from_blk = NULL;
331 bool continue_from_then = true;
332
333 nir_block *last_then = nir_if_last_then_block(nif);
334 nir_block *last_else = nir_if_last_else_block(nif);
335
336 if (nir_block_ends_in_break(last_then)) {
337 break_blk = last_then;
338 continue_from_blk = last_else;
339 continue_from_then = false;
340 } else if (nir_block_ends_in_break(last_else)) {
341 break_blk = last_else;
342 continue_from_blk = last_then;
343 }
344
345 /* Continue if the if-statement contained no jumps at all */
346 if (!break_blk)
347 return false;
348
349 /* If the continue from block is empty then return as there is nothing to
350 * move.
351 */
352 nir_block *first_continue_from_blk = continue_from_then ?
353 nir_if_first_then_block(nif) :
354 nir_if_first_else_block(nif);
355 if (is_block_empty(first_continue_from_blk))
356 return false;
357
358 if (!nir_is_trivial_loop_if(nif, break_blk))
359 return false;
360
361 /* Finally, move the continue from branch after the if-statement. */
362 nir_cf_list tmp;
363 nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk),
364 nir_after_block(continue_from_blk));
365 nir_cf_reinsert(&tmp, nir_after_cf_node(&nif->cf_node));
366
367 return true;
368 }
369
370 static bool
371 opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
372 {
373 bool progress = false;
374 foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
375 switch (cf_node->type) {
376 case nir_cf_node_block:
377 break;
378
379 case nir_cf_node_if: {
380 nir_if *nif = nir_cf_node_as_if(cf_node);
381 progress |= opt_if_cf_list(b, &nif->then_list);
382 progress |= opt_if_cf_list(b, &nif->else_list);
383 progress |= opt_if_loop_terminator(nif);
384 progress |= opt_if_simplification(b, nif);
385 break;
386 }
387
388 case nir_cf_node_loop: {
389 nir_loop *loop = nir_cf_node_as_loop(cf_node);
390 progress |= opt_if_cf_list(b, &loop->body);
391 progress |= opt_peel_loop_initial_if(loop);
392 break;
393 }
394
395 case nir_cf_node_function:
396 unreachable("Invalid cf type");
397 }
398 }
399
400 return progress;
401 }
402
403 bool
404 nir_opt_if(nir_shader *shader)
405 {
406 bool progress = false;
407
408 nir_foreach_function(function, shader) {
409 if (function->impl == NULL)
410 continue;
411
412 nir_builder b;
413 nir_builder_init(&b, function->impl);
414
415 if (opt_if_cf_list(&b, &function->impl->body)) {
416 nir_metadata_preserve(function->impl, nir_metadata_none);
417
418 /* If that made progress, we're no longer really in SSA form. We
419 * need to convert registers back into SSA defs and clean up SSA defs
420 * that don't dominate their uses.
421 */
422 nir_lower_regs_to_ssa_impl(function->impl);
423
424 /* Calling nir_convert_loop_to_lcssa() in opt_peel_loop_initial_if()
425 * adds extra phi nodes which may not be valid if they're used for
426 * something such as a deref. Remove any unneeded phis.
427 */
428 nir_opt_remove_phis_impl(function->impl);
429
430 progress = true;
431 }
432 }
433
434 return progress;
435 }