8406880204a017eb2a92905c8d2166ddc1e572b9
[mesa.git] / src / compiler / nir / nir_opt_loop_unroll.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
28
29
/* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32
 * instructions. (Multiply counting nodes and magic number 5.) However, there
 * is no 1:1 mapping between GLSL IR and NIR, so 25 was picked because it
 * seemed to give about the same results (around 5 instructions per node).
 * Some loops that would unroll with GLSL IR fail to unroll if we set this to
 * 25, so we set it to 26 instead.
 */
37 #define LOOP_UNROLL_LIMIT 26
38
39 /* Prepare this loop for unrolling by first converting to lcssa and then
40 * converting the phis from the top level of the loop body to regs.
41 * Partially converting out of SSA allows us to unroll the loop without having
42 * to keep track of and update phis along the way which gets tricky and
43 * doesn't add much value over converting to regs.
44 *
45 * The loop may have a continue instruction at the end of the loop which does
46 * nothing. Once we're out of SSA, we can safely delete it so we don't have
47 * to deal with it later.
48 */
49 static void
50 loop_prepare_for_unroll(nir_loop *loop)
51 {
52 nir_rematerialize_derefs_in_use_blocks_impl(
53 nir_cf_node_get_function(&loop->cf_node));
54
55 nir_convert_loop_to_lcssa(loop);
56
57 /* Lower phis at the top level of the loop body */
58 foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
59 if (nir_cf_node_block == node->type) {
60 nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
61 }
62 }
63
64 /* Lower phis after the loop */
65 nir_block *block_after_loop =
66 nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
67
68 nir_lower_phis_to_regs_block(block_after_loop);
69
70 /* Remove continue if its the last instruction in the loop */
71 nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
72 if (last_instr && last_instr->type == nir_instr_type_jump) {
73 nir_instr_remove(last_instr);
74 }
75 }
76
77 static void
78 get_first_blocks_in_terminator(nir_loop_terminator *term,
79 nir_block **first_break_block,
80 nir_block **first_continue_block)
81 {
82 if (term->continue_from_then) {
83 *first_continue_block = nir_if_first_then_block(term->nif);
84 *first_break_block = nir_if_first_else_block(term->nif);
85 } else {
86 *first_continue_block = nir_if_first_else_block(term->nif);
87 *first_break_block = nir_if_first_then_block(term->nif);
88 }
89 }
90
91 /**
92 * Unroll a loop where we know exactly how many iterations there are and there
93 * is only a single exit point. Note here we can unroll loops with multiple
94 * theoretical exits that only have a single terminating exit that we always
95 * know is the "real" exit.
96 *
97 * loop {
98 * ...instrs...
99 * }
100 *
101 * And the iteration count is 3, the output will be:
102 *
103 * ...instrs... ...instrs... ...instrs...
104 */
105 static void
106 simple_unroll(nir_loop *loop)
107 {
108 nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
109 assert(nir_is_trivial_loop_if(limiting_term->nif,
110 limiting_term->break_block));
111
112 loop_prepare_for_unroll(loop);
113
114 /* Skip over loop terminator and get the loop body. */
115 list_for_each_entry(nir_loop_terminator, terminator,
116 &loop->info->loop_terminator_list,
117 loop_terminator_link) {
118
119 /* Remove all but the limiting terminator as we know the other exit
120 * conditions can never be met. Note we need to extract any instructions
121 * in the continue from branch and insert then into the loop body before
122 * removing it.
123 */
124 if (terminator->nif != limiting_term->nif) {
125 nir_block *first_break_block;
126 nir_block *first_continue_block;
127 get_first_blocks_in_terminator(terminator, &first_break_block,
128 &first_continue_block);
129
130 assert(nir_is_trivial_loop_if(terminator->nif,
131 terminator->break_block));
132
133 nir_cf_list continue_from_lst;
134 nir_cf_extract(&continue_from_lst,
135 nir_before_block(first_continue_block),
136 nir_after_block(terminator->continue_from_block));
137 nir_cf_reinsert(&continue_from_lst,
138 nir_after_cf_node(&terminator->nif->cf_node));
139
140 nir_cf_node_remove(&terminator->nif->cf_node);
141 }
142 }
143
144 nir_block *first_break_block;
145 nir_block *first_continue_block;
146 get_first_blocks_in_terminator(limiting_term, &first_break_block,
147 &first_continue_block);
148
149 /* Pluck out the loop header */
150 nir_block *header_blk = nir_loop_first_block(loop);
151 nir_cf_list lp_header;
152 nir_cf_extract(&lp_header, nir_before_block(header_blk),
153 nir_before_cf_node(&limiting_term->nif->cf_node));
154
155 /* Add the continue from block of the limiting terminator to the loop body
156 */
157 nir_cf_list continue_from_lst;
158 nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
159 nir_after_block(limiting_term->continue_from_block));
160 nir_cf_reinsert(&continue_from_lst,
161 nir_after_cf_node(&limiting_term->nif->cf_node));
162
163 /* Pluck out the loop body */
164 nir_cf_list loop_body;
165 nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
166 nir_after_block(nir_loop_last_block(loop)));
167
168 struct hash_table *remap_table =
169 _mesa_hash_table_create(NULL, _mesa_hash_pointer,
170 _mesa_key_pointer_equal);
171
172 /* Clone the loop header and insert before the loop */
173 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
174 nir_before_cf_node(&loop->cf_node),
175 remap_table);
176
177 for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
178 /* Clone loop body and insert before the loop */
179 nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
180 nir_before_cf_node(&loop->cf_node),
181 remap_table);
182
183 /* Clone loop header and insert after loop body */
184 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
185 nir_before_cf_node(&loop->cf_node),
186 remap_table);
187 }
188
189 /* Remove the break from the loop terminator and add instructions from
190 * the break block after the unrolled loop.
191 */
192 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
193 nir_instr_remove(break_instr);
194 nir_cf_list break_list;
195 nir_cf_extract(&break_list, nir_before_block(first_break_block),
196 nir_after_block(limiting_term->break_block));
197
198 /* Clone so things get properly remapped */
199 nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
200 nir_before_cf_node(&loop->cf_node),
201 remap_table);
202
203 /* Remove the loop */
204 nir_cf_node_remove(&loop->cf_node);
205
206 /* Delete the original loop body, break block & header */
207 nir_cf_delete(&lp_header);
208 nir_cf_delete(&loop_body);
209 nir_cf_delete(&break_list);
210
211 _mesa_hash_table_destroy(remap_table, NULL);
212 }
213
214 static void
215 move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
216 {
217 /* Move the rest of the loop inside the continue-from-block */
218 nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
219
220 /* Remove the break */
221 nir_instr_remove(nir_block_last_instr(term->break_block));
222 }
223
224 static nir_cursor
225 get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
226 {
227 if (node->type == nir_cf_node_loop) {
228 return nir_before_cf_node(node);
229 } else {
230 nir_if *if_stmt = nir_cf_node_as_if(node);
231 if (continue_from_then) {
232 return nir_after_block(nir_if_last_then_block(if_stmt));
233 } else {
234 return nir_after_block(nir_if_last_else_block(if_stmt));
235 }
236 }
237 }
238
239 static nir_cf_node *
240 complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
241 nir_cf_list *lp_header, nir_cf_list *lp_body,
242 struct hash_table *remap_table,
243 unsigned num_times_to_clone)
244 {
245 /* In the terminator that we have no trip count for move everything after
246 * the terminator into the continue from branch.
247 */
248 nir_cf_list loop_end;
249 nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
250 nir_after_block(nir_loop_last_block(loop)));
251 move_cf_list_into_loop_term(&loop_end, unlimit_term);
252
253 /* Pluck out the loop body. */
254 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
255 nir_after_block(nir_loop_last_block(loop)));
256
257 /* Set unroll_loc to the loop as we will insert the unrolled loop before it
258 */
259 nir_cf_node *unroll_loc = &loop->cf_node;
260
261 /* Temp list to store the cloned loop as we unroll */
262 nir_cf_list unrolled_lp_body;
263
264 for (unsigned i = 0; i < num_times_to_clone; i++) {
265
266 nir_cursor cursor =
267 get_complex_unroll_insert_location(unroll_loc,
268 unlimit_term->continue_from_then);
269
270 /* Clone loop header and insert in if branch */
271 nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
272 cursor, remap_table);
273
274 cursor =
275 get_complex_unroll_insert_location(unroll_loc,
276 unlimit_term->continue_from_then);
277
278 /* Clone loop body */
279 nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
280 remap_table);
281
282 unroll_loc = exec_node_data(nir_cf_node,
283 exec_list_get_tail(&unrolled_lp_body.list),
284 node);
285 assert(unroll_loc->type == nir_cf_node_block &&
286 exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
287
288 /* Get the unrolled if node */
289 unroll_loc = nir_cf_node_prev(unroll_loc);
290
291 /* Insert unrolled loop body */
292 nir_cf_reinsert(&unrolled_lp_body, cursor);
293 }
294
295 return unroll_loc;
296 }
297
298 /**
299 * Unroll a loop with two exists when the trip count of one of the exits is
300 * unknown. If continue_from_then is true, the loop is repeated only when the
301 * "then" branch of the if is taken; otherwise it is repeated only
302 * when the "else" branch of the if is taken.
303 *
304 * For example, if the input is:
305 *
306 * loop {
307 * ...phis/condition...
308 * if condition {
309 * ...then instructions...
310 * } else {
311 * ...continue instructions...
312 * break
313 * }
314 * ...body...
315 * }
316 *
317 * And the iteration count is 3, and unlimit_term->continue_from_then is true,
318 * then the output will be:
319 *
320 * ...condition...
321 * if condition {
322 * ...then instructions...
323 * ...body...
324 * if condition {
325 * ...then instructions...
326 * ...body...
327 * if condition {
328 * ...then instructions...
329 * ...body...
330 * } else {
331 * ...continue instructions...
332 * }
333 * } else {
334 * ...continue instructions...
335 * }
336 * } else {
337 * ...continue instructions...
338 * }
339 */
340 static void
341 complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
342 bool limiting_term_second)
343 {
344 assert(nir_is_trivial_loop_if(unlimit_term->nif,
345 unlimit_term->break_block));
346
347 nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
348 assert(nir_is_trivial_loop_if(limiting_term->nif,
349 limiting_term->break_block));
350
351 loop_prepare_for_unroll(loop);
352
353 nir_block *header_blk = nir_loop_first_block(loop);
354
355 nir_cf_list lp_header;
356 nir_cf_list limit_break_list;
357 unsigned num_times_to_clone;
358 if (limiting_term_second) {
359 /* Pluck out the loop header */
360 nir_cf_extract(&lp_header, nir_before_block(header_blk),
361 nir_before_cf_node(&unlimit_term->nif->cf_node));
362
363 /* We need some special handling when its the second terminator causing
364 * us to exit the loop for example:
365 *
366 * for (int i = 0; i < uniform_lp_count; i++) {
367 * colour = vec4(0.0, 1.0, 0.0, 1.0);
368 *
369 * if (i == 1) {
370 * break;
371 * }
372 * ... any further code is unreachable after i == 1 ...
373 * }
374 */
375 nir_cf_list after_lt;
376 nir_if *limit_if = limiting_term->nif;
377 nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
378 nir_after_block(nir_loop_last_block(loop)));
379 move_cf_list_into_loop_term(&after_lt, limiting_term);
380
381 /* Because the trip count is the number of times we pass over the entire
382 * loop before hitting a break when the second terminator is the
383 * limiting terminator we can actually execute code inside the loop when
384 * trip count == 0 e.g. the code above the break. So we need to bump
385 * the trip_count in order for the code below to clone anything. When
386 * trip count == 1 we execute the code above the break twice and the
387 * code below it once so we need clone things twice and so on.
388 */
389 num_times_to_clone = loop->info->max_trip_count + 1;
390 } else {
391 /* Pluck out the loop header */
392 nir_cf_extract(&lp_header, nir_before_block(header_blk),
393 nir_before_cf_node(&limiting_term->nif->cf_node));
394
395 nir_block *first_break_block;
396 nir_block *first_continue_block;
397 get_first_blocks_in_terminator(limiting_term, &first_break_block,
398 &first_continue_block);
399
400 /* Remove the break then extract instructions from the break block so we
401 * can insert them in the innermost else of the unrolled loop.
402 */
403 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
404 nir_instr_remove(break_instr);
405 nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
406 nir_after_block(limiting_term->break_block));
407
408 nir_cf_list continue_list;
409 nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
410 nir_after_block(limiting_term->continue_from_block));
411
412 nir_cf_reinsert(&continue_list,
413 nir_after_cf_node(&limiting_term->nif->cf_node));
414
415 nir_cf_node_remove(&limiting_term->nif->cf_node);
416
417 num_times_to_clone = loop->info->max_trip_count;
418 }
419
420 struct hash_table *remap_table =
421 _mesa_hash_table_create(NULL, _mesa_hash_pointer,
422 _mesa_key_pointer_equal);
423
424 nir_cf_list lp_body;
425 nir_cf_node *unroll_loc =
426 complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
427 remap_table, num_times_to_clone);
428
429 if (!limiting_term_second) {
430 assert(unroll_loc->type == nir_cf_node_if);
431
432 nir_cursor cursor =
433 get_complex_unroll_insert_location(unroll_loc,
434 unlimit_term->continue_from_then);
435
436 /* Clone loop header and insert in if branch */
437 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
438 cursor, remap_table);
439
440 cursor =
441 get_complex_unroll_insert_location(unroll_loc,
442 unlimit_term->continue_from_then);
443
444 /* Clone so things get properly remapped, and insert break block from
445 * the limiting terminator.
446 */
447 nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
448 cursor, remap_table);
449
450 nir_cf_delete(&limit_break_list);
451 }
452
453 /* The loop has been unrolled so remove it. */
454 nir_cf_node_remove(&loop->cf_node);
455
456 /* Delete the original loop header and body */
457 nir_cf_delete(&lp_header);
458 nir_cf_delete(&lp_body);
459
460 _mesa_hash_table_destroy(remap_table, NULL);
461 }
462
463 /* Unrolls the classic wrapper loops e.g
464 *
465 * do {
466 * // ...
467 * } while (false)
468 */
469 static bool
470 wrapper_unroll(nir_loop *loop)
471 {
472 if (!list_empty(&loop->info->loop_terminator_list)) {
473
474 /* Unrolling a loop with a large number of exits can result in a
475 * large inrease in register pressure. For now we just skip
476 * unrolling if we have more than 3 exits (not including the break
477 * at the end of the loop).
478 *
479 * TODO: Most loops that fit this pattern are simply switch
480 * statements that are converted to a loop to take advantage of
481 * exiting jump instruction handling. In this case we could make
482 * use of a binary seach pattern like we do in
483 * nir_lower_indirect_derefs(), this should allow us to unroll the
484 * loops in an optimal way and should also avoid some of the
485 * register pressure that comes from simply nesting the
486 * terminators one after the other.
487 */
488 if (list_length(&loop->info->loop_terminator_list) > 3)
489 return false;
490
491 loop_prepare_for_unroll(loop);
492
493 nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
494 list_for_each_entry(nir_loop_terminator, terminator,
495 &loop->info->loop_terminator_list,
496 loop_terminator_link) {
497
498 /* Remove break from the terminator */
499 nir_instr *break_instr =
500 nir_block_last_instr(terminator->break_block);
501 nir_instr_remove(break_instr);
502
503 /* Pluck out the loop body. */
504 nir_cf_list loop_body;
505 nir_cf_extract(&loop_body,
506 nir_after_cf_node(&terminator->nif->cf_node),
507 loop_end);
508
509 /* Reinsert loop body into continue from block */
510 nir_cf_reinsert(&loop_body,
511 nir_after_block(terminator->continue_from_block));
512
513 loop_end = terminator->continue_from_then ?
514 nir_after_block(nir_if_last_then_block(terminator->nif)) :
515 nir_after_block(nir_if_last_else_block(terminator->nif));
516 }
517 } else {
518 nir_block *blk_after_loop =
519 nir_cursor_current_block(nir_after_cf_node(&loop->cf_node));
520
521 /* There may still be some single src phis following the loop that
522 * have not yet been cleaned up by another pass. Tidy those up
523 * before unrolling the loop.
524 */
525 nir_foreach_instr_safe(instr, blk_after_loop) {
526 if (instr->type != nir_instr_type_phi)
527 break;
528
529 nir_phi_instr *phi = nir_instr_as_phi(instr);
530 assert(exec_list_length(&phi->srcs) == 1);
531
532 nir_phi_src *phi_src =
533 exec_node_data(nir_phi_src, exec_list_get_head(&phi->srcs), node);
534
535 nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src);
536 nir_instr_remove(instr);
537 }
538
539 /* Remove break at end of the loop */
540 nir_block *last_loop_blk = nir_loop_last_block(loop);
541 nir_instr *break_instr = nir_block_last_instr(last_loop_blk);
542 nir_instr_remove(break_instr);
543 }
544
545 /* Pluck out the loop body. */
546 nir_cf_list loop_body;
547 nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
548 nir_after_block(nir_loop_last_block(loop)));
549
550 /* Reinsert loop body after the loop */
551 nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
552
553 /* The loop has been unrolled so remove it. */
554 nir_cf_node_remove(&loop->cf_node);
555
556 return true;
557 }
558
559 static bool
560 is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
561 {
562 unsigned max_iter = shader->options->max_unroll_iterations;
563
564 if (li->max_trip_count > max_iter)
565 return false;
566
567 if (li->force_unroll)
568 return true;
569
570 bool loop_not_too_large =
571 li->num_instructions * li->max_trip_count <= max_iter * LOOP_UNROLL_LIMIT;
572
573 return loop_not_too_large;
574 }
575
576 static bool
577 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
578 {
579 bool progress = false;
580 bool has_nested_loop = false;
581 nir_loop *loop;
582
583 switch (cf_node->type) {
584 case nir_cf_node_block:
585 return progress;
586 case nir_cf_node_if: {
587 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
588 foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
589 progress |= process_loops(sh, nested_node, has_nested_loop_out);
590 foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
591 progress |= process_loops(sh, nested_node, has_nested_loop_out);
592 return progress;
593 }
594 case nir_cf_node_loop: {
595 loop = nir_cf_node_as_loop(cf_node);
596 foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
597 progress |= process_loops(sh, nested_node, &has_nested_loop);
598
599 break;
600 }
601 default:
602 unreachable("unknown cf node type");
603 }
604
605 /* Don't attempt to unroll a second inner loop in this pass, wait until the
606 * next pass as we have altered the cf.
607 */
608 if (!progress) {
609
610 /* Check for the classic
611 *
612 * do {
613 * // ...
614 * } while (false)
615 *
616 * that is used to wrap multi-line macros. GLSL IR also wraps switch
617 * statements in a loop like this.
618 */
619 if (loop->info->limiting_terminator == NULL &&
620 !loop->info->complex_loop) {
621
622 nir_block *last_loop_blk = nir_loop_last_block(loop);
623 if (!nir_block_ends_in_break(last_loop_blk))
624 goto exit;
625
626 progress = wrapper_unroll(loop);
627
628 goto exit;
629 }
630
631 if (has_nested_loop || loop->info->limiting_terminator == NULL)
632 goto exit;
633
634 if (!is_loop_small_enough_to_unroll(sh, loop->info))
635 goto exit;
636
637 if (loop->info->exact_trip_count_known) {
638 simple_unroll(loop);
639 progress = true;
640 } else {
641 /* Attempt to unroll loops with two terminators. */
642 unsigned num_lt = list_length(&loop->info->loop_terminator_list);
643 if (num_lt == 2) {
644 bool limiting_term_second = true;
645 nir_loop_terminator *terminator =
646 list_first_entry(&loop->info->loop_terminator_list,
647 nir_loop_terminator, loop_terminator_link);
648
649
650 if (terminator->nif == loop->info->limiting_terminator->nif) {
651 limiting_term_second = false;
652 terminator =
653 list_last_entry(&loop->info->loop_terminator_list,
654 nir_loop_terminator, loop_terminator_link);
655 }
656
657 /* If the first terminator has a trip count of zero and is the
658 * limiting terminator just do a simple unroll as the second
659 * terminator can never be reached.
660 */
661 if (loop->info->max_trip_count == 0 && !limiting_term_second) {
662 simple_unroll(loop);
663 } else {
664 complex_unroll(loop, terminator, limiting_term_second);
665 }
666 progress = true;
667 }
668 }
669 }
670
671 exit:
672 *has_nested_loop_out = true;
673 return progress;
674 }
675
676 static bool
677 nir_opt_loop_unroll_impl(nir_function_impl *impl,
678 nir_variable_mode indirect_mask)
679 {
680 bool progress = false;
681 nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask);
682 nir_metadata_require(impl, nir_metadata_block_index);
683
684 foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
685 bool has_nested_loop = false;
686 progress |= process_loops(impl->function->shader, node,
687 &has_nested_loop);
688 }
689
690 if (progress)
691 nir_lower_regs_to_ssa_impl(impl);
692
693 return progress;
694 }
695
696 /**
697 * indirect_mask specifies which type of indirectly accessed variables
698 * should force loop unrolling.
699 */
700 bool
701 nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
702 {
703 bool progress = false;
704
705 nir_foreach_function(function, shader) {
706 if (function->impl) {
707 progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
708 }
709 }
710 return progress;
711 }