2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
30 /* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32
31 * instructions. (Multiply counting nodes and magic number 5.) But there is
32 * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
33 * to give about the same results. Around 5 instructions per node. But some
34 * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
37 #define LOOP_UNROLL_LIMIT 26
39 /* Prepare this loop for unrolling by first converting to lcssa and then
40 * converting the phis from the top level of the loop body to regs.
41 * Partially converting out of SSA allows us to unroll the loop without having
42 * to keep track of and update phis along the way which gets tricky and
43 * doesn't add much value over converting to regs.
45 * The loop may have a continue instruction at the end of the loop which does
46 * nothing. Once we're out of SSA, we can safely delete it so we don't have
47 * to deal with it later.
50 loop_prepare_for_unroll(nir_loop
*loop
)
52 nir_rematerialize_derefs_in_use_blocks_impl(
53 nir_cf_node_get_function(&loop
->cf_node
));
55 nir_convert_loop_to_lcssa(loop
);
57 /* Lower phis at the top level of the loop body */
58 foreach_list_typed_safe(nir_cf_node
, node
, node
, &loop
->body
) {
59 if (nir_cf_node_block
== node
->type
) {
60 nir_lower_phis_to_regs_block(nir_cf_node_as_block(node
));
64 /* Lower phis after the loop */
65 nir_block
*block_after_loop
=
66 nir_cf_node_as_block(nir_cf_node_next(&loop
->cf_node
));
68 nir_lower_phis_to_regs_block(block_after_loop
);
70 /* Remove continue if its the last instruction in the loop */
71 nir_instr
*last_instr
= nir_block_last_instr(nir_loop_last_block(loop
));
72 if (last_instr
&& last_instr
->type
== nir_instr_type_jump
) {
73 nir_instr_remove(last_instr
);
78 get_first_blocks_in_terminator(nir_loop_terminator
*term
,
79 nir_block
**first_break_block
,
80 nir_block
**first_continue_block
)
82 if (term
->continue_from_then
) {
83 *first_continue_block
= nir_if_first_then_block(term
->nif
);
84 *first_break_block
= nir_if_first_else_block(term
->nif
);
86 *first_continue_block
= nir_if_first_else_block(term
->nif
);
87 *first_break_block
= nir_if_first_then_block(term
->nif
);
92 * Unroll a loop where we know exactly how many iterations there are and there
93 * is only a single exit point. Note here we can unroll loops with multiple
94 * theoretical exits that only have a single terminating exit that we always
95 * know is the "real" exit.
101 * And the iteration count is 3, the output will be:
103 * ...instrs... ...instrs... ...instrs...
106 simple_unroll(nir_loop
*loop
)
108 nir_loop_terminator
*limiting_term
= loop
->info
->limiting_terminator
;
109 assert(nir_is_trivial_loop_if(limiting_term
->nif
,
110 limiting_term
->break_block
));
112 loop_prepare_for_unroll(loop
);
114 /* Skip over loop terminator and get the loop body. */
115 list_for_each_entry(nir_loop_terminator
, terminator
,
116 &loop
->info
->loop_terminator_list
,
117 loop_terminator_link
) {
119 /* Remove all but the limiting terminator as we know the other exit
120 * conditions can never be met. Note we need to extract any instructions
121 * in the continue from branch and insert then into the loop body before
124 if (terminator
->nif
!= limiting_term
->nif
) {
125 nir_block
*first_break_block
;
126 nir_block
*first_continue_block
;
127 get_first_blocks_in_terminator(terminator
, &first_break_block
,
128 &first_continue_block
);
130 assert(nir_is_trivial_loop_if(terminator
->nif
,
131 terminator
->break_block
));
133 nir_cf_list continue_from_lst
;
134 nir_cf_extract(&continue_from_lst
,
135 nir_before_block(first_continue_block
),
136 nir_after_block(terminator
->continue_from_block
));
137 nir_cf_reinsert(&continue_from_lst
,
138 nir_after_cf_node(&terminator
->nif
->cf_node
));
140 nir_cf_node_remove(&terminator
->nif
->cf_node
);
144 nir_block
*first_break_block
;
145 nir_block
*first_continue_block
;
146 get_first_blocks_in_terminator(limiting_term
, &first_break_block
,
147 &first_continue_block
);
149 /* Pluck out the loop header */
150 nir_block
*header_blk
= nir_loop_first_block(loop
);
151 nir_cf_list lp_header
;
152 nir_cf_extract(&lp_header
, nir_before_block(header_blk
),
153 nir_before_cf_node(&limiting_term
->nif
->cf_node
));
155 /* Add the continue from block of the limiting terminator to the loop body
157 nir_cf_list continue_from_lst
;
158 nir_cf_extract(&continue_from_lst
, nir_before_block(first_continue_block
),
159 nir_after_block(limiting_term
->continue_from_block
));
160 nir_cf_reinsert(&continue_from_lst
,
161 nir_after_cf_node(&limiting_term
->nif
->cf_node
));
163 /* Pluck out the loop body */
164 nir_cf_list loop_body
;
165 nir_cf_extract(&loop_body
, nir_after_cf_node(&limiting_term
->nif
->cf_node
),
166 nir_after_block(nir_loop_last_block(loop
)));
168 struct hash_table
*remap_table
= _mesa_pointer_hash_table_create(NULL
);
170 /* Clone the loop header and insert before the loop */
171 nir_cf_list_clone_and_reinsert(&lp_header
, loop
->cf_node
.parent
,
172 nir_before_cf_node(&loop
->cf_node
),
175 for (unsigned i
= 0; i
< loop
->info
->max_trip_count
; i
++) {
176 /* Clone loop body and insert before the loop */
177 nir_cf_list_clone_and_reinsert(&loop_body
, loop
->cf_node
.parent
,
178 nir_before_cf_node(&loop
->cf_node
),
181 /* Clone loop header and insert after loop body */
182 nir_cf_list_clone_and_reinsert(&lp_header
, loop
->cf_node
.parent
,
183 nir_before_cf_node(&loop
->cf_node
),
187 /* Remove the break from the loop terminator and add instructions from
188 * the break block after the unrolled loop.
190 nir_instr
*break_instr
= nir_block_last_instr(limiting_term
->break_block
);
191 nir_instr_remove(break_instr
);
192 nir_cf_list break_list
;
193 nir_cf_extract(&break_list
, nir_before_block(first_break_block
),
194 nir_after_block(limiting_term
->break_block
));
196 /* Clone so things get properly remapped */
197 nir_cf_list_clone_and_reinsert(&break_list
, loop
->cf_node
.parent
,
198 nir_before_cf_node(&loop
->cf_node
),
201 /* Remove the loop */
202 nir_cf_node_remove(&loop
->cf_node
);
204 /* Delete the original loop body, break block & header */
205 nir_cf_delete(&lp_header
);
206 nir_cf_delete(&loop_body
);
207 nir_cf_delete(&break_list
);
209 _mesa_hash_table_destroy(remap_table
, NULL
);
213 move_cf_list_into_loop_term(nir_cf_list
*lst
, nir_loop_terminator
*term
)
215 /* Move the rest of the loop inside the continue-from-block */
216 nir_cf_reinsert(lst
, nir_after_block(term
->continue_from_block
));
218 /* Remove the break */
219 nir_instr_remove(nir_block_last_instr(term
->break_block
));
223 get_complex_unroll_insert_location(nir_cf_node
*node
, bool continue_from_then
)
225 if (node
->type
== nir_cf_node_loop
) {
226 return nir_before_cf_node(node
);
228 nir_if
*if_stmt
= nir_cf_node_as_if(node
);
229 if (continue_from_then
) {
230 return nir_after_block(nir_if_last_then_block(if_stmt
));
232 return nir_after_block(nir_if_last_else_block(if_stmt
));
238 complex_unroll_loop_body(nir_loop
*loop
, nir_loop_terminator
*unlimit_term
,
239 nir_cf_list
*lp_header
, nir_cf_list
*lp_body
,
240 struct hash_table
*remap_table
,
241 unsigned num_times_to_clone
)
243 /* In the terminator that we have no trip count for move everything after
244 * the terminator into the continue from branch.
246 nir_cf_list loop_end
;
247 nir_cf_extract(&loop_end
, nir_after_cf_node(&unlimit_term
->nif
->cf_node
),
248 nir_after_block(nir_loop_last_block(loop
)));
249 move_cf_list_into_loop_term(&loop_end
, unlimit_term
);
251 /* Pluck out the loop body. */
252 nir_cf_extract(lp_body
, nir_before_block(nir_loop_first_block(loop
)),
253 nir_after_block(nir_loop_last_block(loop
)));
255 /* Set unroll_loc to the loop as we will insert the unrolled loop before it
257 nir_cf_node
*unroll_loc
= &loop
->cf_node
;
259 /* Temp list to store the cloned loop as we unroll */
260 nir_cf_list unrolled_lp_body
;
262 for (unsigned i
= 0; i
< num_times_to_clone
; i
++) {
265 get_complex_unroll_insert_location(unroll_loc
,
266 unlimit_term
->continue_from_then
);
268 /* Clone loop header and insert in if branch */
269 nir_cf_list_clone_and_reinsert(lp_header
, loop
->cf_node
.parent
,
270 cursor
, remap_table
);
273 get_complex_unroll_insert_location(unroll_loc
,
274 unlimit_term
->continue_from_then
);
276 /* Clone loop body */
277 nir_cf_list_clone(&unrolled_lp_body
, lp_body
, loop
->cf_node
.parent
,
280 unroll_loc
= exec_node_data(nir_cf_node
,
281 exec_list_get_tail(&unrolled_lp_body
.list
),
283 assert(unroll_loc
->type
== nir_cf_node_block
&&
284 exec_list_is_empty(&nir_cf_node_as_block(unroll_loc
)->instr_list
));
286 /* Get the unrolled if node */
287 unroll_loc
= nir_cf_node_prev(unroll_loc
);
289 /* Insert unrolled loop body */
290 nir_cf_reinsert(&unrolled_lp_body
, cursor
);
297 * Unroll a loop with two exists when the trip count of one of the exits is
298 * unknown. If continue_from_then is true, the loop is repeated only when the
299 * "then" branch of the if is taken; otherwise it is repeated only
300 * when the "else" branch of the if is taken.
302 * For example, if the input is:
305 * ...phis/condition...
307 * ...then instructions...
309 * ...continue instructions...
315 * And the iteration count is 3, and unlimit_term->continue_from_then is true,
316 * then the output will be:
320 * ...then instructions...
323 * ...then instructions...
326 * ...then instructions...
329 * ...continue instructions...
332 * ...continue instructions...
335 * ...continue instructions...
339 complex_unroll(nir_loop
*loop
, nir_loop_terminator
*unlimit_term
,
340 bool limiting_term_second
)
342 assert(nir_is_trivial_loop_if(unlimit_term
->nif
,
343 unlimit_term
->break_block
));
345 nir_loop_terminator
*limiting_term
= loop
->info
->limiting_terminator
;
346 assert(nir_is_trivial_loop_if(limiting_term
->nif
,
347 limiting_term
->break_block
));
349 loop_prepare_for_unroll(loop
);
351 nir_block
*header_blk
= nir_loop_first_block(loop
);
353 nir_cf_list lp_header
;
354 nir_cf_list limit_break_list
;
355 unsigned num_times_to_clone
;
356 if (limiting_term_second
) {
357 /* Pluck out the loop header */
358 nir_cf_extract(&lp_header
, nir_before_block(header_blk
),
359 nir_before_cf_node(&unlimit_term
->nif
->cf_node
));
361 /* We need some special handling when its the second terminator causing
362 * us to exit the loop for example:
364 * for (int i = 0; i < uniform_lp_count; i++) {
365 * colour = vec4(0.0, 1.0, 0.0, 1.0);
370 * ... any further code is unreachable after i == 1 ...
373 nir_cf_list after_lt
;
374 nir_if
*limit_if
= limiting_term
->nif
;
375 nir_cf_extract(&after_lt
, nir_after_cf_node(&limit_if
->cf_node
),
376 nir_after_block(nir_loop_last_block(loop
)));
377 move_cf_list_into_loop_term(&after_lt
, limiting_term
);
379 /* Because the trip count is the number of times we pass over the entire
380 * loop before hitting a break when the second terminator is the
381 * limiting terminator we can actually execute code inside the loop when
382 * trip count == 0 e.g. the code above the break. So we need to bump
383 * the trip_count in order for the code below to clone anything. When
384 * trip count == 1 we execute the code above the break twice and the
385 * code below it once so we need clone things twice and so on.
387 num_times_to_clone
= loop
->info
->max_trip_count
+ 1;
389 /* Pluck out the loop header */
390 nir_cf_extract(&lp_header
, nir_before_block(header_blk
),
391 nir_before_cf_node(&limiting_term
->nif
->cf_node
));
393 nir_block
*first_break_block
;
394 nir_block
*first_continue_block
;
395 get_first_blocks_in_terminator(limiting_term
, &first_break_block
,
396 &first_continue_block
);
398 /* Remove the break then extract instructions from the break block so we
399 * can insert them in the innermost else of the unrolled loop.
401 nir_instr
*break_instr
= nir_block_last_instr(limiting_term
->break_block
);
402 nir_instr_remove(break_instr
);
403 nir_cf_extract(&limit_break_list
, nir_before_block(first_break_block
),
404 nir_after_block(limiting_term
->break_block
));
406 nir_cf_list continue_list
;
407 nir_cf_extract(&continue_list
, nir_before_block(first_continue_block
),
408 nir_after_block(limiting_term
->continue_from_block
));
410 nir_cf_reinsert(&continue_list
,
411 nir_after_cf_node(&limiting_term
->nif
->cf_node
));
413 nir_cf_node_remove(&limiting_term
->nif
->cf_node
);
415 num_times_to_clone
= loop
->info
->max_trip_count
;
418 struct hash_table
*remap_table
= _mesa_pointer_hash_table_create(NULL
);
421 nir_cf_node
*unroll_loc
=
422 complex_unroll_loop_body(loop
, unlimit_term
, &lp_header
, &lp_body
,
423 remap_table
, num_times_to_clone
);
425 if (!limiting_term_second
) {
426 assert(unroll_loc
->type
== nir_cf_node_if
);
429 get_complex_unroll_insert_location(unroll_loc
,
430 unlimit_term
->continue_from_then
);
432 /* Clone loop header and insert in if branch */
433 nir_cf_list_clone_and_reinsert(&lp_header
, loop
->cf_node
.parent
,
434 cursor
, remap_table
);
437 get_complex_unroll_insert_location(unroll_loc
,
438 unlimit_term
->continue_from_then
);
440 /* Clone so things get properly remapped, and insert break block from
441 * the limiting terminator.
443 nir_cf_list_clone_and_reinsert(&limit_break_list
, loop
->cf_node
.parent
,
444 cursor
, remap_table
);
446 nir_cf_delete(&limit_break_list
);
449 /* The loop has been unrolled so remove it. */
450 nir_cf_node_remove(&loop
->cf_node
);
452 /* Delete the original loop header and body */
453 nir_cf_delete(&lp_header
);
454 nir_cf_delete(&lp_body
);
456 _mesa_hash_table_destroy(remap_table
, NULL
);
460 * Unroll loops where we only have a single terminator but the exact trip
461 * count is unknown. For example:
463 * for (int i = 0; i < imin(x, 4); i++)
467 complex_unroll_single_terminator(nir_loop
*loop
)
469 assert(list_length(&loop
->info
->loop_terminator_list
) == 1);
470 assert(loop
->info
->limiting_terminator
);
471 assert(nir_is_trivial_loop_if(loop
->info
->limiting_terminator
->nif
,
472 loop
->info
->limiting_terminator
->break_block
));
474 nir_loop_terminator
*terminator
= loop
->info
->limiting_terminator
;
476 loop_prepare_for_unroll(loop
);
478 /* Pluck out the loop header */
479 nir_cf_list lp_header
;
480 nir_cf_extract(&lp_header
, nir_before_block(nir_loop_first_block(loop
)),
481 nir_before_cf_node(&terminator
->nif
->cf_node
));
483 struct hash_table
*remap_table
=
484 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
485 _mesa_key_pointer_equal
);
487 /* We need to clone the loop one extra time in order to clone the lcssa
488 * vars for the last iteration (they are inside the following ifs break
489 * branch). We leave other passes to clean up this redundant if.
491 unsigned num_times_to_clone
= loop
->info
->max_trip_count
+ 1;
494 nir_cf_node
*unroll_loc
=
495 complex_unroll_loop_body(loop
, terminator
, &lp_header
, &lp_body
,
496 remap_table
, num_times_to_clone
);
498 /* Delete the original loop header and body */
499 nir_cf_delete(&lp_header
);
500 nir_cf_delete(&lp_body
);
502 /* The original loop has been replaced so remove it. */
503 nir_cf_node_remove(&loop
->cf_node
);
505 _mesa_hash_table_destroy(remap_table
, NULL
);
508 /* Unrolls the classic wrapper loops e.g
515 wrapper_unroll(nir_loop
*loop
)
517 if (!list_empty(&loop
->info
->loop_terminator_list
)) {
519 /* Unrolling a loop with a large number of exits can result in a
520 * large inrease in register pressure. For now we just skip
521 * unrolling if we have more than 3 exits (not including the break
522 * at the end of the loop).
524 * TODO: Most loops that fit this pattern are simply switch
525 * statements that are converted to a loop to take advantage of
526 * exiting jump instruction handling. In this case we could make
527 * use of a binary seach pattern like we do in
528 * nir_lower_indirect_derefs(), this should allow us to unroll the
529 * loops in an optimal way and should also avoid some of the
530 * register pressure that comes from simply nesting the
531 * terminators one after the other.
533 if (list_length(&loop
->info
->loop_terminator_list
) > 3)
536 loop_prepare_for_unroll(loop
);
538 nir_cursor loop_end
= nir_after_block(nir_loop_last_block(loop
));
539 list_for_each_entry(nir_loop_terminator
, terminator
,
540 &loop
->info
->loop_terminator_list
,
541 loop_terminator_link
) {
543 /* Remove break from the terminator */
544 nir_instr
*break_instr
=
545 nir_block_last_instr(terminator
->break_block
);
546 nir_instr_remove(break_instr
);
548 /* Pluck out the loop body. */
549 nir_cf_list loop_body
;
550 nir_cf_extract(&loop_body
,
551 nir_after_cf_node(&terminator
->nif
->cf_node
),
554 /* Reinsert loop body into continue from block */
555 nir_cf_reinsert(&loop_body
,
556 nir_after_block(terminator
->continue_from_block
));
558 loop_end
= terminator
->continue_from_then
?
559 nir_after_block(nir_if_last_then_block(terminator
->nif
)) :
560 nir_after_block(nir_if_last_else_block(terminator
->nif
));
563 nir_block
*blk_after_loop
=
564 nir_cursor_current_block(nir_after_cf_node(&loop
->cf_node
));
566 /* There may still be some single src phis following the loop that
567 * have not yet been cleaned up by another pass. Tidy those up
568 * before unrolling the loop.
570 nir_foreach_instr_safe(instr
, blk_after_loop
) {
571 if (instr
->type
!= nir_instr_type_phi
)
574 nir_phi_instr
*phi
= nir_instr_as_phi(instr
);
575 assert(exec_list_length(&phi
->srcs
) == 1);
577 nir_phi_src
*phi_src
=
578 exec_node_data(nir_phi_src
, exec_list_get_head(&phi
->srcs
), node
);
580 nir_ssa_def_rewrite_uses(&phi
->dest
.ssa
, phi_src
->src
);
581 nir_instr_remove(instr
);
584 /* Remove break at end of the loop */
585 nir_block
*last_loop_blk
= nir_loop_last_block(loop
);
586 nir_instr
*break_instr
= nir_block_last_instr(last_loop_blk
);
587 nir_instr_remove(break_instr
);
590 /* Pluck out the loop body. */
591 nir_cf_list loop_body
;
592 nir_cf_extract(&loop_body
, nir_before_block(nir_loop_first_block(loop
)),
593 nir_after_block(nir_loop_last_block(loop
)));
595 /* Reinsert loop body after the loop */
596 nir_cf_reinsert(&loop_body
, nir_after_cf_node(&loop
->cf_node
));
598 /* The loop has been unrolled so remove it. */
599 nir_cf_node_remove(&loop
->cf_node
);
605 is_access_out_of_bounds(nir_loop_terminator
*term
, nir_deref_instr
*deref
,
608 for (nir_deref_instr
*d
= deref
; d
; d
= nir_deref_instr_parent(d
)) {
609 if (d
->deref_type
!= nir_deref_type_array
)
612 nir_alu_instr
*alu
= nir_instr_as_alu(term
->conditional_instr
);
613 nir_src src
= term
->induction_rhs
? alu
->src
[1].src
: alu
->src
[0].src
;
614 if (!nir_srcs_equal(d
->arr
.index
, src
))
617 nir_deref_instr
*parent
= nir_deref_instr_parent(d
);
618 assert(glsl_type_is_array(parent
->type
) ||
619 glsl_type_is_matrix(parent
->type
));
621 /* We have already unrolled the loop and the new one will be imbedded in
622 * the innermost continue branch. So unless the array is greater than
623 * the trip count any iteration over the loop will be an out of bounds
624 * access of the array.
626 return glsl_get_length(parent
->type
) <= trip_count
;
632 /* If we know an array access is going to be out of bounds remove or replace
633 * the access with an undef. This can later result in the entire loop being
634 * removed by nir_opt_dead_cf().
637 remove_out_of_bounds_induction_use(nir_shader
*shader
, nir_loop
*loop
,
638 nir_loop_terminator
*term
,
639 nir_cf_list
*lp_header
,
640 nir_cf_list
*lp_body
,
643 if (!loop
->info
->guessed_trip_count
)
646 /* Temporarily recreate the original loop so we can alter it */
647 nir_cf_reinsert(lp_header
, nir_after_block(nir_loop_last_block(loop
)));
648 nir_cf_reinsert(lp_body
, nir_after_block(nir_loop_last_block(loop
)));
651 nir_builder_init(&b
, nir_cf_node_get_function(&loop
->cf_node
));
653 nir_foreach_block_in_cf_node(block
, &loop
->cf_node
) {
654 nir_foreach_instr_safe(instr
, block
) {
655 if (instr
->type
!= nir_instr_type_intrinsic
)
658 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
660 /* Check for arrays variably-indexed by a loop induction variable.
661 * If this access is out of bounds remove the instruction or replace
662 * its use with an undefined instruction.
663 * If the loop is no longer useful we leave it for the appropriate
664 * pass to clean it up for us.
666 if (intrin
->intrinsic
== nir_intrinsic_load_deref
||
667 intrin
->intrinsic
== nir_intrinsic_store_deref
||
668 intrin
->intrinsic
== nir_intrinsic_copy_deref
) {
670 if (is_access_out_of_bounds(term
, nir_src_as_deref(intrin
->src
[0]),
672 if (intrin
->intrinsic
== nir_intrinsic_load_deref
) {
673 assert(intrin
->src
[0].is_ssa
);
674 nir_ssa_def
*a_ssa
= intrin
->src
[0].ssa
;
676 nir_ssa_undef(&b
, intrin
->num_components
,
678 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
,
679 nir_src_for_ssa(undef
));
681 nir_instr_remove(instr
);
686 if (intrin
->intrinsic
== nir_intrinsic_copy_deref
&&
687 is_access_out_of_bounds(term
, nir_src_as_deref(intrin
->src
[1]),
689 assert(intrin
->src
[1].is_ssa
);
690 nir_ssa_def
*a_ssa
= intrin
->src
[1].ssa
;
692 nir_ssa_undef(&b
, intrin
->num_components
, a_ssa
->bit_size
);
694 /* Replace the copy with a store of the undefined value */
695 b
.cursor
= nir_before_instr(instr
);
696 nir_store_deref(&b
, nir_src_as_deref(intrin
->src
[0]), undef
, ~0);
697 nir_instr_remove(instr
);
703 /* Now that we are done extract the loop header and body again */
704 nir_cf_extract(lp_header
, nir_before_block(nir_loop_first_block(loop
)),
705 nir_before_cf_node(&term
->nif
->cf_node
));
706 nir_cf_extract(lp_body
, nir_before_block(nir_loop_first_block(loop
)),
707 nir_after_block(nir_loop_last_block(loop
)));
710 /* Partially unrolls loops that don't have a known trip count.
713 partial_unroll(nir_shader
*shader
, nir_loop
*loop
, unsigned trip_count
)
715 assert(list_length(&loop
->info
->loop_terminator_list
) == 1);
717 nir_loop_terminator
*terminator
=
718 list_first_entry(&loop
->info
->loop_terminator_list
,
719 nir_loop_terminator
, loop_terminator_link
);
721 assert(nir_is_trivial_loop_if(terminator
->nif
, terminator
->break_block
));
723 loop_prepare_for_unroll(loop
);
725 /* Pluck out the loop header */
726 nir_cf_list lp_header
;
727 nir_cf_extract(&lp_header
, nir_before_block(nir_loop_first_block(loop
)),
728 nir_before_cf_node(&terminator
->nif
->cf_node
));
730 struct hash_table
*remap_table
=
731 _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
732 _mesa_key_pointer_equal
);
735 nir_cf_node
*unroll_loc
=
736 complex_unroll_loop_body(loop
, terminator
, &lp_header
, &lp_body
,
737 remap_table
, trip_count
);
739 /* Attempt to remove out of bounds array access */
740 remove_out_of_bounds_induction_use(shader
, loop
, terminator
, &lp_header
,
741 &lp_body
, trip_count
);
744 get_complex_unroll_insert_location(unroll_loc
,
745 terminator
->continue_from_then
);
747 /* Reinsert the loop in the innermost nested continue branch of the unrolled
750 nir_loop
*new_loop
= nir_loop_create(shader
);
751 nir_cf_node_insert(cursor
, &new_loop
->cf_node
);
752 new_loop
->partially_unrolled
= true;
754 /* Clone loop header and insert into new loop */
755 nir_cf_list_clone_and_reinsert(&lp_header
, loop
->cf_node
.parent
,
756 nir_after_cf_list(&new_loop
->body
),
759 /* Clone loop body and insert into new loop */
760 nir_cf_list_clone_and_reinsert(&lp_body
, loop
->cf_node
.parent
,
761 nir_after_cf_list(&new_loop
->body
),
764 /* Insert break back into terminator */
765 nir_jump_instr
*brk
= nir_jump_instr_create(shader
, nir_jump_break
);
766 nir_if
*nif
= nir_block_get_following_if(nir_loop_first_block(new_loop
));
767 if (terminator
->continue_from_then
) {
768 nir_instr_insert_after_block(nir_if_last_else_block(nif
), &brk
->instr
);
770 nir_instr_insert_after_block(nir_if_last_then_block(nif
), &brk
->instr
);
773 /* Delete the original loop header and body */
774 nir_cf_delete(&lp_header
);
775 nir_cf_delete(&lp_body
);
777 /* The original loop has been replaced so remove it. */
778 nir_cf_node_remove(&loop
->cf_node
);
780 _mesa_hash_table_destroy(remap_table
, NULL
);
784 is_loop_small_enough_to_unroll(nir_shader
*shader
, nir_loop_info
*li
)
786 unsigned max_iter
= shader
->options
->max_unroll_iterations
;
788 unsigned trip_count
=
789 li
->max_trip_count
? li
->max_trip_count
: li
->guessed_trip_count
;
791 if (trip_count
> max_iter
)
794 if (li
->force_unroll
&& !li
->guessed_trip_count
)
797 bool loop_not_too_large
=
798 li
->instr_cost
* trip_count
<= max_iter
* LOOP_UNROLL_LIMIT
;
800 return loop_not_too_large
;
804 process_loops(nir_shader
*sh
, nir_cf_node
*cf_node
, bool *has_nested_loop_out
)
806 bool progress
= false;
807 bool has_nested_loop
= false;
810 switch (cf_node
->type
) {
811 case nir_cf_node_block
:
813 case nir_cf_node_if
: {
814 nir_if
*if_stmt
= nir_cf_node_as_if(cf_node
);
815 foreach_list_typed_safe(nir_cf_node
, nested_node
, node
, &if_stmt
->then_list
)
816 progress
|= process_loops(sh
, nested_node
, has_nested_loop_out
);
817 foreach_list_typed_safe(nir_cf_node
, nested_node
, node
, &if_stmt
->else_list
)
818 progress
|= process_loops(sh
, nested_node
, has_nested_loop_out
);
821 case nir_cf_node_loop
: {
822 loop
= nir_cf_node_as_loop(cf_node
);
823 foreach_list_typed_safe(nir_cf_node
, nested_node
, node
, &loop
->body
)
824 progress
|= process_loops(sh
, nested_node
, &has_nested_loop
);
829 unreachable("unknown cf node type");
832 /* Don't attempt to unroll a second inner loop in this pass, wait until the
833 * next pass as we have altered the cf.
837 /* Check for the classic
843 * that is used to wrap multi-line macros. GLSL IR also wraps switch
844 * statements in a loop like this.
846 if (loop
->info
->limiting_terminator
== NULL
&&
847 !loop
->info
->complex_loop
) {
849 nir_block
*last_loop_blk
= nir_loop_last_block(loop
);
850 if (nir_block_ends_in_break(last_loop_blk
)) {
851 progress
= wrapper_unroll(loop
);
855 /* If we were able to guess the loop iteration based on array access
856 * then do a partial unroll.
858 unsigned num_lt
= list_length(&loop
->info
->loop_terminator_list
);
859 if (!has_nested_loop
&& num_lt
== 1 && !loop
->partially_unrolled
&&
860 loop
->info
->guessed_trip_count
&&
861 is_loop_small_enough_to_unroll(sh
, loop
->info
)) {
862 partial_unroll(sh
, loop
, loop
->info
->guessed_trip_count
);
867 if (has_nested_loop
|| !loop
->info
->limiting_terminator
)
870 if (!is_loop_small_enough_to_unroll(sh
, loop
->info
))
873 if (loop
->info
->exact_trip_count_known
) {
877 /* Attempt to unroll loops with two terminators. */
878 unsigned num_lt
= list_length(&loop
->info
->loop_terminator_list
);
880 !loop
->info
->limiting_terminator
->exact_trip_count_unknown
) {
881 bool limiting_term_second
= true;
882 nir_loop_terminator
*terminator
=
883 list_first_entry(&loop
->info
->loop_terminator_list
,
884 nir_loop_terminator
, loop_terminator_link
);
887 if (terminator
->nif
== loop
->info
->limiting_terminator
->nif
) {
888 limiting_term_second
= false;
890 list_last_entry(&loop
->info
->loop_terminator_list
,
891 nir_loop_terminator
, loop_terminator_link
);
894 /* If the first terminator has a trip count of zero and is the
895 * limiting terminator just do a simple unroll as the second
896 * terminator can never be reached.
898 if (loop
->info
->max_trip_count
== 0 && !limiting_term_second
) {
901 complex_unroll(loop
, terminator
, limiting_term_second
);
907 assert(loop
->info
->limiting_terminator
->exact_trip_count_unknown
);
908 complex_unroll_single_terminator(loop
);
915 *has_nested_loop_out
= true;
920 nir_opt_loop_unroll_impl(nir_function_impl
*impl
,
921 nir_variable_mode indirect_mask
)
923 bool progress
= false;
924 nir_metadata_require(impl
, nir_metadata_loop_analysis
, indirect_mask
);
925 nir_metadata_require(impl
, nir_metadata_block_index
);
927 foreach_list_typed_safe(nir_cf_node
, node
, node
, &impl
->body
) {
928 bool has_nested_loop
= false;
929 progress
|= process_loops(impl
->function
->shader
, node
,
934 nir_lower_regs_to_ssa_impl(impl
);
940 * indirect_mask specifies which type of indirectly accessed variables
941 * should force loop unrolling.
944 nir_opt_loop_unroll(nir_shader
*shader
, nir_variable_mode indirect_mask
)
946 bool progress
= false;
948 nir_foreach_function(function
, shader
) {
949 if (function
->impl
) {
950 progress
|= nir_opt_loop_unroll_impl(function
->impl
, indirect_mask
);