#include "nir_control_flow.h"
#include "nir_loop_analyze.h"
+
+/* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32
+ * instructions. (Multiply counting nodes and magic number 5.) But there is
+ * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
+ * to give about the same results. Around 5 instructions per node. But some
+ * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
+ * we set it to 26.
+ *
+ * check_unrolling_restrictions() multiplies this by the shader's
+ * max_unroll_iterations option to get the budget that
+ * instr_cost * trip_count must not exceed.
+ */
+#define LOOP_UNROLL_LIMIT 26
+
/* Prepare this loop for unrolling by first converting to lcssa and then
- * converting the phis from the loops first block and the block that follows
- * the loop into regs. Partially converting out of SSA allows us to unroll
- * the loop without having to keep track of and update phis along the way
- * which gets tricky and doesn't add much value over conveting to regs.
+ * converting the phis from the top level of the loop body to regs.
+ * Partially converting out of SSA allows us to unroll the loop without having
+ * to keep track of and update phis along the way which gets tricky and
+ * doesn't add much value over converting to regs.
*
- * The loop may have a continue instruction at the end of the loop which does
+ * The loop may have a jump instruction at the end of the loop which does
* nothing. Once we're out of SSA, we can safely delete it so we don't have
* to deal with it later.
*/
static void
loop_prepare_for_unroll(nir_loop *loop)
{
+ nir_rematerialize_derefs_in_use_blocks_impl(
+ nir_cf_node_get_function(&loop->cf_node));
+
nir_convert_loop_to_lcssa(loop);
- nir_lower_phis_to_regs_block(nir_loop_first_block(loop));
+ /* Lower phis at the top level of the loop body: only cf nodes that are
+ * blocks and direct children of the loop are visited, nested ifs and
+ * loops are not descended into.
+ */
+ foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
+ if (nir_cf_node_block == node->type) {
+ nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
+ }
+ }
+ /* Lower phis in the block that immediately follows the loop */
nir_block *block_after_loop =
nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
nir_lower_phis_to_regs_block(block_after_loop);
+ /* Remove a jump of any type if it's the last instruction in the loop's
+ * last block.
+ */
nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
if (last_instr && last_instr->type == nir_instr_type_jump) {
- assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
nir_instr_remove(last_instr);
}
}
nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
nir_after_block(nir_loop_last_block(loop)));
- struct hash_table *remap_table =
- _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
-
- /* Clone the loop header */
- nir_cf_list cloned_header;
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
-
- /* Insert cloned loop header before the loop */
- nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
-
- /* Temp list to store the cloned loop body as we unroll */
- nir_cf_list unrolled_lp_body;
-
- /* Clone loop header and append to the loop body */
- for (unsigned i = 0; i < loop->info->trip_count; i++) {
- /* Clone loop body */
- nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
- remap_table);
+ struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
- /* Insert unrolled loop body before the loop */
- nir_cf_reinsert(&unrolled_lp_body, nir_before_cf_node(&loop->cf_node));
+ /* Clone the loop header and insert before the loop */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
- /* Clone loop header */
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
+ for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
+ /* Clone loop body and insert before the loop */
+ nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
- /* Insert loop header after loop body */
- nir_cf_reinsert(&cloned_header, nir_before_cf_node(&loop->cf_node));
+ /* Clone loop header and insert after loop body */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
}
/* Remove the break from the loop terminator and add instructions from
nir_after_block(limiting_term->break_block));
/* Clone so things get properly remapped */
- nir_cf_list cloned_break_list;
- nir_cf_list_clone(&cloned_break_list, &break_list, loop->cf_node.parent,
- remap_table);
-
- nir_cf_reinsert(&cloned_break_list, nir_before_cf_node(&loop->cf_node));
+ nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
+ nir_before_cf_node(&loop->cf_node),
+ remap_table);
/* Remove the loop */
nir_cf_node_remove(&loop->cf_node);
}
}
+static nir_cf_node *
+complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
+ nir_cf_list *lp_header, nir_cf_list *lp_body,
+ struct hash_table *remap_table,
+ unsigned num_times_to_clone)
+{
+ /* Shared unrolling helper. Emits num_times_to_clone copies of lp_header
+ * followed by the loop body, each one inserted at the cursor that
+ * get_complex_unroll_insert_location() reports for the previous copy (the
+ * first copy uses the loop itself as the location). lp_header must
+ * already have been extracted by the caller; lp_body is filled in here
+ * with the extracted original body. Neither list is deleted here, the
+ * callers delete both once they are done cloning.
+ *
+ * In the terminator that we have no trip count for move everything after
+ * the terminator into the continue from branch.
+ */
+ nir_cf_list loop_end;
+ nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
+ nir_after_block(nir_loop_last_block(loop)));
+ move_cf_list_into_loop_term(&loop_end, unlimit_term);
+
+ /* Pluck out the loop body: everything still left inside the loop, from
+ * the start of its first block to the end of its last block.
+ */
+ nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+ nir_after_block(nir_loop_last_block(loop)));
+
+ /* Set unroll_loc to the loop as we will insert the unrolled loop before it.
+ * If num_times_to_clone is 0 this is also the value returned.
+ */
+ nir_cf_node *unroll_loc = &loop->cf_node;
+
+ /* Temp list to store the cloned loop as we unroll */
+ nir_cf_list unrolled_lp_body;
+
+ for (unsigned i = 0; i < num_times_to_clone; i++) {
+
+ nir_cursor cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+ unlimit_term->continue_from_then);
+
+ /* Clone loop header and insert in if branch */
+ nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
+ cursor, remap_table);
+
+ cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+ unlimit_term->continue_from_then);
+
+ /* Clone loop body. The clone is kept in the temp list for now because
+ * its tail is inspected before it is inserted at the cursor.
+ */
+ nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
+ remap_table);
+
+ unroll_loc = exec_node_data(nir_cf_node,
+ exec_list_get_tail(&unrolled_lp_body.list),
+ node);
+ assert(unroll_loc->type == nir_cf_node_block &&
+ exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
+
+ /* Get the unrolled if node: the cloned body is asserted above to end in
+ * an empty block, so step back to the cf node before it. The next
+ * iteration inserts relative to this node, and after the last iteration
+ * it is what gets returned.
+ */
+ unroll_loc = nir_cf_node_prev(unroll_loc);
+
+ /* Insert unrolled loop body */
+ nir_cf_reinsert(&unrolled_lp_body, cursor);
+ }
+
+ return unroll_loc;
+}
+
/**
* Unroll a loop with two exists when the trip count of one of the exits is
* unknown. If continue_from_then is true, the loop is repeated only when the
* trip count == 1 we execute the code above the break twice and the
* code below it once so we need clone things twice and so on.
*/
- num_times_to_clone = loop->info->trip_count + 1;
+ num_times_to_clone = loop->info->max_trip_count + 1;
} else {
/* Pluck out the loop header */
nir_cf_extract(&lp_header, nir_before_block(header_blk),
nir_cf_node_remove(&limiting_term->nif->cf_node);
- num_times_to_clone = loop->info->trip_count;
+ num_times_to_clone = loop->info->max_trip_count;
}
- /* In the terminator that we have no trip count for move everything after
- * the terminator into the continue from branch.
- */
- nir_cf_list loop_end;
- nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
- nir_after_block(nir_loop_last_block(loop)));
- move_cf_list_into_loop_term(&loop_end, unlimit_term);
+ struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
- /* Pluck out the loop body. */
- nir_cf_list loop_body;
- nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
- nir_after_block(nir_loop_last_block(loop)));
-
- struct hash_table *remap_table =
- _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
-
- /* Set unroll_loc to the loop as we will insert the unrolled loop before it
- */
- nir_cf_node *unroll_loc = &loop->cf_node;
+ nir_cf_list lp_body;
+ nir_cf_node *unroll_loc =
+ complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
+ remap_table, num_times_to_clone);
- /* Temp lists to store the cloned loop as we unroll */
- nir_cf_list unrolled_lp_body;
- nir_cf_list cloned_header;
-
- for (unsigned i = 0; i < num_times_to_clone; i++) {
- /* Clone loop header */
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
+ if (!limiting_term_second) {
+ assert(unroll_loc->type == nir_cf_node_if);
nir_cursor cursor =
get_complex_unroll_insert_location(unroll_loc,
unlimit_term->continue_from_then);
- /* Insert cloned loop header */
- nir_cf_reinsert(&cloned_header, cursor);
+ /* Clone loop header and insert in if branch */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ cursor, remap_table);
cursor =
get_complex_unroll_insert_location(unroll_loc,
unlimit_term->continue_from_then);
- /* Clone loop body */
- nir_cf_list_clone(&unrolled_lp_body, &loop_body, loop->cf_node.parent,
- remap_table);
+ /* Clone so things get properly remapped, and insert break block from
+ * the limiting terminator.
+ */
+ nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
+ cursor, remap_table);
- unroll_loc = exec_node_data(nir_cf_node,
- exec_list_get_tail(&unrolled_lp_body.list),
- node);
- assert(unroll_loc->type == nir_cf_node_block &&
- exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
+ nir_cf_delete(&limit_break_list);
+ }
- /* Get the unrolled if node */
- unroll_loc = nir_cf_node_prev(unroll_loc);
+ /* The loop has been unrolled so remove it. */
+ nir_cf_node_remove(&loop->cf_node);
- /* Insert unrolled loop body */
- nir_cf_reinsert(&unrolled_lp_body, cursor);
- }
+ /* Delete the original loop header and body */
+ nir_cf_delete(&lp_header);
+ nir_cf_delete(&lp_body);
- if (!limiting_term_second) {
- assert(unroll_loc->type == nir_cf_node_if);
+ _mesa_hash_table_destroy(remap_table, NULL);
+}
- nir_cf_list_clone(&cloned_header, &lp_header, loop->cf_node.parent,
- remap_table);
+/**
+ * Unroll loops where we only have a single terminator but the exact trip
+ * count is unknown. For example:
+ *
+ * for (int i = 0; i < imin(x, 4); i++)
+ * ...
+ *
+ * The loop must have exactly one terminator, that terminator must be the
+ * limiting terminator, and its if must satisfy nir_is_trivial_loop_if()
+ * (all three are asserted). max_trip_count + 1 copies of the loop are
+ * emitted through complex_unroll_loop_body() and the original loop is then
+ * removed.
+ */
+static void
+complex_unroll_single_terminator(nir_loop *loop)
+{
+ assert(list_length(&loop->info->loop_terminator_list) == 1);
+ assert(loop->info->limiting_terminator);
+ assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
+ loop->info->limiting_terminator->break_block));
- nir_cursor cursor =
- get_complex_unroll_insert_location(unroll_loc,
- unlimit_term->continue_from_then);
+ nir_loop_terminator *terminator = loop->info->limiting_terminator;
- /* Insert cloned loop header */
- nir_cf_reinsert(&cloned_header, cursor);
+ loop_prepare_for_unroll(loop);
- /* Clone so things get properly remapped, and insert break block from
- * the limiting terminator.
- */
- nir_cf_list cloned_break_blk;
- nir_cf_list_clone(&cloned_break_blk, &limit_break_list,
- loop->cf_node.parent, remap_table);
+ /* Pluck out the loop header: everything from the start of the loop up to,
+ * but not including, the terminator's if.
+ */
+ nir_cf_list lp_header;
+ nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+ nir_before_cf_node(&terminator->nif->cf_node));
- cursor =
- get_complex_unroll_insert_location(unroll_loc,
- unlimit_term->continue_from_then);
+ /* Pointer-keyed table handed to the clone calls so things get properly
+ * remapped. Created with the same helper the other unroll functions in
+ * this file use.
+ */
+ struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
- nir_cf_reinsert(&cloned_break_blk, cursor);
- nir_cf_delete(&limit_break_list);
+ /* We need to clone the loop one extra time in order to clone the lcssa
+ * vars for the last iteration (they are inside the following ifs break
+ * branch). We leave other passes to clean up this redundant if.
+ */
+ unsigned num_times_to_clone = loop->info->max_trip_count + 1;
+
+ /* The returned location is not needed here, hence UNUSED. */
+ nir_cf_list lp_body;
+ UNUSED nir_cf_node *unroll_loc =
+ complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+ remap_table, num_times_to_clone);
+
+ /* Delete the original loop header and body */
+ nir_cf_delete(&lp_header);
+ nir_cf_delete(&lp_body);
+
+ /* The original loop has been replaced so remove it. */
+ nir_cf_node_remove(&loop->cf_node);
+
+ _mesa_hash_table_destroy(remap_table, NULL);
+}
+
+/* Unrolls the classic wrapper loops e.g
+ *
+ * do {
+ * // ...
+ * } while (false)
+ *
+ * Returns false, leaving the loop untouched, when the loop has more than 3
+ * terminators. Otherwise the loop body is moved to just after the loop, the
+ * loop is removed, and true is returned.
+ */
+static bool
+wrapper_unroll(nir_loop *loop)
+{
+ if (!list_empty(&loop->info->loop_terminator_list)) {
+
+ /* Unrolling a loop with a large number of exits can result in a
+ * large increase in register pressure. For now we just skip
+ * unrolling if we have more than 3 exits (not including the break
+ * at the end of the loop).
+ *
+ * TODO: Most loops that fit this pattern are simply switch
+ * statements that are converted to a loop to take advantage of
+ * exiting jump instruction handling. In this case we could make
+ * use of a binary search pattern like we do in
+ * nir_lower_indirect_derefs(), this should allow us to unroll the
+ * loops in an optimal way and should also avoid some of the
+ * register pressure that comes from simply nesting the
+ * terminators one after the other.
+ */
+ if (list_length(&loop->info->loop_terminator_list) > 3)
+ return false;
+
+ loop_prepare_for_unroll(loop);
+
+ nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
+ list_for_each_entry(nir_loop_terminator, terminator,
+ &loop->info->loop_terminator_list,
+ loop_terminator_link) {
+
+ /* Remove break from the terminator */
+ nir_instr *break_instr =
+ nir_block_last_instr(terminator->break_block);
+ nir_instr_remove(break_instr);
+
+ /* Pluck out the loop body: everything between this terminator's if
+ * and the current loop_end cursor.
+ */
+ nir_cf_list loop_body;
+ nir_cf_extract(&loop_body,
+ nir_after_cf_node(&terminator->nif->cf_node),
+ loop_end);
+
+ /* Reinsert loop body into continue from block. loop_end is then moved
+ * to the end of this terminator's continue branch so the extraction
+ * for the next terminator stops there.
+ */
+ nir_cf_reinsert(&loop_body,
+ nir_after_block(terminator->continue_from_block));
+
+ loop_end = terminator->continue_from_then ?
+ nir_after_block(nir_if_last_then_block(terminator->nif)) :
+ nir_after_block(nir_if_last_else_block(terminator->nif));
+ }
+ } else {
+ loop_prepare_for_unroll(loop);
}
+ /* Pluck out the loop body. */
+ nir_cf_list loop_body;
+ nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
+ nir_after_block(nir_loop_last_block(loop)));
+
+ /* Reinsert loop body after the loop */
+ nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
+
/* The loop has been unrolled so remove it. */
nir_cf_node_remove(&loop->cf_node);
+ return true;
+}
+
+static bool
+is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
+ unsigned trip_count)
+{
+ for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
+ if (d->deref_type != nir_deref_type_array)
+ continue;
+
+ nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
+ nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
+ if (!nir_srcs_equal(d->arr.index, src))
+ continue;
+
+ nir_deref_instr *parent = nir_deref_instr_parent(d);
+ assert(glsl_type_is_array(parent->type) ||
+ glsl_type_is_matrix(parent->type));
+
+ /* We have already unrolled the loop and the new one will be embedded in
+ * the innermost continue branch. So unless the array is greater than
+ * the trip count any iteration over the loop will be an out of bounds
+ * access of the array.
+ *
+ * Only the first array deref found while walking from deref up through
+ * its parents whose index equals the terminator condition operand
+ * selected by term->induction_rhs is considered. If the chain has no
+ * such deref the loop ends and false is returned below.
+ */
+ return glsl_get_length(parent->type) <= trip_count;
+ }
+
+ return false;
+}
+
+/* If we know an array access is going to be out of bounds remove or replace
+ * the access with an undef. This can later result in the entire loop being
+ * removed by nir_opt_dead_cf().
+ *
+ * Does nothing unless loop->info->guessed_trip_count is non-zero. lp_header
+ * and lp_body are temporarily reinserted into loop so that its blocks can
+ * be walked, then extracted again before returning: the header as
+ * everything before term->nif, the body as whatever then remains in the
+ * loop. Out of bounds load_deref results have their uses rewritten to an
+ * undef (the load itself is left in place); out of bounds store_deref and
+ * copy_deref instructions are removed.
+ */
+static void
+remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
+ nir_loop_terminator *term,
+ nir_cf_list *lp_header,
+ nir_cf_list *lp_body,
+ unsigned trip_count)
+{
+ if (!loop->info->guessed_trip_count)
+ return;
+
+ /* Temporarily recreate the original loop so we can alter it */
+ nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
+ nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
+
+ nir_builder b;
+ nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node));
+
+ nir_foreach_block_in_cf_node(block, &loop->cf_node) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* Check for arrays variably-indexed by a loop induction variable.
+ * If this access is out of bounds remove the instruction or replace
+ * its use with an undefined instruction.
+ * If the loop is no longer useful we leave it for the appropriate
+ * pass to clean it up for us.
+ *
+ * src[0] is checked for all three intrinsics; src[1] is checked
+ * further down for copy_deref only.
+ */
+ if (intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_store_deref ||
+ intrin->intrinsic == nir_intrinsic_copy_deref) {
+
+ if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
+ trip_count)) {
+ if (intrin->intrinsic == nir_intrinsic_load_deref) {
+ nir_ssa_def *undef =
+ nir_ssa_undef(&b, intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size);
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+ nir_src_for_ssa(undef));
+ } else {
+ nir_instr_remove(instr);
+ continue;
+ }
+ }
+
+ if (intrin->intrinsic == nir_intrinsic_copy_deref &&
+ is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
+ trip_count)) {
+ nir_instr_remove(instr);
+ }
+ }
+ }
+ }
+
+ /* Now that we are done extract the loop header and body again */
+ nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
+ nir_before_cf_node(&term->nif->cf_node));
+ nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
+ nir_after_block(nir_loop_last_block(loop)));
+}
+
+/* Partially unrolls loops that don't have a known trip count.
+ *
+ * The loop must have exactly one terminator whose if satisfies
+ * nir_is_trivial_loop_if() (both asserted). trip_count copies of the loop
+ * are emitted through complex_unroll_loop_body(); a new loop, marked
+ * partially_unrolled and holding clones of the header and body, is then
+ * inserted at the location reported for the innermost copy, and the
+ * original loop is removed.
+ */
+static void
+partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
+{
+ assert(list_length(&loop->info->loop_terminator_list) == 1);
+
+ nir_loop_terminator *terminator =
+ list_first_entry(&loop->info->loop_terminator_list,
+ nir_loop_terminator, loop_terminator_link);
+
+ assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
+
+ loop_prepare_for_unroll(loop);
+
+ /* Pluck out the loop header */
+ nir_cf_list lp_header;
+ nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
+ nir_before_cf_node(&terminator->nif->cf_node));
+
+ /* Pointer-keyed table handed to the clone calls so things get properly
+ * remapped. Created with the same helper the other unroll functions in
+ * this file use.
+ */
+ struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
+
+ nir_cf_list lp_body;
+ nir_cf_node *unroll_loc =
+ complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
+ remap_table, trip_count);
+
+ /* Attempt to remove out of bounds array access */
+ remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
+ &lp_body, trip_count);
+
+ nir_cursor cursor =
+ get_complex_unroll_insert_location(unroll_loc,
+ terminator->continue_from_then);
+
+ /* Reinsert the loop in the innermost nested continue branch of the unrolled
+ * loop.
+ */
+ nir_loop *new_loop = nir_loop_create(shader);
+ nir_cf_node_insert(cursor, &new_loop->cf_node);
+ new_loop->partially_unrolled = true;
+
+ /* Clone loop header and insert into new loop */
+ nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
+ nir_after_cf_list(&new_loop->body),
+ remap_table);
+
+ /* Clone loop body and insert into new loop */
+ nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
+ nir_after_cf_list(&new_loop->body),
+ remap_table);
+
+ /* Insert break back into terminator: it goes at the end of whichever
+ * branch of the new loop's if is not the continue branch.
+ */
+ nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
+ nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop));
+ if (terminator->continue_from_then) {
+ nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr);
+ } else {
+ nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr);
+ }
+
/* Delete the original loop header and body */
nir_cf_delete(&lp_header);
- nir_cf_delete(&loop_body);
+ nir_cf_delete(&lp_body);
+
+ /* The original loop has been replaced so remove it. */
+ nir_cf_node_remove(&loop->cf_node);
_mesa_hash_table_destroy(remap_table, NULL);
}
+/*
+ * Returns true if we should unroll the loop, otherwise false.
+ *
+ * A loop->control of nir_loop_control_unroll or
+ * nir_loop_control_dont_unroll decides the answer outright. Otherwise the
+ * trip count used is max_trip_count, or guessed_trip_count when
+ * max_trip_count is 0, and it must not exceed the shader's
+ * max_unroll_iterations option. force_unroll is only honoured when
+ * guessed_trip_count is 0. Failing that, instr_cost * trip_count has to fit
+ * within max_unroll_iterations * LOOP_UNROLL_LIMIT.
+ */
static bool
-is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li)
+check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
{
+ if (loop->control == nir_loop_control_unroll)
+ return true;
+
+ if (loop->control == nir_loop_control_dont_unroll)
+ return false;
+
+ nir_loop_info *li = loop->info;
unsigned max_iter = shader->options->max_unroll_iterations;
+ unsigned trip_count =
+ li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
- if (li->trip_count > max_iter)
+ if (trip_count > max_iter)
return false;
- if (li->force_unroll)
+ if (li->force_unroll && !li->guessed_trip_count)
return true;
bool loop_not_too_large =
- li->num_instructions * li->trip_count <= max_iter * 26;
+ li->instr_cost * trip_count <= max_iter * LOOP_UNROLL_LIMIT;
return loop_not_too_large;
}
static bool
-process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop)
+process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out)
{
bool progress = false;
+ bool has_nested_loop = false;
nir_loop *loop;
switch (cf_node->type) {
case nir_cf_node_if: {
nir_if *if_stmt = nir_cf_node_as_if(cf_node);
foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list)
- progress |= process_loops(sh, nested_node, innermost_loop);
+ progress |= process_loops(sh, nested_node, has_nested_loop_out);
foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list)
- progress |= process_loops(sh, nested_node, innermost_loop);
+ progress |= process_loops(sh, nested_node, has_nested_loop_out);
return progress;
}
case nir_cf_node_loop: {
loop = nir_cf_node_as_loop(cf_node);
foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body)
- progress |= process_loops(sh, nested_node, innermost_loop);
+ progress |= process_loops(sh, nested_node, &has_nested_loop);
+
break;
}
default:
unreachable("unknown cf node type");
}
- if (*innermost_loop) {
- /* Don't attempt to unroll outer loops or a second inner loop in
- * this pass wait until the next pass as we have altered the cf.
+ /* Don't attempt to unroll a second inner loop in this pass, wait until the
+ * next pass as we have altered the cf.
+ */
+ if (!progress && loop->control != nir_loop_control_dont_unroll) {
+
+ /* Check for the classic
+ *
+ * do {
+ * // ...
+ * } while (false)
+ *
+ * that is used to wrap multi-line macros. GLSL IR also wraps switch
+ * statements in a loop like this.
*/
- *innermost_loop = false;
+ if (loop->info->limiting_terminator == NULL &&
+ !loop->info->complex_loop) {
+
+ nir_block *last_loop_blk = nir_loop_last_block(loop);
+ if (nir_block_ends_in_break(last_loop_blk)) {
+ progress = wrapper_unroll(loop);
+ goto exit;
+ }
+
+ /* If we were able to guess the loop iteration based on array access
+ * then do a partial unroll.
+ */
+ unsigned num_lt = list_length(&loop->info->loop_terminator_list);
+ if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled &&
+ loop->info->guessed_trip_count &&
+ check_unrolling_restrictions(sh, loop)) {
+ partial_unroll(sh, loop, loop->info->guessed_trip_count);
+ progress = true;
+ }
+ }
- if (loop->info->limiting_terminator == NULL)
- return progress;
+ if (has_nested_loop || !loop->info->limiting_terminator)
+ goto exit;
- if (!is_loop_small_enough_to_unroll(sh, loop->info))
- return progress;
+ if (!check_unrolling_restrictions(sh, loop))
+ goto exit;
- if (loop->info->is_trip_count_known) {
+ if (loop->info->exact_trip_count_known) {
simple_unroll(loop);
progress = true;
} else {
/* Attempt to unroll loops with two terminators. */
unsigned num_lt = list_length(&loop->info->loop_terminator_list);
- if (num_lt == 2) {
+ if (num_lt == 2 &&
+ !loop->info->limiting_terminator->exact_trip_count_unknown) {
bool limiting_term_second = true;
nir_loop_terminator *terminator =
- list_last_entry(&loop->info->loop_terminator_list,
+ list_first_entry(&loop->info->loop_terminator_list,
nir_loop_terminator, loop_terminator_link);
if (terminator->nif == loop->info->limiting_terminator->nif) {
limiting_term_second = false;
terminator =
- list_first_entry(&loop->info->loop_terminator_list,
+ list_last_entry(&loop->info->loop_terminator_list,
nir_loop_terminator, loop_terminator_link);
}
* limiting terminator just do a simple unroll as the second
* terminator can never be reached.
*/
- if (loop->info->trip_count == 0 && !limiting_term_second) {
+ if (loop->info->max_trip_count == 0 && !limiting_term_second) {
simple_unroll(loop);
} else {
complex_unroll(loop, terminator, limiting_term_second);
}
progress = true;
}
+
+ if (num_lt == 1) {
+ assert(loop->info->limiting_terminator->exact_trip_count_unknown);
+ complex_unroll_single_terminator(loop);
+ progress = true;
+ }
}
}
+exit:
+ *has_nested_loop_out = true;
return progress;
}
nir_metadata_require(impl, nir_metadata_block_index);
foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
- bool innermost_loop = true;
+ bool has_nested_loop = false;
progress |= process_loops(impl->function->shader, node,
- &innermost_loop);
+ &has_nested_loop);
}
if (progress)
return progress;
}
+/**
+ * indirect_mask specifies which type of indirectly accessed variables
+ * should force loop unrolling.
+ */
bool
nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
{