X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir_opt_loop_unroll.c;h=ea2012e292a4f65564115f3bd90124dc1e3683e3;hb=4dfa7adc100061f96e15fcbbfa5f776cd6c5a94a;hp=dae5bfc90203621d7d818bb822a0500a15af8da7;hpb=2d36efdb7f18f061c519dbb93f6058bf161aad33;p=mesa.git diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c index dae5bfc9020..ea2012e292a 100644 --- a/src/compiler/nir/nir_opt_loop_unroll.c +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -33,16 +33,14 @@ * to give about the same results. Around 5 instructions per node. But some * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so * we set it to 26. - * This was bumped to 96 because it unrolled more loops with a positive - * effect (vulkan ssao demo). */ -#define LOOP_UNROLL_LIMIT 96 +#define LOOP_UNROLL_LIMIT 26 /* Prepare this loop for unrolling by first converting to lcssa and then - * converting the phis from the loops first block and the block that follows - * the loop into regs. Partially converting out of SSA allows us to unroll - * the loop without having to keep track of and update phis along the way - * which gets tricky and doesn't add much value over conveting to regs. + * converting the phis from the top level of the loop body to regs. + * Partially converting out of SSA allows us to unroll the loop without having + * to keep track of and update phis along the way which gets tricky and + * doesn't add much value over converting to regs. * * The loop may have a continue instruction at the end of the loop which does * nothing. 
Once we're out of SSA, we can safely delete it so we don't have @@ -51,18 +49,27 @@ static void loop_prepare_for_unroll(nir_loop *loop) { + nir_rematerialize_derefs_in_use_blocks_impl( + nir_cf_node_get_function(&loop->cf_node)); + nir_convert_loop_to_lcssa(loop); - nir_lower_phis_to_regs_block(nir_loop_first_block(loop)); + /* Lower phis at the top level of the loop body */ + foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { + if (nir_cf_node_block == node->type) { + nir_lower_phis_to_regs_block(nir_cf_node_as_block(node)); + } + } + /* Lower phis after the loop */ nir_block *block_after_loop = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); nir_lower_phis_to_regs_block(block_after_loop); + /* Remove continue if it's the last instruction in the loop */ nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop)); if (last_instr && last_instr->type == nir_instr_type_jump) { - assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue); nir_instr_remove(last_instr); } } @@ -460,6 +467,102 @@ complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term, _mesa_hash_table_destroy(remap_table, NULL); } +/* Unrolls the classic wrapper loops e.g + * + * do { + * // ... + * } while (false) + */ +static bool +wrapper_unroll(nir_loop *loop) +{ + if (!list_empty(&loop->info->loop_terminator_list)) { + + /* Unrolling a loop with a large number of exits can result in a + * large increase in register pressure. For now we just skip + * unrolling if we have more than 3 exits (not including the break + * at the end of the loop). + * + * TODO: Most loops that fit this pattern are simply switch + * statements that are converted to a loop to take advantage of + * exiting jump instruction handling. 
In this case we could make + * use of a binary search pattern like we do in + * nir_lower_indirect_derefs(), this should allow us to unroll the + * loops in an optimal way and should also avoid some of the + * register pressure that comes from simply nesting the + * terminators one after the other. + */ + if (list_length(&loop->info->loop_terminator_list) > 3) + return false; + + loop_prepare_for_unroll(loop); + + nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop)); + list_for_each_entry(nir_loop_terminator, terminator, + &loop->info->loop_terminator_list, + loop_terminator_link) { + + /* Remove break from the terminator */ + nir_instr *break_instr = + nir_block_last_instr(terminator->break_block); + nir_instr_remove(break_instr); + + /* Pluck out the loop body. */ + nir_cf_list loop_body; + nir_cf_extract(&loop_body, + nir_after_cf_node(&terminator->nif->cf_node), + loop_end); + + /* Reinsert loop body into continue from block */ + nir_cf_reinsert(&loop_body, + nir_after_block(terminator->continue_from_block)); + + loop_end = terminator->continue_from_then ? + nir_after_block(nir_if_last_then_block(terminator->nif)) : + nir_after_block(nir_if_last_else_block(terminator->nif)); + } + } else { + nir_block *blk_after_loop = + nir_cursor_current_block(nir_after_cf_node(&loop->cf_node)); + + /* There may still be some single src phis following the loop that + * have not yet been cleaned up by another pass. Tidy those up + * before unrolling the loop. 
+ */ + nir_foreach_instr_safe(instr, blk_after_loop) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + assert(exec_list_length(&phi->srcs) == 1); + + nir_phi_src *phi_src = + exec_node_data(nir_phi_src, exec_list_get_head(&phi->srcs), node); + + nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src); + nir_instr_remove(instr); + } + + /* Remove break at end of the loop */ + nir_block *last_loop_blk = nir_loop_last_block(loop); + nir_instr *break_instr = nir_block_last_instr(last_loop_blk); + nir_instr_remove(break_instr); + } + + /* Pluck out the loop body. */ + nir_cf_list loop_body; + nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)), + nir_after_block(nir_loop_last_block(loop))); + + /* Reinsert loop body after the loop */ + nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node)); + + /* The loop has been unrolled so remove it. */ + nir_cf_node_remove(&loop->cf_node); + + return true; +} + static bool is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li) { @@ -478,9 +581,10 @@ is_loop_small_enough_to_unroll(nir_shader *shader, nir_loop_info *li) } static bool -process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop) +process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out) { bool progress = false; + bool has_nested_loop = false; nir_loop *loop; switch (cf_node->type) { @@ -489,32 +593,53 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop) case nir_cf_node_if: { nir_if *if_stmt = nir_cf_node_as_if(cf_node); foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list) - progress |= process_loops(sh, nested_node, innermost_loop); + progress |= process_loops(sh, nested_node, has_nested_loop_out); foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list) - progress |= process_loops(sh, nested_node, innermost_loop); + progress |= process_loops(sh, nested_node, 
has_nested_loop_out); return progress; } case nir_cf_node_loop: { loop = nir_cf_node_as_loop(cf_node); foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body) - progress |= process_loops(sh, nested_node, innermost_loop); + progress |= process_loops(sh, nested_node, &has_nested_loop); + break; } default: unreachable("unknown cf node type"); } - if (*innermost_loop) { - /* Don't attempt to unroll outer loops or a second inner loop in - * this pass wait until the next pass as we have altered the cf. + /* Don't attempt to unroll a second inner loop in this pass, wait until the + * next pass as we have altered the cf. + */ + if (!progress) { + + /* Check for the classic + * + * do { + * // ... + * } while (false) + * + * that is used to wrap multi-line macros. GLSL IR also wraps switch + * statements in a loop like this. */ - *innermost_loop = false; + if (loop->info->limiting_terminator == NULL && + !loop->info->complex_loop) { + + nir_block *last_loop_blk = nir_loop_last_block(loop); + if (!nir_block_ends_in_break(last_loop_blk)) + goto exit; - if (loop->info->limiting_terminator == NULL) - return progress; + progress = wrapper_unroll(loop); + + goto exit; + } + + if (has_nested_loop || loop->info->limiting_terminator == NULL) + goto exit; if (!is_loop_small_enough_to_unroll(sh, loop->info)) - return progress; + goto exit; if (loop->info->is_trip_count_known) { simple_unroll(loop); @@ -525,14 +650,14 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop) if (num_lt == 2) { bool limiting_term_second = true; nir_loop_terminator *terminator = - list_last_entry(&loop->info->loop_terminator_list, + list_first_entry(&loop->info->loop_terminator_list, nir_loop_terminator, loop_terminator_link); if (terminator->nif == loop->info->limiting_terminator->nif) { limiting_term_second = false; terminator = - list_first_entry(&loop->info->loop_terminator_list, + list_last_entry(&loop->info->loop_terminator_list, nir_loop_terminator, 
loop_terminator_link); } @@ -550,6 +675,8 @@ process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *innermost_loop) } } +exit: + *has_nested_loop_out = true; return progress; } @@ -562,9 +689,9 @@ nir_opt_loop_unroll_impl(nir_function_impl *impl, nir_metadata_require(impl, nir_metadata_block_index); foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { - bool innermost_loop = true; + bool has_nested_loop = false; progress |= process_loops(impl->function->shader, node, - &innermost_loop); + &has_nested_loop); } if (progress) @@ -573,6 +700,10 @@ nir_opt_loop_unroll_impl(nir_function_impl *impl, return progress; } +/** + * indirect_mask specifies which type of indirectly accessed variables + * should force loop unrolling. + */ bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask) {