From: Timothy Arceri Date: Thu, 8 Dec 2016 02:25:00 +0000 (+1100) Subject: nir: merge some basic consecutive ifs X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4d3f6cb9739dfeaf9605fcd2f5318e03acf5066e;p=mesa.git nir: merge some basic consecutive ifs After trying multiple times to merge if-statements with phis between them I've come to the conclusion that it cannot be done without regressions. The problem is for some shaders we end up with a whole bunch of phis for the merged ifs resulting in increased register pressure. So this patch just merges ifs that have no phis between them. This seems to be consistent with what LLVM does so for radeonsi we only see a change (although its a large change) in a single shader. Shader-db results i965 (SKL): total instructions in shared programs: 13098176 -> 13098152 (<.01%) instructions in affected programs: 1326 -> 1302 (-1.81%) helped: 4 HURT: 0 total cycles in shared programs: 332032989 -> 332037583 (<.01%) cycles in affected programs: 60665 -> 65259 (7.57%) helped: 0 HURT: 4 The cycles estimates reported by shader-db for i965 seem inaccurate as the only difference in the final code is the removal of the redundent condition evaluations and jumps. Also the biggest code reduction (~7%) for radeonsi was in a tomb raider tressfx shader but for some reason this does not get merged for i965. Shader-db results radeonsi (VEGA): Totals from affected shaders: SGPRS: 232 -> 232 (0.00 %) VGPRS: 164 -> 164 (0.00 %) Spilled SGPRs: 59 -> 59 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 14584 -> 13520 (-7.30 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 13 -> 13 (0.00 %) Wait states: 0 -> 0 (0.00 %) Reviewed-by: Ian Romanick --- diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index 84cfc04d42d..d7a7bb2bb26 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -683,6 +683,98 @@ opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif) return progress; } +static void +simple_merge_if(nir_if *dest_if, nir_if *src_if, bool dest_if_then, + bool src_if_then) +{ + /* Now merge the if branch */ + nir_block *dest_blk = dest_if_then ? nir_if_last_then_block(dest_if) + : nir_if_last_else_block(dest_if); + + struct exec_list *list = src_if_then ? &src_if->then_list + : &src_if->else_list; + + nir_cf_list if_cf_list; + nir_cf_extract(&if_cf_list, nir_before_cf_list(list), + nir_after_cf_list(list)); + nir_cf_reinsert(&if_cf_list, nir_after_block(dest_blk)); +} + +static bool +opt_if_merge(nir_if *nif) +{ + bool progress = false; + + nir_block *next_blk = nir_cf_node_cf_tree_next(&nif->cf_node); + if (next_blk && nif->condition.is_ssa) { + nir_if *next_if = nir_block_get_following_if(next_blk); + if (next_if && next_if->condition.is_ssa) { + + /* Here we merge two consecutive ifs that have the same + * condition e.g: + * + * if ssa_12 { + * ... + * } else { + * ... + * } + * if ssa_12 { + * ... + * } else { + * ... + * } + * + * Note: This only merges if-statements when the block between them + * is empty. The reason we don't try to merge ifs that just have phis + * between them is because this can results in increased register + * pressure. For example when merging if ladders created by indirect + * indexing. + */ + if (nif->condition.ssa == next_if->condition.ssa && + exec_list_is_empty(&next_blk->instr_list)) { + + simple_merge_if(nif, next_if, true, true); + simple_merge_if(nif, next_if, false, false); + + nir_block *new_then_block = nir_if_last_then_block(nif); + nir_block *new_else_block = nir_if_last_else_block(nif); + + nir_block *old_then_block = nir_if_last_then_block(next_if); + nir_block *old_else_block = nir_if_last_else_block(next_if); + + /* Rewrite the predecessor block for any phis following the second + * if-statement. + */ + rewrite_phi_predecessor_blocks(next_if, old_then_block, + old_else_block, + new_then_block, + new_else_block); + + /* Move phis after merged if to avoid them being deleted when we + * remove the merged if-statement. + */ + nir_block *after_next_if_block = + nir_cf_node_as_block(nir_cf_node_next(&next_if->cf_node)); + + nir_foreach_instr_safe(instr, after_next_if_block) { + if (instr->type != nir_instr_type_phi) + break; + + exec_node_remove(&instr->node); + exec_list_push_tail(&next_blk->instr_list, &instr->node); + instr->block = next_blk; + } + + nir_cf_node_remove(&next_if->cf_node); + + progress = true; + } + } + } + + return progress; +} + static bool opt_if_cf_list(nir_builder *b, struct exec_list *cf_list) { @@ -697,6 +789,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list) progress |= opt_if_cf_list(b, &nif->then_list); progress |= opt_if_cf_list(b, &nif->else_list); progress |= opt_if_loop_terminator(nif); + progress |= opt_if_merge(nif); progress |= opt_if_simplification(b, nif); break; }