* Implements a small peephole optimization that looks for
*
* if (cond) {
- * <empty>
+ * <then SSA defs>
* } else {
- * <empty>
+ * <else SSA defs>
* }
* phi
* ...
* phi
*
- * and replaces it with a series of selects. It can also handle the case
- * where, instead of being empty, the if may contain some move operations
- * whose only use is one of the following phi nodes. This happens all the
- * time when the SSA form comes from a conditional assignment with a
- * swizzle.
+ * and replaces it with:
+ *
+ * <then SSA defs>
+ * <else SSA defs>
+ * bcsel
+ * ...
+ * bcsel
+ *
+ * where the SSA defs are ALU operations or other cheap instructions (not
+ * texturing, for example).
+ *
+ * If the total number of ALU operations across both branches is greater
+ * than the limit parameter, then the optimization is skipped. In limit=0
+ * mode, the SSA defs must only be move-like operations (e.g. MOVs or vecN),
+ * which we expect to get copy-propagated away once they're out of the
+ * inner blocks.
*/
static bool
-block_check_for_allowed_instrs(nir_block *block)
+block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok)
{
nir_foreach_instr(instr, block) {
switch (instr->type) {
}
break;
+ case nir_intrinsic_load_uniform:
+ if (!alu_ok)
+ return false;
+ break;
+
default:
return false;
}
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
- /* It must be a move-like operation. */
break;
default:
- return false;
+ if (!alu_ok) {
+ /* It must be a move-like operation. */
+ return false;
+ }
+ break;
}
- /* Can't handle saturate */
- if (mov->dest.saturate)
- return false;
-
/* It must be SSA */
if (!mov->dest.dest.is_ssa)
return false;
- /* It cannot have any if-uses */
- if (!list_empty(&mov->dest.dest.ssa.if_uses))
- return false;
+ if (alu_ok) {
+ (*count)++;
+ } else {
+ /* Can't handle saturate */
+ if (mov->dest.saturate)
+ return false;
- /* The only uses of this definition must be phi's in the successor */
- nir_foreach_use(use, &mov->dest.dest.ssa) {
- if (use->parent_instr->type != nir_instr_type_phi ||
- use->parent_instr->block != block->successors[0])
+ /* It cannot have any if-uses */
+ if (!list_empty(&mov->dest.dest.ssa.if_uses))
return false;
+
+ /* The only uses of this definition must be phi's in the successor */
+ nir_foreach_use(use, &mov->dest.dest.ssa) {
+ if (use->parent_instr->type != nir_instr_type_phi ||
+ use->parent_instr->block != block->successors[0])
+ return false;
+ }
}
break;
}
}
static bool
-nir_opt_peephole_select_block(nir_block *block, void *mem_ctx)
+nir_opt_peephole_select_block(nir_block *block, void *mem_ctx, unsigned limit)
{
if (nir_cf_node_is_first(&block->cf_node))
return false;
nir_block *else_block = nir_cf_node_as_block(else_node);
/* ... and those blocks must only contain "allowed" instructions. */
- if (!block_check_for_allowed_instrs(then_block) ||
- !block_check_for_allowed_instrs(else_block))
+ unsigned count = 0;
+ if (!block_check_for_allowed_instrs(then_block, &count, limit != 0) ||
+ !block_check_for_allowed_instrs(else_block, &count, limit != 0))
+ return false;
+
+ if (count > limit)
return false;
/* At this point, we know that the previous CFG node is an if-then
}
static bool
-nir_opt_peephole_select_impl(nir_function_impl *impl)
+nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit)
{
void *mem_ctx = ralloc_parent(impl);
bool progress = false;
nir_foreach_block_safe(block, impl) {
- progress |= nir_opt_peephole_select_block(block, mem_ctx);
+ progress |= nir_opt_peephole_select_block(block, mem_ctx, limit);
}
if (progress)
}
bool
-nir_opt_peephole_select(nir_shader *shader)
+nir_opt_peephole_select(nir_shader *shader, unsigned limit)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl)
- progress |= nir_opt_peephole_select_impl(function->impl);
+ progress |= nir_opt_peephole_select_impl(function->impl, limit);
}
return progress;
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
- NIR_PASS(progress, s, nir_opt_peephole_select);
+ NIR_PASS(progress, s, nir_opt_peephole_select, 8);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);