NIR_PASS(progress, shader, nir_opt_if);
NIR_PASS(progress, shader, nir_opt_dead_cf);
NIR_PASS(progress, shader, nir_opt_cse);
- NIR_PASS(progress, shader, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);
NIR_PASS(progress, shader, nir_opt_algebraic);
NIR_PASS(progress, shader, nir_opt_constant_folding);
NIR_PASS(progress, shader, nir_opt_undef);
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
- NIR_PASS(progress, s, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
bool nir_opt_move_load_ubo(nir_shader *shader);
-bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
+bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
+ bool indirect_load_ok);
bool nir_opt_remove_phis_impl(nir_function_impl *impl);
bool nir_opt_remove_phis(nir_shader *shader);
*/
static bool
-block_check_for_allowed_instrs(nir_block *block, unsigned *count, bool alu_ok)
+block_check_for_allowed_instrs(nir_block *block, unsigned *count,
+ bool alu_ok, bool indirect_load_ok)
{
nir_foreach_instr(instr, block) {
switch (instr->type) {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
- case nir_intrinsic_load_deref:
- switch (nir_src_as_deref(intrin->src[0])->mode) {
+ case nir_intrinsic_load_deref: {
+ nir_deref_instr *const deref = nir_src_as_deref(intrin->src[0]);
+
+ switch (deref->mode) {
case nir_var_shader_in:
case nir_var_uniform:
+ /* Don't try to remove flow control around an indirect load
+ * because that flow control may be trying to avoid invalid
+ * loads.
+ */
+ if (!indirect_load_ok && nir_deref_instr_has_indirect(deref))
+ return false;
+
break;
default:
return false;
}
break;
+ }
case nir_intrinsic_load_uniform:
if (!alu_ok)
static bool
nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
- unsigned limit)
+ unsigned limit, bool indirect_load_ok)
{
if (nir_cf_node_is_first(&block->cf_node))
return false;
/* ... and those blocks must only contain "allowed" instructions. */
unsigned count = 0;
- if (!block_check_for_allowed_instrs(then_block, &count, limit != 0) ||
- !block_check_for_allowed_instrs(else_block, &count, limit != 0))
+ if (!block_check_for_allowed_instrs(then_block, &count, limit != 0,
+ indirect_load_ok) ||
+ !block_check_for_allowed_instrs(else_block, &count, limit != 0,
+ indirect_load_ok))
return false;
if (count > limit)
}
static bool
-nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit)
+nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
+ bool indirect_load_ok)
{
nir_shader *shader = impl->function->shader;
bool progress = false;
nir_foreach_block_safe(block, impl) {
- progress |= nir_opt_peephole_select_block(block, shader, limit);
+ progress |= nir_opt_peephole_select_block(block, shader, limit,
+ indirect_load_ok);
}
if (progress)
}
bool
-nir_opt_peephole_select(nir_shader *shader, unsigned limit)
+nir_opt_peephole_select(nir_shader *shader, unsigned limit,
+ bool indirect_load_ok)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl)
- progress |= nir_opt_peephole_select_impl(function->impl, limit);
+ progress |= nir_opt_peephole_select_impl(function->impl, limit,
+ indirect_load_ok);
}
return progress;
progress |= OPT(s, nir_opt_gcm, true);
else if (gcm == 2)
progress |= OPT(s, nir_opt_gcm, false);
- progress |= OPT(s, nir_opt_peephole_select, 16);
+ progress |= OPT(s, nir_opt_peephole_select, 16, true);
progress |= OPT(s, nir_opt_intrinsics);
progress |= OPT(s, nir_opt_algebraic);
progress |= OPT(s, nir_opt_constant_folding);
NIR_PASS(progress, sel->nir, nir_opt_if);
NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
NIR_PASS(progress, sel->nir, nir_opt_cse);
- NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true);
/* Needed for algebraic lowering */
NIR_PASS(progress, sel->nir, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
- NIR_PASS(progress, s, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
OPT(nir_copy_prop);
OPT(nir_opt_dce);
OPT(nir_opt_cse);
- OPT(nir_opt_peephole_select, 0);
+
+ /* For indirect loads of uniforms (push constants), we assume that array
+ * indices will nearly always be in bounds and the cost of the load is
+ * low. Therefore there shouldn't be a performance benefit to avoid it.
+ * However, in vec4 tessellation shaders, these loads operate by
+ * actually pulling from memory.
+ */
+ const bool is_vec4_tessellation = !is_scalar &&
+ (nir->info.stage == MESA_SHADER_TESS_CTRL ||
+ nir->info.stage == MESA_SHADER_TESS_EVAL);
+ OPT(nir_opt_peephole_select, 0, !is_vec4_tessellation);
+
OPT(nir_opt_intrinsics);
OPT(nir_opt_idiv_const, 32);
OPT(nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_if);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
- NIR_PASS(progress, nir, nir_opt_peephole_select, 8);
+ NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);