From b02e0d260489cef7e98b222a52358dd75b0a37b4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 22 Nov 2019 22:47:02 -0500 Subject: [PATCH] radeonsi/nir: don't run si_nir_opts again if there is no change 0.3% less overhead Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Connor Abbott --- src/amd/llvm/ac_nir_to_llvm.c | 15 +++++++++------ src/amd/llvm/ac_nir_to_llvm.h | 2 +- src/gallium/drivers/radeonsi/si_shader_nir.c | 11 ++++++----- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 2eba80a9c38..210a37a3906 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4783,17 +4783,19 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ralloc_free(ctx.vars); } -void +bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) { + bool progress = false; + /* Lower large variables to scratch first so that we won't bloat the * shader by generating large if ladders for them. We later lower * scratch to alloca's, assuming LLVM won't generate VGPR indexing. */ - NIR_PASS_V(nir, nir_lower_vars_to_scratch, - nir_var_function_temp, - 256, - glsl_get_natural_size_align_bytes); + NIR_PASS(progress, nir, nir_lower_vars_to_scratch, + nir_var_function_temp, + 256, + glsl_get_natural_size_align_bytes); /* While it would be nice not to have this flag, we are constrained * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9. @@ -4825,7 +4827,8 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) */ indirect_mask |= nir_var_function_temp; - nir_lower_indirect_derefs(nir, indirect_mask); + progress |= nir_lower_indirect_derefs(nir, indirect_mask); + return progress; } static unsigned diff --git a/src/amd/llvm/ac_nir_to_llvm.h b/src/amd/llvm/ac_nir_to_llvm.h index 7c2d6b31955..b4ad68abb9e 100644 --- a/src/amd/llvm/ac_nir_to_llvm.h +++ b/src/amd/llvm/ac_nir_to_llvm.h @@ -46,7 +46,7 @@ static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan) return (index * 4) + chan; } -void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); +bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index ccb80f30f4a..850be07ac40 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -1004,14 +1004,15 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) * * st/mesa calls finalize_nir twice, but we can't call this pass twice. */ + bool changed = false; if (!nir->constant_data) { - NIR_PASS_V(nir, nir_opt_large_constants, - glsl_get_natural_size_align_bytes, 16); + NIR_PASS(changed, nir, nir_opt_large_constants, + glsl_get_natural_size_align_bytes, 16); } - ac_lower_indirect_derefs(nir, sscreen->info.chip_class); - - si_nir_opts(nir); + changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class); + if (changed) + si_nir_opts(nir); NIR_PASS_V(nir, nir_lower_bool_to_int32); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp); -- 2.30.2