radeonsi/nir: don't run si_nir_opts again if there is no change
author: Marek Olšák <marek.olsak@amd.com>
Sat, 23 Nov 2019 03:47:02 +0000 (22:47 -0500)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 25 Nov 2019 21:48:27 +0000 (16:48 -0500)
0.3% less overhead

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
src/amd/llvm/ac_nir_to_llvm.c
src/amd/llvm/ac_nir_to_llvm.h
src/gallium/drivers/radeonsi/si_shader_nir.c

index 2eba80a9c3893fba1cec253eaac3d651e0c62fcc..210a37a39061158ffae3fa5a968279cb0dbff509 100644 (file)
@@ -4783,17 +4783,19 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
        ralloc_free(ctx.vars);
 }
 
-void
+bool
 ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
 {
+       bool progress = false;
+
        /* Lower large variables to scratch first so that we won't bloat the
         * shader by generating large if ladders for them. We later lower
         * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
         */
-       NIR_PASS_V(nir, nir_lower_vars_to_scratch,
-                  nir_var_function_temp,
-                  256,
-                  glsl_get_natural_size_align_bytes);
+       NIR_PASS(progress, nir, nir_lower_vars_to_scratch,
+                nir_var_function_temp,
+                256,
+                glsl_get_natural_size_align_bytes);
 
        /* While it would be nice not to have this flag, we are constrained
         * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
@@ -4825,7 +4827,8 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
         */
        indirect_mask |= nir_var_function_temp;
 
-       nir_lower_indirect_derefs(nir, indirect_mask);
+       progress |= nir_lower_indirect_derefs(nir, indirect_mask);
+       return progress;
 }
 
 static unsigned
index 7c2d6b319553118f071c6682373382e028d3b819..b4ad68abb9e8516781f8761ff77efa97873d2550 100644 (file)
@@ -46,7 +46,7 @@ static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
        return (index * 4) + chan;
 }
 
-void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
+bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
 
 bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
 
index ccb80f30f4a8f82d7f400cd4338d5c964980e4b1..850be07ac40941d5b8d36336e3b1f497d96b3080 100644 (file)
@@ -1004,14 +1004,15 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
         *
         * st/mesa calls finalize_nir twice, but we can't call this pass twice.
         */
+       bool changed = false;
        if (!nir->constant_data) {
-               NIR_PASS_V(nir, nir_opt_large_constants,
-                          glsl_get_natural_size_align_bytes, 16);
+               NIR_PASS(changed, nir, nir_opt_large_constants,
+                        glsl_get_natural_size_align_bytes, 16);
        }
 
-       ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
-
-       si_nir_opts(nir);
+       changed |= ac_lower_indirect_derefs(nir, sscreen->info.chip_class);
+       if (changed)
+               si_nir_opts(nir);
 
        NIR_PASS_V(nir, nir_lower_bool_to_int32);
        NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp);