nir/lower_indirect_derefs: Add a threshold
author Jason Ekstrand <jason@jlekstrand.net>
Tue, 14 Jul 2020 18:55:19 +0000 (13:55 -0500)
committer Marge Bot <eric+marge@anholt.net>
Thu, 3 Sep 2020 14:26:49 +0000 (14:26 +0000)
Instead of always lowering everything, we add a threshold such that if
the total indirected array size (AoA size) is above that threshold, the
access is not lowered.  It's assumed that the driver will sort things
out somehow by, for instance, lowering to scratch.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5909>

src/amd/llvm/ac_nir_to_llvm.c
src/compiler/nir/nir.h
src/compiler/nir/nir_lower_indirect_derefs.c
src/freedreno/vulkan/tu_shader.c
src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
src/gallium/drivers/freedreno/a2xx/ir2_nir.c
src/intel/compiler/brw_nir.c
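
The new signature is nir_lower_indirect_derefs(shader, modes, max_lower_array_len);
every caller updated below passes UINT32_MAX to keep the old always-lower
behavior.  A minimal sketch of how a driver might instead pick a finite limit
(the pass placement and the value 16 are hypothetical) and leave larger arrays
for a later pass such as lowering to scratch:

   /* Sketch only, not part of this commit: lower indirect derefs on
    * local temporaries whose total indirected array size (AoA size) is
    * at most 16 elements; bigger arrays are skipped and left for the
    * driver to handle, e.g. by lowering them to scratch.
    */
   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp,
              16 /* max_lower_array_len */);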

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 8fc9734e4a1f0b6742fc68eb85cdcf6b3a0171ce..13f12d350dcb80e5a37f6629216e058eeb066275 100644
@@ -5571,7 +5571,7 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
         */
        indirect_mask |= nir_var_function_temp;
 
-       progress |= nir_lower_indirect_derefs(nir, indirect_mask);
+       progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
        return progress;
 }
 
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index cb702ca3023aad2386e5f8310747369b1d9e976d..f6f667bcdcb74cb7d6068670b8835dc35901f13b 100644
@@ -4076,7 +4076,8 @@ typedef enum {
 bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
                                   nir_lower_array_deref_of_vec_options options);
 
-bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes);
+bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
+                               uint32_t max_lower_array_len);
 
 bool nir_lower_locals_to_regs(nir_shader *shader);
 
diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c
index d9dcba842988af15a1ba8ebd89c9df0a0031f410..08dbcb1f0d0dc0e9790cd893920f0e9687ae3e83 100644
@@ -113,7 +113,8 @@ emit_load_store_deref(nir_builder *b, nir_intrinsic_instr *orig_instr,
 
 static bool
 lower_indirect_derefs_block(nir_block *block, nir_builder *b,
-                            nir_variable_mode modes)
+                            nir_variable_mode modes,
+                            uint32_t max_lower_array_len)
 {
    bool progress = false;
 
@@ -133,17 +134,21 @@ lower_indirect_derefs_block(nir_block *block, nir_builder *b,
       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
 
       /* Walk the deref chain back to the base and look for indirects */
+      uint32_t indirect_array_len = 1;
       bool has_indirect = false;
       nir_deref_instr *base = deref;
       while (base && base->deref_type != nir_deref_type_var) {
+         nir_deref_instr *parent = nir_deref_instr_parent(base);
          if (base->deref_type == nir_deref_type_array &&
-             !nir_src_is_const(base->arr.index))
+             !nir_src_is_const(base->arr.index)) {
+            indirect_array_len *= glsl_get_length(parent->type);
             has_indirect = true;
+         }
 
-         base = nir_deref_instr_parent(base);
+         base = parent;
       }
 
-      if (!has_indirect || !base)
+      if (!has_indirect || !base || indirect_array_len > max_lower_array_len)
          continue;
 
       /* Only lower variables whose mode is in the mask, or compact
@@ -179,14 +184,16 @@ lower_indirect_derefs_block(nir_block *block, nir_builder *b,
 }
 
 static bool
-lower_indirects_impl(nir_function_impl *impl, nir_variable_mode modes)
+lower_indirects_impl(nir_function_impl *impl, nir_variable_mode modes,
+                     uint32_t max_lower_array_len)
 {
    nir_builder builder;
    nir_builder_init(&builder, impl);
    bool progress = false;
 
    nir_foreach_block_safe(block, impl) {
-      progress |= lower_indirect_derefs_block(block, &builder, modes);
+      progress |= lower_indirect_derefs_block(block, &builder, modes,
+                                              max_lower_array_len);
    }
 
    if (progress)
@@ -203,13 +210,16 @@ lower_indirects_impl(nir_function_impl *impl, nir_variable_mode modes)
  * that does a binary search on the array index.
  */
 bool
-nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes)
+nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
+                          uint32_t max_lower_array_len)
 {
    bool progress = false;
 
    nir_foreach_function(function, shader) {
-      if (function->impl)
-         progress = lower_indirects_impl(function->impl, modes) || progress;
+      if (function->impl) {
+         progress = lower_indirects_impl(function->impl, modes,
+                                         max_lower_array_len) || progress;
+      }
    }
 
    return progress;
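
To make the new check concrete, a small worked example (the shader temporary
below is hypothetical, not from the commit):

   /* Assume a local array of arrays accessed with two non-constant
    * indices:
    *
    *    float a[4][8];
    *    x = a[i][j];
    *
    * Walking back from the a[i][j] deref, the two indirect array derefs
    * have parent types float[8] and float[4][8], so
    *
    *    indirect_array_len = 8 * 4 = 32
    *
    * The load is expanded into an if-ladder only when
    * 32 <= max_lower_array_len; otherwise it is skipped and left for
    * the driver, e.g. to be lowered to scratch.
    */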
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index afdb7c7de748ea9b6b842ab7deca04d7f0e7adf6..6533dc9580da908b82ee28a79a9c2c4c93b34d91 100644
@@ -757,7 +757,7 @@ tu_shader_create(struct tu_device *dev,
     * a global BO, they can be directly accessed via stg and ldg.
     * nir_lower_indirect_derefs will instead generate a big if-ladder which
     * isn't *incorrect* but is much less efficient. */
-   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);
+   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
 
    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
index b09a720e43cb4992f13bec6977f870de5d8cf736..e48c048db5372ef0717a08a818eae3948d26ddbd 100644
@@ -1093,7 +1093,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v)
 
    NIR_PASS_V(s, nir_lower_regs_to_ssa);
    NIR_PASS_V(s, nir_lower_vars_to_ssa);
-   NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all);
+   NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX);
    NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u });
    NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
 
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index 88136861871ff1ed4e5a512cd6f325cd81aeabd1..58e99f0943bffe7889c102b78b6e6b8f5c9e3f64 100644
@@ -113,7 +113,7 @@ ir2_optimize_nir(nir_shader *s, bool lower)
 
        OPT_V(s, nir_lower_regs_to_ssa);
        OPT_V(s, nir_lower_vars_to_ssa);
-       OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out);
+       OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
 
        if (lower) {
                OPT_V(s, ir3_nir_apply_trig_workarounds);
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index ea71eb499e12af7242eba68faf76204e36e3157e..037fe780d37b3e536b0b3038108f75b8a7949078 100644
@@ -763,7 +763,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
 
    nir_variable_mode indirect_mask =
       brw_nir_no_indirect_mask(compiler, nir->info.stage);
-   OPT(nir_lower_indirect_derefs, indirect_mask);
+   OPT(nir_lower_indirect_derefs, indirect_mask, UINT32_MAX);
 
    /* Lower array derefs of vectors for SSBO and UBO loads.  For both UBOs and
     * SSBOs, our back-end is capable of loading an entire vec4 at a time and
@@ -813,9 +813,11 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
        * varyings we have demoted here.
        */
       NIR_PASS_V(producer, nir_lower_indirect_derefs,
-                 brw_nir_no_indirect_mask(compiler, producer->info.stage));
+                 brw_nir_no_indirect_mask(compiler, producer->info.stage),
+                 UINT32_MAX);
       NIR_PASS_V(consumer, nir_lower_indirect_derefs,
-                 brw_nir_no_indirect_mask(compiler, consumer->info.stage));
+                 brw_nir_no_indirect_mask(compiler, consumer->info.stage),
+                 UINT32_MAX);
 
       brw_nir_optimize(producer, compiler, p_is_scalar, false);
       brw_nir_optimize(consumer, compiler, c_is_scalar, false);