nir: Separate dead write removal into its own pass
authorCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Fri, 27 Jul 2018 20:56:35 +0000 (13:56 -0700)
committerCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Tue, 16 Oct 2018 00:29:46 +0000 (17:29 -0700)
Instead of doing this as part of the existing copy_prop_vars pass.

Separation makes easier to expand the scope of both passes to be more
than per-block.  For copy propagation, the information about valid
copies comes from previous instructions; while the dead write removal
depends on information from later instructions ("have any instruction
used this deref before overwrite it?").

Also change the tests to use this pass (instead of copy prop vars).
Note that the disabled tests continue to fail, since the standalone
pass is still per-block.

v2: Remove entries from dynarray instead of marking items as
    deleted.  Use foreach_reverse. (Caio)

    (all from Jason)
    Do not cache nir_deref_path.  Not worthy for this patch.
    Clear unused writes when hitting a call instruction.
    Clean up enumeration of modes for barriers.
    Move metadata calls to the inner function.

v3: For copies, use the vector length to calculate the mask.

    (all from Jason)
    Use nir_component_mask_t when applicable.
    Rename functions for clarity.
    Consider local vars used by a call to be conservative (SPIR-V has
    such cases).
    Comment and assert the assumption that stores and copies are
    always to a deref that ends with a vector or scalar.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/compiler/Makefile.sources
src/compiler/nir/meson.build
src/compiler/nir/nir.h
src/compiler/nir/nir_opt_dead_write_vars.c [new file with mode: 0644]
src/compiler/nir/tests/vars_tests.cpp

index d3b06564832f296363d01382b930a3b4eb2f5b57..b65bb9b80b9e49d2a04696d93721cadca9258ffd 100644 (file)
@@ -274,6 +274,7 @@ NIR_FILES = \
        nir/nir_opt_cse.c \
        nir/nir_opt_dce.c \
        nir/nir_opt_dead_cf.c \
+       nir/nir_opt_dead_write_vars.c \
        nir/nir_opt_find_array_copies.c \
        nir/nir_opt_gcm.c \
        nir/nir_opt_global_to_local.c \
index 1a7fa2d3327f89909df6412a86487fb8fd0cd187..d8f656400045368668d3368bab059de272e869ed 100644 (file)
@@ -158,6 +158,7 @@ files_libnir = files(
   'nir_opt_cse.c',
   'nir_opt_dce.c',
   'nir_opt_dead_cf.c',
+  'nir_opt_dead_write_vars.c',
   'nir_opt_find_array_copies.c',
   'nir_opt_gcm.c',
   'nir_opt_global_to_local.c',
index bb772385c9d5d820a46b78e59967c3d726a354d6..5b871812d4645d2d426524a169fd78af94e17a17 100644 (file)
@@ -3057,6 +3057,8 @@ bool nir_opt_dce(nir_shader *shader);
 
 bool nir_opt_dead_cf(nir_shader *shader);
 
+bool nir_opt_dead_write_vars(nir_shader *shader);
+
 bool nir_opt_find_array_copies(nir_shader *shader);
 
 bool nir_opt_gcm(nir_shader *shader, bool value_number);
diff --git a/src/compiler/nir/nir_opt_dead_write_vars.c b/src/compiler/nir/nir_opt_dead_write_vars.c
new file mode 100644 (file)
index 0000000..dd94999
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+
+#include "util/u_dynarray.h"
+
+/**
+ * Elimination of dead writes based on derefs.
+ *
+ * Dead writes are stores and copies that write to a deref, which then gets
+ * another write before it was used (read or sourced for a copy).  Those
+ * writes can be removed since they don't affect anything.
+ *
+ * For derefs that refer to a memory area that can be read after the program,
+ * the last write is considered used.  The presence of certain instructions
+ * may also cause writes to be considered used, e.g. memory barrier (in this case
+ * the value must be written as other thread might use it).
+ *
+ * The write mask for store instructions is considered, so it is possible that
+ * a store is removed because of the combination of other stores overwritten
+ * its value.
+ */
+
+/* Entry for unused_writes arrays. */
+struct write_entry {
+   /* If NULL indicates the entry is free to be reused. */
+   nir_intrinsic_instr *intrin;
+   nir_component_mask_t mask;
+   nir_deref_instr *dst;
+};
+
+static void
+clear_unused_for_modes(struct util_dynarray *unused_writes, nir_variable_mode modes)
+{
+   util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) {
+      nir_variable *var = nir_deref_instr_get_variable(entry->dst);
+      if (var->data.mode & modes)
+         *entry = util_dynarray_pop(unused_writes, struct write_entry);
+   }
+}
+
+static void
+clear_unused_for_read(struct util_dynarray *unused_writes, nir_deref_instr *src)
+{
+   util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) {
+      if (nir_compare_derefs(src, entry->dst) & nir_derefs_may_alias_bit)
+         *entry = util_dynarray_pop(unused_writes, struct write_entry);
+   }
+}
+
+static bool
+update_unused_writes(struct util_dynarray *unused_writes,
+                     nir_intrinsic_instr *intrin,
+                     nir_deref_instr *dst, nir_component_mask_t mask)
+{
+   bool progress = false;
+
+   /* This pass assumes that destination of copies and stores are derefs that
+    * end in a vector or scalar (it is OK to have wildcards or indirects for
+    * arrays).
+    */
+   assert(glsl_type_is_vector_or_scalar(dst->type));
+
+   /* Find writes that are unused and can be removed. */
+   util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) {
+      nir_deref_compare_result comp = nir_compare_derefs(dst, entry->dst);
+      if (comp & nir_derefs_a_contains_b_bit) {
+         entry->mask &= ~mask;
+         if (entry->mask == 0) {
+            nir_instr_remove(&entry->intrin->instr);
+            *entry = util_dynarray_pop(unused_writes, struct write_entry);
+            progress = true;
+         }
+      }
+   }
+
+   /* Add the new write to the unused array. */
+   struct write_entry new_entry = {
+      .intrin = intrin,
+      .mask = mask,
+      .dst = dst,
+   };
+
+   util_dynarray_append(unused_writes, struct write_entry, new_entry);
+
+   return progress;
+}
+
+static bool
+remove_dead_write_vars_local(void *mem_ctx, nir_block *block)
+{
+   bool progress = false;
+
+   struct util_dynarray unused_writes;
+   util_dynarray_init(&unused_writes, mem_ctx);
+
+   nir_foreach_instr_safe(instr, block) {
+      if (instr->type == nir_instr_type_call) {
+         clear_unused_for_modes(&unused_writes, nir_var_shader_out |
+                                                nir_var_global |
+                                                nir_var_local |
+                                                nir_var_shader_storage |
+                                                nir_var_shared);
+         continue;
+      }
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_barrier:
+      case nir_intrinsic_memory_barrier: {
+         clear_unused_for_modes(&unused_writes, nir_var_shader_out |
+                                                nir_var_shader_storage |
+                                                nir_var_shared);
+         break;
+      }
+
+      case nir_intrinsic_emit_vertex:
+      case nir_intrinsic_emit_vertex_with_counter: {
+         clear_unused_for_modes(&unused_writes, nir_var_shader_out);
+         break;
+      }
+
+      case nir_intrinsic_load_deref: {
+         nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
+         clear_unused_for_read(&unused_writes, src);
+         break;
+      }
+
+      case nir_intrinsic_store_deref: {
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+         nir_component_mask_t mask = nir_intrinsic_write_mask(intrin);
+         progress |= update_unused_writes(&unused_writes, intrin, dst, mask);
+         break;
+      }
+
+      case nir_intrinsic_copy_deref: {
+         nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+
+         /* Self-copy is removed. */
+         if (nir_compare_derefs(src, dst) & nir_derefs_equal_bit) {
+            nir_instr_remove(instr);
+            progress = true;
+            break;
+         }
+
+         clear_unused_for_read(&unused_writes, src);
+         nir_component_mask_t mask = (1 << glsl_get_vector_elements(dst->type)) - 1;
+         progress |= update_unused_writes(&unused_writes, intrin, dst, mask);
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   /* All unused writes at the end of the block are kept, since we can't be
+    * sure they'll be overwritten or not with local analysis only.
+    */
+
+   return progress;
+}
+
+static bool
+remove_dead_write_vars_impl(void *mem_ctx, nir_function_impl *impl)
+{
+   bool progress = false;
+
+   nir_metadata_require(impl, nir_metadata_block_index);
+
+   nir_foreach_block(block, impl)
+      progress |= remove_dead_write_vars_local(mem_ctx, block);
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   return progress;
+}
+
+bool
+nir_opt_dead_write_vars(nir_shader *shader)
+{
+   void *mem_ctx = ralloc_context(NULL);
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (!function->impl)
+         continue;
+      progress |= remove_dead_write_vars_impl(mem_ctx, function->impl);
+   }
+
+   ralloc_free(mem_ctx);
+   return progress;
+}
index 0a2ff3f3d3291df72b80caf896d7ec5733c1dbd2..eee3ad9ca52476aba55035f1bc6f523a12fea2b8 100644 (file)
@@ -26,9 +26,6 @@
 #include "nir.h"
 #include "nir_builder.h"
 
-/* This optimization is done together with copy propagation. */
-#define nir_opt_dead_write_vars nir_opt_copy_prop_vars
-
 namespace {
 
 class nir_vars_test : public ::testing::Test {