i965/vec4: Rewrite dead code elimination to use live in/out.
authorMatt Turner <mattst88@gmail.com>
Mon, 3 Nov 2014 04:36:53 +0000 (20:36 -0800)
committerMatt Turner <mattst88@gmail.com>
Tue, 2 Dec 2014 00:42:13 +0000 (16:42 -0800)
Improves 359 shaders by >=10%
         114 shaders by >=20%
          91 shaders by >=30%
          82 shaders by >=40%
          22 shaders by >=50%
           4 shaders by >=60%
           2 shaders by >=80%

total instructions in shared programs: 5845346 -> 5822422 (-0.39%)
instructions in affected programs:     364979 -> 342055 (-6.28%)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp [new file with mode: 0644]

index 26bf458e183df264958ce94f6e05b504d26bbf8d..f03422e04986a259b5a7f90752da98388c3ab1ba 100644 (file)
@@ -103,6 +103,7 @@ i965_FILES = \
        brw_vec4.cpp \
        brw_vec4_copy_propagation.cpp \
        brw_vec4_cse.cpp \
+       brw_vec4_dead_code_eliminate.cpp \
        brw_vec4_generator.cpp \
        brw_vec4_gs_visitor.cpp \
        brw_vec4_live_variables.cpp \
index 534b4b0668f9e891f199e5d32d998f790477f9da..4d893e15dca7077398bf96cf0f6a6e12b5fe47fb 100644 (file)
@@ -429,161 +429,6 @@ vec4_visitor::opt_reduce_swizzle()
    return progress;
 }
 
-static bool
-try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
-                          const struct brw_context *brw)
-{
-   if (inst->has_side_effects())
-      return false;
-
-   if (new_writemask == 0) {
-      /* Don't dead code eliminate instructions that write to the
-       * accumulator as a side-effect. Instead just set the destination
-       * to the null register to free it.
-       */
-      if (inst->writes_accumulator || inst->writes_flag()) {
-         inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
-      } else {
-         inst->opcode = BRW_OPCODE_NOP;
-      }
-
-      return true;
-   } else if (inst->dst.writemask != new_writemask) {
-      switch (inst->opcode) {
-      case SHADER_OPCODE_TXF_CMS:
-      case SHADER_OPCODE_GEN4_SCRATCH_READ:
-      case VS_OPCODE_PULL_CONSTANT_LOAD:
-      case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
-         break;
-      default:
-         /* Do not set a writemask on Gen6 for math instructions, those are
-          * executed using align1 mode that does not support a destination mask.
-          */
-         if (!(brw->gen == 6 && inst->is_math()) && !inst->is_tex()) {
-            inst->dst.writemask = new_writemask;
-            return true;
-         }
-      }
-   }
-
-   return false;
-}
-
-/**
- * Must be called after calculate_live_intervals() to remove unused
- * writes to registers -- register allocation will fail otherwise
- * because something deffed but not used won't be considered to
- * interfere with other regs.
- */
-bool
-vec4_visitor::dead_code_eliminate()
-{
-   bool progress = false;
-   int pc = -1;
-
-   calculate_live_intervals();
-
-   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      pc++;
-
-      bool inst_writes_flag = false;
-      if (inst->dst.file != GRF) {
-         if (inst->dst.is_null() && inst->writes_flag()) {
-            inst_writes_flag = true;
-         } else {
-            continue;
-         }
-      }
-
-      if (inst->dst.file == GRF) {
-         int write_mask = inst->dst.writemask;
-
-         for (int c = 0; c < 4; c++) {
-            if (write_mask & (1 << c)) {
-               assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc);
-               if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) {
-                  write_mask &= ~(1 << c);
-               }
-            }
-         }
-
-         progress = try_eliminate_instruction(inst, write_mask, brw) ||
-                    progress;
-      }
-
-      if (inst->predicate || inst->prev == NULL)
-         continue;
-
-      int dead_channels;
-      if (inst_writes_flag) {
-/* Arbitrarily chosen, other than not being an xyzw writemask. */
-#define FLAG_WRITEMASK (1 << 5)
-         dead_channels = inst->reads_flag() ? 0 : FLAG_WRITEMASK;
-      } else {
-         dead_channels = inst->dst.writemask;
-
-         for (int i = 0; i < 3; i++) {
-            if (inst->src[i].file != GRF ||
-                inst->src[i].reg != inst->dst.reg)
-                  continue;
-
-            for (int j = 0; j < 4; j++) {
-               int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j);
-               dead_channels &= ~(1 << swiz);
-            }
-         }
-      }
-
-      foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
-                                                  inst, block) {
-         if (dead_channels == 0)
-            break;
-
-         if (inst_writes_flag) {
-            if (scan_inst->dst.is_null() && scan_inst->writes_flag()) {
-               scan_inst->opcode = BRW_OPCODE_NOP;
-               progress = true;
-               continue;
-            } else if (scan_inst->reads_flag()) {
-               break;
-            }
-         }
-
-         if (inst->dst.file == scan_inst->dst.file &&
-             inst->dst.reg == scan_inst->dst.reg &&
-             inst->dst.reg_offset == scan_inst->dst.reg_offset) {
-            int new_writemask = scan_inst->dst.writemask & ~dead_channels;
-
-            progress = try_eliminate_instruction(scan_inst, new_writemask, brw) ||
-                       progress;
-         }
-
-         for (int i = 0; i < 3; i++) {
-            if (scan_inst->src[i].file != inst->dst.file ||
-                scan_inst->src[i].reg != inst->dst.reg)
-               continue;
-
-            for (int j = 0; j < 4; j++) {
-               int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j);
-               dead_channels &= ~(1 << swiz);
-            }
-         }
-      }
-   }
-
-   if (progress) {
-      foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
-         if (inst->opcode == BRW_OPCODE_NOP) {
-            inst->remove(block);
-         }
-      }
-
-      invalidate_live_intervals();
-   }
-
-   return progress;
-}
-
 void
 vec4_visitor::split_uniform_registers()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
new file mode 100644 (file)
index 0000000..b8370ba
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "brw_vec4_live_variables.h"
+#include "brw_cfg.h"
+
+/** @file brw_vec4_dead_code_eliminate.cpp
+ *
+ * Dataflow-aware dead code elimination.
+ *
+ * Walks the instruction list from the bottom, removing instructions that
+ * have results that both aren't used in later blocks and haven't been read
+ * yet in the tail end of this block.
+ */
+
+using namespace brw;
+
+static bool
+can_do_writemask(const struct brw_context *brw,
+                 const vec4_instruction *inst)
+{
+   switch (inst->opcode) {
+   case SHADER_OPCODE_GEN4_SCRATCH_READ:
+   case VS_OPCODE_PULL_CONSTANT_LOAD:
+   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+      return false;
+   default:
+      /* The MATH instruction on Gen6 only executes in align1 mode, which does
+       * not support writemasking.
+       */
+      if (brw->gen == 6 && inst->is_math())
+         return false;
+
+      if (inst->is_tex())
+         return false;
+
+      return true;
+   }
+}
+
+bool
+vec4_visitor::dead_code_eliminate()
+{
+   bool progress = false;
+
+   calculate_live_intervals();
+
+   int num_vars = live_intervals->num_vars;
+   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
+   BITSET_WORD *flag_live = ralloc_array(NULL, BITSET_WORD, 1);
+
+   foreach_block(block, cfg) {
+      memcpy(live, live_intervals->block_data[block->num].liveout,
+             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
+      memcpy(flag_live, live_intervals->block_data[block->num].flag_liveout,
+             sizeof(BITSET_WORD));
+
+      foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
+         if (inst->dst.file == GRF && !inst->has_side_effects()) {
+            bool result_live[4] = { false };
+
+            for (int c = 0; c < 4; c++) {
+               int var = inst->dst.reg * 4 + c;
+               result_live[c] = BITSET_TEST(live, var);
+            }
+
+            /* If the instruction can't do writemasking, then it's all or
+             * nothing.
+             */
+            if (!can_do_writemask(brw, inst)) {
+               bool result = result_live[0] | result_live[1] |
+                             result_live[2] | result_live[3];
+               result_live[0] = result;
+               result_live[1] = result;
+               result_live[2] = result;
+               result_live[3] = result;
+            }
+
+            for (int c = 0; c < 4; c++) {
+               if (!result_live[c] && inst->dst.writemask & (1 << c)) {
+                  inst->dst.writemask &= ~(1 << c);
+                  progress = true;
+
+                  if (inst->dst.writemask == 0) {
+                     if (inst->writes_accumulator) {
+                        inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
+                     } else {
+                        inst->opcode = BRW_OPCODE_NOP;
+                        continue;
+                     }
+                  }
+               }
+            }
+         }
+
+         if (inst->dst.is_null() && inst->writes_flag()) {
+            if (!BITSET_TEST(flag_live, 0)) {
+               inst->opcode = BRW_OPCODE_NOP;
+               progress = true;
+               continue;
+            }
+         }
+
+         if (inst->dst.file == GRF && !inst->predicate) {
+            for (int c = 0; c < 4; c++) {
+               if (inst->dst.writemask & (1 << c)) {
+                  int var = inst->dst.reg * 4 + c;
+                  BITSET_CLEAR(live, var);
+               }
+            }
+         }
+
+         if (inst->writes_flag()) {
+            BITSET_CLEAR(flag_live, 0);
+         }
+
+         for (int i = 0; i < 3; i++) {
+            if (inst->src[i].file == GRF) {
+               for (int c = 0; c < 4; c++) {
+                  int swiz = BRW_GET_SWZ(inst->src[i].swizzle, c);
+                  int var = inst->src[i].reg * 4 + swiz;
+
+                  BITSET_SET(live, var);
+               }
+            }
+         }
+
+         if (inst->reads_flag()) {
+            BITSET_SET(flag_live, 0);
+         }
+      }
+   }
+
+   ralloc_free(live);
+   ralloc_free(flag_live);
+
+   if (progress) {
+      foreach_block_and_inst_safe(block, backend_instruction, inst, cfg) {
+         if (inst->opcode == BRW_OPCODE_NOP) {
+            inst->remove(block);
+         }
+      }
+
+      invalidate_live_intervals();
+   }
+
+   return progress;
+}