i965/fs: Add an optimization pass to remove redundant flags movs.
author Eric Anholt <eric@anholt.net>
Fri, 14 Feb 2014 23:29:01 +0000 (15:29 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 20 Feb 2014 18:15:13 +0000 (10:15 -0800)
We generate steaming piles of these for the centroid workaround, and this
quickly cleans them up.

total instructions in shared programs: 1591228 -> 1590047 (-0.07%)
instructions in affected programs:     26111 -> 24930 (-4.52%)
GAINED:                                0
LOST:                                  0

(Improved apps are l4d2, csgo, and dolphin)

Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h

index 0fd9e908f58b8d7d8c0e8f60d3b04229e0e90f1a..65f2c808626c22c30a7357d15caf024ad6b6c7bc 100644 (file)
@@ -3304,6 +3304,37 @@ fs_visitor::calculate_register_pressure()
    }
 }
 
+/**
+ * Look for repeated FS_OPCODE_MOV_DISPATCH_TO_FLAGS and drop the later ones.
+ * The needs_unlit_centroid_workaround ends up producing one of these per
+ * channel of centroid input, so it's good to clean them up.
+ *
+ * Each flag subregister is tracked separately; a later mov is only dropped
+ * while the earlier one is still live, i.e. no control flow or other flag
+ * write has intervened.  This assumes nothing ever modifies the dispatched
+ * pixels value the opcode reads from, which the hardware dictates anyway.
+ */
+void
+fs_visitor::opt_drop_redundant_mov_to_flags()
+{
+   bool flag_mov_found[2] = {false}; /* indexed by inst->flag_subreg */
+
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      if (inst->is_control_flow()) {
+         memset(flag_mov_found, 0, sizeof(flag_mov_found));
+      } else if (inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
+         if (!flag_mov_found[inst->flag_subreg])
+            flag_mov_found[inst->flag_subreg] = true;
+         else
+            inst->remove();
+      } else if (inst->writes_flag()) {
+         flag_mov_found[inst->flag_subreg] = false;
+      }
+   }
+}
+
 bool
 fs_visitor::run()
 {
@@ -3369,6 +3400,8 @@ fs_visitor::run()
       remove_dead_constants();
       setup_pull_constants();
 
+      opt_drop_redundant_mov_to_flags();
+
       bool progress;
       do {
         progress = false;
index 8a596bcaa0e4b1de81f0821b8cb96d83c2880e6e..5058c48bdc8f68a5e6438109c0daef44d140ebbc 100644 (file)
@@ -367,6 +367,7 @@ public:
    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
    bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
                                  exec_list *acp);
+   void opt_drop_redundant_mov_to_flags();
    bool register_coalesce();
    bool compute_to_mrf();
    bool dead_code_eliminate();