i965/fs: Do more register coalescing by using the interference graph.
author: Eric Anholt <eric@anholt.net>
Tue, 8 May 2012 17:18:20 +0000 (10:18 -0700)
committer: Eric Anholt <eric@anholt.net>
Thu, 17 May 2012 17:05:23 +0000 (10:05 -0700)
By using the live variables code for determining interference, we can
handle coalescing in the presence of control flow, which the other
register coalescing path couldn't.

Total instructions: 207184 -> 206990
74/1246 programs affected (5.9%)
33993 -> 33799 instructions in affected programs (0.6% reduction)

There is a newerth shader that loses out, because of some extra MOVs
that now get their dead-code nature obscured by coalescing.  This
should be fixed by doing better at dead code elimination.

src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h

index 7176d40bc17d07a6b5e5b59dc968e41778f41ac8..313e720d382a54889afb0f77dd9e21472cb395a4 100644 (file)
@@ -1218,6 +1218,66 @@ fs_visitor::dead_code_eliminate()
    return progress;
 }
 
+/**
+ * Implements a second type of register coalescing: This one checks if
+ * the two regs involved in a raw move don't interfere, in which case
+ * they can both be stored in the same place and the MOV removed.
+ */
+bool
+fs_visitor::register_coalesce_2()
+{
+   bool progress = false;
+
+   calculate_live_intervals(); /* run once, up front; presumably feeds virtual_grf_interferes() below -- confirm */
+
+   foreach_list_safe(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      if (inst->opcode != BRW_OPCODE_MOV || /* skip anything that is not a raw GRF-to-GRF MOV */
+         inst->predicated ||
+         inst->saturate ||
+         inst->src[0].file != GRF ||
+         inst->src[0].negate ||
+         inst->src[0].abs ||
+         inst->src[0].smear != -1 ||
+         inst->dst.file != GRF ||
+         inst->dst.type != inst->src[0].type || /* source and destination types must match */
+         virtual_grf_sizes[inst->src[0].reg] != 1 || /* only sources whose recorded size is 1 are handled */
+         virtual_grf_interferes(inst->dst.reg, inst->src[0].reg)) { /* interfering regs cannot be merged */
+        continue;
+      }
+
+      int reg_from = inst->src[0].reg;
+      assert(inst->src[0].reg_offset == 0); /* presumably implied by the size-1 check above -- confirm */
+      int reg_to = inst->dst.reg;
+      int reg_to_offset = inst->dst.reg_offset;
+
+      foreach_list_safe(node, &this->instructions) { /* full pass: retarget every def and use of reg_from */
+        fs_inst *scan_inst = (fs_inst *)node; /* this `node` is the inner loop's, not the outer one */
+
+        if (scan_inst->dst.file == GRF &&
+            scan_inst->dst.reg == reg_from) {
+           scan_inst->dst.reg = reg_to;
+           scan_inst->dst.reg_offset = reg_to_offset;
+        }
+        for (int i = 0; i < 3; i++) { /* checks source slots 0..2 */
+           if (scan_inst->src[i].file == GRF &&
+               scan_inst->src[i].reg == reg_from) {
+              scan_inst->src[i].reg = reg_to;
+              scan_inst->src[i].reg_offset = reg_to_offset;
+           }
+        }
+      }
+
+      inst->remove(); /* the scan above also rewrote this MOV's own source, so it now copies reg_to onto itself */
+      live_intervals_valid = false; /* marks liveness stale; it is not recomputed inside this loop */
+      progress = true;
+      continue; /* no effect: already the last statement of the loop body */
+   }
+
+   return progress; /* true when at least one MOV was removed */
+}
+
 bool
 fs_visitor::register_coalesce()
 {
@@ -1684,6 +1744,7 @@ fs_visitor::run()
         progress = opt_cse() || progress;
         progress = opt_copy_propagate() || progress;
         progress = register_coalesce() || progress;
+        progress = register_coalesce_2() || progress;
         progress = compute_to_mrf() || progress;
         progress = dead_code_eliminate() || progress;
       } while (progress);
index f04c6fc73510b5352c0e1fb67478a91b8a1d1630..d7fd9a44348ca0393a2e0046a2bef8d23330236e 100644 (file)
@@ -495,6 +495,7 @@ public:
    bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
                                 exec_list *acp);
    bool register_coalesce();
+   bool register_coalesce_2();
    bool compute_to_mrf();
    bool dead_code_eliminate();
    bool remove_dead_constants();