i965/fs: Simplify interference scan in register coalescing.
author: Matt Turner <mattst88@gmail.com>
Wed, 9 Apr 2014 21:31:10 +0000 (14:31 -0700)
committer: Matt Turner <mattst88@gmail.com>
Fri, 18 Apr 2014 16:16:19 +0000 (09:16 -0700)
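The interference scan started at the beginning of the instruction list
rather than at the MOV instruction itself, so any control flow earlier
in the program caused the coalesce to be rejected. Starting the scan at
the MOV allows us to coalesce after control flow.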

Excluding the shaders from an unreleased title, the shader-db results:

total instructions in shared programs: 1603791 -> 1594215 (-0.60%)
instructions in affected programs:     678772 -> 669196 (-1.41%)
GAINED:                                5
LOST:                                  0

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp

index 01b672f36ddff80e8c1310fca1a0c4afafcf69fe..020d992b15a537a4edc74e7e6ce708763d05e4a0 100644 (file)
@@ -77,7 +77,7 @@ is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes)
 
 static bool
 can_coalesce_vars(brw::fs_live_variables *live_intervals,
-                  const exec_list *instructions, const fs_inst *inst,
+                  const exec_list *instructions, const fs_inst *inst, int ip,
                   int var_to, int var_from)
 {
    if (!live_intervals->vars_interfere(var_from, var_to))
@@ -96,21 +96,15 @@ can_coalesce_vars(brw::fs_live_variables *live_intervals,
    if (live_intervals->end[var_to] > live_intervals->end[var_from])
       return false;
 
-   int scan_ip = -1;
-
-   foreach_list(n, instructions) {
-      fs_inst *scan_inst = (fs_inst *)n;
-      scan_ip++;
+   assert(ip >= live_intervals->start[var_to]);
 
+   fs_inst *scan_inst;
+   for (scan_inst = (fs_inst *)inst->next;
+        !scan_inst->is_tail_sentinel() && ip <= live_intervals->end[var_to];
+        scan_inst = (fs_inst *)scan_inst->next, ip++) {
       if (scan_inst->is_control_flow())
          return false;
 
-      if (scan_ip <= live_intervals->start[var_to])
-         continue;
-
-      if (scan_ip > live_intervals->end[var_to])
-         break;
-
       if (scan_inst->dst.equals(inst->dst) ||
           scan_inst->dst.equals(inst->src[0]))
          return false;
@@ -133,9 +127,11 @@ fs_visitor::register_coalesce()
    fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE];
    int var_to[MAX_SAMPLER_MESSAGE_SIZE];
    int var_from[MAX_SAMPLER_MESSAGE_SIZE];
+   int ip = -1;
 
    foreach_list(node, &this->instructions) {
       fs_inst *inst = (fs_inst *)node;
+      ip++;
 
       if (!is_coalesce_candidate(inst, virtual_grf_sizes))
          continue;
@@ -174,7 +170,7 @@ fs_visitor::register_coalesce()
          var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i];
          var_from[i] = live_intervals->var_from_vgrf[reg_from] + i;
 
-         if (!can_coalesce_vars(live_intervals, &instructions, inst,
+         if (!can_coalesce_vars(live_intervals, &instructions, inst, ip,
                                 var_to[i], var_from[i])) {
             can_coalesce = false;
             reg_from = -1;