i965/fs: Split fs_visitor::register_coalesce() into its own file.
author Matt Turner <mattst88@gmail.com>
Sat, 8 Feb 2014 02:17:03 +0000 (18:17 -0800)
committer Matt Turner <mattst88@gmail.com>
Sat, 5 Apr 2014 16:47:37 +0000 (09:47 -0700)
The function has gotten large, and brw_fs.cpp is the largest source file
in the driver.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp [new file with mode: 0644]

index dfb88e281e170870addac81c40e6a01228790eb1..8205fe9baa106e3a6cf2347261cbcd29d8a132fa 100644 (file)
@@ -63,6 +63,7 @@ i965_FILES = \
        brw_fs_live_variables.cpp \
        brw_fs_peephole_predicated_break.cpp \
        brw_fs_reg_allocate.cpp \
+       brw_fs_register_coalesce.cpp \
        brw_fs_saturate_propagation.cpp \
        brw_fs_sel_peephole.cpp \
        brw_fs_vector_splitting.cpp \
index fd9fba73d5092211763ee57be52c2dc25f560e80..ad9c1f5eb0124c218af889ba0c743bf98eda9be3 100644 (file)
@@ -2287,187 +2287,6 @@ fs_visitor::dead_code_eliminate_local()
    return progress;
 }
 
-/**
- * Implements register coalescing: Checks if the two registers involved in a
- * raw move don't interfere, in which case they can both be stored in the same
- * place and the MOV removed.
- *
- * To do this, all uses of the source of the MOV in the shader are replaced
- * with the destination of the MOV. For example:
- *
- * add vgrf3:F, vgrf1:F, vgrf2:F
- * mov vgrf4:F, vgrf3:F
- * mul vgrf5:F, vgrf5:F, vgrf4:F
- *
- * becomes
- *
- * add vgrf4:F, vgrf1:F, vgrf2:F
- * mul vgrf5:F, vgrf5:F, vgrf4:F
- */
-bool
-fs_visitor::register_coalesce()
-{
-   bool progress = false;
-
-   calculate_live_intervals();
-
-   int src_size = 0;
-   int channels_remaining = 0;
-   int reg_from = -1, reg_to = -1;
-   int reg_to_offset[MAX_SAMPLER_MESSAGE_SIZE];
-   fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE];
-
-   foreach_list(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
-
-      if (inst->opcode != BRW_OPCODE_MOV ||
-         inst->is_partial_write() ||
-         inst->saturate ||
-         inst->src[0].file != GRF ||
-         inst->src[0].negate ||
-         inst->src[0].abs ||
-         !inst->src[0].is_contiguous() ||
-         inst->dst.file != GRF ||
-         inst->dst.type != inst->src[0].type) {
-        continue;
-      }
-
-      if (virtual_grf_sizes[inst->src[0].reg] >
-          virtual_grf_sizes[inst->dst.reg])
-         continue;
-
-      int var_from = live_intervals->var_from_reg(&inst->src[0]);
-      int var_to = live_intervals->var_from_reg(&inst->dst);
-
-      if (live_intervals->vars_interfere(var_from, var_to) &&
-          !inst->dst.equals(inst->src[0])) {
-
-         /* We know that the live ranges of A (var_from) and B (var_to)
-          * interfere because of the ->vars_interfere() call above. If the end
-          * of B's live range is after the end of A's range, then we know two
-          * things:
-          *  - the start of B's live range must be in A's live range (since we
-          *    already know the two ranges interfere, this is the only remaining
-          *    possibility)
-          *  - the interference isn't of the form we're looking for (where B is
-          *    entirely inside A)
-          */
-         if (live_intervals->end[var_to] > live_intervals->end[var_from])
-            continue;
-
-         bool overwritten = false;
-         int scan_ip = -1;
-
-         foreach_list(n, &this->instructions) {
-            fs_inst *scan_inst = (fs_inst *)n;
-            scan_ip++;
-
-            if (scan_inst->is_control_flow()) {
-               overwritten = true;
-               break;
-            }
-
-            if (scan_ip <= live_intervals->start[var_to])
-               continue;
-
-            if (scan_ip > live_intervals->end[var_to])
-               break;
-
-            if (scan_inst->dst.equals(inst->dst) ||
-                scan_inst->dst.equals(inst->src[0])) {
-               overwritten = true;
-               break;
-            }
-         }
-
-         if (overwritten)
-            continue;
-      }
-
-      if (reg_from != inst->src[0].reg) {
-         reg_from = inst->src[0].reg;
-
-         src_size = virtual_grf_sizes[inst->src[0].reg];
-         assert(src_size <= MAX_SAMPLER_MESSAGE_SIZE);
-
-         channels_remaining = src_size;
-         memset(mov, 0, sizeof(mov));
-
-         reg_to = inst->dst.reg;
-      }
-
-      if (reg_to != inst->dst.reg)
-         continue;
-
-      const int offset = inst->src[0].reg_offset;
-      reg_to_offset[offset] = inst->dst.reg_offset;
-      mov[offset] = inst;
-      channels_remaining--;
-
-      if (channels_remaining)
-         continue;
-
-      bool removed = false;
-      for (int i = 0; i < src_size; i++) {
-         if (mov[i]) {
-            removed = true;
-
-            mov[i]->opcode = BRW_OPCODE_NOP;
-            mov[i]->conditional_mod = BRW_CONDITIONAL_NONE;
-            mov[i]->dst = reg_undef;
-            mov[i]->src[0] = reg_undef;
-            mov[i]->src[1] = reg_undef;
-            mov[i]->src[2] = reg_undef;
-         }
-      }
-
-      foreach_list(node, &this->instructions) {
-         fs_inst *scan_inst = (fs_inst *)node;
-
-         for (int i = 0; i < src_size; i++) {
-            if (mov[i]) {
-               if (scan_inst->dst.file == GRF &&
-                   scan_inst->dst.reg == reg_from &&
-                   scan_inst->dst.reg_offset == i) {
-                  scan_inst->dst.reg = reg_to;
-                  scan_inst->dst.reg_offset = reg_to_offset[i];
-               }
-               for (int j = 0; j < 3; j++) {
-                  if (scan_inst->src[j].file == GRF &&
-                      scan_inst->src[j].reg == reg_from &&
-                      scan_inst->src[j].reg_offset == i) {
-                     scan_inst->src[j].reg = reg_to;
-                     scan_inst->src[j].reg_offset = reg_to_offset[i];
-                  }
-               }
-            }
-         }
-      }
-
-      if (removed) {
-         live_intervals->start[var_to] = MIN2(live_intervals->start[var_to],
-                                              live_intervals->start[var_from]);
-         live_intervals->end[var_to] = MAX2(live_intervals->end[var_to],
-                                            live_intervals->end[var_from]);
-         reg_from = -1;
-      }
-   }
-
-   foreach_list_safe(node, &this->instructions) {
-      fs_inst *inst = (fs_inst *)node;
-
-      if (inst->opcode == BRW_OPCODE_NOP) {
-         inst->remove();
-         progress = true;
-      }
-   }
-
-   if (progress)
-      invalidate_live_intervals();
-
-   return progress;
-}
-
 bool
 fs_visitor::compute_to_mrf()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
new file mode 100644 (file)
index 0000000..74b22b9
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_register_coalesce.cpp
+ *
+ * Implements register coalescing: Checks if the two registers involved in a
+ * raw move don't interfere, in which case they can both be stored in the same
+ * place and the MOV removed.
+ *
+ * To do this, all uses of the source of the MOV in the shader are replaced
+ * with the destination of the MOV. For example:
+ *
+ * add vgrf3:F, vgrf1:F, vgrf2:F
+ * mov vgrf4:F, vgrf3:F
+ * mul vgrf5:F, vgrf5:F, vgrf4:F
+ *
+ * becomes
+ *
+ * add vgrf4:F, vgrf1:F, vgrf2:F
+ * mul vgrf5:F, vgrf5:F, vgrf4:F
+ */
+
+#include "brw_fs.h"
+#include "brw_fs_live_variables.h"
+
+bool
+fs_visitor::register_coalesce()
+{
+   bool progress = false;   /* set when a NOP is unlinked in the final sweep; returned to the caller */
+
+   calculate_live_intervals();   /* live_intervals is read throughout the loop below */
+
+   int src_size = 0;   /* virtual_grf_sizes[] entry of the vgrf currently tracked as reg_from */
+   int channels_remaining = 0;   /* counts down once per accepted MOV, starting from src_size */
+   int reg_from = -1, reg_to = -1;   /* source/destination vgrf of the group being collected */
+   int reg_to_offset[MAX_SAMPLER_MESSAGE_SIZE];   /* indexed by source reg_offset: destination reg_offset */
+   fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE];   /* indexed by source reg_offset: the MOV copying that offset */
+
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      if (inst->opcode != BRW_OPCODE_MOV ||
+         inst->is_partial_write() ||
+         inst->saturate ||
+         inst->src[0].file != GRF ||
+         inst->src[0].negate ||
+         inst->src[0].abs ||
+         !inst->src[0].is_contiguous() ||
+         inst->dst.file != GRF ||
+         inst->dst.type != inst->src[0].type) {
+        continue;   /* skip anything that is not a raw, same-type GRF-to-GRF MOV */
+      }
+
+      if (virtual_grf_sizes[inst->src[0].reg] >
+          virtual_grf_sizes[inst->dst.reg])
+         continue;   /* source vgrf is larger than the destination vgrf */
+
+      int var_from = live_intervals->var_from_reg(&inst->src[0]);   /* live-interval index of the MOV source */
+      int var_to = live_intervals->var_from_reg(&inst->dst);   /* live-interval index of the MOV destination */
+
+      if (live_intervals->vars_interfere(var_from, var_to) &&
+          !inst->dst.equals(inst->src[0])) {   /* ranges interfere and this is not a self-copy */
+
+         /* We know that the live ranges of A (var_from) and B (var_to)
+          * interfere because of the ->vars_interfere() call above. If the end
+          * of B's live range is after the end of A's range, then we know two
+          * things:
+          *  - the start of B's live range must be in A's live range (since we
+          *    already know the two ranges interfere, this is the only remaining
+          *    possibility)
+          *  - the interference isn't of the form we're looking for (where B is
+          *    entirely inside A)
+          */
+         if (live_intervals->end[var_to] > live_intervals->end[var_from])
+            continue;
+
+         bool overwritten = false;   /* also set when control flow is encountered during the scan */
+         int scan_ip = -1;   /* index of scan_inst within the instruction list, first instruction is 0 */
+
+         foreach_list(n, &this->instructions) {
+            fs_inst *scan_inst = (fs_inst *)n;
+            scan_ip++;
+
+            if (scan_inst->is_control_flow()) {   /* tested before the range checks, so control flow ahead of start[var_to] aborts too */
+               overwritten = true;
+               break;
+            }
+
+            if (scan_ip <= live_intervals->start[var_to])
+               continue;   /* not yet past the start of var_to's range */
+
+            if (scan_ip > live_intervals->end[var_to])
+               break;   /* past the end of var_to's range without finding a write to either operand */
+
+            if (scan_inst->dst.equals(inst->dst) ||
+                scan_inst->dst.equals(inst->src[0])) {   /* one of the MOV operands is written inside var_to's range */
+               overwritten = true;
+               break;
+            }
+         }
+
+         if (overwritten)
+            continue;
+      }
+
+      if (reg_from != inst->src[0].reg) {   /* different source vgrf: start collecting a fresh group */
+         reg_from = inst->src[0].reg;
+
+         src_size = virtual_grf_sizes[inst->src[0].reg];
+         assert(src_size <= MAX_SAMPLER_MESSAGE_SIZE);   /* mov[] and reg_to_offset[] have this many slots */
+
+         channels_remaining = src_size;
+         memset(mov, 0, sizeof(mov));
+
+         reg_to = inst->dst.reg;
+      }
+
+      if (reg_to != inst->dst.reg)
+         continue;   /* this MOV targets a different vgrf than the MOV that started the group */
+
+      const int offset = inst->src[0].reg_offset;
+      reg_to_offset[offset] = inst->dst.reg_offset;
+      mov[offset] = inst;
+      channels_remaining--;
+
+      if (channels_remaining)
+         continue;   /* keep collecting until src_size MOVs have been accepted */
+
+      bool removed = false;
+      for (int i = 0; i < src_size; i++) {   /* neutralise the collected MOVs; NOPs are unlinked after the main loop */
+         if (mov[i]) {
+            removed = true;
+
+            mov[i]->opcode = BRW_OPCODE_NOP;
+            mov[i]->conditional_mod = BRW_CONDITIONAL_NONE;
+            mov[i]->dst = reg_undef;
+            mov[i]->src[0] = reg_undef;
+            mov[i]->src[1] = reg_undef;
+            mov[i]->src[2] = reg_undef;
+         }
+      }
+
+      foreach_list(node, &this->instructions) {   /* retarget every reference to reg_from at a collected offset */
+         fs_inst *scan_inst = (fs_inst *)node;
+
+         for (int i = 0; i < src_size; i++) {
+            if (mov[i]) {
+               if (scan_inst->dst.file == GRF &&
+                   scan_inst->dst.reg == reg_from &&
+                   scan_inst->dst.reg_offset == i) {
+                  scan_inst->dst.reg = reg_to;
+                  scan_inst->dst.reg_offset = reg_to_offset[i];
+               }
+               for (int j = 0; j < 3; j++) {
+                  if (scan_inst->src[j].file == GRF &&
+                      scan_inst->src[j].reg == reg_from &&
+                      scan_inst->src[j].reg_offset == i) {
+                     scan_inst->src[j].reg = reg_to;
+                     scan_inst->src[j].reg_offset = reg_to_offset[i];
+                  }
+               }
+            }
+         }
+      }
+
+      if (removed) {   /* widen var_to to cover var_from; both belong to the last MOV accepted into the group */
+         live_intervals->start[var_to] = MIN2(live_intervals->start[var_to],
+                                              live_intervals->start[var_from]);
+         live_intervals->end[var_to] = MAX2(live_intervals->end[var_to],
+                                            live_intervals->end[var_from]);
+         reg_from = -1;   /* presumably never a real vgrf number, so the next candidate starts a new group */
+      }
+   }
+
+   foreach_list_safe(node, &this->instructions) {   /* sweep out every NOP, including any not created by this pass */
+      fs_inst *inst = (fs_inst *)node;
+
+      if (inst->opcode == BRW_OPCODE_NOP) {
+         inst->remove();
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals();   /* reached only when at least one instruction was removed above */
+
+   return progress;
+}