i965/vs: Prepare split_virtual_grfs() for the presence of SENDs from GRFs.

author Eric Anholt <eric@anholt.net>

Tue, 18 Dec 2012 00:48:20 +0000 (16:48 -0800)

committer Eric Anholt <eric@anholt.net>

Thu, 28 Mar 2013 18:46:29 +0000 (11:46 -0700)
author Eric Anholt <eric@anholt.net>
Tue, 18 Dec 2012 00:48:20 +0000 (16:48 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 28 Mar 2013 18:46:29 +0000 (11:46 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp

index 2756b71d9fcc75a6f8d38e1da0a4dab6f9f9bad1..91b72f7f20bbbb8afeeb24abd42a4b99a4b581e8 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -218,6 +218,13 @@ vec4_instruction::is_math()
            opcode == SHADER_OPCODE_INT_REMAINDER ||
            opcode == SHADER_OPCODE_POW);
  }
+
+bool
+vec4_instruction::is_send_from_grf()
+{
+   return false;
+}
+
  /**
   * Returns how many MRFs an opcode will write over.
   *
@@ -878,27 +885,46 @@ vec4_visitor::opt_register_coalesce()
   *
   * We initially create large virtual GRFs for temporary structures, arrays,
   * and matrices, so that the dereference visitor functions can add reg_offsets
- * to work their way down to the actual member being accessed.
+ * to work their way down to the actual member being accessed.  But when it
+ * comes to optimization, we'd like to treat each register as individual
+ * storage if possible.
   *
- * Unlike in the FS visitor, though, we have no SEND messages that return more
- * than 1 register.  We also don't do any array access in register space,
- * which would have required contiguous physical registers.  Thus, all those
- * large virtual GRFs can be split up into independent single-register virtual
- * GRFs, making allocation and optimization easier.
+ * So far, the only thing that might prevent splitting is a send message from
+ * a GRF on IVB.
   */
  void
  vec4_visitor::split_virtual_grfs()
  {
     int num_vars = this->virtual_grf_count;
     int new_virtual_grf[num_vars];
+   bool split_grf[num_vars];
  
     memset(new_virtual_grf, 0, sizeof(new_virtual_grf));
  
+   /* Try to split anything that isn't already a single register. */
+   for (int i = 0; i < num_vars; i++) {
+      split_grf[i] = this->virtual_grf_sizes[i] != 1;
+   }
+
+   /* Check that the instructions are compatible with the registers we're trying
+    * to split.
+    */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* If there's a SEND message loading from a GRF on gen7+, it needs to be
+       * contiguous.  Assume that the GRF for the SEND is always in src[0].
+       */
+      if (inst->is_send_from_grf()) {
+         split_grf[inst->src[0].reg] = false;
+      }
+   }
+
     /* Allocate new space for split regs.  Note that the virtual
      * numbers will be contiguous.
      */
     for (int i = 0; i < num_vars; i++) {
-      if (this->virtual_grf_sizes[i] == 1)
+      if (!split_grf[i])
           continue;
  
        new_virtual_grf[i] = virtual_grf_alloc(1);
@@ -913,21 +939,19 @@ vec4_visitor::split_virtual_grfs()
     foreach_list(node, &this->instructions) {
        vec4_instruction *inst = (vec4_instruction *)node;
  
-      if (inst->dst.file == GRF &&
-         new_virtual_grf[inst->dst.reg] &&
-         inst->dst.reg_offset != 0) {
-        inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
-                         inst->dst.reg_offset - 1);
-        inst->dst.reg_offset = 0;
+      if (inst->dst.file == GRF && split_grf[inst->dst.reg] &&
+          inst->dst.reg_offset != 0) {
+         inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
+                          inst->dst.reg_offset - 1);
+         inst->dst.reg_offset = 0;
        }
        for (int i = 0; i < 3; i++) {
-        if (inst->src[i].file == GRF &&
-            new_virtual_grf[inst->src[i].reg] &&
-            inst->src[i].reg_offset != 0) {
-           inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
-                               inst->src[i].reg_offset - 1);
-           inst->src[i].reg_offset = 0;
-        }
+         if (inst->src[i].file == GRF && split_grf[inst->src[i].reg] &&
+             inst->src[i].reg_offset != 0) {
+            inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
+                                inst->src[i].reg_offset - 1);
+            inst->src[i].reg_offset = 0;
+         }
        }
     }
     this->live_intervals_valid = false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h

index 61e18a66a404f132a08d1eddb9d847ab7d28b0c4..38d06d0e535b727fc38137ea005b50e3913ff5ea 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -194,6 +194,7 @@ public:
  
     bool is_tex();
     bool is_math();
+   bool is_send_from_grf();
     bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask);
     void reswizzle_dst(int dst_writemask, int swizzle);
  };
author	Eric Anholt <eric@anholt.net>
	Tue, 18 Dec 2012 00:48:20 +0000 (16:48 -0800)
committer	Eric Anholt <eric@anholt.net>
	Thu, 28 Mar 2013 18:46:29 +0000 (11:46 -0700)
src/mesa/drivers/dri/i965/brw_vec4.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4.h		patch \| blob \| history