i965/vs: Pack live uniform vectors together in the push constant upload.

author Eric Anholt <eric@anholt.net>

Tue, 23 Aug 2011 19:13:14 +0000 (12:13 -0700)

committer Eric Anholt <eric@anholt.net>

Tue, 30 Aug 2011 19:09:40 +0000 (12:09 -0700)
author Eric Anholt <eric@anholt.net>
Tue, 23 Aug 2011 19:13:14 +0000 (12:13 -0700)
committer Eric Anholt <eric@anholt.net>
Tue, 30 Aug 2011 19:09:40 +0000 (12:09 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp

index cdbbb5579609c374078580da80033a9ca12ac480..9d64a4009d1e934f9d9b6f9d777b886e007633c0 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -188,4 +188,93 @@ vec4_visitor::split_uniform_registers()
     }
  }
  
+void
+vec4_visitor::pack_uniform_registers()
+{ /* Repack the uniform vectors that the program actually reads into
+   * the lowest-numbered uniform slots, letting several uniforms share
+   * one 4-channel slot when their sizes fit, and then rewrite every
+   * UNIFORM source operand to point at its new location.
+   *
+   * Takes no arguments and returns nothing.  It updates
+   * this->uniform_vector_size[], this->uniforms, entries of
+   * c->prog_data.param[], and the reg/swizzle of instruction sources.
+   *
+   * NOTE(review): the three arrays below are sized by the run-time
+   * value this->uniforms (variable-length arrays, a compiler
+   * extension in C++); confirm the toolchain accepts this and what
+   * happens when this->uniforms is 0.
+   */
+   bool uniform_used[this->uniforms]; /* old index -> read by an instruction */
+   int new_loc[this->uniforms];       /* old index -> new uniform vector index */
+   int new_chan[this->uniforms];      /* old index -> first channel in new slot */
+
+   memset(uniform_used, 0, sizeof(uniform_used));
+   memset(new_loc, 0, sizeof(new_loc));
+   memset(new_chan, 0, sizeof(new_chan));
+
+   /* Find which uniform vectors are actually used by the program.  We
+    * expect unused vector elements when we've moved array access out
+    * to pull constants, and from some GLSL code generators like wine.
+    *
+    * The loop inspects the three source operands of each instruction
+    * and marks a uniform as used if any of them names it.
+    */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0 ; i < 3; i++) {
+        if (inst->src[i].file != UNIFORM)
+           continue;
+
+        uniform_used[inst->src[i].reg] = true;
+      }
+   }
+
+   int new_uniform_count = 0;
+
+   /* Now, figure out a packing of the live uniform vectors into our
+    * push constants.
+    *
+    * Uniforms are visited in increasing index order and only ever
+    * move to a lower index (or stay put).  An unused uniform has its
+    * size zeroed, so its slot reads as empty when later uniforms look
+    * for space.
+    */
+   for (int src = 0; src < uniforms; src++) {
+      int size = this->uniform_vector_size[src];
+
+      if (!uniform_used[src]) {
+        this->uniform_vector_size[src] = 0;
+        continue;
+      }
+
+      int dst;
+      /* Find the lowest place we can slot this uniform in.  If no
+       * earlier vector has room for it, the loop ends with dst == src.
+       */
+      for (dst = 0; dst < src; dst++) {
+        if (this->uniform_vector_size[dst] + size <= 4)
+           break;
+      }
+
+      if (src == dst) { /* nothing earlier had room: stay in place */
+        new_loc[src] = dst;
+        new_chan[src] = 0;
+      } else {
+        new_loc[src] = dst;
+        new_chan[src] = this->uniform_vector_size[dst]; /* append after dst's contents */
+
+        /* Move the references to the data: copy this uniform's size
+         * entries of prog_data.param from its old 4-entry slot to the
+         * channel offset chosen in the destination slot.
+         */
+        for (int j = 0; j < size; j++) {
+           c->prog_data.param[dst * 4 + new_chan[src] + j] =
+              c->prog_data.param[src * 4 + j];
+        }
+
+        this->uniform_vector_size[dst] += size; /* dst now also holds src */
+        this->uniform_vector_size[src] = 0;     /* old slot is free again */
+      }
+
+      new_uniform_count = MAX2(new_uniform_count, dst + 1); /* highest slot in use, plus one */
+   }
+
+   this->uniforms = new_uniform_count;
+
+   /* Now, update the instructions for our repacked uniforms.
+    *
+    * The register number is replaced by the new vector index and each
+    * of the four swizzle components is shifted by the uniform's new
+    * starting channel.
+    *
+    * NOTE(review): nothing here checks that a shifted component stays
+    * within a 4-channel vector; this presumably relies on the original
+    * swizzle only naming channels below the uniform's size -- confirm.
+    */
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0 ; i < 3; i++) {
+        int src = inst->src[i].reg; /* only used once the file check below passes */
+
+        if (inst->src[i].file != UNIFORM)
+           continue;
+
+        inst->src[i].reg = new_loc[src];
+
+        int sx = BRW_GET_SWZ(inst->src[i].swizzle, 0) + new_chan[src];
+        int sy = BRW_GET_SWZ(inst->src[i].swizzle, 1) + new_chan[src];
+        int sz = BRW_GET_SWZ(inst->src[i].swizzle, 2) + new_chan[src];
+        int sw = BRW_GET_SWZ(inst->src[i].swizzle, 3) + new_chan[src];
+        inst->src[i].swizzle = BRW_SWIZZLE4(sx, sy, sz, sw);
+      }
+   }
+}
+
  } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h

index 945eea576b1de9af441c41000665e218371b4b2e..327977357f73ef6030efdbf094168e996f2d25fc 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -389,6 +389,7 @@ public:
     void move_grf_array_access_to_scratch();
     void move_uniform_array_access_to_pull_constants();
     void split_uniform_registers();
+   void pack_uniform_registers();
     void calculate_live_intervals();
     bool dead_code_eliminate();
     bool virtual_grf_interferes(int a, int b);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp

index 49514070f34deba07e7dd2f083f6c0eb1b162140..f084a7f7e4a6b1908a24436ed0a119d298a4b84f 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -128,9 +128,6 @@ vec4_visitor::setup_uniforms(int reg)
        reg += ALIGN(uniforms, 2) / 2;
     }
  
-   /* for now, we are not doing any elimination of unused slots, nor
-    * are we packing our uniforms.
-    */
     c->prog_data.nr_params = this->uniforms * 4;
  
     c->prog_data.curb_read_length = reg - 1;
@@ -607,6 +604,8 @@ vec4_visitor::run()
        progress = dead_code_eliminate() || progress;
     } while (progress);
  
+   pack_uniform_registers();
+
     if (failed)
        return false;
  
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index dc11d9883ca30610ae66cd4a4b22190dd13097e8..69399045d854cec7818998672313509f824129f6 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -136,6 +136,12 @@ vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
     /* The gen6 math instruction ignores the source modifiers --
      * swizzle, abs, negate, and at least some parts of the register
      * region description.
+    *
+    * While it would seem that this MOV could be avoided at this point
+    * in the case that the swizzle is matched up with the destination
+    * writemask, note that uniform packing and register allocation
+    * could rearrange our swizzle, so let's leave this matter up to
+    * copy propagation later.
      */
     src_reg temp_src = src_reg(this, glsl_type::vec4_type);
     emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
author	Eric Anholt <eric@anholt.net>
	Tue, 23 Aug 2011 19:13:14 +0000 (12:13 -0700)
committer	Eric Anholt <eric@anholt.net>
	Tue, 30 Aug 2011 19:09:40 +0000 (12:09 -0700)
src/mesa/drivers/dri/i965/brw_vec4.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp		patch \| blob \| history