i965/vec4: Add a pass to reduce swizzles.
author: Matt Turner <mattst88@gmail.com>
Sun, 17 Aug 2014 22:13:54 +0000 (15:13 -0700)
committer: Matt Turner <mattst88@gmail.com>
Tue, 19 Aug 2014 19:37:11 +0000 (12:37 -0700)
total instructions in shared programs: 4344280 -> 4288033 (-1.29%)
instructions in affected programs:     397468 -> 341221 (-14.15%)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h

index bfb0fba39871f7e6effe03a73d6cf4df92920b2f..5477fe6ab5f4d48504fb4d48d99e192888c84411 100644 (file)
@@ -311,6 +311,103 @@ src_reg::equals(const src_reg &r) const
                  sizeof(fixed_hw_reg)) == 0);
 }
 
+/* Rewrites each source swizzle so that channels the instruction does not read
+ * repeat the swizzle of the first channel it does read. Returns true if any
+ * swizzle changed. For instance, with only .y and .z written, this transforms
+ *
+ *    mov vgrf4.yz, vgrf5.wxzy
+ *
+ * into
+ *
+ *    mov vgrf4.yz, vgrf5.xxzx
+ *
+ * This eliminates the false uses of vgrf5.w and vgrf5.y, letting dead code
+ * elimination remove the instructions that wrote them.
+ */
+bool
+vec4_visitor::opt_reduce_swizzle()
+{
+   bool progress = false;
+
+   foreach_in_list_safe(vec4_instruction, inst, &instructions) {
+      if (inst->dst.file == BAD_FILE || inst->dst.file == HW_REG)
+         continue;
+
+      int swizzle[4]; /* slot i -> old swizzle slot to read; -1 = unread */
+
+      /* Determine which channels of the sources are read (-1 = unread). */
+      switch (inst->opcode) {
+      case BRW_OPCODE_DP4:
+      case BRW_OPCODE_DPH: /* FINISHME: DPH reads only three channels of src0,
+                            *           but all four of src1.
+                            */
+         swizzle[0] = 0;
+         swizzle[1] = 1;
+         swizzle[2] = 2;
+         swizzle[3] = 3;
+         break;
+      case BRW_OPCODE_DP3: /* first three channels, whatever the writemask */
+         swizzle[0] = 0;
+         swizzle[1] = 1;
+         swizzle[2] = 2;
+         swizzle[3] = -1;
+         break;
+      case BRW_OPCODE_DP2: /* first two channels, whatever the writemask */
+         swizzle[0] = 0;
+         swizzle[1] = 1;
+         swizzle[2] = -1;
+         swizzle[3] = -1;
+         break;
+      default: /* channel i counts as read only if dst writes channel i */
+         swizzle[0] = inst->dst.writemask & WRITEMASK_X ? 0 : -1;
+         swizzle[1] = inst->dst.writemask & WRITEMASK_Y ? 1 : -1;
+         swizzle[2] = inst->dst.writemask & WRITEMASK_Z ? 2 : -1;
+         swizzle[3] = inst->dst.writemask & WRITEMASK_W ? 3 : -1;
+         break;
+      }
+
+      /* Resolve unread channels (-1) by assigning them the swizzle of the
+       * first channel that is used (channel 0 if no channel is used at all).
+       */
+      int first_used_channel = 0;
+      for (int i = 0; i < 4; i++) {
+         if (swizzle[i] != -1) {
+            first_used_channel = swizzle[i];
+            break;
+         }
+      }
+      for (int i = 0; i < 4; i++) {
+         if (swizzle[i] == -1) {
+            swizzle[i] = first_used_channel;
+         }
+      }
+
+      /* Update the swizzles of GRF, ATTR and UNIFORM sources only. */
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != GRF &&
+             inst->src[i].file != ATTR &&
+             inst->src[i].file != UNIFORM)
+            continue;
+
+         int swiz[4]; /* existing swizzle composed with the channel map */
+         for (int j = 0; j < 4; j++) {
+            swiz[j] = BRW_GET_SWZ(inst->src[i].swizzle, swizzle[j]);
+         }
+
+         unsigned new_swizzle = BRW_SWIZZLE4(swiz[0], swiz[1], swiz[2], swiz[3]);
+         if (inst->src[i].swizzle != new_swizzle) {
+            inst->src[i].swizzle = new_swizzle;
+            progress = true;
+         }
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals(); /* the set of channels read has changed */
+
+   return progress;
+}
+
 static bool
 try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
                           const struct brw_context *brw)
@@ -1699,6 +1796,7 @@ vec4_visitor::run()
       iteration++;
       int pass_num = 0;
 
+      OPT(opt_reduce_swizzle);
       OPT(dead_code_eliminate);
       OPT(dead_control_flow_eliminate, this);
       OPT(opt_copy_propagation);
index 517eab13c1b6824e4042cd36c3c9f42f34813bbe..f0239cb998a3a621efaf2b8456ad6f1965fbcb6d 100644 (file)
@@ -380,6 +380,7 @@ public:
    void calculate_live_intervals();
    void invalidate_live_intervals();
    void split_virtual_grfs();
+   bool opt_reduce_swizzle();
    bool dead_code_eliminate();
    bool virtual_grf_interferes(int a, int b);
    bool opt_copy_propagation();