i965/vec4: Reswizzle sources when necessary.
authorMatt Turner <mattst88@gmail.com>
Sun, 31 Aug 2014 18:10:02 +0000 (11:10 -0700)
committerMatt Turner <mattst88@gmail.com>
Fri, 5 Sep 2014 17:22:06 +0000 (10:22 -0700)
Despite the comment above the function claiming otherwise, the function
did not reswizzle sources, which would lead to bad code generation since
commit 04895f5c, which began claiming we could do such swizzling when we
could not.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82932
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h

index 5d8f711a100347bde9b48166565bfef8623cdc5c..dcfa5940f9a1fb55e66d32ca33cfa158cd00e742 100644 (file)
@@ -933,9 +933,9 @@ vec4_visitor::opt_set_dependency_control()
 }
 
 bool
-vec4_instruction::can_reswizzle_dst(int dst_writemask,
-                                    int swizzle,
-                                    int swizzle_mask)
+vec4_instruction::can_reswizzle(int dst_writemask,
+                                int swizzle,
+                                int swizzle_mask)
 {
    /* If this instruction sets anything not referenced by swizzle, then we'd
     * totally break it when we reswizzle.
@@ -977,9 +977,10 @@ vec4_instruction::can_reswizzle_dst(int dst_writemask,
  * e.g. for swizzle=yywx, MUL a.xy b c -> MUL a.yy_x b.yy z.yy_x
  */
 void
-vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle)
+vec4_instruction::reswizzle(int dst_writemask, int swizzle)
 {
    int new_writemask = 0;
+   int new_swizzle[4] = { 0 };
 
    switch (opcode) {
    default:
@@ -996,6 +997,19 @@ vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle)
          }
          break;
       }
+
+      for (int i = 0; i < 3; i++) {
+         if (src[i].file == BAD_FILE || src[i].file == IMM)
+            continue;
+
+         for (int c = 0; c < 4; c++) {
+            new_swizzle[c] = BRW_GET_SWZ(src[i].swizzle, BRW_GET_SWZ(swizzle, c));
+         }
+
+         src[i].swizzle = BRW_SWIZZLE4(new_swizzle[0], new_swizzle[1],
+                                       new_swizzle[2], new_swizzle[3]);
+      }
+
       /* fallthrough */
    case BRW_OPCODE_DP4:
    case BRW_OPCODE_DP3:
@@ -1102,9 +1116,9 @@ vec4_visitor::opt_register_coalesce()
             }
 
             /* If we can't handle the swizzle, bail. */
-            if (!scan_inst->can_reswizzle_dst(inst->dst.writemask,
-                                              inst->src[0].swizzle,
-                                              swizzle_mask)) {
+            if (!scan_inst->can_reswizzle(inst->dst.writemask,
+                                          inst->src[0].swizzle,
+                                          swizzle_mask)) {
                break;
             }
 
@@ -1190,8 +1204,8 @@ vec4_visitor::opt_register_coalesce()
            if (scan_inst->dst.file == GRF &&
                scan_inst->dst.reg == inst->src[0].reg &&
                scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
-               scan_inst->reswizzle_dst(inst->dst.writemask,
-                                        inst->src[0].swizzle);
+               scan_inst->reswizzle(inst->dst.writemask,
+                                    inst->src[0].swizzle);
               scan_inst->dst.file = inst->dst.file;
               scan_inst->dst.reg = inst->dst.reg;
               scan_inst->dst.reg_offset = inst->dst.reg_offset;
index c2421b04e3b2f9112325217d00ca9211d2d8415a..186667c62e51311ac3aa312e2fe7287c7b9b0687 100644 (file)
@@ -220,8 +220,8 @@ public:
    bool header_present;
 
    bool is_send_from_grf();
-   bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask);
-   void reswizzle_dst(int dst_writemask, int swizzle);
+   bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
+   void reswizzle(int dst_writemask, int swizzle);
    bool can_do_source_mods(struct brw_context *brw);
 
    bool reads_flag()