i965/fs: Fix rewrite of the second half of 16-wide coalesced registers.
authorFrancisco Jerez <currojerez@riseup.net>
Mon, 13 Jul 2015 11:20:32 +0000 (14:20 +0300)
committerFrancisco Jerez <currojerez@riseup.net>
Wed, 29 Jul 2015 11:12:02 +0000 (14:12 +0300)
The register coalesce pass wasn't rewriting the destination and
sources of instructions that accessed the second half of a coalesced
register previously copied with a 16-wide MOV instruction.  E.g.:

| ADD (16) vgrf0:f, vgrf0:f, 1.0:f
| MOV (16) vgrf1:f, vgrf0:f
| MOV (8)  vgrf2:f, vgrf0+1:f { sechalf }

would get incorrectly register-coalesced into:

| ADD (16) vgrf1:f, vgrf1:f, 1.0:f
| MOV (8)  vgrf2:f, vgrf0+1:f { sechalf }

The reason is that the mov[i] pointer was being left equal to NULL for
every other register.  The fact that we've made it to the rewrite loop
implies that the whole register will be coalesced, so it doesn't seem
right not to update something that uses it depending on whether mov[i]
is NULL or not.  Fixes an amount of texturing and image_load_store
piglit tests on my SIMD-lowering branch.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp

index 4f00b7f146ff0810a5235a9f902b37b8549ed68c..20a548000996cf4193266a39fc20d40fe2de4cbd 100644 (file)
@@ -228,7 +228,6 @@ fs_visitor::register_coalesce()
          continue;
 
       progress = true;
-      bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD;
 
       for (int i = 0; i < src_size; i++) {
          if (mov[i]) {
@@ -243,20 +242,18 @@ fs_visitor::register_coalesce()
 
       foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
          for (int i = 0; i < src_size; i++) {
-            if (mov[i] || was_load_payload) {
-               if (scan_inst->dst.file == GRF &&
-                   scan_inst->dst.reg == reg_from &&
-                   scan_inst->dst.reg_offset == i) {
-                  scan_inst->dst.reg = reg_to;
-                  scan_inst->dst.reg_offset = reg_to_offset[i];
-               }
-               for (int j = 0; j < scan_inst->sources; j++) {
-                  if (scan_inst->src[j].file == GRF &&
-                      scan_inst->src[j].reg == reg_from &&
-                      scan_inst->src[j].reg_offset == i) {
-                     scan_inst->src[j].reg = reg_to;
-                     scan_inst->src[j].reg_offset = reg_to_offset[i];
-                  }
+            if (scan_inst->dst.file == GRF &&
+                scan_inst->dst.reg == reg_from &&
+                scan_inst->dst.reg_offset == i) {
+               scan_inst->dst.reg = reg_to;
+               scan_inst->dst.reg_offset = reg_to_offset[i];
+            }
+            for (int j = 0; j < scan_inst->sources; j++) {
+               if (scan_inst->src[j].file == GRF &&
+                   scan_inst->src[j].reg == reg_from &&
+                   scan_inst->src[j].reg_offset == i) {
+                  scan_inst->src[j].reg = reg_to;
+                  scan_inst->src[j].reg_offset = reg_to_offset[i];
                }
             }
          }