i965/vec4: Don't coalesce registers with overlapping writes not matching the MOV source.
author: Francisco Jerez <currojerez@riseup.net>
Fri, 2 Sep 2016 05:08:29 +0000 (22:08 -0700)
committer: Francisco Jerez <currojerez@riseup.net>
Wed, 14 Sep 2016 21:50:58 +0000 (14:50 -0700)
In preparation for adding support for sub-GRF offsets to the VEC4 IR.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/mesa/drivers/dri/i965/brw_vec4.cpp

index d9dbc4c8543b9da31a606429d1a90a3adf237a41..8f8d2623416519a357cd5bc7109251a80383ba01 100644 (file)
@@ -1181,8 +1181,11 @@ vec4_visitor::opt_register_coalesce()
                break;
             }
 
-            /* This doesn't handle coalescing of multiple registers. */
-            if (scan_inst->size_written > REG_SIZE)
+            /* This only handles coalescing of a single register starting at
+             * the source offset of the copy instruction.
+             */
+            if (scan_inst->size_written > REG_SIZE ||
+                scan_inst->dst.offset != inst->src[0].offset)
                break;
 
            /* Mark which channels we found unconditional writes for. */
@@ -1246,8 +1249,7 @@ vec4_visitor::opt_register_coalesce()
         while (scan_inst != inst) {
            if (scan_inst->dst.file == VGRF &&
                 scan_inst->dst.nr == inst->src[0].nr &&
-               scan_inst->dst.offset / REG_SIZE ==
-                 inst->src[0].offset / REG_SIZE) {
+               scan_inst->dst.offset == inst->src[0].offset) {
                scan_inst->reswizzle(inst->dst.writemask,
                                     inst->src[0].swizzle);
               scan_inst->dst.file = inst->dst.file;