i965/vec4: Fix copy propagation for non-register-aligned regions.
author Francisco Jerez <currojerez@riseup.net>
Fri, 2 Sep 2016 05:31:43 +0000 (22:31 -0700)
committer Francisco Jerez <currojerez@riseup.net>
Wed, 14 Sep 2016 21:50:59 +0000 (14:50 -0700)
This prevents it from trying to propagate a copy through a
register-misaligned region.  MOV instructions with a misaligned
destination shouldn't be treated as a direct GRF copy, because they
only define the destination GRFs partially.  Also fix the interference
check implemented in is_channel_updated() so that overlapping regions
with different register offsets are always considered to interfere,
since the writemask check in that function is only valid under the
assumption that the source and destination regions are aligned
component by component.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp

index fe76dea79588e57243c2f93f1b21b0ead7e0200e..545f4c7f3e249b65e4ffeee70bd0f6e0f944fef3 100644 (file)
@@ -46,6 +46,7 @@ is_direct_copy(vec4_instruction *inst)
    return (inst->opcode == BRW_OPCODE_MOV &&
           !inst->predicate &&
           inst->dst.file == VGRF &&
+          inst->dst.offset % REG_SIZE == 0 &&
           !inst->dst.reladdr &&
           !inst->src[0].reladdr &&
           (inst->dst.type == inst->src[0].type ||
@@ -73,7 +74,8 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
       return false;
 
    return regions_overlap(*src, REG_SIZE, inst->dst, inst->size_written) &&
-          inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch));
+          (inst->dst.offset != src->offset ||
+           inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
 }
 
 static bool
@@ -436,8 +438,9 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
             inst->src[i].reladdr)
            continue;
 
-         /* We only handle single-register copies. */
-         if (inst->size_read(i) != REG_SIZE)
+         /* We only handle register-aligned single GRF copies. */
+         if (inst->size_read(i) != REG_SIZE ||
+             inst->src[i].offset % REG_SIZE)
             continue;
 
          const unsigned reg = (alloc.offsets[inst->src[i].nr] +