From: Francisco Jerez
Date: Fri, 2 Sep 2016 05:31:43 +0000 (-0700)
Subject: i965/vec4: Fix copy propagation for non-register-aligned regions.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8531f943d9aac13489a02e5a5b4bfa381c465a44;p=mesa.git

i965/vec4: Fix copy propagation for non-register-aligned regions.

This prevents it from trying to propagate a copy through a
register-misaligned region. MOV instructions with a misaligned
destination shouldn't be treated as a direct GRF copy, because they
only define the destination GRFs partially. Also fix the interference
check implemented with is_channel_updated() to consider overlapping
regions with different register offset to interfere, since the
writemask check implemented in the function is only valid under the
assumption that the source and destination regions are aligned
component by component.

Reviewed-by: Iago Toral Quiroga
---

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index fe76dea7958..545f4c7f3e2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -46,6 +46,7 @@ is_direct_copy(vec4_instruction *inst)
    return (inst->opcode == BRW_OPCODE_MOV &&
            !inst->predicate &&
            inst->dst.file == VGRF &&
+           inst->dst.offset % REG_SIZE == 0 &&
            !inst->dst.reladdr &&
            !inst->src[0].reladdr &&
            (inst->dst.type == inst->src[0].type ||
@@ -73,7 +74,8 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
       return false;
 
    return regions_overlap(*src, REG_SIZE, inst->dst, inst->size_written) &&
-          inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch));
+          (inst->dst.offset != src->offset ||
+           inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
 }
 
 static bool
@@ -436,8 +438,9 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
              inst->src[i].reladdr)
             continue;
 
-         /* We only handle single-register copies. */
-         if (inst->size_read(i) != REG_SIZE)
+         /* We only handle register-aligned single GRF copies. */
+         if (inst->size_read(i) != REG_SIZE ||
+             inst->src[i].offset % REG_SIZE)
             continue;
 
          const unsigned reg = (alloc.offsets[inst->src[i].nr] +