From 170200e0fcb0b16d20bff86e1258e0a1b2034c10 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 13 Jul 2015 14:20:32 +0300 Subject: [PATCH] i965/fs: Fix rewrite of the second half of 16-wide coalesced registers. The register coalesce pass wasn't rewriting the destination and sources of instructions that accessed the second half of a coalesced register previously copied with a 16-wide MOV instruction. E.g.: | ADD (16) vgrf0:f, vgrf0:f, 1.0:f | MOV (16) vgrf1:f, vgrf0:f | MOV (8) vgrf2:f, vgrf0+1:f { sechalf } would get incorrectly register-coalesced into: | ADD (16) vgrf1:f, vgrf1:f, 1.0:f | MOV (8) vgrf2:f, vgrf0+1:f { sechalf } The reason is that the mov[i] pointer was being left equal to NULL for every other register. The fact that we've made it to the rewrite loop implies that the whole register will be coalesced, so whether an instruction that uses it gets rewritten should not depend on whether mov[i] is NULL. Fixes a number of texturing and image_load_store piglit tests on my SIMD-lowering branch. 
Reviewed-by: Jason Ekstrand --- .../dri/i965/brw_fs_register_coalesce.cpp | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 4f00b7f146f..20a54800099 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -228,7 +228,6 @@ fs_visitor::register_coalesce() continue; progress = true; - bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD; for (int i = 0; i < src_size; i++) { if (mov[i]) { @@ -243,20 +242,18 @@ fs_visitor::register_coalesce() foreach_block_and_inst(block, fs_inst, scan_inst, cfg) { for (int i = 0; i < src_size; i++) { - if (mov[i] || was_load_payload) { - if (scan_inst->dst.file == GRF && - scan_inst->dst.reg == reg_from && - scan_inst->dst.reg_offset == i) { - scan_inst->dst.reg = reg_to; - scan_inst->dst.reg_offset = reg_to_offset[i]; - } - for (int j = 0; j < scan_inst->sources; j++) { - if (scan_inst->src[j].file == GRF && - scan_inst->src[j].reg == reg_from && - scan_inst->src[j].reg_offset == i) { - scan_inst->src[j].reg = reg_to; - scan_inst->src[j].reg_offset = reg_to_offset[i]; - } + if (scan_inst->dst.file == GRF && + scan_inst->dst.reg == reg_from && + scan_inst->dst.reg_offset == i) { + scan_inst->dst.reg = reg_to; + scan_inst->dst.reg_offset = reg_to_offset[i]; + } + for (int j = 0; j < scan_inst->sources; j++) { + if (scan_inst->src[j].file == GRF && + scan_inst->src[j].reg == reg_from && + scan_inst->src[j].reg_offset == i) { + scan_inst->src[j].reg = reg_to; + scan_inst->src[j].reg_offset = reg_to_offset[i]; } } } -- 2.30.2