From dc657a8201fe942fce2c68835f1ee52b348e52f0 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 12 May 2016 23:04:10 -0700 Subject: [PATCH] i965: Make brw_reg_from_fs_reg() halve exec_size when compressed. In a5d7e144eaf43fee37e6ff9e2de194407087632b, Connor generalized the exec_size halving code to handle more cases. As part of this, he made it not halve anything if the region accessed falls completely in a single register. Unfortunately, it started producing some invalid regions: -add(16) g6<1>F g10<8,8,1>UW -g1<0,1,0>F { align1 compr }; -add(16) g8<1>F g12<8,8,1>UW -g1.1<0,1,0>F { align1 compr }; +add(16) g6<1>F g10<16,16,1>UW -g1<0,1,0>F { align1 compr }; +add(16) g8<1>F g12<16,16,1>UW -g1.1<0,1,0>F { align1 compr }; Here, the UW source region completely fits within a register. However, we have to use instruction compression because the destination region spans two registers. <16,16,1> is invalid because it's compressed. To handle this, skip the "everything fits in one register" case and fall through to the exec_size halving case when compressed. Fixes hundreds of Piglit regressions on GM965. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95370 Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 4b85aaa504f..b9000d6d608 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -54,7 +54,8 @@ brw_file_from_reg(fs_reg *reg) } static struct brw_reg -brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) +brw_reg_from_fs_reg(const struct brw_codegen *p, + fs_inst *inst, fs_reg *reg, unsigned gen) { struct brw_reg brw_reg; @@ -65,7 +66,8 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) case VGRF: if (reg->stride == 0) { brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0); - } else if (inst->exec_size * reg->stride * type_sz(reg->type) <= 32) { + } else if (!p->compressed && + inst->exec_size * reg->stride * type_sz(reg->type) <= 32) { brw_reg = brw_vecn_reg(inst->exec_size, brw_file_from_reg(reg), reg->nr, 0); brw_reg = stride(brw_reg, inst->exec_size * reg->stride, @@ -1763,7 +1765,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) } for (unsigned int i = 0; i < inst->sources; i++) { - src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen); + src[i] = brw_reg_from_fs_reg(p, inst, &inst->src[i], devinfo->gen); /* The accumulator result appears to get used for the * conditional modifier generation. When negating a UD @@ -1775,7 +1777,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) inst->src[i].type != BRW_REGISTER_TYPE_UD || !inst->src[i].negate); } - dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen); + dst = brw_reg_from_fs_reg(p, inst, &inst->dst, devinfo->gen); brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); -- 2.30.2