From 4bddd82bf3dae44c2b75cef34e9e85e15d63df7f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 14 Jul 2015 15:43:44 +0300 Subject: [PATCH] i965/fs: Factor out universally broken calculation of the register component size. This in principle simple calculation was being open-coded in a number of places (in a series I haven't yet sent for review there will be a couple more), all of them were subtly broken in one way or another: None of them were handling the HW_REG case correctly as pointed out by Connor, and fs_inst::regs_read() was handling the stride=0 case rather naively. This patch solves both problems and factors out the calculation as a new fs_reg method. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 21 ++++++++++++------- src/mesa/drivers/dri/i965/brw_fs.h | 6 +++--- .../drivers/dri/i965/brw_fs_generator.cpp | 2 +- src/mesa/drivers/dri/i965/brw_ir_fs.h | 6 ++++++ 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 189da1d553e..ff0675d146f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -78,8 +78,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, case HW_REG: case MRF: case ATTR: - this->regs_written = - DIV_ROUND_UP(MAX2(exec_size * dst.stride, 1) * type_sz(dst.type), 32); + this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size), + REG_SIZE); break; case BAD_FILE: this->regs_written = 0; @@ -443,6 +443,15 @@ fs_reg::is_contiguous() const return stride == 1; } +unsigned +fs_reg::component_size(unsigned width) const +{ + const unsigned stride = (file != HW_REG ? this->stride : + fixed_hw_reg.hstride == 0 ? 0 : + 1 << (fixed_hw_reg.hstride - 1)); + return MAX2(width * stride, 1) * type_sz(type); +} + int fs_visitor::type_size(const struct glsl_type *type) { @@ -702,12 +711,8 @@ fs_inst::regs_read(int arg) const return 1; case GRF: case HW_REG: - if (src[arg].stride == 0) { - return 1; - } else { - int size = components * this->exec_size * type_sz(src[arg].type); - return DIV_ROUND_UP(size * src[arg].stride, 32); - } + return DIV_ROUND_UP(components * src[arg].component_size(exec_size), + REG_SIZE); case MRF: unreachable("MRF registers are not allowed as sources"); default: diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 88a50ae0913..c00566667e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -70,14 +70,14 @@ offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta) break; case GRF: case MRF: + case HW_REG: case ATTR: return byte_offset(reg, - delta * MAX2(bld.dispatch_width() * reg.stride, 1) * - type_sz(reg.type)); + delta * reg.component_size(bld.dispatch_width())); case UNIFORM: reg.reg_offset += delta; break; - default: + case IMM: assert(delta == 0); } return reg; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index c986d91d988..bae72167972 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1601,7 +1601,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) /* If the instruction writes to more than one register, it needs to * be a "compressed" instruction on Gen <= 5. */ - if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32) + if (inst->dst.component_size(inst->exec_size) > REG_SIZE) brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); else brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index b48244ae4ca..693357f2796 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -48,6 +48,12 @@ public: bool equals(const fs_reg &r) const; bool is_contiguous() const; + /** + * Return the size in bytes of a single logical component of the + * register assuming the given execution width. + */ + unsigned component_size(unsigned width) const; + /** Smear a channel of the reg to all channels. */ fs_reg &set_smear(unsigned subreg); -- 2.30.2