From 27bd8ac6f309b9f052a7fa9380ac5e12fb686e31 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 24 Nov 2015 09:01:11 -0800 Subject: [PATCH] i965/fs: Add support for MOV_INDIRECT on pre-Broadwell hardware MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit While we're at it, we also add support for the possibility that the indirect is, in fact, a constant. This shouldn't happen in the common case (if it does, that means NIR failed to constant-fold something), but it's possible so we should handle it. Reviewed-by: Kristian Høgsberg Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 + .../drivers/dri/i965/brw_fs_generator.cpp | 75 +++++++++++++++---- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 24482096f17..0a2d769653f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4483,6 +4483,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: return 8; + case SHADER_OPCODE_MOV_INDIRECT: + /* Prior to Broadwell, we only have 8 address subregisters */ + return devinfo->gen < 8 ? 8 : MIN2(inst->exec_size, 16); + default: return inst->exec_size; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 35400cbe8c3..851cccf0f7c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -351,22 +351,71 @@ fs_generator::generate_mov_indirect(fs_inst *inst, unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; - /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ - struct brw_reg addr = vec8(brw_address_reg(0)); + if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) { + imm_byte_offset += indirect_byte_offset.ud; - /* The destination stride of an instruction (in bytes) must be greater - * than or equal to the size of the rest of the instruction. Since the - * address register is of type UW, we can't use a D-type instruction. - * In order to get around this, re re-type to UW and use a stride. - */ - indirect_byte_offset = - retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + reg.nr = imm_byte_offset / REG_SIZE; + reg.subnr = imm_byte_offset % REG_SIZE; + brw_MOV(p, dst, reg); + } else { + /* Prior to Broadwell, there are only 8 address registers. */ + assert(inst->exec_size == 8 || devinfo->gen >= 8); + + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ + struct brw_reg addr = vec8(brw_address_reg(0)); + + /* The destination stride of an instruction (in bytes) must be greater + * than or equal to the size of the rest of the instruction. Since the + * address register is of type UW, we can't use a D-type instruction. + * In order to get around this, re retype to UW and use a stride. + */ + indirect_byte_offset = + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + + struct brw_reg ind_src; + if (devinfo->gen < 8) { + /* From the Haswell PRM section "Register Region Restrictions": + * + * "The lower bits of the AddressImmediate must not overflow to + * change the register address. The lower 5 bits of Address + * Immediate when added to lower 5 bits of address register gives + * the sub-register offset. The upper bits of Address Immediate + * when added to upper bits of address register gives the register + * address. Any overflow from sub-register offset is dropped." + * + * This restriction is only listed in the Haswell PRM but emperical + * testing indicates that it applies on all older generations and is + * lifted on Broadwell. + * + * Since the indirect may cause us to cross a register boundary, this + * makes the base offset almost useless. We could try and do + * something clever where we use a actual base offset if + * base_offset % 32 == 0 but that would mean we were generating + * different code depending on the base offset. Instead, for the + * sake of consistency, we'll just do the add ourselves. + */ + brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); + ind_src = brw_VxH_indirect(0, 0); + } else { + brw_MOV(p, addr, indirect_byte_offset); + ind_src = brw_VxH_indirect(0, imm_byte_offset); + } - /* Prior to Broadwell, there are only 8 address registers. */ - assert(inst->exec_size == 8 || devinfo->gen >= 8); + brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type)); - brw_MOV(p, addr, indirect_byte_offset); - brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); + if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE && + !inst->get_next()->is_tail_sentinel() && + ((fs_inst *)inst->get_next())->mlen > 0) { + /* From the Sandybridge PRM: + * + * "[Errata: DevSNB(SNB)] If MRF register is updated by any + * instruction that “indexed/indirect” source AND is followed by a + * send, the instruction requires a “Switch”. This is to avoid + * race condition where send may dispatch before MRF is updated." + */ + brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH); + } + } } void -- 2.30.2