From a7d319c00be425be219a101b5b4d48f1cbe4ec01 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 16 May 2016 15:47:39 -0700 Subject: [PATCH] i965/fs: Implement scratch reads and writes of 4 GRFs at a time. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 31 ++++++++----------- .../drivers/dri/i965/brw_fs_generator.cpp | 4 +++ .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +-- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6cbe673e94d..9e544a9d18a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2065,21 +2065,19 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, unsigned offset) { const struct brw_device_info *devinfo = p->devinfo; - uint32_t msg_control, msg_type; - int mlen; + uint32_t msg_type; if (devinfo->gen >= 6) offset /= 16; mrf = retype(mrf, BRW_REGISTER_TYPE_UD); - if (num_regs == 1) { - msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; - mlen = 2; - } else { - msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; - mlen = 3; - } + const unsigned mlen = 1 + num_regs; + const unsigned msg_control = + (num_regs == 1 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : + num_regs == 2 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : + num_regs == 4 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : 0); + assert(msg_control); /* Set up the message header. This is g0, with g0.2 filled with * the offset. We don't want to leave our offset around in g0 or @@ -2180,8 +2178,6 @@ brw_oword_block_read_scratch(struct brw_codegen *p, unsigned offset) { const struct brw_device_info *devinfo = p->devinfo; - uint32_t msg_control; - int rlen; if (devinfo->gen >= 6) offset /= 16; @@ -2200,13 +2196,12 @@ brw_oword_block_read_scratch(struct brw_codegen *p, } dest = retype(dest, BRW_REGISTER_TYPE_UW); - if (num_regs == 1) { - msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; - rlen = 1; - } else { - msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; - rlen = 2; - } + const unsigned rlen = num_regs; + const unsigned msg_control = + (num_regs == 1 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : + num_regs == 2 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : + num_regs == 4 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : 0); + assert(msg_control); { brw_push_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 4b4792f96e4..7308fdd9986 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1061,6 +1061,7 @@ fs_generator::generate_discard_jump(fs_inst *inst) void fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src) { + assert(inst->exec_size <= 16 || inst->force_writemask_all); assert(inst->mlen != 0); brw_MOV(p, @@ -1073,6 +1074,7 @@ fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src) void fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst) { + assert(inst->exec_size <= 16 || inst->force_writemask_all); assert(inst->mlen != 0); brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), @@ -1082,6 +1084,8 @@ fs_generator::generate_scratch_read(fs_inst *inst, struct brw_reg dst) void fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst) { + assert(inst->exec_size <= 16 || inst->force_writemask_all); + gen7_block_read_scratch(p, dst, inst->exec_size / 8, inst->offset); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index e770a7f91cc..7fdedc084c5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -928,11 +928,9 @@ fs_visitor::spill_reg(int spill_reg) /* We read the largest power-of-two divisor of the register count * (because only POT scratch read blocks are allowed by the * hardware) up to the maximum supported block size. - * XXX - Bump the limit when the generator code is ready for - * 32-wide spills. */ const unsigned width = - MIN2(16, 1u << (ffs(MAX2(1, regs_read) * 8) - 1)); + MIN2(32, 1u << (ffs(MAX2(1, regs_read) * 8) - 1)); /* Set exec_all() on unspill messages under the (rather * pessimistic) assumption that there is no one-to-one -- 2.30.2