From 0ac4c272755c75108a10a84ce33bf6a6234985d3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kristian=20H=C3=B8gsberg?= Date: Wed, 10 Dec 2014 14:59:26 -0800 Subject: [PATCH] i965/skl: Always use a header for SIMD4x2 sampler messages MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit SKL+ overloads the SIMD4x2 SIMD mode to mean either SIMD8D or SIMD4x2 depending on bit 22 in the message header. If the bit is 0 or there is no header we get SIMD8D. We always want SIMD4x2 in vec4 and for fs pull constants, so use a message header in those cases and set bit 22 there. Based on an initial patch from Ken. Reviewed-by: Kenneth Graunke Signed-off-by: Kristian Høgsberg --- src/mesa/drivers/dri/i965/brw_defines.h | 5 +++ src/mesa/drivers/dri/i965/brw_fs.cpp | 8 +++++ .../drivers/dri/i965/brw_fs_generator.cpp | 32 +++++++++++++++---- .../drivers/dri/i965/brw_vec4_generator.cpp | 16 +++++++--- .../drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++- 5 files changed, 54 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 28e398d0b88..f02a0b8eb7e 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1373,6 +1373,11 @@ enum brw_message_target { #define BRW_SAMPLER_SIMD_MODE_SIMD16 2 #define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 +/* GEN9 changes SIMD mode 0 to mean SIMD8D, but lets us get the SIMD4x2 + * behavior by setting bit 22 of dword 2 in the message header. 
*/ +#define GEN9_SAMPLER_SIMD_MODE_SIMD8D 0 +#define GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2 (1 << 22) + #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8c7d780f352..9dfb7b7343d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2994,6 +2994,14 @@ fs_visitor::lower_uniform_pull_constant_loads() const_offset_reg.fixed_hw_reg.dw1.ud /= 4; fs_reg payload = fs_reg(this, glsl_type::uint_type); + /* We have to use a message header on Skylake to get SIMD4x2 mode. + * Reserve space for the register. + */ + if (brw->gen >= 9) { + payload.reg_offset++; + virtual_grf_sizes[payload.reg] = 2; + } + /* This is actually going to be a MOV, but since only the first dword * is accessed, we have a special opcode to do just that one. Note * that this needs to be an operation that will be considered a def diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index c652d654235..7b4ac8d3695 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1017,6 +1017,26 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, */ dst.width = BRW_WIDTH_4; + struct brw_reg src = offset; + bool header_present = false; + int mlen = 1; + + if (brw->gen >= 9) { + /* Skylake requires a message header in order to use SIMD4x2 mode. 
*/ + src = retype(brw_vec8_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD); + mlen = 2; + header_present = true; + + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, src, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_set_default_access_mode(p, BRW_ALIGN_1); + + brw_MOV(p, get_element_ud(src, 2), + brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); + brw_pop_insn_state(p); + } + if (index.file == BRW_IMMEDIATE_VALUE) { uint32_t surf_index = index.dw1.ud; @@ -1028,14 +1048,14 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, brw_pop_insn_state(p); brw_set_dest(p, send, dst); - brw_set_src0(p, send, offset); + brw_set_src0(p, send, src); brw_set_sampler_message(p, send, surf_index, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ - 1, /* mlen */ - false, /* no header */ + mlen, + header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1064,8 +1084,8 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, 0 /* sampler */, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1 /* rlen */, - 1 /* mlen */, - false /* header */, + mlen, + header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1); @@ -1077,7 +1097,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, /* dst = send(offset, a0.0) */ brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn_send, dst); - brw_set_src0(p, insn_send, offset); + brw_set_src0(p, insn_send, src); brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index b88a57912cb..f900bf7f300 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -328,6 +328,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, } else { struct 
brw_reg header = retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD); + uint32_t dw2 = 0; /* Explicitly set up the message header by copying g0 to the MRF. */ brw_push_insn_state(p); @@ -336,11 +337,18 @@ vec4_generator::generate_tex(vec4_instruction *inst, brw_set_default_access_mode(p, BRW_ALIGN_1); - if (inst->offset) { + if (inst->offset) /* Set the texel offset bits in DWord 2. */ - brw_MOV(p, get_element_ud(header, 2), - brw_imm_ud(inst->offset)); - } + dw2 = inst->offset; + + if (brw->gen >= 9) + /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D, + * based on bit 22 in the header. + */ + dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2; + + if (dw2) + brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(dw2)); brw_adjust_sampler_state_pointer(p, header, sampler_index, dst); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 09d79c83704..a81c66a861f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2586,12 +2586,14 @@ vec4_visitor::visit(ir_texture *ir) /* The message header is necessary for: * - Gen4 (always) + * - Gen9+ for selecting SIMD4x2 * - Texel offsets * - Gather channel selection * - Sampler indices too large to fit in a 4-bit value. */ inst->header_present = - brw->gen < 5 || inst->offset != 0 || ir->op == ir_tg4 || + brw->gen < 5 || brw->gen >= 9 || + inst->offset != 0 || ir->op == ir_tg4 || is_high_sampler(brw, sampler_reg); inst->base_mrf = 2; inst->mlen = inst->header_present + 1; /* always at least one */ -- 2.30.2