From 5a06ee7384934f8b5177b2f01bb7dff08b370145 Mon Sep 17 00:00:00 2001 From: Neil Roberts Date: Thu, 12 Mar 2015 17:41:07 +0000 Subject: [PATCH] i965/skl: Send a message header when doing constant loads SIMD4x2 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Commit 0ac4c272755c7 made it add a header for the send message when using SIMD4x2 on Skylake because without this it will end up using SIMD8D. However the patch missed the case when a sampler is being used to implement constant loads from a buffer surface in a SIMD4x2 vertex shader. This fixes 29 Piglit tests, mostly related to the ARL instruction in vertex programs. Reviewed-by: Kristian Høgsberg Tested-by: Anuj Phogat --- .../drivers/dri/i965/brw_vec4_generator.cpp | 32 +++++++++++++++---- .../drivers/dri/i965/brw_vec4_visitor.cpp | 18 +++++++++++ src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 9 ++++++ 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 010a5c48e8d..e3a94ffb125 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1049,18 +1049,38 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, { assert(surf_index.type == BRW_REGISTER_TYPE_UD); + struct brw_reg src = offset; + bool header_present = false; + int mlen = 1; + + if (brw->gen >= 9) { + /* Skylake requires a message header in order to use SIMD4x2 mode. */ + src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD); + mlen = 2; + header_present = true; + + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, src, retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_set_default_access_mode(p, BRW_ALIGN_1); + + brw_MOV(p, get_element_ud(src, 2), + brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); + brw_pop_insn_state(p); + } + if (surf_index.file == BRW_IMMEDIATE_VALUE) { brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, offset); + brw_set_src0(p, insn, src); brw_set_sampler_message(p, insn, surf_index.dw1.ud, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ - 1, /* mlen */ - false, /* no header */ + mlen, + header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1089,8 +1109,8 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, 0 /* sampler */, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1 /* rlen */, - 1 /* mlen */, - false /* header */, + mlen /* mlen */, + header_present /* header */, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1); @@ -1102,7 +1122,7 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, /* dst = send(offset, a0.0) */ brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn_send, dst); - brw_set_src0(p, insn_send, offset); + brw_set_src0(p, insn_send, src); brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr); brw_pop_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 195c6f5dae7..d5c6e9b5741 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1783,6 +1783,15 @@ vec4_visitor::visit(ir_expression *ir) if (brw->gen >= 7) { dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + + /* We have to use a message header on Skylake to get SIMD4x2 mode. + * Reserve space for the register. + */ + if (brw->gen >= 9) { + grf_offset.reg_offset++; + alloc.sizes[grf_offset.reg] = 2; + } + grf_offset.type = offset.type; emit(MOV(grf_offset, offset)); @@ -3477,6 +3486,15 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, if (brw->gen >= 7) { dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + + /* We have to use a message header on Skylake to get SIMD4x2 mode. + * Reserve space for the register. + */ + if (brw->gen >= 9) { + grf_offset.reg_offset++; + alloc.sizes[grf_offset.reg] = 2; + } + grf_offset.type = offset.type; emit_before(block, inst, MOV(grf_offset, offset)); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp index ba3264dce6f..c3b0233eba2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp @@ -527,6 +527,15 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) /* Add the small constant index to the address register */ src_reg reladdr = src_reg(this, glsl_type::int_type); + + /* We have to use a message header on Skylake to get SIMD4x2 mode. + * Reserve space for the register. + */ + if (brw->gen >= 9) { + reladdr.reg_offset++; + alloc.sizes[reladdr.reg] = 2; + } + dst_reg dst_reladdr = dst_reg(reladdr); dst_reladdr.writemask = WRITEMASK_X; emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index))); -- 2.30.2