From ff4726077d86800d33520581f154a27dac408588 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 28 Feb 2018 19:57:44 -0800 Subject: [PATCH] intel/fs: Set up sampler message headers in the visitor on gen7+ This gives the scheduler visibility into the headers which should improve scheduling. More importantly, however, it lets the scheduler know that the header gets written. As-is, the scheduler thinks that a texture instruction only reads it's payload and is unaware that it may write to the first register so it may reorder it with respect to a read from that register. This is causing issues in a couple of Dota 2 vertex shaders. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104923 Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Francisco Jerez --- src/intel/compiler/brw_fs.cpp | 40 ++++++++++++++++++++++--- src/intel/compiler/brw_fs_generator.cpp | 21 ++----------- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 244c6cda03a..0d7988dae47 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4192,17 +4192,15 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, op == SHADER_OPCODE_SAMPLEINFO || is_high_sampler(devinfo, sampler)) { /* For general texture offsets (no txf workaround), we need a header to - * put them in. Note that we're only reserving space for it in the - * message payload as it will be initialized implicitly by the - * generator. + * put them in. * * TG4 needs to place its channel select in the header, for interaction * with ARB_texture_swizzle. The sampler index is only 4-bits, so for * larger sampler numbers we need to offset the Sampler State Pointer in * the header. */ + fs_reg header = retype(sources[0], BRW_REGISTER_TYPE_UD); header_size = 1; - sources[0] = fs_reg(); length++; /* If we're requesting fewer than four channels worth of response, @@ -4214,6 +4212,40 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf; inst->offset |= mask << 12; } + + /* Build the actual header */ + const fs_builder ubld = bld.exec_all().group(8, 0); + const fs_builder ubld1 = ubld.group(1, 0); + ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + if (inst->offset) { + ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset)); + } else if (bld.shader->stage != MESA_SHADER_VERTEX && + bld.shader->stage != MESA_SHADER_FRAGMENT) { + /* The vertex and fragment stages have g0.2 set to 0, so + * header0.2 is 0 when g0 is copied. Other stages may not, so we + * must set it to 0 to avoid setting undesirable bits in the + * message. + */ + ubld1.MOV(component(header, 2), brw_imm_ud(0)); + } + + if (is_high_sampler(devinfo, sampler)) { + if (sampler.file == BRW_IMMEDIATE_VALUE) { + assert(sampler.ud >= 16); + const int sampler_state_size = 16; /* 16 bytes */ + + ubld1.ADD(component(header, 3), + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD), + brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size)); + } else { + fs_reg tmp = ubld1.vgrf(BRW_REGISTER_TYPE_UD); + ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0)); + ubld1.SHL(tmp, tmp, brw_imm_ud(4)); + ubld1.ADD(component(header, 3), + retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD), + tmp); + } + } } if (shadow_c.file != BAD_FILE) { diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index b59c09f46ec..a5a821a13b0 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1001,19 +1001,13 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src * we need to set it up explicitly and load the offset bitfield. * Otherwise, we can use an implied move from g0 to the first message reg. */ - if (inst->header_size != 0) { + if (inst->header_size != 0 && devinfo->gen < 7) { if (devinfo->gen < 6 && !inst->offset) { /* Set up an implied move from g0 to the MRF. */ src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); } else { - struct brw_reg header_reg; - - if (devinfo->gen >= 7) { - header_reg = src; - } else { - assert(inst->base_mrf != -1); - header_reg = brw_message_reg(inst->base_mrf); - } + assert(inst->base_mrf != -1); + struct brw_reg header_reg = brw_message_reg(inst->base_mrf); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_8); @@ -1027,17 +1021,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* Set the offset bits in DWord 2. */ brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(inst->offset)); - } else if (stage != MESA_SHADER_VERTEX && - stage != MESA_SHADER_FRAGMENT) { - /* The vertex and fragment stages have g0.2 set to 0, so - * header0.2 is 0 when g0 is copied. Other stages may not, so we - * must set it to 0 to avoid setting undesirable bits in the - * message. - */ - brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0)); } - brw_adjust_sampler_state_pointer(p, header_reg, sampler_index); brw_pop_insn_state(p); } } -- 2.30.2