From faf20df143a63e58aa729446f21c38ae39a438f2 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Nov 2016 18:13:02 -0800 Subject: [PATCH] i965/fs: Refactor handling of constant tg4 offsets Previously, we had an OFFSET_VALUE source for logical texture instructions that was intended to mean exactly what it says, "offset". In reality, we only fully used it for tg4 offsets. We used offset_value.file == IMM to mean, "you have a constant offset, go look in instr->offset" and didn't actually use the contents of the register at all in that case, except in nir_emit_texture, where we used it as a temporary before copying it into instr->offset. This commit renames OFFSET_VALUE to TG4_OFFSET and restricts its usage to indirect tg4 offsets only. The nir_emit_texture code is refactored so that we explicitly build a header_bits value which is placed in instr->offset, and the constant offset values (both for tg4 and regular texture operations) are used to construct header_bits and don't go through the offset source at all. Finally, we stop passing offset_value into lower_sampler_logical_send_gen5 because we can't do indirect offsets until gen7 anyway. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h | 2 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 19 +++++------ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 42 +++++++++++------------- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 84a51b48cc6..f22a52f7394 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1420,7 +1420,7 @@ enum tex_logical_srcs { /** Texture sampler index */ TEX_LOGICAL_SRC_SAMPLER, /** Texel offset for gathers */ - TEX_LOGICAL_SRC_OFFSET_VALUE, + TEX_LOGICAL_SRC_TG4_OFFSET, /** REQUIRED: Number of coordinate components (as UD immediate) */ TEX_LOGICAL_SRC_COORD_COMPONENTS, /** REQUIRED: Number of derivative components (as UD immediate) */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 3cdf0bf39f7..c218f56684d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -730,7 +730,7 @@ fs_inst::components_read(unsigned i) const opcode == SHADER_OPCODE_TXD_LOGICAL) return src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud; /* Texture offset. */ - else if (i == TEX_LOGICAL_SRC_OFFSET_VALUE) + else if (i == TEX_LOGICAL_SRC_TG4_OFFSET) return 2; /* MCS */ else if (i == TEX_LOGICAL_SRC_MCS && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL) @@ -3877,7 +3877,6 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &sample_index, const fs_reg &surface, const fs_reg &sampler, - const fs_reg &offset_value, unsigned coord_components, unsigned grad_components) { @@ -3885,7 +3884,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op, fs_reg msg_coords = message; unsigned header_size = 0; - if (offset_value.file != BAD_FILE) { + if (inst->offset != 0) { /* The offsets set up by the visitor are in the m1 header, so we can't * go headerless. 
*/ @@ -3985,7 +3984,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &mcs, const fs_reg &surface, const fs_reg &sampler, - const fs_reg &offset_value, + const fs_reg &tg4_offset, unsigned coord_components, unsigned grad_components) { @@ -3997,7 +3996,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F); if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET || - offset_value.file != BAD_FILE || inst->eot || + inst->offset != 0 || inst->eot || op == SHADER_OPCODE_SAMPLEINFO || is_high_sampler(devinfo, sampler)) { /* For general texture offsets (no txf workaround), we need a header to @@ -4142,7 +4141,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, for (unsigned i = 0; i < 2; i++) /* offu, offv */ bld.MOV(retype(sources[length++], BRW_REGISTER_TYPE_D), - offset(offset_value, bld, i)); + offset(tg4_offset, bld, i)); if (coord_components == 3) /* r if present */ bld.MOV(sources[length++], offset(coordinate, bld, 2)); @@ -4194,7 +4193,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS]; const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE]; const fs_reg &sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER]; - const fs_reg &offset_value = inst->src[TEX_LOGICAL_SRC_OFFSET_VALUE]; + const fs_reg &tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET]; assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM); const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud; assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM); @@ -4203,12 +4202,12 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) if (devinfo->gen >= 7) { lower_sampler_logical_send_gen7(bld, inst, op, coordinate, shadow_c, lod, lod2, sample_index, - mcs, surface, sampler, offset_value, + mcs, surface, 
sampler, tg4_offset, coord_components, grad_components); } else if (devinfo->gen >= 5) { lower_sampler_logical_send_gen5(bld, inst, op, coordinate, shadow_c, lod, lod2, sample_index, - surface, sampler, offset_value, + surface, sampler, coord_components, grad_components); } else { lower_sampler_logical_send_gen4(bld, inst, op, coordinate, @@ -4677,7 +4676,7 @@ get_sampler_lowered_simd_width(const struct gen_device_info *devinfo, inst->components_read(TEX_LOGICAL_SRC_LOD2) + inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) + (inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ? - inst->components_read(TEX_LOGICAL_SRC_OFFSET_VALUE) : 0) + + inst->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) + inst->components_read(TEX_LOGICAL_SRC_MCS); /* SIMD16 messages with more than five arguments exceed the maximum message diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c88fa77472c..baa973c31a0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4431,6 +4431,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_d(0); + uint32_t header_bits = 0; for (unsigned i = 0; i < instr->num_srcs; i++) { fs_reg src = get_nir_src(instr->src[i].src); switch (instr->src[i].src_type) { @@ -4485,11 +4486,9 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) nir_const_value *const_offset = nir_src_as_const_value(instr->src[i].src); if (const_offset) { - unsigned header_bits = brw_texture_offset(const_offset->i32, 3); - if (header_bits != 0) - srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] = brw_imm_ud(header_bits); + header_bits |= brw_texture_offset(const_offset->i32, 3); } else { - srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] = + srcs[TEX_LOGICAL_SRC_TG4_OFFSET] = retype(src, BRW_REGISTER_TYPE_D); } break; @@ -4607,8 +4606,7 @@ fs_visitor::nir_emit_texture(const fs_builder 
&bld, nir_tex_instr *instr) opcode = SHADER_OPCODE_LOD_LOGICAL; break; case nir_texop_tg4: - if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != BAD_FILE && - srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != IMM) + if (srcs[TEX_LOGICAL_SRC_TG4_OFFSET].file != BAD_FILE) opcode = SHADER_OPCODE_TG4_OFFSET_LOGICAL; else opcode = SHADER_OPCODE_TG4_LOGICAL; @@ -4639,8 +4637,21 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) unreachable("unknown texture opcode"); } + if (instr->op == nir_texop_tg4) { + if (instr->component == 1 && + key_tex->gather_channel_quirk_mask & (1 << texture)) { + /* gather4 sampler is broken for green channel on RG32F -- + * we must ask for blue instead. + */ + header_bits |= 2 << 16; + } else { + header_bits |= instr->component << 16; + } + } + fs_reg dst = bld.vgrf(brw_type_for_nir_type(instr->dest_type), 4); fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs)); + inst->offset = header_bits; const unsigned dest_size = nir_tex_instr_dest_size(instr); if (devinfo->gen >= 9 && @@ -4658,23 +4669,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE) inst->shadow_compare = true; - if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file == IMM) - inst->offset = srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].ud; - - if (instr->op == nir_texop_tg4) { - if (instr->component == 1 && - key_tex->gather_channel_quirk_mask & (1 << texture)) { - /* gather4 sampler is broken for green channel on RG32F -- - * we must ask for blue instead. - */ - inst->offset |= 2 << 16; - } else { - inst->offset |= instr->component << 16; - } - - if (devinfo->gen == 6) - emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst); - } + if (instr->op == nir_texop_tg4 && devinfo->gen == 6) + emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst); fs_reg nir_dest[4]; for (unsigned i = 0; i < dest_size; i++) -- 2.30.2