From: Jason Ekstrand Date: Wed, 6 Feb 2019 21:42:17 +0000 (-0600) Subject: intel/fs: Add support for bindless texture ops X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=843286d324c833198f4f5bd6d548ab3612968169;p=mesa.git intel/fs: Add support for bindless texture ops We add two new texture sources for bindless surface and sampler handles. Bindless surface handles are expected to be pre-shifted so that the 20-bit surface state table index is in the top 20 bits of the 32-bit handle. This lets us avoid any extra shifts in the shader. Bindless sampler handles are 32-byte aligned byte offsets from general state base address. We use 32-byte aligned instead of 16-byte aligned to avoid having to use more indirect messages than needed. It means we can't tightly pack samplers but that's probably not a big deal. Reviewed-by: Lionel Landwerlin Reviewed-by: Caio Marcelo de Oliveira Filho --- diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index da723307b73..b14e33024b8 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -835,6 +835,10 @@ enum tex_logical_srcs { TEX_LOGICAL_SRC_SURFACE, /** Texture sampler index */ TEX_LOGICAL_SRC_SAMPLER, + /** Texture surface bindless handle */ + TEX_LOGICAL_SRC_SURFACE_HANDLE, + /** Texture sampler bindless handle */ + TEX_LOGICAL_SRC_SAMPLER_HANDLE, /** Texel offset for gathers */ TEX_LOGICAL_SRC_TG4_OFFSET, /** REQUIRED: Number of coordinate components (as UD immediate) */ @@ -1224,6 +1228,7 @@ enum brw_message_target { */ #define GEN8_BTI_STATELESS_IA_COHERENT 255 #define GEN8_BTI_STATELESS_NON_COHERENT 253 +#define GEN9_BTI_BINDLESS 252 /* Dataport atomic operations for Untyped Atomic Integer Operation message * (and others). diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 9f82946f078..856e2ef815d 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4685,6 +4685,8 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &mcs, const fs_reg &surface, const fs_reg &sampler, + const fs_reg &surface_handle, + const fs_reg &sampler_handle, const fs_reg &tg4_offset, unsigned coord_components, unsigned grad_components) @@ -4697,9 +4699,14 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, for (unsigned i = 0; i < ARRAY_SIZE(sources); i++) sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F); + /* We must have exactly one of surface/sampler and surface/sampler_handle */ + assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE)); + assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE)); + if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET || inst->offset != 0 || inst->eot || op == SHADER_OPCODE_SAMPLEINFO || + sampler_handle.file != BAD_FILE || is_high_sampler(devinfo, sampler)) { /* For general texture offsets (no txf workaround), we need a header to * put them in. @@ -4739,7 +4746,21 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, ubld1.MOV(component(header, 2), brw_imm_ud(0)); } - if (is_high_sampler(devinfo, sampler)) { + if (sampler_handle.file != BAD_FILE) { + /* Bindless sampler handles aren't relative to the sampler state + * pointer passed into the shader through SAMPLER_STATE_POINTERS_*. + * Instead, it's an absolute pointer relative to dynamic state base + * address. + * + * Sampler states are 16 bytes each and the pointer we give here has + * to be 32-byte aligned. In order to avoid more indirect messages + * than required, we assume that all bindless sampler states are + * 32-byte aligned. This sacrifices a bit of general state base + * address space but means we can do something more efficient in the + * shader. + */ + ubld1.MOV(component(header, 3), sampler_handle); + } else if (is_high_sampler(devinfo, sampler)) { if (sampler.file == BRW_IMMEDIATE_VALUE) { assert(sampler.ud >= 16); const int sampler_state_size = 16; /* 16 bytes */ @@ -4942,14 +4963,42 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, } inst->sfid = BRW_SFID_SAMPLER; - if (surface.file == IMM && sampler.file == IMM) { + if (surface.file == IMM && + (sampler.file == IMM || sampler_handle.file != BAD_FILE)) { inst->desc = brw_sampler_desc(devinfo, surface.ud + base_binding_table_index, - sampler.ud % 16, + sampler.file == IMM ? sampler.ud % 16 : 0, msg_type, simd_mode, 0 /* return_format unused on gen7+ */); inst->src[0] = brw_imm_ud(0); + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + } else if (surface_handle.file != BAD_FILE) { + /* Bindless surface */ + assert(devinfo->gen >= 9); + inst->desc = brw_sampler_desc(devinfo, + GEN9_BTI_BINDLESS, + sampler.file == IMM ? sampler.ud % 16 : 0, + msg_type, + simd_mode, + 0 /* return_format unused on gen7+ */); + + /* For bindless samplers, the entire address is included in the message + * header so we can leave the portion in the message descriptor 0. + */ + if (sampler_handle.file != BAD_FILE || sampler.file == IMM) { + inst->src[0] = brw_imm_ud(0); + } else { + const fs_builder ubld = bld.group(1, 0).exec_all(); + fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.SHL(desc, sampler, brw_imm_ud(8)); + inst->src[0] = desc; + } + + /* We assume that the driver provided the handle in the top 20 bits so + * we can use the surface handle directly as the extended descriptor. + */ + inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD); } else { /* Immediate portion of the descriptor */ inst->desc = brw_sampler_desc(devinfo, @@ -4964,7 +5013,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, /* This case is common in GL */ ubld.MUL(desc, surface, brw_imm_ud(0x101)); } else { - if (sampler.file == IMM) { + if (sampler_handle.file != BAD_FILE) { + ubld.MOV(desc, surface); + } else if (sampler.file == IMM) { ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8)); } else { ubld.SHL(desc, sampler, brw_imm_ud(8)); @@ -4976,8 +5027,8 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, ubld.AND(desc, desc, brw_imm_ud(0xfff)); inst->src[0] = component(desc, 0); + inst->src[1] = brw_imm_ud(0); /* ex_desc */ } - inst->src[1] = brw_imm_ud(0); /* ex_desc */ inst->src[2] = src_payload; inst->resize_sources(3); @@ -5009,6 +5060,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS]; const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE]; const fs_reg &sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER]; + const fs_reg &surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE]; + const fs_reg &sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE]; const fs_reg &tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET]; assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM); const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud; @@ -5019,7 +5072,9 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) lower_sampler_logical_send_gen7(bld, inst, op, coordinate, shadow_c, lod, lod2, min_lod, sample_index, - mcs, surface, sampler, tg4_offset, + mcs, surface, sampler, + surface_handle, sampler_handle, + tg4_offset, coord_components, grad_components); } else if (devinfo->gen >= 5) { lower_sampler_logical_send_gen5(bld, inst, op, coordinate, diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index b6536eb7158..e8af99e1705 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -183,7 +183,8 @@ public: void emit_interpolation_setup_gen6(); void compute_sample_position(fs_reg dst, fs_reg int_sample_pos); fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, - const fs_reg &sampler); + const fs_reg &texture, + const fs_reg &texture_handle); void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); fs_reg resolve_source_modifiers(const fs_reg &src); void emit_discard_jump(); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 4bf85bfb7e8..28c6e0e209a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3201,7 +3201,7 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, const fs_reg sample = nir_system_values[SYSTEM_VALUE_SAMPLE_ID]; const fs_reg mcs = wm_key->multisample_fbo ? - emit_mcs_fetch(coords, 3, brw_imm_ud(surface)) : fs_reg(); + emit_mcs_fetch(coords, 3, brw_imm_ud(surface), fs_reg()) : fs_reg(); /* Use either a normal or a CMS texel fetch message depending on whether * the framebuffer is single or multisample. On SKL+ use the wide CMS @@ -5237,6 +5237,18 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) break; } + case nir_tex_src_texture_handle: + assert(nir_tex_instr_src_index(instr, nir_tex_src_texture_offset) == -1); + srcs[TEX_LOGICAL_SRC_SURFACE] = fs_reg(); + srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = bld.emit_uniformize(src); + break; + + case nir_tex_src_sampler_handle: + assert(nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset) == -1); + srcs[TEX_LOGICAL_SRC_SAMPLER] = fs_reg(); + srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE] = bld.emit_uniformize(src); + break; + case nir_tex_src_ms_mcs: assert(instr->op == nir_texop_txf_ms); srcs[TEX_LOGICAL_SRC_MCS] = retype(src, BRW_REGISTER_TYPE_D); @@ -5266,7 +5278,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) srcs[TEX_LOGICAL_SRC_MCS] = emit_mcs_fetch(srcs[TEX_LOGICAL_SRC_COORDINATE], instr->coord_components, - srcs[TEX_LOGICAL_SRC_SURFACE]); + srcs[TEX_LOGICAL_SRC_SURFACE], + srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE]); } else { srcs[TEX_LOGICAL_SRC_MCS] = brw_imm_ud(0u); } diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index f8e8d36360e..232693cec78 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -35,7 +35,8 @@ using namespace brw; /* Sample from the MCS surface attached to this multisample texture. */ fs_reg fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, - const fs_reg &texture) + const fs_reg &texture, + const fs_reg &texture_handle) { const fs_reg dest = vgrf(glsl_type::uvec4_type); @@ -43,6 +44,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, srcs[TEX_LOGICAL_SRC_COORDINATE] = coordinate; srcs[TEX_LOGICAL_SRC_SURFACE] = texture; srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0); + srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = texture_handle; srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(components); srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);