From 8514eba693c9daa07284a248e1c4e5d825152c1c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 30 Oct 2018 15:47:39 -0500 Subject: [PATCH] intel/fs: Use SHADER_OPCODE_SEND for texturing on gen7+ Reviewed-by: Iago Toral Quiroga --- src/intel/compiler/brw_fs.cpp | 138 ++++++++++++++- src/intel/compiler/brw_fs.h | 2 +- src/intel/compiler/brw_fs_generator.cpp | 162 +++--------------- .../compiler/brw_schedule_instructions.cpp | 17 ++ 4 files changed, 177 insertions(+), 142 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 35e78eed7f8..d9c339b1f08 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4551,6 +4551,66 @@ is_high_sampler(const struct gen_device_info *devinfo, const fs_reg &sampler) return sampler.file != IMM || sampler.ud >= 16; } +static unsigned +sampler_msg_type(const gen_device_info *devinfo, + opcode opcode, bool shadow_compare) +{ + assert(devinfo->gen >= 5); + switch (opcode) { + case SHADER_OPCODE_TEX: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE; + case FS_OPCODE_TXB: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; + case SHADER_OPCODE_TXL: + return shadow_compare ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; + case SHADER_OPCODE_TXL_LZ: + return shadow_compare ? GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ : + GEN9_SAMPLER_MESSAGE_SAMPLE_LZ; + case SHADER_OPCODE_TXS: + case SHADER_OPCODE_IMAGE_SIZE: + return GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; + case SHADER_OPCODE_TXD: + assert(!shadow_compare || devinfo->gen >= 8 || devinfo->is_haswell); + return shadow_compare ? HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE : + GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; + case SHADER_OPCODE_TXF: + return GEN5_SAMPLER_MESSAGE_SAMPLE_LD; + case SHADER_OPCODE_TXF_LZ: + assert(devinfo->gen >= 9); + return GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ; + case SHADER_OPCODE_TXF_CMS_W: + assert(devinfo->gen >= 9); + return GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; + case SHADER_OPCODE_TXF_CMS: + return devinfo->gen >= 7 ? GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS : + GEN5_SAMPLER_MESSAGE_SAMPLE_LD; + case SHADER_OPCODE_TXF_UMS: + assert(devinfo->gen >= 7); + return GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; + case SHADER_OPCODE_TXF_MCS: + assert(devinfo->gen >= 7); + return GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; + case SHADER_OPCODE_LOD: + return GEN5_SAMPLER_MESSAGE_LOD; + case SHADER_OPCODE_TG4: + assert(devinfo->gen >= 7); + return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C : + GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; + break; + case SHADER_OPCODE_TG4_OFFSET: + assert(devinfo->gen >= 7); + return shadow_compare ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C : + GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; + case SHADER_OPCODE_SAMPLEINFO: + return GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; + default: + unreachable("not reached"); + } +} + static void lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &coordinate, @@ -4566,6 +4626,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, unsigned grad_components) { const gen_device_info *devinfo = bld.shader->devinfo; + const brw_stage_prog_data *prog_data = bld.shader->stage_prog_data; unsigned reg_width = bld.dispatch_width() / 8; unsigned header_size = 0, length = 0; fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE]; @@ -4792,14 +4853,81 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); /* Generate the SEND. */ - inst->opcode = op; - inst->src[0] = src_payload; - inst->src[1] = surface; - inst->src[2] = sampler; - inst->resize_sources(3); + inst->opcode = SHADER_OPCODE_SEND; inst->mlen = mlen; inst->header_size = header_size; + const unsigned msg_type = + sampler_msg_type(devinfo, op, inst->shadow_compare); + const unsigned simd_mode = + inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : + BRW_SAMPLER_SIMD_MODE_SIMD16; + + uint32_t base_binding_table_index; + switch (op) { + case SHADER_OPCODE_TG4: + case SHADER_OPCODE_TG4_OFFSET: + base_binding_table_index = prog_data->binding_table.gather_texture_start; + break; + case SHADER_OPCODE_IMAGE_SIZE: + base_binding_table_index = prog_data->binding_table.image_start; + break; + default: + base_binding_table_index = prog_data->binding_table.texture_start; + break; + } + + inst->sfid = BRW_SFID_SAMPLER; + if (surface.file == IMM && sampler.file == IMM) { + inst->desc = brw_sampler_desc(devinfo, + surface.ud + base_binding_table_index, + sampler.ud % 16, + msg_type, + simd_mode, + 0 /* return_format unused on gen7+ */); + inst->src[0] = brw_imm_ud(0); + } else { + /* Immediate portion of the descriptor */ + inst->desc = brw_sampler_desc(devinfo, + 0, /* surface */ + 0, /* sampler */ + msg_type, + simd_mode, + 0 /* return_format unused on gen7+ */); + const fs_builder ubld = bld.group(1, 0).exec_all(); + fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD); + if (surface.equals(sampler)) { + /* This case is common in GL */ + ubld.MUL(desc, surface, brw_imm_ud(0x101)); + } else { + if (sampler.file == IMM) { + ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8)); + } else { + ubld.SHL(desc, sampler, brw_imm_ud(8)); + ubld.OR(desc, desc, surface); + } + } + if (base_binding_table_index) + ubld.ADD(desc, desc, brw_imm_ud(base_binding_table_index)); + ubld.AND(desc, desc, brw_imm_ud(0xfff)); + + inst->src[0] = component(desc, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + + inst->src[2] = src_payload; + inst->resize_sources(3); + + if (inst->eot) { + /* EOT sampler messages don't make sense to split because it would + * involve ending half of the thread early. + */ + assert(inst->group == 0); + /* We need to use SENDC for EOT sampler messages */ + inst->check_tdr = true; + inst->send_has_side_effects = true; + } + /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */ assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE); } diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 093a5751e2d..4e913eb8d80 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -421,7 +421,7 @@ private: void generate_barrier(fs_inst *inst, struct brw_reg src); bool generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); - void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg surface_index, struct brw_reg sampler_index); void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 0c9feb63a8c..35762b43615 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -965,10 +965,11 @@ fs_generator::generate_get_buffer_size(fs_inst *inst, } void -fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, +fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg surface_index, struct brw_reg sampler_index) { + assert(devinfo->gen < 7); assert(inst->size_written % REG_SIZE == 0); int msg_type = -1; uint32_t simd_mode; @@ -1037,71 +1038,26 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; - case SHADER_OPCODE_TXL_LZ: - assert(devinfo->gen >= 9); - if (inst->shadow_compare) { - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ; - } else { - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LZ; - } - break; case SHADER_OPCODE_TXS: - case SHADER_OPCODE_IMAGE_SIZE: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; break; case SHADER_OPCODE_TXD: - if (inst->shadow_compare) { - /* Gen7.5+. Otherwise, lowered in NIR */ - assert(devinfo->gen >= 8 || devinfo->is_haswell); - msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; - } + assert(!inst->shadow_compare); + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; - case SHADER_OPCODE_TXF_LZ: - assert(devinfo->gen >= 9); - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD_LZ; - break; - case SHADER_OPCODE_TXF_CMS_W: - assert(devinfo->gen >= 9); - msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; - break; case SHADER_OPCODE_TXF_CMS: - if (devinfo->gen >= 7) - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; - else - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; - break; - case SHADER_OPCODE_TXF_UMS: - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; - break; - case SHADER_OPCODE_TXF_MCS: - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_LOD: msg_type = GEN5_SAMPLER_MESSAGE_LOD; break; case SHADER_OPCODE_TG4: - if (inst->shadow_compare) { - assert(devinfo->gen >= 7); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; - } else { - assert(devinfo->gen >= 6); - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; - } - break; - case SHADER_OPCODE_TG4_OFFSET: - assert(devinfo->gen >= 7); - if (inst->shadow_compare) { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; - } else { - msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO; - } + assert(devinfo->gen == 6); + assert(!inst->shadow_compare); + msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; break; case SHADER_OPCODE_SAMPLEINFO: msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; @@ -1180,16 +1136,14 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src dst = vec16(dst); } - assert(devinfo->gen < 7 || inst->header_size == 0 || - src.file == BRW_GENERAL_REGISTER_FILE); - assert(sampler_index.type == BRW_REGISTER_TYPE_UD); /* Load the message header if present. If there's a texture offset, * we need to set it up explicitly and load the offset bitfield. * Otherwise, we can use an implied move from g0 to the first message reg. */ - if (inst->header_size != 0 && devinfo->gen < 7) { + struct brw_reg src = brw_null_reg(); + if (inst->header_size != 0) { if (devinfo->gen < 6 && !inst->offset) { /* Set up an implied move from g0 to the MRF. */ src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -1218,83 +1172,28 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src uint32_t base_binding_table_index; switch (inst->opcode) { case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: base_binding_table_index = prog_data->binding_table.gather_texture_start; break; - case SHADER_OPCODE_IMAGE_SIZE: - base_binding_table_index = prog_data->binding_table.image_start; - break; default: base_binding_table_index = prog_data->binding_table.texture_start; break; } - if (surface_index.file == BRW_IMMEDIATE_VALUE && - sampler_index.file == BRW_IMMEDIATE_VALUE) { - uint32_t surface = surface_index.ud; - uint32_t sampler = sampler_index.ud; - - brw_SAMPLE(p, - retype(dst, BRW_REGISTER_TYPE_UW), - inst->base_mrf, - src, - surface + base_binding_table_index, - sampler % 16, - msg_type, - inst->size_written / REG_SIZE, - inst->mlen, - inst->header_size != 0, - simd_mode, - return_format); - } else { - /* Non-const sampler index */ - - struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); - struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_set_default_exec_size(p, BRW_EXECUTE_1); + assert(surface_index.file == BRW_IMMEDIATE_VALUE); + assert(sampler_index.file == BRW_IMMEDIATE_VALUE); - if (brw_regs_equal(&surface_reg, &sampler_reg)) { - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); - } else { - if (sampler_reg.file == BRW_IMMEDIATE_VALUE) { - brw_OR(p, addr, surface_reg, brw_imm_ud(sampler_reg.ud << 8)); - } else { - brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); - brw_OR(p, addr, addr, surface_reg); - } - } - if (base_binding_table_index) - brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); - brw_AND(p, addr, addr, brw_imm_ud(0xfff)); - - brw_pop_insn_state(p); - - /* dst = send(offset, a0.0 | ) */ - brw_send_indirect_message( - p, BRW_SFID_SAMPLER, dst, src, addr, - brw_message_desc(devinfo, inst->mlen, inst->size_written / REG_SIZE, - inst->header_size) | - brw_sampler_desc(devinfo, - 0 /* surface */, - 0 /* sampler */, - msg_type, - simd_mode, - return_format)); - - /* visitor knows more than we do about the surface limit required, - * so has already done marking. - */ - } - - if (is_combined_send) { - brw_inst_set_eot(p->devinfo, brw_last_inst, true); - brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC); - } + brw_SAMPLE(p, + retype(dst, BRW_REGISTER_TYPE_UW), + inst->base_mrf, + src, + surface_index.ud + base_binding_table_index, + sampler_index.ud % 16, + msg_type, + inst->size_written / REG_SIZE, + inst->mlen, + inst->header_size != 0, + simd_mode, + return_format); } @@ -2170,23 +2069,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_LZ: case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXF_CMS_W: - case SHADER_OPCODE_TXF_UMS: - case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXL_LZ: case SHADER_OPCODE_TXS: case SHADER_OPCODE_LOD: case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1], src[2]); - break; - - case SHADER_OPCODE_IMAGE_SIZE: - generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0)); + assert(inst->src[0].file == BAD_FILE); + generate_tex(inst, dst, src[1], src[2]); break; case FS_OPCODE_DDX_COARSE: diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index f453bb42574..46d3111045d 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -416,6 +416,23 @@ schedule_node::set_latency_gen7(bool is_haswell) case SHADER_OPCODE_SEND: switch (inst->sfid) { + case BRW_SFID_SAMPLER: { + unsigned msg_type = (inst->desc >> 12) & 0x1f; + switch (msg_type) { + case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO: + case GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO: + /* See also SHADER_OPCODE_TXS */ + latency = 100; + break; + + default: + /* See also SHADER_OPCODE_TEX */ + latency = 200; + break; + } + break; + } + case GEN6_SFID_DATAPORT_RENDER_CACHE: switch ((inst->desc >> 14) & 0x1f) { case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE: -- 2.30.2