From: Neil Roberts Date: Tue, 8 Sep 2015 14:52:09 +0000 (+0100) Subject: i965/fs/skl+: Use ld2dms_w instead of ld2dms X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e386fb0dee40d0f2342b43b6750b64c8174463a9;p=mesa.git i965/fs/skl+: Use ld2dms_w instead of ld2dms In order to support 16x MSAA, skl+ has a wider version of ld2dms that takes two parameters for the MCS data. The MCS data retrieved from the ld_mcs instruction already returns 4 or 8 registers and is documented to return zeroes for the mcsh value when the sample count is less than 16. v2: Use get_lowered_simd_width to fall back to SIMD8 instructions when the message length would be too long in SIMD16. Reviewed-by: Ben Widawsky --- diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6433cffc919..0396e13d0c2 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -964,6 +964,8 @@ enum opcode { FS_OPCODE_TXB_LOGICAL, SHADER_OPCODE_TXF_CMS, SHADER_OPCODE_TXF_CMS_LOGICAL, + SHADER_OPCODE_TXF_CMS_W, + SHADER_OPCODE_TXF_CMS_W_LOGICAL, SHADER_OPCODE_TXF_UMS, SHADER_OPCODE_TXF_UMS_LOGICAL, SHADER_OPCODE_TXF_MCS, @@ -1539,6 +1541,7 @@ enum brw_message_target { #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 17 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18 #define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20 +#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W 28 #define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29 #define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30 #define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index df747107188..fd93beaec19 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -622,6 +622,7 @@ static const char *const gen5_sampler_msg_type[] = { [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po", [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c", [HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c", + [GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W] = "ld2dms_w", [GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs", [GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms", [GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss", diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 4cc962613b3..f5294195656 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -717,6 +717,7 @@ fs_inst::components_read(unsigned i) const case SHADER_OPCODE_TXS_LOGICAL: case FS_OPCODE_TXB_LOGICAL: case SHADER_OPCODE_TXF_CMS_LOGICAL: + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: case SHADER_OPCODE_TXF_UMS_LOGICAL: case SHADER_OPCODE_TXF_MCS_LOGICAL: case SHADER_OPCODE_LOD_LOGICAL: @@ -732,6 +733,9 @@ fs_inst::components_read(unsigned i) const /* Texture offset. */ else if (i == 7) return 2; + /* MCS */ + else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL) + return 2; else return 1; @@ -896,6 +900,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: @@ -3920,17 +3925,31 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, coordinate_done = true; break; case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_UMS: case SHADER_OPCODE_TXF_MCS: - if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) { + if (op == SHADER_OPCODE_TXF_UMS || + op == SHADER_OPCODE_TXF_CMS || + op == SHADER_OPCODE_TXF_CMS_W) { bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index); length++; } - if (op == SHADER_OPCODE_TXF_CMS) { + if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) { /* Data from the multisample control surface. */ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs); length++; + + /* On Gen9+ we'll use ld2dms_w instead which has two registers for + * the MCS data. + */ + if (op == SHADER_OPCODE_TXF_CMS_W) { + bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), + mcs.file == IMM ? + mcs : + offset(mcs, bld, 1)); + length++; + } } /* There is no offsetting for this message; just copy in the integer @@ -4144,6 +4163,10 @@ fs_visitor::lower_logical_sends() lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS); break; + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W); + break; + case SHADER_OPCODE_TXF_UMS_LOGICAL: lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS); break; @@ -4336,6 +4359,21 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, else return inst->exec_size; + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: { + /* This opcode can take up to 6 arguments which means that in some + * circumstances it can end up with a message that is too long in SIMD16 + * mode. + */ + const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud; + /* First three arguments are the sample index and the two arguments for + * the MCS data. + */ + if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE) + return 8; + else + return inst->exec_size; + } + case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index e207a77fdc1..28fb620279b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -741,6 +741,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; + case SHADER_OPCODE_TXF_CMS_W: + assert(devinfo->gen >= 9); + msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; + break; case SHADER_OPCODE_TXF_CMS: if (devinfo->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; @@ -2050,6 +2054,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_UMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index ef92098286c..94a9c1b68f2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -208,8 +208,8 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components, fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, ARRAY_SIZE(srcs)); - /* We only care about one reg of response, but the sampler always writes - * 4/8. + /* We only care about one or two regs of response, but the sampler always + * writes 4/8. */ inst->regs_written = 4 * dispatch_width / 8; @@ -295,7 +295,10 @@ fs_visitor::emit_texture(ir_texture_opcode op, opcode = SHADER_OPCODE_TXF_LOGICAL; break; case ir_txf_ms: - opcode = SHADER_OPCODE_TXF_CMS_LOGICAL; + if (devinfo->gen >= 9) + opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL; + else + opcode = SHADER_OPCODE_TXF_CMS_LOGICAL; break; case ir_txs: case ir_query_levels: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 4ea297ade4c..0312024ed1b 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -351,6 +351,10 @@ brw_instruction_name(enum opcode op) return "txf_cms"; case SHADER_OPCODE_TXF_CMS_LOGICAL: return "txf_cms_logical"; + case SHADER_OPCODE_TXF_CMS_W: + return "txf_cms_w"; + case SHADER_OPCODE_TXF_CMS_W_LOGICAL: + return "txf_cms_w_logical"; case SHADER_OPCODE_TXF_UMS: return "txf_ums"; case SHADER_OPCODE_TXF_UMS_LOGICAL: @@ -787,6 +791,7 @@ backend_instruction::is_tex() const opcode == SHADER_OPCODE_TXD || opcode == SHADER_OPCODE_TXF || opcode == SHADER_OPCODE_TXF_CMS || + opcode == SHADER_OPCODE_TXF_CMS_W || opcode == SHADER_OPCODE_TXF_UMS || opcode == SHADER_OPCODE_TXF_MCS || opcode == SHADER_OPCODE_TXL ||