From 797d606127c131a6ccff28150495d2b1f3f7e46e Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 20 Feb 2015 15:11:49 -0800 Subject: [PATCH] i965: Implement SIMD16 texturing on Gen4. This allows SIMD16 mode to work for a lot more programs. Texturing is also more efficient in SIMD16 mode than SIMD8. Several messages don't actually exist in SIMD8 mode, so we did SIMD16 messages and threw away half of the data. Now we compute real data in both halves. Also, the SIMD16 "sample" message doesn't require all three coordinate components to exist (unlike the SIMD8 one), so we can shorten the message lengths, cutting register usage a bit. I chose to implement the visitor functionality in a separate function, since mixing true SIMD16 with SIMD8 code that uses SIMD16 fallbacks seemed like a mess. The new code bails on a few cases where we'd have to do two SIMD8 messages - we just fall back to SIMD8 for now. Improves performance in "Shadowrun: Dragonfall - Director's Cut" by about 20% on GM45 (measured with LIBGL_SHOW_FPS=1 while standing around in the first mission). v2: Add ir_txf to the has_lod case (caught by Jordan Justen). 
Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs.h | 4 ++ .../drivers/dri/i965/brw_fs_generator.cpp | 28 +++++--- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 68 ++++++++++++++++++- 3 files changed, 90 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 278a8eed76d..cfdbf555d62 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -271,6 +271,10 @@ public: fs_reg shadow_comp, fs_reg lod, fs_reg lod2, int grad_components, uint32_t sampler); + fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, + fs_reg coordinate, int vector_elements, + fs_reg shadow_c, fs_reg lod, + uint32_t sampler); fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst, fs_reg coordinate, int coord_components, fs_reg shadow_comp, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 40e51aa0f82..2743297b053 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -622,16 +622,26 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* Note that G45 and older determines shadow compare and dispatch width * from message length for most messages. 
*/ - assert(dispatch_width == 8); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; - if (inst->shadow_compare) { - assert(inst->mlen == 6); - } else { - assert(inst->mlen <= 4); - } + if (dispatch_width == 8) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + if (inst->shadow_compare) { + assert(inst->mlen == 6); + } else { + assert(inst->mlen <= 4); + } + } else { + if (inst->shadow_compare) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + assert(inst->mlen == 9); + } else { + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + assert(inst->mlen <= 7 && inst->mlen % 2 == 1); + } + } break; case FS_OPCODE_TXB: if (inst->shadow_compare) { + assert(dispatch_width == 8); assert(inst->mlen == 6); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; } else { @@ -642,6 +652,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; case SHADER_OPCODE_TXL: if (inst->shadow_compare) { + assert(dispatch_width == 8); assert(inst->mlen == 6); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; } else { @@ -652,11 +663,12 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; case SHADER_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually */ + assert(dispatch_width == 8); assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; case SHADER_OPCODE_TXF: - assert(inst->mlen == 9); + assert(inst->mlen <= 9 && inst->mlen % 2 == 1); msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD; simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index c538dcff42a..3622e651818 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1433,8 +1433,6 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, bool simd16 = false; fs_reg orig_dst; - no16("SIMD16 texturing on Gen4 not 
supported yet."); - /* g0 header. */ mlen = 1; @@ -1586,6 +1584,69 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, return inst; } +fs_inst * +fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, + fs_reg coordinate, int vector_elements, + fs_reg shadow_c, fs_reg lod, + uint32_t sampler) +{ + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); + bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf; + + if (has_lod && shadow_c.file != BAD_FILE) + no16("TXB and TXL with shadow comparison unsupported in SIMD16."); + + if (op == ir_txd) + no16("textureGrad unsupported in SIMD16."); + + /* Copy the coordinates. */ + for (int i = 0; i < vector_elements; i++) { + emit(MOV(retype(offset(message, i), coordinate.type), coordinate)); + coordinate = offset(coordinate, 1); + } + + fs_reg msg_end = offset(message, vector_elements); + + /* Messages other than sample and ld require all three components */ + if (has_lod || shadow_c.file != BAD_FILE) { + for (int i = vector_elements; i < 3; i++) { + emit(MOV(offset(message, i), fs_reg(0.0f))); + } + } + + if (has_lod) { + fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ? 
+ BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F); + emit(MOV(msg_lod, lod)); + msg_end = offset(msg_lod, 1); + } + + if (shadow_c.file != BAD_FILE) { + fs_reg msg_ref = offset(message, 3 + has_lod); + emit(MOV(msg_ref, shadow_c)); + msg_end = offset(msg_ref, 1); + } + + enum opcode opcode; + switch (op) { + case ir_tex: opcode = SHADER_OPCODE_TEX; break; + case ir_txb: opcode = FS_OPCODE_TXB; break; + case ir_txd: opcode = SHADER_OPCODE_TXD; break; + case ir_txl: opcode = SHADER_OPCODE_TXL; break; + case ir_txs: opcode = SHADER_OPCODE_TXS; break; + case ir_txf: opcode = SHADER_OPCODE_TXF; break; + default: unreachable("not reached"); + } + + fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + inst->base_mrf = message.reg - 1; + inst->mlen = msg_end.reg - inst->base_mrf; + inst->header_present = true; + inst->regs_written = 8; + + return inst; +} + /* gen5's sampler has slots for u, v, r, array index, then optional * parameters like shadow comparitor or LOD bias. If optional * parameters aren't present, those base slots are optional and don't @@ -2148,6 +2209,9 @@ fs_visitor::emit_texture(ir_texture_opcode op, shadow_c, lod, lod2, grad_components, sample_index, sampler, offset_value.file != BAD_FILE); + } else if (dispatch_width == 16) { + inst = emit_texture_gen4_simd16(op, dst, coordinate, coord_components, + shadow_c, lod, sampler); } else { inst = emit_texture_gen4(op, dst, coordinate, coord_components, shadow_c, lod, lod2, grad_components, -- 2.30.2