From 942a4ec18f7e713b962e7acc451fbd50ff9c4042 Mon Sep 17 00:00:00 2001 From: Chris Forbes Date: Sun, 31 Mar 2013 21:33:51 +1300 Subject: [PATCH] i965/fs: Add support for ir_tg4 Lowers ir_tg4 (from textureGather and textureGatherOffset builtins) to SHADER_OPCODE_TG4. The usual post-sampling swizzle workaround can't work for ir_tg4, so avoid doing that: * For R/G/B/A swizzles use the hardware channel select (lives in the same dword in the header as the texel offset), and then don't do anything afterward in the shader. * For 0/1 swizzles blast the appropriate constant over all the output channels instead of sampling. V2: Avoid duplicating header enabling block V3: Avoid sampling at all, for degenerate swizzles. Signed-off-by: Chris Forbes Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 62 +++++++++++++++++++- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 6d93132cd05..cf6379c26a1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -213,6 +213,7 @@ public: void visit(ir_emit_vertex *); void visit(ir_end_primitive *); + uint32_t gather_channel(ir_texture *ir, int sampler); void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); bool can_do_source_mods(fs_inst *inst); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 89d410219af..262cead1cce 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1167,6 +1167,12 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break; + case ir_tg4: + inst = emit(SHADER_OPCODE_TG4, dst); + break; + default: + fail("unrecognized texture opcode"); + break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -1191,9 +1197,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, bool header_present = false; int offsets[3]; - if (ir->offset && ir->op != ir_txf) { - /* The offsets set up by the ir_texture visitor are in the + if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) { + /* * The offsets set up by the ir_texture visitor are in the * m1 header, so we can't go headerless. + * + * * ir4_tg4 needs to place its channel select in the header, + * for interaction with ARB_texture_swizzle */ header_present = true; mlen++; @@ -1209,6 +1218,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, switch (ir->op) { case ir_tex: case ir_lod: + case ir_tg4: break; case ir_txb: emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); @@ -1323,6 +1333,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break; case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break; case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break; + case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -1450,6 +1461,24 @@ fs_visitor::visit(ir_texture *ir) */ int texunit = fp->Base.SamplerUnits[sampler]; + if (ir->op == ir_tg4) { + /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother + * emitting anything other than setting up the constant result. + */ + int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0); + if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { + + fs_reg res = fs_reg(this, glsl_type::vec4_type); + this->result = res; + + for (int i=0; i<4; i++) { + emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f))); + res.reg_offset++; + } + return; + } + } + /* Should be lowered by do_lower_texture_projection */ assert(!ir->projector); @@ -1477,6 +1506,7 @@ fs_visitor::visit(ir_texture *ir) switch (ir->op) { case ir_tex: case ir_lod: + case ir_tg4: break; case ir_txb: ir->lod_info.bias->accept(this); @@ -1499,6 +1529,8 @@ fs_visitor::visit(ir_texture *ir) ir->lod_info.sample_index->accept(this); sample_index = this->result; break; + default: + assert(!"Unrecognized texture opcode"); }; /* Writemasking doesn't eliminate channels on SIMD8 texture @@ -1523,6 +1555,9 @@ fs_visitor::visit(ir_texture *ir) if (ir->offset != NULL && ir->op != ir_txf) inst->texture_offset = brw_texture_offset(ir->offset->as_constant()); + if (ir->op == ir_tg4) + inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17 + inst->sampler = sampler; if (ir->shadow_comparitor) @@ -1542,6 +1577,24 @@ fs_visitor::visit(ir_texture *ir) swizzle_result(ir, dst, sampler); } +/** + * Set up the gather channel based on the swizzle, for gather4. + */ +uint32_t +fs_visitor::gather_channel(ir_texture *ir, int sampler) +{ + int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0 /* red */); + switch (swiz) { + case SWIZZLE_X: return 0; + case SWIZZLE_Y: return 1; + case SWIZZLE_Z: return 2; + case SWIZZLE_W: return 3; + default: + assert(!"Not reached"); /* zero, one swizzles handled already */ + return 0; + } +} + /** * Swizzle the result of a texture result. This is necessary for * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. @@ -1551,7 +1604,10 @@ fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) { this->result = orig_val; - if (ir->op == ir_txs || ir->op == ir_lod) + /* txs,lod don't actually sample the texture, so swizzling the result + * makes no sense. + */ + if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4) return; if (ir->type == glsl_type::float_type) { -- 2.30.2