From 85e8e9e000732908b259a7e2cbc1724a1be2d447 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 24 Aug 2012 01:22:58 -0700 Subject: [PATCH] i965: Use linker-assigned sampler IDs in instruction encoding. When assigning uniform locations, the linker assigns each sampler uniform a sequential numerical ID. gl_shader_program::SamplerUnits maps these sampler variable IDs to the actual texture units they reference (specified via glUniform1i). Previously, we encoded this mapping in the SEND instruction encoding: the "sampler" was the texture unit number, and the binding table index was SURF_INDEX_TEXTURE(the texture unit number). This unfortunately meant that whenever the application changed the value of a sampler uniform, we had to recompile the shader to change the SEND instructions. This was horrible for the game Cogs, which repeatedly switches between using texture unit 0 and 1. It also made fragment shader precompiles useless: we'd do the precompile at glLinkShader() time, before the application called glUniform1i to set the sampler values. As soon as it did that, we'd have to recompile, wasting time and space in the program cache. This patch encodes the SamplerUnits indirection in the binding table, sampler state, and sampler default color tables. Instead of baking the texture unit number into the shader, we bake in the sampler variable ID assigned by the linker. Since those never change, we don't need to recompile programs on uniform changes. This does mean that the tables now depend on the linked shader program being used for rendering, rather than simply representing all available texture units. This could cause an increase in state emission. Another plus is that the sampler state and sampler default color tables are now compact: we only emit as many entries as there are sampler uniforms, with no holes in the table since the new sampler IDs are sequential. Previously we had to emit a full 16 entries every time, since the tables tracked the state of all active texture units. Signed-off-by: Kenneth Graunke Acked-by: Paul Berry Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +- .../drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- .../drivers/dri/i965/brw_wm_sampler_state.c | 27 ++++++---- .../drivers/dri/i965/brw_wm_surface_state.c | 51 ++++++++++++++----- .../drivers/dri/i965/gen7_sampler_state.c | 27 ++++++---- 5 files changed, 74 insertions(+), 35 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 601f83c073a..e3d3a98bbdf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1345,7 +1345,7 @@ fs_visitor::visit(ir_texture *ir) if (ir->offset != NULL && ir->op != ir_txf) inst->texture_offset = brw_texture_offset(ir->offset->as_constant()); - inst->sampler = texunit; + inst->sampler = sampler; if (ir->shadow_comparitor) inst->shadow_compare = true; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index c22ba6067cc..629ecb0b6ea 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1897,7 +1897,7 @@ vec4_visitor::visit(ir_texture *ir) inst->header_present = ir->offset || intel->gen < 5; inst->base_mrf = 2; inst->mlen = inst->header_present + 1; /* always at least one */ - inst->sampler = texunit; + inst->sampler = sampler; inst->dst = dst_reg(this, ir->type); inst->shadow_compare = ir->shadow_comparitor != NULL; diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 10deb1da0fb..610ef345f42 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -334,13 +334,14 @@ brw_upload_samplers(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct brw_sampler_state *samplers; - int i; - brw->sampler.count = 0; - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) - brw->sampler.count = i + 1; - } + /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM */ + struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *fs = (struct gl_program *) brw->fragment_program; + + GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed; + + brw->sampler.count = _mesa_bitcount(SamplersUsed); if (brw->sampler.count == 0) return; @@ -350,9 +351,13 @@ brw_upload_samplers(struct brw_context *brw) 32, &brw->sampler.offset); memset(samplers, 0, brw->sampler.count * sizeof(*samplers)); - for (i = 0; i < brw->sampler.count; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) - brw_update_sampler_state(brw, i, i, &samplers[i]); + for (unsigned s = 0; s < brw->sampler.count; s++) { + if (SamplersUsed & (1 << s)) { + const unsigned unit = (fs->SamplersUsed & (1 << s)) ? + fs->SamplerUnits[s] : vs->SamplerUnits[s]; + if (ctx->Texture.Unit[unit]._ReallyEnabled) + brw_update_sampler_state(brw, unit, s, &samplers[s]); + } } brw->state.dirty.cache |= CACHE_NEW_SAMPLER; @@ -361,7 +366,9 @@ brw_upload_samplers(struct brw_context *brw) const struct brw_tracked_state brw_samplers = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_FRAGMENT_PROGRAM, .cache = 0 }, .emit = brw_upload_samplers, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 0c87b841d71..eefa427f1a5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1238,22 +1238,45 @@ const struct brw_tracked_state gen6_renderbuffer_surfaces = { static void brw_update_texture_surfaces(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + + /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM: + * Unfortunately, we're stuck using the gl_program structs until the + * ARB_fragment_program front-end gets converted to GLSL IR. These + * have the downside that SamplerUnits is split and only contains the + * mappings for samplers active in that stage. + */ + struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *fs = (struct gl_program *) brw->fragment_program; - for (unsigned i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - const GLuint surf = SURF_INDEX_TEXTURE(i); + unsigned num_samplers = _mesa_bitcount(vs->SamplersUsed | fs->SamplersUsed); - /* _NEW_TEXTURE */ - if (texUnit->_ReallyEnabled) { - brw->intel.vtbl.update_texture_surface(ctx, i, brw->wm.surf_offset, surf); - } else { - brw->wm.surf_offset[surf] = 0; + for (unsigned s = 0; s < num_samplers; s++) { + brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0; + brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0; + + if (vs->SamplersUsed & (1 << s)) { + const unsigned unit = vs->SamplerUnits[s]; + + /* _NEW_TEXTURE */ + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + intel->vtbl.update_texture_surface(ctx, unit, + brw->vs.surf_offset, + SURF_INDEX_VS_TEXTURE(s)); + } } - /* For now, just mirror the texture setup to the VS slots. */ - brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(i)] = - brw->wm.surf_offset[surf]; + if (fs->SamplersUsed & (1 << s)) { + const unsigned unit = fs->SamplerUnits[s]; + + /* _NEW_TEXTURE */ + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + intel->vtbl.update_texture_surface(ctx, unit, + brw->wm.surf_offset, + SURF_INDEX_TEXTURE(s)); + } + } } brw->state.dirty.brw |= BRW_NEW_SURFACES; @@ -1262,7 +1285,9 @@ brw_update_texture_surfaces(struct brw_context *brw) const struct brw_tracked_state brw_texture_surfaces = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_FRAGMENT_PROGRAM, .cache = 0 }, .emit = brw_update_texture_surfaces, diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index c3b67e85140..ea634a9be09 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -188,13 +188,14 @@ gen7_upload_samplers(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct gen7_sampler_state *samplers; - int i; - brw->sampler.count = 0; - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) - brw->sampler.count = i + 1; - } + /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM */ + struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *fs = (struct gl_program *) brw->fragment_program; + + GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed; + + brw->sampler.count = _mesa_bitcount(SamplersUsed); if (brw->sampler.count == 0) return; @@ -204,9 +205,13 @@ gen7_upload_samplers(struct brw_context *brw) 32, &brw->sampler.offset); memset(samplers, 0, brw->sampler.count * sizeof(*samplers)); - for (i = 0; i < brw->sampler.count; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) - gen7_update_sampler_state(brw, i, i, &samplers[i]); + for (unsigned s = 0; s < brw->sampler.count; s++) { + if (SamplersUsed & (1 << s)) { + const unsigned unit = (fs->SamplersUsed & (1 << s)) ? + fs->SamplerUnits[s] : vs->SamplerUnits[s]; + if (ctx->Texture.Unit[unit]._ReallyEnabled) + gen7_update_sampler_state(brw, unit, s, &samplers[s]); + } } brw->state.dirty.cache |= CACHE_NEW_SAMPLER; @@ -215,7 +220,9 @@ gen7_upload_samplers(struct brw_context *brw) const struct brw_tracked_state gen7_samplers = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_FRAGMENT_PROGRAM, .cache = 0 }, .emit = gen7_upload_samplers, -- 2.30.2