From 7e9559c9ba4dd82aca83b08d039103e38a3f94be Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 14 Aug 2013 20:25:40 -0700 Subject: [PATCH] i965/vs: Rework binding table size calculation. Unlike the FS, the VS backend already computed the binding table size. However, it did so poorly: after compilation, it looked to see if any pull constants/textures/UBOs were in use, and set num_surfaces to the maximum surface index for that category. If the VS only used a single texture or UBO, this overcounted by quite a bit. The shader time surface was also noted at state upload time (during drawing), not at compile time, which is inefficient. I believe it also had an off-by-one error. This patch computes it accurately, while also simplifying the code. It also renames num_surfaces to binding_table_size, since num_surfaces wasn't actually the number of surfaces used. For example, a VS that used one UBO and no other surfaces would have set num_surfaces to SURF_INDEX_VS_UBO(1) == 18, rather than 1. A bit of a misnomer there. Signed-off-by: Kenneth Graunke Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_vec4.h | 2 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 16 ++++++++++++++++ src/mesa/drivers/dri/i965/brw_vs.c | 10 ---------- src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 6 +----- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 9178485cb5f..dae3219a09a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -493,7 +493,7 @@ struct brw_vec4_prog_data { */ GLuint urb_entry_size; - int num_surfaces; + unsigned binding_table_size; /* These pointers must appear last. See brw_vec4_prog_data_compare(). 
*/ const float **param; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 833bef17d0e..111b1050983 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -596,6 +596,8 @@ private: void generate_unpack_flags(vec4_instruction *inst, struct brw_reg dst); + void mark_surface_used(unsigned surf_index); + struct brw_context *brw; struct gl_context *ctx; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 5417c82dbb2..ce9bcd08d38 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -151,6 +151,15 @@ vec4_generator::~vec4_generator() { } +void +vec4_generator::mark_surface_used(unsigned surf_index) +{ + assert(surf_index < BRW_MAX_VS_SURFACES); + + prog_data->binding_table_size = MAX2(prog_data->binding_table_size, + surf_index + 1); +} + void vec4_generator::generate_math1_gen4(vec4_instruction *inst, struct brw_reg dst, @@ -384,6 +393,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, inst->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, return_format); + + mark_surface_used(SURF_INDEX_VS_TEXTURE(inst->sampler)); } void @@ -614,6 +625,8 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, 2, /* mlen */ true, /* header_present */ 1 /* rlen */); + + mark_surface_used(surf_index); } void @@ -637,6 +650,8 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, false, /* no header */ BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); + + mark_surface_used(surf_index.dw1.ud); } /** @@ -869,6 +884,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME); + mark_surface_used(SURF_INDEX_VS_SHADER_TIME); break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 
9e18ef0119a..dcd14a319df 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -306,16 +306,6 @@ do_vs_prog(struct brw_context *brw, return false; } - if (prog_data.base.nr_pull_params) - prog_data.base.num_surfaces = 1; - if (c.vp->program.Base.SamplersUsed) - prog_data.base.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT); - if (prog && - prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) { - prog_data.base.num_surfaces = - SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks); - } - /* Scratch space is used for register spilling */ if (c.base.last_scratch) { perf_debug("Vertex shader triggered register spilling. " diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index eaeff956769..2c2d713b98c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -140,16 +140,12 @@ brw_vs_upload_binding_table(struct brw_context *brw) if (INTEL_DEBUG & DEBUG_SHADER_TIME) { gen7_create_shader_time_surface(brw, &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]); - - assert(brw->vs.prog_data->base.num_surfaces - <= SURF_INDEX_VS_SHADER_TIME); - brw->vs.prog_data->base.num_surfaces = SURF_INDEX_VS_SHADER_TIME; } /* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or * pull constants. */ - if (brw->vs.prog_data->base.num_surfaces == 0) { + if (brw->vs.prog_data->base.binding_table_size == 0) { if (brw->vs.bind_bo_offset != 0) { brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; brw->vs.bind_bo_offset = 0; -- 2.30.2