From 07e00b3040d6da381595c65db5afe597f20d99fc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 15 Feb 2012 13:33:07 -0800 Subject: [PATCH] i965: Split the VS binding table to a separate table. This is a step toward making the samplers/binding tables reflect sampler uniform mappings instead of embedding those in the programs. No significant performance difference on the microbenchmark (n=10). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 34 ++++++++++--- src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +- src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/brw_state_upload.c | 3 ++ src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vs.c | 5 ++ .../drivers/dri/i965/brw_vs_surface_state.c | 51 ++++++++++++++++++- .../drivers/dri/i965/brw_wm_surface_state.c | 8 +-- src/mesa/drivers/dri/i965/gen7_vs_state.c | 3 +- 9 files changed, 94 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 98f68e7c381..44a01e69ba5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -409,6 +409,8 @@ struct brw_vs_prog_data { bool uses_new_param_layout; bool uses_vertexid; bool userclip; + + int num_surfaces; }; @@ -468,7 +470,7 @@ struct brw_vs_ouput_sizes { * (VS, HS, DS, GS, PS), we currently share a single binding table for all of * them. This is purely for convenience. * - * Currently our binding tables are (arbitrarily) programmed as follows: + * Currently our SOL/WM binding tables are (arbitrarily) programmed as follows: * * +-------------------------------+ * | 0 | Draw buffer 0 | . @@ -476,18 +478,28 @@ struct brw_vs_ouput_sizes { * | : | : | > Only relevant to the WM. 
* | 7 | Draw buffer 7 | / * |-----|-------------------------| ` - * | 8 | VS Pull Constant Buffer | - * | 9 | WM Pull Constant Buffer | + * | 8 | WM Pull Constant Buffer | * |-----|-------------------------| - * | 10 | Texture 0 | + * | 9 | Texture 0 | * | . | . | * | : | : | - * | 25 | Texture 15 | + * | 24 | Texture 15 | * +-----|-------------------------+ - * | 26 | SOL Binding 0 | + * | 25 | SOL Binding 0 | + * | . | . | + * | : | : | + * | 88 | SOL Binding 63 | + * +-------------------------------+ + * + * Our VS binding tables are programmed as follows: + * + * +-----+-------------------------+ ` + * | 0 | VS Pull Constant Buffer | + * +-----+-------------------------+ + * | 1 | Texture 0 | * | . | . | * | : | : | - * | 89 | SOL Binding 63 | + * | 16 | Texture 15 | * +-------------------------------+ * * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be @@ -495,7 +507,6 @@ struct brw_vs_ouput_sizes { * first so we can use headerless render target writes for RT 0. */ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0) #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1) #define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t)) #define SURF_INDEX_SOL_BINDING(t) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t)) @@ -503,6 +514,10 @@ struct brw_vs_ouput_sizes { /** Maximum size of the binding table. 
*/ #define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS) +#define SURF_INDEX_VERT_CONST_BUFFER (0) +#define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t)) +#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + enum brw_cache_id { BRW_BLEND_STATE, BRW_DEPTH_STENCIL_STATE, @@ -841,6 +856,9 @@ struct brw_context */ uint8_t *ra_reg_to_grf; /** @} */ + + uint32_t bind_bo_offset; + uint32_t surf_offset[BRW_MAX_VS_SURFACES]; } vs; struct { diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 0343ae19073..7bc7e1c1025 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -77,7 +77,7 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2)); - OUT_BATCH(brw->bind.bo_offset); + OUT_BATCH(brw->vs.bind_bo_offset); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ @@ -115,7 +115,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) GEN6_BINDING_TABLE_MODIFY_GS | GEN6_BINDING_TABLE_MODIFY_PS | (4 - 2)); - OUT_BATCH(brw->bind.bo_offset); /* vs */ + OUT_BATCH(brw->vs.bind_bo_offset); /* vs */ OUT_BATCH(brw->bind.bo_offset); /* gs */ OUT_BATCH(brw->bind.bo_offset); /* wm/ps */ ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 2dd566538ee..59a2bb32501 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -71,6 +71,7 @@ extern const struct brw_tracked_state brw_wm_prog; extern const struct brw_tracked_state brw_renderbuffer_surfaces; extern const struct brw_tracked_state brw_texture_surfaces; extern const struct brw_tracked_state brw_binding_table; +extern const struct brw_tracked_state brw_vs_binding_table; extern const struct brw_tracked_state brw_wm_unit; extern const struct 
brw_tracked_state brw_psp_urb_cbs; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index ea506950c46..28e4d26209e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -70,6 +70,7 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_wm_pull_constants, &brw_renderbuffer_surfaces, &brw_texture_surfaces, + &brw_vs_binding_table, &brw_binding_table, &brw_samplers, @@ -146,6 +147,7 @@ static const struct brw_tracked_state *gen6_atoms[] = &gen6_renderbuffer_surfaces, &brw_texture_surfaces, &gen6_sol_surface, + &brw_vs_binding_table, &brw_binding_table, &brw_samplers, @@ -214,6 +216,7 @@ const struct brw_tracked_state *gen7_atoms[] = &brw_wm_pull_constants, &gen6_renderbuffer_surfaces, &brw_texture_surfaces, + &brw_vs_binding_table, &brw_binding_table, &gen7_samplers, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index f9eed61d92c..9df7b11f5ad 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -465,7 +465,7 @@ vec4_visitor::generate_tex(vec4_instruction *inst, dst, inst->base_mrf, src, - SURF_INDEX_TEXTURE(inst->sampler), + SURF_INDEX_VS_TEXTURE(inst->sampler), inst->sampler, WRITEMASK_XYZW, msg_type, diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index ca205cdf79a..bd703c7389a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -247,6 +247,11 @@ do_vs_prog(struct brw_context *brw, brw_old_vs_emit(&c); } + if (c.prog_data.nr_pull_params) + c.prog_data.num_surfaces = 1; + if (c.vp->program.Base.SamplersUsed) + c.prog_data.num_surfaces = BRW_MAX_VS_SURFACES; + /* Scratch space is used for register spilling */ if (c.last_scratch) { c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 2f7b211d5ec..b29e414a54e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -65,7 +65,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw) if (brw->vs.const_bo) { drm_intel_bo_unreference(brw->vs.const_bo); brw->vs.const_bo = NULL; - brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0; + brw->vs.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0; brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; } return; @@ -97,7 +97,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw) const int surf = SURF_INDEX_VERT_CONST_BUFFER; intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, params->NumParameters, - &brw->bind.surf_offset[surf]); + &brw->vs.surf_offset[surf]); brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; } @@ -110,3 +110,50 @@ const struct brw_tracked_state brw_vs_pull_constants = { }, .emit = brw_upload_vs_pull_constants, }; + +/** + * Constructs the binding table for the VS surface state, which maps unit + * numbers to surface state objects. + */ +static void +brw_vs_upload_binding_table(struct brw_context *brw) +{ + uint32_t *bind; + int i; + + /* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or + * pull constants. + */ + if (brw->vs.prog_data->num_surfaces == 0) { + if (brw->vs.bind_bo_offset != 0) { + brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; + brw->vs.bind_bo_offset = 0; + } + return; + } + + /* Might want to calculate nr_surfaces first, to avoid taking up so much + * space for the binding table. 
+ */ + bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + sizeof(uint32_t) * BRW_MAX_SURFACES, + 32, &brw->vs.bind_bo_offset); + + /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */ + for (i = 0; i < BRW_MAX_VS_SURFACES; i++) { + bind[i] = brw->vs.surf_offset[i]; + } + + brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; +} + +const struct brw_tracked_state brw_vs_binding_table = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_BATCH | + BRW_NEW_VS_CONSTBUF | + BRW_NEW_SURFACES), + .cache = CACHE_NEW_VS_PROG + }, + .emit = brw_vs_upload_binding_table, +}; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 97ae489ea96..a975b2d1c55 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1097,6 +1097,10 @@ brw_update_texture_surfaces(struct brw_context *brw) } else { brw->bind.surf_offset[surf] = 0; } + + /* For now, just mirror the texture setup to the VS slots. 
*/ + brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(i)] = + brw->bind.surf_offset[surf]; } brw->state.dirty.brw |= BRW_NEW_SURFACES; @@ -1128,12 +1132,11 @@ brw_upload_binding_table(struct brw_context *brw) sizeof(uint32_t) * BRW_MAX_SURFACES, 32, &brw->bind.bo_offset); - /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */ + /* BRW_NEW_SURFACES */ for (i = 0; i < BRW_MAX_SURFACES; i++) { bind[i] = brw->bind.surf_offset[i]; } - brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE; } @@ -1141,7 +1144,6 @@ const struct brw_tracked_state brw_binding_table = { .dirty = { .mesa = 0, .brw = (BRW_NEW_BATCH | - BRW_NEW_VS_CONSTBUF | BRW_NEW_SURFACES), .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index a3d652cb6f7..73822e3350c 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -37,9 +37,10 @@ upload_vs_state(struct brw_context *brw) gen7_emit_vs_workaround_flush(intel); + /* BRW_NEW_VS_BINDING_TABLE */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2)); - OUT_BATCH(brw->bind.bo_offset); + OUT_BATCH(brw->vs.bind_bo_offset); ADVANCE_BATCH(); /* CACHE_NEW_SAMPLER */ -- 2.30.2