From: Kenneth Graunke Date: Sun, 30 Oct 2011 23:03:13 +0000 (-0700) Subject: i965: Use a single binding table for all pipeline stages. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e7c29c5de82f6de3d30ed1143d9672dd2e25f0e7;p=mesa.git i965: Use a single binding table for all pipeline stages. Although the hardware supports separate binding tables for each pipeline stage, we don't see much advantage over a single shared table. Consider the contents of the binding table: - Textures (16) - Draw buffers (8) - Pull constant buffers (1 for VS, 1 for WM) OpenGL's texture bindings are global: the same set of textures is available to all shader targets. So our binding table entries for textures would be exactly the same in every table. There are only two pull constant buffers (not many), and although draw buffers aren't interesting to the VS, it shouldn't hurt to have them in the table. The hardware supports up to 254 binding table entries, and we currently only use 26. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt Reviewed-by: Paul Berry --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c889e54021f..5d70345e668 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -404,31 +404,48 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_DRAW_BUFFERS 8 /** - * Size of our surface binding table for the WM. - * This contains pointers to the drawing surfaces and current texture - * objects and shader constant buffers (+2). - */ -#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) - -/** - * Helpers to convert drawing buffers, textures and constant buffers - * to surface binding table indexes, for WM. + * Helpers to create Surface Binding Table indexes for draw buffers, + * textures, and constant buffers. + * + * Shader threads access surfaces via numeric handles, rather than directly + * using pointers. The binding table maps these numeric handles to the + * address of the actual buffer. + * + * For example, a shader might ask to sample from "surface 7." In this case, + * bind[7] would contain a pointer to a texture. + * + * Although the hardware supports separate binding tables per pipeline stage + * (VS, HS, DS, GS, PS), we currently share a single binding table for all of + * them. This is purely for convenience. + * + * Currently our binding tables are (arbitrarily) programmed as follows: + * + * +-------------------------------+ + * | 0 | Draw buffer 0 | . + * | . | . | \ + * | : | : | > Only relevant to the WM. + * | 7 | Draw buffer 7 | / + * |-----|-------------------------| ` + * | 8 | VS Pull Constant Buffer | + * | 9 | WM Pull Constant Buffer | + * |-----|-------------------------| + * | 10 | Texture 0 | + * | . | . | + * | : | : | + * | 25 | Texture 15 | + * +-------------------------------+ + * + * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be + * the identity function or things will break. We do want to keep draw buffers + * first so we can use headerless render target writes for RT 0. */ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) -#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) - -/** - * Size of surface binding table for the VS. - * Only one constant buffer for now. - */ -#define BRW_VS_MAX_SURF 1 - -/** - * Only a VS constant buffer - */ -#define SURF_INDEX_VERT_CONST_BUFFER 0 +#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0) +#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1) +#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t)) +/** Maximum size of the binding table. */ +#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2) enum brw_cache_id { BRW_BLEND_STATE, @@ -722,6 +739,12 @@ struct brw_context GLuint last_bufsz; } curbe; + struct { + /** Binding table of pointers to surf_bo entries */ + uint32_t bo_offset; + uint32_t surf_offset[BRW_MAX_SURFACES]; + } bind; + struct { struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ @@ -732,10 +755,6 @@ struct brw_context uint32_t prog_offset; uint32_t state_offset; - /** Binding table of pointers to surf_bo entries */ - uint32_t bind_bo_offset; - uint32_t surf_offset[BRW_VS_MAX_SURF]; - uint32_t push_const_offset; /* Offset in the batchbuffer */ int push_const_size; /* in 256-bit register increments */ @@ -814,9 +833,6 @@ struct brw_context /** Offset in the program cache to the WM program */ uint32_t prog_offset; - /** Binding table of pointers to surf_bo entries */ - uint32_t bind_bo_offset; - uint32_t surf_offset[BRW_WM_MAX_SURF]; uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */ drm_intel_bo *const_bo; /* pull constant buffer. */ diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index fb1d3e11162..514c990ed25 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -76,11 +76,11 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2)); - OUT_BATCH(brw->vs.bind_bo_offset); + OUT_BATCH(brw->bind.bo_offset); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_BATCH(brw->wm.bind_bo_offset); + OUT_BATCH(brw->bind.bo_offset); ADVANCE_BATCH(); } @@ -114,9 +114,9 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) GEN6_BINDING_TABLE_MODIFY_GS | GEN6_BINDING_TABLE_MODIFY_PS | (4 - 2)); - OUT_BATCH(brw->vs.bind_bo_offset); /* vs */ + OUT_BATCH(brw->bind.bo_offset); /* vs */ OUT_BATCH(0); /* gs */ - OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */ + OUT_BATCH(brw->bind.bo_offset); /* wm/ps */ ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 9b11c6f1874..c27399678b2 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -71,7 +71,7 @@ extern const struct brw_tracked_state brw_wm_prog; extern const struct brw_tracked_state brw_wm_samplers; extern const struct brw_tracked_state brw_renderbuffer_surfaces; extern const struct brw_tracked_state brw_texture_surfaces; -extern const struct brw_tracked_state brw_wm_binding_table; +extern const struct brw_tracked_state brw_binding_table; extern const struct brw_tracked_state brw_wm_unit; extern const struct brw_tracked_state brw_psp_urb_cbs; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7f32c20da5d..c94b0ebbd72 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -69,7 +69,7 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_vs_surfaces, /* must do before unit */ &brw_renderbuffer_surfaces, /* must do before unit */ &brw_texture_surfaces, /* must do before unit */ - &brw_wm_binding_table, + &brw_binding_table, &brw_wm_samplers, /* These set up state for brw_psp_urb_cbs */ @@ -141,7 +141,7 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_vs_surfaces, /* must do before unit */ &brw_renderbuffer_surfaces, /* must do before unit */ &brw_texture_surfaces, /* must do before unit */ - &brw_wm_binding_table, + &brw_binding_table, &brw_wm_samplers, &gen6_sampler_state, @@ -206,7 +206,7 @@ const struct brw_tracked_state *gen7_atoms[] = &brw_vs_surfaces, /* must do before unit */ &brw_renderbuffer_surfaces, /* must do before unit */ &brw_texture_surfaces, /* must do before unit */ - &brw_wm_binding_table, + &brw_binding_table, &gen7_samplers, diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 4c99185010f..66d5545b7a9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -119,19 +119,17 @@ brw_update_vs_constant_surface( struct gl_context *ctx, (struct brw_vertex_program *) brw->vertex_program; const struct gl_program_parameter_list *params = vp->program.Base.Parameters; - assert(surf == 0); - /* If there's no constant buffer, then no surface BO is needed to point at * it. */ if (brw->vs.const_bo == NULL) { - brw->vs.surf_offset[surf] = 0; + brw->bind.surf_offset[surf] = 0; return; } intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, params->NumParameters, - &brw->vs.surf_offset[surf]); + &brw->bind.surf_offset[surf]); } /** @@ -141,32 +139,11 @@ static void brw_upload_vs_surfaces(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; - uint32_t *bind; - int i; - int nr_surfaces = 0; /* BRW_NEW_VS_CONSTBUF */ if (brw->vs.const_bo) { - nr_surfaces = 1; brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); } - - if (nr_surfaces != 0) { - bind = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - sizeof(uint32_t) * nr_surfaces, - 32, &brw->vs.bind_bo_offset); - - for (i = 0; i < nr_surfaces; i++) { - /* BRW_NEW_VS_CONSTBUF */ - bind[i] = brw->vs.surf_offset[i]; - } - brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; - } else { - if (brw->vs.bind_bo_offset) { - brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; - brw->vs.bind_bo_offset = 0; - } - } } const struct brw_tracked_state brw_vs_surfaces = { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 3bda5fa912a..377b19dae5d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -271,7 +271,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth); surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->wm.surf_offset[surf_index]); + 6 * 4, 32, &brw->bind.surf_offset[surf_index]); surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | @@ -298,7 +298,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->wm.surf_offset[surf_index] + 4, + brw->bind.surf_offset[surf_index] + 4, intelObj->mt->region->bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); } @@ -375,7 +375,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw) if (brw->wm.const_bo) { drm_intel_bo_unreference(brw->wm.const_bo); brw->wm.const_bo = NULL; - brw->wm.surf_offset[surf_index] = 0; + brw->bind.surf_offset[surf_index] = 0; brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } return; @@ -396,7 +396,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw) intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, params->NumParameters, - &brw->wm.surf_offset[surf_index]); + &brw->bind.surf_offset[surf_index]); brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } @@ -417,7 +417,7 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) uint32_t *surf; surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->wm.surf_offset[unit]); + 6 * 4, 32, &brw->bind.surf_offset[unit]); surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); @@ -453,7 +453,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t format = 0; surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - 6 * 4, 32, &brw->wm.surf_offset[unit]); + 6 * 4, 32, &brw->bind.surf_offset[unit]); switch (irb->Base.Format) { case MESA_FORMAT_XRGB8888: @@ -534,7 +534,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, } drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->wm.surf_offset[unit] + 4, + brw->bind.surf_offset[unit] + 4, region->bo, surf[1] - region->bo->offset, I915_GEM_DOMAIN_RENDER, @@ -593,7 +593,7 @@ brw_update_texture_surfaces(struct brw_context *brw) if (texUnit->_ReallyEnabled) { brw->intel.vtbl.update_texture_surface(ctx, i); } else { - brw->wm.surf_offset[surf] = 0; + brw->bind.surf_offset[surf] = 0; } } @@ -614,7 +614,7 @@ const struct brw_tracked_state brw_texture_surfaces = { * numbers to surface state objects. */ static void -brw_wm_upload_binding_table(struct brw_context *brw) +brw_upload_binding_table(struct brw_context *brw) { uint32_t *bind; int i; @@ -623,25 +623,27 @@ brw_wm_upload_binding_table(struct brw_context *brw) * space for the binding table. */ bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - sizeof(uint32_t) * BRW_WM_MAX_SURF, - 32, &brw->wm.bind_bo_offset); + sizeof(uint32_t) * BRW_MAX_SURFACES, + 32, &brw->bind.bo_offset); - for (i = 0; i < BRW_WM_MAX_SURF; i++) { - /* BRW_NEW_WM_SURFACES */ - bind[i] = brw->wm.surf_offset[i]; + /* BRW_NEW_WM_SURFACES and BRW_NEW_VS_CONSTBUF */ + for (i = 0; i < BRW_MAX_SURFACES; i++) { + bind[i] = brw->bind.surf_offset[i]; } + brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE; brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE; } -const struct brw_tracked_state brw_wm_binding_table = { +const struct brw_tracked_state brw_binding_table = { .dirty = { .mesa = 0, .brw = (BRW_NEW_BATCH | + BRW_NEW_VS_CONSTBUF | BRW_NEW_WM_SURFACES), .cache = 0 }, - .emit = brw_wm_upload_binding_table, + .emit = brw_upload_binding_table, }; void diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 462db5bf194..e3234b593b0 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -37,7 +37,7 @@ upload_vs_state(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2)); - OUT_BATCH(brw->vs.bind_bo_offset); + OUT_BATCH(brw->bind.bo_offset); ADVANCE_BATCH(); if (brw->vs.push_const_size == 0) { diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 8b79663da3f..97c079a2be8 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -104,7 +104,7 @@ upload_ps_state(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(brw->wm.bind_bo_offset); + OUT_BATCH(brw->bind.bo_offset); ADVANCE_BATCH(); /* CACHE_NEW_SAMPLER */ diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index b4730c4582c..f74198b2468 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -68,7 +68,7 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit) intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth); surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - sizeof(*surf), 32, &brw->wm.surf_offset[surf_index]); + sizeof(*surf), 32, &brw->bind.surf_offset[surf_index]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = translate_tex_target(tObj->Target); @@ -118,7 +118,7 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit) /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->wm.surf_offset[surf_index] + + brw->bind.surf_offset[surf_index] + offsetof(struct gen7_surface_state, ss1), intelObj->mt->region->bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); @@ -172,7 +172,7 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit) struct gen7_surface_state *surf; surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - sizeof(*surf), 32, &brw->wm.surf_offset[unit]); + sizeof(*surf), 32, &brw->bind.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); surf->ss0.surface_type = BRW_SURFACE_NULL; @@ -197,7 +197,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, uint32_t tile_x, tile_y; surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, - sizeof(*surf), 32, &brw->wm.surf_offset[unit]); + sizeof(*surf), 32, &brw->bind.surf_offset[unit]); memset(surf, 0, sizeof(*surf)); switch (irb->Base.Format) { @@ -252,7 +252,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, surf->ss3.pitch = (region->pitch * region->cpp) - 1; drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->wm.surf_offset[unit] + + brw->bind.surf_offset[unit] + offsetof(struct gen7_surface_state, ss1), region->bo, surf->ss1.base_addr - region->bo->offset,