From 32cc0c9d8de343f699e80e7e416ea0d7e3121a42 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Apr 2011 09:55:25 -0700 Subject: [PATCH] i965/gen6: Stream the VS push constants. Improves 3DMMES taiji demo performance by 10.1% +/- 0.9% (n=15). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 3 + src/mesa/drivers/dri/i965/brw_draw.c | 1 + src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + src/mesa/drivers/dri/i965/gen6_vs_state.c | 91 ++++++++++++-------- 5 files changed, 62 insertions(+), 35 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 6a244984e91..d9b755ba19b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -640,6 +640,9 @@ struct brw_context uint32_t bind_bo_offset; uint32_t surf_offset[BRW_VS_MAX_SURF]; GLuint nr_surfaces; + + uint32_t push_const_offset; /* Offset in the batchbuffer */ + int push_const_size; /* in 256-bit register increments */ } vs; struct { diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 321dbc19ede..0c93a03151b 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -367,6 +367,7 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, int estimated_max_prim_size; estimated_max_prim_size = 512; /* batchbuffer commands */ + estimated_max_prim_size += 1024; /* gen6 VS push constants */ estimated_max_prim_size += 1024; /* gen6 WM push constants */ estimated_max_prim_size += 512; /* misc. 
pad */ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 86b0caa4a4e..4c9ac1ed90e 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -107,6 +107,7 @@ extern const struct brw_tracked_state gen6_sf_state; extern const struct brw_tracked_state gen6_sf_vp; extern const struct brw_tracked_state gen6_urb; extern const struct brw_tracked_state gen6_viewport_state; +extern const struct brw_tracked_state gen6_vs_constants; extern const struct brw_tracked_state gen6_vs_state; extern const struct brw_tracked_state gen6_wm_constants; extern const struct brw_tracked_state gen6_wm_state; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 6f521be6599..a397460feca 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -129,6 +129,7 @@ static const struct brw_tracked_state *gen6_atoms[] = &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ + &gen6_vs_constants, /* Before vs_state */ &gen6_wm_constants, /* Before wm_state */ &brw_vs_surfaces, /* must do before unit */ diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index a10cec318d6..00c28ed8d22 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -34,43 +34,36 @@ #include "intel_batchbuffer.h" static void -upload_vs_state(struct brw_context *brw) +gen6_prepare_vs_push_constants(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; + /* _BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); unsigned int nr_params = brw->vs.prog_data->nr_params / 4; - drm_intel_bo *constant_bo; - int i; + if (brw->vertex_program->IsNVProgram) + 
_mesa_load_tracked_matrices(ctx); + + /* Updates the ParameterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + /* XXX: Should this happen somewhere before to get our state flag set? */ + _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + + /* CACHE_NEW_VS_PROG | _NEW_TRANSFORM */ if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) { - /* Disable the push constant buffers. */ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + brw->vs.push_const_size = 0; } else { - int params_uploaded = 0, param_regs; + int params_uploaded = 0; float *param; + int i; - if (brw->vertex_program->IsNVProgram) - _mesa_load_tracked_matrices(ctx); - - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); - - constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", - (MAX_CLIP_PLANES + nr_params) * - 4 * sizeof(float), - 4096); - drm_intel_gem_bo_map_gtt(constant_bo); - param = constant_bo->virtual; + param = brw_state_batch(brw, + (MAX_CLIP_PLANES + nr_params) * + 4 * sizeof(float), + 32, &brw->vs.push_const_offset); /* This should be loaded like any other param, but it's ad-hoc * until we redo the VS backend. @@ -100,30 +93,56 @@ upload_vs_state(struct brw_context *brw) if (0) { printf("VS constant buffer:\n"); for (i = 0; i < params_uploaded; i++) { - float *buf = (float *)constant_bo->virtual + i * 4; + float *buf = param + i * 4; printf("%d: %f %f %f %f\n", i, buf[0], buf[1], buf[2], buf[3]); } } - drm_intel_gem_bo_unmap_gtt(constant_bo); + brw->vs.push_const_size = (params_uploaded + 1) / 2; + /* We can only push 32 registers of constants at a time. 
*/ + assert(brw->vs.push_const_size <= 32); + } +} - param_regs = (params_uploaded + 1) / 2; - assert(param_regs <= 32); +const struct brw_tracked_state gen6_vs_constants = { + .dirty = { + .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .brw = (BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM), + .cache = 0, + }, + .prepare = gen6_prepare_vs_push_constants, +}; +static void +upload_vs_state(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + if (brw->vs.push_const_size == 0) { + /* Disable the push constant buffers. */ + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { BEGIN_BATCH(5); OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - OUT_RELOC(constant_bo, + /* This is also the set of state flags from gen6_prepare_vs_push_constants */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - param_regs - 1); + brw->vs.push_const_offset + + brw->vs.push_const_size - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); - - drm_intel_bo_unreference(constant_bo); } BEGIN_BATCH(6); @@ -149,7 +168,9 @@ const struct brw_tracked_state gen6_vs_state = { .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE | - BRW_NEW_CONTEXT), + BRW_NEW_CONTEXT | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_BATCH), .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state, -- 2.30.2