From: Eric Anholt Date: Fri, 22 Apr 2011 02:03:18 +0000 (-0700) Subject: i965/gen6: Stream the WM push constants. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=90c70123b830bead0ac622df94f2809ac056af95;p=mesa.git i965/gen6: Stream the WM push constants. Improves 3DMMES taiji demo performance by 5.1% +/- 1.9% (n=15), by reducing CPU time spent thrashing around those tiny little constant BOs. Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 6bf8a1c83c7..6a244984e91 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -698,11 +698,11 @@ struct brw_context drm_intel_bo *state_bo; drm_intel_bo *const_bo; /* pull constant buffer. */ /** - * This is the push constant BO on gen6. + * This is the offset in the batch to the push constants on gen6. * * Pre-gen6, push constants live in the CURBE. */ - drm_intel_bo *push_const_bo; + uint32_t push_const_offset; } wm; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a22e63c9caa..321dbc19ede 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -364,15 +364,17 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, for (i = 0; i < nr_prims; i++) { uint32_t hw_prim; + int estimated_max_prim_size; + + estimated_max_prim_size = 512; /* batchbuffer commands */ + estimated_max_prim_size += 1024; /* gen6 WM push constants */ + estimated_max_prim_size += 512; /* misc. pad */ /* Flush the batch if it's approaching full, so that we don't wrap while * we've got validated state that needs to be in the same batch as the - * primitives. This fraction is just a guess (minimal full state plus - * a primitive is around 512 bytes), and would be better if we had - * an upper bound of how much we might emit in a single - * brw_try_draw_prims(). + * primitives. 
*/ - intel_batchbuffer_require_space(intel, 1024, false); + intel_batchbuffer_require_space(intel, estimated_max_prim_size, false); hw_prim = brw_set_prim(brw, &prim[i]); if (brw->state.dirty.brw) { diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ce8712a260f..4961b0449df 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -93,7 +93,6 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->wm.const_bo); - dri_bo_release(&brw->wm.push_const_bo); dri_bo_release(&brw->cc.prog_bo); dri_bo_release(&brw->cc.vp_bo); dri_bo_release(&brw->cc.blend_state_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index c4b57fe1f95..fe5c7a1120b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -42,9 +42,6 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) const struct brw_fragment_program *fp = brw_fragment_program_const(brw->fragment_program); - drm_intel_bo_unreference(brw->wm.push_const_bo); - brw->wm.push_const_bo = NULL; - /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. 
*/ @@ -55,13 +52,11 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - brw->wm.push_const_bo = drm_intel_bo_alloc(intel->bufmgr, - "WM constant_bo", - brw->wm.prog_data->nr_params * - sizeof(float), - 4096); - drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo); - constants = brw->wm.push_const_bo->virtual; + constants = brw_state_batch(brw, + brw->wm.prog_data->nr_params * + sizeof(float), + 32, &brw->wm.push_const_offset); + for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], *brw->wm.prog_data->param[i]); @@ -80,15 +75,14 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) printf("\n"); printf("\n"); } - - drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo); } } const struct brw_tracked_state gen6_wm_constants = { .dirty = { .mesa = _NEW_PROGRAM_CONSTANTS, - .brw = BRW_NEW_FRAGMENT_PROGRAM, + .brw = (BRW_NEW_BATCH | + BRW_NEW_FRAGMENT_PROGRAM), .cache = 0, }, .prepare = gen6_prepare_wm_push_constants, @@ -118,8 +112,10 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - OUT_RELOC(brw->wm.push_const_bo, + /* This is also the set of state flags from gen6_prepare_wm_push_constants */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ + brw->wm.push_const_offset + ALIGN(brw->wm.prog_data->nr_params, brw->wm.prog_data->dispatch_width) / 8 - 1); OUT_BATCH(0);