From 88022278f71ed3ea9613a7fa72a03367f75443d3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 9 Mar 2011 13:18:30 -0800 Subject: [PATCH] i965: Move the SF VP from state caching to state streaming. This is a 49.6% +/- 2.0% (n=9, IPS outlier removed) performance improvement for the hacked-up-for-cache-misses scissor-many, and no statistically significant performance difference for the hacked-up-for-cache-hits version (n=9, IPS outlier removed). No statistically significant performance difference from ETQW (n=5) from these last two commits. --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_sf_state.c | 20 ++++++++++++++------ src/mesa/drivers/dri/i965/brw_state_dump.c | 4 ++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 5cf5590b606..ab0c78ff8f8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -641,6 +641,7 @@ struct brw_context drm_intel_bo *state_bo; uint32_t state_offset; drm_intel_bo *vp_bo; + uint32_t vp_offset; } sf; struct { diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index f8b5275561d..66d91a0bde7 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -38,13 +38,15 @@ static void upload_sf_vp(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; - struct brw_sf_viewport sfv_stack, *sfv = &sfv_stack; + struct brw_sf_viewport *sfv; GLfloat y_scale, y_bias; const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; + sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset); memset(sfv, 0, sizeof(*sfv)); if (render_to_fbo) { @@ -104,8 +106,12 @@ static void upload_sf_vp(struct brw_context *brw) sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; } + /* Keep a pointer to it for brw_state_dump.c */ drm_intel_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, sfv, sizeof(*sfv)); + drm_intel_bo_reference(intel->batch.bo); + brw->sf.vp_bo = intel->batch.bo; + + brw->state.dirty.cache |= CACHE_NEW_SF_VP; } const struct brw_tracked_state brw_sf_vp = { @@ -113,7 +119,7 @@ const struct brw_tracked_state brw_sf_vp = { .mesa = (_NEW_VIEWPORT | _NEW_SCISSOR | _NEW_BUFFERS), - .brw = 0, + .brw = BRW_NEW_BATCH, .cache = 0 }, .prepare = upload_sf_vp @@ -171,7 +177,8 @@ static void upload_sf_unit( struct brw_context *brw ) sf->thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ - sf->sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */ + sf->sf5.sf_viewport_state_offset = (brw->sf.vp_bo->offset + + brw->sf.vp_offset) >> 5; /* reloc */ sf->sf5.viewport_transform = 1; @@ -289,8 +296,9 @@ static void upload_sf_unit( struct brw_context *brw ) /* Emit SF viewport relocation */ drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset + offsetof(struct brw_sf_unit_state, sf5)), - brw->sf.vp_bo, (sf->sf5.front_winding | - (sf->sf5.viewport_transform << 1)), + intel->batch.bo, (brw->sf.vp_offset | + sf->sf5.front_winding | + (sf->sf5.viewport_transform << 1)), I915_GEM_DOMAIN_INSTRUCTION, 0); brw->state.dirty.cache |= CACHE_NEW_SF_UNIT; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index fdce79da2f4..b393259c915 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -195,8 +195,8 @@ static void dump_sf_viewport_state(struct brw_context *brw) drm_intel_bo_map(brw->sf.vp_bo, GL_FALSE); - vp = brw->sf.vp_bo->virtual; - vp_off = brw->sf.vp_bo->offset; + vp = brw->sf.vp_bo->virtual + brw->sf.vp_offset; + vp_off = brw->sf.vp_bo->offset + brw->sf.vp_offset; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); -- 2.30.2