From a82a43e8d99e1715dd11c9c091b5ab734079b6a6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Apr 2011 16:00:14 -0700 Subject: [PATCH] i965/gen6: Use the dynamic state base address to reduce relocations. Now that all the dynamic state is streamed through the top of the batchbuffer, we can cut out many of our relocations to that state by using the base address. Improves 3DMMES taiji performance 3.3% +/- 0.4% (n=15). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_misc_state.c | 28 +++++++++++++++---- src/mesa/drivers/dri/i965/brw_state_upload.c | 5 ++-- .../drivers/dri/i965/brw_wm_sampler_state.c | 24 +++++++++------- src/mesa/drivers/dri/i965/gen6_cc.c | 9 ++---- .../drivers/dri/i965/gen6_sampler_state.c | 7 +---- .../drivers/dri/i965/gen6_scissor_state.c | 3 +- .../drivers/dri/i965/gen6_viewport_state.c | 9 ++---- src/mesa/drivers/dri/i965/gen6_vs_state.c | 8 +++--- src/mesa/drivers/dri/i965/gen6_wm_state.c | 8 +++--- 9 files changed, 55 insertions(+), 46 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index c0ed6f750cd..7325a2b6c8d 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -550,12 +550,28 @@ static void upload_state_base_address( struct brw_context *brw ) if (intel->gen >= 6) { BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); - OUT_BATCH(1); /* General state base address */ - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, - 1); /* Surface state base address */ - OUT_BATCH(1); /* Dynamic state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* Instruction base address */ + /* General state base address: stateless DP read/write requests */ + OUT_BATCH(1); + /* Surface state base address: + * BINDING_TABLE_STATE + * SURFACE_STATE + */ + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); + /* Dynamic state base address: + * SAMPLER_STATE + * 
SAMPLER_BORDER_COLOR_STATE + * CLIP, SF, WM/CC viewport state + * COLOR_CALC_STATE + * DEPTH_STENCIL_STATE + * BLEND_STATE + * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset + * Disable is clear, which we rely on) + */ + OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER | + I915_GEM_DOMAIN_INSTRUCTION), 0, 1); + + OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */ + OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Dynamic state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7d215f563c3..008aceb222b 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -120,6 +120,9 @@ static const struct brw_tracked_state *gen6_atoms[] = /* Command packets: */ &brw_invarient_state, + /* must do before binding table pointers, cc state ptrs */ + &brw_state_base_address, + &brw_cc_vp, &gen6_viewport_state, /* must do after *_vp stages */ @@ -150,8 +153,6 @@ static const struct brw_tracked_state *gen6_atoms[] = &gen6_scissor_state, - &brw_state_base_address, - &gen6_binding_table_pointers, &brw_depthbuffer, diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index c24494b645f..7b93bf90241 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -271,16 +271,20 @@ static void brw_update_sampler_state(struct brw_context *brw, upload_default_color(brw, gl_sampler, unit); - /* reloc */ - sampler->ss2.default_color_pointer = (intel->batch.bo->offset + - brw->wm.sdc_offset[unit]) >> 5; - - drm_intel_bo_emit_reloc(intel->batch.bo, - brw->wm.sampler_offset + - unit * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - intel->batch.bo, 
brw->wm.sdc_offset[unit], - I915_GEM_DOMAIN_SAMPLER, 0); + if (intel->gen >= 6) { + sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5; + } else { + /* reloc */ + sampler->ss2.default_color_pointer = (intel->batch.bo->offset + + brw->wm.sdc_offset[unit]) >> 5; + + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.sampler_offset + + unit * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + intel->batch.bo, brw->wm.sdc_offset[unit], + I915_GEM_DOMAIN_SAMPLER, 0); + } } diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 938f9148d22..80985db3d68 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -156,12 +156,9 @@ static void upload_cc_state_pointers(struct brw_context *brw) BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->cc.blend_state_offset | 1); - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->cc.depth_stencil_state_offset | 1); - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->cc.state_offset | 1); + OUT_BATCH(brw->cc.blend_state_offset | 1); + OUT_BATCH(brw->cc.depth_stencil_state_offset | 1); + OUT_BATCH(brw->cc.state_offset | 1); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index 969780ac1df..4cdec699df6 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -43,12 +43,7 @@ upload_sampler_state_pointers(struct brw_context *brw) (4 - 2)); OUT_BATCH(0); /* VS */ OUT_BATCH(0); /* GS */ - if (brw->wm.sampler_count) - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->wm.sampler_offset); - else - OUT_BATCH(0); - + OUT_BATCH(brw->wm.sampler_offset); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c 
b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index d0b37a078d5..fad3ca0dd04 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -79,8 +79,7 @@ gen6_prepare_scissor_state(struct brw_context *brw) BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - scissor_state_offset); + OUT_BATCH(scissor_state_offset); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index c46dc3e3c11..4116bdb96de 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -113,12 +113,9 @@ static void upload_viewport_state_pointers(struct brw_context *brw) GEN6_CC_VIEWPORT_MODIFY | GEN6_SF_VIEWPORT_MODIFY | GEN6_CLIP_VIEWPORT_MODIFY); - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->clip.vp_offset); - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->sf.vp_offset); - OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, - brw->cc.vp_offset); + OUT_BATCH(brw->clip.vp_offset); + OUT_BATCH(brw->sf.vp_offset); + OUT_BATCH(brw->cc.vp_offset); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 00c28ed8d22..b46368e36e2 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -134,10 +134,10 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - /* This is also the set of state flags from gen6_prepare_vs_constants */ - OUT_RELOC(intel->batch.bo, - I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - brw->vs.push_const_offset + + /* Pointer to the VS constant buffer. 
Covered by the set of + * state flags from gen6_prepare_vs_constants + */ + OUT_BATCH(brw->vs.push_const_offset + brw->vs.push_const_size - 1); OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index fe5c7a1120b..33b233414c6 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -112,10 +112,10 @@ upload_wm_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); - /* This is also the set of state flags from gen6_prepare_wm_constants */ - OUT_RELOC(intel->batch.bo, - I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - brw->wm.push_const_offset + + /* Pointer to the WM constant buffer. Covered by the set of + * state flags from gen6_prepare_wm_constants + */ + OUT_BATCH(brw->wm.push_const_offset + ALIGN(brw->wm.prog_data->nr_params, brw->wm.prog_data->dispatch_width) / 8 - 1); OUT_BATCH(0); -- 2.30.2