From f9a2679db5886a65eac7e08a8f75674cf3dff8b7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Apr 2014 11:27:22 -0700 Subject: [PATCH] i965/gen7+: Move sampler state packets to the stage sampler state table update. Now that we have the stage state coming into our setup of sampler states, it's easy to drop an identifier into it of which stage the stage_state is, and then look up which packet to emit in a little table. No performance difference on cairo on glamor (n=492). v2: Don't forget to do the workaround flush on IVB. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 3 +++ src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/gen7_gs_state.c | 8 +------- src/mesa/drivers/dri/i965/gen7_sampler_state.c | 15 ++++++++++++++- src/mesa/drivers/dri/i965/gen7_vs_state.c | 8 +------- src/mesa/drivers/dri/i965/gen7_wm_state.c | 9 +-------- src/mesa/drivers/dri/i965/gen8_gs_state.c | 8 +------- src/mesa/drivers/dri/i965/gen8_ps_state.c | 8 +------- src/mesa/drivers/dri/i965/gen8_vs_state.c | 8 +------- 9 files changed, 24 insertions(+), 44 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 449fcfc4789..17ae6857b10 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -628,6 +628,9 @@ brwCreateContext(gl_api api, brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil; brw->has_swizzling = screen->hw_has_swizzling; + brw->vs.base.stage = MESA_SHADER_VERTEX; + brw->gs.base.stage = MESA_SHADER_GEOMETRY; + brw->wm.base.stage = MESA_SHADER_FRAGMENT; if (brw->gen >= 8) { gen8_init_vtable_surface_functions(brw); gen7_init_vtable_sampler_functions(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 379af38cba0..92e1592bfa4 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -925,6 +925,7 @@ struct brw_transform_feedback_object { */ struct brw_stage_state { + gl_shader_stage stage; struct brw_stage_prog_data *prog_data; /** diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index d18ae155185..06e6cf7500e 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -66,12 +66,6 @@ upload_gs_state(struct brw_context *brw) /* CACHE_NEW_GS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); - OUT_BATCH(stage_state->sampler_offset); - ADVANCE_BATCH(); - gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); /** @@ -198,7 +192,7 @@ const struct brw_tracked_state gen7_gs_state = { BRW_NEW_GS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), - .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER + .cache = CACHE_NEW_GS_PROG }, .emit = upload_gs_state, }; diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index 8eb337d5492..74d5e9e0a7e 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -187,6 +187,11 @@ gen7_upload_sampler_state_table(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; struct gen7_sampler_state *samplers; uint32_t sampler_count = stage_state->sampler_count; + static const uint16_t packet_headers[] = { + [MESA_SHADER_VERTEX] = _3DSTATE_SAMPLER_STATE_POINTERS_VS, + [MESA_SHADER_GEOMETRY] = _3DSTATE_SAMPLER_STATE_POINTERS_GS, + [MESA_SHADER_FRAGMENT] = _3DSTATE_SAMPLER_STATE_POINTERS_PS, + }; GLbitfield SamplersUsed = prog->SamplersUsed; @@ -207,7 +212,15 @@ gen7_upload_sampler_state_table(struct brw_context *brw, } } - brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + if (brw->gen == 7 && !brw->is_haswell && + stage_state->stage == MESA_SHADER_VERTEX) { + gen7_emit_vs_workaround_flush(brw); + } + + BEGIN_BATCH(2); + OUT_BATCH(packet_headers[stage_state->stage] << 16 | (2 - 2)); + OUT_BATCH(stage_state->sampler_offset); + ADVANCE_BATCH(); } void diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index b5fc871cfe1..6b1f6807950 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -75,12 +75,6 @@ upload_vs_state(struct brw_context *brw) if (!brw->is_haswell) gen7_emit_vs_workaround_flush(brw); - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); - OUT_BATCH(stage_state->sampler_offset); - ADVANCE_BATCH(); - gen7_upload_constant_state(brw, stage_state, true /* active */, _3DSTATE_CONSTANT_VS); @@ -126,7 +120,7 @@ const struct brw_tracked_state gen7_vs_state = { BRW_NEW_VS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), - .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER + .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state, }; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index eabadee3bf9..2b95ef1ffd2 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -143,12 +143,6 @@ upload_ps_state(struct brw_context *brw) const int max_threads_shift = brw->is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(brw->wm.base.sampler_offset); - ADVANCE_BATCH(); - /* CACHE_NEW_WM_PROG */ gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); @@ -281,8 +275,7 @@ const struct brw_tracked_state gen7_ps_state = { BRW_NEW_PS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), - .cache = (CACHE_NEW_SAMPLER | - CACHE_NEW_WM_PROG) + .cache = (CACHE_NEW_WM_PROG) }, .emit = upload_ps_state, }; diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 97fbf84f7ff..6baada31ee9 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -36,12 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw) /* CACHE_NEW_GS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2)); - OUT_BATCH(stage_state->sampler_offset); - ADVANCE_BATCH(); - gen8_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); if (active) { @@ -135,7 +129,7 @@ const struct brw_tracked_state gen8_gs_state = { BRW_NEW_GS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION), - .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER + .cache = CACHE_NEW_GS_PROG }, .emit = gen8_upload_gs_state, }; diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 085606072bd..aa7183ba932 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -136,12 +136,6 @@ upload_ps_state(struct brw_context *brw) struct gl_context *ctx = &brw->ctx; uint32_t dw3 = 0, dw6 = 0, dw7 = 0; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(brw->wm.base.sampler_offset); - ADVANCE_BATCH(); - /* CACHE_NEW_WM_PROG */ gen8_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); @@ -254,7 +248,7 @@ const struct brw_tracked_state gen8_ps_state = { BRW_NEW_PS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION, - .cache = CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG + .cache = CACHE_NEW_WM_PROG }, .emit = upload_ps_state, }; diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index 373cfe4b6f8..e7634eec7cf 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -62,12 +62,6 @@ upload_vs_state(struct brw_context *brw) /* CACHE_NEW_VS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base; - /* CACHE_NEW_SAMPLER */ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); - OUT_BATCH(stage_state->sampler_offset); - ADVANCE_BATCH(); - gen8_upload_constant_state(brw, stage_state, true /* active */, _3DSTATE_CONSTANT_VS); @@ -119,7 +113,7 @@ const struct brw_tracked_state gen8_vs_state = { BRW_NEW_VS_BINDING_TABLE | BRW_NEW_BATCH | BRW_NEW_PUSH_CONSTANT_ALLOCATION, - .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER + .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state, }; -- 2.30.2