From: Eric Anholt Date: Thu, 1 May 2014 16:59:22 +0000 (-0700) Subject: i965: Move push constant state packets to push constant update time. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=20404e45c7c2d815ade67f78e1d76104c9349797;p=mesa.git i965: Move push constant state packets to push constant update time. -0.553779% +/- 0.423394% effect on cairo-perf-trace runtime on glamor (n=612) Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 25f55c47d9e..9764645daab 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -98,13 +98,22 @@ gen6_upload_vs_push_constants(struct brw_context *brw) gen6_upload_vec4_push_constants(brw, &vp->program.Base, prog_data, stage_state, AUB_TRACE_VS_CONSTANTS); + + if (brw->gen >= 7) { + if (brw->gen == 7 && !brw->is_haswell) + gen7_emit_vs_workaround_flush(brw); + + gen7_upload_constant_state(brw, stage_state, true /* active */, + _3DSTATE_CONSTANT_VS); + } } const struct brw_tracked_state gen6_vs_push_constants = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | - BRW_NEW_VERTEX_PROGRAM), + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_PUSH_CONSTANT_ALLOCATION), .cache = CACHE_NEW_VS_PROG, }, .emit = gen6_upload_vs_push_constants, diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 3f5dd29487f..402d9c38635 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -81,13 +81,19 @@ gen6_upload_wm_push_constants(struct brw_context *brw) brw->wm.base.push_const_size = ALIGN(prog_data->base.nr_params, 8) / 8; } + + if (brw->gen >= 7) { + gen7_upload_constant_state(brw, &brw->wm.base, true, + _3DSTATE_CONSTANT_PS); + } } const struct brw_tracked_state gen6_wm_push_constants = { .dirty = { .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | - BRW_NEW_FRAGMENT_PROGRAM), + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_PUSH_CONSTANT_ALLOCATION), .cache = CACHE_NEW_WM_PROG, }, .emit = gen6_upload_wm_push_constants, diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 599997d8ad8..30dfa6b87c6 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -30,25 +30,29 @@ static void gen7_upload_gs_push_constants(struct brw_context *brw) { + const struct brw_stage_state *stage_state = &brw->gs.base; /* BRW_NEW_GEOMETRY_PROGRAM */ - const struct brw_geometry_program *vp = + const struct brw_geometry_program *gp = (struct brw_geometry_program *) brw->geometry_program; - if (!vp) - return; - /* CACHE_NEW_GS_PROG */ - const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; - struct brw_stage_state *stage_state = &brw->gs.base; + if (gp) { + /* CACHE_NEW_GS_PROG */ + const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; + struct brw_stage_state *stage_state = &brw->gs.base; + + gen6_upload_vec4_push_constants(brw, &gp->program.Base, prog_data, + stage_state, AUB_TRACE_VS_CONSTANTS); + } - gen6_upload_vec4_push_constants(brw, &vp->program.Base, prog_data, - stage_state, AUB_TRACE_VS_CONSTANTS); + gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS); } const struct brw_tracked_state gen7_gs_push_constants = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_BATCH | - BRW_NEW_GEOMETRY_PROGRAM), + BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_PUSH_CONSTANT_ALLOCATION), .cache = CACHE_NEW_GS_PROG, }, .emit = gen7_upload_gs_push_constants, @@ -66,8 +70,6 @@ upload_gs_state(struct brw_context *brw) /* CACHE_NEW_GS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; - gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); - /** * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > * Geometry > Geometry Shader > State: @@ -186,11 +188,10 @@ upload_gs_state(struct brw_context *brw) const struct brw_tracked_state gen7_gs_state = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_BATCH | - BRW_NEW_PUSH_CONSTANT_ALLOCATION), + BRW_NEW_BATCH), .cache = CACHE_NEW_GS_PROG }, .emit = upload_gs_state, diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index a0303104c17..4d9915059cc 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -75,9 +75,6 @@ upload_vs_state(struct brw_context *brw) if (!brw->is_haswell) gen7_emit_vs_workaround_flush(brw); - gen7_upload_constant_state(brw, stage_state, true /* active */, - _3DSTATE_CONSTANT_VS); - /* Use ALT floating point mode for ARB vertex programs, because they * require 0^0 == 1. */ @@ -114,11 +111,10 @@ upload_vs_state(struct brw_context *brw) const struct brw_tracked_state gen7_vs_state = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_BATCH | - BRW_NEW_PUSH_CONSTANT_ALLOCATION), + BRW_NEW_BATCH), .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state, diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 972ee0098ec..078284101b4 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -143,9 +143,6 @@ upload_ps_state(struct brw_context *brw) const int max_threads_shift = brw->is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; - /* CACHE_NEW_WM_PROG */ - gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); - dw2 = dw4 = dw5 = 0; dw2 |= @@ -267,13 +264,11 @@ upload_ps_state(struct brw_context *brw) const struct brw_tracked_state gen7_ps_state = { .dirty = { - .mesa = (_NEW_PROGRAM_CONSTANTS | - _NEW_COLOR | + .mesa = (_NEW_COLOR | _NEW_BUFFERS | _NEW_MULTISAMPLE), .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_BATCH | - BRW_NEW_PUSH_CONSTANT_ALLOCATION), + BRW_NEW_BATCH), .cache = (CACHE_NEW_WM_PROG) }, .emit = upload_ps_state, diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index ef25115db92..a0f933c0918 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -36,8 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw) /* CACHE_NEW_GS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base; - gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS); - if (active) { int urb_entry_write_offset = 1; uint32_t urb_entry_output_length = @@ -123,11 +121,10 @@ gen8_upload_gs_state(struct brw_context *brw) const struct brw_tracked_state gen8_gs_state = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_BATCH | - BRW_NEW_PUSH_CONSTANT_ALLOCATION), + BRW_NEW_BATCH), .cache = CACHE_NEW_GS_PROG }, .emit = gen8_upload_gs_state, diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index f0362a276d0..8b8d5ea7747 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -136,9 +136,6 @@ upload_ps_state(struct brw_context *brw) struct gl_context *ctx = &brw->ctx; uint32_t dw3 = 0, dw6 = 0, dw7 = 0; - /* CACHE_NEW_WM_PROG */ - gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS); - /* Initialize the execution mask with VMask. Otherwise, derivatives are * incorrect for subspans where some of the pixels are unlit. We believe * the bit just didn't take effect in previous generations. @@ -243,10 +240,9 @@ upload_ps_state(struct brw_context *brw) const struct brw_tracked_state gen8_ps_state = { .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS | _NEW_MULTISAMPLE, + .mesa = _NEW_MULTISAMPLE, .brw = BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_BATCH | - BRW_NEW_PUSH_CONSTANT_ALLOCATION, + BRW_NEW_BATCH, .cache = CACHE_NEW_WM_PROG }, .emit = upload_ps_state, diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index 9ac681fb419..638e216a469 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -39,9 +39,6 @@ upload_vs_state(struct brw_context *brw) /* CACHE_NEW_VS_PROG */ const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base; - gen7_upload_constant_state(brw, stage_state, true /* active */, - _3DSTATE_CONSTANT_VS); - /* Use ALT floating point mode for ARB vertex programs, because they * require 0^0 == 1. */ @@ -84,11 +81,10 @@ upload_vs_state(struct brw_context *brw) const struct brw_tracked_state gen8_vs_state = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS, + .mesa = _NEW_TRANSFORM, .brw = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_BATCH | - BRW_NEW_PUSH_CONSTANT_ALLOCATION, + BRW_NEW_BATCH, .cache = CACHE_NEW_VS_PROG }, .emit = upload_vs_state,