From 60c5f9716c90090f41e0796e93cc60a297fa883b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 25 Apr 2014 16:39:18 -0700 Subject: [PATCH] i965: Track the number of samples in the drawbuffer. This keeps us from having to emit the nonpipelined state packet on every FBO binding. -4.42003% +/- 1.09961% effect on cairo-perf-trace runtime on glamor (n=110). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 8 ++++++++ src/mesa/drivers/dri/i965/brw_state_upload.c | 6 ++++++ .../drivers/dri/i965/gen6_multisample_state.c | 17 +++++++---------- .../drivers/dri/i965/gen8_multisample_state.c | 12 ++++-------- src/mesa/drivers/dri/i965/gen8_ps_state.c | 8 ++++---- 5 files changed, 29 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 82b38fca803..1f7108f30e9 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -181,6 +181,7 @@ enum brw_state_id { BRW_STATE_META_IN_PROGRESS, BRW_STATE_INTERPOLATION_MAP, BRW_STATE_PUSH_CONSTANT_ALLOCATION, + BRW_STATE_NUM_SAMPLES, BRW_NUM_STATE_BITS }; @@ -220,6 +221,7 @@ enum brw_state_id { #define BRW_NEW_META_IN_PROGRESS (1 << BRW_STATE_META_IN_PROGRESS) #define BRW_NEW_INTERPOLATION_MAP (1 << BRW_STATE_INTERPOLATION_MAP) #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1 << BRW_STATE_PUSH_CONSTANT_ALLOCATION) +#define BRW_NEW_NUM_SAMPLES (1 << BRW_STATE_NUM_SAMPLES) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -1164,6 +1166,12 @@ struct brw_context const struct gl_geometry_program *geometry_program; const struct gl_fragment_program *fragment_program; + /** + * Number of samples in ctx->DrawBuffer, with changes flagged by BRW_NEW_NUM_SAMPLES so + * that we don't have to re-emit that state every time we change FBOs. 
+ */ + int num_samples; + /* hw-dependent 3DSTATE_VF_STATISTICS opcode */ uint32_t CMD_VF_STATISTICS; /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */ diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7ae2e1e6825..3a452c3a586 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -507,6 +507,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_META_IN_PROGRESS), DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP), DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION), + DEFINE_BIT(BRW_NEW_NUM_SAMPLES), {0, 0, 0} }; @@ -604,6 +605,11 @@ void brw_upload_state(struct brw_context *brw) brw->state.dirty.brw |= BRW_NEW_META_IN_PROGRESS; } + if (brw->num_samples != ctx->DrawBuffer->Visual.samples) { + brw->num_samples = ctx->DrawBuffer->Visual.samples; + brw->state.dirty.brw |= BRW_NEW_NUM_SAMPLES; + } + if ((state->mesa | state->cache | state->brw) == 0) return; diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c index 5d65453a675..701fda2ff55 100644 --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -111,7 +111,8 @@ gen6_determine_sample_mask(struct brw_context *brw) float coverage_invert = false; unsigned sample_mask = ~0u; - unsigned num_samples = ctx->DrawBuffer->Visual.samples; + /* BRW_NEW_NUM_SAMPLES */ + unsigned num_samples = brw->num_samples; if (ctx->Multisample._Enabled) { if (ctx->Multisample.SampleCoverage) { @@ -150,21 +151,17 @@ gen6_emit_3dstate_sample_mask(struct brw_context *brw, unsigned mask) static void upload_multisample_state(struct brw_context *brw) { - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ - unsigned num_samples = ctx->DrawBuffer->Visual.samples; - - gen6_emit_3dstate_multisample(brw, num_samples); + /* BRW_NEW_NUM_SAMPLES */ + gen6_emit_3dstate_multisample(brw, 
brw->num_samples); gen6_emit_3dstate_sample_mask(brw, gen6_determine_sample_mask(brw)); } const struct brw_tracked_state gen6_multisample_state = { .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_MULTISAMPLE, - .brw = BRW_NEW_CONTEXT, + .mesa = _NEW_MULTISAMPLE, + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_NUM_SAMPLES), .cache = 0 }, .emit = upload_multisample_state diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c b/src/mesa/drivers/dri/i965/gen8_multisample_state.c index bfe0d5b610b..daf3e69b3a0 100644 --- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c @@ -76,19 +76,15 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw) static void upload_multisample_state(struct brw_context *brw) { - struct gl_context *ctx = &brw->ctx; - - /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ - unsigned num_samples = ctx->DrawBuffer->Visual.samples; - - gen8_emit_3dstate_multisample(brw, num_samples); + gen8_emit_3dstate_multisample(brw, brw->num_samples); gen6_emit_3dstate_sample_mask(brw, gen6_determine_sample_mask(brw)); } const struct brw_tracked_state gen8_multisample_state = { .dirty = { - .mesa = _NEW_BUFFERS | _NEW_MULTISAMPLE, - .brw = BRW_NEW_CONTEXT, + .mesa = _NEW_MULTISAMPLE, + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_NUM_SAMPLES), .cache = 0 }, .emit = upload_multisample_state diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 7d8f9544ee6..8d9ad643e86 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -65,8 +65,8 @@ upload_ps_extra(struct brw_context *brw) if (fp->program.Base.InputsRead & VARYING_BIT_POS) dw1 |= GEN8_PSX_USES_SOURCE_DEPTH | GEN8_PSX_USES_SOURCE_W; - /* _NEW_BUFFERS | _NEW_MULTISAMPLE */ - bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1; + /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */ + bool multisampled_fbo = brw->num_samples > 1; if (multisampled_fbo && 
_mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1) dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE; @@ -85,8 +85,8 @@ upload_ps_extra(struct brw_context *brw) const struct brw_tracked_state gen8_ps_extra = { .dirty = { - .mesa = _NEW_BUFFERS | _NEW_MULTISAMPLE, - .brw = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM, + .mesa = _NEW_MULTISAMPLE, + .brw = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NUM_SAMPLES, .cache = 0, }, .emit = upload_ps_extra, -- 2.30.2