i965: Track the number of samples in the drawbuffer.
author Eric Anholt <eric@anholt.net>
Fri, 25 Apr 2014 23:39:18 +0000 (16:39 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 30 Apr 2014 21:33:21 +0000 (14:33 -0700)
This keeps us from having to emit the nonpipelined state packet on every
FBO binding.

-4.42003% +/- 1.09961% effect on cairo-perf-trace runtime on glamor (n=110).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/gen6_multisample_state.c
src/mesa/drivers/dri/i965/gen8_multisample_state.c
src/mesa/drivers/dri/i965/gen8_ps_state.c

index 82b38fca80336fcb2921af2cd93e18d98d022e3b..1f7108f30e9ee67014cfad93daf74338155ea65c 100644 (file)
@@ -181,6 +181,7 @@ enum brw_state_id {
    BRW_STATE_META_IN_PROGRESS,
    BRW_STATE_INTERPOLATION_MAP,
    BRW_STATE_PUSH_CONSTANT_ALLOCATION,
+   BRW_STATE_NUM_SAMPLES,
    BRW_NUM_STATE_BITS
 };
 
@@ -220,6 +221,7 @@ enum brw_state_id {
 #define BRW_NEW_META_IN_PROGRESS        (1 << BRW_STATE_META_IN_PROGRESS)
 #define BRW_NEW_INTERPOLATION_MAP       (1 << BRW_STATE_INTERPOLATION_MAP)
 #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1 << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
+#define BRW_NEW_NUM_SAMPLES             (1 << BRW_STATE_NUM_SAMPLES)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -1164,6 +1166,12 @@ struct brw_context
    const struct gl_geometry_program *geometry_program;
    const struct gl_fragment_program *fragment_program;
 
+   /**
+    * Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so
+    * that we don't have to reemit that state every time we change FBOs.
+    */
+   int num_samples;
+
    /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
    uint32_t CMD_VF_STATISTICS;
    /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
index 7ae2e1e682593c88dce687685e62278b6b03e85c..3a452c3a58601d93dde9d07efebc93c4edddb291 100644 (file)
@@ -507,6 +507,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
    DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
    DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
    {0, 0, 0}
 };
 
@@ -604,6 +605,11 @@ void brw_upload_state(struct brw_context *brw)
       brw->state.dirty.brw |= BRW_NEW_META_IN_PROGRESS;
    }
 
+   if (brw->num_samples != ctx->DrawBuffer->Visual.samples) {
+      brw->num_samples = ctx->DrawBuffer->Visual.samples;
+      brw->state.dirty.brw |= BRW_NEW_NUM_SAMPLES;
+   }
+
    if ((state->mesa | state->cache | state->brw) == 0)
       return;
 
index 5d65453a675184051108a18de3e4ffc24a61e686..701fda2ff553f92ee85a4a1909ab76a59f850398 100644 (file)
@@ -111,7 +111,8 @@ gen6_determine_sample_mask(struct brw_context *brw)
    float coverage_invert = false;
    unsigned sample_mask = ~0u;
 
-   unsigned num_samples = ctx->DrawBuffer->Visual.samples;
+   /* BRW_NEW_NUM_SAMPLES */
+   unsigned num_samples = brw->num_samples;
 
    if (ctx->Multisample._Enabled) {
       if (ctx->Multisample.SampleCoverage) {
@@ -150,21 +151,17 @@ gen6_emit_3dstate_sample_mask(struct brw_context *brw, unsigned mask)
 
 static void upload_multisample_state(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->ctx;
-
-   /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-   unsigned num_samples = ctx->DrawBuffer->Visual.samples;
-
-   gen6_emit_3dstate_multisample(brw, num_samples);
+   /* BRW_NEW_NUM_SAMPLES */
+   gen6_emit_3dstate_multisample(brw, brw->num_samples);
    gen6_emit_3dstate_sample_mask(brw, gen6_determine_sample_mask(brw));
 }
 
 
 const struct brw_tracked_state gen6_multisample_state = {
    .dirty = {
-      .mesa = _NEW_BUFFERS |
-              _NEW_MULTISAMPLE,
-      .brw = BRW_NEW_CONTEXT,
+      .mesa = _NEW_MULTISAMPLE,
+      .brw = (BRW_NEW_CONTEXT |
+              BRW_NEW_NUM_SAMPLES),
       .cache = 0
    },
    .emit = upload_multisample_state
index bfe0d5b610b6d59f7c6a53af2f39756e5de2639c..daf3e69b3a00dc116ea7ac973be5d5fe061a885f 100644 (file)
@@ -76,19 +76,15 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
 static void
 upload_multisample_state(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->ctx;
-
-   /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-   unsigned num_samples = ctx->DrawBuffer->Visual.samples;
-
-   gen8_emit_3dstate_multisample(brw, num_samples);
+   gen8_emit_3dstate_multisample(brw, brw->num_samples);
    gen6_emit_3dstate_sample_mask(brw, gen6_determine_sample_mask(brw));
 }
 
 const struct brw_tracked_state gen8_multisample_state = {
    .dirty = {
-      .mesa = _NEW_BUFFERS | _NEW_MULTISAMPLE,
-      .brw = BRW_NEW_CONTEXT,
+      .mesa = _NEW_MULTISAMPLE,
+      .brw = (BRW_NEW_CONTEXT |
+              BRW_NEW_NUM_SAMPLES),
       .cache = 0
    },
    .emit = upload_multisample_state
index 7d8f9544ee6547642ee32de8f2a8904f92511755..8d9ad643e860eb01362973cc7f6c46e6114477ee 100644 (file)
@@ -65,8 +65,8 @@ upload_ps_extra(struct brw_context *brw)
    if (fp->program.Base.InputsRead & VARYING_BIT_POS)
       dw1 |= GEN8_PSX_USES_SOURCE_DEPTH | GEN8_PSX_USES_SOURCE_W;
 
-   /* _NEW_BUFFERS | _NEW_MULTISAMPLE */
-   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+   /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */
+   bool multisampled_fbo = brw->num_samples > 1;
    if (multisampled_fbo &&
        _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1)
       dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE;
@@ -85,8 +85,8 @@ upload_ps_extra(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_ps_extra = {
    .dirty = {
-      .mesa  = _NEW_BUFFERS | _NEW_MULTISAMPLE,
-      .brw   = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM,
+      .mesa  = _NEW_MULTISAMPLE,
+      .brw   = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NUM_SAMPLES,
       .cache = 0,
    },
    .emit = upload_ps_extra,