i965: Stream out CC unit state.
author: Eric Anholt <eric@anholt.net>
Fri, 11 Jun 2010 20:21:59 +0000 (13:21 -0700)
committer: Eric Anholt <eric@anholt.net>
Sun, 13 Jun 2010 04:47:31 +0000 (21:47 -0700)
before:
[ # ]  backend                         test   min(s) median(s) stddev. count
[  0]       gl            firefox-talos-gfx   31.791   32.287   1.11%    6/6
after:
[  0]       gl            firefox-talos-gfx   31.198   31.675   0.96%    6/6

src/mesa/drivers/dri/i965/brw_cc.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_misc_state.c

index 2b5efdd336bd6b11c392b54e52fd5174cd9482cd..cfce5d314051b0bd5a4f42e5e6f86abb40acf9fc 100644 (file)
@@ -58,27 +58,6 @@ brw_update_cc_vp(struct brw_context *brw)
    brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv));
 }
 
-struct brw_cc_unit_key {
-   GLboolean stencil, stencil_two_side, color_blend, alpha_enabled;
-
-   GLenum stencil_func[2], stencil_fail_op[2];
-   GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
-   GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2];
-   GLenum logic_op;
-
-   GLenum blend_eq_rgb, blend_eq_a;
-   GLenum blend_src_rgb, blend_src_a;
-   GLenum blend_dst_rgb, blend_dst_a;
-
-   GLenum alpha_func;
-   GLclampf alpha_ref;
-
-   GLboolean dither;
-
-   GLboolean depth_test, depth_write;
-   GLenum depth_func;
-};
-
 /**
  * Modify blend function to force destination alpha to 1.0
  *
@@ -101,136 +80,83 @@ fix_xRGB_alpha(GLenum function)
    return function;
 }
 
-static void
-cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+static void prepare_cc_unit(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   const unsigned back = ctx->Stencil._BackFace;
-
-   memset(key, 0, sizeof(*key));
-
-   key->stencil = ctx->Stencil._Enabled;
-   key->stencil_two_side = ctx->Stencil._TestTwoSide;
-
-   if (key->stencil) {
-      key->stencil_func[0] = ctx->Stencil.Function[0];
-      key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
-      key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
-      key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
-      key->stencil_ref[0] = ctx->Stencil.Ref[0];
-      key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
-      key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
-   }
-   if (key->stencil_two_side) {
-      key->stencil_func[1] = ctx->Stencil.Function[back];
-      key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
-      key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
-      key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
-      key->stencil_ref[1] = ctx->Stencil.Ref[back];
-      key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
-      key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
-   }
-
-   if (ctx->Color._LogicOpEnabled)
-      key->logic_op = ctx->Color.LogicOp;
-   else
-      key->logic_op = GL_COPY;
-
-   key->color_blend = ctx->Color.BlendEnabled;
-   if (key->color_blend) {
-      key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
-      key->blend_eq_a = ctx->Color.BlendEquationA;
-      key->blend_src_rgb = ctx->Color.BlendSrcRGB;
-      key->blend_dst_rgb = ctx->Color.BlendDstRGB;
-      key->blend_src_a = ctx->Color.BlendSrcA;
-      key->blend_dst_a = ctx->Color.BlendDstA;
-
-      /* If the renderbuffer is XRGB, we have to frob the blend function to
-       * force the destination alpha to 1.0.  This means replacing GL_DST_ALPHA
-       * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
-       */
-      if (ctx->DrawBuffer->Visual.alphaBits == 0) {
-        key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb);
-        key->blend_src_a   = fix_xRGB_alpha(key->blend_src_a);
-        key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb);
-        key->blend_dst_a   = fix_xRGB_alpha(key->blend_dst_a);
-      }
-   }
-
-   key->alpha_enabled = ctx->Color.AlphaEnabled;
-   if (key->alpha_enabled) {
-      key->alpha_func = ctx->Color.AlphaFunc;
-      key->alpha_ref = ctx->Color.AlphaRef;
-   }
-
-   key->dither = ctx->Color.DitherFlag;
-
-   key->depth_test = ctx->Depth.Test;
-   if (key->depth_test) {
-      key->depth_func = ctx->Depth.Func;
-      key->depth_write = ctx->Depth.Mask;
-   }
+   brw_add_validated_bo(brw, brw->cc.vp_bo);
 }
 
 /**
  * Creates the state cache entry for the given CC unit key.
  */
-static drm_intel_bo *
-cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+static void upload_cc_unit(struct brw_context *brw)
 {
+   GLcontext *ctx = &brw->intel.ctx;
    struct brw_cc_unit_state cc;
-   drm_intel_bo *bo;
+   void *map;
 
    memset(&cc, 0, sizeof(cc));
 
    /* _NEW_STENCIL */
-   if (key->stencil) {
+   if (ctx->Stencil._Enabled) {
+      const unsigned back = ctx->Stencil._BackFace;
+
       cc.cc0.stencil_enable = 1;
       cc.cc0.stencil_func =
-        intel_translate_compare_func(key->stencil_func[0]);
+        intel_translate_compare_func(ctx->Stencil.Function[0]);
       cc.cc0.stencil_fail_op =
-        intel_translate_stencil_op(key->stencil_fail_op[0]);
+        intel_translate_stencil_op(ctx->Stencil.FailFunc[0]);
       cc.cc0.stencil_pass_depth_fail_op =
-        intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+        intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
       cc.cc0.stencil_pass_depth_pass_op =
-        intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
-      cc.cc1.stencil_ref = key->stencil_ref[0];
-      cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
-      cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
+        intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
+      cc.cc1.stencil_ref = ctx->Stencil.Ref[0];
+      cc.cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
+      cc.cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];
 
-      if (key->stencil_two_side) {
+      if (ctx->Stencil._TestTwoSide) {
         cc.cc0.bf_stencil_enable = 1;
         cc.cc0.bf_stencil_func =
-           intel_translate_compare_func(key->stencil_func[1]);
+           intel_translate_compare_func(ctx->Stencil.Function[back]);
         cc.cc0.bf_stencil_fail_op =
-           intel_translate_stencil_op(key->stencil_fail_op[1]);
+           intel_translate_stencil_op(ctx->Stencil.FailFunc[back]);
         cc.cc0.bf_stencil_pass_depth_fail_op =
-           intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+           intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
         cc.cc0.bf_stencil_pass_depth_pass_op =
-           intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
-        cc.cc1.bf_stencil_ref = key->stencil_ref[1];
-        cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
-        cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
+           intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
+        cc.cc1.bf_stencil_ref = ctx->Stencil.Ref[back];
+        cc.cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
+        cc.cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
       }
 
       /* Not really sure about this:
        */
-      if (key->stencil_write_mask[0] ||
-         (key->stencil_two_side && key->stencil_write_mask[1]))
+      if (ctx->Stencil.WriteMask[0] ||
+         (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back]))
         cc.cc0.stencil_write_enable = 1;
    }
 
    /* _NEW_COLOR */
-   if (key->logic_op != GL_COPY) {
+   if (ctx->Color._LogicOpEnabled && ctx->Color.LogicOp != GL_COPY) {
       cc.cc2.logicop_enable = 1;
-      cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
-   } else if (key->color_blend) {
-      GLenum eqRGB = key->blend_eq_rgb;
-      GLenum eqA = key->blend_eq_a;
-      GLenum srcRGB = key->blend_src_rgb;
-      GLenum dstRGB = key->blend_dst_rgb;
-      GLenum srcA = key->blend_src_a;
-      GLenum dstA = key->blend_dst_a;
+      cc.cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp);
+   } else if (ctx->Color.BlendEnabled) {
+      GLenum eqRGB = ctx->Color.BlendEquationRGB;
+      GLenum eqA = ctx->Color.BlendEquationA;
+      GLenum srcRGB = ctx->Color.BlendSrcRGB;
+      GLenum dstRGB = ctx->Color.BlendDstRGB;
+      GLenum srcA = ctx->Color.BlendSrcA;
+      GLenum dstA = ctx->Color.BlendDstA;
+
+      /* If the renderbuffer is XRGB, we have to frob the blend function to
+       * force the destination alpha to 1.0.  This means replacing GL_DST_ALPHA
+       * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
+       */
+      if (ctx->DrawBuffer->Visual.alphaBits == 0) {
+        srcRGB = fix_xRGB_alpha(srcRGB);
+        srcA   = fix_xRGB_alpha(srcA);
+        dstRGB = fix_xRGB_alpha(dstRGB);
+        dstA   = fix_xRGB_alpha(dstA);
+      }
 
       if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
         srcRGB = dstRGB = GL_ONE;
@@ -254,25 +180,27 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
                                eqA != eqRGB);
    }
 
-   if (key->alpha_enabled) {
+   if (ctx->Color.AlphaEnabled) {
       cc.cc3.alpha_test = 1;
-      cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+      cc.cc3.alpha_test_func =
+        intel_translate_compare_func(ctx->Color.AlphaFunc);
       cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
 
-      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
+      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], ctx->Color.AlphaRef);
    }
 
-   if (key->dither) {
+   if (ctx->Color.DitherFlag) {
       cc.cc5.dither_enable = 1;
       cc.cc6.y_dither_offset = 0;
       cc.cc6.x_dither_offset = 0;
    }
 
    /* _NEW_DEPTH */
-   if (key->depth_test) {
+   if (ctx->Depth.Test) {
       cc.cc2.depth_test = 1;
-      cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
-      cc.cc2.depth_write_enable = key->depth_write;
+      cc.cc2.depth_test_function =
+        intel_translate_compare_func(ctx->Depth.Func);
+      cc.cc2.depth_write_enable = ctx->Depth.Mask;
    }
 
    /* CACHE_NEW_CC_VP */
@@ -281,43 +209,25 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
    if (INTEL_DEBUG & DEBUG_STATS)
       cc.cc5.statistics_enable = 1;
 
-   bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
-                        key, sizeof(*key),
-                        &brw->cc.vp_bo, 1,
-                        &cc, sizeof(cc));
+   map = brw_state_batch(brw, sizeof(cc), 64,
+                        &brw->cc.state_bo, &brw->cc.state_offset);
+   memcpy(map, &cc, sizeof(cc));
+   brw->state.dirty.cache |= CACHE_NEW_CC_UNIT;
 
    /* Emit CC viewport relocation */
-   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_cc_unit_state, cc4),
+   drm_intel_bo_emit_reloc(brw->cc.state_bo, (brw->cc.state_offset +
+                                             offsetof(struct brw_cc_unit_state,
+                                                      cc4)),
                           brw->cc.vp_bo, 0,
                           I915_GEM_DOMAIN_INSTRUCTION, 0);
-
-   return bo;
-}
-
-static void prepare_cc_unit( struct brw_context *brw )
-{
-   struct brw_cc_unit_key key;
-
-   cc_unit_populate_key(brw, &key);
-
-   drm_intel_bo_unreference(brw->cc.state_bo);
-   brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT,
-                                      &key, sizeof(key),
-                                      &brw->cc.vp_bo, 1,
-                                      NULL);
-
-   if (brw->cc.state_bo == NULL)
-      brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
 }
 
 const struct brw_tracked_state brw_cc_unit = {
    .dirty = {
       .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,
-      .brw = 0,
+      .brw = BRW_NEW_BATCH,
       .cache = CACHE_NEW_CC_VP
    },
    .prepare = prepare_cc_unit,
+   .emit = upload_cc_unit,
 };
-
-
-
index 7fbffbeb030f6d9e60d5678057d438381ab743e0..cc4e6638e8ba39cd13537d9a390e9766271b0229 100644 (file)
@@ -669,6 +669,7 @@ struct brw_context
       drm_intel_bo *color_calc_state_bo;
 
       drm_intel_bo *state_bo;
+      uint32_t state_offset;
    } cc;
 
    struct {
index b68b5af01060647eb214e5b6cebbeed98af19fef..572175f463e16fdf8408027570f13c86135828c1 100644 (file)
@@ -185,7 +185,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
    OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+            brw->cc.state_offset);
    ADVANCE_BATCH();
 
    brw->state.dirty.brw |= BRW_NEW_PSP;
@@ -199,7 +200,6 @@ static void prepare_psp_urb_cbs(struct brw_context *brw)
    brw_add_validated_bo(brw, brw->clip.state_bo);
    brw_add_validated_bo(brw, brw->sf.state_bo);
    brw_add_validated_bo(brw, brw->wm.state_bo);
-   brw_add_validated_bo(brw, brw->cc.state_bo);
 }
 
 static void upload_psp_urb_cbs(struct brw_context *brw )