i965/gen6: Manipulate state batches for HiZ meta-ops [v4]
author Chad Versace <chad.versace@linux.intel.com>
Wed, 16 Nov 2011 02:20:31 +0000 (18:20 -0800)
committer Chad Versace <chad.versace@linux.intel.com>
Tue, 22 Nov 2011 18:50:50 +0000 (10:50 -0800)
A lot of the state manipulation is handled by the meta-op state setup.
However, some batches need manual intervention.

v2:
   Do not special-case the 3DSTATE_DEPTH_STENCIL.Depth_Test_Enable bit
   for HiZ in gen6_upload_depth_stencil(). The HiZ meta-op sets
   ctx->Depth.Test, so just read the value from that.

v3:
   Add a new dirty flag, BRW_STATE_HIZ, for brw_tracked_state. Flag it
   immediately before and after executing the HiZ operation in
   gen6_resolve_slice(). Add the flag to the dirty bits for the
   following state packets:
      gen6_clip_state
      gen6_depth_stencil_state
      gen6_sf_state
      gen6_wm_state

v4:
   - Add BRW_NEW_HIZ to the dirty bit table in brw_state_upload.c.
     This is needed for INTEL_DEBUG=state.
   - Align brw dirty bit for gen6_depth_stencil_state.

Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chad Versace <chad.versace@linux.intel.com>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/gen6_clip_state.c
src/mesa/drivers/dri/i965/gen6_depthstencil.c
src/mesa/drivers/dri/i965/gen6_hiz.c
src/mesa/drivers/dri/i965/gen6_sf_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c

index fa2fc72e435e41568cedc999a77bcad010464492..ec05fb7586941d29a801a24a76d227723aed3b48 100644 (file)
@@ -144,6 +144,7 @@ enum brw_state_id {
    BRW_STATE_VS_CONSTBUF,
    BRW_STATE_PROGRAM_CACHE,
    BRW_STATE_STATE_BASE_ADDRESS,
+   BRW_STATE_HIZ,
 };
 
 #define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
@@ -172,6 +173,7 @@ enum brw_state_id {
 #define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
 #define BRW_NEW_PROGRAM_CACHE          (1 << BRW_STATE_PROGRAM_CACHE)
 #define BRW_NEW_STATE_BASE_ADDRESS     (1 << BRW_STATE_STATE_BASE_ADDRESS)
+#define BRW_NEW_HIZ                    (1 << BRW_STATE_HIZ)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
index 1571fb74d6c07f66e135ac2d054adcb2de2fdfd6..d2ae0877e8dad8b3a3b2d8ac67cc02c6a35164c0 100644 (file)
@@ -117,10 +117,17 @@ static void brw_set_prim(struct brw_context *brw,
 static void gen6_set_prim(struct brw_context *brw,
                           const struct _mesa_prim *prim)
 {
-   uint32_t hw_prim = prim_to_hw_prim[prim->mode];
+   uint32_t hw_prim;
 
    DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
 
+   if (brw->hiz.op) {
+      assert(prim->mode == GL_TRIANGLES);
+      hw_prim = _3DPRIM_RECTLIST;
+   } else {
+      hw_prim = prim_to_hw_prim[prim->mode];
+   }
+
    if (hw_prim != brw->primitive) {
       brw->primitive = hw_prim;
       brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
index 730a0e8a12b32e3e7962bec99eca3cc52a6963da..bd32815d08ccb16713e2ed025be16a225a6d56ae 100644 (file)
@@ -368,6 +368,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
    DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
    DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
+   DEFINE_BIT(BRW_NEW_HIZ),
    {0, 0, 0}
 };
 
index b3bb8aee3ec3bb9bd0c46df3c49f4f9b2ebf604d..d2a5f755726ad88783cb9685ae6caf6e5d5e248d 100644 (file)
@@ -67,6 +67,23 @@ upload_clip_state(struct brw_context *brw)
          GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
    }
 
+   if (brw->hiz.op) {
+      /* HiZ operations emit a rectangle primitive, which requires clipping to
+       * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+       * Section 1.3 3D Primitives Overview:
+       *    RECTLIST:
+       *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+       *    Mode should be set to a value other than CLIPMODE_NORMAL.
+       */
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+      return;
+   }
+
    if (!ctx->Transform.DepthClamp)
       depth_clamp = GEN6_CLIP_Z_TEST;
 
@@ -107,7 +124,8 @@ const struct brw_tracked_state gen6_clip_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_LIGHT,
       .brw   = (BRW_NEW_CONTEXT |
-                BRW_NEW_FRAGMENT_PROGRAM),
+                BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_HIZ),
       .cache = 0
    },
    .emit = upload_clip_state,
index 72e86879b2bb95d9fb3405031d6ce749a1336793..eec1bf690150f0a653d72cdebebd0ae3f01a9a02 100644 (file)
@@ -77,8 +77,12 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
    }
 
    /* _NEW_DEPTH */
-   if (ctx->Depth.Test) {
-      ds->ds2.depth_test_enable = 1;
+   if (ctx->Depth.Test || brw->hiz.op) {
+      assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_RESOLVE || ctx->Depth.Test);
+      assert(brw->hiz.op != BRW_HIZ_OP_HIZ_RESOLVE   || !ctx->Depth.Test);
+      assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_CLEAR   || !ctx->Depth.Test);
+
+      ds->ds2.depth_test_enable = ctx->Depth.Test;
       ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func);
       ds->ds2.depth_write_enable = ctx->Depth.Mask;
    }
@@ -89,7 +93,8 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_depth_stencil_state = {
    .dirty = {
       .mesa = _NEW_DEPTH | _NEW_STENCIL,
-      .brw = BRW_NEW_BATCH,
+      .brw  = (BRW_NEW_BATCH |
+              BRW_NEW_HIZ),
       .cache = 0,
    },
    .emit = gen6_upload_depth_stencil_state,
index a71c64c3f72dcca917cf611f9021dde5431817c9..e28251182c9b09f17ea5a36ae75894ffb526d177 100644 (file)
@@ -301,7 +301,9 @@ gen6_resolve_slice(struct intel_context *intel,
 
    /* Execute the HiZ operation. */
    brw->hiz.op = op;
+   brw->state.dirty.brw |= BRW_NEW_HIZ;
    _mesa_DrawArrays(GL_TRIANGLES, 0, 3);
+   brw->state.dirty.brw |= BRW_NEW_HIZ;
    brw->hiz.op = BRW_HIZ_OP_NONE;
 
    /* Restore state.
index 67119d8d1822df790637b8b37a36b7fe900f3e4e..4c4ff308054687d203d7b4d0f458bda40ed20170 100644 (file)
@@ -147,8 +147,19 @@ upload_sf_state(struct brw_context *brw)
       num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
       urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
       urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
-   dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
-      GEN6_SF_STATISTICS_ENABLE;
+
+   dw2 = GEN6_SF_STATISTICS_ENABLE;
+
+   /* Enable viewport transform only if no HiZ operation is in progress
+    *
+    * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
+    * Primitives Overview":
+    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
+    *     use of screen- space coordinates).
+    */
+   if (!brw->hiz.op)
+      dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
+
    dw3 = 0;
    dw4 = 0;
    dw16 = 0;
@@ -334,7 +345,8 @@ const struct brw_tracked_state gen6_sf_state = {
                _NEW_POINT |
                _NEW_TRANSFORM),
       .brw   = (BRW_NEW_CONTEXT |
-               BRW_NEW_FRAGMENT_PROGRAM),
+               BRW_NEW_FRAGMENT_PROGRAM |
+               BRW_NEW_HIZ),
       .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_sf_state,
index 271a9aeca78c2d97692c5c05be32f96e08949d58..070220a5077e633fd487c32491216ca8daa44924 100644 (file)
@@ -147,6 +147,23 @@ upload_wm_state(struct brw_context *brw)
    dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
           GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
 
+   switch (brw->hiz.op) {
+   case BRW_HIZ_OP_NONE:
+      break;
+   case BRW_HIZ_OP_DEPTH_CLEAR:
+      dw4 |= GEN6_WM_DEPTH_CLEAR;
+      break;
+   case BRW_HIZ_OP_DEPTH_RESOLVE:
+      dw4 |= GEN6_WM_DEPTH_RESOLVE;
+      break;
+   case BRW_HIZ_OP_HIZ_RESOLVE:
+      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
    dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
 
    /* CACHE_NEW_WM_PROG */
@@ -215,7 +232,8 @@ const struct brw_tracked_state gen6_wm_state = {
                _NEW_POLYGON),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
                BRW_NEW_URB_FENCE |
-               BRW_NEW_BATCH),
+               BRW_NEW_BATCH |
+               BRW_NEW_HIZ),
       .cache = (CACHE_NEW_SAMPLER |
                CACHE_NEW_WM_PROG)
    },