i965/gen7: Enable HiZ
author Chad Versace <chad.versace@linux.intel.com>
Thu, 22 Dec 2011 01:09:58 +0000 (17:09 -0800)
committer Chad Versace <chad.versace@linux.intel.com>
Tue, 10 Jan 2012 23:52:38 +0000 (15:52 -0800)
This patch modifies all batches needed for HiZ. The batch length for
3DSTATE_HIER_DEPTH_BUFFER is also corrected from 4 to 3.

Performance +6.7% on Citybench.
    num-frames: 400
    resolution: 1918x1031
    avg-hiz-off: 127.90 fps
    avg-hiz-on: 136.50 fps
    kernel: git://people.freedesktop.org/~anholt/linux.git branch=gen7-reset-sol sha=23360e4

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chad Versace <chad.versace@linux.intel.com>
src/mesa/drivers/dri/i965/gen7_clip_state.c
src/mesa/drivers/dri/i965/gen7_misc_state.c
src/mesa/drivers/dri/i965/gen7_sf_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c
src/mesa/drivers/dri/intel/intel_screen.c

index c32cd988297dbeef569f8e28f836e2da6daacb84..9be3ce9c84667a8ea2e974b91c58c327001982b8 100644 (file)
@@ -39,6 +39,23 @@ upload_clip_state(struct brw_context *brw)
    /* BRW_NEW_FRAGMENT_PROGRAM */
    const struct gl_fragment_program *fprog = brw->fragment_program;
 
+   if (brw->hiz.op) {
+      /* HiZ operations emit a rectangle primitive, which requires clipping to
+       * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+       * Section 1.3 3D Primitives Overview:
+       *    RECTLIST:
+       *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+       *    Mode should be set to a value other than CLIPMODE_NORMAL.
+       */
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+      return;
+   }
+
    /* _NEW_BUFFERS */
    bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
 
@@ -116,7 +133,8 @@ const struct brw_tracked_state gen7_clip_state = {
                 _NEW_LIGHT |
                 _NEW_TRANSFORM),
       .brw   = (BRW_NEW_CONTEXT |
-                BRW_NEW_FRAGMENT_PROGRAM),
+                BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_HIZ),
       .cache = 0
    },
    .emit = upload_clip_state,
index 9c93046fdfb804fd5109e481299b2477d1581e2f..f28748597a11335a20e4f86059c8fc3b7b843c01 100644 (file)
@@ -38,11 +38,16 @@ static void emit_depthbuffer(struct brw_context *brw)
    /* _NEW_BUFFERS */
    struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
    struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
-   struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL;
+   struct intel_mipmap_tree *depth_mt = NULL,
+                           *stencil_mt = NULL,
+                           *hiz_mt = NULL;
 
    if (drb)
       depth_mt = drb->mt;
 
+   if (depth_mt)
+      hiz_mt = depth_mt->hiz_mt;
+
    if (srb) {
       stencil_mt = srb->mt;
       if (stencil_mt->stencil_mt)
@@ -97,7 +102,7 @@ static void emit_depthbuffer(struct brw_context *brw)
       OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
       OUT_BATCH(((region->pitch * region->cpp) - 1) |
                (brw_depthbuffer_format(brw) << 18) |
-               (0 << 22) /* no HiZ buffer */ |
+               ((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
                ((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) |
                ((ctx->Depth.Mask != 0) << 28) |
                (BRW_SURFACE_2D << 29));
@@ -112,12 +117,22 @@ static void emit_depthbuffer(struct brw_context *brw)
       ADVANCE_BATCH();
    }
 
-   BEGIN_BATCH(4);
-   OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (4 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   if (hiz_mt == NULL) {
+      BEGIN_BATCH(3); /* 3 dwords, matching the (3 - 2) length field */
+      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(3); /* 3 dwords, matching the (3 - 2) length field */
+      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
+      OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
+      OUT_RELOC(hiz_mt->region->bo,
+                I915_GEM_DOMAIN_RENDER,
+                I915_GEM_DOMAIN_RENDER,
+                0);
+      ADVANCE_BATCH();
+   }
 
    if (stencil_mt == NULL) {
       BEGIN_BATCH(3);
index c4cacf095ed85b08abdc3b98829b7a681de28b25..7691cb218d69807bcd0918ae495eca50575b0d14 100644 (file)
@@ -137,7 +137,8 @@ const struct brw_tracked_state gen7_sbe_state = {
                _NEW_PROGRAM |
                _NEW_TRANSFORM),
       .brw   = (BRW_NEW_CONTEXT |
-               BRW_NEW_FRAGMENT_PROGRAM),
+               BRW_NEW_FRAGMENT_PROGRAM |
+               BRW_NEW_HIZ),
       .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_sbe_state,
@@ -153,7 +154,17 @@ upload_sf_state(struct brw_context *brw)
    /* _NEW_BUFFERS */
    bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
 
-   dw1 = GEN6_SF_STATISTICS_ENABLE | GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
+   dw1 = GEN6_SF_STATISTICS_ENABLE;
+
+   /* Enable viewport transform only if no HiZ operation is in progress
+    *
+    * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
+    * Primitives Overview":
+    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
+    *     use of screen- space coordinates).
+    */
+   if (!brw->hiz.op)
+      dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
 
    /* _NEW_BUFFERS */
    dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
@@ -287,7 +298,8 @@ const struct brw_tracked_state gen7_sf_state = {
                _NEW_SCISSOR |
                _NEW_BUFFERS |
                _NEW_POINT),
-      .brw   = (BRW_NEW_CONTEXT),
+      .brw   = (BRW_NEW_CONTEXT |
+               BRW_NEW_HIZ),
       .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_sf_state,
index 504599149cbb0f12e2e32955179a363b4b0abc59..1468fcc1278e7f86523fdfc878a7b7535b40dd57 100644 (file)
@@ -49,6 +49,23 @@ upload_wm_state(struct brw_context *brw)
    dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
    dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
 
+   switch (brw->hiz.op) {
+   case BRW_HIZ_OP_NONE:
+      break;
+   case BRW_HIZ_OP_DEPTH_CLEAR:
+      dw1 |= GEN7_WM_DEPTH_CLEAR;
+      break;
+   case BRW_HIZ_OP_DEPTH_RESOLVE:
+      dw1 |= GEN7_WM_DEPTH_RESOLVE;
+      break;
+   case BRW_HIZ_OP_HIZ_RESOLVE:
+      dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
    /* _NEW_LINE */
    if (ctx->Line.StippleFlag)
       dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
@@ -89,6 +106,7 @@ const struct brw_tracked_state gen7_wm_state = {
       .mesa  = (_NEW_LINE | _NEW_LIGHT | _NEW_POLYGON |
                _NEW_COLOR | _NEW_BUFFERS),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_HIZ |
                BRW_NEW_BATCH),
       .cache = 0,
    },
index 2478cb3b775f0bc5056b074565ba3fc728194f9c..ce96ddda85cbe48116a32ec997c043a49885d7aa 100644 (file)
@@ -717,7 +717,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
 
    intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6;
    intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7;
-   intelScreen->hw_has_hiz = intelScreen->gen == 6; /* Not yet for gen7. */
+   intelScreen->hw_has_hiz = intelScreen->gen >= 6;
    intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_UNKNOWN;
 
    intel_override_hiz(intelScreen);