i965: Split the gen6 GS binding table to a separate table.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_misc_state.c
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c

index 7b83ff5253c3045e74d54e99b39051e12495199d..c86755de6593cc2044ab285804f7246617f490e8 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -33,6 +33,7 @@
  
  #include "intel_batchbuffer.h"
  #include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
  #include "intel_regions.h"
  
  #include "brw_context.h"
@@ -80,7 +81,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
     OUT_BATCH(0); /* gs */
     OUT_BATCH(0); /* clip */
     OUT_BATCH(0); /* sf */
-   OUT_BATCH(brw->wm.bind_bo_offset);
+   OUT_BATCH(brw->bind.bo_offset);
     ADVANCE_BATCH();
  }
  
@@ -115,8 +116,8 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
     OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
-   OUT_BATCH(0); /* gs */
-   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
+   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
+   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
     ADVANCE_BATCH();
  }
  
@@ -196,20 +197,58 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
     .emit = upload_psp_urb_cbs,
  };
  
-static void prepare_depthbuffer(struct brw_context *brw)
+uint32_t
+brw_depthbuffer_format(struct brw_context *brw)
  {
     struct intel_context *intel = &brw->intel;
     struct gl_context *ctx = &intel->ctx;
     struct gl_framebuffer *fb = ctx->DrawBuffer;
     struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
-   struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
-
-   if (drb)
-      brw_add_validated_bo(brw, drb->region->buffer);
-   if (drb && drb->hiz_region)
-      brw_add_validated_bo(brw, drb->hiz_region->buffer);
-   if (srb)
-      brw_add_validated_bo(brw, srb->region->buffer);
+   struct intel_renderbuffer *srb;
+
+   if (!drb &&
+       (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
+       !srb->mt->stencil_mt &&
+       (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
+       intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
+      drb = srb;
+   }
+
+   if (!drb)
+      return BRW_DEPTHFORMAT_D32_FLOAT;
+
+   switch (drb->mt->format) {
+   case MESA_FORMAT_Z16:
+      return BRW_DEPTHFORMAT_D16_UNORM;
+   case MESA_FORMAT_Z32_FLOAT:
+      return BRW_DEPTHFORMAT_D32_FLOAT;
+   case MESA_FORMAT_X8_Z24:
+      if (intel->gen >= 6) {
+        return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
+      } else {
+        /* Use D24_UNORM_S8, not D24_UNORM_X8.
+         *
+         * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
+         * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
+         * 3DSTATE_DEPTH_BUFFER.Surface_Format).
+         *
+         * However, on Gen5, D24_UNORM_X8 may be used only if separate
+         * stencil is enabled, and we never enable it. From the Ironlake PRM,
+         * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
+         *     If this field is disabled, the Surface Format of the depth
+         *     buffer cannot be D24_UNORM_X8_UINT.
+         */
+        return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+      }
+   case MESA_FORMAT_S8_Z24:
+      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+   case MESA_FORMAT_Z32_FLOAT_X24S8:
+      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
+   default:
+      _mesa_problem(ctx, "Unexpected depth format %s\n",
+                   _mesa_get_format_name(intel_rb_format(drb)));
+      return BRW_DEPTHFORMAT_D16_UNORM;
+   }
  }
  
  static void emit_depthbuffer(struct brw_context *brw)
@@ -220,8 +259,16 @@ static void emit_depthbuffer(struct brw_context *brw)
     /* _NEW_BUFFERS */
     struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
     struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
-   struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
+   struct intel_mipmap_tree *stencil_mt = NULL;
+   struct intel_region *hiz_region = NULL;
     unsigned int len;
+   bool separate_stencil = false;
+
+   if (depth_irb &&
+       depth_irb->mt &&
+       depth_irb->mt->hiz_mt) {
+      hiz_region = depth_irb->mt->hiz_mt->region;
+   }
  
     /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
      * non-pipelined state that will need the PIPE_CONTROL workaround.
@@ -231,17 +278,21 @@ static void emit_depthbuffer(struct brw_context *brw)
        intel_emit_depth_stall_flushes(intel);
     }
  
-   /*
-    * If either depth or stencil buffer has packed depth/stencil format,
-    * then don't use separate stencil. Emit only a depth buffer.
+   /* Find the real separate stencil mt if present. */
+   if (stencil_irb) {
+      stencil_mt = stencil_irb->mt;
+      if (stencil_mt->stencil_mt)
+        stencil_mt = stencil_mt->stencil_mt;
+
+      if (stencil_mt->format == MESA_FORMAT_S8)
+        separate_stencil = true;
+   }
+
+   /* If a packed depth/stencil buffer is bound only as the stencil
+    * attachment, emit it through the depth buffer packet instead.
      */
-   if (depth_irb && depth_irb->Base.Format == MESA_FORMAT_S8_Z24) {
-      stencil_irb = NULL;
-   } else if (!depth_irb && stencil_irb
-             && stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) {
+   if (!depth_irb && stencil_irb && !separate_stencil)
        depth_irb = stencil_irb;
-      stencil_irb = NULL;
-   }
  
     if (intel->gen >= 6)
        len = 7;
@@ -250,7 +301,7 @@ static void emit_depthbuffer(struct brw_context *brw)
     else
        len = 5;
  
-   if (!depth_irb && !stencil_irb) {
+   if (!depth_irb && !separate_stencil) {
        BEGIN_BATCH(len);
        OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
        OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
@@ -267,7 +318,7 @@ static void emit_depthbuffer(struct brw_context *brw)
  
        ADVANCE_BATCH();
  
-   } else if (!depth_irb && stencil_irb) {
+   } else if (!depth_irb && separate_stencil) {
        /*
         * There exists a separate stencil buffer but no depth buffer.
         *
@@ -287,9 +338,12 @@ static void emit_depthbuffer(struct brw_context *brw)
         *
         *     [DevGT]: This field must be set to the same value (enabled or
         *     disabled) as Hierarchical Depth Buffer Enable
+       *
+       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
+       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
+       *     [DevGT+]: This field must be set to TRUE.
         */
        assert(intel->has_separate_stencil);
-      assert(stencil_irb->Base.Format == MESA_FORMAT_S8);
  
        BEGIN_BATCH(len);
        OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
@@ -297,10 +351,11 @@ static void emit_depthbuffer(struct brw_context *brw)
                 (1 << 21) | /* separate stencil enable */
                 (1 << 22) | /* hiz enable */
                 (BRW_TILEWALK_YMAJOR << 26) |
+               (1 << 27) | /* tiled surface */
                 (BRW_SURFACE_2D << 29));
        OUT_BATCH(0);
-      OUT_BATCH(((stencil_irb->region->width - 1) << 6) |
-                (2 * stencil_irb->region->height - 1) << 19);
+      OUT_BATCH(((stencil_irb->Base.Base.Width - 1) << 6) |
+                (stencil_irb->Base.Base.Height - 1) << 19);
        OUT_BATCH(0);
        OUT_BATCH(0);
  
@@ -310,29 +365,11 @@ static void emit_depthbuffer(struct brw_context *brw)
        ADVANCE_BATCH();
  
     } else {
-      struct intel_region *region = depth_irb->region;
-      unsigned int format;
+      struct intel_region *region = depth_irb->mt->region;
        uint32_t tile_x, tile_y, offset;
  
        /* If using separate stencil, hiz must be enabled. */
-      assert(!stencil_irb || hiz_region);
-
-      switch (region->cpp) {
-      case 2:
-        format = BRW_DEPTHFORMAT_D16_UNORM;
-        break;
-      case 4:
-        if (intel->depth_buffer_is_float)
-           format = BRW_DEPTHFORMAT_D32_FLOAT;
-        else if (hiz_region)
-           format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
-        else
-           format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
-        break;
-      default:
-        assert(0);
-        return;
-      }
+      assert(!separate_stencil || hiz_region);
  
        offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);
  
@@ -342,18 +379,18 @@ static void emit_depthbuffer(struct brw_context *brw)
        BEGIN_BATCH(len);
        OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
        OUT_BATCH(((region->pitch * region->cpp) - 1) |
-               (format << 18) |
+               (brw_depthbuffer_format(brw) << 18) |
                 ((hiz_region ? 1 : 0) << 21) | /* separate stencil enable */
                 ((hiz_region ? 1 : 0) << 22) | /* hiz enable */
                 (BRW_TILEWALK_YMAJOR << 26) |
                 ((region->tiling != I915_TILING_NONE) << 27) |
                 (BRW_SURFACE_2D << 29));
-      OUT_RELOC(region->buffer,
+      OUT_RELOC(region->bo,
                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                 offset);
        OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
-               ((region->width - 1) << 6) |
-               ((region->height - 1) << 19));
+               (((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
+               (((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
        OUT_BATCH(0);
  
        if (intel->is_g4x || intel->gen >= 5)
@@ -367,7 +404,7 @@ static void emit_depthbuffer(struct brw_context *brw)
        ADVANCE_BATCH();
     }
  
-   if (hiz_region || stencil_irb) {
+   if (hiz_region || separate_stencil) {
        /*
         * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
         * stencil enable' and 'hiz enable' bits were set. Therefore we must
@@ -381,7 +418,7 @@ static void emit_depthbuffer(struct brw_context *brw)
          BEGIN_BATCH(3);
          OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
          OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
-        OUT_RELOC(hiz_region->buffer,
+        OUT_RELOC(hiz_region->bo,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    0);
          ADVANCE_BATCH();
@@ -394,11 +431,12 @@ static void emit_depthbuffer(struct brw_context *brw)
        }
  
        /* Emit stencil buffer. */
-      if (stencil_irb) {
+      if (separate_stencil) {
+        struct intel_region *region = stencil_mt->region;
          BEGIN_BATCH(3);
          OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
-        OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1);
-        OUT_RELOC(stencil_irb->region->buffer,
+        OUT_BATCH(region->pitch * region->cpp - 1);
+        OUT_RELOC(region->bo,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                    0);
          ADVANCE_BATCH();
@@ -436,7 +474,6 @@ const struct brw_tracked_state brw_depthbuffer = {
        .brw = BRW_NEW_BATCH,
        .cache = 0,
     },
-   .prepare = prepare_depthbuffer,
     .emit = emit_depthbuffer,
  };
  
@@ -604,10 +641,10 @@ const struct brw_tracked_state brw_line_stipple = {
  
  
  /***********************************************************************
- * Misc invarient state packets
+ * Misc invariant state packets
   */
  
-static void upload_invarient_state( struct brw_context *brw )
+static void upload_invariant_state( struct brw_context *brw )
  {
     struct intel_context *intel = &brw->intel;
  
@@ -669,13 +706,13 @@ static void upload_invarient_state( struct brw_context *brw )
     ADVANCE_BATCH();
  }
  
-const struct brw_tracked_state brw_invarient_state = {
+const struct brw_tracked_state brw_invariant_state = {
     .dirty = {
        .mesa = 0,
        .brw = BRW_NEW_CONTEXT,
        .cache = 0
     },
-   .emit = upload_invarient_state
+   .emit = upload_invariant_state
  };
  
  /**
@@ -732,7 +769,13 @@ static void upload_state_base_address( struct brw_context *brw )
                  1); /* Instruction base address: shader kernels (incl. SIP) */
  
         OUT_BATCH(1); /* General state upper bound */
-       OUT_BATCH(1); /* Dynamic state upper bound */
+       /* Dynamic state upper bound.  Although the documentation says that
+       * programming it to zero will cause it to be ignored, that is a lie.
+       * If this isn't programmed to a real bound, the sampler border color
+       * pointer is rejected, causing border color to mysteriously fail.
+       */
+       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+                intel->batch.bo->size | 1);
         OUT_BATCH(1); /* Indirect object upper bound */
         OUT_BATCH(1); /* Instruction access upper bound */
         ADVANCE_BATCH();
@@ -745,7 +788,7 @@ static void upload_state_base_address( struct brw_context *brw )
         OUT_BATCH(1); /* Indirect object base address */
         OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                  1); /* Instruction base address */
-       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(0xfffff001); /* General state upper bound */
         OUT_BATCH(1); /* Indirect object upper bound */
         OUT_BATCH(1); /* Instruction access upper bound */
         ADVANCE_BATCH();