i965/gs: Add a case to brwNewProgram() for geometry shaders.

[mesa.git] / src / mesa / drivers / dri / i965 / gen7_misc_state.c
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c

index 9709b8ef8b4ab98deb5d567b60878f349949342c..eb942cfcafa0c2c838890371bcc3ad28b8b241a3 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -21,6 +21,7 @@
   * IN THE SOFTWARE.
   */
  
+#include "main/mtypes.h"
  #include "intel_batchbuffer.h"
  #include "intel_mipmap_tree.h"
  #include "intel_regions.h"
@@ -29,200 +30,128 @@
  #include "brw_state.h"
  #include "brw_defines.h"
  
-static void emit_depthbuffer(struct brw_context *brw)
+void
+gen7_emit_depth_stencil_hiz(struct brw_context *brw,
+                            struct intel_mipmap_tree *depth_mt,
+                            uint32_t depth_offset, uint32_t depthbuffer_format,
+                            uint32_t depth_surface_type,
+                            struct intel_mipmap_tree *stencil_mt,
+                            bool hiz, bool separate_stencil,
+                            uint32_t width, uint32_t height,
+                            uint32_t tile_x, uint32_t tile_y)
  {
-   struct intel_context *intel = &brw->intel;
-   struct gl_context *ctx = &intel->ctx;
+   struct gl_context *ctx = &brw->ctx;
+   const uint8_t mocs = GEN7_MOCS_L3;
     struct gl_framebuffer *fb = ctx->DrawBuffer;
-
-   /* _NEW_BUFFERS */
-   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
-   struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
-   struct intel_mipmap_tree *depth_mt = NULL,
-                           *stencil_mt = NULL,
-                           *hiz_mt = NULL;
-
-   /* Amount by which drawing should be offset in order to draw to the
-    * appropriate miplevel/zoffset/cubeface.  We will extract these values
-    * from depth_irb or stencil_irb once we determine which is present.
-    */
-   uint32_t draw_x = 0, draw_y = 0;
-
-   /* Masks used to determine how much of the draw_x and draw_y offsets should
-    * be performed using the fine adjustment of "depth coordinate offset X/Y"
-    * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
-    * performed by changing the base addresses of the buffers.
-    *
-    * Since the HiZ, depth, and stencil buffers all use the same "depth
-    * coordinate offset X/Y" values, we need to make sure that the coarse
-    * adjustment will be possible to apply to all three buffers.  Since coarse
-    * adjustment can only be applied in multiples of the tile size, we will OR
-    * together the tile masks of all the buffers to determine which offsets to
-    * perform as fine adjustments.
-    */
-   uint32_t tile_mask_x = 0, tile_mask_y = 0;
-
-   if (drb)
-      depth_mt = drb->mt;
-
-   if (depth_mt) {
-      hiz_mt = depth_mt->hiz_mt;
-
-      intel_region_get_tile_masks(depth_mt->region,
-                                  &tile_mask_x, &tile_mask_y, false);
-
-      if (hiz_mt) {
-         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
-         intel_region_get_tile_masks(hiz_mt->region,
-                                     &hiz_tile_mask_x, &hiz_tile_mask_y,
-                                     false);
-
-         /* Each HiZ row represents 2 rows of pixels */
-         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
-
-         tile_mask_x |= hiz_tile_mask_x;
-         tile_mask_y |= hiz_tile_mask_y;
-      }
+   uint32_t surftype;
+   unsigned int depth = 1;
+   unsigned int min_array_element;
+   GLenum gl_target = GL_TEXTURE_2D;
+   unsigned int lod;
+   const struct intel_mipmap_tree *mt = depth_mt ? depth_mt : stencil_mt;
+   const struct intel_renderbuffer *irb = NULL;
+   const struct gl_renderbuffer *rb = NULL;
+
+   intel_emit_depth_stall_flushes(brw);
+
+   irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   if (!irb)
+      irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+   rb = (struct gl_renderbuffer*) irb;
+
+   if (rb) {
+      depth = MAX2(rb->Depth, 1);
+      if (rb->TexImage)
+         gl_target = rb->TexImage->TexObject->Target;
     }
  
-   if (srb) {
-      stencil_mt = srb->mt;
-      if (stencil_mt->stencil_mt)
-        stencil_mt = stencil_mt->stencil_mt;
-
-      assert(stencil_mt->format == MESA_FORMAT_S8);
-
-      /* Stencil buffer uses 64x64 tiles. */
-      tile_mask_x |= 63;
-      tile_mask_y |= 63;
+   switch (gl_target) {
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+      /* The PRM claims that we should use BRW_SURFACE_CUBE for this
+       * situation, but experiments show that gl_Layer doesn't work when we do
+       * this.  So we use BRW_SURFACE_2D, since for rendering purposes this is
+       * equivalent.
+       */
+      surftype = BRW_SURFACE_2D;
+      depth *= 6;
+      break;
+   default:
+      surftype = translate_tex_target(gl_target);
+      break;
     }
  
-   /* Gen7 doesn't support packed depth/stencil */
-   assert(stencil_mt == NULL || depth_mt != stencil_mt);
-   assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format));
-
-   intel_emit_depth_stall_flushes(intel);
-
-   if (depth_mt == NULL) {
-      uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
-      uint32_t dw3 = 0;
-      uint32_t tile_x = 0, tile_y = 0;
+   if (fb->Layered || !irb) {
+      min_array_element = 0;
+   } else if (irb->mt->num_samples > 1) {
+      /* Convert physical layer to logical layer. */
+      min_array_element = irb->mt_layer / irb->mt->num_samples;
+   } else {
+      min_array_element = irb->mt_layer;
+   }
  
-      if (stencil_mt == NULL) {
-        dw1 |= (BRW_SURFACE_NULL << 29);
-      } else {
-        /* _NEW_STENCIL: enable stencil buffer writes */
-        dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);
+   lod = irb ? irb->mt_level - irb->mt->first_level : 0;
  
-         draw_x = srb->draw_x;
-         draw_y = srb->draw_y;
-         tile_x = draw_x & tile_mask_x;
-         tile_y = draw_y & tile_mask_y;
+   if (mt) {
+      width = mt->physical_width0;
+      height = mt->physical_height0;
+   }
  
-         /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
-          * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
-          * Coordinate Offset X/Y":
-          *
-          *   "The 3 LSBs of both offsets must be zero to ensure correct
-          *   alignment"
-          *
-          * We have no guarantee that tile_x and tile_y are correctly aligned,
-          * since they are determined by the mipmap layout, which is only
-          * aligned to multiples of 4.
-          *
-          * So, to avoid hanging the GPU, just smash the low order 3 bits of
-          * tile_x and tile_y to 0.  This is a temporary workaround until we
-          * come up with a better solution.
-          */
-         tile_x &= ~7;
-         tile_y &= ~7;
+   /* _NEW_DEPTH, _NEW_STENCIL, _NEW_BUFFERS */
+   BEGIN_BATCH(7);
+   /* 3DSTATE_DEPTH_BUFFER dw0 */
+   OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
  
-        /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
-        dw1 |= (BRW_SURFACE_2D << 29);
-        dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
-              ((srb->Base.Base.Height + tile_y - 1) << 18);
-      }
+   /* 3DSTATE_DEPTH_BUFFER dw1 */
+   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
+             (depthbuffer_format << 18) |
+             ((hiz ? 1 : 0) << 22) |
+             ((stencil_mt != NULL && ctx->Stencil._WriteEnabled) << 27) |
+             ((ctx->Depth.Mask != 0) << 28) |
+             (surftype << 29));
  
-      BEGIN_BATCH(7);
-      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
-      OUT_BATCH(dw1);
-      OUT_BATCH(0);
-      OUT_BATCH(dw3);
-      OUT_BATCH(0);
-      OUT_BATCH(tile_x | (tile_y << 16));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
+   /* 3DSTATE_DEPTH_BUFFER dw2 */
+   if (depth_mt) {
+      OUT_RELOC(depth_mt->region->bo,
+               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+               0);
     } else {
-      struct intel_region *region = depth_mt->region;
-      uint32_t tile_x, tile_y, offset;
+      OUT_BATCH(0);
+   }
  
-      draw_x = drb->draw_x;
-      draw_y = drb->draw_y;
-      tile_x = draw_x & tile_mask_x;
-      tile_y = draw_y & tile_mask_y;
+   /* 3DSTATE_DEPTH_BUFFER dw3 */
+   OUT_BATCH(((width - 1) << 4) |
+             ((height - 1) << 18) |
+             lod);
  
-      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
-       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
-       * Coordinate Offset X/Y":
-       *
-       *   "The 3 LSBs of both offsets must be zero to ensure correct
-       *   alignment"
-       *
-       * We have no guarantee that tile_x and tile_y are correctly aligned,
-       * since they are determined by the mipmap layout, which is only aligned
-       * to multiples of 4.
-       *
-       * So, to avoid hanging the GPU, just smash the low order 3 bits of
-       * tile_x and tile_y to 0.  This is a temporary workaround until we come
-       * up with a better solution.
-       */
-      tile_x &= ~7;
-      tile_y &= ~7;
+   /* 3DSTATE_DEPTH_BUFFER dw4 */
+   OUT_BATCH(((depth - 1) << 21) |
+             (min_array_element << 10) |
+             mocs);
  
-      offset = intel_region_get_aligned_offset(region,
-                                               draw_x & ~tile_mask_x,
-                                               draw_y & ~tile_mask_y);
+   /* 3DSTATE_DEPTH_BUFFER dw5 */
+   OUT_BATCH(0);
  
-      assert(region->tiling == I915_TILING_Y);
-
-      /* _NEW_DEPTH, _NEW_STENCIL */
-      BEGIN_BATCH(7);
-      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
-      OUT_BATCH(((region->pitch * region->cpp) - 1) |
-               (brw_depthbuffer_format(brw) << 18) |
-               ((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
-               ((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) |
-               ((ctx->Depth.Mask != 0) << 28) |
-               (BRW_SURFACE_2D << 29));
-      OUT_RELOC(region->bo,
-               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-               offset);
-      OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) |
-                (((drb->Base.Base.Height + tile_y) - 1) << 18));
-      OUT_BATCH(0);
-      OUT_BATCH(tile_x | (tile_y << 16));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
+   /* 3DSTATE_DEPTH_BUFFER dw6 */
+   OUT_BATCH((depth - 1) << 21);
+   ADVANCE_BATCH();
  
-   if (hiz_mt == NULL) {
+   if (!hiz) {
        BEGIN_BATCH(3);
        OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
        OUT_BATCH(0);
        OUT_BATCH(0);
        ADVANCE_BATCH();
     } else {
-      uint32_t hiz_offset =
-         intel_region_get_aligned_offset(hiz_mt->region,
-                                         draw_x & ~tile_mask_x,
-                                         (draw_y & ~tile_mask_y) / 2);
+      struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
        BEGIN_BATCH(3);
        OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
-      OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
+      OUT_BATCH((mocs << 25) |
+                (hiz_mt->region->pitch - 1));
        OUT_RELOC(hiz_mt->region->bo,
                  I915_GEM_DOMAIN_RENDER,
                  I915_GEM_DOMAIN_RENDER,
-                hiz_offset);
+                0);
        ADVANCE_BATCH();
     }
  
@@ -233,37 +162,26 @@ static void emit_depthbuffer(struct brw_context *brw)
        OUT_BATCH(0);
        ADVANCE_BATCH();
     } else {
-      const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;
-
-      /* Note: We can't compute the stencil offset using
-       * intel_region_get_aligned_offset(), because the stencil region claims
-       * that the region is untiled; in fact it's W tiled.
-       */
-      uint32_t stencil_offset =
-         (draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
-         (draw_x & ~tile_mask_x) * 64;
+      const int enabled = brw->is_haswell ? HSW_STENCIL_ENABLED : 0;
  
        BEGIN_BATCH(3);
        OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
-      /* The stencil buffer has quirky pitch requirements.  From the Graphics
-       * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
-       * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
-       * field "Surface Pitch":
+      /* The stencil buffer has quirky pitch requirements.  From the
+       * Sandybridge PRM, Volume 2 Part 1, page 329 (3DSTATE_STENCIL_BUFFER
+       * dword 1 bits 16:0 - Surface Pitch):
         *
         *    The pitch must be set to 2x the value computed based on width, as
         *    the stencil buffer is stored with two rows interleaved.
         *
-       * (Note that it is not 100% clear whether this intended to apply to
-       * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
-       * imply that it doesn't), however the comment appears on a "DevIVB+"
-       * page (which would imply that it does).  Experiments with the hardware
-       * indicate that it does.
+       * While the Ivybridge PRM lacks this comment, the BSpec contains the
+       * same text, and experiments indicate that this is necessary.
         */
        OUT_BATCH(enabled |
-               (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
+                mocs << 25 |
+               (2 * stencil_mt->region->pitch - 1));
        OUT_RELOC(stencil_mt->region->bo,
                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-               stencil_offset);
+               0);
        ADVANCE_BATCH();
     }
  
@@ -283,5 +201,5 @@ const struct brw_tracked_state gen7_depthbuffer = {
        .brw = BRW_NEW_BATCH,
        .cache = 0,
     },
-   .emit = emit_depthbuffer,
+   .emit = brw_emit_depthbuffer,
  };