i965: Rework Sandy Bridge HiZ and stencil layouts

author Jason Ekstrand <jason.ekstrand@intel.com>

Sat, 27 May 2017 17:36:23 +0000 (10:36 -0700)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Thu, 1 Jun 2017 22:33:26 +0000 (15:33 -0700)
author Jason Ekstrand <jason.ekstrand@intel.com>
Sat, 27 May 2017 17:36:23 +0000 (10:36 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 1 Jun 2017 22:33:26 +0000 (15:33 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c

index 9030fe7e7608a2119b69d65796c13fd72771d1db..61c6bda56b5db520099ad48f265814426d08e956 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -123,7 +123,7 @@ apply_gen6_stencil_hiz_offset(struct isl_surf *surf,
                                uint32_t lod,
                                uint32_t *offset)
  {
-   assert(mt->array_layout == ALL_SLICES_AT_EACH_LOD);
+   assert(mt->array_layout == GEN6_HIZ_STENCIL);
  
     if (mt->format == MESA_FORMAT_S_UINT8) {
        /* Note: we can't compute the stencil offset using
@@ -182,12 +182,12 @@ blorp_surf_for_miptree(struct brw_context *brw,
     };
  
     if (brw->gen == 6 && mt->format == MESA_FORMAT_S_UINT8 &&
-       mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
-      /* Sandy bridge stencil and HiZ use this ALL_SLICES_AT_EACH_LOD hack in
+       mt->array_layout == GEN6_HIZ_STENCIL) {
+      /* Sandy bridge stencil and HiZ use this GEN6_HIZ_STENCIL hack in
         * order to allow for layered rendering.  The hack makes each LOD of the
         * stencil or HiZ buffer a single tightly packed array surface at some
         * offset into the surface.  Since ISL doesn't know how to deal with the
-       * crazy ALL_SLICES_AT_EACH_LOD layout and since we have to do a manual
+       * crazy GEN6_HIZ_STENCIL layout and since we have to do a manual
         * offset of it anyway, we might as well do the offset here and keep the
         * hacks inside the i965 driver.
         *
@@ -261,8 +261,7 @@ blorp_surf_for_miptree(struct brw_context *brw,
  
           struct intel_mipmap_tree *hiz_mt = mt->hiz_buf->mt;
           if (hiz_mt) {
-            assert(brw->gen == 6 &&
-                   hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD);
+            assert(brw->gen == 6 && hiz_mt->array_layout == GEN6_HIZ_STENCIL);
  
              /* gen6 requires the HiZ buffer to be manually offset to the
               * right location.  We could fixup the surf but it doesn't
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c

index bfa8afaa69899e9a414f9b630ad0e6234375f208..1f0a1e9a38daac13770465c65b0eddcf0ad3c6b5 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -216,6 +216,8 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
        mt->total_height = MAX2(mt->total_height, y + img_height);
  
        /* Layout_below: step right after second mipmap.
+       *
+       * For Sandy Bridge HiZ and stencil, we always step down.
         */
        if (level == mt->first_level + 1) {
          x += ALIGN_NPOT(width, mt->halign) / bw;
@@ -231,6 +233,67 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
     }
  }
  
+static void
+brw_miptree_layout_gen6_hiz_stencil(struct intel_mipmap_tree *mt)
+{
+   unsigned x = 0;
+   unsigned y = 0;
+   unsigned width = mt->physical_width0;
+   unsigned height = mt->physical_height0;
+   /* Number of layers of array texture. */
+   unsigned depth = mt->physical_depth0;
+   unsigned tile_width, tile_height, bw, bh;
+
+   if (mt->format == MESA_FORMAT_S_UINT8) {
+      bw = bh = 1;
+      /* W-tiled */
+      tile_width = 64;
+      tile_height = 64;
+   } else {
+      assert(_mesa_get_format_base_format(mt->format) == GL_DEPTH_COMPONENT ||
+             _mesa_get_format_base_format(mt->format) == GL_DEPTH_STENCIL);
+      /* Each 128-bit HiZ block corresponds to a region of 8x4 depth
+       * samples.  Each cache line in the Y-Tiled HiZ image contains 2x2 HiZ
+       * blocks.  Therefore, each Y-tiled cache line corresponds to a 16x8
+       * region in the depth surface.  Since we're representing it as
+       * RGBA_FLOAT32, the miptree calculations will think that each cache
+       * line is 1x4 pixels.  Therefore, we need a scale-down factor of 16x2
+       * and a vertical alignment of 2.
+       */
+      mt->cpp = 16;
+      bw = 16;
+      bh = 2;
+      /* Y-tiled */
+      tile_width = 128 / mt->cpp;
+      tile_height = 32;
+   }
+
+   mt->total_width = 0;
+   mt->total_height = 0;
+
+   for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
+      intel_miptree_set_level_info(mt, level, x, y, depth);
+
+      const unsigned img_width = ALIGN(DIV_ROUND_UP(width, bw), mt->halign);
+      const unsigned img_height =
+         ALIGN(DIV_ROUND_UP(height, bh), mt->valign) * depth;
+
+      mt->total_width = MAX2(mt->total_width, x + img_width);
+      mt->total_height = MAX2(mt->total_height, y + img_height);
+
+      if (level == mt->first_level) {
+         y += ALIGN(img_height, tile_height);
+      } else {
+         x += ALIGN(img_width, tile_width);
+      }
+
+      /* We only minify the width.  We want qpitch to match for all miplevels
+       * because the hardware doesn't know we aren't on LOD0.
+       */
+      width = minify(width, 1);
+   }
+}
+
  unsigned
  brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
                                         const struct intel_mipmap_tree *mt,
@@ -249,6 +312,8 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
                                       const struct intel_mipmap_tree *mt,
                                       unsigned level)
  {
+   assert(mt->array_layout != GEN6_HIZ_STENCIL || brw->gen == 6);
+
     if (brw->gen >= 9) {
        /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will
         * effectively end up with a packed qpitch anyway whenever
@@ -281,6 +346,15 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
                mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
        return ALIGN_NPOT(minify(mt->physical_height0, level), mt->valign);
  
+   } else if (mt->array_layout == GEN6_HIZ_STENCIL) {
+      /* For HiZ and stencil on Sandy Bridge, we don't minify the height. */
+      if (mt->format == MESA_FORMAT_S_UINT8) {
+         return ALIGN(mt->physical_height0, mt->valign);
+      } else {
+         /* HiZ has a vertical scale factor of 2. */
+         return ALIGN(DIV_ROUND_UP(mt->physical_height0, 2), mt->valign);
+      }
+
     } else {
        const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->valign);
        const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->valign);
@@ -333,6 +407,8 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
  
     if (layout_1d)
        gen9_miptree_layout_1d(mt);
+   else if (mt->array_layout == GEN6_HIZ_STENCIL)
+      brw_miptree_layout_gen6_hiz_stencil(mt);
     else
        brw_miptree_layout_2d(mt);
  
@@ -556,6 +632,8 @@ intel_miptree_set_total_width_height(struct brw_context *brw,
        case INTEL_MSAA_LAYOUT_IMS:
           if (gen9_use_linear_1d_layout(brw, mt))
              gen9_miptree_layout_1d(mt);
+         else if (mt->array_layout == GEN6_HIZ_STENCIL)
+            brw_miptree_layout_gen6_hiz_stencil(mt);
           else
              brw_miptree_layout_2d(mt);
           break;
@@ -579,15 +657,9 @@ intel_miptree_set_alignment(struct brw_context *brw,
      * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
      * - BSpec (for Ivybridge and slight variations in separate stencil)
      */
-   bool gen6_hiz_or_stencil = false;
  
-   if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
-      const GLenum base_format = _mesa_get_format_base_format(mt->format);
-      gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
-   }
-
-   if (gen6_hiz_or_stencil) {
-      /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
+   if (mt->array_layout == GEN6_HIZ_STENCIL) {
+      /* On gen6, we use GEN6_HIZ_STENCIL for stencil/hiz because the
         * hardware doesn't support multiple mip levels on stencil/hiz.
         *
         * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
@@ -600,15 +672,13 @@ intel_miptree_set_alignment(struct brw_context *brw,
           /* Stencil uses W tiling, so we force W tiling alignment for the
            * ALL_SLICES_AT_EACH_LOD miptree layout.
            */
-         mt->halign = 64;
-         mt->valign = 64;
+         mt->halign = 4;
+         mt->valign = 2;
           assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
        } else {
-         /* Depth uses Y tiling, so we force need Y tiling alignment for the
-          * ALL_SLICES_AT_EACH_LOD miptree layout.
-          */
-         mt->halign = 128 / mt->cpp;
-         mt->valign = 32;
+         /* See brw_miptree_layout_gen6_hiz_stencil() */
+         mt->halign = 1;
+         mt->valign = 2;
        }
     } else if (mt->compressed) {
         /* The hardware alignment requirements for compressed textures
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c b/src/mesa/drivers/dri/i965/gen6_depth_state.c

index 0ff240753e3163b5ae01eda756ce4c2be7bb1f4d..a77e4616dc5063655c3409546c666de817a2d79b 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -164,7 +164,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
           struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_buf->mt;
           uint32_t offset = 0;
  
-         if (hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
+         if (hiz_mt->array_layout == GEN6_HIZ_STENCIL) {
              offset = intel_miptree_get_aligned_offset(
                          hiz_mt,
                          hiz_mt->level[lod].level_x,
@@ -190,7 +190,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
        if (separate_stencil) {
           uint32_t offset = 0;
  
-         if (stencil_mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
+         if (stencil_mt->array_layout == GEN6_HIZ_STENCIL) {
              assert(stencil_mt->format == MESA_FORMAT_S_UINT8);
  
              /* Note: we can't compute the stencil offset using
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c

index b73c0d0ad1de8553be3ec393b91023b2029deed6..f7a69a0e235c40d0a70a5fc01878a29cdead736a 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -464,7 +464,7 @@ intel_miptree_create_layout(struct brw_context *brw,
           intel_miptree_wants_hiz_buffer(brw, mt)))) {
        uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
        if (brw->gen == 6) {
-         stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD |
+         stencil_flags |= MIPTREE_LAYOUT_GEN6_HIZ_STENCIL |
                            MIPTREE_LAYOUT_TILING_ANY;
        }
  
@@ -497,8 +497,8 @@ intel_miptree_create_layout(struct brw_context *brw,
        }
     }
  
-   if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
-      mt->array_layout = ALL_SLICES_AT_EACH_LOD;
+   if (layout_flags & MIPTREE_LAYOUT_GEN6_HIZ_STENCIL)
+      mt->array_layout = GEN6_HIZ_STENCIL;
  
     /*
      * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are
@@ -1790,7 +1790,7 @@ intel_hiz_miptree_buf_create(struct brw_context *brw,
     uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
  
     if (brw->gen == 6)
-      layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
+      layout_flags |= MIPTREE_LAYOUT_GEN6_HIZ_STENCIL;
  
     if (!buf)
        return NULL;
@@ -2380,7 +2380,7 @@ intel_update_r8stencil(struct brw_context *brw,
        const uint32_t r8stencil_flags =
           MIPTREE_LAYOUT_ACCELERATED_UPLOAD | MIPTREE_LAYOUT_TILING_Y |
           MIPTREE_LAYOUT_DISABLE_AUX;
-      assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD */
+      assert(brw->gen > 6); /* Handle MIPTREE_LAYOUT_GEN6_HIZ_STENCIL */
        mt->r8stencil_mt = intel_miptree_create(brw,
                                                src->target,
                                                MESA_FORMAT_R_UINT8,
@@ -3287,6 +3287,7 @@ intel_miptree_get_isl_surf(struct brw_context *brw,
        surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_FULL;
        break;
     case ALL_SLICES_AT_EACH_LOD:
+   case GEN6_HIZ_STENCIL:
        surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_COMPACT;
        break;
     default:
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h

index 7aabac006f3632ac3d8b7ee20144865b198bb36d..be460f33eb8c4511d3ebd918ae9d0d40de2a193a 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -251,6 +251,41 @@ enum miptree_array_layout {
      *   +---+
      */
     ALL_SLICES_AT_EACH_LOD,
+
+   /* On Sandy Bridge, HiZ and stencil buffers work the same as on Ivy Bridge
+    * except that they don't technically support mipmapping.  That does not,
+    * however, stop us from doing it.  As far as Sandy Bridge hardware is
+    * concerned, HiZ and stencil always operate on a single miplevel 2D
+    * (possibly array) image.  The dimensions of that image are NOT minified.
+    *
+    * In order to implement HiZ and stencil on Sandy Bridge, we create one
+    * full-sized 2D (possibly array) image for every LOD with every image
+    * aligned to a page boundary.  In order to save memory, we pretend that
+    * the width of each miplevel is minified and we place LOD1 and above below
+    * LOD0 but horizontally adjacent to each other.  When considered as
+    * full-sized images, LOD1 and above technically overlap.  However, since
+    * we only write to part of that image, the hardware will never notice the
+    * overlap.
+    *
+    * This layout looks something like this:
+    *
+    *   +---------+
+    *   |         |
+    *   |         |
+    *   +---------+
+    *   |         |
+    *   |         |
+    *   +---------+
+    *
+    *   +----+ +-+ .
+    *   |    | +-+
+    *   +----+
+    *
+    *   +----+ +-+ .
+    *   |    | +-+
+    *   +----+
+    */
+   GEN6_HIZ_STENCIL,
  };
  
  enum intel_aux_disable {
@@ -672,7 +707,7 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
  
  enum {
     MIPTREE_LAYOUT_ACCELERATED_UPLOAD       = 1 << 0,
-   MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD   = 1 << 1,
+   MIPTREE_LAYOUT_GEN6_HIZ_STENCIL         = 1 << 1,
     MIPTREE_LAYOUT_FOR_BO                   = 1 << 2,
     MIPTREE_LAYOUT_DISABLE_AUX              = 1 << 3,
     MIPTREE_LAYOUT_FORCE_HALIGN16           = 1 << 4,
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Sat, 27 May 2017 17:36:23 +0000 (10:36 -0700)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Thu, 1 Jun 2017 22:33:26 +0000 (15:33 -0700)
src/mesa/drivers/dri/i965/brw_blorp.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_tex_layout.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen6_depth_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/intel_mipmap_tree.c		patch \| blob \| history
src/mesa/drivers/dri/i965/intel_mipmap_tree.h		patch \| blob \| history