i965/skl: Add fast color clear infrastructure

author Ben Widawsky <benjamin.widawsky@intel.com>

Tue, 14 Apr 2015 21:57:51 +0000 (14:57 -0700)

committer Ben Widawsky <benjamin.widawsky@intel.com>

Fri, 20 Nov 2015 19:45:32 +0000 (11:45 -0800)
author Ben Widawsky <benjamin.widawsky@intel.com>
Tue, 14 Apr 2015 21:57:51 +0000 (14:57 -0700)
committer Ben Widawsky <benjamin.widawsky@intel.com>
Fri, 20 Nov 2015 19:45:32 +0000 (11:45 -0800)
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c

index 211c0a44162b8f3700a189aa78d6ba24a3e6b633..938e028f58d437b3bd4197212ef68260ad90a3b2 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -204,7 +204,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
  }
  
  static void
-get_fast_clear_rect(struct gl_framebuffer *fb,
+get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
                      struct intel_renderbuffer *irb, struct rect *rect)
  {
     unsigned int x_align, y_align;
@@ -228,7 +228,14 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
         */
        intel_get_non_msrt_mcs_alignment(irb->mt, &x_align, &y_align);
        x_align *= 16;
-      y_align *= 32;
+
+      /* On SKL+, the line alignment requirements for Y-tiled surfaces are half
+       * those of prior generations.
+       */
+      if (brw->gen >= 9)
+         y_align *= 16;
+      else
+         y_align *= 32;
  
        /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
         * Target(s)", beneath the "Fast Color Clear" bullet (p327):
@@ -265,8 +272,10 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
         *     terms of (width,height) of the RT.
         *
         *     MSAA  Width of Clear Rect  Height of Clear Rect
+       *      2X     Ceil(1/8*width)      Ceil(1/2*height)
         *      4X     Ceil(1/8*width)      Ceil(1/2*height)
         *      8X     Ceil(1/2*width)      Ceil(1/2*height)
+       *     16X         width            Ceil(1/2*height)
         *
         * The text "with upper left co-ordinate to coincide with actual
         * rectangle being cleared" is a little confusing--it seems to imply
@@ -289,6 +298,9 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
        case 8:
           x_scaledown = 2;
           break;
+      case 16:
+         x_scaledown = 1;
+         break;
        default:
           unreachable("Unexpected sample count for fast clear");
        }
@@ -357,18 +369,25 @@ is_color_fast_clear_compatible(struct brw_context *brw,
  
  /**
   * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
- * SURFACE_STATE.
+ * SURFACE_STATE (DWORD 12-15 on SKL+).
   */
-static uint32_t
-compute_fast_clear_color_bits(const union gl_color_union *color)
+static void
+set_fast_clear_color(struct brw_context *brw,
+                     struct intel_mipmap_tree *mt,
+                     const union gl_color_union *color)
  {
-   uint32_t bits = 0;
-   for (int i = 0; i < 4; i++) {
-      /* Testing for non-0 works for integer and float colors */
-      if (color->f[i] != 0.0f)
-         bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+   if (brw->gen >= 9) {
+      mt->gen9_fast_clear_color = *color;
+   } else {
+      mt->fast_clear_color_value = 0;
+      for (int i = 0; i < 4; i++) {
+         /* Testing for non-0 works for integer and float colors */
+         if (color->f[i] != 0.0f) {
+             mt->fast_clear_color_value |=
+                1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+         }
+      }
     }
-   return bits;
  }
  
  static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 };
@@ -510,8 +529,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
  
        switch (clear_type) {
        case FAST_CLEAR:
-         irb->mt->fast_clear_color_value =
-            compute_fast_clear_color_bits(&ctx->Color.ClearColor);
+         set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor);
           irb->need_downsample = true;
  
           /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the
@@ -527,7 +545,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
           irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
           irb->need_downsample = true;
           fast_clear_buffers |= 1 << index;
-         get_fast_clear_rect(fb, irb, &fast_clear_rect);
+         get_fast_clear_rect(brw, fb, irb, &fast_clear_rect);
           break;
  
        case REP_CLEAR:
@@ -662,8 +680,9 @@ get_resolve_rect(struct brw_context *brw,
      *
      * The scaledown factors in the table that follows are related to the
      * alignment size returned by intel_get_non_msrt_mcs_alignment() by a
-    * multiplier.  For IVB and HSW, we divide by two, for BDW we multiply
-    * by 8 and 16 and 8 and 8 for SKL.
+    * multiplier. For IVB and HSW, we divide by two; for BDW, we multiply
+    * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
+    * by a factor of 2.
      */
  
     intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align);
@@ -709,6 +728,10 @@ brw_meta_resolve_color(struct brw_context *brw,
  
     brw_bind_rep_write_shader(brw, (float *) fast_clear_color);
  
+   /* SKL+ also has a resolve mode for compressed render targets and thus more
+    * bits to let us select the type of resolve.  For fast clear resolves, it
+    * turns out we can use the same value as pre-SKL though.
+    */
     set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE);
  
     mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c

index 140a65449838e38cdd4510fc879e52504556b4cc..69098583357f488a58956d26e5375e90c483e1c2 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -187,7 +187,21 @@ gen8_emit_fast_clear_color(struct brw_context *brw,
                             struct intel_mipmap_tree *mt,
                             uint32_t *surf)
  {
-   surf[7] |= mt->fast_clear_color_value;
+   if (brw->gen >= 9) {
+#define check_fast_clear_val(x) \
+      assert(mt->gen9_fast_clear_color.f[x] == 0.0 || \
+             mt->gen9_fast_clear_color.f[x] == 1.0)
+      check_fast_clear_val(0);
+      check_fast_clear_val(1);
+      check_fast_clear_val(2);
+      check_fast_clear_val(3);
+#undef check_fast_clear_val
+      surf[12] = mt->gen9_fast_clear_color.ui[0];
+      surf[13] = mt->gen9_fast_clear_color.ui[1];
+      surf[14] = mt->gen9_fast_clear_color.ui[2];
+      surf[15] = mt->gen9_fast_clear_color.ui[3];
+   } else
+      surf[7] |= mt->fast_clear_color_value;
  }
  
  static void
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c

index b1a7632d82faf851b8c9e40a2ca1f82517909dc6..4c3f2c00d6f0b70b882d42f2024de69b14f4a192 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -192,6 +192,12 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
   *
   *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
   *       64bpp, and 128bpp.
+ *
+ * From the Skylake documentation, it is made clear that X-tiling is no longer
+ * supported:
+ *
+ *     - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
+ *     non-MSRTs only.
   */
  static bool
  intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
@@ -1495,6 +1501,17 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
     intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
     unsigned width_divisor = block_width_px * 4;
     unsigned height_divisor = block_height * 8;
+
+   /* The Skylake MCS is twice as tall as the Broadwell MCS.
+    *
+    * Before Skylake, each bit in the MCS held the state of 2 cachelines in
+    * the main surface. On Skylake, that state takes two bits. The extra bit
+    * doubles the MCS height, not width, because on Skylake the MCS is always
+    * Y-tiled.
+    */
+   if (brw->gen >= 9)
+      height_divisor /= 2;
+
     unsigned mcs_width =
        ALIGN(mt->logical_width0, width_divisor) / width_divisor;
     unsigned mcs_height =
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h

index 805cd714d882e64511478e9d877c8da682d459ec..64f73ea9ae5b5315d8ccf0cbc3d86bbf9a2977c5 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -633,15 +633,22 @@ struct intel_mipmap_tree
      * The SURFACE_STATE bits associated with the last fast color clear to this
      * color mipmap tree, if any.
      *
-    * This value will only ever contain ones in bits 28-31, so it is safe to
-    * OR into dword 7 of SURFACE_STATE.
+    * Prior to GEN9 there is a single bit per RGBA channel, which gives a
+    * choice of 2^4 clear colors. Each bit determines whether its color channel
+    * is fully saturated or unsaturated (Cherryview does add a 32b value per
+    * channel, but it is globally applied instead of being part of the render
+    * surface state). Starting with GEN9, the surface state accepts a 32b value
+    * for each color channel.
      *
      * @see RENDER_SURFACE_STATE.RedClearColor
      * @see RENDER_SURFACE_STATE.GreenClearColor
      * @see RENDER_SURFACE_STATE.BlueClearColor
      * @see RENDER_SURFACE_STATE.AlphaClearColor
      */
-   uint32_t fast_clear_color_value;
+   union {
+      uint32_t fast_clear_color_value;
+      union gl_color_union gen9_fast_clear_color;
+   };
  
     /**
      * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS
author	Ben Widawsky <benjamin.widawsky@intel.com>
	Tue, 14 Apr 2015 21:57:51 +0000 (14:57 -0700)
committer	Ben Widawsky <benjamin.widawsky@intel.com>
	Fri, 20 Nov 2015 19:45:32 +0000 (11:45 -0800)
src/mesa/drivers/dri/i965/brw_meta_fast_clear.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen8_surface_state.c		patch \| blob \| history
src/mesa/drivers/dri/i965/intel_mipmap_tree.c		patch \| blob \| history
src/mesa/drivers/dri/i965/intel_mipmap_tree.h		patch \| blob \| history