i965/gen7+: Implement fast color clear operation in BLORP.
author Paul Berry <stereotype441@gmail.com>
Wed, 1 May 2013 15:04:12 +0000 (08:04 -0700)
committer Paul Berry <stereotype441@gmail.com>
Wed, 12 Jun 2013 18:10:06 +0000 (11:10 -0700)
Since we defer allocation of the MCS miptree until the time of the
fast clear operation, this patch also implements creation of the MCS
miptree.

In addition, this patch adds the field
intel_mipmap_tree::fast_clear_color_value, which holds the most recent
fast color clear value, if any. We use it to set the SURFACE_STATE's
clear color for render targets.

v2: Flag BRW_NEW_SURFACES when allocating the MCS miptree.  Generate a
perf_debug message if clearing to a color that isn't compatible with
fast color clear.  Fix "control reaches end of non-void function"
build warning.

Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_blorp.cpp
src/mesa/drivers/dri/i965/brw_blorp.h
src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
src/mesa/drivers/dri/i965/brw_clear.c
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/gen7_blorp.cpp
src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
src/mesa/drivers/dri/intel/intel_mipmap_tree.c
src/mesa/drivers/dri/intel/intel_mipmap_tree.h

index a2d02bfc5e0d50752c54133c7611e9a9a03e623c..9c9a4a7b38d4f55450906c6060c9807ff8e4cc4a 100644 (file)
@@ -145,6 +145,7 @@ brw_blorp_params::brw_blorp_params()
      y1(0),
      depth_format(0),
      hiz_op(GEN6_HIZ_OP_NONE),
+     fast_clear_op(GEN7_FAST_CLEAR_OP_NONE),
      num_samples(0),
      use_wm_prog(false)
 {
index 51b23dbea12136ac9d5a3eb7cb6cd50f50b674a6..08082060b7d41ab7b36e1658fd960f87aab2951e 100644 (file)
@@ -47,7 +47,8 @@ brw_blorp_blit_miptrees(struct intel_context *intel,
                         bool mirror_x, bool mirror_y);
 
 bool
-brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb);
+brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
+                      bool partial_clear);
 
 #ifdef __cplusplus
 } /* end extern "C" */
@@ -192,6 +193,13 @@ struct brw_blorp_prog_data
    bool persample_msaa_dispatch;
 };
 
+
+enum gen7_fast_clear_op { /* stored in brw_blorp_params::fast_clear_op */
+   GEN7_FAST_CLEAR_OP_NONE,       /* no fast clear; brw_blorp_params() default */
+   GEN7_FAST_CLEAR_OP_FAST_CLEAR, /* PS fast-clear enable bit gets set */
+};
+
+
 class brw_blorp_params
 {
 public:
@@ -209,6 +217,7 @@ public:
    brw_blorp_surface_info src;
    brw_blorp_surface_info dst;
    enum gen6_hiz_op hiz_op;
+   enum gen7_fast_clear_op fast_clear_op;
    unsigned num_samples;
    bool use_wm_prog;
    brw_blorp_wm_push_constants wm_push_consts;
index b626659f97ef71292374a719929811eafbbf4d77..1f98360b51f1d40b6c76e79f2cd15dae8b5ff155 100644 (file)
@@ -49,7 +49,8 @@ public:
    brw_blorp_clear_params(struct brw_context *brw,
                           struct gl_framebuffer *fb,
                           struct gl_renderbuffer *rb,
-                          GLubyte *color_mask);
+                          GLubyte *color_mask,
+                          bool partial_clear);
 
    virtual uint32_t get_wm_prog(struct brw_context *brw,
                                 brw_blorp_prog_data **prog_data) const;
@@ -105,10 +106,53 @@ brw_blorp_clear_program::~brw_blorp_clear_program()
    ralloc_free(mem_ctx);
 }
 
+
+/**
+ * Determine if fast color clear supports the given clear color.
+ *
+ * Fast color clear can only clear to color values of 1.0 or 0.0.  At the
+ * moment we only support floating point, unorm, and snorm buffers.
+ */
+static bool
+is_color_fast_clear_compatible(struct intel_context *intel,
+                               gl_format format,
+                               const union gl_color_union *color)
+{
+   if (_mesa_is_format_integer_color(format))
+      return false;
+
+   for (int i = 0; i < 4; i++) {
+      if (color->f[i] != 0.0 && color->f[i] != 1.0) {
+         perf_debug("Clear color unsupported by fast color clear.  "
+                    "Falling back to slow clear.");
+         return false;
+      }
+   }
+   return true;
+}
+
+
+/**
+ * Convert the given color to the DWORD 7 SURFACE_STATE clear-color bits.
+ * Non-zero channel i sets bit SHIFT + (3 - i); 1u avoids signed 1 << 31.
+ */
+static uint32_t
+compute_fast_clear_color_bits(const union gl_color_union *color)
+{
+   uint32_t bits = 0;
+   for (int i = 0; i < 4; i++) {
+      if (color->f[i] != 0.0)
+         bits |= 1u << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
+   }
+   return bits;
+}
+
+
 brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
                                                struct gl_framebuffer *fb,
                                                struct gl_renderbuffer *rb,
-                                               GLubyte *color_mask)
+                                               GLubyte *color_mask,
+                                               bool partial_clear)
 {
    struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &intel->ctx;
@@ -161,6 +205,56 @@ brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
          wm_prog_key.use_simd16_replicated_data = false;
       }
    }
+
+   /* If we can do this as a fast color clear, do so. */
+   if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
+       wm_prog_key.use_simd16_replicated_data &&
+       is_color_fast_clear_compatible(intel, format, &ctx->Color.ClearColor)) {
+      memset(push_consts, 0xff, 4*sizeof(float));
+      fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
+
+      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
+       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
+       *
+       *     Clear pass must have a clear rectangle that must follow alignment
+       *     rules in terms of pixels and lines as shown in the table
+       *     below. Further, the clear-rectangle height and width must be
+       *     multiple of the following dimensions. If the height and width of
+       *     the render target being cleared do not meet these requirements,
+       *     an MCS buffer can be created such that it follows the requirement
+       *     and covers the RT.
+       *
+       * The alignment size in the table that follows is related to the
+       * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
+       * with X alignment multiplied by 16 and Y alignment multiplied by 32.
+       */
+      unsigned x_align, y_align;
+      intel_get_non_msrt_mcs_alignment(intel, irb->mt, &x_align, &y_align);
+      x_align *= 16;
+      y_align *= 32;
+      x0 = ROUND_DOWN_TO(x0, x_align);
+      y0 = ROUND_DOWN_TO(y0, y_align);
+      x1 = ALIGN(x1, x_align);
+      y1 = ALIGN(y1, y_align);
+
+      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
+       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
+       *
+       *     In order to optimize the performance MCS buffer (when bound to 1X
+       *     RT) clear similarly to MCS buffer clear for MSRT case, clear rect
+       *     is required to be scaled by the following factors in the
+       *     horizontal and vertical directions:
+       *
+       * The X and Y scale down factors in the table that follows are each
+       * equal to half the alignment value computed above.
+       */
+      unsigned x_scaledown = x_align / 2;
+      unsigned y_scaledown = y_align / 2;
+      x0 /= x_scaledown;
+      y0 /= y_scaledown;
+      x1 /= x_scaledown;
+      y1 /= y_scaledown;
+   }
 }
 
 uint32_t
@@ -264,7 +358,8 @@ brw_blorp_clear_program::compile(struct brw_context *brw,
 
 extern "C" {
 bool
-brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
+brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb,
+                      bool partial_clear)
 {
    struct gl_context *ctx = &intel->ctx;
    struct brw_context *brw = brw_context(ctx);
@@ -286,6 +381,7 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
 
    for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
       struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
+      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
       /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
        * the framebuffer can be complete with some attachments missing.  In
@@ -294,8 +390,53 @@ brw_blorp_clear_color(struct intel_context *intel, struct gl_framebuffer *fb)
       if (rb == NULL)
          continue;
 
-      brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf]);
+      brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
+                                    partial_clear);
+
+      bool is_fast_clear =
+         (params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
+      if (is_fast_clear) {
+         /* Record the clear color in the miptree so that it will be
+          * programmed in SURFACE_STATE by later rendering and resolve
+          * operations.
+          */
+         uint32_t new_color_value =
+            compute_fast_clear_color_bits(&ctx->Color.ClearColor);
+         if (irb->mt->fast_clear_color_value != new_color_value) {
+            irb->mt->fast_clear_color_value = new_color_value;
+            brw->state.dirty.brw |= BRW_NEW_SURFACES;
+         }
+
+         /* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
+          * redundant and can be skipped.
+          */
+         if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
+            continue;
+
+         /* If the MCS buffer hasn't been allocated yet, we need to allocate
+          * it now.
+          */
+         if (!irb->mt->mcs_mt) {
+            if (!intel_miptree_alloc_non_msrt_mcs(intel, irb->mt)) {
+               /* MCS allocation failed--probably this will only happen in
+                * out-of-memory conditions.  But in any case, try to recover
+                * by falling back to a non-blorp clear technique.
+                */
+               return false;
+            }
+            brw->state.dirty.brw |= BRW_NEW_SURFACES;
+         }
+      }
+
       brw_blorp_exec(intel, &params);
+
+      if (is_fast_clear) {
+         /* Now that the fast clear has occurred, put the buffer in
+          * INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
+          * clears.
+          */
+         irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
+      }
    }
 
    return true;
index 2b999bfb5b9fa48e93b4c582a065bfa5b21b6bbb..80b7a0c0751c2ef143c73b7f3eb9045660a4e3bf 100644 (file)
@@ -234,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
    /* BLORP is currently only supported on Gen6+. */
    if (intel->gen >= 6) {
       if (mask & BUFFER_BITS_COLOR) {
-         if (brw_blorp_clear_color(intel, fb)) {
+         if (brw_blorp_clear_color(intel, fb, partial_clear)) {
             debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
             mask &= ~BUFFER_BITS_COLOR;
          }
index d61151f6506324c278790061c8173c78abf11105..ce1f71db9e76c873c8472dbb00fbb6d763420cca 100644 (file)
 #define GEN7_SURFACE_MCS_PITCH_MASK             INTEL_MASK(11, 3)
 
 /* Surface state DW7 */
+#define GEN7_SURFACE_CLEAR_COLOR_SHIFT         28
 #define GEN7_SURFACE_SCS_R_SHIFT                25
 #define GEN7_SURFACE_SCS_R_MASK                 INTEL_MASK(27, 25)
 #define GEN7_SURFACE_SCS_G_SHIFT                22
@@ -1615,6 +1616,7 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN7_PS_PUSH_CONSTANT_ENABLE                  (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE                      (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET                        (1 << 9)
+# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE       (1 << 8)
 # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE              (1 << 7)
 # define GEN7_PS_POSOFFSET_NONE                                (0 << 3)
 # define GEN7_PS_POSOFFSET_CENTROID                    (2 << 3)
index 208c66a28f516b629388fe6b8be0e37a2314b5cc..1b2d3099491600ca6c96ab9d296c7e3e21cdb1e2 100644 (file)
@@ -199,11 +199,13 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
                                 is_render_target);
    }
 
+   surf[7] = surface->mt->fast_clear_color_value;
+
    if (intel->is_haswell) {
-      surf[7] = SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
-                SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
-                SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
-                SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+      surf[7] |= (SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
+                  SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+                  SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
+                  SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
    }
 
    /* Emit relocation to surface contents */
@@ -584,6 +586,14 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
       dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
    }
 
+   switch (params->fast_clear_op) {
+   case GEN7_FAST_CLEAR_OP_FAST_CLEAR:
+      dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
+      break;
+   default:
+      break;
+   }
+
    BEGIN_BATCH(8);
    OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
    OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
index 3164f994dd4c1691db8e2fc2c055d5dc15d15e5b..1a4e416d777e2c0fa9b5a5f84b6e87603a6ad9ac 100644 (file)
@@ -614,11 +614,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
                                 irb->mt->mcs_mt, true /* is RT */);
    }
 
+   surf[7] = irb->mt->fast_clear_color_value;
+
    if (intel->is_haswell) {
-      surf[7] = SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
-                SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
-                SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
-                SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+      surf[7] |= (SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
+                  SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+                  SET_FIELD(HSW_SCS_BLUE,  GEN7_SURFACE_SCS_B) |
+                  SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
    }
 
    drm_intel_bo_emit_reloc(brw->intel.batch.bo,
index a75ac81994b8dbd557bfb9ec0d3ace1cf119983b..ba941c099f281366f7d0aff0b46bda07e44ddad8 100644 (file)
@@ -1201,6 +1201,54 @@ intel_miptree_alloc_mcs(struct intel_context *intel,
 #endif
 }
 
+
+/**
+ * Create the MCS miptree for a non-multisampled color miptree.
+ *
+ * On success the new tree is stored in \c mt->mcs_mt.
+ *
+ * \return true if \c mt->mcs_mt is non-NULL afterwards, false otherwise
+ *         (always false, following an assert, in I915 builds).
+ */
+bool
+intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
+                                 struct intel_mipmap_tree *mt)
+{
+#ifdef I915
+   assert(!"MCS not supported on i915");
+   return false;
+#else
+   assert(mt->mcs_mt == NULL);
+
+   /* The driver never interprets the MCS contents, so only the size and
+    * pitch of the buffer matter; it is allocated as though it were a Y-tiled
+    * R32 surface.  An MCS tile covers 128 blocks horizontally and a Y-tiled
+    * R32 tile is 32 pixels across, so the width shrinks by the block width
+    * times 4.  An MCS tile covers 256 blocks vertically and a Y-tiled R32
+    * tile is 32 rows high, so the height shrinks by the block height times 8.
+    */
+   unsigned blk_w_px, blk_h;
+   intel_get_non_msrt_mcs_alignment(intel, mt, &blk_w_px, &blk_h);
+
+   const unsigned w_div = 4 * blk_w_px;
+   const unsigned h_div = 8 * blk_h;
+   const unsigned mcs_w = ALIGN(mt->logical_width0, w_div) / w_div;
+   const unsigned mcs_h = ALIGN(mt->logical_height0, h_div) / h_div;
+
+   assert(mt->logical_depth0 == 1);
+   mt->mcs_mt = intel_miptree_create(intel, mt->target,
+                                     MESA_FORMAT_R_UINT32,
+                                     mt->first_level, mt->last_level,
+                                     mcs_w, mcs_h, mt->logical_depth0,
+                                     true,
+                                     0 /* num_samples */,
+                                     INTEL_MIPTREE_TILING_Y);
+
+   return mt->mcs_mt != NULL;
+#endif
+}
+
+
 /**
  * Helper for intel_miptree_alloc_hiz() that sets
  * \c mt->level[level].slice[layer].has_hiz. Return true if and only if
index e11d0d63a27336e35a0d16dd8e29860ab8a97fd8..c44c8eaf4a9ace16bba7f0f23b4b4a8a1a40105a 100644 (file)
@@ -459,6 +459,15 @@ struct intel_mipmap_tree
    enum intel_mcs_state mcs_state;
 #endif
 
+   /**
+    * The SURFACE_STATE bits associated with the last fast color clear to this
+    * color mipmap tree, if any.
+    *
+    * This value will only ever contain ones in bits 28-31, so it is safe to
+    * OR into dword 7 of SURFACE_STATE.
+    */
+   uint32_t fast_clear_color_value;
+
    /* These are also refcounted:
     */
    GLuint refcount;
@@ -479,6 +488,10 @@ intel_get_non_msrt_mcs_alignment(struct intel_context *intel,
                                  struct intel_mipmap_tree *mt,
                                  unsigned *width_px, unsigned *height);
 
+bool
+intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel,
+                                 struct intel_mipmap_tree *mt);
+
 struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
                                                GLenum target,
                                               gl_format format,