i965/gen9: Optimize slice and subslice load balancing behavior.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_meta_util.c
diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c b/src/mesa/drivers/dri/i965/brw_meta_util.c

index ac4f6154dc78191798d16de8c191e593480cf829..6a6d68425fad878d55f99068dfdd64c971b190aa 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_meta_util.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_util.c
@@ -22,9 +22,13 @@
   */
  
  #include "brw_context.h"
+#include "brw_defines.h"
  #include "intel_fbo.h"
  #include "brw_meta_util.h"
+#include "brw_state.h"
+#include "main/blend.h"
  #include "main/fbobject.h"
+#include "util/format_srgb.h"
  
  /**
   * Helper function for handling mirror image blits.
@@ -216,6 +220,10 @@ brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
      * 4 * 2 = 8 > 5 in the src.
      */
  
+   if (*srcX0 == *srcX1 || *srcY0 == *srcY1
+       || *dstX0 == *dstX1 || *dstY0 == *dstY1)
+      return true;
+
     float scaleX = (float) (*srcX1 - *srcX0) / (*dstX1 - *dstX0);
     float scaleY = (float) (*srcY1 - *srcY0) / (*dstY1 - *dstY0);
  
@@ -246,62 +254,169 @@ brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
     /* Account for the fact that in the system framebuffer, the origin is at
      * the lower left.
      */
-   if (_mesa_is_winsys_fbo(read_fb)) {
+   if (read_fb->FlipY) {
        GLint tmp = read_fb->Height - *srcY0;
        *srcY0 = read_fb->Height - *srcY1;
        *srcY1 = tmp;
        *mirror_y = !*mirror_y;
     }
-   if (_mesa_is_winsys_fbo(draw_fb)) {
+   if (draw_fb->FlipY) {
        GLint tmp = draw_fb->Height - *dstY0;
        *dstY0 = draw_fb->Height - *dstY1;
        *dstY1 = tmp;
        *mirror_y = !*mirror_y;
     }
  
-   return false;
+   /* Check for invalid bounds
+    * Can't blit for 0-dimensions
+    */
+   return *srcX0 == *srcX1 || *srcY0 == *srcY1
+      || *dstX0 == *dstX1 || *dstY0 == *dstY1;
  }
  
  /**
- * Creates a new named renderbuffer that wraps the first slice
- * of an existing miptree.
+ * Determine if fast color clear supports the given clear color for \p mt.
   *
- * Clobbers the current renderbuffer binding (ctx->CurrentRenderbuffer).
+ * Integer formats are never fast-clear compatible.  Before Gen9 every
+ * component present in the render format must be exactly 0.0f or 1.0f.
   */
-struct gl_renderbuffer *
-brw_get_rb_for_slice(struct brw_context *brw,
-                     struct intel_mipmap_tree *mt,
-                     unsigned level, unsigned layer, bool flat)
+bool
+brw_is_color_fast_clear_compatible(struct brw_context *brw,
+                                   const struct intel_mipmap_tree *mt,
+                                   const union gl_color_union *color)
  {
-   struct gl_context *ctx = &brw->ctx;
-   struct gl_renderbuffer *rb = ctx->Driver.NewRenderbuffer(ctx, 0xDEADBEEF);
-   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   const struct gl_context *ctx = &brw->ctx;
+
+   /* If we're mapping the render format to a different format than the
+    * format we use for texturing then it is a bit questionable whether it
+    * should be possible to use a fast clear. Although we only actually
+    * render using a renderable format, without the override workaround it
+    * wouldn't be possible to have a non-renderable surface in a fast clear
+    * state so the hardware probably legitimately doesn't need to support
+    * this case. At least on Gen9 this really does seem to cause problems.
+    */
+   if (devinfo->gen >= 9 &&
+       brw_isl_format_for_mesa_format(mt->format) !=
+       brw->mesa_to_isl_render_format[mt->format])
+      return false;
+
+   const mesa_format format = _mesa_get_render_format(ctx, mt->format);
+   if (_mesa_is_format_integer_color(format)) {
+      if (devinfo->gen >= 8) {
+         perf_debug("Integer fast clear not enabled for (%s)",
+                    _mesa_get_format_name(format));
+      }
+      return false;
+   }
+
+   for (int i = 0; i < 4; i++) {
+      if (!_mesa_format_has_color_component(format, i)) {
+         continue; /* channel absent from the format: nothing to check */
+      }
+
+      if (devinfo->gen < 9 &&
+          color->f[i] != 0.0f && color->f[i] != 1.0f) {
+         return false;
+      }
+   }
+   return true;
+}
  
-   rb->RefCount = 1;
-   rb->Format = mt->format;
-   rb->_BaseFormat = _mesa_get_format_base_format(mt->format);
+/**
+ * Build the isl_color_value for a fast clear of \p mt from a GL clear color:
+ * replicate/zero components per base format, clamp to range, sRGB-encode.
+ */
+union isl_color_value
+brw_meta_convert_fast_clear_color(const struct brw_context *brw,
+                                  const struct intel_mipmap_tree *mt,
+                                  const union gl_color_union *color)
+{
+   union isl_color_value override_color = {
+      .u32 = {
+         color->ui[0],
+         color->ui[1],
+         color->ui[2],
+         color->ui[3],
+      },
+   };
  
-   /* Program takes care of msaa and mip-level access manually for stencil.
-    * The surface is also treated as Y-tiled instead of as W-tiled calling for
-    * twice the width and half the height in dimensions.
+   /* The sampler doesn't look at the format of the surface when the fast
+    * clear color is used so we need to implement luminance, intensity and
+    * missing components manually.
      */
-   if (flat) {
-      const unsigned halign_stencil = 8;
+   switch (_mesa_get_format_base_format(mt->format)) {
+   case GL_INTENSITY:
+      override_color.u32[3] = override_color.u32[0];
+      /* fall through */
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_ALPHA:
+      override_color.u32[1] = override_color.u32[0];
+      override_color.u32[2] = override_color.u32[0];
+      break;
+   default:
+      for (int i = 0; i < 3; i++) {
+         if (!_mesa_format_has_color_component(mt->format, i))
+            override_color.u32[i] = 0;
+      }
+      break;
+   }
  
-      rb->NumSamples = 0;
-      rb->Width = ALIGN(mt->total_width, halign_stencil) * 2;
-      rb->Height = (mt->total_height / mt->physical_depth0) / 2;
-      irb->mt_level = 0;
-   } else {
-      rb->NumSamples = mt->num_samples;
-      rb->Width = mt->logical_width0;
-      rb->Height = mt->logical_height0;
-      irb->mt_level = level;
+   switch (_mesa_get_format_datatype(mt->format)) {
+   case GL_UNSIGNED_NORMALIZED:
+      for (int i = 0; i < 4; i++)
+         override_color.f32[i] = CLAMP(override_color.f32[i], 0.0f, 1.0f);
+      break;
+
+   case GL_SIGNED_NORMALIZED:
+      for (int i = 0; i < 4; i++)
+         override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f);
+      break;
+
+   case GL_UNSIGNED_INT:
+      for (int i = 0; i < 4; i++) {
+         unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
+         if (bits < 32) { /* 1u << 32 would be undefined */
+            uint32_t max = (1u << bits) - 1;
+            override_color.u32[i] = MIN2(override_color.u32[i], max);
+         }
+      }
+      break;
+
+   case GL_INT: /* bits == 0 would make the (bits - 1) shifts undefined */
+      for (int i = 0; i < 4; i++) {
+         unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
+         if (bits > 0 && bits < 32) {
+            int32_t max = (1 << (bits - 1)) - 1;
+            int32_t min = -(1 << (bits - 1));
+            override_color.i32[i] = CLAMP(override_color.i32[i], min, max);
+         }
+      }
+      break;
+
+   case GL_FLOAT:
+      if (!_mesa_is_format_signed(mt->format)) {
+         for (int i = 0; i < 4; i++)
+            override_color.f32[i] = MAX2(override_color.f32[i], 0.0f);
+      }
+      break;
     }
  
-   irb->mt_layer = layer;
+   if (!_mesa_format_has_color_component(mt->format, 3)) {
+      if (_mesa_is_format_integer_color(mt->format))
+         override_color.u32[3] = 1;
+      else
+         override_color.f32[3] = 1.0f;
+   }
  
-   intel_miptree_reference(&irb->mt, mt);
+   /* Handle linear to SRGB conversion */
+   if (brw->ctx.Color.sRGBEnabled &&
+       _mesa_get_srgb_format_linear(mt->format) != mt->format) {
+      for (int i = 0; i < 3; i++) {
+         override_color.f32[i] =
+            util_format_linear_to_srgb_float(override_color.f32[i]);
+      }
+   }
  
-   return rb;
+   return override_color;
  }