i965: Update BLEND_STATE for Broadwell.
author: Kenneth Graunke <kenneth@whitecape.org>
Thu, 6 Dec 2012 03:30:26 +0000 (19:30 -0800)
committer: Kenneth Graunke <kenneth@whitecape.org>
Sat, 1 Feb 2014 01:50:08 +0000 (17:50 -0800)
v2: Allow logic ops on all surface types.  The UNORM restriction was
    lifted with Haswell and I simply hadn't noticed.  Also, add missing
    BRW_NEW_STATE_BASE_ADDRESS dirty bit.  Both caught by Eric Anholt.

v3: Fix swapped per-RT DWord pairs.  Eliminates bizarre hacks.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/gen8_blend_state.c

index 7242109dde4e48ce54d45be5c9d70e749004ecd7..52af08b46753d9c3df2b53f67e14b8f6f3f60f36 100644 (file)
@@ -1706,6 +1706,47 @@ enum brw_message_target {
 # define GEN8_RASTER_SCISSOR_ENABLE                     (1 << 1)
 # define GEN8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE        (1 << 0)
 
+/* Gen8 BLEND_STATE: one shared DWord, then two DWords per render target n */
+/* DW0: state shared by all render targets */
+#define GEN8_BLEND_ALPHA_TO_COVERAGE_ENABLE             (1 << 31)
+#define GEN8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE       (1 << 30)
+#define GEN8_BLEND_ALPHA_TO_ONE_ENABLE                  (1 << 29)
+#define GEN8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE      (1 << 28)
+#define GEN8_BLEND_ALPHA_TEST_ENABLE                    (1 << 27)
+#define GEN8_BLEND_ALPHA_TEST_FUNCTION_MASK             INTEL_MASK(26, 24)
+#define GEN8_BLEND_ALPHA_TEST_FUNCTION_SHIFT            24
+#define GEN8_BLEND_COLOR_DITHER_ENABLE                  (1 << 23)
+#define GEN8_BLEND_X_DITHER_OFFSET_MASK                 INTEL_MASK(22, 21)
+#define GEN8_BLEND_X_DITHER_OFFSET_SHIFT                21
+#define GEN8_BLEND_Y_DITHER_OFFSET_MASK                 INTEL_MASK(20, 19)
+#define GEN8_BLEND_Y_DITHER_OFFSET_SHIFT                19
+/* DW1 + 2n: blend enable, factors, functions and write disables for RT n */
+#define GEN8_BLEND_COLOR_BUFFER_BLEND_ENABLE            (1 << 31)
+#define GEN8_BLEND_SRC_BLEND_FACTOR_MASK                INTEL_MASK(30, 26)
+#define GEN8_BLEND_SRC_BLEND_FACTOR_SHIFT               26
+#define GEN8_BLEND_DST_BLEND_FACTOR_MASK                INTEL_MASK(25, 21)
+#define GEN8_BLEND_DST_BLEND_FACTOR_SHIFT               21
+#define GEN8_BLEND_COLOR_BLEND_FUNCTION_MASK            INTEL_MASK(20, 18)
+#define GEN8_BLEND_COLOR_BLEND_FUNCTION_SHIFT           18
+#define GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR_MASK          INTEL_MASK(17, 13)
+#define GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT         13
+#define GEN8_BLEND_DST_ALPHA_BLEND_FACTOR_MASK          INTEL_MASK(12, 8)
+#define GEN8_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT         8
+#define GEN8_BLEND_ALPHA_BLEND_FUNCTION_MASK            INTEL_MASK(7, 5)
+#define GEN8_BLEND_ALPHA_BLEND_FUNCTION_SHIFT           5
+#define GEN8_BLEND_WRITE_DISABLE_ALPHA                  (1 << 3)
+#define GEN8_BLEND_WRITE_DISABLE_RED                    (1 << 2)
+#define GEN8_BLEND_WRITE_DISABLE_GREEN                  (1 << 1)
+#define GEN8_BLEND_WRITE_DISABLE_BLUE                   (1 << 0)
+/* DW1 + 2n + 1: logic op and color clamp controls for RT n */
+#define GEN8_BLEND_LOGIC_OP_ENABLE                      (1 << 31)
+#define GEN8_BLEND_LOGIC_OP_FUNCTION_MASK               INTEL_MASK(30, 27)
+#define GEN8_BLEND_LOGIC_OP_FUNCTION_SHIFT              27
+#define GEN8_BLEND_PRE_BLEND_SRC_ONLY_CLAMP_ENABLE      (1 << 4)
+#define GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT           (2 << 2)
+#define GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE         (1 << 1)
+#define GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE        (1 << 0)
+
 #define _3DSTATE_PS_BLEND                       0x784D /* GEN8+ */
 /* DW1 */
 # define GEN8_PS_BLEND_ALPHA_TO_COVERAGE_ENABLE         (1 << 31)
index 3b01640a56e331de037276aea3c0ce6861b46ea3..865448c516c5e7cc47a1809b81462b334b4c645d 100644 (file)
@@ -131,6 +131,7 @@ extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen7_vs_state;
 extern const struct brw_tracked_state gen7_wm_state;
 extern const struct brw_tracked_state haswell_cut_index;
+extern const struct brw_tracked_state gen8_blend_state;
 extern const struct brw_tracked_state gen8_disable_stages;
 extern const struct brw_tracked_state gen8_index_buffer;
 extern const struct brw_tracked_state gen8_ps_blend;
index fac14b7eaf068d64d728edab4360fd6c1264efd1..044a30a95101a8e7ce0414a0005f2869ef3b46db 100644 (file)
@@ -266,7 +266,7 @@ static const struct brw_tracked_state *gen8_atoms[] =
 
    &gen7_push_constant_space,
    &gen7_urb,
-   &gen6_blend_state,
+   &gen8_blend_state,
    &gen6_color_calc_state,
 
    &gen6_vs_push_constants, /* Before vs_state */
index 2efdfa7c82ca4cb2bee257858850b87d2246f00b..f487fc906ebecc6d1591cc0fdfd7fa0df28ab57a 100644 (file)
 #include "main/glformats.h"
 
 #define blend_factor(x) brw_translate_blend_factor(x)
+#define blend_eqn(x) brw_translate_blend_equation(x)
+/**
+ * Build the Gen8 BLEND_STATE table and point the hardware at it.
+ *
+ * The table is written into the buffer returned by brw_state_batch():
+ *   - blend[0] (DW0) carries state shared by every render target:
+ *     alpha-to-coverage, alpha-to-one, alpha test, dithering and the
+ *     independent-alpha-blend enable.
+ *   - blend[1 + 2*i] carries the blend enable, factors, equations and
+ *     color write disables for draw buffer i.
+ *   - blend[1 + 2*i + 1] carries the logic op and color clamp controls
+ *     for draw buffer i.
+ *
+ * Finally a 3DSTATE_BLEND_STATE_POINTERS packet referencing
+ * brw->cc.blend_state_offset is emitted.
+ *
+ * NOTE(review): the 64 passed to brw_state_batch() is presumably the
+ * required alignment of the table -- confirm against its definition.
+ *
+ * \param brw  i965 context whose GL color, multisample and draw-buffer
+ *             state is translated.
+ */
+static void
+gen8_upload_blend_state(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   /* We need at least one BLEND_STATE written, because we might do
+    * thread dispatch even if _NumColorDrawBuffers is 0 (for example
+    * for computed depth or alpha test), which will do an FB write
+    * with render target 0, which will reference BLEND_STATE[0] for
+    * alpha test enable.
+    */
+   int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
+   if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled)
+      nr_draw_buffers = 1;
+
+   int size = 4 + 8 * nr_draw_buffers;   /* bytes: DW0 + 2 DWords per RT */
+   uint32_t *blend = brw_state_batch(brw, AUB_TRACE_BLEND_STATE,
+                                     size, 64, &brw->cc.blend_state_offset);
+   memset(blend, 0, size);
+
+   /* OpenGL specification 3.3 (page 196), section 4.1.3 says:
+    * "If drawbuffer zero is not NONE and the buffer it references has an
+    * integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE
+    * operations are skipped."
+    *
+    * The same check also gates alpha test and dithering below.  A missing
+    * render buffer 0 is treated as GL_UNSIGNED_NORMALIZED, i.e. non-integer.
+    */
+   struct gl_renderbuffer *rb0 = ctx->DrawBuffer->_ColorDrawBuffers[0];
+   GLenum rb_zero_type =
+      rb0 ? _mesa_get_format_datatype(rb0->Format) : GL_UNSIGNED_NORMALIZED;
+
+   if (rb_zero_type != GL_INT && rb_zero_type != GL_UNSIGNED_INT) {
+      /* _NEW_MULTISAMPLE */
+      if (ctx->Multisample._Enabled) {
+         if (ctx->Multisample.SampleAlphaToCoverage) {
+            blend[0] |= GEN8_BLEND_ALPHA_TO_COVERAGE_ENABLE;
+            blend[0] |= GEN8_BLEND_ALPHA_TO_COVERAGE_DITHER_ENABLE;
+         }
+         if (ctx->Multisample.SampleAlphaToOne)
+            blend[0] |= GEN8_BLEND_ALPHA_TO_ONE_ENABLE;
+      }
+
+      /* _NEW_COLOR */
+      if (ctx->Color.AlphaEnabled) {
+         blend[0] |=
+            GEN8_BLEND_ALPHA_TEST_ENABLE |
+            SET_FIELD(intel_translate_compare_func(ctx->Color.AlphaFunc),
+                      GEN8_BLEND_ALPHA_TEST_FUNCTION);
+      }
+
+      if (ctx->Color.DitherFlag) {
+         blend[0] |= GEN8_BLEND_COLOR_DITHER_ENABLE;
+      }
+   }
+
+   for (int i = 0; i < nr_draw_buffers; i++) {
+      /* _NEW_BUFFERS */
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+      GLenum rb_type =
+         rb ? _mesa_get_format_datatype(rb->Format) : GL_UNSIGNED_NORMALIZED;
+
+      /* Used for implementing the following bit of GL_EXT_texture_integer:
+       * "Per-fragment operations that require floating-point color
+       *  components, including multisample alpha operations, alpha test,
+       *  blending, and dithering, have no effect when the corresponding
+       *  colors are written to an integer color buffer."
+       *
+       * In this loop the flag only suppresses blending; an enabled logic
+       * op is programmed regardless of the buffer's data type.
+       */
+      bool integer = rb_type == GL_INT || rb_type == GL_UNSIGNED_INT;
+
+      /* _NEW_COLOR */
+      if (ctx->Color.ColorLogicOpEnabled) {
+         blend[1 + 2*i+1] |=
+            GEN8_BLEND_LOGIC_OP_ENABLE |
+            SET_FIELD(intel_translate_logic_op(ctx->Color.LogicOp),
+                      GEN8_BLEND_LOGIC_OP_FUNCTION);
+      } else if (ctx->Color.BlendEnabled & (1 << i) && !integer) {
+         GLenum eqRGB = ctx->Color.Blend[i].EquationRGB;
+         GLenum eqA = ctx->Color.Blend[i].EquationA;
+         GLenum srcRGB = ctx->Color.Blend[i].SrcRGB;
+         GLenum dstRGB = ctx->Color.Blend[i].DstRGB;
+         GLenum srcA = ctx->Color.Blend[i].SrcA;
+         GLenum dstA = ctx->Color.Blend[i].DstA;
+
+         if (eqRGB == GL_MIN || eqRGB == GL_MAX)
+            srcRGB = dstRGB = GL_ONE;
+
+         if (eqA == GL_MIN || eqA == GL_MAX)
+            srcA = dstA = GL_ONE;
+
+         /* Due to hardware limitations, the destination may have information
+          * in an alpha channel even when the format specifies no alpha
+          * channel. In order to avoid getting any incorrect blending due to
+          * that alpha channel, coerce the blend factors to values that will
+          * not read the alpha channel, but will instead use the correct
+          * implicit value for alpha.
+          */
+         if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE)) {
+            srcRGB = brw_fix_xRGB_alpha(srcRGB);
+            srcA = brw_fix_xRGB_alpha(srcA);
+            dstRGB = brw_fix_xRGB_alpha(dstRGB);
+            dstA = brw_fix_xRGB_alpha(dstA);
+         }
+
+         blend[1 + 2*i] |=
+            GEN8_BLEND_COLOR_BUFFER_BLEND_ENABLE |
+            SET_FIELD(blend_factor(dstRGB), GEN8_BLEND_DST_BLEND_FACTOR) |
+            SET_FIELD(blend_factor(srcRGB), GEN8_BLEND_SRC_BLEND_FACTOR) |
+            SET_FIELD(blend_factor(dstA), GEN8_BLEND_DST_ALPHA_BLEND_FACTOR) |
+            SET_FIELD(blend_factor(srcA), GEN8_BLEND_SRC_ALPHA_BLEND_FACTOR) |
+            SET_FIELD(blend_eqn(eqRGB), GEN8_BLEND_COLOR_BLEND_FUNCTION) |
+            SET_FIELD(blend_eqn(eqA), GEN8_BLEND_ALPHA_BLEND_FUNCTION);
+
+         if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
+            blend[0] |= GEN8_BLEND_INDEPENDENT_ALPHA_BLEND_ENABLE;
+      }
+
+      /* See section 8.1.6 "Pre-Blend Color Clamping" of the
+       * SandyBridge PRM Volume 2 Part 1 for HW requirements.
+       *
+       * We do our ARB_color_buffer_float CLAMP_FRAGMENT_COLOR
+       * clamping in the fragment shader.  For its clamping of
+       * blending, the spec says:
+       *
+       *     "RESOLVED: For fixed-point color buffers, the inputs and
+       *      the result of the blending equation are clamped.  For
+       *      floating-point color buffers, no clamping occurs."
+       *
+       * So, generally, we want clamping to the render target's range.
+       * And, good news, the hardware tables for both pre- and
+       * post-blend color clamping are either ignored, or any are
+       * allowed, or clamping is required but RT range clamping is a
+       * valid option.
+       */
+      blend[1 + 2*i+1] |=
+         GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE |
+         GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE |
+         GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT;
+
+      if (!ctx->Color.ColorMask[i][0])
+         blend[1 + 2*i] |= GEN8_BLEND_WRITE_DISABLE_RED;
+      if (!ctx->Color.ColorMask[i][1])
+         blend[1 + 2*i] |= GEN8_BLEND_WRITE_DISABLE_GREEN;
+      if (!ctx->Color.ColorMask[i][2])
+         blend[1 + 2*i] |= GEN8_BLEND_WRITE_DISABLE_BLUE;
+      if (!ctx->Color.ColorMask[i][3])
+         blend[1 + 2*i] |= GEN8_BLEND_WRITE_DISABLE_ALPHA;
+
+      /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
+       * "If Dual Source Blending is enabled, this bit must be disabled."
+       */
+      WARN_ONCE(ctx->Color.Blend[i]._UsesDualSrc &&
+                ctx->Multisample._Enabled &&
+                ctx->Multisample.SampleAlphaToOne,
+                "HW workaround: disabling alpha to one with dual src "
+                "blending\n");
+      if (ctx->Color.Blend[i]._UsesDualSrc)
+         blend[0] &= ~GEN8_BLEND_ALPHA_TO_ONE_ENABLE;
+   }
+   /* Emit 3DSTATE_BLEND_STATE_POINTERS referencing the table built above.
+    * NOTE(review): the low bit OR'd into the offset is presumably a
+    * "pointer valid" flag -- confirm against the Broadwell PRM.
+    */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2));
+   OUT_BATCH(brw->cc.blend_state_offset | 1);
+   ADVANCE_BATCH();
+}
+/*
+ * Tracked-state atom for the Gen8 BLEND_STATE table.
+ *
+ * Lists the Mesa (_NEW_COLOR, _NEW_BUFFERS, _NEW_MULTISAMPLE) and driver
+ * (BRW_NEW_BATCH, BRW_NEW_STATE_BASE_ADDRESS) dirty flags this atom is
+ * associated with, and names gen8_upload_blend_state() as its emit hook.
+ * NOTE(review): presumably the state upload loop calls .emit whenever one
+ * of these flags is set -- confirm in brw_state_upload.c.
+ */
+const struct brw_tracked_state gen8_blend_state = {
+   .dirty = {
+      .mesa = _NEW_COLOR | _NEW_BUFFERS | _NEW_MULTISAMPLE,
+      .brw = BRW_NEW_BATCH | BRW_NEW_STATE_BASE_ADDRESS,
+      .cache = 0,
+   },
+   .emit = gen8_upload_blend_state,
+};
 
 static void
 gen8_upload_ps_blend(struct brw_context *brw)