i965: Move PSCDEPTH calculations from draw time to compile time.
author Kenneth Graunke <kenneth@whitecape.org>
Sun, 30 Nov 2014 09:14:17 +0000 (01:14 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 4 Dec 2014 23:04:35 +0000 (15:04 -0800)
The "Pixel Shader Computed Depth Mode" value is entirely based on the
shader program, so we can easily do it at compile time.  This avoids the
if+switch on every 3DSTATE_WM (Gen7)/3DSTATE_PS_EXTRA (Gen8+) upload,
and shares a bit more code.

This also simplifies the PMA stall code, making it match the formula
more closely, and drops a BRW_NEW_FRAGMENT_PROGRAM dependency.  (Note
that the previous comment was wrong - the code and the documentation
have != PSCDEPTH_OFF, not ==.)

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/gen7_wm_state.c
src/mesa/drivers/dri/i965/gen8_depth_state.c
src/mesa/drivers/dri/i965/gen8_ps_state.c

index ec4b3dd88b113661b9566373c6576ff5dfff2c44..b4ddc17b3da37756ca96a0091686ceba93d700eb 100644 (file)
@@ -390,6 +390,8 @@ struct brw_wm_prog_data {
       /** @} */
    } binding_table;
 
+   uint8_t computed_depth_mode;
+
    bool no_8;
    bool dual_src_blend;
    bool uses_pos_offset;
index adcf1db6147afaebc8f482fde2cdb02ed0312d53..2acd0f838a3d3d79f0bc299d57c5fd13b5e114a2 100644 (file)
@@ -2051,16 +2051,20 @@ enum brw_message_target {
 # define GEN9_WM_DS_BF_STENCIL_REF_MASK                 INTEL_MASK(7, 0)
 # define GEN9_WM_DS_BF_STENCIL_REF_SHIFT                0
 
+enum brw_pixel_shader_computed_depth_mode {
+   BRW_PSCDEPTH_OFF   = 0, /* PS does not compute depth */
+   BRW_PSCDEPTH_ON    = 1, /* PS computes depth; no guarantee about value */
+   BRW_PSCDEPTH_ON_GE = 2, /* PS guarantees output depth >= source depth */
+   BRW_PSCDEPTH_ON_LE = 3, /* PS guarantees output depth <= source depth */
+};
+
 #define _3DSTATE_PS_EXTRA                       0x784F /* GEN8+ */
 /* DW1 */
 # define GEN8_PSX_PIXEL_SHADER_VALID                    (1 << 31)
 # define GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE              (1 << 30)
 # define GEN8_PSX_OMASK_TO_RENDER_TARGET                (1 << 29)
 # define GEN8_PSX_KILL_ENABLE                           (1 << 28)
-# define GEN8_PSX_PSCDEPTH_OFF                          (0 << 26)
-# define GEN8_PSX_PSCDEPTH_ON                           (1 << 26)
-# define GEN8_PSX_PSCDEPTH_ON_GE                        (2 << 26)
-# define GEN8_PSX_PSCDEPTH_ON_LE                        (3 << 26)
+# define GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT             26
 # define GEN8_PSX_FORCE_COMPUTED_DEPTH                  (1 << 25)
 # define GEN8_PSX_USES_SOURCE_DEPTH                     (1 << 24)
 # define GEN8_PSX_USES_SOURCE_W                         (1 << 23)
@@ -2202,10 +2206,7 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN7_WM_DEPTH_RESOLVE                         (1 << 28)
 # define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE            (1 << 27)
 # define GEN7_WM_KILL_ENABLE                           (1 << 25)
-# define GEN7_WM_PSCDEPTH_OFF                          (0 << 23)
-# define GEN7_WM_PSCDEPTH_ON                           (1 << 23)
-# define GEN7_WM_PSCDEPTH_ON_GE                                (2 << 23)
-# define GEN7_WM_PSCDEPTH_ON_LE                                (3 << 23)
+# define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT              23
 # define GEN7_WM_USES_SOURCE_DEPTH                     (1 << 20)
 # define GEN7_WM_USES_SOURCE_W                         (1 << 19)
 # define GEN7_WM_POSITION_ZW_PIXEL                     (0 << 17)
index fe36dd422850695d695c83546f6cc737def6010c..7badb231983f5cff1200c7bbfcf19e0710061f95 100644 (file)
@@ -116,6 +116,25 @@ brw_compute_barycentric_interp_modes(struct brw_context *brw,
    return barycentric_interp_modes;
 }
 
+static uint8_t
+computed_depth_mode(struct gl_fragment_program *fp)
+{
+   if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+      switch (fp->FragDepthLayout) {
+      case FRAG_DEPTH_LAYOUT_NONE:
+      case FRAG_DEPTH_LAYOUT_ANY:
+         return BRW_PSCDEPTH_ON;
+      case FRAG_DEPTH_LAYOUT_GREATER:
+         return BRW_PSCDEPTH_ON_GE;
+      case FRAG_DEPTH_LAYOUT_LESS:
+         return BRW_PSCDEPTH_ON_LE;
+      case FRAG_DEPTH_LAYOUT_UNCHANGED:
+         return BRW_PSCDEPTH_OFF;
+      }
+   }
+   return BRW_PSCDEPTH_OFF;
+}
+
 bool
 brw_wm_prog_data_compare(const void *in_a, const void *in_b)
 {
@@ -161,6 +180,8 @@ bool do_wm_prog(struct brw_context *brw,
     */
    prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func;
 
+   prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
+
    /* Allocate the references to the uniforms that will end up in the
     * prog_data associated with the compiled program, and which will be freed
     * by the state cache.
index 575eaf56de9acf9be8085fbb3762757dcba8e8fb..5a5c726e2bc26244e6b4f0909afcd1f772b9f95e 100644 (file)
@@ -41,7 +41,7 @@ upload_wm_state(struct brw_context *brw)
       brw_fragment_program_const(brw->fragment_program);
    /* BRW_NEW_FS_PROG_DATA */
    const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
-   bool writes_depth = false;
+   bool writes_depth = prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
    uint32_t dw1, dw2;
 
    /* _NEW_BUFFERS */
@@ -62,24 +62,8 @@ upload_wm_state(struct brw_context *brw)
 
    if (fp->program.Base.InputsRead & VARYING_BIT_POS)
       dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
-   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
-      writes_depth = fp->program.FragDepthLayout != FRAG_DEPTH_LAYOUT_UNCHANGED;
-
-      switch (fp->program.FragDepthLayout) {
-         case FRAG_DEPTH_LAYOUT_NONE:
-         case FRAG_DEPTH_LAYOUT_ANY:
-            dw1 |= GEN7_WM_PSCDEPTH_ON;
-            break;
-         case FRAG_DEPTH_LAYOUT_GREATER:
-            dw1 |= GEN7_WM_PSCDEPTH_ON_GE;
-            break;
-         case FRAG_DEPTH_LAYOUT_LESS:
-            dw1 |= GEN7_WM_PSCDEPTH_ON_LE;
-            break;
-         case FRAG_DEPTH_LAYOUT_UNCHANGED:
-            break;
-      }
-   }
+
+   dw1 |= prog_data->computed_depth_mode << GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT;
    dw1 |= prog_data->barycentric_interp_modes <<
       GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
index fc0e3fda59d3e1cb6933dad56a21ea22934c088b..e428089dd2e00df01f2cd33fa06b20308fdd942b 100644 (file)
@@ -225,8 +225,6 @@ static bool
 pma_fix_enable(const struct brw_context *brw)
 {
    const struct gl_context *ctx = &brw->ctx;
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   const struct gl_fragment_program *fp = brw->fragment_program;
    /* _NEW_BUFFERS */
    struct intel_renderbuffer *depth_irb =
       intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
@@ -279,12 +277,11 @@ pma_fix_enable(const struct brw_context *brw)
     */
    const bool stencil_writes_enabled = ctx->Stencil._WriteEnabled;
 
-   /* BRW_NEW_FRAGMENT_PROGRAM:
-    * 3DSTATE_PS_EXTRA::Pixel Shader Computed Depth Mode == PSCDEPTH_OFF
+   /* BRW_NEW_FS_PROG_DATA:
+    * 3DSTATE_PS_EXTRA::Pixel Shader Computed Depth Mode != PSCDEPTH_OFF
     */
    const bool ps_computes_depth =
-      (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) &&
-      fp->FragDepthLayout != FRAG_DEPTH_LAYOUT_UNCHANGED;
+      brw->wm.prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
 
    /* BRW_NEW_FS_PROG_DATA:        3DSTATE_PS_EXTRA::PixelShaderKillsPixels
     * BRW_NEW_FS_PROG_DATA:        3DSTATE_PS_EXTRA::oMask Present to RenderTarget
@@ -372,8 +369,7 @@ const struct brw_tracked_state gen8_pma_fix = {
               _NEW_DEPTH |
               _NEW_MULTISAMPLE |
               _NEW_STENCIL,
-      .brw = BRW_NEW_FRAGMENT_PROGRAM |
-             BRW_NEW_FS_PROG_DATA,
+      .brw = BRW_NEW_FS_PROG_DATA,
    },
    .emit = gen8_emit_pma_stall_workaround
 };
index 4267bf269587197fe98ca51b01fb7bbe8637c699..3aa0ef3502dfe7f5504176b8d9db30081778fac0 100644 (file)
@@ -39,6 +39,7 @@ upload_ps_extra(struct brw_context *brw)
    uint32_t dw1 = 0;
 
    dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
+   dw1 |= prog_data->computed_depth_mode << GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT;
 
    if (prog_data->uses_kill)
       dw1 |= GEN8_PSX_KILL_ENABLE;
@@ -46,23 +47,6 @@ upload_ps_extra(struct brw_context *brw)
    if (prog_data->num_varying_inputs != 0)
       dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
 
-   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
-      switch (fp->program.FragDepthLayout) {
-         case FRAG_DEPTH_LAYOUT_NONE:
-         case FRAG_DEPTH_LAYOUT_ANY:
-            dw1 |= GEN8_PSX_PSCDEPTH_ON;
-            break;
-         case FRAG_DEPTH_LAYOUT_GREATER:
-            dw1 |= GEN8_PSX_PSCDEPTH_ON_GE;
-            break;
-         case FRAG_DEPTH_LAYOUT_LESS:
-            dw1 |= GEN8_PSX_PSCDEPTH_ON_LE;
-            break;
-         case FRAG_DEPTH_LAYOUT_UNCHANGED:
-            break;
-      }
-   }
-
    if (fp->program.Base.InputsRead & VARYING_BIT_POS)
       dw1 |= GEN8_PSX_USES_SOURCE_DEPTH | GEN8_PSX_USES_SOURCE_W;