ilo: rework pipeline workarounds
authorChia-I Wu <olvaffe@gmail.com>
Mon, 22 Sep 2014 15:59:53 +0000 (23:59 +0800)
committerChia-I Wu <olvaffe@gmail.com>
Tue, 23 Sep 2014 02:08:05 +0000 (10:08 +0800)
Add current_pipe_control_dw1 and deferred_pipe_control_dw1 to track what has
been done since the last 3DPRIMITIVE and what needs to be done before the next
3DPRIMITIVE.  Based on them, we can emit WAs more smartly.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
src/gallium/drivers/ilo/ilo_3d_pipeline.h
src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c

index 5556edb4c6992e43f5dfbebfc7b952ac307d39d9..e85bb8aee28ff33d21387fff5b4784d185415fda 100644 (file)
@@ -87,7 +87,19 @@ struct ilo_3d_pipeline {
     * HW states.
     */
    struct ilo_3d_pipeline_state {
-      bool has_gen6_wa_pipe_control;
+      /*
+       * When a WA is needed before some command, we always emit the WA right
+       * before the command.  Knowing what has already been done since the
+       * last 3DPRIMITIVE allows us to skip some WAs.
+       */
+      uint32_t current_pipe_control_dw1;
+
+      /*
+       * When a WA is needed after some command, we may have the WA follow the
+       * command immediately or defer it.  If this is non-zero, a PIPE_CONTROL
+       * will be emitted before 3DPRIMITIVE.
+       */
+      uint32_t deferred_pipe_control_dw1;
 
       bool primitive_restart;
       int reduced_prim;
@@ -144,7 +156,9 @@ static inline void
 ilo_3d_pipeline_invalidate(struct ilo_3d_pipeline *p, uint32_t flags)
 {
    p->invalidate_flags |= flags;
-   p->state.has_gen6_wa_pipe_control = false;
+
+   /* Kernel flushes everything.  Shouldn't we set all bits here? */
+   p->state.current_pipe_control_dw1 = 0;
 }
 
 /**
index 7ae6cc124e86f757df942596f026a69278426fa1..e5cd937ee94bd8b58cedf3419a31ab3a2e0e1c0d 100644 (file)
 #include "ilo_3d_pipeline_gen6.h"
 
 /**
- * This should be called before any depth stall flush (including those
- * produced by non-pipelined state commands) or cache flush on GEN6.
- *
- * \see intel_emit_post_sync_nonzero_flush()
+ * A wrapper for gen6_PIPE_CONTROL().
  */
-static void
-gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
-                               bool caller_post_sync)
+static inline void
+gen6_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
 {
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
+   struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
+      p->workaround_bo : NULL;
 
-   /* emit once */
-   if (p->state.has_gen6_wa_pipe_control)
-      return;
+   ILO_DEV_ASSERT(p->dev, 6, 6);
+
+   gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
+
+   p->state.current_pipe_control_dw1 |= dw1;
 
-   p->state.has_gen6_wa_pipe_control = true;
+   assert(!p->state.deferred_pipe_control_dw1);
+}
 
+/**
+ * This should be called before PIPE_CONTROL.
+ */
+static void
+gen6_wa_pre_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
+{
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 60:
     *
     *     "Pipe-control with CS-stall bit set must be sent BEFORE the
     *      pipe-control with a post-sync op and no write-cache flushes."
     *
-    * The workaround below necessitates this workaround.
-    */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_CS_STALL |
-         GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
-         NULL, 0, false);
-
-   /* the caller will emit the post-sync op */
-   if (caller_post_sync)
-      return;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 60:
+    * This WA may also be triggered indirectly by the other two WAs on the
+    * same page:
     *
     *     "Before any depth stall flush (including those produced by
     *      non-pipelined state commands), software needs to first send a
@@ -84,66 +79,78 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
     *     "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
     *      PIPE_CONTROL with any non-zero post-sync-op is required."
     */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_WRITE_IMM,
-         p->workaround_bo, 0, false);
-}
+   const bool direct_wa_cond = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) &&
+                               !(dw1 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH);
+   const bool indirect_wa_cond = (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) |
+                                 (dw1 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH);
 
-static void
-gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p)
-{
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
+   ILO_DEV_ASSERT(p->dev, 6, 6);
+
+   if (!direct_wa_cond && !indirect_wa_cond)
+      return;
 
-   gen6_wa_pipe_control_post_sync(p, false);
+   if (!(p->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_CS_STALL)) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
+       *
+       *     "1 of the following must also be set (when CS stall is set):
+       *
+       *       - Depth Cache Flush Enable ([0] of DW1)
+       *       - Stall at Pixel Scoreboard ([1] of DW1)
+       *       - Depth Stall ([13] of DW1)
+       *       - Post-Sync Operation ([13] of DW1)
+       *       - Render Target Cache Flush Enable ([12] of DW1)
+       *       - Notify Enable ([8] of DW1)"
+       *
+       * Because of the WAs above, we have to pick Stall at Pixel Scoreboard.
+       */
+      const uint32_t direct_wa = GEN6_PIPE_CONTROL_CS_STALL |
+                                 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
 
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 305:
-    *
-    *     "Driver must guarentee that all the caches in the depth pipe are
-    *      flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
-    *      requires driver to send a PIPE_CONTROL with a CS stall along with a
-    *      Depth Flush prior to this command."
-    */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-         GEN6_PIPE_CONTROL_CS_STALL,
-         0, 0, false);
+      gen6_pipe_control(p, direct_wa);
+   }
+
+   if (indirect_wa_cond &&
+       !(p->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_WRITE__MASK)) {
+      const uint32_t indirect_wa = GEN6_PIPE_CONTROL_WRITE_IMM;
+
+      gen6_pipe_control(p, indirect_wa);
+   }
 }
 
+/**
+ * This should be called before any non-pipelined state command.
+ */
 static void
-gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p)
+gen6_wa_pre_non_pipelined(struct ilo_3d_pipeline *p)
 {
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
+   ILO_DEV_ASSERT(p->dev, 6, 6);
 
-   gen6_wa_pipe_control_post_sync(p, false);
+   /* non-pipelined state commands produce depth stall */
+   gen6_wa_pre_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
+}
 
+static void
+gen6_wa_post_3dstate_constant_vs(struct ilo_3d_pipeline *p)
+{
    /*
-    * According to intel_emit_depth_stall_flushes() of classic i965, we need
-    * to emit a sequence of PIPE_CONTROLs prior to emitting depth related
-    * commands.
+    * According to upload_vs_state() of the classic driver, we need to emit a
+    * PIPE_CONTROL after 3DSTATE_CONSTANT_VS, otherwise the command is kept
+    * being buffered by VS FF, to the point that the FF dies.
     */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_STALL,
-         NULL, 0, false);
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
+                        GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+                        GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
-         NULL, 0, false);
+   gen6_wa_pre_pipe_control(p, dw1);
 
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_STALL,
-         NULL, 0, false);
+   if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen6_pipe_control(p, dw1);
 }
 
 static void
-gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
+gen6_wa_pre_3dstate_wm_max_threads(struct ilo_3d_pipeline *p)
 {
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
-
-   /* the post-sync workaround should cover this already */
-   if (p->state.has_gen6_wa_pipe_control)
-      return;
-
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 274:
     *
@@ -151,29 +158,64 @@ gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
     *      field set (DW1 Bit 1), must be issued prior to any change to the
     *      value in this field (Maximum Number of Threads in 3DSTATE_WM)"
     */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
-         NULL, 0, false);
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
+
+   ILO_DEV_ASSERT(p->dev, 6, 6);
+
+   gen6_wa_pre_pipe_control(p, dw1);
 
+   if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen6_pipe_control(p, dw1);
 }
 
 static void
-gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p)
+gen6_wa_pre_3dstate_multisample(struct ilo_3d_pipeline *p)
 {
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 305:
+    *
+    *     "Driver must guarentee that all the caches in the depth pipe are
+    *      flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
+    *      requires driver to send a PIPE_CONTROL with a CS stall along with a
+    *      Depth Flush prior to this command."
+    */
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                        GEN6_PIPE_CONTROL_CS_STALL;
+
+   ILO_DEV_ASSERT(p->dev, 6, 6);
+
+   gen6_wa_pre_pipe_control(p, dw1);
+
+   if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen6_pipe_control(p, dw1);
+}
 
-   gen6_wa_pipe_control_post_sync(p, false);
+static void
+gen6_wa_pre_depth(struct ilo_3d_pipeline *p)
+{
+   ILO_DEV_ASSERT(p->dev, 6, 6);
 
    /*
-    * According to upload_vs_state() of classic i965, we need to emit
-    * PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being
-    * buffered by VS FF, to the point that the FF dies.
+    * From the Ivy Bridge PRM, volume 2 part 1, page 315:
+    *
+    *     "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
+    *      any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
+    *      3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
+    *      issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
+    *      set), followed by a pipelined depth cache flush (PIPE_CONTROL with
+    *      Depth Flush Bit set, followed by another pipelined depth stall
+    *      (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
+    *      guarantee that the pipeline from WM onwards is already flushed
+    *      (e.g., via a preceding MI_FLUSH)."
+    *
+    * According to the classic driver, it also applies for GEN6.
     */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_STALL |
-         GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
-         GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE,
-         NULL, 0, false);
+   gen6_wa_pre_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL |
+                               GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+
+   gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
+   gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+   gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
 }
 
 #define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
@@ -186,7 +228,7 @@ gen6_pipeline_common_select(struct ilo_3d_pipeline *p,
    /* PIPELINE_SELECT */
    if (session->hw_ctx_changed) {
       if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
+         gen6_wa_pre_non_pipelined(p);
 
       gen6_PIPELINE_SELECT(p->builder, 0x0);
    }
@@ -200,7 +242,7 @@ gen6_pipeline_common_sip(struct ilo_3d_pipeline *p,
    /* STATE_SIP */
    if (session->hw_ctx_changed) {
       if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
+         gen6_wa_pre_non_pipelined(p);
 
       gen6_STATE_SIP(p->builder, 0);
    }
@@ -215,7 +257,7 @@ gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p,
    if (session->state_bo_changed || session->kernel_bo_changed ||
        session->batch_bo_changed) {
       if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
+         gen6_wa_pre_non_pipelined(p);
 
       gen6_state_base_address(p->builder, session->hw_ctx_changed);
 
@@ -468,7 +510,9 @@ gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
 {
    /* 3DPRIMITIVE */
    gen6_3DPRIMITIVE(p->builder, vec->draw, &vec->ib);
-   p->state.has_gen6_wa_pipe_control = false;
+
+   p->state.current_pipe_control_dw1 = 0;
+   assert(!p->state.deferred_pipe_control_dw1);
 }
 
 void
@@ -485,7 +529,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
     * cannot find
     */
    if (emit_3dstate_vs && ilo_dev_gen(p->dev) == ILO_GEN(6))
-      gen6_wa_pipe_control_post_sync(p, false);
+      gen6_wa_pre_non_pipelined(p);
 
    /* 3DSTATE_CONSTANT_VS */
    if (emit_3dstate_constant_vs) {
@@ -503,7 +547,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
    }
 
    if (emit_3dstate_constant_vs && ilo_dev_gen(p->dev) == ILO_GEN(6))
-      gen6_wa_pipe_control_vs_const_flush(p);
+      gen6_wa_post_3dstate_constant_vs(p);
 }
 
 static void
@@ -578,7 +622,7 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
    /* 3DSTATE_GS_SVB_INDEX */
    if (emit) {
       if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
+         gen6_wa_pre_non_pipelined(p);
 
       gen6_3DSTATE_GS_SVB_INDEX(p->builder,
             0, 0, p->state.so_max_vertices,
@@ -651,7 +695,7 @@ gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p,
    /* 3DSTATE_DRAWING_RECTANGLE */
    if (DIRTY(FB)) {
       if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
+         gen6_wa_pre_non_pipelined(p);
 
       gen6_3DSTATE_DRAWING_RECTANGLE(p->builder, 0, 0,
             vec->fb.state.width, vec->fb.state.height);
@@ -680,7 +724,7 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p,
                                 vec->blend->alpha_to_coverage);
 
       if (ilo_dev_gen(p->dev) == ILO_GEN(6) && session->hw_ctx_changed)
-         gen6_wa_pipe_control_wm_max_threads_stall(p);
+         gen6_wa_pre_3dstate_wm_max_threads(p);
 
       gen6_3DSTATE_WM(p->builder, vec->fs, num_samplers,
             vec->rasterizer, dual_blend, cc_may_kill, 0);
@@ -700,8 +744,8 @@ gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
          &p->packed_sample_position_4x : &p->packed_sample_position_1x;
 
       if (ilo_dev_gen(p->dev) == ILO_GEN(6)) {
-         gen6_wa_pipe_control_post_sync(p, false);
-         gen6_wa_pipe_control_wm_multisample_flush(p);
+         gen6_wa_pre_non_pipelined(p);
+         gen6_wa_pre_3dstate_multisample(p);
       }
 
       gen6_3DSTATE_MULTISAMPLE(p->builder,
@@ -741,8 +785,8 @@ gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p,
       }
 
       if (ilo_dev_gen(p->dev) == ILO_GEN(6)) {
-         gen6_wa_pipe_control_post_sync(p, false);
-         gen6_wa_pipe_control_wm_depth_flush(p);
+         gen6_wa_pre_non_pipelined(p);
+         gen6_wa_pre_depth(p);
       }
 
       gen6_3DSTATE_DEPTH_BUFFER(p->builder, zs);
@@ -761,7 +805,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
    if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) &&
        vec->rasterizer->state.poly_stipple_enable) {
       if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
+         gen6_wa_pre_non_pipelined(p);
 
       gen6_3DSTATE_POLY_STIPPLE_PATTERN(p->builder,
             &vec->poly_stipple);
@@ -772,7 +816,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
    /* 3DSTATE_LINE_STIPPLE */
    if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_stipple_enable) {
       if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
+         gen6_wa_pre_non_pipelined(p);
 
       gen6_3DSTATE_LINE_STIPPLE(p->builder,
             vec->rasterizer->state.line_stipple_pattern,
@@ -782,7 +826,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
    /* 3DSTATE_AA_LINE_PARAMETERS */
    if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) {
       if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
+         gen6_wa_pre_non_pipelined(p);
 
       gen6_3DSTATE_AA_LINE_PARAMETERS(p->builder);
    }
@@ -1437,18 +1481,22 @@ ilo_3d_pipeline_emit_draw_gen6(struct ilo_3d_pipeline *p,
 void
 ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p)
 {
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
+                        GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
+                        GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                        GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
+                        GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+                        GEN6_PIPE_CONTROL_CS_STALL;
+
+   ILO_DEV_ASSERT(p->dev, 6, 7.5);
+
    if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-      gen6_wa_pipe_control_post_sync(p, false);
-
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
-         GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
-         GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-         GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
-         GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
-         GEN6_PIPE_CONTROL_WRITE_NONE |
-         GEN6_PIPE_CONTROL_CS_STALL,
-         0, 0, false);
+      gen6_wa_pre_pipe_control(p, dw1);
+
+   gen6_PIPE_CONTROL(p->builder, dw1, NULL, 0, false);
+
+   p->state.current_pipe_control_dw1 |= dw1;
+   p->state.deferred_pipe_control_dw1 &= ~dw1;
 }
 
 void
@@ -1478,27 +1526,18 @@ ilo_3d_pipeline_emit_query_gen6(struct ilo_3d_pipeline *p,
       GEN6_REG_SO_NUM_PRIMS_WRITTEN;
    const uint32_t *regs;
    int reg_count = 0, i;
+   uint32_t pipe_control_dw1 = 0;
 
    ILO_DEV_ASSERT(p->dev, 6, 7.5);
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, false);
-
-      gen6_PIPE_CONTROL(p->builder,
-            GEN6_PIPE_CONTROL_DEPTH_STALL |
-            GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT,
-            q->bo, offset, true);
+      pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
+                         GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT;
       break;
    case PIPE_QUERY_TIMESTAMP:
    case PIPE_QUERY_TIME_ELAPSED:
-      if (ilo_dev_gen(p->dev) == ILO_GEN(6))
-         gen6_wa_pipe_control_post_sync(p, true);
-
-      gen6_PIPE_CONTROL(p->builder,
-            GEN6_PIPE_CONTROL_WRITE_TIMESTAMP,
-            q->bo, offset, true);
+      pipe_control_dw1 = GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
       regs = &primitives_generated_reg;
@@ -1516,6 +1555,16 @@ ilo_3d_pipeline_emit_query_gen6(struct ilo_3d_pipeline *p,
       break;
    }
 
+   if (pipe_control_dw1) {
+      if (ilo_dev_gen(p->dev) == ILO_GEN(6))
+         gen6_wa_pre_pipe_control(p, pipe_control_dw1);
+
+      gen6_PIPE_CONTROL(p->builder, pipe_control_dw1, q->bo, offset, true);
+
+      p->state.current_pipe_control_dw1 |= pipe_control_dw1;
+      p->state.deferred_pipe_control_dw1 &= ~pipe_control_dw1;
+   }
+
    if (!reg_count)
       return;
 
@@ -1544,7 +1593,7 @@ gen6_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
    gen6_3DSTATE_CONSTANT_VS(p->builder, NULL, NULL, 0);
    gen6_3DSTATE_VS(p->builder, NULL, 0);
 
-   gen6_wa_pipe_control_vs_const_flush(p);
+   gen6_wa_post_3dstate_constant_vs(p);
 
    gen6_3DSTATE_CONSTANT_GS(p->builder, NULL, NULL, 0);
    gen6_3DSTATE_GS(p->builder, NULL, NULL, 0);
@@ -1577,7 +1626,7 @@ gen6_rectlist_wm(struct ilo_3d_pipeline *p,
 
    gen6_3DSTATE_CONSTANT_PS(p->builder, NULL, NULL, 0);
 
-   gen6_wa_pipe_control_wm_max_threads_stall(p);
+   gen6_wa_pre_3dstate_wm_max_threads(p);
    gen6_3DSTATE_WM(p->builder, NULL, 0, NULL, false, false, hiz_op);
 }
 
@@ -1586,7 +1635,7 @@ gen6_rectlist_wm_depth(struct ilo_3d_pipeline *p,
                        const struct ilo_blitter *blitter,
                        struct gen6_rectlist_session *session)
 {
-   gen6_wa_pipe_control_wm_depth_flush(p);
+   gen6_wa_pre_depth(p);
 
    if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
                         ILO_BLITTER_USE_FB_STENCIL)) {
@@ -1616,7 +1665,7 @@ gen6_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
    const uint32_t *packed_sample_pos = (blitter->fb.num_samples > 1) ?
       &p->packed_sample_position_4x : &p->packed_sample_position_1x;
 
-   gen6_wa_pipe_control_wm_multisample_flush(p);
+   gen6_wa_pre_3dstate_multisample(p);
 
    gen6_3DSTATE_MULTISAMPLE(p->builder, blitter->fb.num_samples,
          packed_sample_pos, true);
@@ -1630,7 +1679,7 @@ gen6_rectlist_commands(struct ilo_3d_pipeline *p,
                        const struct ilo_blitter *blitter,
                        struct gen6_rectlist_session *session)
 {
-   gen6_wa_pipe_control_post_sync(p, false);
+   gen6_wa_pre_non_pipelined(p);
 
    gen6_rectlist_wm_multisample(p, blitter, session);
 
index 2e3c2ec6ac7c0a3b8d8be7bef17d0478fb0c0fbc..51f663b8e82e14fbab8388bd38e24187946ed51f 100644 (file)
 #include "ilo_3d_pipeline_gen6.h"
 #include "ilo_3d_pipeline_gen7.h"
 
-static void
-gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p,
-                              bool change_multisample_state,
-                              bool change_depth_state)
+/**
+ * A wrapper for gen6_PIPE_CONTROL().
+ */
+static inline void
+gen7_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
 {
-   struct intel_bo *bo = NULL;
-   uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
+   struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
+      p->workaround_bo : NULL;
+
+   ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+   if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
+      /* CS stall cannot be set alone */
+      const uint32_t mask = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
+                            GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                            GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
+                            GEN6_PIPE_CONTROL_DEPTH_STALL |
+                            GEN6_PIPE_CONTROL_WRITE__MASK;
+      if (!(dw1 & mask))
+         dw1 |= GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
+   }
+
+   gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
 
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
-          ilo_dev_gen(p->dev) == ILO_GEN(7.5));
 
-   /* emit once */
-   if (p->state.has_gen6_wa_pipe_control)
-      return;
-   p->state.has_gen6_wa_pipe_control = true;
+   p->state.current_pipe_control_dw1 |= dw1;
+   p->state.deferred_pipe_control_dw1 &= ~dw1;
+}
 
+static void
+gen7_wa_post_3dstate_push_constant_alloc_ps(struct ilo_3d_pipeline *p)
+{
    /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 258:
-    *
-    *     "Due to an HW issue driver needs to send a pipe control with stall
-    *      when ever there is state change in depth bias related state"
-    *
     * From the Ivy Bridge PRM, volume 2 part 1, page 292:
     *
     *     "A PIPE_CONTOL command with the CS Stall bit set must be programmed
     *      in the ring after this instruction
     *      (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
-    *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 304:
-    *
-    *     "Driver must ierarchi that all the caches in the depth pipe are
-    *      flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
-    *      requires driver to send a PIPE_CONTROL with a CS stall along with a
-    *      Depth Flush prior to this command.
-    *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 315:
-    *
-    *     "Driver must send a least one PIPE_CONTROL command with CS Stall and
-    *      a post sync operation prior to the group of depth
-    *      commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
-    *      3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
     */
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
 
-   if (change_multisample_state)
-      dw1 |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-
-   if (change_depth_state) {
-      dw1 |= GEN6_PIPE_CONTROL_WRITE_IMM;
-      bo = p->workaround_bo;
-   }
+   ILO_DEV_ASSERT(p->dev, 7, 7.5);
 
-   gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
+   p->state.deferred_pipe_control_dw1 |= dw1;
 }
 
 static void
-gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
+gen7_wa_pre_vs(struct ilo_3d_pipeline *p)
 {
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
-          ilo_dev_gen(p->dev) == ILO_GEN(7.5));
-
    /*
     * From the Ivy Bridge PRM, volume 2 part 1, page 106:
     *
@@ -106,34 +95,73 @@ gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
     *      3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
     *      needs to be sent before any combination of VS associated 3DSTATE."
     */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_STALL |
-         GEN6_PIPE_CONTROL_WRITE_IMM,
-         p->workaround_bo, 0, false);
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
+                        GEN6_PIPE_CONTROL_WRITE_IMM;
+
+   ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+   if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen7_pipe_control(p, dw1);
 }
 
 static void
-gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
-                                    bool change_depth_buffer)
+gen7_wa_pre_3dstate_sf_depth_bias(struct ilo_3d_pipeline *p)
 {
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
-          ilo_dev_gen(p->dev) == ILO_GEN(7.5));
-
    /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 276:
+    * From the Ivy Bridge PRM, volume 2 part 1, page 258:
     *
-    *     "The driver must make sure a PIPE_CONTROL with the Depth Stall
-    *      Enable bit set after all the following states are programmed:
+    *     "Due to an HW issue driver needs to send a pipe control with stall
+    *      when ever there is state change in depth bias related state (in
+    *      3DSTATE_SF)"
+    */
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
+
+   ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+   if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen7_pipe_control(p, dw1);
+}
+
+static void
+gen7_wa_pre_3dstate_multisample(struct ilo_3d_pipeline *p)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 304:
     *
-    *       * 3DSTATE_PS
-    *       * 3DSTATE_VIEWPORT_STATE_POINTERS_CC
-    *       * 3DSTATE_CONSTANT_PS
-    *       * 3DSTATE_BINDING_TABLE_POINTERS_PS
-    *       * 3DSTATE_SAMPLER_STATE_POINTERS_PS
-    *       * 3DSTATE_CC_STATE_POINTERS
-    *       * 3DSTATE_BLEND_STATE_POINTERS
-    *       * 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
+    *     "Driver must guarantee that all the caches in the depth pipe are
+    *      flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
+    *      requires driver to send a PIPE_CONTROL with a CS stall along with a
+    *      Depth Flush prior to this command."
+    */
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                        GEN6_PIPE_CONTROL_CS_STALL;
+
+   ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+   if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen7_pipe_control(p, dw1);
+}
+
+static void
+gen7_wa_pre_depth(struct ilo_3d_pipeline *p)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 315:
     *
+    *     "Driver must send a least one PIPE_CONTROL command with CS Stall and
+    *      a post sync operation prior to the group of depth
+    *      commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
+    *      3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
+    */
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL |
+                        GEN6_PIPE_CONTROL_WRITE_IMM;
+
+   ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+   if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen7_pipe_control(p, dw1);
+
+   /*
     * From the Ivy Bridge PRM, volume 2 part 1, page 315:
     *
     *     "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
@@ -146,28 +174,14 @@ gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
     *      guarantee that the pipeline from WM onwards is already flushed
     *      (e.g., via a preceding MI_FLUSH)."
     */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_STALL,
-         NULL, 0, false);
-
-   if (!change_depth_buffer)
-      return;
-
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
-         NULL, 0, false);
-
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_DEPTH_STALL,
-         NULL, 0, false);
+   gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
+   gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+   gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
 }
 
 static void
-gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p)
+gen7_wa_pre_3dstate_ps_max_threads(struct ilo_3d_pipeline *p)
 {
-   assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
-          ilo_dev_gen(p->dev) == ILO_GEN(7.5));
-
    /*
     * From the Ivy Bridge PRM, volume 2 part 1, page 286:
     *
@@ -175,10 +189,37 @@ gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p)
     *      between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
     *      Pixel Scoreboard set is required to be issued."
     */
-   gen6_PIPE_CONTROL(p->builder,
-         GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
-         NULL, 0, false);
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
 
+   ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+   if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
+      gen7_pipe_control(p, dw1);
+}
+
+static void
+gen7_wa_post_ps_and_later(struct ilo_3d_pipeline *p)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 276:
+    *
+    *     "The driver must make sure a PIPE_CONTROL with the Depth Stall
+    *      Enable bit set after all the following states are programmed:
+    *
+    *       - 3DSTATE_PS
+    *       - 3DSTATE_VIEWPORT_STATE_POINTERS_CC
+    *       - 3DSTATE_CONSTANT_PS
+    *       - 3DSTATE_BINDING_TABLE_POINTERS_PS
+    *       - 3DSTATE_SAMPLER_STATE_POINTERS_PS
+    *       - 3DSTATE_CC_STATE_POINTERS
+    *       - 3DSTATE_BLEND_STATE_POINTERS
+    *       - 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
+    */
+   const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL;
+
+   ILO_DEV_ASSERT(p->dev, 7, 7.5);
+
+   p->state.deferred_pipe_control_dw1 |= dw1;
 }
 
 #define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
@@ -212,7 +253,7 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p,
       vs_entry_size *= sizeof(float) * 4;
       vs_total_size = p->dev->urb_size - offset;
 
-      gen7_wa_pipe_control_vs_depth_stall(p);
+      gen7_wa_pre_vs(p);
 
       gen7_3DSTATE_URB_VS(p->builder,
             offset, vs_total_size, vs_entry_size);
@@ -245,7 +286,7 @@ gen7_pipeline_common_pcb_alloc(struct ilo_3d_pipeline *p,
       gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->builder, offset, size);
 
       if (ilo_dev_gen(p->dev) == ILO_GEN(7))
-         gen7_wa_pipe_control_cs_stall(p, true, true);
+         gen7_wa_post_3dstate_push_constant_alloc_ps(p);
    }
 }
 
@@ -303,7 +344,7 @@ gen7_pipeline_vs(struct ilo_3d_pipeline *p,
    /* emit depth stall before any of the VS commands */
    if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
            emit_3dstate_constant_vs || emit_3dstate_vs)
-      gen7_wa_pipe_control_vs_depth_stall(p);
+      gen7_wa_pre_vs(p);
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_VS */
    if (emit_3dstate_binding_table) {
@@ -459,7 +500,7 @@ gen7_pipeline_sf(struct ilo_3d_pipeline *p,
    if (DIRTY(RASTERIZER) || DIRTY(FB)) {
       struct pipe_surface *zs = vec->fb.state.zsbuf;
 
-      gen7_wa_pipe_control_cs_stall(p, true, true);
+      gen7_wa_pre_3dstate_sf_depth_bias(p);
       gen7_3DSTATE_SF(p->builder, vec->rasterizer,
             (zs) ? zs->format : PIPE_FORMAT_NONE);
    }
@@ -508,7 +549,7 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
       if ((ilo_dev_gen(p->dev) == ILO_GEN(7) ||
            ilo_dev_gen(p->dev) == ILO_GEN(7.5)) &&
           session->hw_ctx_changed)
-         gen7_wa_pipe_control_ps_max_threads_stall(p);
+         gen7_wa_pre_3dstate_ps_max_threads(p);
 
       gen7_3DSTATE_PS(p->builder, vec->fs, num_samplers, dual_blend);
    }
@@ -527,7 +568,6 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
          (DIRTY(FB) || DIRTY(DSA) || session->state_bo_changed);
 
       if (emit_3dstate_ps ||
-          emit_3dstate_depth_buffer ||
           session->pcb_state_fs_changed ||
           session->viewport_state_changed ||
           session->binding_table_fs_changed ||
@@ -535,7 +575,10 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
           session->cc_state_cc_changed ||
           session->cc_state_blend_changed ||
           session->cc_state_dsa_changed)
-         gen7_wa_pipe_control_wm_depth_stall(p, emit_3dstate_depth_buffer);
+         gen7_wa_post_ps_and_later(p);
+
+      if (emit_3dstate_depth_buffer)
+         gen7_wa_pre_depth(p);
    }
 
    /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
@@ -575,7 +618,7 @@ gen7_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
    if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
       const uint32_t *packed_sample_pos;
 
-      gen7_wa_pipe_control_cs_stall(p, true, true);
+      gen7_wa_pre_3dstate_multisample(p);
 
       packed_sample_pos =
          (vec->fb.num_samples > 4) ? p->packed_sample_position_8x :
@@ -597,9 +640,14 @@ gen7_pipeline_vf_draw(struct ilo_3d_pipeline *p,
                       const struct ilo_state_vector *vec,
                       struct gen6_pipeline_session *session)
 {
+   if (p->state.deferred_pipe_control_dw1)
+      gen7_pipe_control(p, p->state.deferred_pipe_control_dw1);
+
    /* 3DPRIMITIVE */
    gen7_3DPRIMITIVE(p->builder, vec->draw, &vec->ib);
-   p->state.has_gen6_wa_pipe_control = false;
+
+   p->state.current_pipe_control_dw1 = 0;
+   p->state.deferred_pipe_control_dw1 = 0;
 }
 
 static void
@@ -670,7 +718,7 @@ gen7_rectlist_pcb_alloc(struct ilo_3d_pipeline *p,
 
    gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->builder, offset, size);
 
-   gen7_wa_pipe_control_cs_stall(p, true, true);
+   gen7_wa_post_3dstate_push_constant_alloc_ps(p);
 }
 
 static void
@@ -713,7 +761,7 @@ gen7_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
 
    gen6_3DSTATE_CLIP(p->builder, NULL, NULL, false, 0);
 
-   gen7_wa_pipe_control_cs_stall(p, true, true);
+   gen7_wa_pre_3dstate_sf_depth_bias(p);
 
    gen7_3DSTATE_SF(p->builder, NULL, blitter->fb.dst.base.format);
    gen7_3DSTATE_SBE(p->builder, NULL, NULL);
@@ -745,7 +793,7 @@ gen7_rectlist_wm(struct ilo_3d_pipeline *p,
 
    gen7_3DSTATE_CONSTANT_PS(p->builder, NULL, NULL, 0);
 
-   gen7_wa_pipe_control_ps_max_threads_stall(p);
+   gen7_wa_pre_3dstate_ps_max_threads(p);
    gen7_3DSTATE_PS(p->builder, NULL, 0, false);
 }
 
@@ -754,7 +802,7 @@ gen7_rectlist_wm_depth(struct ilo_3d_pipeline *p,
                        const struct ilo_blitter *blitter,
                        struct gen6_rectlist_session *session)
 {
-   gen7_wa_pipe_control_wm_depth_stall(p, true);
+   gen7_wa_pre_depth(p);
 
    if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
                         ILO_BLITTER_USE_FB_STENCIL)) {
@@ -786,7 +834,7 @@ gen7_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
       (blitter->fb.num_samples > 1) ? &p->packed_sample_position_4x :
       &p->packed_sample_position_1x;
 
-   gen7_wa_pipe_control_cs_stall(p, true, true);
+   gen7_wa_pre_3dstate_multisample(p);
 
    gen6_3DSTATE_MULTISAMPLE(p->builder, blitter->fb.num_samples,
          packed_sample_pos, true);
@@ -813,7 +861,7 @@ gen7_rectlist_commands(struct ilo_3d_pipeline *p,
    gen7_rectlist_pcb_alloc(p, blitter, session);
 
    /* needed for any VS-related commands */
-   gen7_wa_pipe_control_vs_depth_stall(p);
+   gen7_wa_pre_vs(p);
 
    gen7_rectlist_urb(p, blitter, session);