i965: Use genxml for emitting PIPE_CONTROL.
author Kenneth Graunke <kenneth@whitecape.org>
Thu, 1 Nov 2018 22:55:51 +0000 (15:55 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Tue, 12 Mar 2019 02:32:40 +0000 (19:32 -0700)
While this does add a bunch of boilerplate, it also protects us against
the hardware moving bits, or changing their meaning.  For something as
finicky as PIPE_CONTROL, the extra safety seems worth it.

We turn PIPE_CONTROL_* into a bitfield of arbitrary flags, and then
pack them appropriately.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_pipe_control.c
src/mesa/drivers/dri/i965/brw_pipe_control.h
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/genX_pipe_control.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/meson.build

index e01c1e5e0de484fc264a60fb4a070d14f676f008..d1c23cb2fc01712bb7c8cc8b3bdbab4d5fa2cbe2 100644 (file)
@@ -116,51 +116,61 @@ i965_FILES = \
 i965_gen4_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen45_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen5_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen6_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen7_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen75_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen8_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen9_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen10_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_gen11_FILES = \
        genX_blorp_exec.c \
        genX_boilerplate.h \
+       genX_pipe_control.c \
        genX_state_upload.c
 
 i965_oa_GENERATED_FILES = \
index 301d33051332e2b2013fd3547a05e84fee2eb376..8b46b678064b13765f58a249192a4a8ceba0c3ca 100644 (file)
@@ -754,6 +754,9 @@ struct brw_context
                                         uint32_t report_id);
 
       void (*emit_compute_walker)(struct brw_context *brw);
+      void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
+                                    struct brw_bo *bo, uint32_t offset,
+                                    uint64_t imm);
    } vtbl;
 
    struct brw_bufmgr *bufmgr;
index 704f65100d32cc994455f7c7c0900f4527b02734..0e96f76a22ec39426f6b5d9fe1346578116e6f42 100644 (file)
 
 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_state.h"
 #include "intel_batchbuffer.h"
 #include "intel_fbo.h"
 
-/**
- * According to the latest documentation, any PIPE_CONTROL with the
- * "Command Streamer Stall" bit set must also have another bit set,
- * with five different options:
- *
- *  - Render Target Cache Flush
- *  - Depth Cache Flush
- *  - Stall at Pixel Scoreboard
- *  - Post-Sync Operation
- *  - Depth Stall
- *  - DC Flush Enable
- *
- * I chose "Stall at Pixel Scoreboard" since we've used it effectively
- * in the past, but the choice is fairly arbitrary.
- */
-static void
-gen8_add_cs_stall_workaround_bits(uint32_t *flags)
-{
-   uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
-                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                      PIPE_CONTROL_WRITE_IMMEDIATE |
-                      PIPE_CONTROL_WRITE_DEPTH_COUNT |
-                      PIPE_CONTROL_WRITE_TIMESTAMP |
-                      PIPE_CONTROL_STALL_AT_SCOREBOARD |
-                      PIPE_CONTROL_DEPTH_STALL |
-                      PIPE_CONTROL_DATA_CACHE_FLUSH;
-
-   /* If we're doing a CS stall, and don't already have one of the
-    * workaround bits set, add "Stall at Pixel Scoreboard."
-    */
-   if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
-      *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
-}
-
-/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
- *
- * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
- *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
- *
- * Note that the kernel does CS stalls between batches, so we only need
- * to count them within a batch.
- */
-static uint32_t
-gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
-{
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-
-   if (devinfo->gen == 7 && !devinfo->is_haswell) {
-      if (flags & PIPE_CONTROL_CS_STALL) {
-         /* If we're doing a CS stall, reset the counter and carry on. */
-         brw->pipe_controls_since_last_cs_stall = 0;
-         return 0;
-      }
-
-      /* If this is the fourth pipe control without a CS stall, do one now. */
-      if (++brw->pipe_controls_since_last_cs_stall == 4) {
-         brw->pipe_controls_since_last_cs_stall = 0;
-         return PIPE_CONTROL_CS_STALL;
-      }
-   }
-   return 0;
-}
-
-/* #1130 from gen10 workarounds page in h/w specs:
- * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
- *  not enabled in same PIPE CONTROL and Enable Pixel score board stall if
- *  Render target cache flush is enabled."
- *
- * Applicable to CNL B0 and C0 steppings only.
- */
-static void
-gen10_add_rcpfe_workaround_bits(uint32_t *flags)
-{
-   if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
-      *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
-   } else if (*flags &
-             (PIPE_CONTROL_WRITE_IMMEDIATE |
-              PIPE_CONTROL_WRITE_DEPTH_COUNT |
-              PIPE_CONTROL_WRITE_TIMESTAMP)) {
-      *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
-   }
-}
-
-static void
-brw_emit_pipe_control(struct brw_context *brw, uint32_t flags,
-                      struct brw_bo *bo, uint32_t offset, uint64_t imm)
-{
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-
-   if (devinfo->gen >= 8) {
-      if (devinfo->gen == 8)
-         gen8_add_cs_stall_workaround_bits(&flags);
-
-      if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
-         if (devinfo->gen == 9) {
-            /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
-             * lists several workarounds:
-             *
-             *    "Project: SKL, KBL, BXT
-             *
-             *     If the VF Cache Invalidation Enable is set to a 1 in a
-             *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
-             *     sets to 0, with the VF Cache Invalidation Enable set to 0
-             *     needs to be sent prior to the PIPE_CONTROL with VF Cache
-             *     Invalidation Enable set to a 1."
-             */
-            brw_emit_pipe_control_flush(brw, 0);
-         }
-
-         if (devinfo->gen >= 9) {
-            /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
-             *
-             *    "Project: BDW+
-             *
-             *     When VF Cache Invalidate is set “Post Sync Operation” must
-             *     be enabled to “Write Immediate Data” or “Write PS Depth
-             *     Count” or “Write Timestamp”."
-             *
-             * If there's a BO, we're already doing some kind of write.
-             * If not, add a write to the workaround BO.
-             *
-             * XXX: This causes GPU hangs on Broadwell, so restrict it to
-             *      Gen9+ for now...see this bug for more information:
-             *      https://bugs.freedesktop.org/show_bug.cgi?id=103787
-             */
-            if (!bo) {
-               flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
-               bo = brw->workaround_bo;
-            }
-         }
-      }
-
-      if (devinfo->gen == 10)
-         gen10_add_rcpfe_workaround_bits(&flags);
-
-      BEGIN_BATCH(6);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
-      OUT_BATCH(flags);
-      if (bo) {
-         OUT_RELOC64(bo, RELOC_WRITE, offset);
-      } else {
-         OUT_BATCH(0);
-         OUT_BATCH(0);
-      }
-      OUT_BATCH(imm);
-      OUT_BATCH(imm >> 32);
-      ADVANCE_BATCH();
-   } else if (devinfo->gen >= 6) {
-      if (devinfo->gen == 6 &&
-          (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
-         /* Hardware workaround: SNB B-Spec says:
-          *
-          *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
-          *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
-          *   required.
-          */
-         brw_emit_post_sync_nonzero_flush(brw);
-      }
-
-      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
-
-      /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
-       * on later platforms.  We always use PPGTT on Gen7+.
-       */
-      unsigned gen6_gtt = devinfo->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
-
-      BEGIN_BATCH(5);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-      OUT_BATCH(flags);
-      if (bo) {
-         OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset);
-      } else {
-         OUT_BATCH(0);
-      }
-      OUT_BATCH(imm);
-      OUT_BATCH(imm >> 32);
-      ADVANCE_BATCH();
-   } else {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
-      if (bo) {
-         OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
-      } else {
-         OUT_BATCH(0);
-      }
-      OUT_BATCH(imm);
-      OUT_BATCH(imm >> 32);
-      ADVANCE_BATCH();
-   }
-}
-
 /**
  * Emit a PIPE_CONTROL with various flushing flags.
  *
@@ -246,7 +56,7 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
       flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
    }
 
-   brw_emit_pipe_control(brw, flags, NULL, 0, 0);
+   brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0);
 }
 
 /**
@@ -262,7 +72,7 @@ brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
                             struct brw_bo *bo, uint32_t offset,
                             uint64_t imm)
 {
-   brw_emit_pipe_control(brw, flags, bo, offset, imm);
+   brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm);
 }
 
 /**
@@ -357,14 +167,14 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
 void
 gen10_emit_isp_disable(struct brw_context *brw)
 {
-   brw_emit_pipe_control(brw,
-                         PIPE_CONTROL_STALL_AT_SCOREBOARD |
-                         PIPE_CONTROL_CS_STALL,
-                         NULL, 0, 0);
-   brw_emit_pipe_control(brw,
-                         PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
-                         PIPE_CONTROL_CS_STALL,
-                         NULL, 0, 0);
+   brw->vtbl.emit_raw_pipe_control(brw,
+                                   PIPE_CONTROL_STALL_AT_SCOREBOARD |
+                                   PIPE_CONTROL_CS_STALL,
+                                   NULL, 0, 0);
+   brw->vtbl.emit_raw_pipe_control(brw,
+                                   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
+                                   PIPE_CONTROL_CS_STALL,
+                                   NULL, 0, 0);
 
    brw->vs.base.push_constants_dirty = true;
    brw->tcs.base.push_constants_dirty = true;
@@ -561,6 +371,37 @@ int
 brw_init_pipe_control(struct brw_context *brw,
                       const struct gen_device_info *devinfo)
 {
+   switch (devinfo->gen) {
+   case 11:
+      brw->vtbl.emit_raw_pipe_control = gen11_emit_raw_pipe_control;
+      break;
+   case 10:
+      brw->vtbl.emit_raw_pipe_control = gen10_emit_raw_pipe_control;
+      break;
+   case 9:
+      brw->vtbl.emit_raw_pipe_control = gen9_emit_raw_pipe_control;
+      break;
+   case 8:
+      brw->vtbl.emit_raw_pipe_control = gen8_emit_raw_pipe_control;
+      break;
+   case 7:
+      brw->vtbl.emit_raw_pipe_control =
+         devinfo->is_haswell ? gen75_emit_raw_pipe_control
+                             : gen7_emit_raw_pipe_control;
+      break;
+   case 6:
+      brw->vtbl.emit_raw_pipe_control = gen6_emit_raw_pipe_control;
+      break;
+   case 5:
+      brw->vtbl.emit_raw_pipe_control = gen5_emit_raw_pipe_control;
+      break;
+   case 4:
+      brw->vtbl.emit_raw_pipe_control =
+         devinfo->is_g4x ? gen45_emit_raw_pipe_control
+                         : gen4_emit_raw_pipe_control;
+      break;
+   }
+
    if (devinfo->gen < 6)
       return 0;
 
index 69b1c7c31e6227ae97dcc895a6073b452452bc5e..e213f43a4f71c816f0bfdada76e39390df905b48 100644 (file)
@@ -32,34 +32,38 @@ struct brw_bo;
  *
  * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
  * additional flushing control.
+ *
+ * The bits here are not the actual hardware values.  The actual values
+ * shift around a bit per-generation, so we just have flags for each
+ * potential operation, and use genxml to encode the actual packet.
  */
-#define _3DSTATE_PIPE_CONTROL          (CMD_3D | (3 << 27) | (2 << 24))
-#define PIPE_CONTROL_LRI_WRITE_IMMEDIATE (1 << 23) /* Gen7+ */
-#define PIPE_CONTROL_CS_STALL          (1 << 20)
-#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET       (1 << 19)
-#define PIPE_CONTROL_TLB_INVALIDATE    (1 << 18)
-#define PIPE_CONTROL_SYNC_GFDT         (1 << 17)
-#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
-#define PIPE_CONTROL_NO_WRITE          (0 << 14)
-#define PIPE_CONTROL_WRITE_IMMEDIATE   (1 << 14)
-#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
-#define PIPE_CONTROL_WRITE_TIMESTAMP   (3 << 14)
-#define PIPE_CONTROL_DEPTH_STALL       (1 << 13)
-#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
-#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
-#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE  (1 << 10) /* GM45+ only */
-#define PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE (1 << 9)
-#define PIPE_CONTROL_INTERRUPT_ENABLE  (1 << 8)
-#define PIPE_CONTROL_FLUSH_ENABLE      (1 << 7) /* Gen7+ only */
-/* GT */
-#define PIPE_CONTROL_DATA_CACHE_FLUSH          (1 << 5)
-#define PIPE_CONTROL_VF_CACHE_INVALIDATE       (1 << 4)
-#define PIPE_CONTROL_CONST_CACHE_INVALIDATE    (1 << 3)
-#define PIPE_CONTROL_STATE_CACHE_INVALIDATE    (1 << 2)
-#define PIPE_CONTROL_STALL_AT_SCOREBOARD       (1 << 1)
-#define PIPE_CONTROL_DEPTH_CACHE_FLUSH         (1 << 0)
-#define PIPE_CONTROL_PPGTT_WRITE       (0 << 2)
-#define PIPE_CONTROL_GLOBAL_GTT_WRITE  (1 << 2)
+enum pipe_control_flags
+{
+   PIPE_CONTROL_FLUSH_LLC                       = (1 << 1),
+   PIPE_CONTROL_LRI_POST_SYNC_OP                = (1 << 2),
+   PIPE_CONTROL_STORE_DATA_INDEX                = (1 << 3),
+   PIPE_CONTROL_CS_STALL                        = (1 << 4),
+   PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     = (1 << 5),
+   PIPE_CONTROL_SYNC_GFDT                       = (1 << 6),
+   PIPE_CONTROL_TLB_INVALIDATE                  = (1 << 7),
+   PIPE_CONTROL_MEDIA_STATE_CLEAR               = (1 << 8),
+   PIPE_CONTROL_WRITE_IMMEDIATE                 = (1 << 9),
+   PIPE_CONTROL_WRITE_DEPTH_COUNT               = (1 << 10),
+   PIPE_CONTROL_WRITE_TIMESTAMP                 = (1 << 11),
+   PIPE_CONTROL_DEPTH_STALL                     = (1 << 12),
+   PIPE_CONTROL_RENDER_TARGET_FLUSH             = (1 << 13),
+   PIPE_CONTROL_INSTRUCTION_INVALIDATE          = (1 << 14),
+   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        = (1 << 15),
+   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
+   PIPE_CONTROL_NOTIFY_ENABLE                   = (1 << 17),
+   PIPE_CONTROL_FLUSH_ENABLE                    = (1 << 18),
+   PIPE_CONTROL_DATA_CACHE_FLUSH                = (1 << 19),
+   PIPE_CONTROL_VF_CACHE_INVALIDATE             = (1 << 20),
+   PIPE_CONTROL_CONST_CACHE_INVALIDATE          = (1 << 21),
+   PIPE_CONTROL_STATE_CACHE_INVALIDATE          = (1 << 22),
+   PIPE_CONTROL_STALL_AT_SCOREBOARD             = (1 << 23),
+   PIPE_CONTROL_DEPTH_CACHE_FLUSH               = (1 << 24),
+};
 
 #define PIPE_CONTROL_CACHE_FLUSH_BITS \
    (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
index 546d103d1a490bf0f225ab9d6d4a7d3ff4a7b7fd..402ae692f9fd373975685967ba7c0a84e24ac3a0 100644 (file)
@@ -95,6 +95,37 @@ extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen8_pma_fix;
 extern const struct brw_tracked_state brw_cs_work_groups_surface;
 
+void gen4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                struct brw_bo *bo, uint32_t offset,
+                                uint64_t imm);
+void gen45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                 struct brw_bo *bo, uint32_t offset,
+                                 uint64_t imm);
+void gen5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                struct brw_bo *bo, uint32_t offset,
+                                uint64_t imm);
+void gen6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                struct brw_bo *bo, uint32_t offset,
+                                uint64_t imm);
+void gen7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                struct brw_bo *bo, uint32_t offset,
+                                uint64_t imm);
+void gen75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                 struct brw_bo *bo, uint32_t offset,
+                                 uint64_t imm);
+void gen8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                struct brw_bo *bo, uint32_t offset,
+                                uint64_t imm);
+void gen9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                struct brw_bo *bo, uint32_t offset,
+                                uint64_t imm);
+void gen10_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                 struct brw_bo *bo, uint32_t offset,
+                                 uint64_t imm);
+void gen11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
+                                 struct brw_bo *bo, uint32_t offset,
+                                 uint64_t imm);
+
 static inline bool
 brw_state_dirty(const struct brw_context *brw,
                 GLuint mesa_flags, uint64_t brw_flags)
diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c b/src/mesa/drivers/dri/i965/genX_pipe_control.c
new file mode 100644 (file)
index 0000000..8eb3744
--- /dev/null
@@ -0,0 +1,243 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "genX_boilerplate.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+/**
+ * According to the latest documentation, any PIPE_CONTROL with the
+ * "Command Streamer Stall" bit set must also have another bit set,
+ * with six different options:
+ *
+ *  - Render Target Cache Flush
+ *  - Depth Cache Flush
+ *  - Stall at Pixel Scoreboard
+ *  - Post-Sync Operation
+ *  - Depth Stall
+ *  - DC Flush Enable
+ *
+ * I chose "Stall at Pixel Scoreboard" since we've used it effectively
+ * in the past, but the choice is fairly arbitrary.
+ */
+static void
+gen8_add_cs_stall_workaround_bits(uint32_t *flags)
+{
+   uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                      PIPE_CONTROL_WRITE_IMMEDIATE |
+                      PIPE_CONTROL_WRITE_DEPTH_COUNT |
+                      PIPE_CONTROL_WRITE_TIMESTAMP |
+                      PIPE_CONTROL_STALL_AT_SCOREBOARD |
+                      PIPE_CONTROL_DEPTH_STALL |
+                      PIPE_CONTROL_DATA_CACHE_FLUSH;
+
+   /* If we're doing a CS stall, and don't already have one of the
+    * workaround bits set, add "Stall at Pixel Scoreboard."
+    */
+   if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
+      *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+}
+
+/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
+ *
+ * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
+ *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
+ *
+ * Note that the kernel does CS stalls between batches, so we only need
+ * to count them within a batch.
+ */
+static uint32_t
+gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
+{
+   if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
+      if (flags & PIPE_CONTROL_CS_STALL) {
+         /* If we're doing a CS stall, reset the counter and carry on. */
+         brw->pipe_controls_since_last_cs_stall = 0;
+         return 0;
+      }
+
+      /* If this is the fourth pipe control without a CS stall, do one now. */
+      if (++brw->pipe_controls_since_last_cs_stall == 4) {
+         brw->pipe_controls_since_last_cs_stall = 0;
+         return PIPE_CONTROL_CS_STALL;
+      }
+   }
+   return 0;
+}
+
+/* #1130 from gen10 workarounds page in h/w specs:
+ * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
+ *  not enabled in same PIPE CONTROL and Enable Pixel score board stall if
+ *  Render target cache flush is enabled."
+ *
+ * Applicable to CNL B0 and C0 steppings only.
+ */
+static void
+gen10_add_rcpfe_workaround_bits(uint32_t *flags)
+{
+   if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
+      *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+   } else if (*flags &
+             (PIPE_CONTROL_WRITE_IMMEDIATE |
+              PIPE_CONTROL_WRITE_DEPTH_COUNT |
+              PIPE_CONTROL_WRITE_TIMESTAMP)) {
+      *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
+   }
+}
+
+static unsigned
+flags_to_post_sync_op(uint32_t flags)
+{
+   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
+            PIPE_CONTROL_WRITE_DEPTH_COUNT |
+            PIPE_CONTROL_WRITE_TIMESTAMP;
+
+   assert(util_bitcount(flags) <= 1);
+
+   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
+      return WriteImmediateData;
+
+   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
+      return WritePSDepthCount;
+
+   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
+      return WriteTimestamp;
+
+   return 0;
+}
+
+void
+genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
+                            struct brw_bo *bo, uint32_t offset, uint64_t imm)
+{
+   if (GEN_GEN >= 8) {
+      if (GEN_GEN == 8)
+         gen8_add_cs_stall_workaround_bits(&flags);
+
+      if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
+         if (GEN_GEN == 9) {
+            /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
+             * lists several workarounds:
+             *
+             *    "Project: SKL, KBL, BXT
+             *
+             *     If the VF Cache Invalidation Enable is set to a 1 in a
+             *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
+             *     sets to 0, with the VF Cache Invalidation Enable set to 0
+             *     needs to be sent prior to the PIPE_CONTROL with VF Cache
+             *     Invalidation Enable set to a 1."
+             */
+            brw_emit_pipe_control_flush(brw, 0);
+         }
+
+         if (GEN_GEN >= 9) {
+            /* The PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
+             *
+             *    "Project: BDW+
+             *
+             *     When VF Cache Invalidate is set “Post Sync Operation” must
+             *     be enabled to “Write Immediate Data” or “Write PS Depth
+             *     Count” or “Write Timestamp”."
+             *
+             * If there's a BO, we're already doing some kind of write.
+             * If not, add a write to the workaround BO.
+             *
+             * XXX: This causes GPU hangs on Broadwell, so restrict it to
+             *      Gen9+ for now...see this bug for more information:
+             *      https://bugs.freedesktop.org/show_bug.cgi?id=103787
+             */
+            if (!bo) {
+               flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
+               bo = brw->workaround_bo;
+            }
+         }
+      }
+
+      if (GEN_GEN == 10)
+         gen10_add_rcpfe_workaround_bits(&flags);
+   } else if (GEN_GEN >= 6) {
+      if (GEN_GEN == 6 &&
+          (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
+         /* Hardware workaround: SNB B-Spec says:
+          *
+          *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
+          *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
+          *   required.
+          */
+         brw_emit_post_sync_nonzero_flush(brw);
+      }
+
+      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+   }
+
+   brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
+   #if GEN_GEN >= 9
+      pc.FlushLLC = 0;
+   #endif
+   #if GEN_GEN >= 7
+      pc.LRIPostSyncOperation = NoLRIOperation;
+      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
+      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
+   #endif
+   #if GEN_GEN >= 6
+      pc.StoreDataIndex = 0;
+      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
+      pc.GlobalSnapshotCountReset =
+         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
+      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
+      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
+      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
+      pc.RenderTargetCacheFlushEnable =
+         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
+      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+      pc.StateCacheInvalidationEnable =
+         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
+      pc.ConstantCacheInvalidationEnable =
+         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+   #else
+      pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
+   #endif
+      pc.PostSyncOperation = flags_to_post_sync_op(flags);
+      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
+      pc.InstructionCacheInvalidateEnable =
+         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
+      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
+   #if GEN_GEN >= 5 || GEN_IS_G4X
+      pc.IndirectStatePointersDisable =
+         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
+   #endif
+   #if GEN_GEN >= 6
+      pc.TextureCacheInvalidationEnable =
+         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+   #elif GEN_GEN == 5 || GEN_IS_G4X
+      pc.TextureCacheFlushEnable =
+         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+   #endif
+      pc.Address = ggtt_bo(bo, offset);
+      if (GEN_GEN < 7 && bo)
+         pc.DestinationAddressType = DAT_GGTT;
+      pc.ImmediateData = imm;
+   }
+}
index abb7b241f78a4946257e62e33ca43ca86e374379..95f8cd21cbab6515d1a85d1b8ca9614fd11e36b6 100644 (file)
@@ -137,8 +137,8 @@ i965_gen_libs = []
 foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110']
   i965_gen_libs += static_library(
     'i965_gen@0@'.format(v),
-    ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_state_upload.c',
-     gen_xml_pack],
+    ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c',
+     'genX_state_upload.c', gen_xml_pack],
     include_directories : [inc_common, inc_intel, inc_dri_common],
     c_args : [
       c_vis_args, no_override_init_args, c_sse2_args,