i965: Fix remaining flush vs invalidate race conditions in brw_emit_pipe_control_flush.
authorFrancisco Jerez <currojerez@riseup.net>
Fri, 1 Jul 2016 00:49:18 +0000 (17:49 -0700)
committerFrancisco Jerez <currojerez@riseup.net>
Thu, 7 Jul 2016 21:16:39 +0000 (14:16 -0700)
This hardware race condition has caused problems several times already
(see "i965: Fix cache pollution race during L3 partitioning set-up.",
"i965: Fix brw_render_cache_set_check_flush's PIPE_CONTROLs." and
"i965: intel_texture_barrier reimplemented").  The problem is that
whenever we attempt to both flush and invalidate multiple caches with
a single pipe control command the flush and invalidation happen in
reverse order, so the contents flushed from the R/W caches aren't
guaranteed to become visible from the invalidated caches after the
PIPE_CONTROL command completes execution if some concurrent rendering
workload happened to pollute any of the invalidated R/O caches in the
short window of time between the invalidation and flush.

This makes sure that brw_emit_pipe_control_flush() has the effect
expected by most callers of making the contents flushed from any R/W
caches visible from the invalidated R/O caches.

Cc: "12.0 11.1 11.2" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_pipe_control.c
src/mesa/drivers/dri/i965/intel_reg.h

index 14a8f7c7cdbee3b1af9237685d33a896ac7c4060..05e8c05f3ecdf20ff4bceab3f3a2f58b7ae073df 100644 (file)
@@ -96,6 +96,24 @@ gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
 void
 brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
 {
+   if (brw->gen >= 6 &&
+       (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
+       (flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
+      /* A pipe control command with flush and invalidate bits set
+       * simultaneously is an inherently racy operation on Gen6+ if the
+       * contents of the flushed caches were intended to become visible from
+       * any of the invalidated caches.  Split it in two PIPE_CONTROLs, the
+       * first one should stall the pipeline to make sure that the flushed R/W
+       * caches are coherent with memory once the specified R/O caches are
+       * invalidated.  On pre-Gen6 hardware the (implicit) R/O cache
+       * invalidation seems to happen at the bottom of the pipeline together
+       * with any write cache flush, so this shouldn't be a concern.
+       */
+      brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) |
+                                       PIPE_CONTROL_CS_STALL);
+      flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
+   }
+
    if (brw->gen >= 8) {
       if (brw->gen == 8)
          gen8_add_cs_stall_workaround_bits(&flags);
index 95365fe4f4fd77e4ae0c964dd59d7a6e1bb3030a..7a82be469fb630021546c167717a4c410e6baf51 100644 (file)
 #define PIPE_CONTROL_PPGTT_WRITE       (0 << 2)
 #define PIPE_CONTROL_GLOBAL_GTT_WRITE  (1 << 2)
 
+#define PIPE_CONTROL_CACHE_FLUSH_BITS \
+   (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
+    PIPE_CONTROL_RENDER_TARGET_FLUSH)
+
+#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
+   (PIPE_CONTROL_STATE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
+    PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+    PIPE_CONTROL_INSTRUCTION_INVALIDATE)
+
 /** @} */
 
 #define XY_SETUP_BLT_CMD               (CMD_2D | (0x01 << 22))