i965: Emit depth stalls and flushes before changing depth state on Gen6+.
authorKenneth Graunke <kenneth@whitecape.org>
Thu, 1 Sep 2011 11:18:20 +0000 (04:18 -0700)
committerKenneth Graunke <kenneth@whitecape.org>
Mon, 26 Sep 2011 18:54:03 +0000 (11:54 -0700)
Fixes OpenArena on Gen7.  Technically, adding only the first depth stall
fixes it, but the documentation says to do all three, and the Windows
driver seems to do it.

Not observed to fix anything on Gen6 yet.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=38863
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/gen7_misc_state.c
src/mesa/drivers/dri/intel/intel_batchbuffer.c

index ce4fc84420976e5a0f8e3c14fdbb9f9546c35fe0..7b83ff5253c3045e74d54e99b39051e12495199d 100644 (file)
@@ -226,8 +226,10 @@ static void emit_depthbuffer(struct brw_context *brw)
    /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
     * non-pipelined state that will need the PIPE_CONTROL workaround.
     */
-   if (intel->gen == 6)
+   if (intel->gen == 6) {
       intel_emit_post_sync_nonzero_flush(intel);
+      intel_emit_depth_stall_flushes(intel);
+   }
 
    /*
     * If either depth or stencil buffer has packed depth/stencil format,
index 7544f961da9332d8df56a938f5ba2c7de4525eeb..9eb75e2a76ddeed4abad78794177875add86ae72 100644 (file)
@@ -80,6 +80,8 @@ static void emit_depthbuffer(struct brw_context *brw)
    struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
    struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
 
+   intel_emit_depth_stall_flushes(intel);
+
    /* Gen7 doesn't support packed depth/stencil */
    assert(srb == NULL || srb != drb);
 
index bb14cc27123ef25eb75c8865742b8359b9e8e3df..37c13c967ab6f082d033f5c16eb2885421668b4e 100644 (file)
@@ -296,6 +296,45 @@ emit:
    item->header = intel->batch.emit;
 }
 
+/**
+ * Restriction [DevSNB, DevIVB]:
+ *
+ * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
+ * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
+ * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
+ * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
+ * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
+ * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
+ * unless SW can otherwise guarantee that the pipeline from WM onwards is
+ * already flushed (e.g., via a preceding MI_FLUSH).
+ */
+void
+intel_emit_depth_stall_flushes(struct intel_context *intel)
+{
+   assert(intel->gen >= 6 && intel->gen <= 7);
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH()
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+}
+
 /**
  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
  * implementing two workarounds on gen6.  From section 1.4.7.1