i965: Create a helper function for emitting PIPE_CONTROL writes.
authorKenneth Graunke <kenneth@whitecape.org>
Tue, 14 Jan 2014 00:00:18 +0000 (16:00 -0800)
committerKenneth Graunke <kenneth@whitecape.org>
Mon, 20 Jan 2014 23:38:23 +0000 (15:38 -0800)
There are a lot of places that use PIPE_CONTROL to write a value to a
buffer (either an immediate write, TIMESTAMP, or PS_DEPTH_COUNT).
Creating a single function to do this seems convenient.

As part of this refactor, we now set the PPGTT/GTT selection bit
correctly on Gen7+.  Previously, we set bit 2 of DW2 on all platforms.
This is correct for Sandybridge, but actually part of the address on
Ivybridge and later!

Broadwell will also increase the length of these packets by 1; with the
refactoring, we should have to adjust that in substantially fewer
places, giving us confidence that we've hit them all.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_queryobj.c
src/mesa/drivers/dri/i965/gen6_queryobj.c
src/mesa/drivers/dri/i965/intel_batchbuffer.c
src/mesa/drivers/dri/i965/intel_batchbuffer.h

index 9f839379bff0703802bce0d19c5dc63db4be7980..dc26c0864e17a168a1269cbe9ada63d20c58a7dc 100644 (file)
 void
 brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 {
-   if (brw->gen >= 6) {
-      /* Emit workaround flushes: */
-      if (brw->gen == 6) {
-         brw_emit_pipe_control_flush(brw,
-                                     PIPE_CONTROL_CS_STALL |
-                                     PIPE_CONTROL_STALL_AT_SCOREBOARD);
-      }
-
-      BEGIN_BATCH(5);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-      OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
-      OUT_RELOC(query_bo,
-                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-                PIPE_CONTROL_GLOBAL_GTT_WRITE |
-                idx * sizeof(uint64_t));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   } else {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
-                PIPE_CONTROL_WRITE_TIMESTAMP);
-      OUT_RELOC(query_bo,
-                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-                PIPE_CONTROL_GLOBAL_GTT_WRITE |
-                idx * sizeof(uint64_t));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
+   if (brw->gen == 6) {
+      /* Emit Sandybridge workaround flush: */
+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_CS_STALL |
+                                  PIPE_CONTROL_STALL_AT_SCOREBOARD);
    }
+
+   brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_TIMESTAMP,
+                               query_bo, idx * sizeof(uint64_t), 0, 0);
 }
 
 /**
@@ -89,21 +68,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 {
    assert(brw->gen < 6);
 
-   BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
-             PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT);
-   /* This object could be mapped cacheable, but we don't have an exposed
-    * mechanism to support that.  Since it's going uncached, tell GEM that
-    * we're writing to it.  The usual clflush should be all that's required
-    * to pick up the results.
-    */
-   OUT_RELOC(query_bo,
-             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-             PIPE_CONTROL_GLOBAL_GTT_WRITE |
-             (idx * sizeof(uint64_t)));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   brw_emit_pipe_control_write(brw,
+                               PIPE_CONTROL_WRITE_DEPTH_COUNT
+                               | PIPE_CONTROL_DEPTH_STALL,
+                               query_bo, idx * sizeof(uint64_t), 0, 0);
 }
 
 /**
index 16d0c101432d1bc760801d3e6f13503b415be39b..e76562393de4ae02639c72b4d48f52de2a604312 100644 (file)
@@ -49,17 +49,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
    if (brw->gen == 6)
       intel_emit_post_sync_nonzero_flush(brw);
 
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
-             PIPE_CONTROL_WRITE_DEPTH_COUNT);
-   OUT_RELOC(query_bo,
-             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-             PIPE_CONTROL_GLOBAL_GTT_WRITE |
-             (idx * sizeof(uint64_t)));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   brw_emit_pipe_control_write(brw,
+                               PIPE_CONTROL_WRITE_DEPTH_COUNT
+                               | PIPE_CONTROL_DEPTH_STALL,
+                               query_bo, idx * sizeof(uint64_t), 0, 0);
 }
 
 /*
index f54ca9b30e4f778ad0b34d0919ac398b7bae5f8e..d1587cba10c458d369e788c13777a6d2e9e6b07b 100644 (file)
@@ -458,6 +458,44 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
    }
 }
 
+/**
+ * Emit a PIPE_CONTROL that writes to a buffer object.
+ *
+ * \p flags should contain one of the following items:
+ *  - PIPE_CONTROL_WRITE_IMMEDIATE
+ *  - PIPE_CONTROL_WRITE_TIMESTAMP
+ *  - PIPE_CONTROL_WRITE_DEPTH_COUNT
+ */
+void
+brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+                            drm_intel_bo *bo, uint32_t offset,
+                            uint32_t imm_lower, uint32_t imm_upper)
+{
+   if (brw->gen >= 6) {
+      /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
+       * on later platforms.  We always use PPGTT on Gen7+.
+       */
+      unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
+      OUT_BATCH(flags);
+      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                gen6_gtt | offset);
+      OUT_BATCH(imm_lower);
+      OUT_BATCH(imm_upper);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
+      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
+      OUT_BATCH(imm_lower);
+      OUT_BATCH(imm_upper);
+      ADVANCE_BATCH();
+   }
+}
+
 /**
  * Restriction [DevSNB, DevIVB]:
  *
@@ -492,15 +530,11 @@ void
 gen7_emit_vs_workaround_flush(struct brw_context *brw)
 {
    assert(brw->gen == 7);
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
-   OUT_RELOC(brw->batch.workaround_bo,
-            I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
-   OUT_BATCH(0); /* write data */
-   OUT_BATCH(0); /* write data */
-   ADVANCE_BATCH();
+   brw_emit_pipe_control_write(brw,
+                               PIPE_CONTROL_WRITE_IMMEDIATE
+                               | PIPE_CONTROL_DEPTH_STALL,
+                               brw->batch.workaround_bo, 0,
+                               0, 0);
 }
 
 
@@ -510,27 +544,11 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
 void
 gen7_emit_cs_stall_flush(struct brw_context *brw)
 {
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-   /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
-    * CS Stall):
-    *
-    *     One of the following must also be set:
-    *     - Render Target Cache Flush Enable ([12] of DW1)
-    *     - Depth Cache Flush Enable ([0] of DW1)
-    *     - Stall at Pixel Scoreboard ([1] of DW1)
-    *     - Depth Stall ([13] of DW1)
-    *     - Post-Sync Operation ([13] of DW1)
-    *
-    * We choose to do a Post-Sync Operation (Write Immediate Data), since
-    * it seems like it will incur the least additional performance penalty.
-    */
-   OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
-   OUT_RELOC(brw->batch.workaround_bo,
-             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   brw_emit_pipe_control_write(brw,
+                               PIPE_CONTROL_CS_STALL
+                               | PIPE_CONTROL_WRITE_IMMEDIATE,
+                               brw->batch.workaround_bo, 0,
+                               0, 0);
 }
 
 
@@ -581,14 +599,8 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
                                PIPE_CONTROL_CS_STALL |
                                PIPE_CONTROL_STALL_AT_SCOREBOARD);
 
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
-   OUT_RELOC(brw->batch.workaround_bo,
-            I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
-   OUT_BATCH(0); /* write data */
-   OUT_BATCH(0); /* write data */
-   ADVANCE_BATCH();
+   brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
+                               brw->batch.workaround_bo, 0, 0, 0);
 
    brw->batch.need_workaround_flush = false;
 }
index 5ed2089049bc83a287b73ae0830332d17013b02f..779a7ccd05c023427945435aaa6204c4c9f59b26 100644 (file)
@@ -65,6 +65,9 @@ bool intel_batchbuffer_emit_reloc64(struct brw_context *brw,
                                     uint32_t write_domain,
                                     uint32_t offset);
 void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
+void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+                                 drm_intel_bo *bo, uint32_t offset,
+                                 uint32_t imm_lower, uint32_t imm_upper);
 void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
 void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
 void intel_emit_depth_stall_flushes(struct brw_context *brw);