From f5dd608db2d6a67cfe27efed948408414a057fe3 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 13 Jan 2014 16:00:18 -0800 Subject: [PATCH] i965: Create a helper function for emitting PIPE_CONTROL writes. There are a lot of places that use PIPE_CONTROL to write a value to a buffer (either an immediate write, TIMESTAMP, or PS_DEPTH_COUNT). Creating a single function to do this seems convenient. As part of this refactor, we now set the PPGTT/GTT selection bit correctly on Gen7+. Previously, we set bit 2 of DW2 on all platforms. This is correct for Sandybridge, but actually part of the address on Ivybridge and later! Broadwell will also increase the length of these packets by 1; with the refactoring, we should have to adjust that in substantially fewer places, giving us confidence that we've hit them all. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_queryobj.c | 56 +++--------- src/mesa/drivers/dri/i965/gen6_queryobj.c | 15 +--- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 88 +++++++++++-------- src/mesa/drivers/dri/i965/intel_batchbuffer.h | 3 + 4 files changed, 69 insertions(+), 93 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 9f839379bff..dc26c0864e1 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -49,36 +49,15 @@ void brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { - if (brw->gen >= 6) { - /* Emit workaround flushes: */ - if (brw->gen == 6) { - brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - } - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP); - OUT_RELOC(query_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - idx * sizeof(uint64_t)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | - PIPE_CONTROL_WRITE_TIMESTAMP); - OUT_RELOC(query_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - idx * sizeof(uint64_t)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + if (brw->gen == 6) { + /* Emit Sandybridge workaround flush: */ + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); } + + brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_TIMESTAMP, + query_bo, idx * sizeof(uint64_t), 0, 0); } /** @@ -89,21 +68,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { assert(brw->gen < 6); - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | - PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT); - /* This object could be mapped cacheable, but we don't have an exposed - * mechanism to support that. Since it's going uncached, tell GEM that - * we're writing to it. The usual clflush should be all that's required - * to pick up the results. - */ - OUT_RELOC(query_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - (idx * sizeof(uint64_t))); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_WRITE_DEPTH_COUNT + | PIPE_CONTROL_DEPTH_STALL, + query_bo, idx * sizeof(uint64_t), 0, 0); } /** diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index 16d0c101432..e76562393de 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -49,17 +49,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) if (brw->gen == 6) intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_WRITE_DEPTH_COUNT); - OUT_RELOC(query_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - (idx * sizeof(uint64_t))); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_WRITE_DEPTH_COUNT + | PIPE_CONTROL_DEPTH_STALL, + query_bo, idx * sizeof(uint64_t), 0, 0); } /* diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index f54ca9b30e4..d1587cba10c 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -458,6 +458,44 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) } } +/** + * Emit a PIPE_CONTROL that writes to a buffer object. + * + * \p flags should contain one of the following items: + * - PIPE_CONTROL_WRITE_IMMEDIATE + * - PIPE_CONTROL_WRITE_TIMESTAMP + * - PIPE_CONTROL_WRITE_DEPTH_COUNT + */ +void +brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, + drm_intel_bo *bo, uint32_t offset, + uint32_t imm_lower, uint32_t imm_upper) +{ + if (brw->gen >= 6) { + /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24 + * on later platforms. We always use PPGTT on Gen7+. + */ + unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0; + + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); + OUT_BATCH(flags); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + gen6_gtt | offset); + OUT_BATCH(imm_lower); + OUT_BATCH(imm_upper); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + PIPE_CONTROL_GLOBAL_GTT_WRITE | offset); + OUT_BATCH(imm_lower); + OUT_BATCH(imm_upper); + ADVANCE_BATCH(); + } +} + /** * Restriction [DevSNB, DevIVB]: * @@ -492,15 +530,11 @@ void gen7_emit_vs_workaround_flush(struct brw_context *brw) { assert(brw->gen == 7); - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(brw->batch.workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BATCH(0); /* write data */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_WRITE_IMMEDIATE + | PIPE_CONTROL_DEPTH_STALL, + brw->batch.workaround_bo, 0, + 0, 0); } @@ -510,27 +544,11 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw) void gen7_emit_cs_stall_flush(struct brw_context *brw) { - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20] - * CS Stall): - * - * One of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - * We choose to do a Post-Sync Operation (Write Immediate Data), since - * it seems like it will incur the least additional performance penalty. - */ - OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(brw->batch.workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_CS_STALL + | PIPE_CONTROL_WRITE_IMMEDIATE, + brw->batch.workaround_bo, 0, + 0, 0); } @@ -581,14 +599,8 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw) PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(brw->batch.workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BATCH(0); /* write data */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, + brw->batch.workaround_bo, 0, 0, 0); brw->batch.need_workaround_flush = false; } diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 5ed2089049b..779a7ccd05c 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -65,6 +65,9 @@ bool intel_batchbuffer_emit_reloc64(struct brw_context *brw, uint32_t write_domain, uint32_t offset); void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags); +void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, + drm_intel_bo *bo, uint32_t offset, + uint32_t imm_lower, uint32_t imm_upper); void intel_batchbuffer_emit_mi_flush(struct brw_context *brw); void intel_emit_post_sync_nonzero_flush(struct brw_context *brw); void intel_emit_depth_stall_flushes(struct brw_context *brw); -- 2.30.2