From: Eric Anholt Date: Sat, 18 Jun 2011 01:44:26 +0000 (-0700) Subject: i965/gen6: Apply documented workaround for nonpipelined state packets. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f6e5230b2614cc91e4c849c07781b2230878d274;p=mesa.git i965/gen6: Apply documented workaround for nonpipelined state packets. Fixes a 100% reproducible GPU hang in topogun-1.06-orc-84k.trace. Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index b0f95dd66b5..c23569679e4 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -219,6 +219,12 @@ static void emit_depthbuffer(struct brw_context *brw) struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL; unsigned int len; + /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both + * non-pipelined state that will need the PIPE_CONTROL workaround. + */ + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + /* * If either depth or stencil buffer has packed depth/stencil format, * then don't use separate stencil. Emit only a depth buffer. @@ -408,6 +414,9 @@ static void emit_depthbuffer(struct brw_context *brw) * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. 
*/ if (intel->gen >= 6 || hiz_region) { + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); OUT_BATCH(0); @@ -523,6 +532,9 @@ static void upload_aa_line_parameters(struct brw_context *brw) if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); /* use legacy aa line coverage computation */ OUT_BATCH(0); @@ -553,6 +565,9 @@ static void upload_line_stipple(struct brw_context *brw) if (!ctx->Line.StippleFlag) return; + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2)); OUT_BATCH(ctx->Line.StipplePattern); @@ -580,6 +595,10 @@ static void upload_invarient_state( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; + /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. 
*/ + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + { /* 0x61040000 Pipeline Select */ /* PipelineSelect : 0 */ @@ -643,6 +662,7 @@ static void upload_invarient_state( struct brw_context *brw ) sip.header.length = 0; sip.bits0.pad = 0; sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); } @@ -683,6 +703,9 @@ static void upload_state_base_address( struct brw_context *brw ) struct intel_context *intel = &brw->intel; if (intel->gen >= 6) { + if (intel->gen == 6) + intel_emit_post_sync_nonzero_flush(intel); + BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); /* General state base address: stateless DP read/write requests */ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 77563aefdc5..735382902d1 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -293,7 +293,27 @@ emit: item->header = intel->batch.emit; } -static void +/** + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * XXX: There is also a workaround that would appear to apply to this + * workaround, but it doesn't appear to be necessary so far: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. 
+ */ +void intel_emit_post_sync_nonzero_flush(struct intel_context *intel) { if (!intel->batch.need_workaround_flush) diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 3ed88d086d3..fb4134d889e 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -39,6 +39,7 @@ GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, uint32_t write_domain, uint32_t offset); void intel_batchbuffer_emit_mi_flush(struct intel_context *intel); +void intel_emit_post_sync_nonzero_flush(struct intel_context *intel); static INLINE uint32_t float_as_int(float f) {