From: Kenneth Graunke Date: Sat, 10 Jan 2015 07:07:56 +0000 (-0800) Subject: i965: Do Sandybridge workaround flushes before each primitive. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0b499abb51c80867ad034f2a6d9fcb1e86d021cc;p=mesa.git i965: Do Sandybridge workaround flushes before each primitive. Sandybridge requires the post-sync non-zero workaround in a ton of places, and if you ever miss one, the GPU usually hangs. Currently, we try to track exactly when a workaround flush is necessary (via the brw->batch.need_workaround_flush flag). This is tricky to get right, and we've botched it several times in the past. This patch unconditionally performs the post-sync non-zero flush at the start of each primitive's state upload (including BLORP). We drop the needs_workaround_flush flag, and drop all the other callers, as the flush has already been performed. We have no data to indicate that simply flushing all the time will hurt performance, and it has the potential to help stability. v2: Add post-sync workaround to initial GPU state upload to be extra cautious (suggested by Chad Versace). Signed-off-by: Kenneth Graunke Reviewed-by: Chad Versace --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 298e0fbab7a..4e722087faf 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -861,7 +861,6 @@ struct intel_batchbuffer { drm_intel_bo *last_bo; /** BO for post-sync nonzero writes for gen6 workaround. */ drm_intel_bo *workaround_bo; - bool need_workaround_flush; uint16_t emit, total; uint16_t used, reserved_space; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index b7e42cbf7d9..2d3de4501d9 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -276,9 +276,6 @@ static void brw_emit_prim(struct brw_context *brw, OUT_BATCH(base_vertex_location); ADVANCE_BATCH(); - /* Only used on Sandybridge; harmless to set elsewhere. */ - brw->batch.need_workaround_flush = true; - if (brw->always_flush_cache) { intel_batchbuffer_emit_mi_flush(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index a405eb25f65..190cdf76135 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -47,10 +47,6 @@ static void upload_drawing_rect(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined. */ - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ @@ -581,7 +577,6 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw, * non-pipelined state that will need the PIPE_CONTROL workaround. */ if (brw->gen == 6) { - intel_emit_post_sync_nonzero_flush(brw); intel_emit_depth_stall_flushes(brw); } @@ -685,9 +680,6 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw, * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. */ if (brw->gen >= 6 || hiz) { - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | GEN5_DEPTH_CLEAR_VALID | @@ -720,9 +712,6 @@ static void upload_polygon_stipple(struct brw_context *brw) if (!ctx->Polygon.StippleFlag) return; - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(33); OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2)); @@ -766,9 +755,6 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) if (!ctx->Polygon.StippleFlag) return; - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2)); @@ -810,9 +796,6 @@ static void upload_aa_line_parameters(struct brw_context *brw) if (brw->gen == 4 && !brw->is_g4x) return; - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); /* use legacy aa line coverage computation */ @@ -842,9 +825,6 @@ static void upload_line_stipple(struct brw_context *brw) if (!ctx->Line.StippleFlag) return; - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2)); OUT_BATCH(ctx->Line.StipplePattern); @@ -883,10 +863,6 @@ brw_upload_invariant_state(struct brw_context *brw) { const bool is_965 = brw->gen == 4 && !brw->is_g4x; - /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */ - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - /* Select the 3D pipeline (as opposed to media) */ const uint32_t _3DSTATE_PIPELINE_SELECT = is_965 ? CMD_PIPELINE_SELECT_965 : CMD_PIPELINE_SELECT_GM45; @@ -954,9 +930,6 @@ static void upload_state_base_address( struct brw_context *brw ) if (brw->gen >= 6) { uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0; - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); OUT_BATCH(mocs << 8 | /* General State Memory Object Control State */ diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index c053c34ba67..2a9082272d4 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -66,10 +66,6 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx) void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { - /* Emit Sandybridge workaround flush: */ - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_DEPTH_COUNT | PIPE_CONTROL_DEPTH_STALL, diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 52d96f40413..84e5c104bf3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -344,6 +344,9 @@ brw_upload_initial_gpu_state(struct brw_context *brw) if (!brw->hw_ctx) return; + if (brw->gen == 6) + intel_emit_post_sync_nonzero_flush(brw); + brw_upload_invariant_state(brw); if (brw->gen >= 8) { @@ -612,6 +615,10 @@ void brw_upload_state(struct brw_context *brw) if ((state->mesa | state->brw) == 0) return; + /* Emit Sandybridge workaround flushes on every primitive, for safety. */ + if (brw->gen == 6) + intel_emit_post_sync_nonzero_flush(brw); + if (unlikely(INTEL_DEBUG)) { /* Debug version which enforces various sanity checks on the * state flags which are generated and checked to help ensure diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index d4aa955e2d5..372846b0716 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -517,17 +517,16 @@ void gen6_blorp_emit_vs_disable(struct brw_context *brw, const brw_blorp_params *params) { - if (brw->gen == 6) { - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, - * 3DSTATE_VS, Dword 5.0 "VS Function Enable": - * - * [DevSNB] A pipeline flush must be programmed prior to a - * 3DSTATE_VS command that causes the VS Function Enable to - * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL - * command with CS stall bit set and a post sync operation. - */ - intel_emit_post_sync_nonzero_flush(brw); - } + /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, + * 3DSTATE_VS, Dword 5.0 "VS Function Enable": + * + * [DevSNB] A pipeline flush must be programmed prior to a + * 3DSTATE_VS command that causes the VS Function Enable to + * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL + * command with CS stall bit set and a post sync operation. + * + * We've already done one at the start of the BLORP operation. + */ /* Disable the push constant buffers. */ BEGIN_BATCH(5); @@ -817,7 +816,6 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context *brw, /* 3DSTATE_DEPTH_BUFFER */ { - intel_emit_post_sync_nonzero_flush(brw); intel_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); @@ -893,7 +891,6 @@ static void gen6_blorp_emit_depth_disable(struct brw_context *brw, const brw_blorp_params *params) { - intel_emit_post_sync_nonzero_flush(brw); intel_emit_depth_stall_flushes(brw); BEGIN_BATCH(7); @@ -945,9 +942,6 @@ void gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, const brw_blorp_params *params) { - if (brw->gen == 6) - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); @@ -997,9 +991,6 @@ gen6_blorp_emit_primitive(struct brw_context *brw, OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); - - /* Only used on Sandybridge; harmless to set elsewhere. */ - brw->batch.need_workaround_flush = true; } /** @@ -1025,7 +1016,7 @@ gen6_blorp_exec(struct brw_context *brw, uint32_t prog_offset = params->get_wm_prog(brw, &prog_data); /* Emit workaround flushes when we switch from drawing to blorping. */ - brw->batch.need_workaround_flush = true; + intel_emit_post_sync_nonzero_flush(brw); gen6_emit_3dstate_multisample(brw, params->dst.num_samples); gen6_emit_3dstate_sample_mask(brw, diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c b/src/mesa/drivers/dri/i965/gen6_depth_state.c index f92c1afe64d..effb9c638cc 100644 --- a/src/mesa/drivers/dri/i965/gen6_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c @@ -65,11 +65,6 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, */ bool enable_hiz_ss = hiz || separate_stencil; - - /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both - * non-pipelined state that will need the PIPE_CONTROL workaround. - */ - intel_emit_post_sync_nonzero_flush(brw); intel_emit_depth_stall_flushes(brw); irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); @@ -238,8 +233,6 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw, * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet * when HiZ is enabled and the DEPTH_BUFFER_STATE changes. */ - intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | GEN5_DEPTH_CLEAR_VALID | diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c index 7c9cbfaea63..ec46479ff75 100644 --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -132,9 +132,6 @@ gen6_emit_3dstate_multisample(struct brw_context *brw, unreachable("Unrecognized num_samples in gen6_emit_3dstate_multisample"); } - /* 3DSTATE_MULTISAMPLE is nonpipelined. */ - intel_emit_post_sync_nonzero_flush(brw); - int len = brw->gen >= 7 ? 4 : 3; BEGIN_BATCH(len); OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2)); diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index ff93de341d6..511b6990503 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -259,9 +259,6 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, = _mesa_compute_max_transform_feedback_vertices(xfb_obj, linked_xfb_info); - /* 3DSTATE_GS_SVB_INDEX is non-pipelined. */ - intel_emit_post_sync_nonzero_flush(brw); - /* Initialize the SVBI 0 register to zero and set the maximum index. */ BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e365cc67df7..ee68ba5cf17 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -171,10 +171,9 @@ upload_vs_state(struct brw_context *brw) * flush can be executed by sending a PIPE_CONTROL command with CS * stall bit set and a post sync operation. * - * Although we don't disable the VS during normal drawing, BLORP sometimes - * disables it. To be safe, do the flush here just in case. + * We've already done such a flush at the start of state upload, so we + * don't need to do another one here. */ - intel_emit_post_sync_nonzero_flush(brw); if (stage_state->push_const_size == 0) { /* Disable the push constant buffers. */ @@ -247,7 +246,6 @@ upload_vs_state(struct brw_context *brw) * bug reports that led to this workaround, and may be more than * what is strictly required to avoid the issue. */ - intel_emit_post_sync_nonzero_flush(brw); brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_INSTRUCTION_FLUSH | diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 654dcdb54c7..7b96005ae05 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -51,8 +51,6 @@ intel_batchbuffer_init(struct brw_context *brw) 4096, 4096); } - brw->batch.need_workaround_flush = true; - if (!brw->has_llc) { brw->batch.cpu_map = malloc(BATCH_SZ); brw->batch.map = brw->batch.cpu_map; @@ -183,11 +181,6 @@ brw_new_batch(struct brw_context *brw) brw->state.dirty.brw |= BRW_NEW_BATCH; - /* Assume that the last command before the start of our batch was a - * primitive, for safety. - */ - brw->batch.need_workaround_flush = true; - brw->state_batch_count = 0; brw->ib.type = -1; @@ -647,17 +640,12 @@ gen7_emit_cs_stall_flush(struct brw_context *brw) void intel_emit_post_sync_nonzero_flush(struct brw_context *brw) { - if (!brw->batch.need_workaround_flush) - return; - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, brw->batch.workaround_bo, 0, 0, 0); - - brw->batch.need_workaround_flush = false; } /* Emit a pipelined flush to either flush render and texture cache for