From: Jason Ekstrand Date: Fri, 3 Nov 2017 23:11:54 +0000 (-0700) Subject: i965: Track the depth and render caches separately X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fb0e9b5197cb65bde1e116d89acd5deb32f9132c;p=mesa.git i965: Track the depth and render caches separately Previously, we just had one hash set for tracking depth and render caches called brw_context::render_cache. This is less than ideal because the depth and render caches are separate and we can't track moves between the depth and the render caches. This limitation led to some unnecessary flushing around the depth cache. There are cases (mostly with BLORP) where we can end up touching a depth or stencil buffer through the render cache. To guard against this, BLORP would unconditionally do a render_cache_set_check_flush on its destination, which meant that if you did any rendering (including a BLORP operation) to a given surface and then used it as a BLORP destination, you would end up flushing it out of the render cache before rendering into it. Things get worse when you dig into the depth/stencil state code for regular GL draw calls. Because we may end up rendering to a depth or stencil buffer via BLORP, we did a render_cache_set_check_flush on all depth and stencil buffers in brw_emit_depthbuffer to ensure that they got flushed out of the render cache prior to using them for depth or stencil testing. However, because we also need to track dirtiness for depth and stencil so that we can implement depth and stencil texturing correctly, we were adding all depth and stencil buffers to the render cache set in brw_postdraw_set_buffers_need_resolve. This meant that, if anything caused 3DSTATE_DEPTH_BUFFER to get re-emitted (currently _NEW_BUFFERS, BRW_NEW_BATCH, and BRW_NEW_BLORP), we would almost always do a full pipeline stall and render/depth cache flush. The root cause of both of these problems is that we can't tell the difference between the render and depth caches in our tracking. 
This commit splits our cache tracking into two sets, one for render and one for depth, and properly handles transitioning between the two. We still flush all the caches whenever anything needs to be flushed. The idea is that if we're going to take the hit of a flush and stall, we may as well flush everything in the hopes that we can avoid a flush by something else later. Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8aa0c5ff64c..7457aba54a3 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -702,6 +702,13 @@ struct brw_context */ struct set *render_cache; + /** + * Set of struct brw_bo * that have been used as a depth buffer within this + * batchbuffer and would need flushing before being used from another cache + * domain that isn't coherent with it (i.e. the sampler). + */ + struct set *depth_cache; + /** * Number of resets observed in the system at context creation. 
* diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 84117531410..f824c127e40 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -232,7 +232,6 @@ genX(blorp_exec)(struct blorp_batch *batch, brw_cache_flush_for_depth(brw, params->depth.addr.buffer); if (params->stencil.enabled) brw_cache_flush_for_depth(brw, params->stencil.addr.buffer); - brw_render_cache_set_check_flush(brw, params->dst.addr.buffer); brw_select_pipeline(brw, BRW_RENDER_PIPELINE); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 1a366c78b00..33c79a2b54d 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -220,7 +220,7 @@ static void intel_batchbuffer_reset_and_clear_render_cache(struct brw_context *brw) { intel_batchbuffer_reset(brw); - brw_render_cache_set_clear(brw); + brw_cache_sets_clear(brw); } void diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 927f589321e..75c85ecb639 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -970,19 +970,16 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, } void -brw_render_cache_set_clear(struct brw_context *brw) +brw_cache_sets_clear(struct brw_context *brw) { struct set_entry *entry; set_foreach(brw->render_cache, entry) { _mesa_set_remove(brw->render_cache, entry); } -} -void -brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo) -{ - _mesa_set_add(brw->render_cache, bo); + set_foreach(brw->depth_cache, entry) + _mesa_set_remove(brw->depth_cache, entry); } /** @@ -997,14 +994,11 @@ brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo) * necessary is flushed before another use of that BO, but for reuse from * different caches within a batchbuffer, it's all our responsibility. 
*/ -void -brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo) +static void +flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - if (!_mesa_set_search(brw->render_cache, bo)) - return; - if (devinfo->gen >= 6) { brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH | @@ -1018,36 +1012,41 @@ brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo) brw_emit_mi_flush(brw); } - brw_render_cache_set_clear(brw); + brw_cache_sets_clear(brw); } void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo) { - brw_render_cache_set_check_flush(brw, bo); + if (_mesa_set_search(brw->render_cache, bo) || + _mesa_set_search(brw->depth_cache, bo)) + flush_depth_and_render_caches(brw, bo); } void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo) { + if (_mesa_set_search(brw->depth_cache, bo)) + flush_depth_and_render_caches(brw, bo); } void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo) { - brw_render_cache_set_add_bo(brw, bo); + _mesa_set_add(brw->render_cache, bo); } void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo) { - brw_render_cache_set_check_flush(brw, bo); + if (_mesa_set_search(brw->render_cache, bo)) + flush_depth_and_render_caches(brw, bo); } void brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo) { - brw_render_cache_set_add_bo(brw, bo); + _mesa_set_add(brw->depth_cache, bo); } /** @@ -1069,4 +1068,6 @@ intel_fbo_init(struct brw_context *brw) brw->render_cache = _mesa_set_create(brw, _mesa_hash_pointer, _mesa_key_pointer_equal); + brw->depth_cache = _mesa_set_create(brw, _mesa_hash_pointer, + _mesa_key_pointer_equal); } diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h index d06a1e8fe99..10be4bbc7dc 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.h +++ b/src/mesa/drivers/dri/i965/intel_fbo.h @@ 
-234,10 +234,7 @@ void intel_renderbuffer_upsample(struct brw_context *brw, struct intel_renderbuffer *irb); -void brw_render_cache_set_clear(struct brw_context *brw); -void brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo); -void brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo); - +void brw_cache_sets_clear(struct brw_context *brw); void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo); void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo); void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);