From d84275b884244a2fd3a6e67ceb2a5277e5edf89a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 13 Dec 2017 17:25:26 -0800 Subject: [PATCH] i965: Track format and aux usage in the render cache This lets us perform render cache flushes whenever a surface goes from being used with one aux+format to a different aux+format. This is the "proper" fix for https://bugs.freedesktop.org/102435. ee57b15ec764736e2d5360beaef9fb2045ed0f68 which was really just a partial revert of 3e57e9494c2279580ad6a83ab8c065d01e7e634e was just a hack to get rid of a hang in a bunch of Valve games. This solves the actual problem responsible for the hang and lets us enable CCS_E once again. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102435 Reviewed-by: Iago Toral Quiroga Reviewed-by: Kenneth Graunke Cc: "17.3" --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_draw.c | 20 ++++-- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 14 ++-- src/mesa/drivers/dri/i965/intel_fbo.c | 75 +++++++++++++++++---- src/mesa/drivers/dri/i965/intel_fbo.h | 8 ++- 5 files changed, 92 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8d8ab71093b..3cbc2e8c133 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -764,7 +764,7 @@ struct brw_context * and would need flushing before being used from another cache domain that * isn't coherent with it (i.e. the sampler). */ - struct set *render_cache; + struct hash_table *render_cache; /** * Set of struct brw_bo * that have been used as a depth buffer within this diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 1f86378f5ee..96e014dc1fc 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -503,13 +503,17 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw) mesa_format mesa_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format); + bool blend_enabled = ctx->Color.BlendEnabled & (1 << i); + enum isl_aux_usage aux_usage = + intel_miptree_render_aux_usage(brw, irb->mt, isl_format, + blend_enabled); intel_miptree_prepare_render(brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count, - isl_format, - ctx->Color.BlendEnabled & (1 << i)); + isl_format, blend_enabled); - brw_cache_flush_for_render(brw, irb->mt->bo); + brw_cache_flush_for_render(brw, irb->mt->bo, + isl_format, aux_usage); } } @@ -575,12 +579,16 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) mesa_format mesa_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format); + bool blend_enabled = ctx->Color.BlendEnabled & (1 << i); + enum isl_aux_usage aux_usage = + intel_miptree_render_aux_usage(brw, irb->mt, isl_format, + blend_enabled); + + brw_render_cache_add_bo(brw, irb->mt->bo, isl_format, aux_usage); - brw_render_cache_add_bo(brw, irb->mt->bo); intel_miptree_finish_render(brw, irb->mt, irb->mt_level, irb->mt_layer, irb->layer_count, - isl_format, - ctx->Color.BlendEnabled & (1 << i)); + isl_format, blend_enabled); } } diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index e8bc52e5d70..062171af600 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -239,8 +239,11 @@ genX(blorp_exec)(struct blorp_batch *batch, */ if (params->src.enabled) brw_cache_flush_for_read(brw, params->src.addr.buffer); - if (params->dst.enabled) - brw_cache_flush_for_render(brw, params->dst.addr.buffer); + if (params->dst.enabled) { + brw_cache_flush_for_render(brw, params->dst.addr.buffer, + params->dst.view.format, + params->dst.aux_usage); + } if (params->depth.enabled) brw_cache_flush_for_depth(brw, params->depth.addr.buffer); if (params->stencil.enabled) @@ -310,8 +313,11 @@ retry: !params->stencil.enabled; brw->ib.index_size = -1; - if (params->dst.enabled) - brw_render_cache_add_bo(brw, params->dst.addr.buffer); + if (params->dst.enabled) { + brw_render_cache_add_bo(brw, params->dst.addr.buffer, + params->dst.view.format, + params->dst.aux_usage); + } if (params->depth.enabled) brw_depth_cache_add_bo(brw, params->depth.addr.buffer); if (params->stencil.enabled) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 75c85ecb639..056f494e2f9 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -972,14 +972,13 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, void brw_cache_sets_clear(struct brw_context *brw) { - struct set_entry *entry; + struct hash_entry *render_entry; + hash_table_foreach(brw->render_cache, render_entry) + _mesa_hash_table_remove(brw->render_cache, render_entry); - set_foreach(brw->render_cache, entry) { - _mesa_set_remove(brw->render_cache, entry); - } - - set_foreach(brw->depth_cache, entry) - _mesa_set_remove(brw->depth_cache, entry); + struct set_entry *depth_entry; + set_foreach(brw->depth_cache, depth_entry) + _mesa_set_remove(brw->depth_cache, depth_entry); } /** @@ -1018,28 +1017,76 @@ flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo) void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo) { - if (_mesa_set_search(brw->render_cache, bo) || + if (_mesa_hash_table_search(brw->render_cache, bo) || _mesa_set_search(brw->depth_cache, bo)) flush_depth_and_render_caches(brw, bo); } +static void * +format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage) +{ + return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage); +} + void -brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo) +brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo, + enum isl_format format, + enum isl_aux_usage aux_usage) { if (_mesa_set_search(brw->depth_cache, bo)) flush_depth_and_render_caches(brw, bo); + + /* Check to see if this bo has been used by a previous rendering operation + * but with a different format or aux usage. If it has, flush the render + * cache so we ensure that it's only in there with one format or aux usage + * at a time. + * + * Even though it's not obvious, this can easily happen in practice. + * Suppose a client is blending on a surface with sRGB encode enabled on + * gen9. This implies that you get AUX_USAGE_CCS_D at best. If the client + * then disables sRGB decode and continues blending we will flip on + * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is + * perfectly valid since CCS_E is a subset of CCS_D). However, this means + * that we have fragments in-flight which are rendering with UNORM+CCS_E + * and other fragments in-flight with SRGB+CCS_D on the same surface at the + * same time and the pixel scoreboard and color blender are trying to sort + * it all out. This ends badly (i.e. GPU hangs). + * + * To date, we have never observed GPU hangs or even corruption to be + * associated with switching the format, only the aux usage. However, + * there are comments in various docs which indicate that the render cache + * isn't 100% resilient to format changes. We may as well be conservative + * and flush on format changes too. We can always relax this later if we + * find it to be a performance problem. + */ + struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo); + if (entry && entry->data != format_aux_tuple(format, aux_usage)) + flush_depth_and_render_caches(brw, bo); } void -brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo) +brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo, + enum isl_format format, + enum isl_aux_usage aux_usage) { - _mesa_set_add(brw->render_cache, bo); +#ifndef NDEBUG + struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo); + if (entry) { + /* Otherwise, someone didn't do a flush_for_render and that would be + * very bad indeed. + */ + assert(entry->data == format_aux_tuple(format, aux_usage)); + } +#endif + + _mesa_hash_table_insert(brw->render_cache, bo, + format_aux_tuple(format, aux_usage)); } void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo) { - if (_mesa_set_search(brw->render_cache, bo)) + if (_mesa_hash_table_search(brw->render_cache, bo)) flush_depth_and_render_caches(brw, bo); } @@ -1066,8 +1113,8 @@ intel_fbo_init(struct brw_context *brw) dd->EGLImageTargetRenderbufferStorage = intel_image_target_renderbuffer_storage; - brw->render_cache = _mesa_set_create(brw, _mesa_hash_pointer, - _mesa_key_pointer_equal); + brw->render_cache = _mesa_hash_table_create(brw, _mesa_hash_pointer, + _mesa_key_pointer_equal); brw->depth_cache = _mesa_set_create(brw, _mesa_hash_pointer, _mesa_key_pointer_equal); } diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h index 10be4bbc7dc..88a5b6732b2 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.h +++ b/src/mesa/drivers/dri/i965/intel_fbo.h @@ -236,9 +236,13 @@ intel_renderbuffer_upsample(struct brw_context *brw, void brw_cache_sets_clear(struct brw_context *brw); void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo); -void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo); +void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo, + enum isl_format format, + enum isl_aux_usage aux_usage); void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo); -void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo); +void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo, + enum isl_format format, + enum isl_aux_usage aux_usage); void brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo); unsigned -- 2.30.2