From f9e630e23dfa3de815026a08e7e767e2ed90da0b Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 7 Sep 2018 00:40:54 -0500 Subject: [PATCH] i965: Workaround the gen9 hw astc5x5 sampler bug MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit gen9 hardware has a bug in the sampler cache that can cause GPU hangs whenever an texture with aux compression enabled is in the sampler cache together with an ASTC5x5 texture. Because we can't control what the client binds at any given time, we have two options: resolve the CCS or decompresss the ASTC. Doing a CCS or HiZ resolve is far less drastic and will likely have a smaller performance impact. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Kristian H. Kristensen Tested-by: Tapani Pälli Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_blorp.c | 6 +- src/mesa/drivers/dri/i965/brw_context.h | 11 +++ src/mesa/drivers/dri/i965/brw_draw.c | 95 ++++++++++++++++++- .../drivers/dri/i965/brw_wm_surface_state.c | 6 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 +++- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 6 +- 6 files changed, 131 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 7476cee43a4..ad747e0766e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -187,6 +187,9 @@ blorp_surf_for_miptree(struct brw_context *brw, assert((surf->aux_usage == ISL_AUX_USAGE_NONE) == (surf->aux_addr.buffer == NULL)); + if (!is_render_target && brw->screen->devinfo.gen == 9) + gen9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage); + /* ISL wants real levels, not offset ones. */ *level -= mt->first_level; } @@ -382,7 +385,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw, enum isl_format src_isl_format = brw_blorp_to_isl_format(brw, src_format, false); enum isl_aux_usage src_aux_usage = - intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format); + intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format, + 0 /* The astc5x5 WA isn't needed */); /* We do format workarounds for some depth formats so we can't reliably * sample with HiZ. One of these days, we should fix that. */ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d3b96953467..bf2cddebdc6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -168,6 +168,11 @@ enum brw_cache_id { BRW_MAX_CACHE }; +enum gen9_astc5x5_wa_tex_type { + GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0, + GEN9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1, +}; + enum brw_state_id { /* brw_cache_ids must come first - see brw_program_cache.c */ BRW_STATE_URB_FENCE = BRW_MAX_CACHE, @@ -1326,6 +1331,8 @@ struct brw_context */ enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS]; + enum gen9_astc5x5_wa_tex_type gen9_astc5x5_wa_tex_mask; + __DRIcontext *driContext; struct intel_screen *screen; }; @@ -1350,6 +1357,10 @@ void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); void intel_prepare_render(struct brw_context *brw); +void gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw, + mesa_format format, + enum isl_aux_usage aux_usage); + void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, bool *draw_aux_buffer_disabled); diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 71461d7b0a7..8536c040109 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -378,6 +378,68 @@ intel_disable_rb_aux_buffer(struct brw_context *brw, return found; } +/** Implement the ASTC 5x5 sampler workaround + * + * Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface + * cannot live in the sampler cache at the same time as an aux compressed + * surface. In order to work around the bug we have to stall rendering with a + * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the + * texture cache whenever one of ASTC 5x5 or aux compressed may be in the + * sampler cache and we're about to render with something which samples from + * the other. + * + * In the case of a single shader which textures from both ASTC 5x5 and + * a texture which is CCS or HiZ compressed, we have to resolve the aux + * compressed texture prior to rendering. This second part is handled in + * brw_predraw_resolve_inputs() below. + * + * We have observed this issue to affect CCS and HiZ sampling but whether or + * not it also affects MCS is unknown. Because MCS has no concept of a + * resolve (and doing one would be stupid expensive), we choose to simply + * ignore the possibility and hope for the best. + */ +static void +gen9_apply_astc5x5_wa_flush(struct brw_context *brw, + enum gen9_astc5x5_wa_tex_type curr_mask) +{ + assert(brw->screen->devinfo.gen == 9); + + if (((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && + (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX)) || + ((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) && + (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) { + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); + } + + brw->gen9_astc5x5_wa_tex_mask = curr_mask; +} + +static enum gen9_astc5x5_wa_tex_type +gen9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage) +{ + if (aux_usage != ISL_AUX_USAGE_NONE && + aux_usage != ISL_AUX_USAGE_MCS) + return GEN9_ASTC5X5_WA_TEX_TYPE_AUX; + + if (format == MESA_FORMAT_RGBA_ASTC_5x5 || + format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) + return GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5; + + return 0; +} + +/* Helper for the gen9 ASTC 5x5 workaround. This version exists for BLORP's + * use-cases where only a single texture is bound. + */ +void +gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw, + mesa_format format, + enum isl_aux_usage aux_usage) +{ + gen9_apply_astc5x5_wa_flush(brw, gen9_astc5x5_wa_bits(format, aux_usage)); +} + static void mark_textures_used_for_txf(BITSET_WORD *used_for_txf, const struct gl_program *prog) @@ -417,8 +479,30 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current); } - /* Resolve depth buffer and render cache of each enabled texture. */ int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; + + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits = 0; + if (brw->screen->devinfo.gen == 9) { + /* In order to properly implement the ASTC 5x5 workaround for an + * arbitrary draw or dispatch call, we have to walk the entire list of + * textures looking for ASTC 5x5. If there is any ASTC 5x5 in this draw + * call, all aux compressed textures must be resolved and have aux + * compression disabled while sampling. + */ + for (int i = 0; i <= maxEnabledUnit; i++) { + if (!ctx->Texture.Unit[i]._Current) + continue; + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) + continue; + + astc5x5_wa_bits |= gen9_astc5x5_wa_bits(tex_obj->_Format, + tex_obj->mt->aux_usage); + } + gen9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits); + } + + /* Resolve depth buffer and render cache of each enabled texture. */ for (int i = 0; i <= maxEnabledUnit; i++) { if (!ctx->Texture.Unit[i]._Current) continue; @@ -452,7 +536,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, intel_miptree_prepare_texture(brw, tex_obj->mt, view_format, min_level, num_levels, - min_layer, num_layers); + min_layer, num_layers, + astc5x5_wa_bits); /* If any programs are using it with texelFetch, we may need to also do * a prepare with an sRGB format to ensure texelFetch works "properly". @@ -463,7 +548,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, if (txf_format != view_format) { intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format, min_level, num_levels, - min_layer, num_layers); + min_layer, num_layers, + astc5x5_wa_bits); } } @@ -535,7 +621,8 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw, if (irb) { intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format, irb->mt_level, 1, - irb->mt_layer, irb->layer_count); + irb->mt_layer, irb->layer_count, + brw->gen9_astc5x5_wa_tex_mask); } } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 42af41aca32..944762ec46b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -603,7 +603,8 @@ static void brw_update_texture_surface(struct gl_context *ctx, view.usage |= ISL_SURF_USAGE_CUBE_BIT; enum isl_aux_usage aux_usage = - intel_miptree_texture_aux_usage(brw, mt, format); + intel_miptree_texture_aux_usage(brw, mt, format, + brw->gen9_astc5x5_wa_tex_mask); brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, surf_offset, surf_index, @@ -1107,7 +1108,8 @@ update_renderbuffer_read_surfaces(struct brw_context *brw) }; enum isl_aux_usage aux_usage = - intel_miptree_texture_aux_usage(brw, irb->mt, format); + intel_miptree_texture_aux_usage(brw, irb->mt, format, + brw->gen9_astc5x5_wa_tex_mask); if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE) aux_usage = ISL_AUX_USAGE_NONE; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 983f145afc9..36681352ba7 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2542,8 +2542,19 @@ can_texture_with_ccs(struct brw_context *brw, enum isl_aux_usage intel_miptree_texture_aux_usage(struct brw_context *brw, struct intel_mipmap_tree *mt, - enum isl_format view_format) + enum isl_format view_format, + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) { + assert(brw->screen->devinfo.gen == 9 || astc5x5_wa_bits == 0); + + /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side + * CCS or HiZ compressed textures. See gen9_apply_astc5x5_wa_flush() for + * details. + */ + if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && + mt->aux_usage != ISL_AUX_USAGE_MCS) + return ISL_AUX_USAGE_NONE; + switch (mt->aux_usage) { case ISL_AUX_USAGE_HIZ: if (intel_miptree_sample_with_hiz(brw, mt)) @@ -2601,10 +2612,12 @@ intel_miptree_prepare_texture(struct brw_context *brw, struct intel_mipmap_tree *mt, enum isl_format view_format, uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers) + uint32_t start_layer, uint32_t num_layers, + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) { enum isl_aux_usage aux_usage = - intel_miptree_texture_aux_usage(brw, mt, view_format); + intel_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits); + bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE; /* Clear color is specified as ints or floats and the conversion is done by diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index bb7df7ad235..08c129a4b8b 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -621,13 +621,15 @@ intel_miptree_access_raw(struct brw_context *brw, enum isl_aux_usage intel_miptree_texture_aux_usage(struct brw_context *brw, struct intel_mipmap_tree *mt, - enum isl_format view_format); + enum isl_format view_format, + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits); void intel_miptree_prepare_texture(struct brw_context *brw, struct intel_mipmap_tree *mt, enum isl_format view_format, uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers); + uint32_t start_layer, uint32_t num_layers, + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits); void intel_miptree_prepare_image(struct brw_context *brw, struct intel_mipmap_tree *mt); -- 2.30.2