i965: Workaround the gen9 hw astc5x5 sampler bug
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 7 Sep 2018 05:40:54 +0000 (00:40 -0500)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 7 Sep 2018 15:42:40 +0000 (10:42 -0500)
gen9 hardware has a bug in the sampler cache that can cause GPU hangs
whenever a texture with aux compression enabled is in the sampler cache
together with an ASTC5x5 texture.  Because we can't control what the
client binds at any given time, we have two options: resolve the CCS or
decompress the ASTC.  Doing a CCS or HiZ resolve is far less drastic
and will likely have a smaller performance impact.

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Tested-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
src/mesa/drivers/dri/i965/brw_blorp.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/drivers/dri/i965/intel_mipmap_tree.c
src/mesa/drivers/dri/i965/intel_mipmap_tree.h

index 7476cee43a4854ed4eb3dee57c168bf2f339493f..ad747e0766e75c92b1b51d1c590a180ba897ca09 100644 (file)
@@ -187,6 +187,9 @@ blorp_surf_for_miptree(struct brw_context *brw,
    assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
           (surf->aux_addr.buffer == NULL));
 
+   if (!is_render_target && brw->screen->devinfo.gen == 9)
+      gen9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage);
+
    /* ISL wants real levels, not offset ones. */
    *level -= mt->first_level;
 }
@@ -382,7 +385,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
    enum isl_format src_isl_format =
       brw_blorp_to_isl_format(brw, src_format, false);
    enum isl_aux_usage src_aux_usage =
-      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format);
+      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format,
+                                      0 /* The astc5x5 WA isn't needed */);
    /* We do format workarounds for some depth formats so we can't reliably
     * sample with HiZ.  One of these days, we should fix that.
     */
index d3b969534673c943155f059c4e82171f5d2f94de..bf2cddebdc6eebbb71123ac7b291dfa04699e43e 100644 (file)
@@ -168,6 +168,11 @@ enum brw_cache_id {
    BRW_MAX_CACHE
 };
 
+enum gen9_astc5x5_wa_tex_type {
+   GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
+   GEN9_ASTC5X5_WA_TEX_TYPE_AUX     = 1 << 1,
+};
+
 enum brw_state_id {
    /* brw_cache_ids must come first - see brw_program_cache.c */
    BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -1326,6 +1331,8 @@ struct brw_context
     */
    enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
 
+   enum gen9_astc5x5_wa_tex_type gen9_astc5x5_wa_tex_mask;
+
    __DRIcontext *driContext;
    struct intel_screen *screen;
 };
@@ -1350,6 +1357,10 @@ void intel_update_renderbuffers(__DRIcontext *context,
                                 __DRIdrawable *drawable);
 void intel_prepare_render(struct brw_context *brw);
 
+void gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
+                                      mesa_format format,
+                                      enum isl_aux_usage aux_usage);
+
 void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
                                 bool *draw_aux_buffer_disabled);
 
index 71461d7b0a75a9bcf6f95398499b573391643ab9..8536c040109bd3053a340d12854c653d45e0485b 100644 (file)
@@ -378,6 +378,68 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
    return found;
 }
 
+/** Implement the ASTC 5x5 sampler workaround
+ *
+ * Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface
+ * cannot live in the sampler cache at the same time as an aux compressed
+ * surface.  In order to work around the bug we have to stall rendering with a
+ * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the
+ * texture cache whenever one of ASTC 5x5 or aux compressed may be in the
+ * sampler cache and we're about to render with something which samples from
+ * the other.
+ *
+ * In the case of a single shader which textures from both ASTC 5x5 and
+ * a texture which is CCS or HiZ compressed, we have to resolve the aux
+ * compressed texture prior to rendering.  This second part is handled in
+ * brw_predraw_resolve_inputs() below.
+ *
+ * We have observed this issue to affect CCS and HiZ sampling but whether or
+ * not it also affects MCS is unknown.  Because MCS has no concept of a
+ * resolve (and doing one would be stupid expensive), we choose to simply
+ * ignore the possibility and hope for the best.
+ */
+static void
+gen9_apply_astc5x5_wa_flush(struct brw_context *brw,
+                            enum gen9_astc5x5_wa_tex_type curr_mask)
+{
+   assert(brw->screen->devinfo.gen == 9);
+
+   if (((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+        (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX)) ||
+       ((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
+        (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) {
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+   }
+
+   brw->gen9_astc5x5_wa_tex_mask = curr_mask;
+}
+
+static enum gen9_astc5x5_wa_tex_type
+gen9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage)
+{
+   if (aux_usage != ISL_AUX_USAGE_NONE &&
+       aux_usage != ISL_AUX_USAGE_MCS)
+      return GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
+
+   if (format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+       format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
+      return GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
+
+   return 0;
+}
+
+/* Helper for the gen9 ASTC 5x5 workaround.  This version exists for BLORP's
+ * use-cases where only a single texture is bound.
+ */
+void
+gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
+                                 mesa_format format,
+                                 enum isl_aux_usage aux_usage)
+{
+   gen9_apply_astc5x5_wa_flush(brw, gen9_astc5x5_wa_bits(format, aux_usage));
+}
+
 static void
 mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
                            const struct gl_program *prog)
@@ -417,8 +479,30 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
       mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
    }
 
-   /* Resolve depth buffer and render cache of each enabled texture. */
    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+
+   enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits = 0;
+   if (brw->screen->devinfo.gen == 9) {
+      /* In order to properly implement the ASTC 5x5 workaround for an
+       * arbitrary draw or dispatch call, we have to walk the entire list of
+       * textures looking for ASTC 5x5.  If there is any ASTC 5x5 in this draw
+       * call, all aux compressed textures must be resolved and have aux
+       * compression disabled while sampling.
+       */
+      for (int i = 0; i <= maxEnabledUnit; i++) {
+         if (!ctx->Texture.Unit[i]._Current)
+            continue;
+         tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+         if (!tex_obj || !tex_obj->mt)
+            continue;
+
+         astc5x5_wa_bits |= gen9_astc5x5_wa_bits(tex_obj->_Format,
+                                                 tex_obj->mt->aux_usage);
+      }
+      gen9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits);
+   }
+
+   /* Resolve depth buffer and render cache of each enabled texture. */
    for (int i = 0; i <= maxEnabledUnit; i++) {
       if (!ctx->Texture.Unit[i]._Current)
         continue;
@@ -452,7 +536,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
 
       intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
                                     min_level, num_levels,
-                                    min_layer, num_layers);
+                                    min_layer, num_layers,
+                                    astc5x5_wa_bits);
 
       /* If any programs are using it with texelFetch, we may need to also do
        * a prepare with an sRGB format to ensure texelFetch works "properly".
@@ -463,7 +548,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
          if (txf_format != view_format) {
             intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
                                           min_level, num_levels,
-                                          min_layer, num_layers);
+                                          min_layer, num_layers,
+                                          astc5x5_wa_bits);
          }
       }
 
@@ -535,7 +621,8 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw,
          if (irb) {
             intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
                                           irb->mt_level, 1,
-                                          irb->mt_layer, irb->layer_count);
+                                          irb->mt_layer, irb->layer_count,
+                                          brw->gen9_astc5x5_wa_tex_mask);
          }
       }
    }
index 42af41aca320ba60e4f7535a6816db05a8473f47..944762ec46ba656e9646b73bdeede002b383fed3 100644 (file)
@@ -603,7 +603,8 @@ static void brw_update_texture_surface(struct gl_context *ctx,
          view.usage |= ISL_SURF_USAGE_CUBE_BIT;
 
       enum isl_aux_usage aux_usage =
-         intel_miptree_texture_aux_usage(brw, mt, format);
+         intel_miptree_texture_aux_usage(brw, mt, format,
+                                         brw->gen9_astc5x5_wa_tex_mask);
 
       brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                              surf_offset, surf_index,
@@ -1107,7 +1108,8 @@ update_renderbuffer_read_surfaces(struct brw_context *brw)
             };
 
             enum isl_aux_usage aux_usage =
-               intel_miptree_texture_aux_usage(brw, irb->mt, format);
+               intel_miptree_texture_aux_usage(brw, irb->mt, format,
+                                               brw->gen9_astc5x5_wa_tex_mask);
             if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
                aux_usage = ISL_AUX_USAGE_NONE;
 
index 983f145afc969e58cea6c8cb321df9576aca4c26..36681352ba73c2e4c12512d529ba82bf0ed654da 100644 (file)
@@ -2542,8 +2542,19 @@ can_texture_with_ccs(struct brw_context *brw,
 enum isl_aux_usage
 intel_miptree_texture_aux_usage(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
-                                enum isl_format view_format)
+                                enum isl_format view_format,
+                                enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
 {
+   assert(brw->screen->devinfo.gen == 9 || astc5x5_wa_bits == 0);
+
+   /* On gen9, ASTC 5x5 textures cannot live in the sampler cache alongside
+    * CCS or HiZ compressed textures.  See gen9_apply_astc5x5_wa_flush() for
+    * details.
+    */
+   if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+       mt->aux_usage != ISL_AUX_USAGE_MCS)
+      return ISL_AUX_USAGE_NONE;
+
    switch (mt->aux_usage) {
    case ISL_AUX_USAGE_HIZ:
       if (intel_miptree_sample_with_hiz(brw, mt))
@@ -2601,10 +2612,12 @@ intel_miptree_prepare_texture(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               enum isl_format view_format,
                               uint32_t start_level, uint32_t num_levels,
-                              uint32_t start_layer, uint32_t num_layers)
+                              uint32_t start_layer, uint32_t num_layers,
+                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
 {
    enum isl_aux_usage aux_usage =
-      intel_miptree_texture_aux_usage(brw, mt, view_format);
+      intel_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits);
+
    bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
 
    /* Clear color is specified as ints or floats and the conversion is done by
index bb7df7ad235d5f04c1241b4082345910be973893..08c129a4b8b113deaae40991fc016eecfe1b4f34 100644 (file)
@@ -621,13 +621,15 @@ intel_miptree_access_raw(struct brw_context *brw,
 enum isl_aux_usage
 intel_miptree_texture_aux_usage(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
-                                enum isl_format view_format);
+                                enum isl_format view_format,
+                                enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits);
 void
 intel_miptree_prepare_texture(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               enum isl_format view_format,
                               uint32_t start_level, uint32_t num_levels,
-                              uint32_t start_layer, uint32_t num_layers);
+                              uint32_t start_layer, uint32_t num_layers,
+                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits);
 void
 intel_miptree_prepare_image(struct brw_context *brw,
                             struct intel_mipmap_tree *mt);