i965: Track format and aux usage in the render cache
authorJason Ekstrand <jason.ekstrand@intel.com>
Thu, 14 Dec 2017 01:25:26 +0000 (17:25 -0800)
committerJason Ekstrand <jason.ekstrand@intel.com>
Wed, 17 Jan 2018 05:41:32 +0000 (21:41 -0800)
This lets us perform render cache flushes whenever a surface goes from
being used with one aux+format to a different aux+format.

This is the "proper" fix for https://bugs.freedesktop.org/102435.
ee57b15ec764736e2d5360beaef9fb2045ed0f68 which was really just a partial
revert of 3e57e9494c2279580ad6a83ab8c065d01e7e634e was just a hack to
get rid of a hang in a bunch of Valve games.  This solves the actual
problem responsible for the hang and lets us enable CCS_E once again.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102435
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: "17.3" <mesa-stable@lists.freedesktop.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/genX_blorp_exec.c
src/mesa/drivers/dri/i965/intel_fbo.c
src/mesa/drivers/dri/i965/intel_fbo.h

index 8d8ab71093b0bb51b2fb17c2f98d7ad70a5f0c40..3cbc2e8c13362612ba97e8072f250852969f6e54 100644 (file)
@@ -764,7 +764,7 @@ struct brw_context
     * and would need flushing before being used from another cache domain that
     * isn't coherent with it (i.e. the sampler).
     */
-   struct set *render_cache;
+   struct hash_table *render_cache;
 
    /**
     * Set of struct brw_bo * that have been used as a depth buffer within this
index 1f86378f5eebfbd28786a58e2972c5287069e29e..96e014dc1fc00fdd2e81639ab5a8ed721a4b6b51 100644 (file)
@@ -503,13 +503,17 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw)
       mesa_format mesa_format =
          _mesa_get_render_format(ctx, intel_rb_format(irb));
       enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
+      bool blend_enabled = ctx->Color.BlendEnabled & (1 << i);
+      enum isl_aux_usage aux_usage =
+         intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
+                                        blend_enabled);
 
       intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
                                    irb->mt_layer, irb->layer_count,
-                                   isl_format,
-                                   ctx->Color.BlendEnabled & (1 << i));
+                                   isl_format, blend_enabled);
 
-      brw_cache_flush_for_render(brw, irb->mt->bo);
+      brw_cache_flush_for_render(brw, irb->mt->bo,
+                                 isl_format, aux_usage);
    }
 }
 
@@ -575,12 +579,16 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
       mesa_format mesa_format =
          _mesa_get_render_format(ctx, intel_rb_format(irb));
       enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
+      bool blend_enabled = ctx->Color.BlendEnabled & (1 << i);
+      enum isl_aux_usage aux_usage =
+         intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
+                                        blend_enabled);
+
+      brw_render_cache_add_bo(brw, irb->mt->bo, isl_format, aux_usage);
 
-      brw_render_cache_add_bo(brw, irb->mt->bo);
       intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
                                   irb->mt_layer, irb->layer_count,
-                                  isl_format,
-                                  ctx->Color.BlendEnabled & (1 << i));
+                                  isl_format, blend_enabled);
    }
 }
 
index e8bc52e5d705321822c91441e7e3a4ba6d7fd349..062171af600312ea83bc49dea1f24b330b98ab74 100644 (file)
@@ -239,8 +239,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
     */
    if (params->src.enabled)
       brw_cache_flush_for_read(brw, params->src.addr.buffer);
-   if (params->dst.enabled)
-      brw_cache_flush_for_render(brw, params->dst.addr.buffer);
+   if (params->dst.enabled) {
+      brw_cache_flush_for_render(brw, params->dst.addr.buffer,
+                                 params->dst.view.format,
+                                 params->dst.aux_usage);
+   }
    if (params->depth.enabled)
       brw_cache_flush_for_depth(brw, params->depth.addr.buffer);
    if (params->stencil.enabled)
@@ -310,8 +313,11 @@ retry:
                               !params->stencil.enabled;
    brw->ib.index_size = -1;
 
-   if (params->dst.enabled)
-      brw_render_cache_add_bo(brw, params->dst.addr.buffer);
+   if (params->dst.enabled) {
+      brw_render_cache_add_bo(brw, params->dst.addr.buffer,
+                              params->dst.view.format,
+                              params->dst.aux_usage);
+   }
    if (params->depth.enabled)
       brw_depth_cache_add_bo(brw, params->depth.addr.buffer);
    if (params->stencil.enabled)
index 75c85ecb6393f5dc257a6de6a97fef29929885d7..056f494e2f9f476df592e27a432fd50bd1bf2353 100644 (file)
@@ -972,14 +972,13 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
 void
 brw_cache_sets_clear(struct brw_context *brw)
 {
-   struct set_entry *entry;
+   struct hash_entry *render_entry;
+   hash_table_foreach(brw->render_cache, render_entry)
+      _mesa_hash_table_remove(brw->render_cache, render_entry);
 
-   set_foreach(brw->render_cache, entry) {
-      _mesa_set_remove(brw->render_cache, entry);
-   }
-
-   set_foreach(brw->depth_cache, entry)
-      _mesa_set_remove(brw->depth_cache, entry);
+   struct set_entry *depth_entry;
+   set_foreach(brw->depth_cache, depth_entry)
+      _mesa_set_remove(brw->depth_cache, depth_entry);
 }
 
 /**
@@ -1018,28 +1017,76 @@ flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo)
 void
 brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo)
 {
-   if (_mesa_set_search(brw->render_cache, bo) ||
+   if (_mesa_hash_table_search(brw->render_cache, bo) ||
        _mesa_set_search(brw->depth_cache, bo))
       flush_depth_and_render_caches(brw, bo);
 }
 
+static void *
+format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
+{
+   return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
+}
+
 void
-brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo)
+brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
+                           enum isl_format format,
+                           enum isl_aux_usage aux_usage)
 {
    if (_mesa_set_search(brw->depth_cache, bo))
       flush_depth_and_render_caches(brw, bo);
+
+   /* Check to see if this bo has been used by a previous rendering operation
+    * but with a different format or aux usage.  If it has, flush the render
+    * cache so we ensure that it's only in there with one format or aux usage
+    * at a time.
+    *
+    * Even though it's not obvious, this can easily happen in practice.
+    * Suppose a client is blending on a surface with sRGB encode enabled on
+    * gen9.  This implies that you get AUX_USAGE_CCS_D at best.  If the client
+    * then disables sRGB decode and continues blending we will flip on
+    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
+    * perfectly valid since CCS_E is a subset of CCS_D).  However, this means
+    * that we have fragments in-flight which are rendering with UNORM+CCS_E
+    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
+    * same time and the pixel scoreboard and color blender are trying to sort
+    * it all out.  This ends badly (i.e. GPU hangs).
+    *
+    * To date, we have never observed GPU hangs or even corruption to be
+    * associated with switching the format, only the aux usage.  However,
+    * there are comments in various docs which indicate that the render cache
+    * isn't 100% resilient to format changes.  We may as well be conservative
+    * and flush on format changes too.  We can always relax this later if we
+    * find it to be a performance problem.
+    */
+   struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo);
+   if (entry && entry->data != format_aux_tuple(format, aux_usage))
+      flush_depth_and_render_caches(brw, bo);
 }
 
 void
-brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)
+brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
+                        enum isl_format format,
+                        enum isl_aux_usage aux_usage)
 {
-   _mesa_set_add(brw->render_cache, bo);
+#ifndef NDEBUG
+   struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo);
+   if (entry) {
+      /* Otherwise, someone didn't do a flush_for_render and that would be
+       * very bad indeed.
+       */
+      assert(entry->data == format_aux_tuple(format, aux_usage));
+   }
+#endif
+
+   _mesa_hash_table_insert(brw->render_cache, bo,
+                           format_aux_tuple(format, aux_usage));
 }
 
 void
 brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo)
 {
-   if (_mesa_set_search(brw->render_cache, bo))
+   if (_mesa_hash_table_search(brw->render_cache, bo))
       flush_depth_and_render_caches(brw, bo);
 }
 
@@ -1066,8 +1113,8 @@ intel_fbo_init(struct brw_context *brw)
    dd->EGLImageTargetRenderbufferStorage =
       intel_image_target_renderbuffer_storage;
 
-   brw->render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
-                                        _mesa_key_pointer_equal);
+   brw->render_cache = _mesa_hash_table_create(brw, _mesa_hash_pointer,
+                                               _mesa_key_pointer_equal);
    brw->depth_cache = _mesa_set_create(brw, _mesa_hash_pointer,
                                        _mesa_key_pointer_equal);
 }
index 10be4bbc7dc0477ecb9abeccdf93ab433c0cf9d2..88a5b6732b24cc7167537786a827a80269d6787f 100644 (file)
@@ -236,9 +236,13 @@ intel_renderbuffer_upsample(struct brw_context *brw,
 
 void brw_cache_sets_clear(struct brw_context *brw);
 void brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo);
-void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo);
+void brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
+                                enum isl_format format,
+                                enum isl_aux_usage aux_usage);
 void brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo);
-void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo);
+void brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
+                             enum isl_format format,
+                             enum isl_aux_usage aux_usage);
 void brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo);
 
 unsigned