i965: Share the flush for brw_blorp_miptree_download into a pbo
authorChris Wilson <chris@chris-wilson.co.uk>
Wed, 11 Oct 2017 20:43:45 +0000 (21:43 +0100)
committerKenneth Graunke <kenneth@whitecape.org>
Fri, 13 Oct 2017 02:58:40 +0000 (19:58 -0700)
As all users of brw_blorp_miptree_download() must emit a full pipeline
and cache flush when targeting a user PBO (as that PBO may subsequently
be bound anywhere, outside of the driver's dirty tracking), move that
flush into brw_blorp_miptree_download() itself.

v2 (Ken): Rebase without userptr stuff so it can land sooner.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_blorp.c
src/mesa/drivers/dri/i965/intel_pixel_read.c
src/mesa/drivers/dri/i965/intel_tex_image.c

index eec2b1417462cd7e7c02924e89292282516cff3a..ed4f9870f230201ba6234cf4d7d516bb2c266576 100644 (file)
@@ -1094,6 +1094,28 @@ brw_blorp_download_miptree(struct brw_context *brw,
 
    result = true;
 
+   /* We implement PBO transfers by binding the user-provided BO as a
+    * fake framebuffer and rendering to it.  This breaks the invariant of the
+    * GL that nothing is able to render to a BO, causing nondeterministic
+    * corruption issues because the render cache is not coherent with a
+    * number of other caches that the BO could potentially be bound to
+    * afterwards.
+    *
+    * This could be solved in the same way that we guarantee texture
+    * coherency after a texture is attached to a framebuffer and
+    * rendered to, but that would involve checking *all* BOs bound to
+    * the pipeline for the case we need to emit a cache flush due to
+    * previous rendering to any of them -- Including vertex, index,
+    * uniform, atomic counter, shader image, transform feedback,
+    * indirect draw buffers, etc.
+    *
+    * That would increase the per-draw call overhead even though it's
+    * very unlikely that any of the BOs bound to the pipeline has been
+    * rendered to via a PBO at any point, so it seems better to just
+    * flush here unconditionally.
+    */
+   brw_emit_mi_flush(brw);
+
 err:
    brw_bo_unreference(dst_bo);
 
index 6aa9b53464db3da3286fcda11af830c8572ab54d..4528d6d265a5dcd3fb1d7d06a8b51d3e3177cc34 100644 (file)
@@ -275,30 +275,8 @@ intelReadPixels(struct gl_context * ctx,
 
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       if (intel_readpixels_blorp(ctx, x, y, width, height,
-                                 format, type, pixels, pack)) {
-         /* intel_readpixels_blorp() implements PBO transfers by
-          * binding the user-provided BO as a fake framebuffer and rendering
-          * to it.  This breaks the invariant of the GL that nothing is able
-          * to render to a BO, causing nondeterministic corruption issues
-          * because the render cache is not coherent with a number of other
-          * caches that the BO could potentially be bound to afterwards.
-          *
-          * This could be solved in the same way that we guarantee texture
-          * coherency after a texture is attached to a framebuffer and
-          * rendered to, but that would involve checking *all* BOs bound to
-          * the pipeline for the case we need to emit a cache flush due to
-          * previous rendering to any of them -- Including vertex, index,
-          * uniform, atomic counter, shader image, transform feedback,
-          * indirect draw buffers, etc.
-          *
-          * That would increase the per-draw call overhead even though it's
-          * very unlikely that any of the BOs bound to the pipeline has been
-          * rendered to via a PBO at any point, so it seems better to just
-          * flush here unconditionally.
-          */
-         brw_emit_mi_flush(brw);
+                                 format, type, pixels, pack))
          return;
-      }
 
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
    }
index e4d3f120387ee516095f0ffe8604992ccf1fcf1e..5396e0a43bcee489b87f537dce6ef71d31b6e1f5 100644 (file)
@@ -747,15 +747,8 @@ intel_get_tex_sub_image(struct gl_context *ctx,
       if (intel_gettexsubimage_blorp(brw, texImage,
                                      xoffset, yoffset, zoffset,
                                      width, height, depth, format, type,
-                                     pixels, &ctx->Pack)) {
-         /* Flush to guarantee coherency between the render cache and other
-          * caches the PBO could potentially be bound to after this point.
-          * See the related comment in intelReadPixels() for a more detailed
-          * explanation.
-          */
-         brw_emit_mi_flush(brw);
+                                     pixels, &ctx->Pack))
          return;
-      }
 
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
    }