i965: Fix PBO cache coherency issue after _mesa_meta_pbo_GetTexSubImage().
authorFrancisco Jerez <currojerez@riseup.net>
Sat, 31 Jan 2015 18:04:55 +0000 (20:04 +0200)
committerFrancisco Jerez <currojerez@riseup.net>
Wed, 13 May 2015 11:28:25 +0000 (14:28 +0300)
This problem can easily be reproduced with a number of
ARB_shader_image_load_store piglit tests, which use a buffer object as
PBO for a pixel transfer operation and later on bind the same buffer
to the pipeline as shader image -- The problem is not exclusive to
images though, and is likely to affect other kinds of buffer objects
that can be bound to the 3D pipeline, including vertex, index,
uniform, atomic counter buffers, etc.

CC: 10.5 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
src/mesa/drivers/dri/i965/intel_pixel_read.c
src/mesa/drivers/dri/i965/intel_tex_image.c

index d3ca38b6ecded2498bd9fd0a464fa393cab0415a..30380570d620f602040d0d7caac55525c63a96a0 100644 (file)
@@ -226,8 +226,30 @@ intelReadPixels(struct gl_context * ctx,
 
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1,
 
    if (_mesa_is_bufferobj(pack->BufferObj)) {
       if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1,
-                                        format, type, pixels, pack))
+                                        format, type, pixels, pack)) {
+         /* _mesa_meta_pbo_GetTexSubImage() implements PBO transfers by
+          * binding the user-provided BO as a fake framebuffer and rendering
+          * to it.  This breaks the invariant of the GL that nothing is able
+          * to render to a BO, causing nondeterministic corruption issues
+          * because the render cache is not coherent with a number of other
+          * caches that the BO could potentially be bound to afterwards.
+          *
+          * This could be solved in the same way that we guarantee texture
+          * coherency after a texture is attached to a framebuffer and
+          * rendered to, but that would involve checking *all* BOs bound to
+          * the pipeline for the case we need to emit a cache flush due to
+          * previous rendering to any of them -- Including vertex, index,
+          * uniform, atomic counter, shader image, transform feedback,
+          * indirect draw buffers, etc.
+          *
+          * That would increase the per-draw call overhead even though it's
+          * very unlikely that any of the BOs bound to the pipeline has been
+          * rendered to via a PBO at any point, so it seems better to just
+          * flush here unconditionally.
+          */
+         intel_batchbuffer_emit_mi_flush(brw);
          return;
          return;
+      }
 
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
    }
 
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
    }
index 7952ee5ad8899824f7967554627ac05ea14241bf..85d3d04ecb30616d49071d7fa5850dc252d4ff82 100644 (file)
@@ -486,8 +486,15 @@ intel_get_tex_image(struct gl_context *ctx,
       if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0,
                                         texImage->Width, texImage->Height,
                                         texImage->Depth, format, type,
       if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0,
                                         texImage->Width, texImage->Height,
                                         texImage->Depth, format, type,
-                                        pixels, &ctx->Pack))
+                                        pixels, &ctx->Pack)) {
+         /* Flush to guarantee coherency between the render cache and other
+          * caches the PBO could potentially be bound to after this point.
+          * See the related comment in intelReadPixels() for a more detailed
+          * explanation.
+          */
+         intel_batchbuffer_emit_mi_flush(brw);
          return;
          return;
+      }
 
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
    }
 
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
    }