st/mesa: cache staging texture for glReadPixels
[mesa.git] / src / mesa / state_tracker / st_cb_readpixels.c
index 6ff6cf6f6d6bfe0eaf0c014dbfef78d669e44173..39d2274b3b432bebc599b5a333d76bbfbaa6571b 100644 (file)
@@ -25,6 +25,7 @@
  * 
  **************************************************************************/
 
+#include "main/bufferobj.h"
 #include "main/image.h"
 #include "main/pbo.h"
 #include "main/imports.h"
 #include "main/framebuffer.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "cso_cache/cso_context.h"
 
 #include "st_cb_fbo.h"
 #include "st_atom.h"
 #include "st_context.h"
 #include "st_cb_bitmap.h"
 #include "st_cb_readpixels.h"
+#include "st_debug.h"
 #include "state_tracker/st_cb_texture.h"
 #include "state_tracker/st_format.h"
+#include "state_tracker/st_pbo.h"
 #include "state_tracker/st_texture.h"
 
+/* The readpixels cache caches a blitted staging texture so that back-to-back
+ * calls to glReadPixels with user pointers require less CPU-GPU synchronization.
+ *
+ * Assumptions:
+ *
+ * (1) Blits have high synchronization overheads, and it is beneficial to
+ *     use a single blit of the entire framebuffer instead of many smaller
+ *     blits (because the smaller blits cannot be batched, and we have to wait
+ *     for the GPU after each one).
+ *
+ * (2) transfer_map implicitly involves a blit as well (for de-tiling, copy
+ *     from VRAM, etc.), so that it is beneficial to replace the
+ *     _mesa_readpixels path as well when possible.
+ *
+ * Change this #define to true to fill and use the cache whenever possible
+ * (this is inefficient and only meant for testing / debugging).
+ */
+#define ALWAYS_READPIXELS_CACHE false
+
 static boolean
 needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
                                          GLenum format, GLenum type)
@@ -68,6 +91,290 @@ needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
    return FALSE;
 }
 
+static bool
+try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb,
+                   bool invert_y,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   enum pipe_format src_format, enum pipe_format dst_format,
+                   const struct gl_pixelstore_attrib *pack, void *pixels)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct cso_context *cso = st->cso_context;
+   struct pipe_surface *surface = strb->surface;
+   struct pipe_resource *texture = strb->texture;
+   const struct util_format_description *desc;
+   struct st_pbo_addresses addr;
+   struct pipe_framebuffer_state fb;
+   enum pipe_texture_target view_target;
+   bool success = false;
+
+   if (texture->nr_samples > 1)
+      return false;
+
+   if (!screen->is_format_supported(screen, dst_format, PIPE_TEXTURE_2D,
+                                    texture->nr_samples,
+                                    PIPE_BIND_SHADER_IMAGE))
+      return false;
+
+   desc = util_format_description(dst_format);
+
+   /* Compute PBO addresses */
+   addr.bytes_per_pixel = desc->block.bits / 8;
+   addr.xoffset = x;
+   addr.yoffset = y;
+   addr.width = width;
+   addr.height = height;
+   addr.depth = 1;
+   if (!st_pbo_addresses_pixelstore(st, GL_TEXTURE_2D, false, pack, pixels, &addr))
+      return false;
+
+   cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
+                        CSO_BIT_FRAGMENT_SAMPLERS |
+                        CSO_BIT_FRAGMENT_IMAGE0 |
+                        CSO_BIT_VERTEX_ELEMENTS |
+                        CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
+                        CSO_BIT_FRAMEBUFFER |
+                        CSO_BIT_VIEWPORT |
+                        CSO_BIT_RASTERIZER |
+                        CSO_BIT_DEPTH_STENCIL_ALPHA |
+                        CSO_BIT_STREAM_OUTPUTS |
+                        CSO_BIT_PAUSE_QUERIES |
+                        CSO_BITS_ALL_SHADERS));
+   cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
+
+   /* Set up the sampler_view */
+   {
+      struct pipe_sampler_view templ;
+      struct pipe_sampler_view *sampler_view;
+      struct pipe_sampler_state sampler = {0};
+      const struct pipe_sampler_state *samplers[1] = {&sampler};
+
+      u_sampler_view_default_template(&templ, texture, src_format);
+
+      switch (texture->target) {
+      case PIPE_TEXTURE_CUBE:
+      case PIPE_TEXTURE_CUBE_ARRAY:
+         view_target = PIPE_TEXTURE_2D_ARRAY;
+         break;
+      default:
+         view_target = texture->target;
+         break;
+      }
+
+      templ.target = view_target;
+      templ.u.tex.first_level = surface->u.tex.level;
+      templ.u.tex.last_level = templ.u.tex.first_level;
+
+      if (view_target != PIPE_TEXTURE_3D) {
+         templ.u.tex.first_layer = surface->u.tex.first_layer;
+         templ.u.tex.last_layer = templ.u.tex.last_layer;
+      } else {
+         addr.constants.layer_offset = surface->u.tex.first_layer;
+      }
+
+      sampler_view = pipe->create_sampler_view(pipe, texture, &templ);
+      if (sampler_view == NULL)
+         goto fail;
+
+      cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view);
+
+      pipe_sampler_view_reference(&sampler_view, NULL);
+
+      cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+   }
+
+   /* Set up destination image */
+   {
+      struct pipe_image_view image;
+
+      memset(&image, 0, sizeof(image));
+      image.resource = addr.buffer;
+      image.format = dst_format;
+      image.access = PIPE_IMAGE_ACCESS_WRITE;
+      image.u.buf.first_element = addr.first_element;
+      image.u.buf.last_element = addr.last_element;
+
+      cso_set_shader_images(cso, PIPE_SHADER_FRAGMENT, 0, 1, &image);
+   }
+
+   /* Set up no-attachment framebuffer */
+   memset(&fb, 0, sizeof(fb));
+   fb.width = surface->width;
+   fb.height = surface->height;
+   fb.samples = 1;
+   fb.layers = 1;
+   cso_set_framebuffer(cso, &fb);
+
+   cso_set_viewport_dims(cso, fb.width, fb.height, invert_y);
+
+   if (invert_y)
+      st_pbo_addresses_invert_y(&addr, fb.height);
+
+   {
+      struct pipe_depth_stencil_alpha_state dsa;
+      memset(&dsa, 0, sizeof(dsa));
+      cso_set_depth_stencil_alpha(cso, &dsa);
+   }
+
+   /* Set up the fragment shader */
+   {
+      void *fs = st_pbo_get_download_fs(st, view_target);
+      if (!fs)
+         goto fail;
+
+      cso_set_fragment_shader_handle(cso, fs);
+   }
+
+   success = st_pbo_draw(st, &addr, fb.width, fb.height);
+
+   /* Buffer written via shader images needs explicit synchronization. */
+   pipe->memory_barrier(pipe, PIPE_BARRIER_ALL);
+
+fail:
+   cso_restore_state(cso);
+   cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
+
+   return success;
+}
+
+/* Invalidate the readpixels cache to ensure we don't read stale data.
+ */
+void st_invalidate_readpix_cache(struct st_context *st)
+{
+   pipe_resource_reference(&st->readpix_cache.src, NULL);
+   pipe_resource_reference(&st->readpix_cache.cache, NULL);
+}
+
+/**
+ * Create a staging texture and blit the requested region to it.
+ */
+static struct pipe_resource *
+blit_to_staging(struct st_context *st, struct st_renderbuffer *strb,
+                   bool invert_y,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   GLenum format,
+                   enum pipe_format src_format, enum pipe_format dst_format)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_resource dst_templ;
+   struct pipe_resource *dst;
+   struct pipe_blit_info blit;
+
+   /* We are creating a texture of the size of the region being read back.
+    * Need to check for NPOT texture support. */
+   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
+       (!util_is_power_of_two(width) ||
+        !util_is_power_of_two(height)))
+      return NULL;
+
+   /* create the destination texture */
+   memset(&dst_templ, 0, sizeof(dst_templ));
+   dst_templ.target = PIPE_TEXTURE_2D;
+   dst_templ.format = dst_format;
+   dst_templ.bind = PIPE_BIND_TRANSFER_READ;
+   if (util_format_is_depth_or_stencil(dst_format))
+      dst_templ.bind |= PIPE_BIND_DEPTH_STENCIL;
+   else
+      dst_templ.bind |= PIPE_BIND_RENDER_TARGET;
+   dst_templ.usage = PIPE_USAGE_STAGING;
+
+   st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
+                                   &dst_templ.width0, &dst_templ.height0,
+                                   &dst_templ.depth0, &dst_templ.array_size);
+
+   dst = screen->resource_create(screen, &dst_templ);
+   if (!dst)
+      return NULL;
+
+   memset(&blit, 0, sizeof(blit));
+   blit.src.resource = strb->texture;
+   blit.src.level = strb->surface->u.tex.level;
+   blit.src.format = src_format;
+   blit.dst.resource = dst;
+   blit.dst.level = 0;
+   blit.dst.format = dst->format;
+   blit.src.box.x = x;
+   blit.dst.box.x = 0;
+   blit.src.box.y = y;
+   blit.dst.box.y = 0;
+   blit.src.box.z = strb->surface->u.tex.first_layer;
+   blit.dst.box.z = 0;
+   blit.src.box.width = blit.dst.box.width = width;
+   blit.src.box.height = blit.dst.box.height = height;
+   blit.src.box.depth = blit.dst.box.depth = 1;
+   blit.mask = st_get_blit_mask(strb->Base._BaseFormat, format);
+   blit.filter = PIPE_TEX_FILTER_NEAREST;
+   blit.scissor_enable = FALSE;
+
+   if (invert_y) {
+      blit.src.box.y = strb->Base.Height - blit.src.box.y;
+      blit.src.box.height = -blit.src.box.height;
+   }
+
+   /* blit */
+   st->pipe->blit(st->pipe, &blit);
+
+   return dst;
+}
+
+static struct pipe_resource *
+try_cached_readpixels(struct st_context *st, struct st_renderbuffer *strb,
+                      bool invert_y,
+                      GLsizei width, GLsizei height,
+                      GLenum format,
+                      enum pipe_format src_format, enum pipe_format dst_format)
+{
+   struct pipe_resource *src = strb->texture;
+   struct pipe_resource *dst = NULL;
+
+   if (ST_DEBUG & DEBUG_NOREADPIXCACHE)
+      return NULL;
+
+   /* Reset cache after invalidation or switch of parameters. */
+   if (st->readpix_cache.src != src ||
+       st->readpix_cache.dst_format != dst_format ||
+       st->readpix_cache.level != strb->surface->u.tex.level ||
+       st->readpix_cache.layer != strb->surface->u.tex.first_layer) {
+      pipe_resource_reference(&st->readpix_cache.src, src);
+      pipe_resource_reference(&st->readpix_cache.cache, NULL);
+      st->readpix_cache.dst_format = dst_format;
+      st->readpix_cache.level = strb->surface->u.tex.level;
+      st->readpix_cache.layer = strb->surface->u.tex.first_layer;
+      st->readpix_cache.hits = 0;
+   }
+
+   /* Decide whether to trigger the cache. */
+   if (!st->readpix_cache.cache) {
+      if (!strb->use_readpix_cache && !ALWAYS_READPIXELS_CACHE) {
+         /* Heuristic: If previous successive calls read at least a fraction
+          * of the surface _and_ we read again, trigger the cache.
+          */
+         unsigned threshold = MAX2(1, strb->Base.Width * strb->Base.Height / 8);
+
+         if (st->readpix_cache.hits < threshold) {
+            st->readpix_cache.hits += width * height;
+            return NULL;
+         }
+
+         strb->use_readpix_cache = true;
+      }
+
+      /* Fill the cache */
+      st->readpix_cache.cache = blit_to_staging(st, strb, invert_y,
+                                                0, 0,
+                                                strb->Base.Width,
+                                                strb->Base.Height, format,
+                                                src_format, dst_format);
+   }
+
+   /* Return an owning reference to stay consistent with the non-cached path */
+   pipe_resource_reference(&dst, st->readpix_cache.cache);
+
+   return dst;
+}
+
 /**
  * This uses a blit to copy the read buffer to a texture format which matches
  * the format and type combo and then a fast read-back is done using memcpy.
@@ -81,11 +388,11 @@ needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
  *       we do here should be free in such cases.
  */
 static void
-st_readpixels(struct gl_context *ctx, GLint x, GLint y,
+st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
               GLsizei width, GLsizei height,
               GLenum format, GLenum type,
               const struct gl_pixelstore_attrib *pack,
-              GLvoid *pixels)
+              void *pixels)
 {
    struct st_context *st = st_context(ctx);
    struct gl_renderbuffer *rb =
@@ -95,16 +402,15 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
    struct pipe_screen *screen = pipe->screen;
    struct pipe_resource *src;
    struct pipe_resource *dst = NULL;
-   struct pipe_resource dst_templ;
    enum pipe_format dst_format, src_format;
-   struct pipe_blit_info blit;
    unsigned bind = PIPE_BIND_TRANSFER_READ;
    struct pipe_transfer *tex_xfer;
    ubyte *map = NULL;
+   bool window;
 
    /* Validate state (to be sure we have up-to-date framebuffer surfaces)
     * and flush the bitmap cache prior to reading. */
-   st_validate_state(st);
+   st_validate_state(st, ST_PIPELINE_RENDER);
    st_flush_bitmap_cache(st);
 
    if (!st->prefer_blit_based_texture_transfer) {
@@ -120,14 +426,6 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
       goto fallback;
    }
 
-   /* We are creating a texture of the size of the region being read back.
-    * Need to check for NPOT texture support. */
-   if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
-       (!util_is_power_of_two(width) ||
-        !util_is_power_of_two(height))) {
-      goto fallback;
-   }
-
    /* If the base internal format and the texture format don't match, we have
     * to use the slow path. */
    if (rb->_BaseFormat !=
@@ -135,22 +433,10 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
       goto fallback;
    }
 
-   /* See if the texture format already matches the format and type,
-    * in which case the memcpy-based fast path will likely be used and
-    * we don't have to blit. */
-   if (_mesa_format_matches_format_and_type(rb->Format, format,
-                                            type, pack->SwapBytes, NULL)) {
-      goto fallback;
-   }
-
    if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
       goto fallback;
    }
 
-   if (needs_integer_signed_unsigned_conversion(ctx, format, type)) {
-      goto fallback;
-   }
-
    /* Convert the source format to what is expected by ReadPixels
     * and see if it's supported. */
    src_format = util_format_linear(src->format);
@@ -177,49 +463,45 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
       goto fallback;
    }
 
-   /* create the destination texture */
-   memset(&dst_templ, 0, sizeof(dst_templ));
-   dst_templ.target = PIPE_TEXTURE_2D;
-   dst_templ.format = dst_format;
-   dst_templ.bind = bind;
-   dst_templ.usage = PIPE_USAGE_STAGING;
-
-   st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
-                                   &dst_templ.width0, &dst_templ.height0,
-                                   &dst_templ.depth0, &dst_templ.array_size);
+   if (st->pbo.download_enabled && _mesa_is_bufferobj(pack->BufferObj)) {
+      if (try_pbo_readpixels(st, strb,
+                             st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
+                             x, y, width, height,
+                             src_format, dst_format,
+                             pack, pixels))
+         return;
+   }
 
-   dst = screen->resource_create(screen, &dst_templ);
-   if (!dst) {
+   if (needs_integer_signed_unsigned_conversion(ctx, format, type)) {
       goto fallback;
    }
 
-   memset(&blit, 0, sizeof(blit));
-   blit.src.resource = src;
-   blit.src.level = strb->surface->u.tex.level;
-   blit.src.format = src_format;
-   blit.dst.resource = dst;
-   blit.dst.level = 0;
-   blit.dst.format = dst->format;
-   blit.src.box.x = x;
-   blit.dst.box.x = 0;
-   blit.src.box.y = y;
-   blit.dst.box.y = 0;
-   blit.src.box.z = strb->surface->u.tex.first_layer;
-   blit.dst.box.z = 0;
-   blit.src.box.width = blit.dst.box.width = width;
-   blit.src.box.height = blit.dst.box.height = height;
-   blit.src.box.depth = blit.dst.box.depth = 1;
-   blit.mask = st_get_blit_mask(rb->_BaseFormat, format);
-   blit.filter = PIPE_TEX_FILTER_NEAREST;
-   blit.scissor_enable = FALSE;
+   /* Cache a staging texture for back-to-back ReadPixels, to avoid CPU-GPU
+    * synchronization overhead.
+    */
+   dst = try_cached_readpixels(st, strb,
+                               st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
+                               width, height, format, src_format, dst_format);
+   if (dst) {
+      window = false;
+   } else {
+      /* See if the texture format already matches the format and type,
+       * in which case the memcpy-based fast path will likely be used and
+       * we don't have to blit. */
+      if (_mesa_format_matches_format_and_type(rb->Format, format,
+                                               type, pack->SwapBytes, NULL)) {
+         goto fallback;
+      }
 
-   if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
-      blit.src.box.y = rb->Height - blit.src.box.y;
-      blit.src.box.height = -blit.src.box.height;
-   }
+      dst = blit_to_staging(st, strb,
+                            st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
+                            x, y, width, height, format,
+                            src_format, dst_format);
+      if (!dst)
+         goto fallback;
 
-   /* blit */
-   st->pipe->blit(st->pipe, &blit);
+      window = true;
+   }
 
    /* map resources */
    pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
@@ -232,15 +514,18 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
       goto fallback;
    }
 
+   if (!window)
+      map += y * tex_xfer->stride + x * util_format_get_blocksize(dst_format);
+
    /* memcpy data into a user buffer */
    {
       const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
       GLuint row;
 
       for (row = 0; row < (unsigned) height; row++) {
-         GLvoid *dest = _mesa_image_address3d(pack, pixels,
+         void *dest = _mesa_image_address2d(pack, pixels,
                                               width, height, format,
-                                              type, 0, row, 0);
+                                              type, row, 0);
          memcpy(dest, map, bytesPerRow);
          map += tex_xfer->stride;
       }
@@ -257,5 +542,5 @@ fallback:
 
 void st_init_readpixels_functions(struct dd_function_table *functions)
 {
-   functions->ReadPixels = st_readpixels;
+   functions->ReadPixels = st_ReadPixels;
 }