From: Nicolai Hähnle Date: Wed, 8 Jun 2016 11:24:14 +0000 (+0200) Subject: st/mesa: cache staging texture for glReadPixels X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3735a925ef5692c836c4d26d6adee370dae1c2b0;p=mesa.git st/mesa: cache staging texture for glReadPixels v2: add ST_DEBUG flag for disabling (suggested by Ilia) Reviewed-by: Marek Olšák (v1) --- diff --git a/src/mesa/state_tracker/st_cb_fbo.h b/src/mesa/state_tracker/st_cb_fbo.h index f3b310b07b7..414a6616b23 100644 --- a/src/mesa/state_tracker/st_cb_fbo.h +++ b/src/mesa/state_tracker/st_cb_fbo.h @@ -58,6 +58,8 @@ struct st_renderbuffer boolean software; void *data; + bool use_readpix_cache; + /* Inputs from Driver.RenderTexture, don't use directly. */ boolean is_rtt; /**< whether Driver.RenderTexture was called */ unsigned rtt_face, rtt_slice; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index a501b7b3ee4..39d2274b3b4 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -41,11 +41,31 @@ #include "st_context.h" #include "st_cb_bitmap.h" #include "st_cb_readpixels.h" +#include "st_debug.h" #include "state_tracker/st_cb_texture.h" #include "state_tracker/st_format.h" #include "state_tracker/st_pbo.h" #include "state_tracker/st_texture.h" +/* The readpixels cache caches a blitted staging texture so that back-to-back + * calls to glReadPixels with user pointers require less CPU-GPU synchronization. + * + * Assumptions: + * + * (1) Blits have high synchronization overheads, and it is beneficial to + * use a single blit of the entire framebuffer instead of many smaller + * blits (because the smaller blits cannot be batched, and we have to wait + * for the GPU after each one). + * + * (2) transfer_map implicitly involves a blit as well (for de-tiling, copy + * from VRAM, etc.), so that it is beneficial to replace the + * _mesa_readpixels path as well when possible. + * + * Change this #define to true to fill and use the cache whenever possible + * (this is inefficient and only meant for testing / debugging). + */ +#define ALWAYS_READPIXELS_CACHE false + static boolean needs_integer_signed_unsigned_conversion(const struct gl_context *ctx, GLenum format, GLenum type) @@ -299,6 +319,62 @@ blit_to_staging(struct st_context *st, struct st_renderbuffer *strb, return dst; } +static struct pipe_resource * +try_cached_readpixels(struct st_context *st, struct st_renderbuffer *strb, + bool invert_y, + GLsizei width, GLsizei height, + GLenum format, + enum pipe_format src_format, enum pipe_format dst_format) +{ + struct pipe_resource *src = strb->texture; + struct pipe_resource *dst = NULL; + + if (ST_DEBUG & DEBUG_NOREADPIXCACHE) + return NULL; + + /* Reset cache after invalidation or switch of parameters. */ + if (st->readpix_cache.src != src || + st->readpix_cache.dst_format != dst_format || + st->readpix_cache.level != strb->surface->u.tex.level || + st->readpix_cache.layer != strb->surface->u.tex.first_layer) { + pipe_resource_reference(&st->readpix_cache.src, src); + pipe_resource_reference(&st->readpix_cache.cache, NULL); + st->readpix_cache.dst_format = dst_format; + st->readpix_cache.level = strb->surface->u.tex.level; + st->readpix_cache.layer = strb->surface->u.tex.first_layer; + st->readpix_cache.hits = 0; + } + + /* Decide whether to trigger the cache. */ + if (!st->readpix_cache.cache) { + if (!strb->use_readpix_cache && !ALWAYS_READPIXELS_CACHE) { + /* Heuristic: If previous successive calls read at least a fraction + * of the surface _and_ we read again, trigger the cache. + */ + unsigned threshold = MAX2(1, strb->Base.Width * strb->Base.Height / 8); + + if (st->readpix_cache.hits < threshold) { + st->readpix_cache.hits += width * height; + return NULL; + } + + strb->use_readpix_cache = true; + } + + /* Fill the cache */ + st->readpix_cache.cache = blit_to_staging(st, strb, invert_y, + 0, 0, + strb->Base.Width, + strb->Base.Height, format, + src_format, dst_format); + } + + /* Return an owning reference to stay consistent with the non-cached path */ + pipe_resource_reference(&dst, st->readpix_cache.cache); + + return dst; +} + /** * This uses a blit to copy the read buffer to a texture format which matches * the format and type combo and then a fast read-back is done using memcpy. @@ -330,6 +406,7 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, unsigned bind = PIPE_BIND_TRANSFER_READ; struct pipe_transfer *tex_xfer; ubyte *map = NULL; + bool window; /* Validate state (to be sure we have up-to-date framebuffer surfaces) * and flush the bitmap cache prior to reading. */ @@ -395,24 +472,36 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, return; } - /* See if the texture format already matches the format and type, - * in which case the memcpy-based fast path will likely be used and - * we don't have to blit. */ - if (_mesa_format_matches_format_and_type(rb->Format, format, - type, pack->SwapBytes, NULL)) { - goto fallback; - } - if (needs_integer_signed_unsigned_conversion(ctx, format, type)) { goto fallback; } - dst = blit_to_staging(st, strb, - st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, - x, y, width, height, format, - src_format, dst_format); - if (!dst) - goto fallback; + /* Cache a staging texture for back-to-back ReadPixels, to avoid CPU-GPU + * synchronization overhead. + */ + dst = try_cached_readpixels(st, strb, + st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, + width, height, format, src_format, dst_format); + if (dst) { + window = false; + } else { + /* See if the texture format already matches the format and type, + * in which case the memcpy-based fast path will likely be used and + * we don't have to blit. */ + if (_mesa_format_matches_format_and_type(rb->Format, format, + type, pack->SwapBytes, NULL)) { + goto fallback; + } + + dst = blit_to_staging(st, strb, + st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP, + x, y, width, height, format, + src_format, dst_format); + if (!dst) + goto fallback; + + window = true; + } /* map resources */ pixels = _mesa_map_pbo_dest(ctx, pack, pixels); @@ -425,6 +514,9 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, goto fallback; } + if (!window) + map += y * tex_xfer->stride + x * util_format_get_blocksize(dst_format); + /* memcpy data into a user buffer */ { const uint bytesPerRow = width * util_format_get_blocksize(dst_format); diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 9eb3b53b230..eaf25490bd6 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -58,6 +58,7 @@ static const struct debug_named_value st_debug_flags[] = { { "wf", DEBUG_WIREFRAME, NULL }, { "precompile", DEBUG_PRECOMPILE, NULL }, { "gremedy", DEBUG_GREMEDY, "Enable GREMEDY debug extensions" }, + { "noreadpixcache", DEBUG_NOREADPIXCACHE, NULL }, DEBUG_NAMED_VALUE_END }; diff --git a/src/mesa/state_tracker/st_debug.h b/src/mesa/state_tracker/st_debug.h index a094fdc2bfa..e14360937a9 100644 --- a/src/mesa/state_tracker/st_debug.h +++ b/src/mesa/state_tracker/st_debug.h @@ -51,6 +51,7 @@ st_print_current(void); #define DEBUG_WIREFRAME 0x400 #define DEBUG_PRECOMPILE 0x800 #define DEBUG_GREMEDY 0x1000 +#define DEBUG_NOREADPIXCACHE 0x2000 #ifdef DEBUG extern int ST_DEBUG;