st/mesa: cache staging texture for glReadPixels
[mesa.git] / src / mesa / state_tracker / st_cb_readpixels.c
1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "main/bufferobj.h"
29 #include "main/image.h"
30 #include "main/pbo.h"
31 #include "main/imports.h"
32 #include "main/readpix.h"
33 #include "main/enums.h"
34 #include "main/framebuffer.h"
35 #include "util/u_inlines.h"
36 #include "util/u_format.h"
37 #include "cso_cache/cso_context.h"
38
39 #include "st_cb_fbo.h"
40 #include "st_atom.h"
41 #include "st_context.h"
42 #include "st_cb_bitmap.h"
43 #include "st_cb_readpixels.h"
44 #include "st_debug.h"
45 #include "state_tracker/st_cb_texture.h"
46 #include "state_tracker/st_format.h"
47 #include "state_tracker/st_pbo.h"
48 #include "state_tracker/st_texture.h"
49
/* The readpixels cache caches a blitted staging texture so that back-to-back
 * calls to glReadPixels with user pointers require less CPU-GPU
 * synchronization.
 *
 * Assumptions:
 *
 * (1) Blits have high synchronization overheads, and it is beneficial to
 *     use a single blit of the entire framebuffer instead of many smaller
 *     blits (because the smaller blits cannot be batched, and we have to wait
 *     for the GPU after each one).
 *
 * (2) transfer_map implicitly involves a blit as well (for de-tiling, copy
 *     from VRAM, etc.), so it is beneficial to replace the
 *     _mesa_readpixels path as well when possible.
 *
 * Change this #define to true to fill and use the cache whenever possible,
 * bypassing the hit-count heuristic in try_cached_readpixels (this is
 * inefficient and only meant for testing / debugging).
 */
#define ALWAYS_READPIXELS_CACHE false
68
69 static boolean
70 needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
71 GLenum format, GLenum type)
72 {
73 struct gl_renderbuffer *rb =
74 _mesa_get_read_renderbuffer_for_format(ctx, format);
75
76 assert(rb);
77
78 GLenum srcType = _mesa_get_format_datatype(rb->Format);
79
80 if ((srcType == GL_INT &&
81 (type == GL_UNSIGNED_INT ||
82 type == GL_UNSIGNED_SHORT ||
83 type == GL_UNSIGNED_BYTE)) ||
84 (srcType == GL_UNSIGNED_INT &&
85 (type == GL_INT ||
86 type == GL_SHORT ||
87 type == GL_BYTE))) {
88 return TRUE;
89 }
90
91 return FALSE;
92 }
93
94 static bool
95 try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb,
96 bool invert_y,
97 GLint x, GLint y, GLsizei width, GLsizei height,
98 enum pipe_format src_format, enum pipe_format dst_format,
99 const struct gl_pixelstore_attrib *pack, void *pixels)
100 {
101 struct pipe_context *pipe = st->pipe;
102 struct pipe_screen *screen = pipe->screen;
103 struct cso_context *cso = st->cso_context;
104 struct pipe_surface *surface = strb->surface;
105 struct pipe_resource *texture = strb->texture;
106 const struct util_format_description *desc;
107 struct st_pbo_addresses addr;
108 struct pipe_framebuffer_state fb;
109 enum pipe_texture_target view_target;
110 bool success = false;
111
112 if (texture->nr_samples > 1)
113 return false;
114
115 if (!screen->is_format_supported(screen, dst_format, PIPE_TEXTURE_2D,
116 texture->nr_samples,
117 PIPE_BIND_SHADER_IMAGE))
118 return false;
119
120 desc = util_format_description(dst_format);
121
122 /* Compute PBO addresses */
123 addr.bytes_per_pixel = desc->block.bits / 8;
124 addr.xoffset = x;
125 addr.yoffset = y;
126 addr.width = width;
127 addr.height = height;
128 addr.depth = 1;
129 if (!st_pbo_addresses_pixelstore(st, GL_TEXTURE_2D, false, pack, pixels, &addr))
130 return false;
131
132 cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS |
133 CSO_BIT_FRAGMENT_SAMPLERS |
134 CSO_BIT_FRAGMENT_IMAGE0 |
135 CSO_BIT_VERTEX_ELEMENTS |
136 CSO_BIT_AUX_VERTEX_BUFFER_SLOT |
137 CSO_BIT_FRAMEBUFFER |
138 CSO_BIT_VIEWPORT |
139 CSO_BIT_RASTERIZER |
140 CSO_BIT_DEPTH_STENCIL_ALPHA |
141 CSO_BIT_STREAM_OUTPUTS |
142 CSO_BIT_PAUSE_QUERIES |
143 CSO_BITS_ALL_SHADERS));
144 cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
145
146 /* Set up the sampler_view */
147 {
148 struct pipe_sampler_view templ;
149 struct pipe_sampler_view *sampler_view;
150 struct pipe_sampler_state sampler = {0};
151 const struct pipe_sampler_state *samplers[1] = {&sampler};
152
153 u_sampler_view_default_template(&templ, texture, src_format);
154
155 switch (texture->target) {
156 case PIPE_TEXTURE_CUBE:
157 case PIPE_TEXTURE_CUBE_ARRAY:
158 view_target = PIPE_TEXTURE_2D_ARRAY;
159 break;
160 default:
161 view_target = texture->target;
162 break;
163 }
164
165 templ.target = view_target;
166 templ.u.tex.first_level = surface->u.tex.level;
167 templ.u.tex.last_level = templ.u.tex.first_level;
168
169 if (view_target != PIPE_TEXTURE_3D) {
170 templ.u.tex.first_layer = surface->u.tex.first_layer;
171 templ.u.tex.last_layer = templ.u.tex.last_layer;
172 } else {
173 addr.constants.layer_offset = surface->u.tex.first_layer;
174 }
175
176 sampler_view = pipe->create_sampler_view(pipe, texture, &templ);
177 if (sampler_view == NULL)
178 goto fail;
179
180 cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view);
181
182 pipe_sampler_view_reference(&sampler_view, NULL);
183
184 cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers);
185 }
186
187 /* Set up destination image */
188 {
189 struct pipe_image_view image;
190
191 memset(&image, 0, sizeof(image));
192 image.resource = addr.buffer;
193 image.format = dst_format;
194 image.access = PIPE_IMAGE_ACCESS_WRITE;
195 image.u.buf.first_element = addr.first_element;
196 image.u.buf.last_element = addr.last_element;
197
198 cso_set_shader_images(cso, PIPE_SHADER_FRAGMENT, 0, 1, &image);
199 }
200
201 /* Set up no-attachment framebuffer */
202 memset(&fb, 0, sizeof(fb));
203 fb.width = surface->width;
204 fb.height = surface->height;
205 fb.samples = 1;
206 fb.layers = 1;
207 cso_set_framebuffer(cso, &fb);
208
209 cso_set_viewport_dims(cso, fb.width, fb.height, invert_y);
210
211 if (invert_y)
212 st_pbo_addresses_invert_y(&addr, fb.height);
213
214 {
215 struct pipe_depth_stencil_alpha_state dsa;
216 memset(&dsa, 0, sizeof(dsa));
217 cso_set_depth_stencil_alpha(cso, &dsa);
218 }
219
220 /* Set up the fragment shader */
221 {
222 void *fs = st_pbo_get_download_fs(st, view_target);
223 if (!fs)
224 goto fail;
225
226 cso_set_fragment_shader_handle(cso, fs);
227 }
228
229 success = st_pbo_draw(st, &addr, fb.width, fb.height);
230
231 /* Buffer written via shader images needs explicit synchronization. */
232 pipe->memory_barrier(pipe, PIPE_BARRIER_ALL);
233
234 fail:
235 cso_restore_state(cso);
236 cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT);
237
238 return success;
239 }
240
241 /* Invalidate the readpixels cache to ensure we don't read stale data.
242 */
243 void st_invalidate_readpix_cache(struct st_context *st)
244 {
245 pipe_resource_reference(&st->readpix_cache.src, NULL);
246 pipe_resource_reference(&st->readpix_cache.cache, NULL);
247 }
248
249 /**
250 * Create a staging texture and blit the requested region to it.
251 */
252 static struct pipe_resource *
253 blit_to_staging(struct st_context *st, struct st_renderbuffer *strb,
254 bool invert_y,
255 GLint x, GLint y, GLsizei width, GLsizei height,
256 GLenum format,
257 enum pipe_format src_format, enum pipe_format dst_format)
258 {
259 struct pipe_context *pipe = st->pipe;
260 struct pipe_screen *screen = pipe->screen;
261 struct pipe_resource dst_templ;
262 struct pipe_resource *dst;
263 struct pipe_blit_info blit;
264
265 /* We are creating a texture of the size of the region being read back.
266 * Need to check for NPOT texture support. */
267 if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
268 (!util_is_power_of_two(width) ||
269 !util_is_power_of_two(height)))
270 return NULL;
271
272 /* create the destination texture */
273 memset(&dst_templ, 0, sizeof(dst_templ));
274 dst_templ.target = PIPE_TEXTURE_2D;
275 dst_templ.format = dst_format;
276 dst_templ.bind = PIPE_BIND_TRANSFER_READ;
277 if (util_format_is_depth_or_stencil(dst_format))
278 dst_templ.bind |= PIPE_BIND_DEPTH_STENCIL;
279 else
280 dst_templ.bind |= PIPE_BIND_RENDER_TARGET;
281 dst_templ.usage = PIPE_USAGE_STAGING;
282
283 st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
284 &dst_templ.width0, &dst_templ.height0,
285 &dst_templ.depth0, &dst_templ.array_size);
286
287 dst = screen->resource_create(screen, &dst_templ);
288 if (!dst)
289 return NULL;
290
291 memset(&blit, 0, sizeof(blit));
292 blit.src.resource = strb->texture;
293 blit.src.level = strb->surface->u.tex.level;
294 blit.src.format = src_format;
295 blit.dst.resource = dst;
296 blit.dst.level = 0;
297 blit.dst.format = dst->format;
298 blit.src.box.x = x;
299 blit.dst.box.x = 0;
300 blit.src.box.y = y;
301 blit.dst.box.y = 0;
302 blit.src.box.z = strb->surface->u.tex.first_layer;
303 blit.dst.box.z = 0;
304 blit.src.box.width = blit.dst.box.width = width;
305 blit.src.box.height = blit.dst.box.height = height;
306 blit.src.box.depth = blit.dst.box.depth = 1;
307 blit.mask = st_get_blit_mask(strb->Base._BaseFormat, format);
308 blit.filter = PIPE_TEX_FILTER_NEAREST;
309 blit.scissor_enable = FALSE;
310
311 if (invert_y) {
312 blit.src.box.y = strb->Base.Height - blit.src.box.y;
313 blit.src.box.height = -blit.src.box.height;
314 }
315
316 /* blit */
317 st->pipe->blit(st->pipe, &blit);
318
319 return dst;
320 }
321
322 static struct pipe_resource *
323 try_cached_readpixels(struct st_context *st, struct st_renderbuffer *strb,
324 bool invert_y,
325 GLsizei width, GLsizei height,
326 GLenum format,
327 enum pipe_format src_format, enum pipe_format dst_format)
328 {
329 struct pipe_resource *src = strb->texture;
330 struct pipe_resource *dst = NULL;
331
332 if (ST_DEBUG & DEBUG_NOREADPIXCACHE)
333 return NULL;
334
335 /* Reset cache after invalidation or switch of parameters. */
336 if (st->readpix_cache.src != src ||
337 st->readpix_cache.dst_format != dst_format ||
338 st->readpix_cache.level != strb->surface->u.tex.level ||
339 st->readpix_cache.layer != strb->surface->u.tex.first_layer) {
340 pipe_resource_reference(&st->readpix_cache.src, src);
341 pipe_resource_reference(&st->readpix_cache.cache, NULL);
342 st->readpix_cache.dst_format = dst_format;
343 st->readpix_cache.level = strb->surface->u.tex.level;
344 st->readpix_cache.layer = strb->surface->u.tex.first_layer;
345 st->readpix_cache.hits = 0;
346 }
347
348 /* Decide whether to trigger the cache. */
349 if (!st->readpix_cache.cache) {
350 if (!strb->use_readpix_cache && !ALWAYS_READPIXELS_CACHE) {
351 /* Heuristic: If previous successive calls read at least a fraction
352 * of the surface _and_ we read again, trigger the cache.
353 */
354 unsigned threshold = MAX2(1, strb->Base.Width * strb->Base.Height / 8);
355
356 if (st->readpix_cache.hits < threshold) {
357 st->readpix_cache.hits += width * height;
358 return NULL;
359 }
360
361 strb->use_readpix_cache = true;
362 }
363
364 /* Fill the cache */
365 st->readpix_cache.cache = blit_to_staging(st, strb, invert_y,
366 0, 0,
367 strb->Base.Width,
368 strb->Base.Height, format,
369 src_format, dst_format);
370 }
371
372 /* Return an owning reference to stay consistent with the non-cached path */
373 pipe_resource_reference(&dst, st->readpix_cache.cache);
374
375 return dst;
376 }
377
378 /**
379 * This uses a blit to copy the read buffer to a texture format which matches
380 * the format and type combo and then a fast read-back is done using memcpy.
381 * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
382 * a format which matches the swizzling.
383 *
384 * If such a format isn't available, we fall back to _mesa_readpixels.
385 *
386 * NOTE: Some drivers use a blit to convert between tiled and linear
387 * texture layouts during texture uploads/downloads, so the blit
388 * we do here should be free in such cases.
389 */
390 static void
391 st_ReadPixels(struct gl_context *ctx, GLint x, GLint y,
392 GLsizei width, GLsizei height,
393 GLenum format, GLenum type,
394 const struct gl_pixelstore_attrib *pack,
395 void *pixels)
396 {
397 struct st_context *st = st_context(ctx);
398 struct gl_renderbuffer *rb =
399 _mesa_get_read_renderbuffer_for_format(ctx, format);
400 struct st_renderbuffer *strb = st_renderbuffer(rb);
401 struct pipe_context *pipe = st->pipe;
402 struct pipe_screen *screen = pipe->screen;
403 struct pipe_resource *src;
404 struct pipe_resource *dst = NULL;
405 enum pipe_format dst_format, src_format;
406 unsigned bind = PIPE_BIND_TRANSFER_READ;
407 struct pipe_transfer *tex_xfer;
408 ubyte *map = NULL;
409 bool window;
410
411 /* Validate state (to be sure we have up-to-date framebuffer surfaces)
412 * and flush the bitmap cache prior to reading. */
413 st_validate_state(st, ST_PIPELINE_RENDER);
414 st_flush_bitmap_cache(st);
415
416 if (!st->prefer_blit_based_texture_transfer) {
417 goto fallback;
418 }
419
420 /* This must be done after state validation. */
421 src = strb->texture;
422
423 /* XXX Fallback for depth-stencil formats due to an incomplete
424 * stencil blit implementation in some drivers. */
425 if (format == GL_DEPTH_STENCIL) {
426 goto fallback;
427 }
428
429 /* If the base internal format and the texture format don't match, we have
430 * to use the slow path. */
431 if (rb->_BaseFormat !=
432 _mesa_get_format_base_format(rb->Format)) {
433 goto fallback;
434 }
435
436 if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
437 goto fallback;
438 }
439
440 /* Convert the source format to what is expected by ReadPixels
441 * and see if it's supported. */
442 src_format = util_format_linear(src->format);
443 src_format = util_format_luminance_to_red(src_format);
444 src_format = util_format_intensity_to_red(src_format);
445
446 if (!src_format ||
447 !screen->is_format_supported(screen, src_format, src->target,
448 src->nr_samples,
449 PIPE_BIND_SAMPLER_VIEW)) {
450 goto fallback;
451 }
452
453 if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
454 bind |= PIPE_BIND_DEPTH_STENCIL;
455 else
456 bind |= PIPE_BIND_RENDER_TARGET;
457
458 /* Choose the destination format by finding the best match
459 * for the format+type combo. */
460 dst_format = st_choose_matching_format(st, bind, format, type,
461 pack->SwapBytes);
462 if (dst_format == PIPE_FORMAT_NONE) {
463 goto fallback;
464 }
465
466 if (st->pbo.download_enabled && _mesa_is_bufferobj(pack->BufferObj)) {
467 if (try_pbo_readpixels(st, strb,
468 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
469 x, y, width, height,
470 src_format, dst_format,
471 pack, pixels))
472 return;
473 }
474
475 if (needs_integer_signed_unsigned_conversion(ctx, format, type)) {
476 goto fallback;
477 }
478
479 /* Cache a staging texture for back-to-back ReadPixels, to avoid CPU-GPU
480 * synchronization overhead.
481 */
482 dst = try_cached_readpixels(st, strb,
483 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
484 width, height, format, src_format, dst_format);
485 if (dst) {
486 window = false;
487 } else {
488 /* See if the texture format already matches the format and type,
489 * in which case the memcpy-based fast path will likely be used and
490 * we don't have to blit. */
491 if (_mesa_format_matches_format_and_type(rb->Format, format,
492 type, pack->SwapBytes, NULL)) {
493 goto fallback;
494 }
495
496 dst = blit_to_staging(st, strb,
497 st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP,
498 x, y, width, height, format,
499 src_format, dst_format);
500 if (!dst)
501 goto fallback;
502
503 window = true;
504 }
505
506 /* map resources */
507 pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
508
509 map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ,
510 0, 0, 0, width, height, 1, &tex_xfer);
511 if (!map) {
512 _mesa_unmap_pbo_dest(ctx, pack);
513 pipe_resource_reference(&dst, NULL);
514 goto fallback;
515 }
516
517 if (!window)
518 map += y * tex_xfer->stride + x * util_format_get_blocksize(dst_format);
519
520 /* memcpy data into a user buffer */
521 {
522 const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
523 GLuint row;
524
525 for (row = 0; row < (unsigned) height; row++) {
526 void *dest = _mesa_image_address2d(pack, pixels,
527 width, height, format,
528 type, row, 0);
529 memcpy(dest, map, bytesPerRow);
530 map += tex_xfer->stride;
531 }
532 }
533
534 pipe_transfer_unmap(pipe, tex_xfer);
535 _mesa_unmap_pbo_dest(ctx, pack);
536 pipe_resource_reference(&dst, NULL);
537 return;
538
539 fallback:
540 _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
541 }
542
543 void st_init_readpixels_functions(struct dd_function_table *functions)
544 {
545 functions->ReadPixels = st_ReadPixels;
546 }