gallium/radeon: fix performance of buffer readbacks
authorMarek Olšák <marek.olsak@amd.com>
Thu, 9 Feb 2017 02:14:22 +0000 (03:14 +0100)
committerMarek Olšák <marek.olsak@amd.com>
Fri, 10 Feb 2017 10:27:50 +0000 (11:27 +0100)
We want cached GTT for all non-persistent read mappings.
Set level = 0 on purpose.

Use dma_copy, because resource_copy_region causes a failure in the PBO
read of piglit/getteximage-luminance.

If Rocket League used the READ flag, it should get cached GTT.

v2: mask out UNSYNCHRONIZED

Cc: 13.0 17.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_buffer_common.c

index da6f0206d78e6954aebdd7f60eed1f397f98f330..762181fb9d112f062379e2b0ebae33d320c33556 100644 (file)
@@ -381,11 +381,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
                }
        }
-       /* Using a staging buffer in GTT for larger reads is much faster. */
+       /* Use a staging buffer in cached GTT for reads. */
        else if ((usage & PIPE_TRANSFER_READ) &&
-                !(usage & (PIPE_TRANSFER_WRITE |
-                           PIPE_TRANSFER_PERSISTENT)) &&
-                rbuffer->domains & RADEON_DOMAIN_VRAM &&
+                !(usage & PIPE_TRANSFER_PERSISTENT) &&
+                (rbuffer->domains & RADEON_DOMAIN_VRAM ||
+                 rbuffer->flags & RADEON_FLAG_GTT_WC) &&
                 r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
                struct r600_resource *staging;
 
@@ -394,11 +394,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                                box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
                if (staging) {
                        /* Copy the VRAM buffer to the staging buffer. */
-                       ctx->resource_copy_region(ctx, &staging->b.b, 0,
-                                                 box->x % R600_MAP_BUFFER_ALIGNMENT,
-                                                 0, 0, resource, level, box);
+                       rctx->dma_copy(ctx, &staging->b.b, 0,
+                                      box->x % R600_MAP_BUFFER_ALIGNMENT,
+                                      0, 0, resource, 0, box);
 
-                       data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+                       data = r600_buffer_map_sync_with_rings(rctx, staging,
+                                                              usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
                        if (!data) {
                                r600_resource_reference(&staging, NULL);
                                return NULL;