radeonsi: don't map big VRAM buffers for the first upload directly
author: Marek Olšák <marek.olsak@amd.com>
Wed, 1 Nov 2017 23:05:15 +0000 (00:05 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 6 Nov 2017 15:23:20 +0000 (16:23 +0100)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_buffer_common.c
src/gallium/drivers/radeon/r600_pipe_common.h

index 92521f4779273c8616394634c4d2856b62a5f4bd..519e52ecfa3b3bc78149120d57009c13bc85c8fc 100644 (file)
@@ -194,6 +194,7 @@ void si_init_resource_fields(struct r600_common_screen *rscreen,
        if (res->domains & RADEON_DOMAIN_VRAM) {
                res->vram_usage = size;
 
+               res->max_forced_staging_uploads =
                res->b.max_forced_staging_uploads =
                        rscreen->info.has_dedicated_vram &&
                        size >= rscreen->info.vram_vis_size / 4 ? 1 : 0;
@@ -295,6 +296,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx,
        rdst->gpu_address = rsrc->gpu_address;
        rdst->b.b.bind = rsrc->b.b.bind;
        rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads;
+       rdst->max_forced_staging_uploads = rsrc->max_forced_staging_uploads;
        rdst->flags = rsrc->flags;
 
        assert(rdst->vram_usage == rsrc->vram_usage);
@@ -402,6 +404,23 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
        }
 
+       /* If a buffer in VRAM is too large and the range is discarded, don't
+        * map it directly. This makes sure that the buffer stays in VRAM.
+        */
+       bool force_discard_range = false;
+       if (usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+                    PIPE_TRANSFER_DISCARD_RANGE) &&
+           !(usage & PIPE_TRANSFER_PERSISTENT) &&
+           /* Try not to decrement the counter if it's not positive. Still racy,
+            * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
+           rbuffer->max_forced_staging_uploads > 0 &&
+           p_atomic_dec_return(&rbuffer->max_forced_staging_uploads) >= 0) {
+               usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+                          PIPE_TRANSFER_UNSYNCHRONIZED);
+               usage |= PIPE_TRANSFER_DISCARD_RANGE;
+               force_discard_range = true;
+       }
+
        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
            !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
                       TC_TRANSFER_MAP_NO_INVALIDATE))) {
@@ -427,6 +446,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
                /* Check if mapping this buffer would cause waiting for the GPU.
                 */
                if (rbuffer->flags & RADEON_FLAG_SPARSE ||
+                   force_discard_range ||
                    si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
                    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
                        /* Do a wait-free write-only transfer using a temporary buffer. */
index a45921e72b6eba06cd55c1684738301e418a6b62..a7fec373fcf1ae368644c710cb02c5e0c3ce2d96 100644 (file)
@@ -174,6 +174,7 @@ struct r600_resource {
        enum radeon_bo_domain           domains;
        enum radeon_bo_flag             flags;
        unsigned                        bind_history;
+       int                             max_forced_staging_uploads;
 
        /* The buffer range which is initialized (with a write transfer,
         * streamout, DMA, or as a random access target). The rest of