if (usage & tc_flags)
return usage;
+ /* Use the staging upload if it's preferred. */
+ if (usage & (PIPE_TRANSFER_DISCARD_RANGE |
+ PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
+ !(usage & PIPE_TRANSFER_PERSISTENT) &&
+ /* Try not to decrement the counter if it's not positive. Still racy,
+ * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
+ tres->max_forced_staging_uploads > 0 &&
+ p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) {
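+ /* Force the DISCARD_RANGE path: whole-resource invalidation and
+ * unsynchronized mappings would use a different method of CPU access
+ * instead of the staging buffer. */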
+ usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
+
+ return usage | tc_flags | PIPE_TRANSFER_DISCARD_RANGE;
+ }
+
/* Sparse buffers can't be mapped directly and can't be reallocated
* (fully invalidated). That may just be a radeonsi limitation, but
* the threaded context must obey it with radeonsi. */
/* Drivers are required to update this for shared resources and user
* pointers. */
bool is_shared;
bool is_user_ptr;
+
+ /* If positive, prefer DISCARD_RANGE with a staging buffer over any other
+ * method of CPU access when map flags allow it. Useful for buffers that
+ * are too large for the visible VRAM window.
+ */
+ int max_forced_staging_uploads;
};
struct threaded_transfer {
res->vram_usage = 0;
res->gart_usage = 0;
- if (res->domains & RADEON_DOMAIN_VRAM)
+ if (res->domains & RADEON_DOMAIN_VRAM) {
res->vram_usage = size;
- else if (res->domains & RADEON_DOMAIN_GTT)
+
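+ /* Prefer staging uploads for VRAM buffers that take up at least a quarter
+ * of the CPU-visible VRAM window on dedicated-VRAM GPUs. */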
+ res->b.max_forced_staging_uploads =
+ rscreen->info.has_dedicated_vram &&
+ size >= rscreen->info.vram_vis_size / 4 ? 1 : 0;
+ } else if (res->domains & RADEON_DOMAIN_GTT) {
res->gart_usage = size;
+ }
}
bool si_alloc_resource(struct r600_common_screen *rscreen,
pb_reference(&rdst->buf, rsrc->buf);
rdst->gpu_address = rsrc->gpu_address;
rdst->b.b.bind = rsrc->b.b.bind;
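+ /* Keep the staging-upload preference when the buffer storage is replaced. */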
+ rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads;
rdst->flags = rsrc->flags;
assert(rdst->vram_usage == rsrc->vram_usage);