- struct si_context *sctx = (struct si_context*)ctx;
- struct si_resource *rbuffer = si_resource(resource);
- uint8_t *data;
-
- assert(box->x + box->width <= resource->width0);
-
- /* From GL_AMD_pinned_memory issues:
- *
- * 4) Is glMapBuffer on a shared buffer guaranteed to return the
- * same system address which was specified at creation time?
- *
- * RESOLVED: NO. The GL implementation might return a different
- * virtual mapping of that memory, although the same physical
- * page will be used.
- *
- * So don't ever use staging buffers.
- */
- if (rbuffer->b.is_user_ptr)
- usage |= PIPE_TRANSFER_PERSISTENT;
-
- /* See if the buffer range being mapped has never been initialized,
- * in which case it can be mapped unsynchronized. */
- if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
- usage & PIPE_TRANSFER_WRITE &&
- !rbuffer->b.is_shared &&
- !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
- }
-
- /* If discarding the entire range, discard the whole resource instead. */
- if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
- box->x == 0 && box->width == resource->width0) {
- usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
- }
-
- /* If a buffer in VRAM is too large and the range is discarded, don't
- * map it directly. This makes sure that the buffer stays in VRAM.
- */
- bool force_discard_range = false;
- if (usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
- PIPE_TRANSFER_DISCARD_RANGE) &&
- !(usage & PIPE_TRANSFER_PERSISTENT) &&
- /* Try not to decrement the counter if it's not positive. Still racy,
- * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
- rbuffer->max_forced_staging_uploads > 0 &&
- p_atomic_dec_return(&rbuffer->max_forced_staging_uploads) >= 0) {
- usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
- usage |= PIPE_TRANSFER_DISCARD_RANGE;
- force_discard_range = true;
- }
-
- if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
- !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- TC_TRANSFER_MAP_NO_INVALIDATE))) {
- assert(usage & PIPE_TRANSFER_WRITE);
-
- if (si_invalidate_buffer(sctx, rbuffer)) {
- /* At this point, the buffer is always idle. */
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
- } else {
- /* Fall back to a temporary buffer. */
- usage |= PIPE_TRANSFER_DISCARD_RANGE;
- }
- }
-
- if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
- ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_PERSISTENT))) ||
- (rbuffer->flags & RADEON_FLAG_SPARSE))) {
- assert(usage & PIPE_TRANSFER_WRITE);
-
- /* Check if mapping this buffer would cause waiting for the GPU.
- */
- if (rbuffer->flags & RADEON_FLAG_SPARSE ||
- force_discard_range ||
- si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
- !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
- /* Do a wait-free write-only transfer using a temporary buffer. */
- unsigned offset;
- struct si_resource *staging = NULL;
-
- u_upload_alloc(ctx->stream_uploader, 0,
- box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT),
- sctx->screen->info.tcc_cache_line_size,
- &offset, (struct pipe_resource**)&staging,
- (void**)&data);
-
- if (staging) {
- data += box->x % SI_MAP_BUFFER_ALIGNMENT;
- return si_buffer_get_transfer(ctx, resource, usage, box,
- ptransfer, data, staging, offset);
- } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
- return NULL;
- }
- } else {
- /* At this point, the buffer is always idle (we checked it above). */
- usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
- }
- }
- /* Use a staging buffer in cached GTT for reads. */
- else if (((usage & PIPE_TRANSFER_READ) &&
- !(usage & PIPE_TRANSFER_PERSISTENT) &&
- (rbuffer->domains & RADEON_DOMAIN_VRAM ||
- rbuffer->flags & RADEON_FLAG_GTT_WC)) ||
- (rbuffer->flags & RADEON_FLAG_SPARSE)) {
- struct si_resource *staging;
-
- assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
- staging = si_resource(pipe_buffer_create(
- ctx->screen, 0, PIPE_USAGE_STAGING,
- box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT)));
- if (staging) {
- /* Copy the VRAM buffer to the staging buffer. */
- sctx->dma_copy(ctx, &staging->b.b, 0,
- box->x % SI_MAP_BUFFER_ALIGNMENT,
- 0, 0, resource, 0, box);
-
- data = si_buffer_map_sync_with_rings(sctx, staging,
- usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
- if (!data) {
- si_resource_reference(&staging, NULL);
- return NULL;
- }
- data += box->x % SI_MAP_BUFFER_ALIGNMENT;
-
- return si_buffer_get_transfer(ctx, resource, usage, box,
- ptransfer, data, staging, 0);
- } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
- return NULL;
- }
- }
-
- data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
- if (!data) {
- return NULL;
- }
- data += box->x;
-
- return si_buffer_get_transfer(ctx, resource, usage, box,
- ptransfer, data, NULL, 0);
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct si_resource *buf = si_resource(resource);
+ uint8_t *data;
+
+ assert(box->x + box->width <= resource->width0);
+
+ /* From GL_AMD_pinned_memory issues:
+ *
+ * 4) Is glMapBuffer on a shared buffer guaranteed to return the
+ * same system address which was specified at creation time?
+ *
+ * RESOLVED: NO. The GL implementation might return a different
+ * virtual mapping of that memory, although the same physical
+ * page will be used.
+ *
+ * So don't ever use staging buffers.
+ */
+ if (buf->b.is_user_ptr)
+ usage |= PIPE_TRANSFER_PERSISTENT;
+
+ /* See if the buffer range being mapped has never been initialized,
+ * in which case it can be mapped unsynchronized. */
+ if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
+ usage & PIPE_TRANSFER_WRITE && !buf->b.is_shared &&
+ !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width)) {
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ }
+
+ /* If discarding the entire range, discard the whole resource instead. */
+ if (usage & PIPE_TRANSFER_DISCARD_RANGE && box->x == 0 && box->width == resource->width0) {
+ usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+ }
+
+ /* If a large buffer in VRAM is mapped with its range discarded, don't
+  * map it directly: a CPU access could prompt the kernel to migrate the
+  * buffer to GTT. Uploading through a staging buffer keeps it in VRAM.
+  */
+ bool force_discard_range = false;
+ if (usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | PIPE_TRANSFER_DISCARD_RANGE) &&
+ !(usage & PIPE_TRANSFER_PERSISTENT) &&
+ /* Try not to decrement the counter if it's not positive. Still racy,
+ * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
+ buf->max_forced_staging_uploads > 0 &&
+ p_atomic_dec_return(&buf->max_forced_staging_uploads) >= 0) {
+ usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | PIPE_TRANSFER_UNSYNCHRONIZED);
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ force_discard_range = true;
+ }
+
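+ /* A whole-resource discard can be serviced by swapping in fresh backing
+  * storage (invalidation); the new storage is idle by definition.
+  */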
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
+ !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | TC_TRANSFER_MAP_NO_INVALIDATE))) {
+ assert(usage & PIPE_TRANSFER_WRITE);
+
+ if (si_invalidate_buffer(sctx, buf)) {
+ /* At this point, the buffer is always idle. */
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ } else {
+ /* Fall back to a temporary buffer. */
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ }
+ }
+
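+ /* Buffers that flush explicitly via SDMA must not stay mapped
+  * persistently or unsynchronized; force the staging-upload path so the
+  * flush can be performed as an SDMA copy.
+  */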
+ if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT &&
+ buf->b.b.flags & SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA) {
+ usage &= ~(PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_PERSISTENT);
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ force_discard_range = true;
+ }
+
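+ /* Write-discards that are neither unsynchronized nor persistent, and
+  * all discards of sparse buffers (which can never be mapped directly),
+  * may need a staging upload to avoid stalling on the GPU.
+  */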
+ if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
+ ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_PERSISTENT))) ||
+ (buf->flags & RADEON_FLAG_SPARSE))) {
+ assert(usage & PIPE_TRANSFER_WRITE);
+
+ /* Check if mapping this buffer would cause waiting for the GPU. */
+ if (buf->flags & RADEON_FLAG_SPARSE || force_discard_range ||
+ si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) ||
+ !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) {
+ /* Do a wait-free write-only transfer using a temporary buffer. */
+ struct u_upload_mgr *uploader;
+ struct si_resource *staging = NULL;
+ unsigned offset;
+
+ /* If we are not called from the driver thread, we have
+ * to use the uploader from u_threaded_context, which is
+ * local to the calling thread.
+ */
+ if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
+ uploader = sctx->tc->base.stream_uploader;
+ else
+ uploader = sctx->b.stream_uploader;
+
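+ /* Over-allocate so that box->x keeps its offset modulo
+  * SI_MAP_BUFFER_ALIGNMENT, and align the allocation to the GPU's L2
+  * (TCC) cache line size so that neighboring sub-allocations do not
+  * share a cache line.
+  */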
+ u_upload_alloc(uploader, 0, box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT),
+ sctx->screen->info.tcc_cache_line_size, &offset,
+ (struct pipe_resource **)&staging, (void **)&data);
+
+ if (staging) {
+ data += box->x % SI_MAP_BUFFER_ALIGNMENT;
+ return si_buffer_get_transfer(ctx, resource, usage, box, ptransfer, data, staging,
+ offset);
+ } else if (buf->flags & RADEON_FLAG_SPARSE) {
+ return NULL;
+ }
+ } else {
+ /* At this point, the buffer is always idle (we checked it above). */
+ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+ }
+ }
+ /* Use a staging buffer in cached GTT for reads. */
+ else if (((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_PERSISTENT) &&
+ (buf->domains & RADEON_DOMAIN_VRAM || buf->flags & RADEON_FLAG_GTT_WC)) ||
+ (buf->flags & RADEON_FLAG_SPARSE)) {
+ struct si_resource *staging;
+
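+ /* This path records an SDMA copy, so it must run on the driver thread;
+  * threaded-context callers are not allowed here.
+  */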
+ assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_TRANSFER_THREAD_SAFE)));
+ staging = si_aligned_buffer_create(ctx->screen, SI_RESOURCE_FLAG_UNCACHED,
+ PIPE_USAGE_STAGING,
+ box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT), 256);
+ if (staging) {
+ /* Copy the VRAM buffer to the staging buffer. */
+ si_sdma_copy_buffer(sctx, &staging->b.b, resource, box->x % SI_MAP_BUFFER_ALIGNMENT,
+ box->x, box->width);
+
+ data = si_buffer_map_sync_with_rings(sctx, staging, usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
+ if (!data) {
+ si_resource_reference(&staging, NULL);
+ return NULL;
+ }
+ data += box->x % SI_MAP_BUFFER_ALIGNMENT;
+
+ return si_buffer_get_transfer(ctx, resource, usage, box, ptransfer, data, staging, 0);
+ } else if (buf->flags & RADEON_FLAG_SPARSE) {
+ return NULL;
+ }
+ }
+
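+ /* Otherwise, map the buffer directly, synchronizing with any rings that
+  * still reference it unless the map is unsynchronized.
+  */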
+ data = si_buffer_map_sync_with_rings(sctx, buf, usage);
+ if (!data) {
+ return NULL;
+ }
+ data += box->x;
+
+ return si_buffer_get_transfer(ctx, resource, usage, box, ptransfer, data, NULL, 0);