* persistent buffers into GTT to prevent VRAM CPU page faults.
*/
if (!sscreen->info.kernel_flushes_hdp_before_ib ||
- sscreen->info.drm_major == 2)
+ !sscreen->info.is_amdgpu)
res->domains = RADEON_DOMAIN_GTT;
}
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
}
+
+ if (res->b.b.flags & SI_RESOURCE_FLAG_CLEAR)
+ si_screen_clear_buffer(sscreen, &res->b.b, 0, res->bo_size, 0);
+
return true;
}
static void si_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
- struct si_resource *rbuffer = si_resource(buf);
+ struct si_resource *buffer = si_resource(buf);
threaded_resource_deinit(buf);
- util_range_destroy(&rbuffer->valid_buffer_range);
- pb_reference(&rbuffer->buf, NULL);
- FREE(rbuffer);
+ util_range_destroy(&buffer->valid_buffer_range);
+ pb_reference(&buffer->buf, NULL);
+ FREE(buffer);
}
/* Reallocate the buffer a update all resource bindings where the buffer is
*/
static bool
si_invalidate_buffer(struct si_context *sctx,
- struct si_resource *rbuffer)
+ struct si_resource *buf)
{
/* Shared buffers can't be reallocated. */
- if (rbuffer->b.is_shared)
+ if (buf->b.is_shared)
return false;
/* Sparse buffers can't be reallocated. */
- if (rbuffer->flags & RADEON_FLAG_SPARSE)
+ if (buf->flags & RADEON_FLAG_SPARSE)
return false;
/* In AMD_pinned_memory, the user pointer association only gets
* broken when the buffer is explicitly re-allocated.
*/
- if (rbuffer->b.is_user_ptr)
+ if (buf->b.is_user_ptr)
return false;
/* Check if mapping this buffer would cause waiting for the GPU. */
- if (si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
- !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
- uint64_t old_va = rbuffer->gpu_address;
-
+ if (si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) ||
+ !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) {
/* Reallocate the buffer in the same pipe_resource. */
- si_alloc_resource(sctx->screen, rbuffer);
- si_rebind_buffer(sctx, &rbuffer->b.b, old_va);
+ si_alloc_resource(sctx->screen, buf);
+ si_rebind_buffer(sctx, &buf->b.b);
} else {
- util_range_set_empty(&rbuffer->valid_buffer_range);
+ util_range_set_empty(&buf->valid_buffer_range);
}
return true;
struct si_context *sctx = (struct si_context*)ctx;
struct si_resource *sdst = si_resource(dst);
struct si_resource *ssrc = si_resource(src);
- uint64_t old_gpu_address = sdst->gpu_address;
pb_reference(&sdst->buf, ssrc->buf);
sdst->gpu_address = ssrc->gpu_address;
assert(sdst->bo_alignment == ssrc->bo_alignment);
assert(sdst->domains == ssrc->domains);
- si_rebind_buffer(sctx, dst, old_gpu_address);
+ si_rebind_buffer(sctx, dst);
}
static void si_invalidate_resource(struct pipe_context *ctx,
struct pipe_resource *resource)
{
struct si_context *sctx = (struct si_context*)ctx;
- struct si_resource *rbuffer = si_resource(resource);
+ struct si_resource *buf = si_resource(resource);
/* We currently only do anyting here for buffers */
if (resource->target == PIPE_BUFFER)
- (void)si_invalidate_buffer(sctx, rbuffer);
+ (void)si_invalidate_buffer(sctx, buf);
}
static void *si_buffer_get_transfer(struct pipe_context *ctx,
struct pipe_transfer **ptransfer)
{
struct si_context *sctx = (struct si_context*)ctx;
- struct si_resource *rbuffer = si_resource(resource);
+ struct si_resource *buf = si_resource(resource);
uint8_t *data;
assert(box->x + box->width <= resource->width0);
*
* So don't ever use staging buffers.
*/
- if (rbuffer->b.is_user_ptr)
+ if (buf->b.is_user_ptr)
usage |= PIPE_TRANSFER_PERSISTENT;
/* See if the buffer range being mapped has never been initialized,
if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
usage & PIPE_TRANSFER_WRITE &&
- !rbuffer->b.is_shared &&
- !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
+ !buf->b.is_shared &&
+ !util_ranges_intersect(&buf->valid_buffer_range, box->x, box->x + box->width)) {
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
!(usage & PIPE_TRANSFER_PERSISTENT) &&
/* Try not to decrement the counter if it's not positive. Still racy,
* but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
- rbuffer->max_forced_staging_uploads > 0 &&
- p_atomic_dec_return(&rbuffer->max_forced_staging_uploads) >= 0) {
+ buf->max_forced_staging_uploads > 0 &&
+ p_atomic_dec_return(&buf->max_forced_staging_uploads) >= 0) {
usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
PIPE_TRANSFER_UNSYNCHRONIZED);
usage |= PIPE_TRANSFER_DISCARD_RANGE;
TC_TRANSFER_MAP_NO_INVALIDATE))) {
assert(usage & PIPE_TRANSFER_WRITE);
- if (si_invalidate_buffer(sctx, rbuffer)) {
+ if (si_invalidate_buffer(sctx, buf)) {
/* At this point, the buffer is always idle. */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
} else {
}
}
- if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+ if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT &&
+ buf->b.b.flags & SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA) {
+ usage &= ~(PIPE_TRANSFER_UNSYNCHRONIZED |
+ PIPE_TRANSFER_PERSISTENT);
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ force_discard_range = true;
+ }
+
+ if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_PERSISTENT))) ||
- (rbuffer->flags & RADEON_FLAG_SPARSE))) {
+ (buf->flags & RADEON_FLAG_SPARSE))) {
assert(usage & PIPE_TRANSFER_WRITE);
/* Check if mapping this buffer would cause waiting for the GPU.
*/
- if (rbuffer->flags & RADEON_FLAG_SPARSE ||
+ if (buf->flags & RADEON_FLAG_SPARSE ||
force_discard_range ||
- si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
- !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
+ si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) ||
+ !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
- unsigned offset;
+ struct u_upload_mgr *uploader;
struct si_resource *staging = NULL;
+ unsigned offset;
- u_upload_alloc(ctx->stream_uploader, 0,
+ /* If we are not called from the driver thread, we have
+ * to use the uploader from u_threaded_context, which is
+ * local to the calling thread.
+ */
+ if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
+ uploader = sctx->tc->base.stream_uploader;
+ else
+ uploader = sctx->b.stream_uploader;
+
+ u_upload_alloc(uploader, 0,
box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT),
sctx->screen->info.tcc_cache_line_size,
&offset, (struct pipe_resource**)&staging,
data += box->x % SI_MAP_BUFFER_ALIGNMENT;
return si_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, staging, offset);
- } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
+ } else if (buf->flags & RADEON_FLAG_SPARSE) {
return NULL;
}
} else {
/* Use a staging buffer in cached GTT for reads. */
else if (((usage & PIPE_TRANSFER_READ) &&
!(usage & PIPE_TRANSFER_PERSISTENT) &&
- (rbuffer->domains & RADEON_DOMAIN_VRAM ||
- rbuffer->flags & RADEON_FLAG_GTT_WC)) ||
- (rbuffer->flags & RADEON_FLAG_SPARSE)) {
+ (buf->domains & RADEON_DOMAIN_VRAM ||
+ buf->flags & RADEON_FLAG_GTT_WC)) ||
+ (buf->flags & RADEON_FLAG_SPARSE)) {
struct si_resource *staging;
assert(!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC));
return si_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, staging, 0);
- } else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
+ } else if (buf->flags & RADEON_FLAG_SPARSE) {
return NULL;
}
}
- data = si_buffer_map_sync_with_rings(sctx, rbuffer, usage);
+ data = si_buffer_map_sync_with_rings(sctx, buf, usage);
if (!data) {
return NULL;
}
struct pipe_transfer *transfer,
const struct pipe_box *box)
{
+ struct si_context *sctx = (struct si_context*)ctx;
struct si_transfer *stransfer = (struct si_transfer*)transfer;
- struct si_resource *rbuffer = si_resource(transfer->resource);
+ struct si_resource *buf = si_resource(transfer->resource);
if (stransfer->staging) {
+ unsigned src_offset = stransfer->offset +
+ transfer->box.x % SI_MAP_BUFFER_ALIGNMENT +
+ (box->x - transfer->box.x);
+
+ if (buf->b.b.flags & SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA) {
+ /* This should be true for all uploaders. */
+ assert(transfer->box.x == 0);
+
+ /* Find a previous upload and extend its range. The last
+ * upload is likely to be at the end of the list.
+ */
+ for (int i = sctx->num_sdma_uploads - 1; i >= 0; i--) {
+ struct si_sdma_upload *up = &sctx->sdma_uploads[i];
+
+ if (up->dst != buf)
+ continue;
+
+ assert(up->src == stransfer->staging);
+ assert(box->x > up->dst_offset);
+ up->size = box->x + box->width - up->dst_offset;
+ return;
+ }
+
+ /* Enlarge the array if it's full. */
+ if (sctx->num_sdma_uploads == sctx->max_sdma_uploads) {
+ unsigned size;
+
+ sctx->max_sdma_uploads += 4;
+ size = sctx->max_sdma_uploads * sizeof(sctx->sdma_uploads[0]);
+ sctx->sdma_uploads = realloc(sctx->sdma_uploads, size);
+ }
+
+ /* Add a new upload. */
+ struct si_sdma_upload *up =
+ &sctx->sdma_uploads[sctx->num_sdma_uploads++];
+ up->dst = up->src = NULL;
+ si_resource_reference(&up->dst, buf);
+ si_resource_reference(&up->src, stransfer->staging);
+ up->dst_offset = box->x;
+ up->src_offset = src_offset;
+ up->size = box->width;
+ return;
+ }
+
/* Copy the staging buffer into the original one. */
- si_copy_buffer((struct si_context*)ctx, transfer->resource,
- &stransfer->staging->b.b, box->x,
- stransfer->offset + box->x % SI_MAP_BUFFER_ALIGNMENT,
- box->width);
+ si_copy_buffer(sctx, transfer->resource, &stransfer->staging->b.b,
+ box->x, src_offset, box->width);
}
- util_range_add(&rbuffer->valid_buffer_range, box->x,
+ util_range_add(&buf->valid_buffer_range, box->x,
box->x + box->width);
}
si_alloc_buffer_struct(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
- struct si_resource *rbuffer;
+ struct si_resource *buf;
- rbuffer = MALLOC_STRUCT(si_resource);
+ buf = MALLOC_STRUCT(si_resource);
- rbuffer->b.b = *templ;
- rbuffer->b.b.next = NULL;
- pipe_reference_init(&rbuffer->b.b.reference, 1);
- rbuffer->b.b.screen = screen;
+ buf->b.b = *templ;
+ buf->b.b.next = NULL;
+ pipe_reference_init(&buf->b.b.reference, 1);
+ buf->b.b.screen = screen;
- rbuffer->b.vtbl = &si_buffer_vtbl;
- threaded_resource_init(&rbuffer->b.b);
+ buf->b.vtbl = &si_buffer_vtbl;
+ threaded_resource_init(&buf->b.b);
- rbuffer->buf = NULL;
- rbuffer->bind_history = 0;
- rbuffer->TC_L2_dirty = false;
- util_range_init(&rbuffer->valid_buffer_range);
- return rbuffer;
+ buf->buf = NULL;
+ buf->bind_history = 0;
+ buf->TC_L2_dirty = false;
+ util_range_init(&buf->valid_buffer_range);
+ return buf;
}
static struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
unsigned alignment)
{
struct si_screen *sscreen = (struct si_screen*)screen;
- struct si_resource *rbuffer = si_alloc_buffer_struct(screen, templ);
+ struct si_resource *buf = si_alloc_buffer_struct(screen, templ);
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
- rbuffer->b.b.flags |= SI_RESOURCE_FLAG_UNMAPPABLE;
+ buf->b.b.flags |= SI_RESOURCE_FLAG_UNMAPPABLE;
- si_init_resource_fields(sscreen, rbuffer, templ->width0, alignment);
+ si_init_resource_fields(sscreen, buf, templ->width0, alignment);
if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
- rbuffer->flags |= RADEON_FLAG_SPARSE;
+ buf->flags |= RADEON_FLAG_SPARSE;
- if (!si_alloc_resource(sscreen, rbuffer)) {
- FREE(rbuffer);
+ if (!si_alloc_resource(sscreen, buf)) {
+ FREE(buf);
return NULL;
}
- return &rbuffer->b.b;
+ return &buf->b.b;
}
struct pipe_resource *pipe_aligned_buffer_create(struct pipe_screen *screen,
{
struct si_screen *sscreen = (struct si_screen*)screen;
struct radeon_winsys *ws = sscreen->ws;
- struct si_resource *rbuffer = si_alloc_buffer_struct(screen, templ);
+ struct si_resource *buf = si_alloc_buffer_struct(screen, templ);
- rbuffer->domains = RADEON_DOMAIN_GTT;
- rbuffer->flags = 0;
- rbuffer->b.is_user_ptr = true;
- util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
- util_range_add(&rbuffer->b.valid_buffer_range, 0, templ->width0);
+ buf->domains = RADEON_DOMAIN_GTT;
+ buf->flags = 0;
+ buf->b.is_user_ptr = true;
+ util_range_add(&buf->valid_buffer_range, 0, templ->width0);
+ util_range_add(&buf->b.valid_buffer_range, 0, templ->width0);
/* Convert a user pointer to a buffer. */
- rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
- if (!rbuffer->buf) {
- FREE(rbuffer);
+ buf->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
+ if (!buf->buf) {
+ FREE(buf);
return NULL;
}
- rbuffer->gpu_address = ws->buffer_get_virtual_address(rbuffer->buf);
- rbuffer->vram_usage = 0;
- rbuffer->gart_usage = templ->width0;
+ buf->gpu_address = ws->buffer_get_virtual_address(buf->buf);
+ buf->vram_usage = 0;
+ buf->gart_usage = templ->width0;
- return &rbuffer->b.b;
+ return &buf->b.b;
}
static struct pipe_resource *si_resource_create(struct pipe_screen *screen,