X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeon%2Fr600_buffer_common.c;h=2106b9b3a5ef964aebc0fb4e38bf63123d6bcbe6;hb=53db2790c06faa9dd58465b79065f97bc8e0cb62;hp=fb74b45d2fa43156f4548e48a954da3c80595563;hpb=7166773f90d541103b85e35227d59d82b416aa45;p=mesa.git

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index fb74b45d2fa..2106b9b3a5e 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -19,20 +19,18 @@
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *      Marek Olšák <marek.olsak@amd.com>
  */
 
+#include "radeonsi/si_pipe.h"
 #include "r600_cs.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 #include <inttypes.h>
 #include <stdio.h>
 
-bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
-				     struct pb_buffer *buf,
-				     enum radeon_bo_usage usage)
+bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
+				   struct pb_buffer *buf,
+				   enum radeon_bo_usage usage)
 {
 	if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
 		return true;
@@ -44,9 +42,9 @@ bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
 	return false;
 }
 
-void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
-				      struct r600_resource *resource,
-				      unsigned usage)
+void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+				    struct r600_resource *resource,
+				    unsigned usage)
 {
 	enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
 	bool busy = false;
@@ -66,7 +64,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
 					     resource->buf, rusage)) {
 		if (usage & PIPE_TRANSFER_DONTBLOCK) {
-			ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+			ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
 			return NULL;
 		} else {
 			ctx->gfx.flush(ctx, 0, NULL);
@@ -77,7 +75,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
 					     resource->buf, rusage)) {
 		if (usage & PIPE_TRANSFER_DONTBLOCK) {
-			ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+			ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
 			return NULL;
 		} else {
 			ctx->dma.flush(ctx, 0, NULL);
@@ -101,15 +99,17 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 	return ctx->ws->buffer_map(resource->buf, NULL, usage);
 }
 
-void r600_init_resource_fields(struct r600_common_screen *rscreen,
-			       struct r600_resource *res,
-			       uint64_t size, unsigned alignment)
+void si_init_resource_fields(struct si_screen *sscreen,
+			     struct r600_resource *res,
+			     uint64_t size, unsigned alignment)
 {
 	struct r600_texture *rtex = (struct r600_texture*)res;
 
 	res->bo_size = size;
 	res->bo_alignment = alignment;
 	res->flags = 0;
+	res->texture_handle_allocated = false;
+	res->image_handle_allocated = false;
 
 	switch (res->b.b.usage) {
 	case PIPE_USAGE_STREAM:
@@ -124,13 +124,12 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
 		/* Older kernels didn't always flush the HDP cache before
 		 * CS execution */
-		if (rscreen->info.drm_major == 2 &&
-		    rscreen->info.drm_minor < 40) {
+		if (sscreen->info.drm_major == 2 &&
+		    sscreen->info.drm_minor < 40) {
 			res->domains = RADEON_DOMAIN_GTT;
 			res->flags |= RADEON_FLAG_GTT_WC;
 			break;
 		}
-		res->flags |= RADEON_FLAG_CPU_ACCESS;
 		/* fall through */
 	case PIPE_USAGE_DEFAULT:
 	case PIPE_USAGE_IMMUTABLE:
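
The PIPE_TRANSFER_DONTBLOCK branches in si_buffer_map_sync_with_rings above apply one pattern twice, once per ring: if the GPU still references the buffer, either kick off an asynchronous flush and fail the map (the caller asked not to block), or flush synchronously and fall through to a regular map. A minimal, self-contained C sketch of that pattern follows; the ring type, its function pointers, and the flush modes are invented for illustration and are not the mesa winsys API.

#include <stdbool.h>
#include <stddef.h>

enum flush_mode { FLUSH_SYNC, FLUSH_ASYNC };

struct ring {
	/* Does a queued command stream still reference this buffer? */
	bool (*references)(struct ring *ring, void *buf);
	void (*flush)(struct ring *ring, enum flush_mode mode);
};

/* Returns NULL when mapping would block and the caller asked not to. */
static void *map_no_stall(struct ring *ring, void *buf, void *cpu_ptr,
			  bool dont_block)
{
	if (ring->references(ring, buf)) {
		if (dont_block) {
			/* Start the flush, but give up on this map. */
			ring->flush(ring, FLUSH_ASYNC);
			return NULL;
		}
		/* Otherwise flush synchronously and fall through to map. */
		ring->flush(ring, FLUSH_SYNC);
	}
	return cpu_ptr;
}
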
@@ -153,54 +152,59 @@ void r600_init_resource_fields(struct r600_common_screen *rscreen,
 		 * ensures all CPU writes finish before the GPU
 		 * executes a command stream.
 		 */
-		if (rscreen->info.drm_major == 2 &&
-		    rscreen->info.drm_minor < 40)
+		if (sscreen->info.drm_major == 2 &&
+		    sscreen->info.drm_minor < 40)
 			res->domains = RADEON_DOMAIN_GTT;
-		else if (res->domains & RADEON_DOMAIN_VRAM)
-			res->flags |= RADEON_FLAG_CPU_ACCESS;
 	}
 
 	/* Tiled textures are unmappable. Always put them in VRAM. */
 	if ((res->b.b.target != PIPE_BUFFER && !rtex->surface.is_linear) ||
-	    res->flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
+	    res->b.b.flags & R600_RESOURCE_FLAG_UNMAPPABLE) {
 		res->domains = RADEON_DOMAIN_VRAM;
-		res->flags &= ~RADEON_FLAG_CPU_ACCESS;
 		res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
 			      RADEON_FLAG_GTT_WC;
 	}
 
-	/* If VRAM is just stolen system memory, allow both VRAM and
-	 * GTT, whichever has free space. If a buffer is evicted from
-	 * VRAM to GTT, it will stay there.
-	 *
-	 * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
-	 * placements even with a low amount of stolen VRAM.
-	 */
-	if (!rscreen->info.has_dedicated_vram &&
-	    (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) &&
-	    res->domains == RADEON_DOMAIN_VRAM)
-		res->domains = RADEON_DOMAIN_VRAM_GTT;
+	/* Displayable and shareable surfaces are not suballocated. */
+	if (res->b.b.bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
+		res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */
+	else
+		res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
-	if (rscreen->debug_flags & DBG_NO_WC)
+	if (sscreen->debug_flags & DBG(NO_WC))
 		res->flags &= ~RADEON_FLAG_GTT_WC;
 
+	if (res->b.b.flags & R600_RESOURCE_FLAG_READ_ONLY)
+		res->flags |= RADEON_FLAG_READ_ONLY;
+
+	if (res->b.b.flags & R600_RESOURCE_FLAG_32BIT)
+		res->flags |= RADEON_FLAG_32BIT;
+
 	/* Set expected VRAM and GART usage for the buffer. */
 	res->vram_usage = 0;
 	res->gart_usage = 0;
+	res->max_forced_staging_uploads = 0;
+	res->b.max_forced_staging_uploads = 0;
 
-	if (res->domains & RADEON_DOMAIN_VRAM)
+	if (res->domains & RADEON_DOMAIN_VRAM) {
 		res->vram_usage = size;
-	else if (res->domains & RADEON_DOMAIN_GTT)
+
+		res->max_forced_staging_uploads =
+		res->b.max_forced_staging_uploads =
+			sscreen->info.has_dedicated_vram &&
+			size >= sscreen->info.vram_vis_size / 4 ? 1 : 0;
+	} else if (res->domains & RADEON_DOMAIN_GTT) {
 		res->gart_usage = size;
+	}
 }
 
-bool r600_alloc_resource(struct r600_common_screen *rscreen,
-			 struct r600_resource *res)
+bool si_alloc_resource(struct si_screen *sscreen,
+		       struct r600_resource *res)
 {
 	struct pb_buffer *old_buf, *new_buf;
 
 	/* Allocate a new resource. */
-	new_buf = rscreen->ws->buffer_create(rscreen->ws, res->bo_size,
+	new_buf = sscreen->ws->buffer_create(sscreen->ws, res->bo_size,
 					     res->bo_alignment,
 					     res->domains, res->flags);
 	if (!new_buf) {
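
The max_forced_staging_uploads initialization above encodes a simple sizing rule: on a GPU with dedicated VRAM, a VRAM buffer whose size is at least a quarter of the CPU-visible VRAM aperture gets a budget of one forced staging upload. A hypothetical sketch with a worked example follows; the info struct here is illustrative, not the driver's actual radeon_info.

#include <stdint.h>
#include <stdio.h>

struct gpu_info_example {
	int has_dedicated_vram;	/* 1 on discrete GPUs */
	uint64_t vram_vis_size;	/* CPU-visible VRAM aperture, in bytes */
};

static int forced_staging_uploads(const struct gpu_info_example *info,
				  uint64_t buf_size)
{
	/* Same expression as in si_init_resource_fields() above. */
	return info->has_dedicated_vram &&
	       buf_size >= info->vram_vis_size / 4 ? 1 : 0;
}

int main(void)
{
	/* With a 256 MiB visible aperture, the threshold is 64 MiB. */
	struct gpu_info_example info = { 1, 256ull << 20 };

	printf("%d\n", forced_staging_uploads(&info, 63ull << 20)); /* 0 */
	printf("%d\n", forced_staging_uploads(&info, 64ull << 20)); /* 1 */
	return 0;
}
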
@@ -214,10 +218,21 @@ bool r600_alloc_resource(struct r600_common_screen *rscreen,
 	old_buf = res->buf;
 	res->buf = new_buf; /* should be atomic */
 
-	if (rscreen->info.has_virtual_memory)
-		res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
-	else
+	if (sscreen->info.has_virtual_memory) {
+		res->gpu_address = sscreen->ws->buffer_get_virtual_address(res->buf);
+
+		if (res->flags & RADEON_FLAG_32BIT) {
+			uint64_t start = res->gpu_address;
+			uint64_t last = start + res->bo_size - 1;
+			(void)start;
+			(void)last;
+
+			assert((start >> 32) == sscreen->info.address32_hi);
+			assert((last >> 32) == sscreen->info.address32_hi);
+		}
+	} else {
 		res->gpu_address = 0;
+	}
 
 	pb_reference(&old_buf, NULL);
 
@@ -225,7 +240,7 @@
 	res->TC_L2_dirty = false;
 
 	/* Print debug information. */
-	if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
+	if (sscreen->debug_flags & DBG(VM) && res->b.b.target == PIPE_BUFFER) {
 		fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n",
 			res->gpu_address, res->gpu_address + res->buf->size,
 			res->buf->size);
@@ -263,7 +278,7 @@ r600_invalidate_buffer(struct r600_common_context *rctx,
 		return false;
 
 	/* Check if mapping this buffer would cause waiting for the GPU. */
-	if (r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
+	if (si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
 	    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
 		rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
 	} else {
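
The RADEON_FLAG_32BIT assertions in si_alloc_resource above check that a 32-bit allocation lands entirely inside one 4 GiB window: the first and last byte of the buffer must share the same upper 32 address bits. A standalone sketch of the same check follows, with address32_hi standing in for the driver's expected window.

#include <assert.h>
#include <stdint.h>

static void check_32bit_window(uint64_t gpu_address, uint64_t bo_size,
			       uint32_t address32_hi)
{
	uint64_t start = gpu_address;
	uint64_t last = start + bo_size - 1;

	/* Both ends of the buffer must sit in the same 4 GiB window. */
	assert((start >> 32) == address32_hi);
	assert((last >> 32) == address32_hi);
}

/* e.g. check_32bit_window(0x100000000ull, 4096, 0x1) passes, while a
 * buffer crossing 0x1ffffffff into the next window trips the second
 * assert. */
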
@@ -274,7 +289,7 @@
 }
 
 /* Replace the storage of dst with src. */
-void r600_replace_buffer_storage(struct pipe_context *ctx,
+void si_replace_buffer_storage(struct pipe_context *ctx,
 			       struct pipe_resource *dst,
 			       struct pipe_resource *src)
 {
@@ -285,19 +300,22 @@
 	pb_reference(&rdst->buf, rsrc->buf);
 	rdst->gpu_address = rsrc->gpu_address;
+	rdst->b.b.bind = rsrc->b.b.bind;
+	rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads;
+	rdst->max_forced_staging_uploads = rsrc->max_forced_staging_uploads;
+	rdst->flags = rsrc->flags;
 
 	assert(rdst->vram_usage == rsrc->vram_usage);
 	assert(rdst->gart_usage == rsrc->gart_usage);
 	assert(rdst->bo_size == rsrc->bo_size);
 	assert(rdst->bo_alignment == rsrc->bo_alignment);
 	assert(rdst->domains == rsrc->domains);
-	assert(rdst->flags == rsrc->flags);
 
 	rctx->rebind_buffer(ctx, dst, old_gpu_address);
 }
 
-void r600_invalidate_resource(struct pipe_context *ctx,
-			      struct pipe_resource *resource)
+static void si_invalidate_resource(struct pipe_context *ctx,
+				   struct pipe_resource *resource)
 {
 	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
 	struct r600_resource *rbuffer = r600_resource(resource);
@@ -337,17 +355,6 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx,
 	return data;
 }
 
-static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
-				     unsigned dstx, unsigned srcx, unsigned size)
-{
-	bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
-
-	return rctx->screen->has_cp_dma ||
-	       (dword_aligned && (rctx->dma.cs ||
-				  rctx->screen->has_streamout));
-
-}
-
 static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 				      struct pipe_resource *resource,
 				      unsigned level,
@@ -356,7 +363,6 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 				      struct pipe_transfer **ptransfer)
 {
 	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
 	struct r600_resource *rbuffer = r600_resource(resource);
 	uint8_t *data;
 
@@ -379,7 +385,7 @@
 	/* See if the buffer range being mapped has never been initialized,
 	 * in which case it can be mapped unsynchronized. */
 	if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
-		       TC_TRANSFER_MAP_IGNORE_VALID_RANGE)) &&
+		       TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED)) &&
 	    usage & PIPE_TRANSFER_WRITE &&
 	    !rbuffer->b.is_shared &&
 	    !util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
@@ -392,6 +398,23 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
 	}
 
+	/* If a buffer in VRAM is too large and the range is discarded, don't
+	 * map it directly. This makes sure that the buffer stays in VRAM.
+	 */
+	bool force_discard_range = false;
+	if (usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+		     PIPE_TRANSFER_DISCARD_RANGE) &&
+	    !(usage & PIPE_TRANSFER_PERSISTENT) &&
+	    /* Try not to decrement the counter if it's not positive. Still racy,
+	     * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
+	    rbuffer->max_forced_staging_uploads > 0 &&
+	    p_atomic_dec_return(&rbuffer->max_forced_staging_uploads) >= 0) {
+		usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+			   PIPE_TRANSFER_UNSYNCHRONIZED);
+		usage |= PIPE_TRANSFER_DISCARD_RANGE;
+		force_discard_range = true;
+	}
+
 	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
 	    !(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
 		       TC_TRANSFER_MAP_NO_INVALIDATE))) {
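
The counter guard added to r600_buffer_transfer_map above first checks that the budget is positive and only then atomically decrements it, testing the returned value. The pre-check is deliberately racy; it merely makes it unlikely that many losing threads drag the counter toward INT_MIN. Below is a sketch of the same idiom using C11 atomics in place of gallium's p_atomic_dec_return(), which returns the decremented value.

#include <stdatomic.h>
#include <stdbool.h>

static bool claim_forced_staging(atomic_int *uploads_left)
{
	/* Cheap, racy early-out once the budget is exhausted; it only
	 * reduces how often the counter keeps being decremented. */
	if (atomic_load_explicit(uploads_left, memory_order_relaxed) <= 0)
		return false;

	/* fetch_sub returns the old value, so old - 1 is the new value;
	 * new >= 0 means this thread won one of the budgeted slots. */
	return atomic_fetch_sub(uploads_left, 1) - 1 >= 0;
}
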
@@ -407,17 +430,16 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 	}
 
 	if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
-	    !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
 	    ((!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
-			 PIPE_TRANSFER_PERSISTENT)) &&
-	      r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) ||
+			 PIPE_TRANSFER_PERSISTENT))) ||
 	     (rbuffer->flags & RADEON_FLAG_SPARSE))) {
 		assert(usage & PIPE_TRANSFER_WRITE);
 
 		/* Check if mapping this buffer would cause waiting for the GPU.
 		 */
 		if (rbuffer->flags & RADEON_FLAG_SPARSE ||
-		    r600_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
+		    force_discard_range ||
+		    si_rings_is_buffer_referenced(rctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
 		    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
 			/* Do a wait-free write-only transfer using a temporary buffer. */
 			unsigned offset;
@@ -445,8 +467,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 	else if (((usage & PIPE_TRANSFER_READ) &&
 		  !(usage & PIPE_TRANSFER_PERSISTENT) &&
 		  (rbuffer->domains & RADEON_DOMAIN_VRAM ||
-		   rbuffer->flags & RADEON_FLAG_GTT_WC) &&
-		  r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) ||
+		   rbuffer->flags & RADEON_FLAG_GTT_WC)) ||
 		 (rbuffer->flags & RADEON_FLAG_SPARSE)) {
 		struct r600_resource *staging;
 
@@ -460,7 +481,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 				  box->x % R600_MAP_BUFFER_ALIGNMENT,
 				  0, 0, resource, 0, box);
 
-		data = r600_buffer_map_sync_with_rings(rctx, staging,
+		data = si_buffer_map_sync_with_rings(rctx, staging,
 						       usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
 		if (!data) {
 			r600_resource_reference(&staging, NULL);
@@ -475,7 +496,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 		}
 	}
 
-	data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
+	data = si_buffer_map_sync_with_rings(rctx, rbuffer, usage);
 	if (!data) {
 		return NULL;
 	}
@@ -545,10 +566,10 @@ static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
 	slab_free(&rctx->pool_transfers, transfer);
 }
 
-void r600_buffer_subdata(struct pipe_context *ctx,
-			 struct pipe_resource *buffer,
-			 unsigned usage, unsigned offset,
-			 unsigned size, const void *data)
+static void si_buffer_subdata(struct pipe_context *ctx,
+			      struct pipe_resource *buffer,
+			      unsigned usage, unsigned offset,
+			      unsigned size, const void *data)
 {
 	struct pipe_transfer *transfer = NULL;
 	struct pipe_box box;
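
si_buffer_subdata above is the buffer_subdata entry point; its body is not part of this hunk, but such helpers conventionally wrap the write in a transfer covering just the affected range: map, memcpy, unmap. A hypothetical sketch of that shape follows; the transfer API here is invented for the sketch and is not gallium's.

#include <string.h>

struct transfer_api_example {
	/* Map [offset, offset + size) for CPU access. */
	void *(*map_range)(void *buf, unsigned offset, unsigned size,
			   unsigned usage, void **transfer_out);
	void (*unmap)(void *transfer);
};

static void subdata_example(const struct transfer_api_example *api,
			    void *buf, unsigned usage, unsigned offset,
			    unsigned size, const void *data)
{
	void *transfer;
	void *ptr = api->map_range(buf, offset, size, usage, &transfer);

	if (!ptr)
		return;
	memcpy(ptr, data, size);
	api->unmap(transfer);
}
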
@@ -599,32 +620,33 @@ r600_alloc_buffer_struct(struct pipe_screen *screen,
 	return rbuffer;
 }
 
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
-					 const struct pipe_resource *templ,
-					 unsigned alignment)
+static struct pipe_resource *si_buffer_create(struct pipe_screen *screen,
+					      const struct pipe_resource *templ,
+					      unsigned alignment)
 {
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct si_screen *sscreen = (struct si_screen*)screen;
 	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
 
-	r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment);
+	if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
+		rbuffer->b.b.flags |= R600_RESOURCE_FLAG_UNMAPPABLE;
+
+	si_init_resource_fields(sscreen, rbuffer, templ->width0, alignment);
 
-	if (templ->bind & PIPE_BIND_SHARED)
-		rbuffer->flags |= RADEON_FLAG_HANDLE;
 	if (templ->flags & PIPE_RESOURCE_FLAG_SPARSE)
 		rbuffer->flags |= RADEON_FLAG_SPARSE;
 
-	if (!r600_alloc_resource(rscreen, rbuffer)) {
+	if (!si_alloc_resource(sscreen, rbuffer)) {
 		FREE(rbuffer);
 		return NULL;
 	}
 	return &rbuffer->b.b;
 }
 
-struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
-						 unsigned flags,
-						 unsigned usage,
-						 unsigned size,
-						 unsigned alignment)
+struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
+					       unsigned flags,
+					       unsigned usage,
+					       unsigned size,
+					       unsigned alignment)
 {
 	struct pipe_resource buffer;
 
@@ -638,16 +660,16 @@ struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen,
 	buffer.height0 = 1;
 	buffer.depth0 = 1;
 	buffer.array_size = 1;
-	return r600_buffer_create(screen, &buffer, alignment);
+	return si_buffer_create(screen, &buffer, alignment);
 }
 
-struct pipe_resource *
-r600_buffer_from_user_memory(struct pipe_screen *screen,
-			     const struct pipe_resource *templ,
-			     void *user_memory)
+static struct pipe_resource *
+si_buffer_from_user_memory(struct pipe_screen *screen,
+			   const struct pipe_resource *templ,
+			   void *user_memory)
 {
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct radeon_winsys *ws = rscreen->ws;
+	struct si_screen *sscreen = (struct si_screen*)screen;
+	struct radeon_winsys *ws = sscreen->ws;
 	struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
 
 	rbuffer->domains = RADEON_DOMAIN_GTT;
@@ -663,7 +685,7 @@ r600_buffer_from_user_memory(struct pipe_screen *screen,
 		return NULL;
 	}
 
-	if (rscreen->info.has_virtual_memory)
+	if (sscreen->info.has_virtual_memory)
 		rbuffer->gpu_address =
 			ws->buffer_get_virtual_address(rbuffer->buf);
 	else
@@ -674,3 +696,30 @@ r600_buffer_from_user_memory(struct pipe_screen *screen,
 
 	return &rbuffer->b.b;
 }
+
+static struct pipe_resource *si_resource_create(struct pipe_screen *screen,
+						const struct pipe_resource *templ)
+{
+	if (templ->target == PIPE_BUFFER) {
+		return si_buffer_create(screen, templ, 256);
+	} else {
+		return si_texture_create(screen, templ);
+	}
+}
+
+void si_init_screen_buffer_functions(struct si_screen *sscreen)
+{
+	sscreen->b.resource_create = si_resource_create;
+	sscreen->b.resource_destroy = u_resource_destroy_vtbl;
+	sscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
+}
+
+void si_init_buffer_functions(struct si_context *sctx)
+{
+	sctx->b.b.invalidate_resource = si_invalidate_resource;
+	sctx->b.b.transfer_map = u_transfer_map_vtbl;
+	sctx->b.b.transfer_flush_region = u_transfer_flush_region_vtbl;
+	sctx->b.b.transfer_unmap = u_transfer_unmap_vtbl;
+	sctx->b.b.texture_subdata = u_default_texture_subdata;
+	sctx->b.b.buffer_subdata = si_buffer_subdata;
+}
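
si_init_screen_buffer_functions and si_init_buffer_functions above wire these static helpers into the pipe_screen and pipe_context vtables, with si_resource_create fanning out by resource target. The toy sketch below shows the same dispatch shape; all types and names in it are illustrative, not gallium's.

#include <stddef.h>

enum target_example { TARGET_BUFFER, TARGET_TEXTURE };

struct screen_example {
	void *(*resource_create)(struct screen_example *s,
				 enum target_example target);
};

static void *create_buffer_example(struct screen_example *s)
{
	(void)s;
	return NULL; /* would allocate a buffer, e.g. with 256-byte alignment */
}

static void *create_texture_example(struct screen_example *s)
{
	(void)s;
	return NULL; /* would defer to the texture creation path */
}

static void *resource_create_example(struct screen_example *s,
				     enum target_example target)
{
	/* One entry point, dispatched by target, as in si_resource_create(). */
	return target == TARGET_BUFFER ? create_buffer_example(s)
				       : create_texture_example(s);
}

void init_screen_example(struct screen_example *s)
{
	s->resource_create = resource_create_example;
}
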