From 7105774babc4d23623c3547cd19122a55c1090db Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Wed, 29 Jun 2016 13:51:26 -0400
Subject: [PATCH] freedreno: shadow textures if possible to avoid stall/flush

To make batch re-ordering useful, we need to be able to create shadow
resources to avoid a flush/stall in transfer_map(), for example when
uploading new texture contents or updating a UBO mid-batch.  In these
cases we want to clone the buffer and update the new copy, leaving the
old buffer (whose reference is held by the cmdstream) as a shadow.
This is done by blitting the remaining levels (and whatever part of
the current level is not discarded) from the old/shadow buffer to the
new one.

Signed-off-by: Rob Clark
---
 .../drivers/freedreno/freedreno_context.h  |   5 +
 .../drivers/freedreno/freedreno_resource.c | 211 +++++++++++++++++-
 .../drivers/freedreno/freedreno_util.h     |   6 +
 3 files changed, 211 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index f25ec34758f..c0004fabeaa 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -299,6 +299,11 @@ struct fd_context {
 	bool cond_cond; /* inverted rendering condition */
 	uint cond_mode;
 
+	/* Are we in process of shadowing a resource? Used to detect recursion
+	 * in transfer_map, and skip unneeded synchronization.
+	 */
+	bool in_shadow;
+
 	struct pipe_debug_callback debug;
 
 	/* GMEM/tile handling fxns: */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 5bcf11da85b..dce953f74c1 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -124,6 +124,177 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
 	fd_bc_invalidate_resource(rsc, true);
 }
 
+static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
+static void fd_blitter_pipe_end(struct fd_context *ctx);
+
+static void
+do_blit(struct fd_context *ctx, const struct pipe_blit_info *blit, bool fallback)
+{
+	/* TODO size threshold too?? */
+	if ((blit->src.resource->target != PIPE_BUFFER) && !fallback) {
+		/* do blit on gpu: */
+		fd_blitter_pipe_begin(ctx, false);
+		util_blitter_blit(ctx->blitter, blit);
+		fd_blitter_pipe_end(ctx);
+	} else {
+		/* do blit on cpu: */
+		util_resource_copy_region(&ctx->base,
+				blit->dst.resource, blit->dst.level, blit->dst.box.x,
+				blit->dst.box.y, blit->dst.box.z,
+				blit->src.resource, blit->src.level, &blit->src.box);
+	}
+}
+
+static bool
+fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
+		unsigned level, unsigned usage, const struct pipe_box *box)
+{
+	struct pipe_context *pctx = &ctx->base;
+	struct pipe_resource *prsc = &rsc->base.b;
+	bool fallback = false;
+
+	/* TODO: somehow munge dimensions and format to copy unsupported
+	 * render target format to something that is supported?
+	 */
+	if (!pctx->screen->is_format_supported(pctx->screen,
+			prsc->format, prsc->target, prsc->nr_samples,
+			PIPE_BIND_RENDER_TARGET))
+		fallback = true;
+
+	/* these cases should be handled elsewhere.. just for future
+	 * reference in case this gets split into a more generic(ish)
+	 * helper.
+	 */
+	debug_assert(!(usage & PIPE_TRANSFER_READ));
+	debug_assert(!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE));
+
+	/* if we do a gpu blit to clone the whole resource, we'll just
+	 * end up stalling on that.. so only allow if we can discard
+	 * current range (and blit, possibly cpu or gpu, the rest)
+	 */
+	if (!(usage & PIPE_TRANSFER_DISCARD_RANGE))
+		return false;
+
+	bool whole_level = util_texrange_covers_whole_level(prsc, level,
+		box->x, box->y, box->z, box->width, box->height, box->depth);
+
+	/* TODO need to be more clever about current level */
+	if ((prsc->target >= PIPE_TEXTURE_2D) && !whole_level)
+		return false;
+
+	struct pipe_resource *pshadow =
+		pctx->screen->resource_create(pctx->screen, prsc);
+
+	if (!pshadow)
+		return false;
+
+	assert(!ctx->in_shadow);
+	ctx->in_shadow = true;
+
+	/* get rid of any references that batch-cache might have to us (which
+	 * should empty/destroy rsc->batches hashset)
+	 */
+	fd_bc_invalidate_resource(rsc, false);
+
+	/* Swap the backing bo's, so shadow becomes the old buffer,
+	 * blit from shadow to new buffer.  From here on out, we
+	 * cannot fail.
+	 *
+	 * Note that we need to do it in this order, otherwise if
+	 * we go down cpu blit path, the recursive transfer_map()
+	 * sees the wrong status..
+	 */
+	struct fd_resource *shadow = fd_resource(pshadow);
+
+	DBG("shadow: %p (%d) -> %p (%d)\n", rsc, rsc->base.b.reference.count,
+			shadow, shadow->base.b.reference.count);
+
+	/* TODO valid_buffer_range?? */
+	swap(rsc->bo,          shadow->bo);
+	swap(rsc->timestamp,   shadow->timestamp);
+	swap(rsc->write_batch, shadow->write_batch);
+
+	/* at this point, the newly created shadow buffer is not referenced
+	 * by any batches, but the existing rsc (probably) is.  We need to
+	 * transfer those references over:
+	 */
+	debug_assert(shadow->batch_mask == 0);
+	struct fd_batch *batch;
+	foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
+		struct set_entry *entry = _mesa_set_search(batch->resources, rsc);
+		_mesa_set_remove(batch->resources, entry);
+		_mesa_set_add(batch->resources, shadow);
+	}
+	swap(rsc->batch_mask, shadow->batch_mask);
+
+	struct pipe_blit_info blit = {0};
+	blit.dst.resource = prsc;
+	blit.dst.format   = prsc->format;
+	blit.src.resource = pshadow;
+	blit.src.format   = pshadow->format;
+	blit.mask = util_format_get_mask(prsc->format);
+	blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+#define set_box(field, val) do {     \
+		blit.dst.field = (val);      \
+		blit.src.field = (val);      \
+	} while (0)
+
+	/* blit the other levels in their entirety: */
+	for (unsigned l = 0; l <= prsc->last_level; l++) {
+		if (l == level)
+			continue;
+
+		/* just blit whole level: */
+		set_box(level, l);
+		set_box(box.width,  u_minify(prsc->width0, l));
+		set_box(box.height, u_minify(prsc->height0, l));
+		set_box(box.depth,  u_minify(prsc->depth0, l));
+
+		do_blit(ctx, &blit, fallback);
+	}
+
+	/* deal w/ current level specially, since we might need to split
+	 * it up into a couple blits:
+	 */
+	if (!whole_level) {
+		set_box(level, level);
+
+		switch (prsc->target) {
+		case PIPE_BUFFER:
+		case PIPE_TEXTURE_1D:
+			set_box(box.y, 0);
+			set_box(box.z, 0);
+			set_box(box.height, 1);
+			set_box(box.depth, 1);
+
+			if (box->x > 0) {
+				set_box(box.x, 0);
+				set_box(box.width, box->x);
+
+				do_blit(ctx, &blit, fallback);
+			}
+			if ((box->x + box->width) < u_minify(prsc->width0, level)) {
+				set_box(box.x, box->x + box->width);
+				set_box(box.width, u_minify(prsc->width0, level) - (box->x + box->width));
+
+				do_blit(ctx, &blit, fallback);
+			}
+			break;
+		case PIPE_TEXTURE_2D:
+			/* TODO */
+		default:
+			unreachable("TODO");
+		}
+	}
+
+	ctx->in_shadow = false;
+
+	pipe_resource_reference(&pshadow, NULL);
+
+	return true;
+}
+
 static unsigned
 fd_resource_layer_offset(struct fd_resource *rsc,
 		struct fd_resource_slice *slice,
@@ -311,6 +482,9 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
 	ptrans->layer_stride = rsc->layer_first ? rsc->layer_size : slice->size0;
 
+	if (ctx->in_shadow && !(usage & PIPE_TRANSFER_READ))
+		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
+
 	if (usage & PIPE_TRANSFER_READ)
 		op |= DRM_FREEDRENO_PREP_READ;
 
@@ -333,27 +507,45 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 		/* If the GPU is writing to the resource, or if it is reading from the
 		 * resource and we're trying to write to it, flush the renders.
 		 */
-		if (((ptrans->usage & PIPE_TRANSFER_WRITE) &&
-					pending(rsc, true)) ||
-				pending(rsc, false)) {
+		bool needs_flush = pending(rsc, !!(usage & PIPE_TRANSFER_WRITE));
+		bool busy = needs_flush || (0 != fd_bo_cpu_prep(rsc->bo,
+				ctx->screen->pipe, op | DRM_FREEDRENO_PREP_NOSYNC));
+
+		/* if we need to flush/stall, see if we can make a shadow buffer
+		 * to avoid this:
+		 *
+		 * TODO we could go down this path !reorder && !busy_for_read
+		 * ie. we only *don't* want to go down this path if the blit
+		 * will trigger a flush!
+		 */
+		if (ctx->screen->reorder && busy && !(usage & PIPE_TRANSFER_READ)) {
+			if (fd_try_shadow_resource(ctx, rsc, level, usage, box)) {
+				needs_flush = busy = false;
+				fd_invalidate_resource(ctx, prsc);
+			}
+		}
+
+		if (needs_flush) {
 			if (usage & PIPE_TRANSFER_WRITE) {
 				struct fd_batch *batch;
-				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) {
+				foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask)
 					fd_batch_flush(batch);
-				}
 				assert(rsc->batch_mask == 0);
 			} else {
 				fd_batch_flush(rsc->write_batch);
 			}
+			assert(!rsc->write_batch);
 		}
 
 		/* The GPU keeps track of how the various bo's are being used, and
 		 * will wait if necessary for the proper operation to have
 		 * completed.
 		 */
-		ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
-		if (ret)
-			goto fail;
+		if (busy) {
+			ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
+			if (ret)
+				goto fail;
+		}
 	}
 
 	buf = fd_bo_map(rsc->bo);
@@ -698,9 +890,6 @@ fail:
 	return NULL;
 }
 
-static void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond);
-static void fd_blitter_pipe_end(struct fd_context *ctx);
-
 /**
  * _copy_region using pipe (3d engine)
  */
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index ea08e31c465..6321cd7b4cd 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -332,4 +332,10 @@ pack_rgba(enum pipe_format format, const float *rgba)
 	return uc.ui[0];
 }
 
+/*
+ * swap - swap value of @a and @b
+ */
+#define swap(a, b) \
+	do { __typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
 #endif /* FREEDRENO_UTIL_H_ */
-- 
2.30.2