From 00bed8a794de3d80a46b65b9ab23c6df83e416a8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 13 Jul 2016 09:49:53 -0400 Subject: [PATCH] freedreno: threaded batch flush With the state accessed from GMEM+submit factored out of fd_context and into fd_batch, it is now possible to punt this off to a helper thread. And more importantly, since there are cases where one context might force the batch-cache to flush another context's batches (i.e. when there are too many in-flight batches), using a per-context helper thread keeps the various flushes for a given context serialized. TODO: as with batch-cache, there are a few places where we'll need a mutex to protect critical sections, which is completely missing at the moment. Signed-off-by: Rob Clark --- .../drivers/freedreno/freedreno_batch.c | 64 +++++++++++++++++-- .../drivers/freedreno/freedreno_batch.h | 6 +- .../drivers/freedreno/freedreno_batch_cache.c | 15 +++-- .../drivers/freedreno/freedreno_context.c | 10 ++- .../drivers/freedreno/freedreno_context.h | 4 ++ .../drivers/freedreno/freedreno_gmem.c | 2 - .../drivers/freedreno/freedreno_query_hw.c | 4 +- .../drivers/freedreno/freedreno_resource.c | 16 +++-- .../drivers/freedreno/freedreno_state.c | 4 +- 9 files changed, 99 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 5008f5dbe56..219e0a80988 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -40,6 +40,9 @@ batch_init(struct fd_batch *batch) struct fd_context *ctx = batch->ctx; unsigned size = 0; + if (ctx->screen->reorder) + util_queue_fence_init(&batch->flush_fence); + /* if kernel is too old to support unlimited # of cmd buffers, we * have no option but to allocate large worst-case sizes so that * we don't need to grow the ringbuffer. 
Performance is likely to @@ -119,6 +122,9 @@ batch_fini(struct fd_batch *batch) fd_hw_sample_reference(batch->ctx, &samp, NULL); } util_dynarray_fini(&batch->samples); + + if (batch->ctx->screen->reorder) + util_queue_fence_destroy(&batch->flush_fence); } static void @@ -129,7 +135,7 @@ batch_flush_reset_dependencies(struct fd_batch *batch, bool flush) foreach_batch(dep, cache, batch->dependents_mask) { if (flush) - fd_batch_flush(dep); + fd_batch_flush(dep, false); fd_batch_reference(&dep, NULL); } @@ -156,6 +162,8 @@ batch_reset(struct fd_batch *batch) { DBG("%p", batch); + fd_batch_sync(batch); + batch_flush_reset_dependencies(batch, false); batch_reset_resources(batch); @@ -197,6 +205,31 @@ __fd_batch_describe(char* buf, const struct fd_batch *batch) util_sprintf(buf, "fd_batch<%u>", batch->seqno); } +void +fd_batch_sync(struct fd_batch *batch) +{ + if (!batch->ctx->screen->reorder) + return; + util_queue_job_wait(&batch->flush_fence); +} + +static void +batch_flush_func(void *job, int id) +{ + struct fd_batch *batch = job; + + fd_gmem_render_tiles(batch); + batch_reset_resources(batch); + batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem); +} + +static void +batch_cleanup_func(void *job, int id) +{ + struct fd_batch *batch = job; + fd_batch_reference(&batch, NULL); +} + static void batch_flush(struct fd_batch *batch) { @@ -207,11 +240,25 @@ batch_flush(struct fd_batch *batch) batch->needs_flush = false; - batch_flush_reset_dependencies(batch, true); + /* close out the draw cmds by making sure any active queries are + * paused: + */ + fd_hw_query_set_stage(batch, batch->draw, FD_STAGE_NULL); - fd_gmem_render_tiles(batch); + batch->ctx->dirty = ~0; + batch_flush_reset_dependencies(batch, true); - batch_reset_resources(batch); + if (batch->ctx->screen->reorder) { + struct fd_batch *tmp = NULL; + fd_batch_reference(&tmp, batch); + util_queue_add_job(&batch->ctx->flush_queue, + batch, &batch->flush_fence, + batch_flush_func, batch_cleanup_func); + } else 
{ + fd_gmem_render_tiles(batch); + batch_reset_resources(batch); + batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem); + } debug_assert(batch->reference.count > 0); @@ -222,8 +269,9 @@ batch_flush(struct fd_batch *batch) } } +/* NOTE: could drop the last ref to batch */ void -fd_batch_flush(struct fd_batch *batch) +fd_batch_flush(struct fd_batch *batch, bool sync) { /* NOTE: we need to hold an extra ref across the body of flush, * since the last ref to this batch could be dropped when cleaning @@ -232,6 +280,8 @@ fd_batch_flush(struct fd_batch *batch) struct fd_batch *tmp = NULL; fd_batch_reference(&tmp, batch); batch_flush(tmp); + if (sync) + fd_batch_sync(tmp); fd_batch_reference(&tmp, NULL); } @@ -263,7 +313,7 @@ batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) */ if (batch_depends_on(dep, batch)) { DBG("%p: flush forced on %p!", batch, dep); - fd_batch_flush(dep); + fd_batch_flush(dep, false); } else { struct fd_batch *other = NULL; fd_batch_reference(&other, dep); @@ -327,5 +377,5 @@ fd_batch_check_size(struct fd_batch *batch) struct fd_ringbuffer *ring = batch->draw; if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) || (fd_mesa_debug & FD_DBG_FLUSH)) - fd_batch_flush(batch); + fd_batch_flush(batch, true); } diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index 6be196534ab..047044a9538 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -28,6 +28,7 @@ #define FREEDRENO_BATCH_H_ #include "util/u_inlines.h" +#include "util/u_queue.h" #include "util/list.h" #include "freedreno_util.h" @@ -76,6 +77,8 @@ struct fd_batch { struct fd_context *ctx; + struct util_queue_fence flush_fence; + /* do we need to mem2gmem before rendering. We don't, if for example, * there was a glClear() that invalidated the entire previous buffer * contents. 
Keep track of which buffer(s) are cleared, or needs @@ -197,7 +200,8 @@ struct fd_batch { struct fd_batch * fd_batch_create(struct fd_context *ctx); void fd_batch_reset(struct fd_batch *batch); -void fd_batch_flush(struct fd_batch *batch); +void fd_batch_sync(struct fd_batch *batch); +void fd_batch_flush(struct fd_batch *batch, bool sync); void fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, bool write); void fd_batch_check_size(struct fd_batch *batch); diff --git a/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/src/gallium/drivers/freedreno/freedreno_batch_cache.c index c947a559df9..635f2a7c994 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch_cache.c +++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c @@ -128,19 +128,24 @@ uint32_t fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx) { struct hash_entry *entry; - uint32_t timestamp = 0; + struct fd_batch *last_batch = NULL; hash_table_foreach(cache->ht, entry) { struct fd_batch *batch = NULL; fd_batch_reference(&batch, (struct fd_batch *)entry->data); if (batch->ctx == ctx) { - fd_batch_flush(batch); - timestamp = MAX2(timestamp, fd_ringbuffer_timestamp(batch->gmem)); + fd_batch_reference(&last_batch, batch); + fd_batch_flush(batch, false); } fd_batch_reference(&batch, NULL); } - return timestamp; + if (last_batch) { + fd_batch_sync(last_batch); + fd_batch_reference(&last_batch, NULL); + } + + return ctx->last_fence; } void @@ -238,7 +243,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx) fd_batch_reference(&flush_batch, cache->batches[i]); } DBG("%p: too many batches! 
flush forced!", flush_batch); - fd_batch_flush(flush_batch); + fd_batch_flush(flush_batch, true); /* While the resources get cleaned up automatically, the flush_batch * doesn't get removed from the dependencies of other batches, so diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 1c32cd9ae92..599f94ffec1 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -48,7 +48,7 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, if (!ctx->screen->reorder) { struct fd_batch *batch = NULL; fd_batch_reference(&batch, ctx->batch); - fd_batch_flush(batch); + fd_batch_flush(batch, true); timestamp = fd_ringbuffer_timestamp(batch->gmem); fd_batch_reference(&batch, NULL); } else { @@ -103,6 +103,9 @@ fd_context_destroy(struct pipe_context *pctx) DBG(""); + if (ctx->screen->reorder) + util_queue_destroy(&ctx->flush_queue); + fd_batch_reference(&ctx->batch, NULL); /* unref current batch */ fd_bc_invalidate_context(ctx); @@ -179,8 +182,11 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, * batches per compute job (since it isn't using tiling, so no point * in getting involved with the re-ordering madness).. 
*/ - if (!screen->reorder) + if (!screen->reorder) { ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx); + } else { + util_queue_init(&ctx->flush_queue, "flush_queue", 16, 1); + } fd_reset_wfi(ctx); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 7e25e57d43b..2d88cdcbd8c 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -114,6 +114,8 @@ struct fd_context { struct fd_device *dev; struct fd_screen *screen; + struct util_queue flush_queue; + struct blitter_context *blitter; struct primconvert_context *primconvert; @@ -161,6 +163,8 @@ struct fd_context { */ struct fd_batch *batch; + uint32_t last_fence; + /* Are we in process of shadowing a resource? Used to detect recursion * in transfer_map, and skip unneeded synchronization. */ diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index d57b6a36d8b..ed013d9d037 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -405,8 +405,6 @@ fd_gmem_render_tiles(struct fd_batch *batch) fd_ringbuffer_flush(batch->gmem); fd_reset_wfi(ctx); - - ctx->dirty = ~0; } /* tile needs restore if it isn't completely contained within the diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c index 12d40d04cda..b61ea0d5e08 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -238,7 +238,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, * spin forever: */ if (hq->no_wait_cnt++ > 5) - fd_batch_flush(rsc->write_batch); + fd_batch_flush(rsc->write_batch, false); return false; } @@ -266,7 +266,7 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q, struct fd_resource *rsc = fd_resource(start->prsc); if 
(rsc->write_batch) - fd_batch_flush(rsc->write_batch); + fd_batch_flush(rsc->write_batch, true); /* some piglit tests at least do query with no draws, I guess: */ if (!rsc->bo) diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 0e0305885a7..a091f5f1774 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -516,12 +516,18 @@ fd_resource_transfer_map(struct pipe_context *pctx, if (needs_flush) { if (usage & PIPE_TRANSFER_WRITE) { - struct fd_batch *batch; - foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) - fd_batch_flush(batch); + struct fd_batch *batch, *last_batch = NULL; + foreach_batch(batch, &ctx->screen->batch_cache, rsc->batch_mask) { + fd_batch_reference(&last_batch, batch); + fd_batch_flush(batch, false); + } + if (last_batch) { + fd_batch_sync(last_batch); + fd_batch_reference(&last_batch, NULL); + } assert(rsc->batch_mask == 0); } else { - fd_batch_flush(rsc->write_batch); + fd_batch_flush(rsc->write_batch, true); } assert(!rsc->write_batch); } @@ -1080,7 +1086,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) struct fd_resource *rsc = fd_resource(prsc); if (rsc->write_batch) - fd_batch_flush(rsc->write_batch); + fd_batch_flush(rsc->write_batch, true); assert(!rsc->write_batch); } diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index f83fd219f0a..c7d83692741 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -137,14 +137,14 @@ fd_set_framebuffer_state(struct pipe_context *pctx, * multiple times to the same surface), so we might as * well go ahead and flush this one: */ - fd_batch_flush(old_batch); + fd_batch_flush(old_batch, false); } fd_batch_reference(&old_batch, NULL); } else { DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush, 
framebuffer->cbufs[0], framebuffer->zsbuf); - fd_batch_flush(ctx->batch); + fd_batch_flush(ctx->batch, false); } cso = &ctx->batch->framebuffer; -- 2.30.2