From b852c3bf67cf1a047b8d17391506b19b5d1bdb70 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 24 Nov 2017 10:37:22 -0500 Subject: [PATCH] freedreno: add non-draw batches for compute/blit Get rid of "gmem" (ie. tiling) ringbuffer, and just emit setup commands directly to "draw" ringbuffer for compute (and in future for blits not using the 3d pipe). This way we can have a simple flat cmdstream buffer and bypass setup related to 3d pipe. Signed-off-by: Rob Clark --- .../drivers/freedreno/a5xx/fd5_compute.c | 31 +++++++++++++++++++ .../drivers/freedreno/freedreno_batch.c | 27 +++++++++++----- .../drivers/freedreno/freedreno_batch.h | 4 ++- .../drivers/freedreno/freedreno_batch_cache.c | 2 +- .../drivers/freedreno/freedreno_context.c | 5 +-- .../drivers/freedreno/freedreno_context.h | 4 +-- .../drivers/freedreno/freedreno_draw.c | 5 +-- .../drivers/freedreno/freedreno_gmem.c | 23 +++++++------- .../drivers/freedreno/freedreno_gmem.h | 1 - .../drivers/freedreno/freedreno_query.c | 1 + .../drivers/freedreno/freedreno_query.h | 7 +++-- .../drivers/freedreno/freedreno_query_sw.c | 4 +++ 12 files changed, 82 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c index f9fb599e785..b7f596f336a 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c @@ -120,6 +120,35 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) fd5_emit_shader(ring, v); } +static void +emit_setup(struct fd_context *ctx) +{ + struct fd_ringbuffer *ring = ctx->batch->draw; + + fd5_emit_restore(ctx->batch, ring); + fd5_emit_lrz_flush(ring); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, UNK_19); + + OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ + + /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ + fd_wfi(ctx->batch, ring); + OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */ + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_BYPASS); +} + static void fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) { @@ -128,6 +157,8 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) struct ir3_shader_variant *v; struct fd_ringbuffer *ring = ctx->batch->draw; + emit_setup(ctx); + v = ir3_shader_variant(so->shader, key, &ctx->debug); if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG) diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 6e208d350ea..e44e8e5403e 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -55,12 +55,16 @@ batch_init(struct fd_batch *batch) } batch->draw = fd_ringbuffer_new(ctx->pipe, size); - batch->binning = fd_ringbuffer_new(ctx->pipe, size); - batch->gmem = fd_ringbuffer_new(ctx->pipe, size); + if (!batch->nondraw) { + batch->binning = fd_ringbuffer_new(ctx->pipe, size); + batch->gmem = fd_ringbuffer_new(ctx->pipe, size); - fd_ringbuffer_set_parent(batch->gmem, NULL); - fd_ringbuffer_set_parent(batch->draw, batch->gmem); - fd_ringbuffer_set_parent(batch->binning, batch->gmem); + fd_ringbuffer_set_parent(batch->gmem, NULL); + fd_ringbuffer_set_parent(batch->draw, batch->gmem); + fd_ringbuffer_set_parent(batch->binning, batch->gmem); + } else { + fd_ringbuffer_set_parent(batch->draw, NULL); + } batch->in_fence_fd = -1; batch->fence = fd_fence_create(batch); @@ -89,7 +93,7 @@ batch_init(struct fd_batch *batch) } struct fd_batch * -fd_batch_create(struct fd_context *ctx) +fd_batch_create(struct fd_context *ctx, bool nondraw) { struct fd_batch *batch = CALLOC_STRUCT(fd_batch); @@ -100,6 +104,7 @@ fd_batch_create(struct fd_context *ctx) pipe_reference_init(&batch->reference, 1); batch->ctx = ctx; + batch->nondraw = nondraw; batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -123,8 +128,13 @@ batch_fini(struct fd_batch *batch) fd_fence_ref(NULL, &batch->fence, NULL); fd_ringbuffer_del(batch->draw); - fd_ringbuffer_del(batch->binning); - fd_ringbuffer_del(batch->gmem); + if (!batch->nondraw) { + fd_ringbuffer_del(batch->binning); + fd_ringbuffer_del(batch->gmem); + } else { + debug_assert(!batch->binning); + debug_assert(!batch->gmem); + } if (batch->lrz_clear) { fd_ringbuffer_del(batch->lrz_clear); batch->lrz_clear = NULL; @@ -326,6 +336,7 @@ fd_batch_flush(struct fd_batch *batch, bool sync, bool force) * up used_resources */ struct fd_batch *tmp = NULL; + fd_batch_reference(&tmp, batch); batch_flush(tmp, force); if (sync) diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index d69ff6f80b6..56665b70390 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -93,6 +93,8 @@ struct fd_batch { FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, } cleared, partial_cleared, restore, resolve; + /* is this a non-draw batch (ie compute/blit which has no pfb state)? */ + bool nondraw : 1; bool needs_flush : 1; bool blit : 1; bool back_blit : 1; /* only blit so far is resource shadowing back-blit */ @@ -202,7 +204,7 @@ struct fd_batch { uint32_t dependents_mask; }; -struct fd_batch * fd_batch_create(struct fd_context *ctx); +struct fd_batch * fd_batch_create(struct fd_context *ctx, bool nondraw); void fd_batch_reset(struct fd_batch *batch); void fd_batch_sync(struct fd_batch *batch); diff --git a/src/gallium/drivers/freedreno/freedreno_batch_cache.c b/src/gallium/drivers/freedreno/freedreno_batch_cache.c index ae48b912e59..b3a6041eead 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch_cache.c +++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c @@ -316,7 +316,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx) idx--; /* bit zero returns 1 for ffs() */ - batch = fd_batch_create(ctx); + batch = fd_batch_create(ctx, false); if (!batch) goto out; diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 0ec81f882da..3da058dcdef 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -155,9 +155,10 @@ fd_context_destroy(struct pipe_context *pctx) fd_pipe_del(ctx->pipe); if (fd_mesa_debug & (FD_DBG_BSTAT | FD_DBG_MSGS)) { - printf("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_restore=%u\n", + printf("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, batch_restore=%u\n", (uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem, - (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_restore); + (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw, + (uint32_t)ctx->stats.batch_restore); } FREE(ctx); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index d8d23ad8539..02656e82519 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -215,7 +215,7 @@ struct fd_context { uint64_t prims_emitted; uint64_t prims_generated; uint64_t draw_calls; - uint64_t batch_total, batch_sysmem, batch_gmem, batch_restore; + uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw, batch_restore; uint64_t staging_uploads, shadow_uploads; } stats; @@ -304,7 +304,7 @@ struct fd_context { /* draw: */ bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info, - unsigned index_offset); + unsigned index_offset); bool (*clear)(struct fd_context *ctx, unsigned buffers, const union pipe_color_union *color, double depth, unsigned stencil); diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index d3bf3165276..933481e742d 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -459,7 +459,7 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) struct fd_batch *batch, *save_batch = NULL; unsigned i; - batch = fd_batch_create(ctx); + batch = fd_batch_create(ctx, true); fd_batch_reference(&save_batch, ctx->batch); fd_batch_reference(&ctx->batch, batch); @@ -493,9 +493,10 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) mtx_unlock(&ctx->screen->lock); + batch->needs_flush = true; ctx->launch_grid(ctx, info); - fd_gmem_flush_compute(batch); + fd_batch_flush(batch, false, false); fd_batch_reference(&ctx->batch, save_batch); fd_batch_reference(&save_batch, NULL); diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 79fdb1102c3..37a2f33365d 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -372,13 +372,15 @@ render_sysmem(struct fd_batch *batch) static void flush_ring(struct fd_batch *batch) { + /* for compute/blit batch, there is no batch->gmem, only batch->draw: */ + struct fd_ringbuffer *ring = batch->nondraw ? batch->draw : batch->gmem; uint32_t timestamp; int out_fence_fd = -1; - fd_ringbuffer_flush2(batch->gmem, batch->in_fence_fd, + fd_ringbuffer_flush2(ring, batch->in_fence_fd, batch->needs_out_fence_fd ? &out_fence_fd : NULL); - timestamp = fd_ringbuffer_timestamp(batch->gmem); + timestamp = fd_ringbuffer_timestamp(ring); fd_fence_populate(batch->fence, timestamp, out_fence_fd); } @@ -389,8 +391,9 @@ fd_gmem_render_tiles(struct fd_batch *batch) struct pipe_framebuffer_state *pfb = &batch->framebuffer; bool sysmem = false; - if (ctx->emit_sysmem_prep) { - if (batch->cleared || batch->gmem_reason || (batch->num_draws > 5)) { + if (ctx->emit_sysmem_prep && !batch->nondraw) { + if (batch->cleared || batch->gmem_reason || + ((batch->num_draws > 5) && !batch->blit)) { DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u", batch->cleared, batch->gmem_reason, batch->num_draws); } else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) { @@ -407,7 +410,10 @@ fd_gmem_render_tiles(struct fd_batch *batch) ctx->stats.batch_total++; - if (sysmem) { + if (batch->nondraw) { + DBG("%p: rendering non-draw", batch); + ctx->stats.batch_nondraw++; + } else if (sysmem) { DBG("%p: rendering sysmem %ux%u (%s/%s)", batch, pfb->width, pfb->height, util_format_short_name(pipe_surface_format(pfb->cbufs[0])), @@ -447,13 +453,6 @@ fd_gmem_render_noop(struct fd_batch *batch) flush_ring(batch); } -void -fd_gmem_flush_compute(struct fd_batch *batch) -{ - render_sysmem(batch); - flush_ring(batch); -} - /* tile needs restore if it isn't completely contained within the * cleared scissor: */ diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h index f5276ce4481..07e13f57328 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.h +++ b/src/gallium/drivers/freedreno/freedreno_gmem.h @@ -64,7 +64,6 @@ struct fd_batch; void fd_gmem_render_tiles(struct fd_batch *batch); void fd_gmem_render_noop(struct fd_batch *batch); -void fd_gmem_flush_compute(struct fd_batch *batch); bool fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile, uint32_t buffers); diff --git a/src/gallium/drivers/freedreno/freedreno_query.c b/src/gallium/drivers/freedreno/freedreno_query.c index d76994e5d9e..2a809a3304e 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.c +++ b/src/gallium/drivers/freedreno/freedreno_query.c @@ -127,6 +127,7 @@ fd_get_driver_query_info(struct pipe_screen *pscreen, {"batches", FD_QUERY_BATCH_TOTAL, {0}}, {"batches-sysmem", FD_QUERY_BATCH_SYSMEM, {0}}, {"batches-gmem", FD_QUERY_BATCH_GMEM, {0}}, + {"batches-nondraw", FD_QUERY_BATCH_NONDRAW, {0}}, {"restores", FD_QUERY_BATCH_RESTORE, {0}}, {"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, {0}}, {"staging", FD_QUERY_STAGING_UPLOADS, {0}}, diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h index 78717c68701..296c3567939 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.h +++ b/src/gallium/drivers/freedreno/freedreno_query.h @@ -60,9 +60,10 @@ fd_query(struct pipe_query *pq) #define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */ #define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */ #define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */ -#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */ -#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 5) /* texture/buffer uploads using staging blit */ -#define FD_QUERY_SHADOW_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6) /* texture/buffer uploads that shadowed rsc */ +#define FD_QUERY_BATCH_NONDRAW (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* compute/blit batches */ +#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 5) /* batches requiring GMEM restore */ +#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6) /* texture/buffer uploads using staging blit */ +#define FD_QUERY_SHADOW_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 7) /* texture/buffer uploads that shadowed rsc */ void fd_query_screen_init(struct pipe_screen *pscreen); void fd_query_context_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_query_sw.c b/src/gallium/drivers/freedreno/freedreno_query_sw.c index 93da2dc08e1..080b2b17a2e 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_sw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_sw.c @@ -65,6 +65,8 @@ read_counter(struct fd_context *ctx, int type) return ctx->stats.batch_sysmem; case FD_QUERY_BATCH_GMEM: return ctx->stats.batch_gmem; + case FD_QUERY_BATCH_NONDRAW: + return ctx->stats.batch_nondraw; case FD_QUERY_BATCH_RESTORE: return ctx->stats.batch_restore; case FD_QUERY_STAGING_UPLOADS: @@ -82,6 +84,7 @@ is_rate_query(struct fd_query *q) case FD_QUERY_BATCH_TOTAL: case FD_QUERY_BATCH_SYSMEM: case FD_QUERY_BATCH_GMEM: + case FD_QUERY_BATCH_NONDRAW: case FD_QUERY_BATCH_RESTORE: case FD_QUERY_STAGING_UPLOADS: case FD_QUERY_SHADOW_UPLOADS: @@ -147,6 +150,7 @@ fd_sw_create_query(struct fd_context *ctx, unsigned query_type) case FD_QUERY_BATCH_TOTAL: case FD_QUERY_BATCH_SYSMEM: case FD_QUERY_BATCH_GMEM: + case FD_QUERY_BATCH_NONDRAW: case FD_QUERY_BATCH_RESTORE: case FD_QUERY_STAGING_UPLOADS: case FD_QUERY_SHADOW_UPLOADS: -- 2.30.2