freedreno: add non-draw batches for compute/blit
author: Rob Clark <robdclark@gmail.com>
Fri, 24 Nov 2017 15:37:22 +0000 (10:37 -0500)
committer: Rob Clark <robdclark@gmail.com>
Sun, 17 Dec 2017 17:41:32 +0000 (12:41 -0500)
For non-draw batches, get rid of the "gmem" (i.e. tiling) ringbuffer and just
emit setup commands directly to the "draw" ringbuffer for compute (and, in
future, for blits not using the 3d pipe).  This way we can have a simple flat
cmdstream buffer and bypass the setup related to the 3d pipe.

Signed-off-by: Rob Clark <robdclark@gmail.com>
12 files changed:
src/gallium/drivers/freedreno/a5xx/fd5_compute.c
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_batch.h
src/gallium/drivers/freedreno/freedreno_batch_cache.c
src/gallium/drivers/freedreno/freedreno_context.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_draw.c
src/gallium/drivers/freedreno/freedreno_gmem.c
src/gallium/drivers/freedreno/freedreno_gmem.h
src/gallium/drivers/freedreno/freedreno_query.c
src/gallium/drivers/freedreno/freedreno_query.h
src/gallium/drivers/freedreno/freedreno_query_sw.c

index f9fb599e78555989e940df1a78a1b21de2780e7d..b7f596f336a8e010fde9a5c5958f46f4a7f4ea57 100644 (file)
@@ -120,6 +120,35 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
        fd5_emit_shader(ring, v);
 }
 
+static void
+emit_setup(struct fd_context *ctx)
+{
+       struct fd_ringbuffer *ring = ctx->batch->draw;
+
+       fd5_emit_restore(ctx->batch, ring);
+       fd5_emit_lrz_flush(ring);
+
+       OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+       OUT_RING(ring, 0x0);
+
+       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+       OUT_RING(ring, UNK_19);
+
+       OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
+       OUT_RING(ring, 0x00000003);   /* PC_POWER_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
+       OUT_RING(ring, 0x00000003);   /* VFD_POWER_CNTL */
+
+       /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
+       fd_wfi(ctx->batch, ring);
+       OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
+       OUT_RING(ring, 0x10000000);   /* RB_CCU_CNTL */
+
+       OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
+       OUT_RING(ring, A5XX_RB_CNTL_BYPASS);
+}
+
 static void
 fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
 {
@@ -128,6 +157,8 @@ fd5_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
        struct ir3_shader_variant *v;
        struct fd_ringbuffer *ring = ctx->batch->draw;
 
+       emit_setup(ctx);
+
        v = ir3_shader_variant(so->shader, key, &ctx->debug);
 
        if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
index 6e208d350ea2368dad78f6882559fced485e7bf6..e44e8e5403e45ef8ec143c4768381781deb44848 100644 (file)
@@ -55,12 +55,16 @@ batch_init(struct fd_batch *batch)
        }
 
        batch->draw    = fd_ringbuffer_new(ctx->pipe, size);
-       batch->binning = fd_ringbuffer_new(ctx->pipe, size);
-       batch->gmem    = fd_ringbuffer_new(ctx->pipe, size);
+       if (!batch->nondraw) {
+               batch->binning = fd_ringbuffer_new(ctx->pipe, size);
+               batch->gmem    = fd_ringbuffer_new(ctx->pipe, size);
 
-       fd_ringbuffer_set_parent(batch->gmem, NULL);
-       fd_ringbuffer_set_parent(batch->draw, batch->gmem);
-       fd_ringbuffer_set_parent(batch->binning, batch->gmem);
+               fd_ringbuffer_set_parent(batch->gmem, NULL);
+               fd_ringbuffer_set_parent(batch->draw, batch->gmem);
+               fd_ringbuffer_set_parent(batch->binning, batch->gmem);
+       } else {
+               fd_ringbuffer_set_parent(batch->draw, NULL);
+       }
 
        batch->in_fence_fd = -1;
        batch->fence = fd_fence_create(batch);
@@ -89,7 +93,7 @@ batch_init(struct fd_batch *batch)
 }
 
 struct fd_batch *
-fd_batch_create(struct fd_context *ctx)
+fd_batch_create(struct fd_context *ctx, bool nondraw)
 {
        struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
 
@@ -100,6 +104,7 @@ fd_batch_create(struct fd_context *ctx)
 
        pipe_reference_init(&batch->reference, 1);
        batch->ctx = ctx;
+       batch->nondraw = nondraw;
 
        batch->resources = _mesa_set_create(NULL, _mesa_hash_pointer,
                        _mesa_key_pointer_equal);
@@ -123,8 +128,13 @@ batch_fini(struct fd_batch *batch)
        fd_fence_ref(NULL, &batch->fence, NULL);
 
        fd_ringbuffer_del(batch->draw);
-       fd_ringbuffer_del(batch->binning);
-       fd_ringbuffer_del(batch->gmem);
+       if (!batch->nondraw) {
+               fd_ringbuffer_del(batch->binning);
+               fd_ringbuffer_del(batch->gmem);
+       } else {
+               debug_assert(!batch->binning);
+               debug_assert(!batch->gmem);
+       }
        if (batch->lrz_clear) {
                fd_ringbuffer_del(batch->lrz_clear);
                batch->lrz_clear = NULL;
@@ -326,6 +336,7 @@ fd_batch_flush(struct fd_batch *batch, bool sync, bool force)
         * up used_resources
         */
        struct fd_batch *tmp = NULL;
+
        fd_batch_reference(&tmp, batch);
        batch_flush(tmp, force);
        if (sync)
index d69ff6f80b683c197a108b396a933b7f5403076a..56665b703900424f76122ac53dddd7a0f8af52f6 100644 (file)
@@ -93,6 +93,8 @@ struct fd_batch {
                FD_BUFFER_ALL     = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
        } cleared, partial_cleared, restore, resolve;
 
+       /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
+       bool nondraw : 1;
        bool needs_flush : 1;
        bool blit : 1;
        bool back_blit : 1;      /* only blit so far is resource shadowing back-blit */
@@ -202,7 +204,7 @@ struct fd_batch {
        uint32_t dependents_mask;
 };
 
-struct fd_batch * fd_batch_create(struct fd_context *ctx);
+struct fd_batch * fd_batch_create(struct fd_context *ctx, bool nondraw);
 
 void fd_batch_reset(struct fd_batch *batch);
 void fd_batch_sync(struct fd_batch *batch);
index ae48b912e59da1b1f68a821b44813e90c9520ad6..b3a6041eead5afe5d7390668860295affb4bc214 100644 (file)
@@ -316,7 +316,7 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx)
 
        idx--;              /* bit zero returns 1 for ffs() */
 
-       batch = fd_batch_create(ctx);
+       batch = fd_batch_create(ctx, false);
        if (!batch)
                goto out;
 
index 0ec81f882daf7341ceb88097a5323075225f3246..3da058dcdefd04b3e39638e8ec0fe23c992c8670 100644 (file)
@@ -155,9 +155,10 @@ fd_context_destroy(struct pipe_context *pctx)
        fd_pipe_del(ctx->pipe);
 
        if (fd_mesa_debug & (FD_DBG_BSTAT | FD_DBG_MSGS)) {
-               printf("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_restore=%u\n",
+               printf("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, batch_restore=%u\n",
                        (uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
-                       (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_restore);
+                       (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
+                       (uint32_t)ctx->stats.batch_restore);
        }
 
        FREE(ctx);
index d8d23ad8539c3312d33b3894748b4cf39464ef39..02656e82519726b19730dbe31a1396681731d365 100644 (file)
@@ -215,7 +215,7 @@ struct fd_context {
                uint64_t prims_emitted;
                uint64_t prims_generated;
                uint64_t draw_calls;
-               uint64_t batch_total, batch_sysmem, batch_gmem, batch_restore;
+               uint64_t batch_total, batch_sysmem, batch_gmem, batch_nondraw, batch_restore;
                uint64_t staging_uploads, shadow_uploads;
        } stats;
 
@@ -304,7 +304,7 @@ struct fd_context {
 
        /* draw: */
        bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info,
-                         unsigned index_offset);
+                       unsigned index_offset);
        bool (*clear)(struct fd_context *ctx, unsigned buffers,
                        const union pipe_color_union *color, double depth, unsigned stencil);
 
index d3bf3165276191a616883c6279f3bea63969a5ec..933481e742d0416c36c61845a3b7c9c32c9076ae 100644 (file)
@@ -459,7 +459,7 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
        struct fd_batch *batch, *save_batch = NULL;
        unsigned i;
 
-       batch = fd_batch_create(ctx);
+       batch = fd_batch_create(ctx, true);
        fd_batch_reference(&save_batch, ctx->batch);
        fd_batch_reference(&ctx->batch, batch);
 
@@ -493,9 +493,10 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
 
        mtx_unlock(&ctx->screen->lock);
 
+       batch->needs_flush = true;
        ctx->launch_grid(ctx, info);
 
-       fd_gmem_flush_compute(batch);
+       fd_batch_flush(batch, false, false);
 
        fd_batch_reference(&ctx->batch, save_batch);
        fd_batch_reference(&save_batch, NULL);
index 79fdb1102c348ba818016f57cbde2c8f64bbfc65..37a2f33365daae527cb56fe5bf709f3e2a59527e 100644 (file)
@@ -372,13 +372,15 @@ render_sysmem(struct fd_batch *batch)
 static void
 flush_ring(struct fd_batch *batch)
 {
+       /* for compute/blit batch, there is no batch->gmem, only batch->draw: */
+       struct fd_ringbuffer *ring = batch->nondraw ? batch->draw : batch->gmem;
        uint32_t timestamp;
        int out_fence_fd = -1;
 
-       fd_ringbuffer_flush2(batch->gmem, batch->in_fence_fd,
+       fd_ringbuffer_flush2(ring, batch->in_fence_fd,
                        batch->needs_out_fence_fd ? &out_fence_fd : NULL);
 
-       timestamp = fd_ringbuffer_timestamp(batch->gmem);
+       timestamp = fd_ringbuffer_timestamp(ring);
        fd_fence_populate(batch->fence, timestamp, out_fence_fd);
 }
 
@@ -389,8 +391,9 @@ fd_gmem_render_tiles(struct fd_batch *batch)
        struct pipe_framebuffer_state *pfb = &batch->framebuffer;
        bool sysmem = false;
 
-       if (ctx->emit_sysmem_prep) {
-               if (batch->cleared || batch->gmem_reason || (batch->num_draws > 5)) {
+       if (ctx->emit_sysmem_prep && !batch->nondraw) {
+               if (batch->cleared || batch->gmem_reason ||
+                               ((batch->num_draws > 5) && !batch->blit)) {
                        DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u",
                                batch->cleared, batch->gmem_reason, batch->num_draws);
                } else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) {
@@ -407,7 +410,10 @@ fd_gmem_render_tiles(struct fd_batch *batch)
 
        ctx->stats.batch_total++;
 
-       if (sysmem) {
+       if (batch->nondraw) {
+               DBG("%p: rendering non-draw", batch);
+               ctx->stats.batch_nondraw++;
+       } else if (sysmem) {
                DBG("%p: rendering sysmem %ux%u (%s/%s)",
                        batch, pfb->width, pfb->height,
                        util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
@@ -447,13 +453,6 @@ fd_gmem_render_noop(struct fd_batch *batch)
        flush_ring(batch);
 }
 
-void
-fd_gmem_flush_compute(struct fd_batch *batch)
-{
-       render_sysmem(batch);
-       flush_ring(batch);
-}
-
 /* tile needs restore if it isn't completely contained within the
  * cleared scissor:
  */
index f5276ce4481b6b7e96e0e40161b0b31e6e79f656..07e13f573288e7546192d5e157c7bd68795148c1 100644 (file)
@@ -64,7 +64,6 @@ struct fd_batch;
 
 void fd_gmem_render_tiles(struct fd_batch *batch);
 void fd_gmem_render_noop(struct fd_batch *batch);
-void fd_gmem_flush_compute(struct fd_batch *batch);
 
 bool fd_gmem_needs_restore(struct fd_batch *batch, struct fd_tile *tile,
                uint32_t buffers);
index d76994e5d9e96fd09d2158e11c6873b739f29e8a..2a809a3304ef1c2d5386896bf8be06e8b6e1f788 100644 (file)
@@ -127,6 +127,7 @@ fd_get_driver_query_info(struct pipe_screen *pscreen,
                        {"batches", FD_QUERY_BATCH_TOTAL, {0}},
                        {"batches-sysmem", FD_QUERY_BATCH_SYSMEM, {0}},
                        {"batches-gmem", FD_QUERY_BATCH_GMEM, {0}},
+                       {"batches-nondraw", FD_QUERY_BATCH_NONDRAW, {0}},
                        {"restores", FD_QUERY_BATCH_RESTORE, {0}},
                        {"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, {0}},
                        {"staging", FD_QUERY_STAGING_UPLOADS, {0}},
index 78717c687012ad6def88bade5e8dc6540573ecb5..296c3567939524a350e0df48677fb5eee0fdfba7 100644 (file)
@@ -60,9 +60,10 @@ fd_query(struct pipe_query *pq)
 #define FD_QUERY_BATCH_TOTAL     (PIPE_QUERY_DRIVER_SPECIFIC + 1)  /* total # of batches (submits) */
 #define FD_QUERY_BATCH_SYSMEM    (PIPE_QUERY_DRIVER_SPECIFIC + 2)  /* batches using system memory (GMEM bypass) */
 #define FD_QUERY_BATCH_GMEM      (PIPE_QUERY_DRIVER_SPECIFIC + 3)  /* batches using GMEM */
-#define FD_QUERY_BATCH_RESTORE   (PIPE_QUERY_DRIVER_SPECIFIC + 4)  /* batches requiring GMEM restore */
-#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 5)  /* texture/buffer uploads using staging blit */
-#define FD_QUERY_SHADOW_UPLOADS  (PIPE_QUERY_DRIVER_SPECIFIC + 6)  /* texture/buffer uploads that shadowed rsc */
+#define FD_QUERY_BATCH_NONDRAW   (PIPE_QUERY_DRIVER_SPECIFIC + 4)  /* compute/blit batches */
+#define FD_QUERY_BATCH_RESTORE   (PIPE_QUERY_DRIVER_SPECIFIC + 5)  /* batches requiring GMEM restore */
+#define FD_QUERY_STAGING_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 6)  /* texture/buffer uploads using staging blit */
+#define FD_QUERY_SHADOW_UPLOADS  (PIPE_QUERY_DRIVER_SPECIFIC + 7)  /* texture/buffer uploads that shadowed rsc */
 
 void fd_query_screen_init(struct pipe_screen *pscreen);
 void fd_query_context_init(struct pipe_context *pctx);
index 93da2dc08e1d8ebedf0642e35d356d08e55553ac..080b2b17a2e1461c73cbcc84fd434b6377fa7a0e 100644 (file)
@@ -65,6 +65,8 @@ read_counter(struct fd_context *ctx, int type)
                return ctx->stats.batch_sysmem;
        case FD_QUERY_BATCH_GMEM:
                return ctx->stats.batch_gmem;
+       case FD_QUERY_BATCH_NONDRAW:
+               return ctx->stats.batch_nondraw;
        case FD_QUERY_BATCH_RESTORE:
                return ctx->stats.batch_restore;
        case FD_QUERY_STAGING_UPLOADS:
@@ -82,6 +84,7 @@ is_rate_query(struct fd_query *q)
        case FD_QUERY_BATCH_TOTAL:
        case FD_QUERY_BATCH_SYSMEM:
        case FD_QUERY_BATCH_GMEM:
+       case FD_QUERY_BATCH_NONDRAW:
        case FD_QUERY_BATCH_RESTORE:
        case FD_QUERY_STAGING_UPLOADS:
        case FD_QUERY_SHADOW_UPLOADS:
@@ -147,6 +150,7 @@ fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
        case FD_QUERY_BATCH_TOTAL:
        case FD_QUERY_BATCH_SYSMEM:
        case FD_QUERY_BATCH_GMEM:
+       case FD_QUERY_BATCH_NONDRAW:
        case FD_QUERY_BATCH_RESTORE:
        case FD_QUERY_STAGING_UPLOADS:
        case FD_QUERY_SHADOW_UPLOADS: