From eeafaf2d37cdc7f83f997e8babd8f770243ecf25 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 21 Jun 2016 12:39:32 -0400 Subject: [PATCH] freedreno: dynamically sized/growable cmd buffers Signed-off-by: Rob Clark --- /* Review notes (placed after the three-dash separator, so not part of the commit). (1) fd_batch_create(): the three ringbuffers are now created with size 0 unless fd_device_version() reports less than FD_VERSION_UNLIMITED_CMDS, in which case 0x100000 is used; the old fixed size was 0x10000. Presumably size 0 requests a growable ringbuffer from libdrm - TODO confirm against the libdrm_freedreno API. (2) fd_batch_check_size(): the new early return for FD_VERSION_UNLIMITED_CMDS sits ahead of the existing size / FD_DBG_FLUSH condition, so on such kernels that condition is never evaluated, including its FD_DBG_FLUSH half - NOTE(review): confirm that disabling the debug flag here is intended. (3) BEGIN_RING(): when cur + ndwords reaches end it now calls fd_ringbuffer_grow(ring, ndwords) instead of only logging. (4) OUT_PKT2(): new helper that reserves one dword and writes CP_TYPE2_PKT. (5) __OUT_IB(): emits one CP_INDIRECT_BUFFER_PFE/PFD packet per index below fd_ringbuffer_cmd_count(target), each followed by OUT_PKT2(); the dword count is the return value of fd_ringbuffer_emit_reloc_ring_full() divided by 4, presumably a size in bytes - TODO confirm. */ .../drivers/freedreno/freedreno_batch.c | 25 +++++++++------ .../drivers/freedreno/freedreno_util.h | 31 +++++++++++-------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 51a61d96e02..6d17a422fc4 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -36,6 +36,7 @@ fd_batch_create(struct fd_context *ctx) { struct fd_batch *batch = CALLOC_STRUCT(fd_batch); static unsigned seqno = 0; + unsigned size = 0; if (!batch) return NULL; @@ -44,13 +45,18 @@ fd_batch_create(struct fd_context *ctx) batch->seqno = ++seqno; batch->ctx = ctx; - /* TODO how to pick a good size? Or maybe we should introduce - * fd_ringlist? Also, make sure size is aligned with bo-cache - * bucket size, since otherwise that will round up size.. + /* if kernel is too old to support unlimited # of cmd buffers, we + * have no option but to allocate large worst-case sizes so that + * we don't need to grow the ringbuffer. Performance is likely to + * suffer, but there is no good alternative. 
*/ - batch->draw = fd_ringbuffer_new(ctx->screen->pipe, 0x10000); - batch->binning = fd_ringbuffer_new(ctx->screen->pipe, 0x10000); - batch->gmem = fd_ringbuffer_new(ctx->screen->pipe, 0x10000); + if (fd_device_version(ctx->screen->dev) < FD_VERSION_UNLIMITED_CMDS) { + size = 0x100000; + } + + batch->draw = fd_ringbuffer_new(ctx->screen->pipe, size); + batch->binning = fd_ringbuffer_new(ctx->screen->pipe, size); + batch->gmem = fd_ringbuffer_new(ctx->screen->pipe, size); fd_ringbuffer_set_parent(batch->gmem, NULL); fd_ringbuffer_set_parent(batch->draw, batch->gmem); @@ -117,10 +123,9 @@ fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc, void fd_batch_check_size(struct fd_batch *batch) { - /* TODO eventually support having a list of draw/binning rb's - * and if we are too close to the end, add another to the - * list. For now we just flush. - */ + if (fd_device_version(batch->ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS) + return; + struct fd_ringbuffer *ring = batch->draw; if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) || (fd_mesa_debug & FD_DBG_FLUSH)) diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index b6b91f92c7c..8f125d95554 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -238,13 +238,8 @@ OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) { - if ((ring->cur + ndwords) >= ring->end) { - /* this probably won't really work if we have multiple tiles.. - * but it is ok for 2d.. we might need different behavior - * depending on 2d or 3d pipe. 
- */ - DBG("uh oh.."); - } + if (ring->cur + ndwords >= ring->end) + fd_ringbuffer_grow(ring, ndwords); } static inline void @@ -254,6 +249,13 @@ OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } +static inline void +OUT_PKT2(struct fd_ringbuffer *ring) +{ + BEGIN_RING(ring, 1); + OUT_RING(ring, CP_TYPE2_PKT); +} + static inline void OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { @@ -271,9 +273,7 @@ OUT_WFI(struct fd_ringbuffer *ring) static inline void __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target) { - uint32_t dwords = target->cur - target->start; - - assert(dwords > 0); + unsigned count = fd_ringbuffer_cmd_count(target); /* for debug after a lock up, write a unique counter value * to scratch6 for each IB, to make it easier to match up @@ -283,9 +283,14 @@ __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target */ emit_marker(ring, 6); - OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); - fd_ringbuffer_emit_reloc_ring_full(ring, target, 0); - OUT_RING(ring, dwords); + for (unsigned i = 0; i < count; i++) { + uint32_t dwords; + OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); + dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; + assert(dwords > 0); + OUT_RING(ring, dwords); + OUT_PKT2(ring); + } emit_marker(ring, 6); } -- 2.30.2