freedreno: dynamically sized/growable cmd buffers
author: Rob Clark <robclark@freedesktop.org>
Tue, 21 Jun 2016 16:39:32 +0000 (12:39 -0400)
committer: Rob Clark <robdclark@gmail.com>
Sat, 30 Jul 2016 13:23:42 +0000 (09:23 -0400)
Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_util.h

index 51a61d96e029b55c67a4b6db8913f40f91d36bfb..6d17a422fc44ef91ae547f6c2085455b1f167083 100644 (file)
@@ -36,6 +36,7 @@ fd_batch_create(struct fd_context *ctx)
 {
        struct fd_batch *batch = CALLOC_STRUCT(fd_batch);
        static unsigned seqno = 0;
+       unsigned size = 0;
 
        if (!batch)
                return NULL;
@@ -44,13 +45,18 @@ fd_batch_create(struct fd_context *ctx)
        batch->seqno = ++seqno;
        batch->ctx = ctx;
 
-       /* TODO how to pick a good size?  Or maybe we should introduce
-        * fd_ringlist?  Also, make sure size is aligned with bo-cache
-        * bucket size, since otherwise that will round up size..
+       /* if kernel is too old to support unlimited # of cmd buffers, we
+        * have no option but to allocate large worst-case sizes so that
+        * we don't need to grow the ringbuffer.  Performance is likely to
+        * suffer, but there is no good alternative.
         */
-       batch->draw    = fd_ringbuffer_new(ctx->screen->pipe, 0x10000);
-       batch->binning = fd_ringbuffer_new(ctx->screen->pipe, 0x10000);
-       batch->gmem    = fd_ringbuffer_new(ctx->screen->pipe, 0x10000);
+       if (fd_device_version(ctx->screen->dev) < FD_VERSION_UNLIMITED_CMDS) {
+               size = 0x100000;
+       }
+
+       batch->draw    = fd_ringbuffer_new(ctx->screen->pipe, size);
+       batch->binning = fd_ringbuffer_new(ctx->screen->pipe, size);
+       batch->gmem    = fd_ringbuffer_new(ctx->screen->pipe, size);
 
        fd_ringbuffer_set_parent(batch->gmem, NULL);
        fd_ringbuffer_set_parent(batch->draw, batch->gmem);
@@ -117,10 +123,9 @@ fd_batch_resource_used(struct fd_batch *batch, struct fd_resource *rsc,
 void
 fd_batch_check_size(struct fd_batch *batch)
 {
-       /* TODO eventually support having a list of draw/binning rb's
-        * and if we are too close to the end, add another to the
-        * list.  For now we just flush.
-        */
+       if (fd_device_version(batch->ctx->screen->dev) >= FD_VERSION_UNLIMITED_CMDS)
+               return;
+
        struct fd_ringbuffer *ring = batch->draw;
        if (((ring->cur - ring->start) > (ring->size/4 - 0x1000)) ||
                        (fd_mesa_debug & FD_DBG_FLUSH))
index b6b91f92c7c91b550e7979f40197c0ad220e4d0d..8f125d95554fba5745df28970578aaca3b0b721c 100644 (file)
@@ -238,13 +238,8 @@ OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
 
 static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
 {
-       if ((ring->cur + ndwords) >= ring->end) {
-               /* this probably won't really work if we have multiple tiles..
-                * but it is ok for 2d..  we might need different behavior
-                * depending on 2d or 3d pipe.
-                */
-               DBG("uh oh..");
-       }
+       if (ring->cur + ndwords >= ring->end)
+               fd_ringbuffer_grow(ring, ndwords);
 }
 
 static inline void
@@ -254,6 +249,13 @@ OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
        OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
 }
 
+static inline void
+OUT_PKT2(struct fd_ringbuffer *ring)
+{
+       BEGIN_RING(ring, 1);
+       OUT_RING(ring, CP_TYPE2_PKT);
+}
+
 static inline void
 OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
 {
@@ -271,9 +273,7 @@ OUT_WFI(struct fd_ringbuffer *ring)
 static inline void
 __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target)
 {
-       uint32_t dwords = target->cur - target->start;
-
-       assert(dwords > 0);
+       unsigned count = fd_ringbuffer_cmd_count(target);
 
        /* for debug after a lock up, write a unique counter value
         * to scratch6 for each IB, to make it easier to match up
@@ -283,9 +283,14 @@ __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target
         */
        emit_marker(ring, 6);
 
-       OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
-       fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
-       OUT_RING(ring, dwords);
+       for (unsigned i = 0; i < count; i++) {
+               uint32_t dwords;
+               OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
+               dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
+               assert(dwords > 0);
+               OUT_RING(ring, dwords);
+               OUT_PKT2(ring);
+       }
 
        emit_marker(ring, 6);
 }