From: Rob Clark <robclark@freedesktop.org>
Date: Fri, 6 Sep 2013 17:20:46 +0000 (-0400)
Subject: freedreno: avoid stalling at ringbuffer wraparound
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6e9c386d16b3b38be6d1496758ef983b64744844;p=mesa.git

freedreno: avoid stalling at ringbuffer wraparound

Because of how the tiling works, we can't really flush at arbitrary
points very easily.  So wraparound is handled by resetting to top of
ringbuffer.  Previously this would stall until current rendering is
complete.  Instead cycle through multiple ringbuffers to avoid a stall.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---

diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 1d03351f041..96e1ef6e579 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -34,27 +34,32 @@
 #include "freedreno_gmem.h"
 #include "freedreno_util.h"
 
-/* there are two cases where we currently need to wait for render complete:
- * 1) pctx->flush() .. since at the moment we have no way for DDX to sync
- *    the presentation blit with the 3d core
- * 2) wrap-around for ringbuffer.. possibly we can do something more
- *    Intelligent here.  Right now we need to ensure there is enough room
- *    at the end of the drawcmds in the cmdstream buffer for all the per-
- *    tile cmds.  We do this the lamest way possible, by making the ringbuffer
- *    big, and flushing and resetting back to the beginning if we get too
- *    close to the end.
- */
 static void
-fd_context_wait(struct pipe_context *pctx)
+fd_context_next_rb(struct pipe_context *pctx)
 {
 	struct fd_context *ctx = fd_context(pctx);
-	uint32_t ts = fd_ringbuffer_timestamp(ctx->ring);
+	struct fd_ringbuffer *ring;
+	uint32_t ts;
+
+	fd_ringmarker_del(ctx->draw_start);
+	fd_ringmarker_del(ctx->draw_end);
+
+	/* grab next ringbuffer: */
+	ring = ctx->rings[(ctx->rings_idx++) % ARRAY_SIZE(ctx->rings)];
 
-	DBG("wait: %u", ts);
+	/* wait for new rb to be idle: */
+	ts = fd_ringbuffer_timestamp(ring);
+	if (ts) {
+		DBG("wait: %u", ts);
+		fd_pipe_wait(ctx->screen->pipe, ts);
+	}
+
+	fd_ringbuffer_reset(ring);
+
+	ctx->draw_start = fd_ringmarker_new(ring);
+	ctx->draw_end = fd_ringmarker_new(ring);
 
-	fd_pipe_wait(ctx->screen->pipe, ts);
-	fd_ringbuffer_reset(ctx->ring);
-	fd_ringmarker_mark(ctx->draw_start);
+	ctx->ring = ring;
 }
 
 /* emit accumulated render cmds, needed for example if render target has
@@ -79,7 +84,7 @@ fd_context_render(struct pipe_context *pctx)
 	 * wrap around:
 	 */
 	if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8)
-		fd_context_wait(pctx);
+		fd_context_next_rb(pctx);
 
 	ctx->needs_flush = false;
 	ctx->cleared = ctx->restore = ctx->resolve = 0;
@@ -131,6 +136,7 @@ fd_context_init(struct fd_context *ctx,
 {
 	struct fd_screen *screen = fd_screen(pscreen);
 	struct pipe_context *pctx;
+	int i;
 
 	ctx->screen = screen;
 
@@ -144,12 +150,13 @@ fd_context_init(struct fd_context *ctx,
 	pctx->priv = priv;
 	pctx->flush = fd_context_flush;
 
-	ctx->ring = fd_ringbuffer_new(screen->pipe, 0x100000);
-	if (!ctx->ring)
-		goto fail;
+	for (i = 0; i < ARRAY_SIZE(ctx->rings); i++) {
+		ctx->rings[i] = fd_ringbuffer_new(screen->pipe, 0x400000);
+		if (!ctx->rings[i])
+			goto fail;
+	}
 
-	ctx->draw_start = fd_ringmarker_new(ctx->ring);
-	ctx->draw_end = fd_ringmarker_new(ctx->ring);
+	fd_context_next_rb(pctx);
 
 	util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
 			16, UTIL_SLAB_SINGLETHREADED);
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 3d18260445f..808e3a72f02 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -134,6 +134,18 @@ struct fd_context {
 	} gmem_reason;
 	unsigned num_draws;
 
+	/* we can't really sanely deal with wraparound point in ringbuffer
+	 * and because of the way tiling works we can't really flush at
+	 * arbitrary points (without a big performance hit).  When we get
+	 * too close to the end of the current ringbuffer, cycle to the next
+	 * one (and wait for pending rendering from next rb to complete).
+	 * We want the # of ringbuffers to be high enough that we don't
+	 * normally have to wait before resetting to the start of the next
+	 * rb.
+	 */
+	struct fd_ringbuffer *rings[4];
+	unsigned rings_idx;
+
 	struct fd_ringbuffer *ring;
 	struct fd_ringmarker *draw_start, *draw_end;