X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Ffreedreno_gmem.c;h=41fc1d0a0d4e109c83f5297903fa06144feb9a71;hb=7dee91610550c00eb2c5e01f1f55474dca9338bd;hp=bb15f0a3e164d529c4fddb02ad49fd24cf2f2ab6;hpb=f3cc0d2747568a186dba433ac94af607c38fa024;p=mesa.git

diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index bb15f0a3e16..41fc1d0a0d4 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -77,24 +77,25 @@ static uint32_t bin_width(struct fd_screen *screen)
 
 static uint32_t
 total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
-		   uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem)
+		   uint32_t bin_w, uint32_t bin_h, uint32_t gmem_align,
+		   struct fd_gmem_stateobj *gmem)
 {
 	uint32_t total = 0, i;
 
 	for (i = 0; i < MAX_RENDER_TARGETS; i++) {
 		if (cbuf_cpp[i]) {
-			gmem->cbuf_base[i] = align(total, 0x4000);
+			gmem->cbuf_base[i] = align(total, gmem_align);
 			total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
 		}
 	}
 
 	if (zsbuf_cpp[0]) {
-		gmem->zsbuf_base[0] = align(total, 0x4000);
+		gmem->zsbuf_base[0] = align(total, gmem_align);
 		total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
 	}
 
 	if (zsbuf_cpp[1]) {
-		gmem->zsbuf_base[1] = align(total, 0x4000);
+		gmem->zsbuf_base[1] = align(total, gmem_align);
 		total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
 	}
 
@@ -116,11 +117,13 @@ calculate_tiles(struct fd_batch *batch)
 	uint32_t minx, miny, width, height;
 	uint32_t nbins_x = 1, nbins_y = 1;
 	uint32_t bin_w, bin_h;
+	uint32_t gmem_align = 0x4000;
 	uint32_t max_width = bin_width(screen);
 	uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
 	uint32_t i, j, t, xoff, yoff;
 	uint32_t tpp_x, tpp_y;
-	bool has_zs = !!(batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
+	bool has_zs = !!(batch->gmem_reason & (FD_GMEM_DEPTH_ENABLED |
+		FD_GMEM_STENCIL_ENABLED | FD_GMEM_CLEARS_DEPTH_STENCIL));
 	int tile_n[npipes];
 
 	if (has_zs) {
@@ -128,6 +131,10 @@ calculate_tiles(struct fd_batch *batch)
 		zsbuf_cpp[0] = rsc->cpp;
 		if (rsc->stencil)
 			zsbuf_cpp[1] = rsc->stencil->cpp;
+	} else {
+		/* we might have a zsbuf, but it isn't used */
+		batch->restore &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
+		batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
 	}
 	for (i = 0; i < pfb->nr_cbufs; i++) {
 		if (pfb->cbufs[i])
@@ -177,10 +184,18 @@ calculate_tiles(struct fd_batch *batch)
 				zsbuf_cpp[0], width, height);
 	}
 
+	if (is_a20x(screen) && batch->cleared) {
+		/* under normal circumstances the requirement would be 4K
+		 * but the fast clear path requires an alignment of 32K
+		 */
+		gmem_align = 0x8000;
+	}
+
 	/* then find a bin width/height that satisfies the memory
 	 * constraints:
 	 */
-	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
+	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem_align, gmem) >
+		   gmem_size) {
 		if (bin_w > bin_h) {
 			nbins_x++;
 			bin_w = align(width / nbins_x, gmem_alignw);
@@ -214,12 +229,21 @@ calculate_tiles(struct fd_batch *batch)
 
 #define div_round_up(v, a)  (((v) + (a) - 1) / (a))
 	/* figure out number of tiles per pipe: */
-	tpp_x = tpp_y = 1;
-	while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes)
-		tpp_y += 2;
-	while ((div_round_up(nbins_y, tpp_y) *
-			div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes)
-		tpp_x += 1;
+	if (is_a20x(ctx->screen)) {
+		/* for a20x we want to minimize the number of "pipes"
+		 * binning data has 3 bits for x/y (8x8) but the edges are used to
+		 * cull off-screen vertices with hw binning, so we have 6x6 pipes
+		 */
+		tpp_x = 6;
+		tpp_y = 6;
+	} else {
+		tpp_x = tpp_y = 1;
+		while (div_round_up(nbins_y, tpp_y) > screen->num_vsc_pipes)
+			tpp_y += 2;
+		while ((div_round_up(nbins_y, tpp_y) *
+				div_round_up(nbins_x, tpp_x)) > screen->num_vsc_pipes)
+			tpp_x += 1;
+	}
 
 	gmem->maxpw = tpp_x;
 	gmem->maxph = tpp_y;
@@ -246,6 +270,9 @@ calculate_tiles(struct fd_batch *batch)
 		xoff += tpp_x;
 	}
 
+	/* number of pipes to use for a20x */
+	gmem->num_vsc_pipes = MAX2(1, i);
+
 	for (; i < npipes; i++) {
 		struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
 		pipe->x = pipe->y = pipe->w = pipe->h = 0;
@@ -280,11 +307,12 @@ calculate_tiles(struct fd_batch *batch)
 
 			/* pipe number: */
 			p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);
+			assert(p < gmem->num_vsc_pipes);
 
 			/* clip bin width: */
 			bw = MIN2(bin_w, minx + width - xoff);
-
-			tile->n = tile_n[p]++;
+			tile->n = !is_a20x(ctx->screen) ? tile_n[p]++ :
+				((i % tpp_y + 1) << 3 | (j % tpp_x + 1));
 			tile->p = p;
 			tile->bin_w = bw;
 			tile->bin_h = bh;
@@ -415,10 +443,11 @@ fd_gmem_render_tiles(struct fd_batch *batch)
 		DBG("%p: rendering non-draw", batch);
 		ctx->stats.batch_nondraw++;
 	} else if (sysmem) {
-		DBG("%p: rendering sysmem %ux%u (%s/%s)",
+		DBG("%p: rendering sysmem %ux%u (%s/%s), num_draws=%u",
 			batch, pfb->width, pfb->height,
 			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
-			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+			util_format_short_name(pipe_surface_format(pfb->zsbuf)),
+			batch->num_draws);
 		if (ctx->query_prepare)
 			ctx->query_prepare(batch, 1);
 		render_sysmem(batch);