freedreno/a3xx/compiler: refactor trans_samp()

[mesa.git] / src / gallium / drivers / freedreno / freedreno_gmem.c
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c

index 47f7a310e8c747d6a7b07de5bb18726ac5159733..861ebf5675e96ff2a1a0a949abc514c150efd1cb 100644 (file)
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -35,6 +35,7 @@
  #include "freedreno_gmem.h"
  #include "freedreno_context.h"
  #include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
  #include "freedreno_util.h"
  
  /*
@@ -85,7 +86,8 @@ calculate_tiles(struct fd_context *ctx)
         uint32_t bin_w, bin_h;
         uint32_t max_width = bin_width(ctx);
         uint32_t cpp = 4;
-       uint32_t i, j, t, p, n, xoff, yoff;
+       uint32_t i, j, t, xoff, yoff;
+       uint32_t tpp_x, tpp_y;
         bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
  
         if (pfb->cbufs[0])
@@ -142,23 +144,70 @@ calculate_tiles(struct fd_context *ctx)
         gmem->bin_w = bin_w;
         gmem->nbins_x = nbins_x;
         gmem->nbins_y = nbins_y;
+       gmem->minx = minx;
+       gmem->miny = miny;
         gmem->width = width;
         gmem->height = height;
  
-       /* Assign tiles and pipes:
-        * NOTE we currently take a rather simplistic approach of
-        * mapping rows of tiles to a pipe.  At some point it might
-        * be worth playing with different strategies and seeing if
-        * that makes much impact on performance.
+       /*
+        * Assign tiles and pipes:
+        *
+        * At some point it might be worth playing with different
+        * strategies and seeing if that makes much impact on
+        * performance.
          */
-       t = p = n = 0;
+
+#define div_round_up(v, a)  (((v) + (a) - 1) / (a))
+       /* figure out number of tiles per pipe (in x and y) so at most 8 pipes are needed: */
+       tpp_x = tpp_y = 1;
+       while (div_round_up(nbins_y, tpp_y) > 8)
+               tpp_y += 2;
+       while ((div_round_up(nbins_y, tpp_y) *
+                       div_round_up(nbins_x, tpp_x)) > 8)
+               tpp_x += 1;
+
+       /* configure pipes: */
+       xoff = yoff = 0;
+       for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
+               struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+
+               if (xoff >= nbins_x) {
+                       xoff = 0;
+                       yoff += tpp_y;
+               }
+
+               if (yoff >= nbins_y) {
+                       break;
+               }
+
+               pipe->x = xoff;
+               pipe->y = yoff;
+               pipe->w = MIN2(tpp_x, nbins_x - xoff);
+               pipe->h = MIN2(tpp_y, nbins_y - yoff);
+
+               xoff += tpp_x;
+       }
+
+       for (; i < ARRAY_SIZE(ctx->pipe); i++) {
+               struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+               pipe->x = pipe->y = pipe->w = pipe->h = 0;
+       }
+
+#if 0 /* debug */
+       printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
+       for (i = 0; i < 8; i++) {
+               struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+               printf("pipe[%d]: %ux%u @ %u,%u\n", i,
+                               pipe->w, pipe->h, pipe->x, pipe->y);
+       }
+#endif
+
+       /* configure tiles: */
+       t = 0;
         yoff = miny;
         for (i = 0; i < nbins_y; i++) {
-               struct fd_vsc_pipe *pipe = &ctx->pipe[p];
                 uint32_t bw, bh;
  
-               assert(p < ARRAY_SIZE(ctx->pipe));
-
                 xoff = minx;
  
                 /* clip bin height: */
@@ -166,13 +215,20 @@ calculate_tiles(struct fd_context *ctx)
  
                 for (j = 0; j < nbins_x; j++) {
                         struct fd_tile *tile = &ctx->tile[t];
+                       uint32_t n, p;
  
                         assert(t < ARRAY_SIZE(ctx->tile));
  
+                       /* pipe number: */
+                       p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);
+
+                       /* slot number (index of the tile within its pipe): */
+                       n = ((i % tpp_y) * tpp_x) + (j % tpp_x);
+
                         /* clip bin width: */
                         bw = MIN2(bin_w, minx + width - xoff);
  
-                       tile->n = n++;
+                       tile->n = n;
                         tile->p = p;
                         tile->bin_w = bw;
                         tile->bin_h = bh;
@@ -184,22 +240,19 @@ calculate_tiles(struct fd_context *ctx)
                         xoff += bw;
                 }
  
-               /* one pipe per row: */
-               pipe->x = 0;
-               pipe->y = i;
-               pipe->w = nbins_x;
-               pipe->h = 1;
-
-               p++;
-               n = 0;
-
                 yoff += bh;
         }
  
-       for (; p < ARRAY_SIZE(ctx->pipe); p++) {
-               struct fd_vsc_pipe *pipe = &ctx->pipe[p];
-               pipe->x = pipe->y = pipe->w = pipe->h = 0;
+#if 0 /* debug */
+       t = 0;
+       for (i = 0; i < nbins_y; i++) {
+               for (j = 0; j < nbins_x; j++) {
+                       struct fd_tile *tile = &ctx->tile[t++];
+                       printf("|p:%u n:%u|", tile->p, tile->n);
+               }
+               printf("\n");
         }
+#endif
  }
  
  static void
@@ -210,6 +263,9 @@ render_tiles(struct fd_context *ctx)
  
         ctx->emit_tile_init(ctx);
  
+       if (ctx->restore)
+               ctx->stats.batch_restore++;
+
         for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) {
                 struct fd_tile *tile = &ctx->tile[i];
  
@@ -218,16 +274,24 @@ render_tiles(struct fd_context *ctx)
  
                 ctx->emit_tile_prep(ctx, tile);
  
-               if (ctx->restore)
+               if (ctx->restore) {
+                       fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM);
                         ctx->emit_tile_mem2gmem(ctx, tile);
+                       fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
+               }
  
                 ctx->emit_tile_renderprep(ctx, tile);
  
+               fd_hw_query_prepare_tile(ctx, i, ctx->ring);
+
                 /* emit IB to drawcmds: */
                 OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
+               fd_reset_wfi(ctx);
  
                 /* emit gmem2mem to transfer tile back to system memory: */
+               fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM);
                 ctx->emit_tile_gmem2mem(ctx, tile);
+               fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
         }
  }
  
@@ -236,8 +300,11 @@ render_sysmem(struct fd_context *ctx)
  {
         ctx->emit_sysmem_prep(ctx);
  
+       fd_hw_query_prepare_tile(ctx, 0, ctx->ring);
+
         /* emit IB to drawcmds: */
         OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
+       fd_reset_wfi(ctx);
  }
  
  void
@@ -257,30 +324,45 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
                 }
         }
  
+       /* close out the draw cmds by making sure any active queries are
+        * paused:
+        */
+       fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
+
         /* mark the end of the clear/draw cmds before emitting per-tile cmds: */
         fd_ringmarker_mark(ctx->draw_end);
+       fd_ringmarker_mark(ctx->binning_end);
+
+       fd_reset_wfi(ctx);
+
+       ctx->stats.batch_total++;
  
         if (sysmem) {
                 DBG("rendering sysmem (%s/%s)",
                         util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
                         util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+               fd_hw_query_prepare(ctx, 1);
                 render_sysmem(ctx);
+               ctx->stats.batch_sysmem++;
         } else {
                 struct fd_gmem_stateobj *gmem = &ctx->gmem;
                 calculate_tiles(ctx);
                 DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
                         util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
                         util_format_short_name(pipe_surface_format(pfb->zsbuf)));
+               fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
                 render_tiles(ctx);
+               ctx->stats.batch_gmem++;
         }
  
         /* GPU executes starting from tile cmds, which IB back to draw cmds: */
         fd_ringmarker_flush(ctx->draw_end);
  
-       /* mark start for next draw cmds: */
+       /* mark start for next draw/binning cmds: */
         fd_ringmarker_mark(ctx->draw_start);
+       fd_ringmarker_mark(ctx->binning_start);
  
-       fd_reset_rmw_state(ctx);
+       fd_reset_wfi(ctx);
  
         /* update timestamps on render targets: */
         timestamp = fd_ringbuffer_timestamp(ctx->ring);