freedreno/a6xx: Clear gmem buffers at flush time
author Kristian H. Kristensen <hoegsberg@chromium.org>
Fri, 19 Oct 2018 21:29:49 +0000 (14:29 -0700)
committer Rob Clark <robdclark@gmail.com>
Tue, 27 Nov 2018 20:44:02 +0000 (15:44 -0500)
We generate an IB to clear the gmem at flush time and jump to it
before rendering each tile. This lets us get rid of the command stream
patching for gmem offsets.

Signed-off-by: Kristian H. Kristensen <hoegsberg@chromium.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a6xx/fd6_draw.c
src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_batch.h

index d921a33f5f4ec757189290c18dc19f0c612c109c..adb045adbea90939e61609da78c33f097042d704 100644 (file)
@@ -376,159 +376,25 @@ fd6_clear(struct fd_context *ctx, unsigned buffers,
                const union pipe_color_union *color, double depth, unsigned stencil)
 {
        struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
-       struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
-       struct fd_ringbuffer *ring = ctx->batch->draw;
-
-       OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
-       OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(scissor->minx) |
-                        A6XX_RB_BLIT_SCISSOR_TL_Y(scissor->miny));
-       OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(scissor->maxx - 1) |
-                        A6XX_RB_BLIT_SCISSOR_BR_Y(scissor->maxy - 1));
-
-       if (buffers & PIPE_CLEAR_COLOR) {
-               for (int i = 0; i < pfb->nr_cbufs; i++) {
-                       union util_color uc = {0};
-
-                       if (!pfb->cbufs[i])
-                               continue;
-
-                       if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
-                               continue;
-
-                       enum pipe_format pfmt = pfb->cbufs[i]->format;
-
-                       // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
-                       union pipe_color_union swapped;
-                       switch (fd6_pipe2swap(pfmt)) {
-                       case WZYX:
-                               swapped.ui[0] = color->ui[0];
-                               swapped.ui[1] = color->ui[1];
-                               swapped.ui[2] = color->ui[2];
-                               swapped.ui[3] = color->ui[3];
-                               break;
-                       case WXYZ:
-                               swapped.ui[2] = color->ui[0];
-                               swapped.ui[1] = color->ui[1];
-                               swapped.ui[0] = color->ui[2];
-                               swapped.ui[3] = color->ui[3];
-                               break;
-                       case ZYXW:
-                               swapped.ui[3] = color->ui[0];
-                               swapped.ui[0] = color->ui[1];
-                               swapped.ui[1] = color->ui[2];
-                               swapped.ui[2] = color->ui[3];
-                               break;
-                       case XYZW:
-                               swapped.ui[3] = color->ui[0];
-                               swapped.ui[2] = color->ui[1];
-                               swapped.ui[1] = color->ui[2];
-                               swapped.ui[0] = color->ui[3];
-                               break;
-                       }
-
-                       if (util_format_is_pure_uint(pfmt)) {
-                               util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1);
-                       } else if (util_format_is_pure_sint(pfmt)) {
-                               util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1);
-                       } else {
-                               util_pack_color(swapped.f, pfmt, &uc);
-                       }
-
-                       OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
-                       OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
-                               A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));
-
-                       OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
-                       OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
-                               A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
-
-                       OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
-                       OUT_RINGP(ring, i, &ctx->batch->gmem_patches);
-
-                       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
-                       OUT_RING(ring, 0);
-
-                       OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
-                       OUT_RING(ring, uc.ui[0]);
-                       OUT_RING(ring, uc.ui[1]);
-                       OUT_RING(ring, uc.ui[2]);
-                       OUT_RING(ring, uc.ui[3]);
-
-                       fd6_emit_blit(ctx->batch, ring);
-               }
-       }
-
        const bool has_depth = pfb->zsbuf;
-       const bool has_separate_stencil =
-               has_depth && fd_resource(pfb->zsbuf->texture)->stencil;
-
-       /* First clear depth or combined depth/stencil. */
-       if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
-               (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
-               enum pipe_format pfmt = pfb->zsbuf->format;
-               uint32_t clear_value;
-               uint32_t mask = 0;
-
-               if (has_separate_stencil) {
-                       pfmt = util_format_get_depth_only(pfb->zsbuf->format);
-                       clear_value = util_pack_z(pfmt, depth);
-               } else {
-                       pfmt = pfb->zsbuf->format;
-                       clear_value = util_pack_z_stencil(pfmt, depth, stencil);
-               }
-
-               if (buffers & PIPE_CLEAR_DEPTH)
-                       mask |= 0x1;
-
-               if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
-                       mask |= 0x2;
-
-               OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
-               OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
-                       A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));
-
-               OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
-               OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
-                       // XXX UNK0 for separate stencil ??
-                       A6XX_RB_BLIT_INFO_DEPTH |
-                       A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));
-
-               OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
-               OUT_RINGP(ring, MAX_RENDER_TARGETS, &ctx->batch->gmem_patches);
-
-               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
-               OUT_RING(ring, 0);
-
-               OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
-               OUT_RING(ring, clear_value);
-
-               fd6_emit_blit(ctx->batch, ring);
-       }
-
-       /* Then clear the separate stencil buffer in case of 32 bit depth
-        * formats with separate stencil. */
-       if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
-               OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
-               OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
-                                A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(RB6_R8_UINT));
-
-               OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
-               OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
-                                //A6XX_RB_BLIT_INFO_UNK0 |
-                                A6XX_RB_BLIT_INFO_DEPTH |
-                                A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));
-
-               OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
-               OUT_RINGP(ring, MAX_RENDER_TARGETS + 1, &ctx->batch->gmem_patches);
-
-               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
-               OUT_RING(ring, 0);
+       unsigned color_buffers = buffers >> 2;
+       unsigned i;
+
+       /* If we're clearing after draws, fall back to 3D pipe clears.  We could
+        * use blitter clears in the draw batch but then we'd have to patch up the
+        * gmem offsets. This doesn't seem like a useful thing to optimize for
+        * however. */
+       if (ctx->batch->num_draws > 0)
+               return false;
 
-               OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
-               OUT_RING(ring, stencil & 0xff);
+       foreach_bit(i, color_buffers)
+               ctx->batch->clear_color[i] = *color;
+       if (buffers & PIPE_CLEAR_DEPTH)
+               ctx->batch->clear_depth = depth;
+       if (buffers & PIPE_CLEAR_STENCIL)
+               ctx->batch->clear_stencil = stencil;
 
-               fd6_emit_blit(ctx->batch, ring);
-       }
+       ctx->batch->fast_cleared |= buffers;
 
        if (has_depth && (buffers & PIPE_CLEAR_DEPTH)) {
                struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
index e8b6d051cb65641c5d8e6b6ad9fd743f5185b3ef..94ad66417188a52fbe754088cee29ac68f0b61fd 100644 (file)
@@ -253,22 +253,6 @@ patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
        util_dynarray_resize(&batch->draw_patches, 0);
 }
 
-static void
-patch_gmem_bases(struct fd_batch *batch)
-{
-       struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
-       unsigned i;
-
-       for (i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
-               struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
-               if (patch->val < MAX_RENDER_TARGETS)
-                       *patch->cs = gmem->cbuf_base[patch->val];
-               else
-                       *patch->cs = gmem->zsbuf_base[patch->val - MAX_RENDER_TARGETS];
-       }
-       util_dynarray_resize(&batch->gmem_patches, 0);
-}
-
 static void
 update_render_cntl(struct fd_batch *batch, bool binning)
 {
@@ -484,8 +468,6 @@ fd6_emit_tile_init(struct fd_batch *batch)
        emit_zs(ring, pfb->zsbuf, &ctx->gmem);
        emit_mrt(ring, pfb, &ctx->gmem);
 
-       patch_gmem_bases(batch);
-
        disable_msaa(ring);
 
        if (use_hw_binning(batch)) {
@@ -678,6 +660,163 @@ emit_restore_blit(struct fd_batch *batch,
        emit_blit(batch, ring, base, psurf, rsc);
 }
 
+static void
+emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
+{
+       struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+       struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+       /* Emits RB_BLIT_* register writes plus fd6_emit_blit() into 'ring' for every buffer flagged below. */
+       uint32_t buffers = batch->fast_cleared;
+       /* Clear values come from batch->clear_color/clear_depth/clear_stencil; GMEM bases from ctx->gmem. */
+       if (buffers & PIPE_CLEAR_COLOR) {
+               /* One clear per bound color buffer whose PIPE_CLEAR_COLOR0 << i bit is set. */
+               for (int i = 0; i < pfb->nr_cbufs; i++) {
+                       union pipe_color_union *color = &batch->clear_color[i];
+                       union util_color uc = {0};
+
+                       if (!pfb->cbufs[i])
+                               continue;
+
+                       if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
+                               continue;
+
+                       enum pipe_format pfmt = pfb->cbufs[i]->format;
+
+                       // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
+                       union pipe_color_union swapped;
+                       switch (fd6_pipe2swap(pfmt)) {
+                       case WZYX:
+                               swapped.ui[0] = color->ui[0];
+                               swapped.ui[1] = color->ui[1];
+                               swapped.ui[2] = color->ui[2];
+                               swapped.ui[3] = color->ui[3];
+                               break;
+                       case WXYZ:
+                               swapped.ui[2] = color->ui[0];
+                               swapped.ui[1] = color->ui[1];
+                               swapped.ui[0] = color->ui[2];
+                               swapped.ui[3] = color->ui[3];
+                               break;
+                       case ZYXW:
+                               swapped.ui[3] = color->ui[0];
+                               swapped.ui[0] = color->ui[1];
+                               swapped.ui[1] = color->ui[2];
+                               swapped.ui[2] = color->ui[3];
+                               break;
+                       case XYZW:
+                               swapped.ui[3] = color->ui[0];
+                               swapped.ui[2] = color->ui[1];
+                               swapped.ui[1] = color->ui[2];
+                               swapped.ui[0] = color->ui[3];
+                               break;
+                       }
+                       /* Pack the reordered color for pfmt; pure-integer formats go through util_format_write_4ui/4i. */
+                       if (util_format_is_pure_uint(pfmt)) {
+                               util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1);
+                       } else if (util_format_is_pure_sint(pfmt)) {
+                               util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1);
+                       } else {
+                               util_pack_color(swapped.f, pfmt, &uc);
+                       }
+                       /* Program the clear: destination format, clear mask, GMEM base, then the packed clear value. */
+                       OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
+                       OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+                               A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));
+
+                       OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
+                       OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
+                               A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
+                       /* The base for color buffer i is written directly from the gmem state (no later patching). */
+                       OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+                       OUT_RING(ring, gmem->cbuf_base[i]);
+
+                       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
+                       OUT_RING(ring, 0);
+
+                       OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
+                       OUT_RING(ring, uc.ui[0]);
+                       OUT_RING(ring, uc.ui[1]);
+                       OUT_RING(ring, uc.ui[2]);
+                       OUT_RING(ring, uc.ui[3]);
+
+                       fd6_emit_blit(batch, ring);
+               }
+       }
+
+       const bool has_depth = pfb->zsbuf;
+       const bool has_separate_stencil =
+               has_depth && fd_resource(pfb->zsbuf->texture)->stencil;
+       /* NOTE(review): zsbuf == NULL with PIPE_CLEAR_STENCIL set passes the test below and derefs pfb->zsbuf; confirm. */
+       /* First clear depth or combined depth/stencil. */
+       if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
+               (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
+               enum pipe_format pfmt = pfb->zsbuf->format;
+               uint32_t clear_value;
+               uint32_t mask = 0;
+               /* With a separate stencil resource only depth is packed here; otherwise depth and stencil are packed together. */
+               if (has_separate_stencil) {
+                       pfmt = util_format_get_depth_only(pfb->zsbuf->format);
+                       clear_value = util_pack_z(pfmt, batch->clear_depth);
+               } else {
+                       pfmt = pfb->zsbuf->format;
+                       clear_value = util_pack_z_stencil(pfmt, batch->clear_depth,
+                                                                                         batch->clear_stencil);
+               }
+               /* Mask bit 0 is set for a depth clear, bit 1 for a stencil clear of a combined buffer. */
+               if (buffers & PIPE_CLEAR_DEPTH)
+                       mask |= 0x1;
+
+               if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
+                       mask |= 0x2;
+
+               OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
+               OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+                       A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));
+
+               OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
+               OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
+                       // XXX UNK0 for separate stencil ??
+                       A6XX_RB_BLIT_INFO_DEPTH |
+                       A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));
+               /* zsbuf_base[0] is the base used for the depth (or combined depth/stencil) clear. */
+               OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+               OUT_RING(ring, gmem->zsbuf_base[0]);
+
+               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
+               OUT_RING(ring, 0);
+
+               OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
+               OUT_RING(ring, clear_value);
+
+               fd6_emit_blit(batch, ring);
+       }
+
+       /* Then clear the separate stencil buffer in case of 32 bit depth
+        * formats with separate stencil. */
+       if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
+               OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
+               OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+                                A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(RB6_R8_UINT));
+
+               OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
+               OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
+                                //A6XX_RB_BLIT_INFO_UNK0 |
+                                A6XX_RB_BLIT_INFO_DEPTH |
+                                A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));
+               /* zsbuf_base[1] is the base used for the separate stencil clear. */
+               OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
+               OUT_RING(ring, gmem->zsbuf_base[1]);
+
+               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
+               OUT_RING(ring, 0);
+               /* Only the low 8 bits of the recorded stencil clear value are written. */
+               OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
+               OUT_RING(ring, batch->clear_stencil & 0xff);
+
+               fd6_emit_blit(batch, ring);
+       }
+}
+
 /*
  * transfer from system memory to gmem
  */
@@ -724,6 +863,7 @@ prepare_tile_setup_ib(struct fd_batch *batch)
        set_blit_scissor(batch, batch->tile_setup);
 
        emit_restore_blits(batch, batch->tile_setup);
+       emit_clears(batch, batch->tile_setup);
 }
 
 /*
index de4c8198c201478dfaba4fc362a994714ff77479..28b4942f9f8a8ef47adb311512fec021fd037e0e 100644 (file)
@@ -76,6 +76,7 @@ batch_init(struct fd_batch *batch)
        batch->fence = fd_fence_create(batch);
 
        batch->cleared = 0;
+       batch->fast_cleared = 0;
        batch->invalidated = 0;
        batch->restore = batch->resolve = 0;
        batch->needs_flush = false;
@@ -91,8 +92,6 @@ batch_init(struct fd_batch *batch)
        if (is_a3xx(ctx->screen))
                util_dynarray_init(&batch->rbrc_patches, NULL);
 
-       util_dynarray_init(&batch->gmem_patches, NULL);
-
        assert(batch->resources->entries == 0);
 
        util_dynarray_init(&batch->samples, NULL);
@@ -167,8 +166,6 @@ batch_fini(struct fd_batch *batch)
        if (is_a3xx(batch->ctx->screen))
                util_dynarray_fini(&batch->rbrc_patches);
 
-       util_dynarray_fini(&batch->gmem_patches);
-
        while (batch->samples.size > 0) {
                struct fd_hw_sample *samp =
                        util_dynarray_pop(&batch->samples, struct fd_hw_sample *);
index a9a7ecdaf2484c3c546af010d6dc921e1d086b6d..d4feadd55904b73245e7a2a2a2646e7537b2210a 100644 (file)
@@ -95,7 +95,7 @@ struct fd_batch {
                FD_BUFFER_DEPTH   = PIPE_CLEAR_DEPTH,
                FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
                FD_BUFFER_ALL     = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
-       } invalidated, cleared, restore, resolve;
+       } invalidated, cleared, fast_cleared, restore, resolve;
 
        /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
        bool nondraw : 1;
@@ -136,11 +136,6 @@ struct fd_batch {
         */
        struct util_dynarray draw_patches;
 
-       /* Keep track of blitter GMEM offsets that need to be patched up once we
-        * know the gmem layout:
-        */
-       struct util_dynarray gmem_patches;
-
        /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
         * once we know whether or not to use GMEM, and GMEM tile pitch.
         *
@@ -165,6 +160,10 @@ struct fd_batch {
        struct fd_ringbuffer *tile_setup;
        struct fd_ringbuffer *tile_fini;
 
+       union pipe_color_union clear_color[MAX_RENDER_TARGETS];
+       double clear_depth;
+       unsigned clear_stencil;
+
        /**
         * hw query related state:
         */