freedreno/a6xx: move ubwc clear to blitter
author Rob Clark <robdclark@chromium.org>
Wed, 26 Aug 2020 18:47:02 +0000 (11:47 -0700)
committer Marge Bot <eric+marge@anholt.net>
Fri, 28 Aug 2020 22:24:25 +0000 (22:24 +0000)
Avoid having to mmap() unnecessarily by moving UBWC clear/init to
blitter.

Because we don't have a context when the bo is allocated, we need to
lazily initialize the UBWC data, so hook into the fd_batch_resource_write()
tracking to do this.  Don't bother with fd_batch_resource_read(), because
reading a resource that has never been written would be undefined anyway.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6475>

src/gallium/drivers/freedreno/a6xx/fd6_blitter.c
src/gallium/drivers/freedreno/freedreno_batch.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_resource.c
src/gallium/drivers/freedreno/freedreno_resource.h

index 767b55698ee37f45f191c3b35be463745d874067..ac1468e2c0f11fbe4dcfa1553d414febec0df9eb 100644 (file)
@@ -432,6 +432,108 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
        }
 }
 
+/*
+ * Zero a resource's UBWC flag data using the 2D blitter.
+ *
+ * The region cleared is the first rsc->layout.slices[0].offset bytes of
+ * rsc->bo.  It is addressed as a linear R8 surface 0x1000 bytes wide and
+ * written in chunks of at most 0x4000 rows, each chunk being one CP_BLIT.
+ *
+ * Commands are emitted into the ring returned by fd_batch_get_prologue()
+ * (presumably so the clear executes ahead of the rest of the batch --
+ * confirm against fd_batch_get_prologue()).
+ *
+ * batch: batch that receives the clear commands
+ * rsc:   resource whose UBWC flag data is cleared
+ */
+static void
+fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc)
+{
+       struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
+       union pipe_color_union color = {};
+
+       emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color);
+
+       /* No source surface is used: SP_PS_2D_SRC_INFO and the twelve
+        * registers following it are all written as zero.
+        */
+       OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       /* RB_2D_SRC_SOLID_C0 and the three registers after it: all zero */
+       OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+       OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
+       OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(0));
+       OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
+       OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
+
+       /* Bytes of flag data still to clear, and where the next chunk starts */
+       unsigned size = rsc->layout.slices[0].offset;
+       unsigned offset = 0;
+
+       /* We could be more clever here and realize that we could use a
+        * larger width if the size is aligned to something more than a
+        * single page.. or even use a format larger than r8 in those
+        * cases. But for normal sized textures and even up to 16k x 16k
+        * at <= 4byte/pixel, we'll only go thru the loop once
+        */
+       const unsigned w = 0x1000;
+
+       /* ubwc size should always be page aligned: */
+       assert((size % w) == 0);
+
+       while (size > 0) {
+               /* Normally this is a run of full-width rows.  If fewer than
+                * w bytes remain (only possible when the alignment
+                * assumption above is violated in a build where assert() is
+                * compiled out), finish with a single short row rather than
+                * computing h == 0, which would never make progress and
+                * would emit commands forever.
+                */
+               const unsigned bw = MIN2(w, size);
+               const unsigned h = MIN2(0x4000, size / bw);
+               /* width is already aligned to a suitable pitch: */
+               const unsigned p = w;
+
+               /*
+                * Emit destination:
+                */
+               OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
+               OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
+                               A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+                               A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
+               OUT_RELOC(ring, rsc->bo, offset, 0, 0);    /* RB_2D_DST_LO/HI */
+               OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+
+               /*
+                * Blit command:
+                */
+
+               /* Destination rectangle; the bottom-right corner is inclusive */
+               OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+               OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
+               OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(bw - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
+
+               OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+               OUT_RING(ring, 0x3f);
+               OUT_WFI5(ring);
+
+               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+               OUT_RING(ring, fd6_context(batch->ctx)->magic.RB_UNKNOWN_8E04_blit);
+
+               OUT_PKT7(ring, CP_BLIT, 1);
+               OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+               OUT_WFI5(ring);
+
+               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+               OUT_RING(ring, 0);             /* RB_UNKNOWN_8E04 */
+
+               /* Advance past the bytes covered by this chunk */
+               offset += bw * h;
+               size -= bw * h;
+       }
+
+       /* Flush the clear out and invalidate caches once all chunks are emitted */
+       fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+       fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
+       fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
+       fd6_cache_inv(batch, ring);
+}
+
+
 static void
 emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc, enum pipe_format pfmt, unsigned level, unsigned layer)
 {
@@ -896,6 +998,8 @@ fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 void
 fd6_blitter_init(struct pipe_context *pctx)
 {
+       fd_context(pctx)->clear_ubwc = fd6_clear_ubwc;
+
        if (fd_mesa_debug & FD_DBG_NOBLIT)
                return;
 
index 54f8601d49980556506a49e96e7c15b7a674a2cc..f6858bcd292822dc87e18d079c36374476738f1c 100644 (file)
@@ -437,6 +437,8 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
 {
        fd_screen_assert_locked(batch->ctx->screen);
 
+       fd_batch_write_prep(batch, rsc);
+
        if (rsc->stencil)
                fd_batch_resource_write(batch, rsc->stencil);
 
index 620768306abee536a083f521d00e5c57b39ab694..e3ee25db022a2d2f393b1f9c96cc669187470f15 100644 (file)
@@ -365,6 +365,7 @@ struct fd_context {
 
        /* blitter: */
        bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info);
+       void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc);
 
        /* handling for barriers: */
        void (*framebuffer_barrier)(struct fd_context *ctx);
index ddc7448102626febdc35ed1f4a7e010581e4ab5c..0caa3f6756b7ec1db7d1ceefacb4e91960a5f0f3 100644 (file)
@@ -194,8 +194,7 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
         * around the issue, but any memset value seems to.
         */
        if (rsc->layout.ubwc) {
-               void *buf = fd_bo_map(rsc->bo);
-               memset(buf, 0, rsc->layout.slices[0].offset);
+               rsc->needs_ubwc_clear = true;
        }
 
        rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
index a88f6cd45addfe652bb69522b695137f84f3d066..3bc0ffb1320a7ac073fada4f779ba4bf7d0862e7 100644 (file)
@@ -87,6 +87,12 @@ struct fd_resource {
         */
        enum fd_dirty_3d_state dirty;
 
+       /* Uninitialized resources with UBWC format need their UBWC flag data
+        * cleared before writes, as the UBWC state is read and used during
+        * writes, so undefined UBWC flag data results in undefined results.
+        */
+       bool needs_ubwc_clear : 1;
+
        /*
         * LRZ
         *
@@ -282,6 +288,15 @@ fd_batch_references_resource(struct fd_batch *batch, struct fd_resource *rsc)
        return rsc->batch_mask & (1 << batch->idx);
 }
 
+/*
+ * Called before a batch writes to a resource: if the resource is still
+ * flagged as needing its UBWC flag data cleared, run the context's
+ * clear_ubwc hook for it and drop the flag so the clear happens only once.
+ */
+static inline void
+fd_batch_write_prep(struct fd_batch *batch, struct fd_resource *rsc)
+{
+       /* Common case: nothing pending for this resource */
+       if (!unlikely(rsc->needs_ubwc_clear))
+               return;
+
+       batch->ctx->clear_ubwc(batch, rsc);
+       rsc->needs_ubwc_clear = false;
+}
+
 static inline void
 fd_batch_resource_read(struct fd_batch *batch,
                struct fd_resource *rsc)