From 8d1d0c126fdf4e65cef9d140c3254331549847fa Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Wed, 26 Aug 2020 11:47:02 -0700
Subject: [PATCH] freedreno/a6xx: move ubwc clear to blitter

Avoid having to mmap() unnecessarily by moving UBWC clear/init to
blitter.

Because we don't have a context when the bo is allocated, we need to
lazily initialize UBWC data, so hook into the resource_written()
tracking to do this. Don't bother with resource_read() because that
would be undefined anyways.

Signed-off-by: Rob Clark
Part-of:
---
 .../drivers/freedreno/a6xx/fd6_blitter.c      | 104 ++++++++++++++++++
 .../drivers/freedreno/freedreno_batch.c       |   2 +
 .../drivers/freedreno/freedreno_context.h     |   1 +
 .../drivers/freedreno/freedreno_resource.c    |   3 +-
 .../drivers/freedreno/freedreno_resource.h    |  15 +++
 5 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c
index 767b55698ee..ac1468e2c0f 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c
@@ -432,6 +432,108 @@ emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	}
 }
 
+static void
+fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc)
+{
+	struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
+	union pipe_color_union color = {};
+
+	emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color);
+
+	OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+	OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
+	OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(0));
+	OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
+	OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
+
+	unsigned size = rsc->layout.slices[0].offset;
+	unsigned offset = 0;
+
+	/* We could be more clever here and realize that we could use a
+	 * larger width if the size is aligned to something more than a
+	 * single page.. or even use a format larger than r8 in those
+	 * cases. But for normal sized textures and even up to 16k x 16k
+	 * at <= 4byte/pixel, we'll only go thru the loop once
+	 */
+	const unsigned w = 0x1000;
+
+	/* ubwc size should always be page aligned: */
+	assert((size % w) == 0);
+
+	while (size > 0) {
+		const unsigned h = MIN2(0x4000, size / w);
+		/* width is already aligned to a suitable pitch: */
+		const unsigned p = w;
+
+		/*
+		 * Emit destination:
+		 */
+		OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
+		OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
+				A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+				A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
+		OUT_RELOC(ring, rsc->bo, offset, 0, 0);    /* RB_2D_DST_LO/HI */
+		OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
+		OUT_RING(ring, 0x00000000);
+		OUT_RING(ring, 0x00000000);
+		OUT_RING(ring, 0x00000000);
+		OUT_RING(ring, 0x00000000);
+		OUT_RING(ring, 0x00000000);
+
+		/*
+		 * Blit command:
+		 */
+
+		OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
+		OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
+		OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
+
+		OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+		OUT_RING(ring, 0x3f);
+		OUT_WFI5(ring);
+
+		OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+		OUT_RING(ring, fd6_context(batch->ctx)->magic.RB_UNKNOWN_8E04_blit);
+
+		OUT_PKT7(ring, CP_BLIT, 1);
+		OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+		OUT_WFI5(ring);
+
+		OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
+		OUT_RING(ring, 0);             /* RB_UNKNOWN_8E04 */
+
+		offset += w * h;
+		size -= w * h;
+	}
+
+	fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
+	fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
+	fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
+	fd6_cache_inv(batch, ring);
+}
+
 static void
 emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc, enum pipe_format pfmt, unsigned level, unsigned layer)
 {
@@ -896,6 +998,8 @@ fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
 void
 fd6_blitter_init(struct pipe_context *pctx)
 {
+	fd_context(pctx)->clear_ubwc = fd6_clear_ubwc;
+
 	if (fd_mesa_debug & FD_DBG_NOBLIT)
 		return;
 
diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c
index 54f8601d499..f6858bcd292 100644
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -437,6 +437,8 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
 {
 	fd_screen_assert_locked(batch->ctx->screen);
 
+	fd_batch_write_prep(batch, rsc);
+
 	if (rsc->stencil)
 		fd_batch_resource_write(batch, rsc->stencil);
 
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 620768306ab..e3ee25db022 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -365,6 +365,7 @@ struct fd_context {
 
 	/* blitter: */
 	bool (*blit)(struct fd_context *ctx, const struct pipe_blit_info *info);
+	void (*clear_ubwc)(struct fd_batch *batch, struct fd_resource *rsc);
 
 	/* handling for barriers: */
 	void (*framebuffer_barrier)(struct fd_context *ctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index ddc74481026..0caa3f6756b 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -194,8 +194,7 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
 	 * around the issue, but any memset value seems to.
 	 */
 	if (rsc->layout.ubwc) {
-		void *buf = fd_bo_map(rsc->bo);
-		memset(buf, 0, rsc->layout.slices[0].offset);
+		rsc->needs_ubwc_clear = true;
 	}
 
 	rsc->seqno = p_atomic_inc_return(&screen->rsc_seqno);
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index a88f6cd45ad..3bc0ffb1320 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -87,6 +87,12 @@ struct fd_resource {
 	 */
 	enum fd_dirty_3d_state dirty;
 
+	/* Uninitialized resources with UBWC format need their UBWC flag data
+	 * cleared before writes, as the UBWC state is read and used during
+	 * writes, so undefined UBWC flag data results in undefined results.
+	 */
+	bool needs_ubwc_clear : 1;
+
 	/*
 	 * LRZ
 	 *
@@ -282,6 +288,15 @@ fd_batch_references_resource(struct fd_batch *batch, struct fd_resource *rsc)
 	return rsc->batch_mask & (1 << batch->idx);
 }
 
+static inline void
+fd_batch_write_prep(struct fd_batch *batch, struct fd_resource *rsc)
+{
+	if (unlikely(rsc->needs_ubwc_clear)) {
+		batch->ctx->clear_ubwc(batch, rsc);
+		rsc->needs_ubwc_clear = false;
+	}
+}
+
 static inline void
 fd_batch_resource_read(struct fd_batch *batch,
 		struct fd_resource *rsc)
-- 
2.30.2