From 958f6ffb60640c333a8b568c5f41467a1fecd1c0 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 6 Jun 2019 10:22:04 -0700 Subject: [PATCH] freedreno/a6xx: fix hangs with newer sqe fw With the newer (v1.76) fw, we were getting hangs (compared to older v1.66 fw). Re-work the GMEM code to structure things a bit closer to the blob. This moves some PKT7 packets from IB2 to IB1, which I think is what was confusing SQE and causing it to get stuck in an infinite loop. But in general, structuring things closer to the way the blob does makes it easier to compare cmdstream. Note: this is a bit on the large side for what I'd normally consider for stable, but right now it is looking like it is the newer fw that is headed for linux-firmware. This should definitely have some soak time on master, but it is probably a good idea for this patch to end up in distro mesa builds by the time a630_sqe.fw hits linux-firmware. Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Rob Clark Reviewed-by: Kristian H. Kristensen --- src/gallium/drivers/freedreno/a6xx/fd6_gmem.c | 113 +++++++++++++----- 1 file changed, 81 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index 6ad0bc68ef4..ebdfd5b8923 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -214,6 +214,12 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, OUT_RING(ring, 0x00000000); } + /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAZ_LRZ_BUFFER_BASE + * plus this CP_EVENT_WRITE at the end in it's own IB.. 
+ */ + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25)); + if (rsc->stencil) { struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0); stride = slice->pitch * rsc->stencil->cpp; @@ -402,7 +408,6 @@ set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag) static void emit_binning_pass(struct fd_batch *batch) { - struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; @@ -463,12 +468,22 @@ emit_binning_pass(struct fd_batch *batch) OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, UNK_2D); - OUT_PKT7(ring, CP_EVENT_WRITE, 4); - OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ - OUT_RING(ring, 0x00000000); - + fd6_cache_inv(batch, ring); + fd6_cache_flush(batch, ring); fd_wfi(batch, ring); + + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_WFI5(ring); + + OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x7c400004); /* RB_CCU_CNTL */ } static void @@ -544,6 +559,15 @@ fd6_emit_tile_init(struct fd_batch *batch) OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1); OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1); + OUT_RING(ring, 0x1); + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1); + OUT_RING(ring, 0x1); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x1); } else { set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); patch_draws(batch, IGNORE_VISIBILITY); @@ -580,9 +604,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd_ringbuffer *ring = batch->gmem; - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); - emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, 
A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); @@ -595,8 +616,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) set_scissor(ring, x1, y1, x2, y2); - set_window_offset(ring, x1, y1); - OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); @@ -620,7 +639,32 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) (tile->p * 4) + (32 * A6XX_VSC_DATA_PITCH), 0, 0); OUT_RELOC(ring, fd6_ctx->vsc_data2, (tile->p * A6XX_VSC_DATA2_PITCH), 0, 0); + + set_window_offset(ring, x1, y1); + + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1); + OUT_RING(ring, 0x0); } else { + set_window_offset(ring, x1, y1); + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); @@ -1028,26 +1072,6 @@ prepare_tile_fini_ib(struct fd_batch *batch) FD_RINGBUFFER_STREAMING); ring = batch->tile_fini; - if (use_hw_binning(batch)) { - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); - } - - OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); - OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | - CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | - CP_SET_DRAW_STATE__0_GROUP_ID(0)); - OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); - OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); - - OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); - OUT_RING(ring, 0x0); - - emit_marker6(ring, 7); - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); - emit_marker6(ring, 7); - 
set_blit_scissor(batch, ring); if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { @@ -1081,7 +1105,32 @@ prepare_tile_fini_ib(struct fd_batch *batch) static void fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) { - fd6_emit_ib(batch->gmem, batch->tile_fini); + struct fd_ringbuffer *ring = batch->gmem; + + if (use_hw_binning(batch)) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); + } + + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x0); + + emit_marker6(ring, 7); + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); + emit_marker6(ring, 7); + + fd6_emit_ib(ring, batch->tile_fini); + + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); } static void -- 2.30.2