freedreno/a6xx: fix hangs with newer sqe fw
author Rob Clark <robdclark@chromium.org>
Thu, 6 Jun 2019 17:22:04 +0000 (10:22 -0700)
committer Rob Clark <robdclark@chromium.org>
Fri, 7 Jun 2019 19:07:29 +0000 (12:07 -0700)
With the newer (v1.76) fw, we were getting hangs (compared to older
v1.66 fw).  Re-work the GMEM code to structure things a bit closer to
the blob.  This moves some PKT7 packets from IB2 to IB1, which I think
is what was confusing SQE and causing it to get stuck in an infinite
loop.  But in general, structuring things closer to the way the blob
does makes it easier to compare cmdstream.

Note: this is a bit on the large side for what I'd normally consider for
stable, but right now it looks like it is the newer fw that is
headed for linux-firmware.  This should definitely have some soak time on
master, but it is probably a good idea for this patch to end up in distro mesa
builds by the time a630_sqe.fw hits linux-firmware.

Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/gallium/drivers/freedreno/a6xx/fd6_gmem.c

index 6ad0bc68ef4db1e98a63fb7dc33e25c5e34bf23d..ebdfd5b892358120926eb092bfe5f4f80ab1fa63 100644 (file)
@@ -214,6 +214,12 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
                        OUT_RING(ring, 0x00000000);
                }
 
+               /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
+                * plus this CP_EVENT_WRITE at the end in its own IB..
+                */
+               OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+               OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));
+
                if (rsc->stencil) {
                        struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0);
                        stride = slice->pitch * rsc->stencil->cpp;
@@ -402,7 +408,6 @@ set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
 static void
 emit_binning_pass(struct fd_batch *batch)
 {
-       struct fd_context *ctx = batch->ctx;
        struct fd_ringbuffer *ring = batch->gmem;
        struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
 
@@ -463,12 +468,22 @@ emit_binning_pass(struct fd_batch *batch)
        OUT_PKT7(ring, CP_EVENT_WRITE, 1);
        OUT_RING(ring, UNK_2D);
 
-       OUT_PKT7(ring, CP_EVENT_WRITE, 4);
-       OUT_RING(ring, CACHE_FLUSH_TS);
-       OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0);  /* ADDR_LO/HI */
-       OUT_RING(ring, 0x00000000);
-
+       fd6_cache_inv(batch, ring);
+       fd6_cache_flush(batch, ring);
        fd_wfi(batch, ring);
+
+       OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+       OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+       OUT_RING(ring, 0x0);
+
+       OUT_PKT7(ring, CP_SET_MODE, 1);
+       OUT_RING(ring, 0x0);
+
+       OUT_WFI5(ring);
+
+       OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
+       OUT_RING(ring, 0x7c400004);        /* RB_CCU_CNTL */
 }
 
 static void
@@ -544,6 +559,15 @@ fd6_emit_tile_init(struct fd_batch *batch)
 
                OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
                OUT_RING(ring, 0x0);
+
+               OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1);
+               OUT_RING(ring, 0x1);
+
+               OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1);
+               OUT_RING(ring, 0x1);
+
+               OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+               OUT_RING(ring, 0x1);
        } else {
                set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
                patch_draws(batch, IGNORE_VISIBILITY);
@@ -580,9 +604,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
        struct fd6_context *fd6_ctx = fd6_context(ctx);
        struct fd_ringbuffer *ring = batch->gmem;
 
-       OUT_PKT7(ring, CP_SET_MARKER, 1);
-       OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7));
-
        emit_marker6(ring, 7);
        OUT_PKT7(ring, CP_SET_MARKER, 1);
        OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10);
@@ -595,8 +616,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
 
        set_scissor(ring, x1, y1, x2, y2);
 
-       set_window_offset(ring, x1, y1);
-
        OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
        OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
 
@@ -620,7 +639,32 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile)
                                (tile->p * 4) + (32 * A6XX_VSC_DATA_PITCH), 0, 0);
                OUT_RELOC(ring, fd6_ctx->vsc_data2,
                                (tile->p * A6XX_VSC_DATA2_PITCH), 0, 0);
+
+               set_window_offset(ring, x1, y1);
+
+               struct fd_gmem_stateobj *gmem = &batch->ctx->gmem;
+               set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
+
+               OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1);
+               OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
+
+               OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
+               OUT_RING(ring, 0x0);
+
+               OUT_PKT7(ring, CP_SET_MODE, 1);
+               OUT_RING(ring, 0x0);
+
+               OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1);
+               OUT_RING(ring, 0x0);
+
+               OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1);
+               OUT_RING(ring, 0x0);
+
+               OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1);
+               OUT_RING(ring, 0x0);
        } else {
+               set_window_offset(ring, x1, y1);
+
                OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
                OUT_RING(ring, 0x1);
 
@@ -1028,26 +1072,6 @@ prepare_tile_fini_ib(struct fd_batch *batch)
                        FD_RINGBUFFER_STREAMING);
        ring = batch->tile_fini;
 
-       if (use_hw_binning(batch)) {
-               OUT_PKT7(ring, CP_SET_MARKER, 1);
-               OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10);
-       }
-
-       OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
-       OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
-                       CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
-                       CP_SET_DRAW_STATE__0_GROUP_ID(0));
-       OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
-       OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
-
-       OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
-       OUT_RING(ring, 0x0);
-
-       emit_marker6(ring, 7);
-       OUT_PKT7(ring, CP_SET_MARKER, 1);
-       OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
-       emit_marker6(ring, 7);
-
        set_blit_scissor(batch, ring);
 
        if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
@@ -1081,7 +1105,32 @@ prepare_tile_fini_ib(struct fd_batch *batch)
 static void
 fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile)
 {
-       fd6_emit_ib(batch->gmem, batch->tile_fini);
+       struct fd_ringbuffer *ring = batch->gmem;
+
+       if (use_hw_binning(batch)) {
+               OUT_PKT7(ring, CP_SET_MARKER, 1);
+               OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10);
+       }
+
+       OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+       OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
+                       CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+                       CP_SET_DRAW_STATE__0_GROUP_ID(0));
+       OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+       OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+
+       OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
+       OUT_RING(ring, 0x0);
+
+       emit_marker6(ring, 7);
+       OUT_PKT7(ring, CP_SET_MARKER, 1);
+       OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10);
+       emit_marker6(ring, 7);
+
+       fd6_emit_ib(ring, batch->tile_fini);
+
+       OUT_PKT7(ring, CP_SET_MARKER, 1);
+       OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7));
 }
 
 static void