freedreno/a3xx: fix blend state corruption issue
authorRob Clark <robclark@freedesktop.org>
Mon, 23 Dec 2013 14:59:42 +0000 (09:59 -0500)
committerRob Clark <robclark@freedesktop.org>
Thu, 26 Dec 2013 17:13:42 +0000 (12:13 -0500)
Using RMW on banked context registers is not safe.  The value read
could be the wrong one.  So if there has been a DRAW_IDX launched,
the RMW must be preceded by a WAIT_FOR_IDLE to ensure the read part
of RMW sees the correct value.

To avoid unnecessary WFI's, keep track if there is a need for WFI,
and only emit one if needed.  Furthermore, keep track if we even
need to update the register in the first place.

And to cut down on the amount of RMW to avoid excessive WFI's, at the
tiling/GMEM level we can always overwrite RB_RENDER_CONTROL, as the
state at beginning of draw/clear cmds (which we IB to) is always
undefined.  In the draw/clear commands, we always still use RMW (with
WFI if needed), but only if the register value actually changes.  (At
points where the current value cannot be known, the saved value is
reset to ~0, which includes bits outside of RBRC_DRAW_STATE, so there
never is chance for confusion.)

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.h
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
src/gallium/drivers/freedreno/freedreno_context.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_draw.h
src/gallium/drivers/freedreno/freedreno_gmem.c

index d808777a1fb2653b4ec2c67172d59dc4da87734d..c5d8b7745521d2ff9769a7f11b8044d918ef6531 100644 (file)
@@ -80,8 +80,6 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
        OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
        OUT_RING(ring, 0x0000000b);                  /* PC_VERTEX_REUSE_BLOCK_CNTL */
 
-       OUT_WFI (ring);
-
        OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
        OUT_RING(ring, info->min_index);        /* VFD_INDEX_MIN */
        OUT_RING(ring, info->max_index);        /* VFD_INDEX_MAX */
@@ -111,7 +109,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
        OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
                        A3XX_RB_BLEND_ALPHA_FLOAT(1.0));
 
-       fd3_emit_rbrc_draw_state(ring,
+       fd3_emit_rbrc_draw_state(ctx, ring,
                        A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
 
        if (buffers & PIPE_CLEAR_DEPTH) {
@@ -220,8 +218,6 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
 
        fd_draw(ctx, DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, 2,
                        INDEX_SIZE_IGN, 0, 0, NULL);
-
-       OUT_WFI (ring);
 }
 
 void
index 825656ae62b14ba498b0237780e94d694386f764..91993725ea69c3227335b5430d6ac160725db78d 100644 (file)
@@ -243,8 +243,6 @@ emit_cache_flush(struct fd_ringbuffer *ring)
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
-
-       OUT_WFI (ring);
 }
 
 /* emit texture state for mem->gmem restore operation.. eventually it would
@@ -356,7 +354,7 @@ fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
                struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
                struct pipe_stencil_ref *sr = &ctx->stencil_ref;
 
-               fd3_emit_rbrc_draw_state(ring, zsa->rb_render_control);
+               fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
 
                OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
                OUT_RING(ring, zsa->rb_alpha_ref);
@@ -607,4 +605,5 @@ fd3_emit_restore(struct fd_context *ctx)
        }
 
        emit_cache_flush(ring);
+       fd_rmw_wfi(ctx, ring);
 }
index 668e5ddd095f070020f00ed30a7ae2b21d1f05cd..bf7787ab6f7c8b2ceb3311a392f5e09ce7e9eec6 100644 (file)
@@ -66,24 +66,23 @@ void fd3_emit_restore(struct fd_context *ctx);
  * GMEM/binning code is deciding on the bin-width (and whether to
  * use binning) after the draw/clear state is emitted.
  */
-static inline void
-fd3_emit_rbrc_draw_state(struct fd_ringbuffer *ring, uint32_t val)
-{
-       OUT_PKT3(ring, CP_REG_RMW, 3);
-       OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
-       OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK |
-                       A3XX_RB_RENDER_CONTROL_ENABLE_GMEM);
-       OUT_RING(ring, val);
-}
+
+#define RBRC_DRAW_STATE  (A3XX_RB_RENDER_CONTROL_ALPHA_TEST | \
+               A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK)
 
 static inline void
-fd3_emit_rbrc_tile_state(struct fd_ringbuffer *ring, uint32_t val)
+fd3_emit_rbrc_draw_state(struct fd_context *ctx,
+               struct fd_ringbuffer *ring, uint32_t val)
 {
-       OUT_PKT3(ring, CP_REG_RMW, 3);
-       OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
-       OUT_RING(ring, ~(A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK |
-                       A3XX_RB_RENDER_CONTROL_ENABLE_GMEM));
-       OUT_RING(ring, val);
+       assert(!(val & ~RBRC_DRAW_STATE));
+       if (val != ctx->rmw.rbrc_draw) {
+               fd_rmw_wfi(ctx, ring);
+               OUT_PKT3(ring, CP_REG_RMW, 3);
+               OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
+               OUT_RING(ring, ~RBRC_DRAW_STATE);
+               OUT_RING(ring, val);
+               ctx->rmw.rbrc_draw = val;
+       }
 }
 
 #endif /* FD3_EMIT_H */
index 4a5a241bac99820fc584d36b574f529521f19e53..3d0a607ed286ce61a31e59b86e1b04a641b9517b 100644 (file)
@@ -181,9 +181,11 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
        OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
                        A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
 
-       fd3_emit_rbrc_draw_state(ring,
-                       A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
-                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+       OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+                       A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
+                       A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
 
        OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
        OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
@@ -295,7 +297,8 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
                                A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE);
        }
 
-       fd3_emit_rbrc_tile_state(ring,
+       OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |
                        A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 
        OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
@@ -335,9 +338,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
                        A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
                        A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
 
-       fd3_emit_rbrc_draw_state(ring,
-                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
-
        OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
        OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
                        A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
@@ -423,7 +423,8 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
 
        emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
 
-       fd3_emit_rbrc_tile_state(ring,
+       OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
                        A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
 
        /* setup scissor/offset for current tile: */
@@ -514,8 +515,9 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
 
        emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
 
-       fd3_emit_rbrc_tile_state(ring,
-                       A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+       OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
                        A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 
        /* setup scissor/offset for current tile: */
index c959ccfcc898aa79756f3dc588dae9dbdaaa3018..28be508e329ec6195e215b8e5e52da9e75d2fee6 100644 (file)
@@ -174,6 +174,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
        }
 
        fd_context_next_rb(pctx);
+       fd_reset_rmw_state(ctx);
 
        util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
                        16, UTIL_SLAB_SINGLETHREADED);
index db37f9cb8a3f1c79f07fad911611d6ff41d32db1..a8abbca7a62e031246b917db145e5826c76a8af0 100644 (file)
@@ -38,6 +38,7 @@
 
 #include "freedreno_screen.h"
 #include "freedreno_gmem.h"
+#include "freedreno_util.h"
 
 struct fd_vertex_stateobj;
 
@@ -150,6 +151,20 @@ struct fd_context {
        struct fd_ringbuffer *ring;
        struct fd_ringmarker *draw_start, *draw_end;
 
+       /* Keep track if WAIT_FOR_IDLE is needed for registers we need
+        * to update via RMW:
+        */
+       struct {
+               bool need_wfi;
+               /* note: would be nicer to have in fd3_context, fd2_context,
+                * etc, because the registered modified via RMR differ across
+                * generation.  But as long as it is a small set of registers
+                * that might be more hassle than it's worth.
+                */
+               /* state for RB_RENDER_CONTROL: */
+               uint32_t rbrc_draw;
+       } rmw;
+
        struct pipe_scissor_state scissor;
 
        /* we don't have a disable/enable bit for scissor, so instead we keep
@@ -249,6 +264,23 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim)
        return (1 << prim) & ctx->primtype_mask;
 }
 
+static INLINE void
+fd_reset_rmw_state(struct fd_context *ctx)
+{
+       ctx->rmw.need_wfi = true;
+       ctx->rmw.rbrc_draw = ~0;
+}
+
+/* emit before a RMW a WAIT_FOR_IDLE only if needed: */
+static inline void
+fd_rmw_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+       if (ctx->rmw.need_wfi) {
+               OUT_WFI(ring);
+               ctx->rmw.need_wfi = false;
+       }
+}
+
 struct pipe_context * fd_context_init(struct fd_context *ctx,
                struct pipe_screen *pscreen, const uint8_t *primtypes,
                void *priv);
index 9ccc246395fbfa070ae39188b851f29924542d2b..190c0e52d24e8b153611b41711051f5b7b0beb1b 100644 (file)
@@ -85,6 +85,8 @@ fd_draw(struct fd_context *ctx, enum pc_di_primtype primtype,
        }
 
        emit_marker(ring, 7);
+
+       ctx->rmw.need_wfi = true;
 }
 
 #endif /* FREEDRENO_DRAW_H_ */
index 68b4eb607a6b9da607be456bd44ca4c21c2c4b25..47d99d3429c4426a514ec7051797d7272b67b96a 100644 (file)
@@ -272,6 +272,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
        /* mark start for next draw cmds: */
        fd_ringmarker_mark(ctx->draw_start);
 
+       fd_reset_rmw_state(ctx);
+
        /* update timestamps on render targets: */
        timestamp = fd_ringbuffer_timestamp(ctx->ring);
        if (pfb->cbufs[0])