freedreno/a3xx: use cs patch instead of WFI+RMW
author: Rob Clark <robclark@freedesktop.org>
Wed, 8 Jan 2014 15:06:52 +0000 (10:06 -0500)
committer: Rob Clark <robclark@freedesktop.org>
Wed, 8 Jan 2014 21:30:18 +0000 (16:30 -0500)
Since we now have the cmdstream patch mechanism needed for hw binning,
might as well also use it for RB_RENDER_CONTROL updates.  This avoids
the need to use RMW (and associated WFI) to update RB_RENDER_CONTROL.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_context.c
src/gallium/drivers/freedreno/a3xx/fd3_context.h
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.h
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_draw.h

index 23467193d4331cc21c361f1e31da06e70433ee8a..3732896d2a280cce473b4077228b828b7138a524 100644 (file)
@@ -44,6 +44,8 @@ fd3_context_destroy(struct pipe_context *pctx)
 
        fd3_prog_fini(pctx);
 
+       util_dynarray_fini(&fd3_ctx->rbrc_patches);
+
        fd_bo_del(fd3_ctx->vs_pvt_mem);
        fd_bo_del(fd3_ctx->fs_pvt_mem);
        fd_bo_del(fd3_ctx->vsc_size_mem);
@@ -119,6 +121,8 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
        if (!pctx)
                return NULL;
 
+       util_dynarray_init(&fd3_ctx->rbrc_patches);
+
        fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
                        DRM_FREEDRENO_GEM_TYPE_KMEM);
 
index 3599fe1bb08620aff5f15384fb29398dfba96fbf..26c8cc762b905a95a070f6743dd3bda115959a65 100644 (file)
 struct fd3_context {
        struct fd_context base;
 
+       /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
+        * once we know whether or not to use GMEM, and GMEM tile pitch.
+        */
+       struct util_dynarray rbrc_patches;
+
        struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
 
        /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes).  We
index 4c90d98495570e3cf856faab8d8c4ad9fdcbbbe2..4f28b0e0608b7a2be2d8a826a4c12a71fc3e8507 100644 (file)
@@ -162,8 +162,9 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
        OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
                        A3XX_RB_BLEND_ALPHA_FLOAT(1.0));
 
-       fd3_emit_rbrc_draw_state(ctx, ring,
-                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+       OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+       OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER),
+                       &fd3_ctx->rbrc_patches);
 
        if (buffers & PIPE_CLEAR_DEPTH) {
                OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
index 9cfe4ddb66246f09bbacce59cb6fb82c3e00f8fd..c479666b1cf5b50f1aaadea94879de39e4172a17 100644 (file)
@@ -353,8 +353,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
                struct pipe_stencil_ref *sr = &ctx->stencil_ref;
 
-               if (!binning)
-                       fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control);
+               if (!binning) {
+                       struct fd3_context *fd3_ctx = fd3_context(ctx);
+
+                       /* I suppose if we needed to (which I don't *think* we need
+                        * to), we could emit this for binning pass too.  But we
+                        * would need to keep a different patch-list for binning
+                        * vs render pass.
+                        */
+
+                       OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+                       OUT_RINGP(ring, zsa->rb_render_control,
+                                       &fd3_ctx->rbrc_patches);
+               }
 
                OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
                OUT_RING(ring, zsa->rb_alpha_ref);
index 50559d10d2237e6edf835a168a6b0baef9f9545d..1b4774d83f999e6c0e4f6a74514b12a635137d49 100644 (file)
@@ -62,28 +62,4 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                uint32_t dirty, bool binning);
 void fd3_emit_restore(struct fd_context *ctx);
 
-
-/* use RMW (read-modify-write) to update RB_RENDER_CONTROL since the
- * GMEM/binning code is deciding on the bin-width (and whether to
- * use binning) after the draw/clear state is emitted.
- */
-
-#define RBRC_DRAW_STATE  (A3XX_RB_RENDER_CONTROL_ALPHA_TEST | \
-               A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK)
-
-static inline void
-fd3_emit_rbrc_draw_state(struct fd_context *ctx,
-               struct fd_ringbuffer *ring, uint32_t val)
-{
-       assert(!(val & ~RBRC_DRAW_STATE));
-       if (val != ctx->rmw.rbrc_draw) {
-               fd_rmw_wfi(ctx, ring);
-               OUT_PKT3(ring, CP_REG_RMW, 3);
-               OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
-               OUT_RING(ring, ~RBRC_DRAW_STATE);
-               OUT_RING(ring, val);
-               ctx->rmw.rbrc_draw = val;
-       }
-}
-
 #endif /* FD3_EMIT_H */
index 8720e087b7b893aa81d48e9b5f5354d258da2b8a..a08b482fafb919e1ff89bd6c509d3eadadb42506 100644 (file)
@@ -544,6 +544,18 @@ patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
        util_dynarray_resize(&ctx->draw_patches, 0);
 }
 
+static void
+patch_rbrc(struct fd_context *ctx, uint32_t val)
+{
+       struct fd3_context *fd3_ctx = fd3_context(ctx);
+       unsigned i;
+       for (i = 0; i < fd_patch_num_elements(&fd3_ctx->rbrc_patches); i++) {
+               struct fd_cs_patch *patch = fd_patch_element(&fd3_ctx->rbrc_patches, i);
+               *patch->cs = patch->val | val;
+       }
+       util_dynarray_resize(&fd3_ctx->rbrc_patches, 0);
+}
+
 /* for rendering directly to system memory: */
 static void
 fd3_emit_sysmem_prep(struct fd_context *ctx)
@@ -563,10 +575,6 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
 
        emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
 
-       OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
-       OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
-                       A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
-
        /* setup scissor/offset for current tile: */
        OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
        OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
@@ -584,6 +592,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
                        A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
 
        patch_draws(ctx, IGNORE_VISIBILITY);
+       patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
 }
 
 static void
@@ -757,6 +766,9 @@ fd3_emit_tile_init(struct fd_context *ctx)
        } else {
                patch_draws(ctx, IGNORE_VISIBILITY);
        }
+
+       patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+                       A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 }
 
 /* before mem2gmem */
@@ -837,11 +849,6 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
 
        emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
 
-       OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
-       OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
-                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
-                       A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
-
        /* setup scissor/offset for current tile: */
        OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
        OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |
index a0227e49c032d50c396d2b460916cd3ec5cd12be..5373de6c6383053718bb09a1ce352dac02d7a653 100644 (file)
@@ -159,16 +159,7 @@ struct fd_context {
        /* Keep track if WAIT_FOR_IDLE is needed for registers we need
         * to update via RMW:
         */
-       struct {
-               bool need_wfi;
-               /* note: would be nicer to have in fd3_context, fd2_context,
-                * etc, because the registered modified via RMR differ across
-                * generation.  But as long as it is a small set of registers
-                * that might be more hassle than it's worth.
-                */
-               /* state for RB_RENDER_CONTROL: */
-               uint32_t rbrc_draw;
-       } rmw;
+       bool rmw_needs_wfi;
 
        /* Keep track of DRAW initiators that need to be patched up depending
         * on whether we using binning or not:
@@ -277,17 +268,16 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim)
 static INLINE void
 fd_reset_rmw_state(struct fd_context *ctx)
 {
-       ctx->rmw.need_wfi = true;
-       ctx->rmw.rbrc_draw = ~0;
+       ctx->rmw_needs_wfi = true;
 }
 
 /* emit before a RMW a WAIT_FOR_IDLE only if needed: */
 static inline void
 fd_rmw_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
 {
-       if (ctx->rmw.need_wfi) {
+       if (ctx->rmw_needs_wfi) {
                OUT_WFI(ring);
-               ctx->rmw.need_wfi = false;
+               ctx->rmw_needs_wfi = false;
        }
 }
 
index e8bb420889e6e3447a010d0a9e3820c5395e9eae..608d071989de36dbe712d45940a19e74578fbaa6 100644 (file)
@@ -95,7 +95,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        emit_marker(ring, 7);
 
-       ctx->rmw.need_wfi = true;
+       ctx->rmw_needs_wfi = true;
 }
 
 #endif /* FREEDRENO_DRAW_H_ */