From: Rob Clark Date: Wed, 8 Jan 2014 15:06:52 +0000 (-0500) Subject: freedreno/a3xx: use cs patch instead of WFI+RMW X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=725d736f6a6a14d10223888d585ddab80ee803f0;p=mesa.git freedreno/a3xx: use cs patch instead of WFI+RMW Since we now have the cmdstream patch mechanism needed for hw binning, might as well also use it for RB_RENDER_CONTROL updates. This avoids the need to use RMW (and associated WFI) to update RB_RENDER_CONTROL. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index 23467193d43..3732896d2a2 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -44,6 +44,8 @@ fd3_context_destroy(struct pipe_context *pctx) fd3_prog_fini(pctx); + util_dynarray_fini(&fd3_ctx->rbrc_patches); + fd_bo_del(fd3_ctx->vs_pvt_mem); fd_bo_del(fd3_ctx->fs_pvt_mem); fd_bo_del(fd3_ctx->vsc_size_mem); @@ -119,6 +121,8 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv) if (!pctx) return NULL; + util_dynarray_init(&fd3_ctx->rbrc_patches); + fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, DRM_FREEDRENO_GEM_TYPE_KMEM); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h index 3599fe1bb08..26c8cc762b9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h @@ -36,6 +36,11 @@ struct fd3_context { struct fd_context base; + /* Keep track of writes to RB_RENDER_CONTROL which need to be patched + * once we know whether or not to use GMEM, and GMEM tile pitch. + */ + struct util_dynarray rbrc_patches; + struct fd_bo *vs_pvt_mem, *fs_pvt_mem; /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). 
We diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 4c90d984955..4f28b0e0608 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -162,8 +162,9 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) | A3XX_RB_BLEND_ALPHA_FLOAT(1.0)); - fd3_emit_rbrc_draw_state(ctx, ring, - A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); + OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); + OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER), + &fd3_ctx->rbrc_patches); if (buffers & PIPE_CLEAR_DEPTH) { OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 9cfe4ddb662..c479666b1cf 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -353,8 +353,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa); struct pipe_stencil_ref *sr = &ctx->stencil_ref; - if (!binning) - fd3_emit_rbrc_draw_state(ctx, ring, zsa->rb_render_control); + if (!binning) { + struct fd3_context *fd3_ctx = fd3_context(ctx); + + /* I suppose if we needed to (which I don't *think* we need + * to), we could emit this for binning pass too. But we + * would need to keep a different patch-list for binning + * vs render pass. 
+ */ + + OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); + OUT_RINGP(ring, zsa->rb_render_control, + &fd3_ctx->rbrc_patches); + } OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1); OUT_RING(ring, zsa->rb_alpha_ref); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index 50559d10d22..1b4774d83f9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -62,28 +62,4 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, uint32_t dirty, bool binning); void fd3_emit_restore(struct fd_context *ctx); - -/* use RMW (read-modify-write) to update RB_RENDER_CONTROL since the - * GMEM/binning code is deciding on the bin-width (and whether to - * use binning) after the draw/clear state is emitted. - */ - -#define RBRC_DRAW_STATE (A3XX_RB_RENDER_CONTROL_ALPHA_TEST | \ - A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK) - -static inline void -fd3_emit_rbrc_draw_state(struct fd_context *ctx, - struct fd_ringbuffer *ring, uint32_t val) -{ - assert(!(val & ~RBRC_DRAW_STATE)); - if (val != ctx->rmw.rbrc_draw) { - fd_rmw_wfi(ctx, ring); - OUT_PKT3(ring, CP_REG_RMW, 3); - OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL); - OUT_RING(ring, ~RBRC_DRAW_STATE); - OUT_RING(ring, val); - ctx->rmw.rbrc_draw = val; - } -} - #endif /* FD3_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 8720e087b7b..a08b482fafb 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -544,6 +544,18 @@ patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode) util_dynarray_resize(&ctx->draw_patches, 0); } +static void +patch_rbrc(struct fd_context *ctx, uint32_t val) +{ + struct fd3_context *fd3_ctx = fd3_context(ctx); + unsigned i; + for (i = 0; i < fd_patch_num_elements(&fd3_ctx->rbrc_patches); i++) { + struct fd_cs_patch *patch = 
fd_patch_element(&fd3_ctx->rbrc_patches, i); + *patch->cs = patch->val | val; + } + util_dynarray_resize(&fd3_ctx->rbrc_patches, 0); +} + /* for rendering directly to system memory: */ static void fd3_emit_sysmem_prep(struct fd_context *ctx) @@ -563,10 +575,6 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0); - OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); - OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | - A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); - /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | @@ -584,6 +592,7 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); patch_draws(ctx, IGNORE_VISIBILITY); + patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); } static void @@ -757,6 +766,9 @@ fd3_emit_tile_init(struct fd_context *ctx) } else { patch_draws(ctx, IGNORE_VISIBILITY); } + + patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); } /* before mem2gmem */ @@ -837,11 +849,6 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w); - OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); - OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | - A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | - A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); - /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) | diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index a0227e49c03..5373de6c638 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -159,16 +159,7 @@ struct fd_context { /* Keep track if WAIT_FOR_IDLE is needed for registers we need 
* to update via RMW: */ - struct { - bool need_wfi; - /* note: would be nicer to have in fd3_context, fd2_context, - * etc, because the registered modified via RMR differ across - * generation. But as long as it is a small set of registers - * that might be more hassle than it's worth. - */ - /* state for RB_RENDER_CONTROL: */ - uint32_t rbrc_draw; - } rmw; + bool rmw_needs_wfi; /* Keep track of DRAW initiators that need to be patched up depending * on whether we using binning or not: @@ -277,17 +268,16 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim) static INLINE void fd_reset_rmw_state(struct fd_context *ctx) { - ctx->rmw.need_wfi = true; - ctx->rmw.rbrc_draw = ~0; + ctx->rmw_needs_wfi = true; } /* emit before a RMW a WAIT_FOR_IDLE only if needed: */ static inline void fd_rmw_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring) { - if (ctx->rmw.need_wfi) { + if (ctx->rmw_needs_wfi) { OUT_WFI(ring); - ctx->rmw.need_wfi = false; + ctx->rmw_needs_wfi = false; } } diff --git a/src/gallium/drivers/freedreno/freedreno_draw.h b/src/gallium/drivers/freedreno/freedreno_draw.h index e8bb420889e..608d071989d 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.h +++ b/src/gallium/drivers/freedreno/freedreno_draw.h @@ -95,7 +95,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, emit_marker(ring, 7); - ctx->rmw.need_wfi = true; + ctx->rmw_needs_wfi = true; } #endif /* FREEDRENO_DRAW_H_ */