From dc00ec154bda15672861d1b508aa4aacdb306f68 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 1 Feb 2014 10:53:00 -0500 Subject: [PATCH] freedreno: better manage our WFI's Updates to non-banked registers, CP_LOAD_STATE, etc, need a WFI if there is potentially pending rendering. Track this better, and add fd_wfi() calls everywhere that might potentially need CP_WAIT_FOR_IDLE. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 1 + src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 19 ++++++++----------- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 14 +++++++++++--- .../drivers/freedreno/freedreno_context.c | 2 +- .../drivers/freedreno/freedreno_context.h | 16 +++++++++------- .../drivers/freedreno/freedreno_draw.h | 2 +- .../drivers/freedreno/freedreno_gmem.c | 6 +++++- 7 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index be710d16013..83024c11a8b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -249,6 +249,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, }, 1); + fd_wfi(ctx, ring); fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 3ca49ff949b..a364fbfcd15 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -64,15 +64,6 @@ fd3_emit_constant(struct fd_ringbuffer *ring, src = SS_DIRECT; } - /* we have this sometimes, not others.. perhaps we could be clever - * and figure out actually when we need to invalidate cache: - */ - OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); - OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0)); - OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) | - A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) | - A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | CP_LOAD_STATE_0_STATE_SRC(src) | @@ -458,8 +449,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) + if (dirty & FD_DIRTY_PROG) { + fd_wfi(ctx, ring); fd3_program_emit(ring, prog, binning); + } OUT_PKT3(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, HLSQ_FLUSH); @@ -467,6 +460,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) && /* evil hack to deal sanely with clear path: */ (prog == &ctx->prog)) { + fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL); @@ -501,6 +495,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); } + if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) + fd_wfi(ctx, ring); + if (dirty & FD_DIRTY_VERTTEX) emit_textures(ring, SB_VERT_TEX, &ctx->verttex); @@ -638,5 +635,5 @@ fd3_emit_restore(struct fd_context *ctx) OUT_RING(ring, 0x00000000); emit_cache_flush(ring); - fd_rmw_wfi(ctx, ring); + fd_wfi(ctx, ring); } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index a4845446633..2eb20247bb8 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -146,6 +146,7 @@ emit_binning_workaround(struct fd_context *ctx) A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); + fd_wfi(ctx, ring); fd3_program_emit(ring, &ctx->solid_prog, false); fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { @@ -237,6 +238,7 @@ emit_binning_workaround(struct fd_context *ctx) OUT_RING(ring, 2); /* NumIndices */ OUT_RING(ring, 2); OUT_RING(ring, 1); + fd_reset_wfi(ctx); OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS)); @@ -244,8 +246,7 @@ emit_binning_workaround(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); OUT_RING(ring, 0x00000000); - OUT_WFI(ring); - + fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); @@ -363,6 +364,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + fd_wfi(ctx, ring); fd3_program_emit(ring, &ctx->solid_prog, false); fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { @@ -403,6 +405,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, emit_mrt(ring, 1, &psurf, &base, bin_w); + fd_wfi(ctx, ring); fd3_emit_gmem_restore_tex(ring, psurf); fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, @@ -508,6 +511,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + fd_wfi(ctx, ring); fd3_program_emit(ring, &ctx->blit_prog, false); fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) { @@ -685,6 +689,9 @@ emit_binning_pass(struct fd_context *ctx) /* emit IB to binning drawcmds: */ OUT_IB(ring, ctx->binning_start, ctx->binning_end); + fd_reset_wfi(ctx); + + fd_wfi(ctx, ring); /* and then put stuff back the way it was: */ @@ -722,6 +729,7 @@ emit_binning_pass(struct fd_context *ctx) OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, INDEX_SIZE_IGN, IGNORE_VISIBILITY)); OUT_RING(ring, 0); /* NumIndices */ + fd_reset_wfi(ctx); } OUT_PKT3(ring, CP_NOP, 4); @@ -730,7 +738,7 @@ emit_binning_pass(struct fd_context *ctx) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - OUT_WFI(ring); + fd_wfi(ctx, ring); if (ctx->screen->gpu_id == 320) { emit_binning_workaround(ctx); diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 33abb317045..f0485d8175e 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -206,7 +206,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, } fd_context_next_rb(pctx); - fd_reset_rmw_state(ctx); + fd_reset_wfi(ctx); util_dynarray_init(&ctx->draw_patches); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 0364d0f94c6..ef83048545d 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -168,7 +168,7 @@ struct fd_context { /* Keep track if WAIT_FOR_IDLE is needed for registers we need * to update via RMW: */ - bool rmw_needs_wfi; + bool needs_wfi; /* Keep track of DRAW initiators that need to be patched up depending * on whether we using binning or not: @@ -275,18 +275,20 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim) } static INLINE void -fd_reset_rmw_state(struct fd_context *ctx) +fd_reset_wfi(struct fd_context *ctx) { - ctx->rmw_needs_wfi = true; + ctx->needs_wfi = true; } -/* emit before a RMW a WAIT_FOR_IDLE only if needed: */ +/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already + * been one since last draw: + */ static inline void -fd_rmw_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring) +fd_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring) { - if (ctx->rmw_needs_wfi) { + if (ctx->needs_wfi) { OUT_WFI(ring); - ctx->rmw_needs_wfi = false; + ctx->needs_wfi = false; } } diff --git a/src/gallium/drivers/freedreno/freedreno_draw.h b/src/gallium/drivers/freedreno/freedreno_draw.h index 608d071989d..fe1c548b12e 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.h +++ b/src/gallium/drivers/freedreno/freedreno_draw.h @@ -95,7 +95,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring, emit_marker(ring, 7); - ctx->rmw_needs_wfi = true; + fd_reset_wfi(ctx); } #endif /* FREEDRENO_DRAW_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 6a55aa4c133..80cf7c89c6f 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -278,6 +278,7 @@ render_tiles(struct fd_context *ctx) /* emit IB to drawcmds: */ OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); + fd_reset_wfi(ctx); /* emit gmem2mem to transfer tile back to system memory: */ ctx->emit_tile_gmem2mem(ctx, tile); @@ -291,6 +292,7 @@ render_sysmem(struct fd_context *ctx) /* emit IB to drawcmds: */ OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); + fd_reset_wfi(ctx); } void @@ -314,6 +316,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx) fd_ringmarker_mark(ctx->draw_end); fd_ringmarker_mark(ctx->binning_end); + fd_reset_wfi(ctx); + ctx->stats.batch_total++; if (sysmem) { @@ -339,7 +343,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx) fd_ringmarker_mark(ctx->draw_start); fd_ringmarker_mark(ctx->binning_start); - fd_reset_rmw_state(ctx); + fd_reset_wfi(ctx); /* update timestamps on render targets: */ timestamp = fd_ringbuffer_timestamp(ctx->ring); -- 2.30.2