From c63450e8298a34aa0f4077a846b5b0467cdeb567 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 13 Jun 2014 17:39:59 -0400 Subject: [PATCH] freedreno/a3xx: WFI fixes/cleanup Blob driver seems to need WFI in some cases after CP_EVENT_WRITE, implying that this is asynchronous and should reset needs_wfi. Also, CP_INVALIDATE_STATE seems to need WFI. But CP_LOAD_STATE does not. The blob driver also puts WFIs before writing GRAS_CL_VPORT registers. The latter may be a work-around, as these registers should be banked/ context registers. I haven't yet found a lockup that this averts, but I expect viewport to change infrequently so out of paranoia I will keep these for now. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 8 ++-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 45 +++++++------------ src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 22 ++++----- .../drivers/freedreno/a3xx/fd3_query.c | 3 +- .../drivers/freedreno/freedreno_context.h | 11 +++++ 5 files changed, 41 insertions(+), 48 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index f822aa728fe..4b2d94103f5 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -154,8 +154,7 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty) OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, PERFCOUNTER_STOP); + fd_event_write(ctx, ring, PERFCOUNTER_STOP); fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); @@ -195,6 +194,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, A3XX_RB_DEPTH_CONTROL_Z_ENABLE | A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)); + fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2); OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth)); @@ -276,7 +276,6 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, .format = PIPE_FORMAT_R32G32B32_FLOAT, }}, 1); - fd_wfi(ctx, ring); fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); @@ -292,8 +291,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, PERFCOUNTER_STOP); + fd_event_write(ctx, ring, PERFCOUNTER_STOP); fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 878d6ff6325..f77c7226b5b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -234,26 +234,6 @@ emit_textures(struct fd_ringbuffer *ring, } } -static void -emit_cache_flush(struct fd_ringbuffer *ring) -{ - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, CACHE_FLUSH); - - /* probably only really needed on a320: */ - OUT_PKT3(ring, CP_DRAW_INDX, 3); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, - INDEX_SIZE_IGN, IGNORE_VISIBILITY)); - OUT_RING(ring, 0); /* NumIndices */ - - OUT_PKT3(ring, CP_NOP, 4); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); -} - /* emit texture state for mem->gmem restore operation.. eventually it would * be good to get rid of this and use normal CSO/etc state for more of these * special cases, but for now the compiler is not sufficient.. @@ -492,6 +472,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & FD_DIRTY_VIEWPORT) { + fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0])); @@ -502,17 +483,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & FD_DIRTY_PROG) { - fd_wfi(ctx, ring); fd3_program_emit(ring, prog, key); } - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, HLSQ_FLUSH); - if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) && /* evil hack to deal sanely with clear path: */ (prog == &ctx->prog)) { - fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], (prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL); @@ -549,8 +525,6 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3])); } - if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) - fd_wfi(ctx, ring); if (dirty & FD_DIRTY_VERTTEX) { if (vp->has_samp) @@ -586,6 +560,7 @@ fd3_emit_restore(struct fd_context *ctx) OUT_RING(ring, 0x00000000); } + fd_wfi(ctx, ring); OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); OUT_RING(ring, 0x00007fff); @@ -696,7 +671,21 @@ fd3_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); OUT_RING(ring, 0x00000000); - emit_cache_flush(ring); + fd_event_write(ctx, ring, CACHE_FLUSH); + + /* probably only really needed on a320: */ + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 0); /* NumIndices */ + + OUT_PKT3(ring, CP_NOP, 4); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + fd_wfi(ctx, ring); ctx->needs_rb_fbd = true; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 0c9dd996ab6..8519a90ccfa 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -177,7 +177,6 @@ emit_binning_workaround(struct fd_context *ctx) A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); - fd_wfi(ctx, ring); fd3_program_emit(ring, &ctx->solid_prog, key); fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), (struct fd3_vertex_buf[]) {{ @@ -245,6 +244,7 @@ emit_binning_workaround(struct fd_context *ctx) OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) | A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0)); + fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0)); @@ -356,6 +356,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ + fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0)); @@ -397,7 +398,6 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd_wfi(ctx, ring); fd3_program_emit(ring, &ctx->solid_prog, key); fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), (struct fd3_vertex_buf[]) {{ @@ -435,7 +435,6 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, emit_mrt(ring, 1, &psurf, &base, bin_w); - fd_wfi(ctx, ring); fd3_emit_gmem_restore_tex(ring, psurf); fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, @@ -487,12 +486,14 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); + fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS)); OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */ + fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5)); OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0)); @@ -541,7 +542,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd_wfi(ctx, ring); fd3_program_emit(ring, &ctx->blit_prog, key); fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key), (struct fd3_vertex_buf[]) {{ @@ -677,7 +677,7 @@ emit_binning_pass(struct fd_context *ctx) if (ctx->screen->gpu_id == 320) { emit_binning_workaround(ctx); - + fd_wfi(ctx, ring); OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); OUT_RING(ring, 0x00007fff); } @@ -760,8 +760,8 @@ emit_binning_pass(struct fd_context *ctx) A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, CACHE_FLUSH); + fd_event_write(ctx, ring, CACHE_FLUSH); + fd_wfi(ctx, ring); if (ctx->screen->gpu_id == 320) { /* dummy-draw workaround: */ @@ -877,17 +877,13 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) assert(pipe->w * pipe->h); - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, HLSQ_FLUSH); - - OUT_WFI(ring); + fd_event_write(ctx, ring, HLSQ_FLUSH); + fd_wfi(ctx, ring); OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1); OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) | A3XX_PC_VSTREAM_CONTROL_N(tile->n)); - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, CACHE_FLUSH); OUT_PKT3(ring, CP_SET_BIN_DATA, 2); OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.c b/src/gallium/drivers/freedreno/a3xx/fd3_query.c index 77ae8b6b1d1..34e0b5adbd1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_query.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.c @@ -68,8 +68,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring) INDEX_SIZE_IGN, USE_VISIBILITY)); OUT_RING(ring, 0); /* NumIndices */ - OUT_PKT3(ring, CP_EVENT_WRITE, 1); - OUT_RING(ring, ZPASS_DONE); + fd_event_write(ctx, ring, ZPASS_DONE); OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1); OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE); diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 46984823427..0051c9ddcab 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -363,6 +363,17 @@ fd_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring) } } +/* emit a CP_EVENT_WRITE: + */ +static inline void +fd_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum vgt_event_type evt) +{ + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, evt); + fd_reset_wfi(ctx); +} + struct pipe_context * fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, const uint8_t *primtypes, void *priv); -- 2.30.2