freedreno: better manage our WFIs
author Rob Clark <robclark@freedesktop.org>
Sat, 1 Feb 2014 15:53:00 +0000 (10:53 -0500)
committer Rob Clark <robclark@freedesktop.org>
Sat, 1 Feb 2014 17:10:17 +0000 (12:10 -0500)
Updates to non-banked registers, CP_LOAD_STATE, etc, need a WFI if there
is potentially pending rendering.  Track this better, and add fd_wfi()
calls everywhere that might potentially need CP_WAIT_FOR_IDLE.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
src/gallium/drivers/freedreno/freedreno_context.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_draw.h
src/gallium/drivers/freedreno/freedreno_gmem.c

index be710d16013179a4dd9c4eac7095659c594a2a20..83024c11a8b3856446ddd68bdd83c7b96b980da4 100644 (file)
@@ -249,6 +249,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
                        { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
                }, 1);
 
+       fd_wfi(ctx, ring);
        fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
 
        OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
index 3ca49ff949b2d820d2bd08a386471a58bc2c715a..a364fbfcd15572cbb55f5e803887e5747a68d5a9 100644 (file)
@@ -64,15 +64,6 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
                src = SS_DIRECT;
        }
 
-       /* we have this sometimes, not others.. perhaps we could be clever
-        * and figure out actually when we need to invalidate cache:
-        */
-       OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
-       OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
-       OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
-                       A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
-                       A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
-
        OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
        OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
                        CP_LOAD_STATE_0_STATE_SRC(src) |
@@ -458,8 +449,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
        }
 
-       if (dirty & FD_DIRTY_PROG)
+       if (dirty & FD_DIRTY_PROG) {
+               fd_wfi(ctx, ring);
                fd3_program_emit(ring, prog, binning);
+       }
 
        OUT_PKT3(ring, CP_EVENT_WRITE, 1);
        OUT_RING(ring, HLSQ_FLUSH);
@@ -467,6 +460,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
        if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
                        /* evil hack to deal sanely with clear path: */
                        (prog == &ctx->prog)) {
+               fd_wfi(ctx, ring);
                emit_constants(ring,  SB_VERT_SHADER,
                                &ctx->constbuf[PIPE_SHADER_VERTEX],
                                (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL);
@@ -501,6 +495,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                                A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
        }
 
+       if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
+               fd_wfi(ctx, ring);
+
        if (dirty & FD_DIRTY_VERTTEX)
                emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
 
@@ -638,5 +635,5 @@ fd3_emit_restore(struct fd_context *ctx)
        OUT_RING(ring, 0x00000000);
 
        emit_cache_flush(ring);
-       fd_rmw_wfi(ctx, ring);
+       fd_wfi(ctx, ring);
 }
index a48454466338bd60c7c8e46eee060aba668caa2b..2eb20247bb8a0e4dc235d6973be7a61ec834049d 100644 (file)
@@ -146,6 +146,7 @@ emit_binning_workaround(struct fd_context *ctx)
                        A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
                        A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
+       fd_wfi(ctx, ring);
        fd3_program_emit(ring, &ctx->solid_prog, false);
 
        fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
@@ -237,6 +238,7 @@ emit_binning_workaround(struct fd_context *ctx)
        OUT_RING(ring, 2);            /* NumIndices */
        OUT_RING(ring, 2);
        OUT_RING(ring, 1);
+       fd_reset_wfi(ctx);
 
        OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
        OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
@@ -244,8 +246,7 @@ emit_binning_workaround(struct fd_context *ctx)
        OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
        OUT_RING(ring, 0x00000000);
 
-       OUT_WFI(ring);
-
+       fd_wfi(ctx, ring);
        OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
        OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                        A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
@@ -363,6 +364,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
        OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
        OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 
+       fd_wfi(ctx, ring);
        fd3_program_emit(ring, &ctx->solid_prog, false);
 
        fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
@@ -403,6 +405,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
 
        emit_mrt(ring, 1, &psurf, &base, bin_w);
 
+       fd_wfi(ctx, ring);
        fd3_emit_gmem_restore_tex(ring, psurf);
 
        fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
@@ -508,6 +511,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
        OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
        OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 
+       fd_wfi(ctx, ring);
        fd3_program_emit(ring, &ctx->blit_prog, false);
 
        fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
@@ -685,6 +689,9 @@ emit_binning_pass(struct fd_context *ctx)
 
        /* emit IB to binning drawcmds: */
        OUT_IB(ring, ctx->binning_start, ctx->binning_end);
+       fd_reset_wfi(ctx);
+
+       fd_wfi(ctx, ring);
 
        /* and then put stuff back the way it was: */
 
@@ -722,6 +729,7 @@ emit_binning_pass(struct fd_context *ctx)
                OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
                                INDEX_SIZE_IGN, IGNORE_VISIBILITY));
                OUT_RING(ring, 0);             /* NumIndices */
+               fd_reset_wfi(ctx);
        }
 
        OUT_PKT3(ring, CP_NOP, 4);
@@ -730,7 +738,7 @@ emit_binning_pass(struct fd_context *ctx)
        OUT_RING(ring, 0x00000000);
        OUT_RING(ring, 0x00000000);
 
-       OUT_WFI(ring);
+       fd_wfi(ctx, ring);
 
        if (ctx->screen->gpu_id == 320) {
                emit_binning_workaround(ctx);
index 33abb317045bc22fb2d1c5a6468c52b7758eb885..f0485d8175e95a00b08de83cfdd74bec96293fb6 100644 (file)
@@ -206,7 +206,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
        }
 
        fd_context_next_rb(pctx);
-       fd_reset_rmw_state(ctx);
+       fd_reset_wfi(ctx);
 
        util_dynarray_init(&ctx->draw_patches);
 
index 0364d0f94c61cb50902df47815db3bf1b481ec54..ef83048545df5d11328a64e8374f0349ce681be1 100644 (file)
@@ -168,7 +168,7 @@ struct fd_context {
        /* Keep track if WAIT_FOR_IDLE is needed for registers we need
         * to update via RMW:
         */
-       bool rmw_needs_wfi;
+       bool needs_wfi;
 
        /* Keep track of DRAW initiators that need to be patched up depending
         * on whether we using binning or not:
@@ -275,18 +275,20 @@ fd_supported_prim(struct fd_context *ctx, unsigned prim)
 }
 
 static INLINE void
-fd_reset_rmw_state(struct fd_context *ctx)
+fd_reset_wfi(struct fd_context *ctx)
 {
-       ctx->rmw_needs_wfi = true;
+       ctx->needs_wfi = true;
 }
 
-/* emit before a RMW a WAIT_FOR_IDLE only if needed: */
+/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already
+ * been one since last draw:
+ */
 static inline void
-fd_rmw_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
+fd_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
 {
-       if (ctx->rmw_needs_wfi) {
+       if (ctx->needs_wfi) {
                OUT_WFI(ring);
-               ctx->rmw_needs_wfi = false;
+               ctx->needs_wfi = false;
        }
 }
 
index 608d071989de36dbe712d45940a19e74578fbaa6..fe1c548b12efa5a12c00a91d5f453fdecac3e477 100644 (file)
@@ -95,7 +95,7 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
        emit_marker(ring, 7);
 
-       ctx->rmw_needs_wfi = true;
+       fd_reset_wfi(ctx);
 }
 
 #endif /* FREEDRENO_DRAW_H_ */
index 6a55aa4c133baaf4821e79b43f650fe2f4afe91b..80cf7c89c6f527e5347ff8d0dbb4a4915a84baf7 100644 (file)
@@ -278,6 +278,7 @@ render_tiles(struct fd_context *ctx)
 
                /* emit IB to drawcmds: */
                OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
+               fd_reset_wfi(ctx);
 
                /* emit gmem2mem to transfer tile back to system memory: */
                ctx->emit_tile_gmem2mem(ctx, tile);
@@ -291,6 +292,7 @@ render_sysmem(struct fd_context *ctx)
 
        /* emit IB to drawcmds: */
        OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
+       fd_reset_wfi(ctx);
 }
 
 void
@@ -314,6 +316,8 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
        fd_ringmarker_mark(ctx->draw_end);
        fd_ringmarker_mark(ctx->binning_end);
 
+       fd_reset_wfi(ctx);
+
        ctx->stats.batch_total++;
 
        if (sysmem) {
@@ -339,7 +343,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
        fd_ringmarker_mark(ctx->draw_start);
        fd_ringmarker_mark(ctx->binning_start);
 
-       fd_reset_rmw_state(ctx);
+       fd_reset_wfi(ctx);
 
        /* update timestamps on render targets: */
        timestamp = fd_ringbuffer_timestamp(ctx->ring);