freedreno/a3xx: refactor vertex state emit
author Rob Clark <robclark@freedesktop.org>
Tue, 14 Oct 2014 16:20:54 +0000 (12:20 -0400)
committer Rob Clark <robclark@freedesktop.org>
Wed, 15 Oct 2014 19:49:48 +0000 (15:49 -0400)
Get rid of fd3_vertex_buf and use fd_vertex_state directly for all
draws.  Removes a tiny bit of CPU overhead for munging around the vertex
state every time it is emitted, but more importantly it cleans things up
for later optimizations, so the emit paths don't have to special case
internal draws (gmem<->mem, clears, etc) with regular draws.

Instead of constructing an fd3_vertex_buf array each time for internal
draws, pre-create solid_vbuf_state and blit_vbuf_state once at context
init time.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a2xx/fd2_draw.c
src/gallium/drivers/freedreno/a2xx/fd2_program.c
src/gallium/drivers/freedreno/a3xx/fd3_context.c
src/gallium/drivers/freedreno/a3xx/fd3_context.h
src/gallium/drivers/freedreno/a3xx/fd3_draw.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.c
src/gallium/drivers/freedreno/a3xx/fd3_emit.h
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_resource.c
src/gallium/drivers/freedreno/freedreno_state.c

index bc7960a28a5cd2c1449318b9b15f291409c6cbde..6c1a590433204e8b9e58734e58594acd7c017d9d 100644 (file)
@@ -56,8 +56,8 @@ emit_cacheflush(struct fd_ringbuffer *ring)
 static void
 emit_vertexbufs(struct fd_context *ctx)
 {
-       struct fd_vertex_stateobj *vtx = ctx->vtx;
-       struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
+       struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
+       struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
        struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
        unsigned i;
 
index 84b6ffec338a712ec239681b8fb4aa8fdb6f50b8..cb6281bd5db53fe88a2b0969bd957a282f6fab2b 100644 (file)
@@ -174,7 +174,7 @@ patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
                struct ir2_instruction *instr = so->vfetch_instrs[i];
                struct pipe_vertex_element *elem = &vtx->pipe[i];
                struct pipe_vertex_buffer *vb =
-                               &ctx->vertexbuf.vb[elem->vertex_buffer_index];
+                               &ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index];
                enum pipe_format format = elem->src_format;
                const struct util_format_description *desc =
                                util_format_description(format);
@@ -258,7 +258,7 @@ fd2_program_validate(struct fd_context *ctx)
 
        /* if necessary, fix up vertex fetch instructions: */
        if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
-               patch_vtx_fetches(ctx, prog->vp, ctx->vtx);
+               patch_vtx_fetches(ctx, prog->vp, ctx->vtx.vtx);
 
        /* if necessary, fix up texture fetch instructions: */
        if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
index f8f412e1a1ba46a12b5307929461c1d481bb7bb1..4e3b5038bed85b97fac99c004297b8668e801bf8 100644 (file)
@@ -49,6 +49,9 @@ fd3_context_destroy(struct pipe_context *pctx)
        fd_bo_del(fd3_ctx->fs_pvt_mem);
        fd_bo_del(fd3_ctx->vsc_size_mem);
 
+       pctx->delete_vertex_elements_state(pctx, fd3_ctx->solid_vbuf_state.vtx);
+       pctx->delete_vertex_elements_state(pctx, fd3_ctx->blit_vbuf_state.vtx);
+
        pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL);
        pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL);
 
@@ -135,6 +138,34 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
        fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
        fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
 
+       /* setup solid_vbuf_state: */
+       fd3_ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
+                       pctx, 1, (struct pipe_vertex_element[]){{
+                               .vertex_buffer_index = 0,
+                               .src_offset = 0,
+                               .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+                       }});
+       fd3_ctx->solid_vbuf_state.vertexbuf.count = 1;
+       fd3_ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
+       fd3_ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = fd3_ctx->solid_vbuf;
+
+       /* setup blit_vbuf_state: */
+       fd3_ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
+                       pctx, 2, (struct pipe_vertex_element[]){{
+                               .vertex_buffer_index = 0,
+                               .src_offset = 0,
+                               .src_format = PIPE_FORMAT_R32G32_FLOAT,
+                       }, {
+                               .vertex_buffer_index = 1,
+                               .src_offset = 0,
+                               .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+                       }});
+       fd3_ctx->blit_vbuf_state.vertexbuf.count = 2;
+       fd3_ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
+       fd3_ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = fd3_ctx->blit_texcoord_vbuf;
+       fd3_ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
+       fd3_ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = fd3_ctx->solid_vbuf;
+
        fd3_query_context_init(pctx);
 
        fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096,
index 2736470b93a5cce0fce59c4705c3000ad77f01fe..324edb2eb808d3b5eb3eb724dd4cca32c21166cc 100644 (file)
@@ -62,6 +62,18 @@ struct fd3_context {
         */
        struct pipe_resource *blit_texcoord_vbuf;
 
+       /* vertex state for solid_vbuf:
+        *    - solid_vbuf / 12 / R32G32B32_FLOAT
+        */
+       struct fd_vertex_state solid_vbuf_state;
+
+       /* vertex state for blit_prog:
+        *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
+        *    - solid_vbuf / 12 / R32G32B32_FLOAT
+        */
+       struct fd_vertex_state blit_vbuf_state;
+
+
        /*
         * Border color layout *appears* to be as arrays of 0x40 byte
         * elements, with frag shader elements starting at (16 x 0x40).
index bd395f602b0dd71c2c4d91d656337938c3e53ce5..e333a80879f5353cd44cd6ef72be7a37e8bd6999 100644 (file)
@@ -47,26 +47,7 @@ static void
 emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring,
                struct ir3_shader_key key)
 {
-       struct fd_vertex_stateobj *vtx = ctx->vtx;
-       struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
-       struct fd3_vertex_buf bufs[PIPE_MAX_ATTRIBS];
-       unsigned i;
-
-       if (!vtx->num_elements)
-               return;
-
-       for (i = 0; i < vtx->num_elements; i++) {
-               struct pipe_vertex_element *elem = &vtx->pipe[i];
-               struct pipe_vertex_buffer *vb =
-                               &vertexbuf->vb[elem->vertex_buffer_index];
-               bufs[i].offset = vb->buffer_offset + elem->src_offset;
-               bufs[i].stride = vb->stride;
-               bufs[i].prsc   = vb->buffer;
-               bufs[i].format = elem->src_format;
-       }
-
-       fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key),
-                       bufs, vtx->num_elements);
+       fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key), &ctx->vtx);
 }
 
 static void
@@ -75,7 +56,7 @@ draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
 {
        fd3_emit_state(ctx, ring, info, &ctx->prog, key, dirty);
 
-       if (dirty & FD_DIRTY_VTXBUF)
+       if (dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
                emit_vertexbufs(ctx, ring, key);
 
        OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
@@ -185,11 +166,7 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
        fd3_emit_state(ctx, ring, NULL, &ctx->solid_prog, key, dirty);
 
        fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
-                       (struct fd3_vertex_buf[]) {{
-                               .prsc = fd3_ctx->solid_vbuf,
-                               .stride = 12,
-                               .format = PIPE_FORMAT_R32G32B32_FLOAT,
-                       }}, 1);
+                       &fd3_ctx->solid_vbuf_state);
 
        OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
        OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
@@ -320,11 +297,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
        OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
 
        fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
-                       (struct fd3_vertex_buf[]) {{
-                               .prsc = fd3_ctx->solid_vbuf,
-                               .stride = 12,
-                               .format = PIPE_FORMAT_R32G32B32_FLOAT,
-                       }}, 1);
+                       &fd3_ctx->solid_vbuf_state);
 
        fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
 
index d92ebc2f0adbd2f836109b4eabb5dce766664672..e0cbebaeaf7c2ae18aed54284f653b82c4de1ec4 100644 (file)
@@ -331,13 +331,15 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
 
 void
 fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
-               struct ir3_shader_variant *vp,
-               struct fd3_vertex_buf *vbufs, uint32_t n)
+               struct ir3_shader_variant *vp, struct fd_vertex_state *vtx)
 {
        uint32_t i, j, last = 0;
        uint32_t total_in = 0;
+       unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count);
 
-       n = MIN2(n, vp->inputs_count);
+       /* hw doesn't like to be configured for zero vbo's, it seems: */
+       if (vtx->vtx->num_elements == 0)
+               return;
 
        for (i = 0; i < n; i++)
                if (vp->inputs[i].compmask)
@@ -345,9 +347,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
 
        for (i = 0, j = 0; i <= last; i++) {
                if (vp->inputs[i].compmask) {
-                       struct pipe_resource *prsc = vbufs[i].prsc;
-                       struct fd_resource *rsc = fd_resource(prsc);
-                       enum pipe_format pfmt = vbufs[i].format;
+                       struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+                       struct pipe_vertex_buffer *vb =
+                                       &vtx->vertexbuf.vb[elem->vertex_buffer_index];
+                       struct fd_resource *rsc = fd_resource(vb->buffer);
+                       enum pipe_format pfmt = elem->src_format;
                        enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
                        bool switchnext = (i != last);
                        bool isint = util_format_is_pure_integer(pfmt);
@@ -357,11 +361,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
 
                        OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
                        OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
-                                       A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
+                                       A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
                                        COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
                                        A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
                                        A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
-                       OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
+                       OUT_RELOC(ring, rsc->bo, vb->buffer_offset + elem->src_offset, 0, 0);
 
                        OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
                        OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
index 81ff06275bcd5aaf674df115a70b3346c3240411..89e73cf2cc1a676fd12ff29d357f3ea8d20773fd 100644 (file)
@@ -46,21 +46,13 @@ void fd3_emit_constant(struct fd_ringbuffer *ring,
 void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
                struct pipe_surface *psurf);
 
-/* NOTE: this just exists because we don't have proper vertex/vertexbuf
- * state objs for clear, and mem2gmem/gmem2mem operations..
- */
-struct fd3_vertex_buf {
-       unsigned offset, stride;
-       struct pipe_resource *prsc;
-       enum pipe_format format;
-};
-
 void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
-               struct ir3_shader_variant *vp,
-               struct fd3_vertex_buf *vbufs, uint32_t n);
+               struct ir3_shader_variant *vp, struct fd_vertex_state *vtx);
+
 void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                const struct pipe_draw_info *info,  struct fd_program_stateobj *prog,
                struct ir3_shader_key key, uint32_t dirty);
+
 void fd3_emit_restore(struct fd_context *ctx);
 
 #endif /* FD3_EMIT_H */
index c43121993c0464550255079417d3b8408ba2fe92..172bd4c9d20129cce776aabca511781416cc0d0a 100644 (file)
@@ -185,11 +185,7 @@ emit_binning_workaround(struct fd_context *ctx)
 
        fd3_program_emit(ring, &ctx->solid_prog, key, false);
        fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
-                       (struct fd3_vertex_buf[]) {{
-                               .prsc = fd3_ctx->solid_vbuf,
-                               .stride = 12,
-                               .format = PIPE_FORMAT_R32G32B32_FLOAT,
-                       }}, 1);
+                       &fd3_ctx->solid_vbuf_state);
 
        OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
        OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
@@ -410,11 +406,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 
        fd3_program_emit(ring, &ctx->solid_prog, key, false);
        fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
-                       (struct fd3_vertex_buf[]) {{
-                               .prsc = fd3_ctx->solid_vbuf,
-                               .stride = 12,
-                               .format = PIPE_FORMAT_R32G32B32_FLOAT,
-                       }}, 1);
+                       &fd3_ctx->solid_vbuf_state);
 
        if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
                uint32_t base = depth_base(ctx);
@@ -554,15 +546,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 
        fd3_program_emit(ring, &ctx->blit_prog, key, false);
        fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key),
-                       (struct fd3_vertex_buf[]) {{
-                               .prsc = fd3_ctx->blit_texcoord_vbuf,
-                               .stride = 8,
-                               .format = PIPE_FORMAT_R32G32_FLOAT,
-                       }, {
-                               .prsc = fd3_ctx->solid_vbuf,
-                               .stride = 12,
-                               .format = PIPE_FORMAT_R32G32B32_FLOAT,
-                       }}, 2);
+                       &fd3_ctx->blit_vbuf_state);
 
        /* for gmem pitch/base calculations, we need to use the non-
         * truncated tile sizes:
index b7e016bb4691a90026f3e6007a6c235f18e806ae..be2c2638a1792bb1ce23cdb9df43861a6b7480cc 100644 (file)
@@ -82,6 +82,15 @@ struct fd_vertex_stateobj {
        unsigned num_elements;
 };
 
+/* group together the vertex and vertexbuf state.. for ease of passing
+ * around, and because various internal operations (gmem<->mem, etc)
+ * need their own vertex state:
+ */
+struct fd_vertex_state {
+       struct fd_vertex_stateobj *vtx;
+       struct fd_vertexbuf_stateobj vertexbuf;
+};
+
 /* Bitmask of stages in rendering that a particular query query is
  * active.  Queries will be automatically started/stopped (generating
  * additional fd_hw_sample_period's) on entrance/exit from stages that
@@ -304,7 +313,7 @@ struct fd_context {
 
        struct fd_program_stateobj prog;
 
-       struct fd_vertex_stateobj *vtx;
+       struct fd_vertex_state vtx;
 
        struct pipe_blend_color blend_color;
        struct pipe_stencil_ref stencil_ref;
@@ -313,7 +322,6 @@ struct fd_context {
        struct pipe_poly_stipple stipple;
        struct pipe_viewport_state viewport;
        struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
-       struct fd_vertexbuf_stateobj vertexbuf;
        struct pipe_index_buffer indexbuf;
 
        /* GMEM/tile handling fxns: */
index 1b39c3363e5409963f3c6d3aae05d464fb0b0460..6391dd10fc9081eab6884077bfa8e294b00ce38f 100644 (file)
@@ -449,8 +449,8 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
 static void
 fd_blitter_pipe_begin(struct fd_context *ctx)
 {
-       util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertexbuf.vb);
-       util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx);
+       util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
+       util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
        util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
        util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
        util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
index e7d783a6a561220a9014a8b4c6cdf56b357b1f2f..27869295622c6693307de94a3c923abcbe4017f5 100644 (file)
@@ -177,7 +177,7 @@ fd_set_vertex_buffers(struct pipe_context *pctx,
                const struct pipe_vertex_buffer *vb)
 {
        struct fd_context *ctx = fd_context(pctx);
-       struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf;
+       struct fd_vertexbuf_stateobj *so = &ctx->vtx.vertexbuf;
        int i;
 
        /* on a2xx, pitch is encoded in the vtx fetch instruction, so
@@ -286,7 +286,7 @@ static void
 fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
 {
        struct fd_context *ctx = fd_context(pctx);
-       ctx->vtx = hwcso;
+       ctx->vtx.vtx = hwcso;
        ctx->dirty |= FD_DIRTY_VTXSTATE;
 }