freedreno/a6xx: Create stateobj for VFD_DECODE
author Kristian H. Kristensen <hoegsberg@google.com>
Thu, 21 May 2020 08:38:44 +0000 (01:38 -0700)
committer Marge Bot <eric+marge@anholt.net>
Fri, 29 May 2020 18:59:56 +0000 (18:59 +0000)
The VFD_DECODE state now depends only on the vertex element state, so we
can build it once up front in pctx->create_vertex_elements_state().

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5140>

src/gallium/drivers/freedreno/a6xx/fd6_context.c
src/gallium/drivers/freedreno/a6xx/fd6_context.h
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/freedreno_state.c

index d31d43a3c8a21886e36781dd95a344dcdc5fe0cb..773c3d239f62f0b926977e8cddef985c7a10748a 100644 (file)
@@ -81,6 +81,49 @@ static const uint8_t primtypes[] = {
                [PIPE_PRIM_MAX]                         = DI_PT_RECTLIST,  /* internal clear blits */
 };
 
+/**
+ * a6xx implementation of pctx->create_vertex_elements_state().
+ *
+ * Copies the element descriptions into the generic base state and encodes
+ * the VFD_DECODE register writes once, into a ringbuffer object stored in
+ * the returned state (released again by fd6_vertex_state_delete()).
+ *
+ * @param pctx          context the state is created for
+ * @param num_elements  number of entries in @p elements
+ * @param elements      vertex element descriptions to encode
+ * @return the new fd6_vertex_stateobj, or NULL if it could not be allocated
+ */
+static void *
+fd6_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
+               const struct pipe_vertex_element *elements)
+{
+       struct fd_context *ctx = fd_context(pctx);
+
+       struct fd6_vertex_stateobj *state = CALLOC_STRUCT(fd6_vertex_stateobj);
+       /* Report allocation failure to the caller instead of writing through
+        * a NULL pointer below.
+        */
+       if (!state)
+               return NULL;
+
+       memcpy(state->base.pipe, elements, sizeof(*elements) * num_elements);
+       state->base.num_elements = num_elements;
+
+       /* Sized in bytes for the two dwords written per element below plus one
+        * extra dword (presumably the PKT4 header -- confirm against OUT_PKT4).
+        */
+       state->stateobj =
+               fd_ringbuffer_new_object(ctx->pipe, 4 * (num_elements * 2 + 1));
+       struct fd_ringbuffer *ring = state->stateobj;
+
+       OUT_PKT4(ring, REG_A6XX_VFD_DECODE(0), 2 * num_elements);
+       for (unsigned i = 0; i < num_elements; i++) {
+               const struct pipe_vertex_element *elem = &elements[i];
+               enum pipe_format pfmt = elem->src_format;
+               enum a6xx_format fmt = fd6_pipe2vtx(pfmt);
+               bool isint = util_format_is_pure_integer(pfmt);
+               debug_assert(fmt != FMT6_NONE);
+
+               /* VFD_DECODE[i].INSTR: FLOAT is set for every format that is not
+                * pure integer; INSTANCED only when a divisor is given.
+                */
+               OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(elem->vertex_buffer_index) |
+                               A6XX_VFD_DECODE_INSTR_OFFSET(elem->src_offset) |
+                               A6XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+                               COND(elem->instance_divisor, A6XX_VFD_DECODE_INSTR_INSTANCED) |
+                               A6XX_VFD_DECODE_INSTR_SWAP(fd6_pipe2swap(pfmt)) |
+                               A6XX_VFD_DECODE_INSTR_UNK30 |
+                               COND(!isint, A6XX_VFD_DECODE_INSTR_FLOAT));
+               /* The step rate is clamped to at least 1, also when the divisor is 0. */
+               OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[i].STEP_RATE */
+       }
+
+       return state;
+}
+
+/**
+ * a6xx implementation of pctx->delete_vertex_elements_state(): calls
+ * fd_ringbuffer_del() on the state's VFD_DECODE ringbuffer object, then
+ * frees the state object itself.
+ */
+static void
+fd6_vertex_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd6_vertex_stateobj *vtx_state = hwcso;
+       struct fd_ringbuffer *decode = vtx_state->stateobj;
+
+       fd_ringbuffer_del(decode);
+       FREE(vtx_state);
+}
+
 struct pipe_context *
 fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
@@ -157,6 +200,7 @@ PC_UNKNOWN_9805:
        pctx->create_blend_state = fd6_blend_state_create;
        pctx->create_rasterizer_state = fd6_rasterizer_state_create;
        pctx->create_depth_stencil_alpha_state = fd6_zsa_state_create;
+       pctx->create_vertex_elements_state = fd6_vertex_state_create;
 
        fd6_draw_init(pctx);
        fd6_compute_init(pctx);
@@ -175,6 +219,8 @@ PC_UNKNOWN_9805:
 
        util_blitter_set_texture_multisample(fd6_ctx->base.blitter, true);
 
+       pctx->delete_vertex_elements_state = fd6_vertex_state_delete;
+
        /* fd_context_init overwrites delete_rasterizer_state, so set this
         * here. */
        pctx->delete_rasterizer_state = fd6_rasterizer_state_delete;
index 75a52d4828ae8a0ec608e72137fa7875aa659468..c890cdd84231fea0f62dc022f1514e8c6b58093d 100644 (file)
@@ -168,4 +168,16 @@ emit_marker6(struct fd_ringbuffer *ring, int scratch_idx)
        }
 }
 
+struct fd6_vertex_stateobj {            /* a6xx vertex-elements state object */
+       struct fd_vertex_stateobj base;  /* generic state: copied element array and element count */
+       struct fd_ringbuffer *stateobj;  /* ringbuffer object holding the VFD_DECODE writes for these elements */
+};
+
+/* Reinterpret an opaque state pointer (e.g. ctx->vtx.vtx) as the a6xx
+ * vertex state object; no checking is performed.
+ */
+static inline struct fd6_vertex_stateobj *
+fd6_vertex_stateobj(void *p)
+{
+       struct fd6_vertex_stateobj *so = (struct fd6_vertex_stateobj *) p;
+
+       return so;
+}
+
+
 #endif /* FD6_CONTEXT_H_ */
index 741f4b8e6d3ea9263e82a028a9311cfdd9e99068..9f281893dcdd89a1c9dbefabc3e37638e9df51a3 100644 (file)
@@ -572,7 +572,7 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
 
        OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1);
        OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(vtx->vertexbuf.count) |
-                       A6XX_VFD_CONTROL_0_DECODE_CNT(cnt));
+                       A6XX_VFD_CONTROL_0_DECODE_CNT(vtx->vtx->num_elements));
 
        OUT_PKT4(ring, REG_A6XX_VFD_FETCH(0), 4 * vtx->vertexbuf.count);
        for (int32_t j = 0; j < vtx->vertexbuf.count; j++) {
@@ -593,25 +593,6 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
                }
        }
 
-       OUT_PKT4(ring, REG_A6XX_VFD_DECODE(0), 2 * cnt);
-       for (int32_t j = 0; j < cnt; j++) {
-               int32_t i = map[j];
-               struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
-               enum pipe_format pfmt = elem->src_format;
-               enum a6xx_format fmt = fd6_pipe2vtx(pfmt);
-               bool isint = util_format_is_pure_integer(pfmt);
-               debug_assert(fmt != FMT6_NONE);
-
-               OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(elem->vertex_buffer_index) |
-                               A6XX_VFD_DECODE_INSTR_OFFSET(elem->src_offset) |
-                               A6XX_VFD_DECODE_INSTR_FORMAT(fmt) |
-                               COND(elem->instance_divisor, A6XX_VFD_DECODE_INSTR_INSTANCED) |
-                               A6XX_VFD_DECODE_INSTR_SWAP(fd6_pipe2swap(pfmt)) |
-                               A6XX_VFD_DECODE_INSTR_UNK30 |
-                               COND(!isint, A6XX_VFD_DECODE_INSTR_FLOAT));
-               OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */
-       }
-
        OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(0), cnt);
        for (int32_t j = 0; j < cnt; j++) {
                int32_t i = map[j];
@@ -619,8 +600,6 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
                OUT_RING(ring, A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) |
                                A6XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid));
        }
-
-       return ring;
 }
 
 static struct fd_ringbuffer *
@@ -752,6 +731,15 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
        if (fs->fb_read)
                ctx->batch->gmem_reason |= FD_GMEM_FB_READ;
 
+       if (emit->dirty & FD_DIRTY_VTXSTATE) {
+               struct fd6_vertex_stateobj *vtx = fd6_vertex_stateobj(ctx->vtx.vtx);
+
+               fd6_emit_add_group(emit, vtx->stateobj, FD6_GROUP_VTXSTATE, ENABLE_ALL);
+       }
+
+       /* VFD_CONTROL packs both the vfd fetch count and the vfd decode count,
+        * so we have to emit this if either one changes.
+        */
        if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) {
                struct fd_ringbuffer *state;
 
index 9f4d1b4ce421ef03076149c5955b8cd811062900..16e54047245e78016516f0e94aefebbd2493bd1f 100644 (file)
@@ -50,6 +50,7 @@ enum fd6_state_id {
        FD6_GROUP_PROG_FB_RAST,
        FD6_GROUP_LRZ,
        FD6_GROUP_LRZ_BINNING,
+       FD6_GROUP_VTXSTATE,
        FD6_GROUP_VBO,
        FD6_GROUP_CONST,
        FD6_GROUP_VS_DRIVER_PARAMS,
index 2049e381cc0e0d67b7b4cc6d3a6d5d85c72f91a6..1e0f6f40b18f37c193441c77f5fad89346613513 100644 (file)
@@ -623,7 +623,8 @@ fd_state_init(struct pipe_context *pctx)
        pctx->bind_depth_stencil_alpha_state = fd_zsa_state_bind;
        pctx->delete_depth_stencil_alpha_state = fd_zsa_state_delete;
 
-       pctx->create_vertex_elements_state = fd_vertex_state_create;
+       if (!pctx->create_vertex_elements_state)
+               pctx->create_vertex_elements_state = fd_vertex_state_create;
        pctx->delete_vertex_elements_state = fd_vertex_state_delete;
        pctx->bind_vertex_elements_state = fd_vertex_state_bind;