From 8952dd6d991fd5041a48df31f849e8ddbcb74046 Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Wed, 20 May 2020 22:50:20 -0700 Subject: [PATCH] freedreno/a6xx: Decouple VFD_FETCH and VFD_DECODE We used to output a VFD_FETCH entry for each VFD_DECODE, but we can instead output just one VFD_FETCH per VBO and point multiple VFD_DECODE entries at the same VFD_FETCH entry. There are typically fewer VBOs than vertex elements so this is a small win in itself, but more importantly, the VFD_DECODE state now only depends on program state. Part-of: --- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 36 ++++++++----------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 5f5198f2652..741f4b8e6d3 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -568,37 +568,29 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp) } struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit, - 4 * (5 + cnt * 7), FD_RINGBUFFER_STREAMING); + 4 * (5 + cnt * 3 + vtx->vertexbuf.count * 4), FD_RINGBUFFER_STREAMING); OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_0, 1); - OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(cnt) | + OUT_RING(ring, A6XX_VFD_CONTROL_0_FETCH_CNT(vtx->vertexbuf.count) | A6XX_VFD_CONTROL_0_DECODE_CNT(cnt)); - OUT_PKT4(ring, REG_A6XX_VFD_FETCH(0), 4 * cnt); - for (int32_t j = 0; j < cnt; j++) { - int32_t i = map[j]; - struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; - const struct pipe_vertex_buffer *vb = - &vtx->vertexbuf.vb[elem->vertex_buffer_index]; + OUT_PKT4(ring, REG_A6XX_VFD_FETCH(0), 4 * vtx->vertexbuf.count); + for (int32_t j = 0; j < vtx->vertexbuf.count; j++) { + const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[j]; struct fd_resource *rsc = fd_resource(vb->buffer.resource); - uint32_t off = vb->buffer_offset; - uint32_t size = 
fd_bo_size(rsc->bo) - off; - -#ifdef DEBUG - /* see dEQP-GLES31.stress.vertex_attribute_binding.buffer_bounds.bind_vertex_buffer_offset_near_wrap_10 - */ - if (off > fd_bo_size(rsc->bo)) { + if (rsc == NULL) { OUT_RING(ring, 0); OUT_RING(ring, 0); OUT_RING(ring, 0); OUT_RING(ring, 0); - continue; - } -#endif + } else { + uint32_t off = vb->buffer_offset; + uint32_t size = fd_bo_size(rsc->bo) - off; - OUT_RELOC(ring, rsc->bo, off, 0, 0); - OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ - OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ + OUT_RELOC(ring, rsc->bo, off, 0, 0); + OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ + OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ + } } OUT_PKT4(ring, REG_A6XX_VFD_DECODE(0), 2 * cnt); @@ -610,7 +602,7 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp) bool isint = util_format_is_pure_integer(pfmt); debug_assert(fmt != FMT6_NONE); - OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(j) | + OUT_RING(ring, A6XX_VFD_DECODE_INSTR_IDX(elem->vertex_buffer_index) | A6XX_VFD_DECODE_INSTR_OFFSET(elem->src_offset) | A6XX_VFD_DECODE_INSTR_FORMAT(fmt) | COND(elem->instance_divisor, A6XX_VFD_DECODE_INSTR_INSTANCED) | -- 2.30.2