From 92a0b0bd7099b15320faaccfd70b3c8dc877810e Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 9 Jan 2015 12:56:34 -0800
Subject: [PATCH] vc4: Pack VPM attr contents according to just the size of the attribute.

total instructions in shared programs: 40960 -> 39753 (-2.95%)
instructions in affected programs: 20871 -> 19664 (-5.78%)
---
 src/gallium/drivers/vc4/vc4_draw.c    |  7 +++++--
 src/gallium/drivers/vc4/vc4_program.c | 12 +++---------
 src/gallium/drivers/vc4/vc4_qir.h     |  1 +
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 0d915040c88..77e98211c6c 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -197,6 +197,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
 
         uint32_t max_index = 0xffff;
+        uint32_t vpm_offset = 0;
         for (int i = 0; i < vtx->num_elements; i++) {
                 struct pipe_vertex_element *elem = &vtx->pipe[i];
                 struct pipe_vertex_buffer *vb =
@@ -210,8 +211,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
                 cl_u8(&vc4->shader_rec, elem_size - 1);
                 cl_u8(&vc4->shader_rec, vb->stride);
-                cl_u8(&vc4->shader_rec, i * 16); /* VS VPM offset */
-                cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */
+                cl_u8(&vc4->shader_rec, vpm_offset); /* VS VPM offset */
+                cl_u8(&vc4->shader_rec, vpm_offset); /* CS VPM offset */
+
+                vpm_offset += align(elem_size, 4) / 4;
 
                 if (vb->stride > 0) {
                         max_index = MIN2(max_index,
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 6bad1560b2f..e362dcdf6ca 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1076,17 +1076,11 @@ static void
 emit_vertex_input(struct vc4_compile *c, int attr)
 {
         enum pipe_format format = c->vs_key->attr_formats[attr];
+        uint32_t attr_size = util_format_get_blocksize(format);
         struct qreg vpm_reads[4];
 
-        /* Right now, we're setting the VPM offsets to be 16 bytes wide every
-         * time, so we always read 4 32-bit VPM entries.
-         */
-        for (int i = 0; i < 4; i++) {
-                vpm_reads[i] = qir_get_temp(c);
-                qir_emit(c, qir_inst(QOP_VPM_READ,
-                                     vpm_reads[i],
-                                     c->undef,
-                                     c->undef));
+        for (int i = 0; i < align(attr_size, 4) / 4; i++) {
+                vpm_reads[i] = qir_VPM_READ(c);
                 c->num_inputs++;
         }
 
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 6dac00fbbd8..d8f9babef4c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -495,6 +495,7 @@ QIR_ALU0(FRAG_W)
 QIR_ALU0(FRAG_REV_FLAG)
 QIR_ALU0(TEX_RESULT)
 QIR_ALU0(TLB_COLOR_READ)
+QIR_ALU0(VPM_READ)
 QIR_NODST_1(TLB_Z_WRITE)
 QIR_NODST_1(TLB_DISCARD_SETUP)
 QIR_NODST_1(TLB_STENCIL_SETUP)
-- 
2.30.2