From b920ecf793bd419558a240014624add08774765d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sat, 10 Jan 2015 14:30:25 +1300 Subject: [PATCH] vc4: Cook up the draw-time VPM setup info during shader compile. This will give the compiler the chance to dead-code eliminate unused VPM reads. This is particularly a big deal in the CS where a bunch of vattrs are just not going to be used. --- src/gallium/drivers/vc4/vc4_context.h | 6 ++++++ src/gallium/drivers/vc4/vc4_draw.c | 12 ++++++------ src/gallium/drivers/vc4/vc4_program.c | 19 ++++++++++++++----- src/gallium/drivers/vc4/vc4_qir.h | 2 ++ 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 7e18a75e5b6..90a68e5c28e 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -121,6 +121,12 @@ struct vc4_compiled_shader { uint8_t num_inputs; + /* Byte offsets for the start of the vertex attributes 0-7, and the + * total size as "attribute" 8. + */ + uint8_t vattr_offsets[9]; + uint8_t vattrs_live; + /** * Array of the meanings of the VPM inputs this shader needs. * diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index cc3f2d42183..bb4b9a42217 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -185,14 +185,14 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */ cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */ - cl_u8(&vc4->shader_rec, (1 << num_elements_emit) - 1); /* vs attribute array bitfield */ - cl_u8(&vc4->shader_rec, 16 * num_elements_emit); /* vs total attribute size */ + cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live); + cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]); cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0); cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */ cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */ - cl_u8(&vc4->shader_rec, (1 << num_elements_emit) - 1); /* cs attribute array bitfield */ - cl_u8(&vc4->shader_rec, 16 * num_elements_emit); /* cs total attribute size */ + cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live); + cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]); cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0); cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */ @@ -211,8 +211,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset); cl_u8(&vc4->shader_rec, elem_size - 1); cl_u8(&vc4->shader_rec, vb->stride); - cl_u8(&vc4->shader_rec, vpm_offset); /* VS VPM offset */ - cl_u8(&vc4->shader_rec, vpm_offset); /* CS VPM offset */ + cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]); + cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]); vpm_offset += align(elem_size, 4); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 56cd5c27d7b..581b9400957 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1079,6 +1079,7 @@ emit_vertex_input(struct vc4_compile *c, int attr) uint32_t attr_size = util_format_get_blocksize(format); struct qreg vpm_reads[4]; + c->vattr_sizes[attr] = align(attr_size, 4); for (int i = 0; i < align(attr_size, 4) / 4; i++) { struct qreg vpm = { QFILE_VPM, attr * 4 + i }; vpm_reads[i] = qir_MOV(c, vpm); @@ -1933,11 +1934,10 @@ emit_stub_vpm_read(struct vc4_compile *c) if (c->num_inputs) return; - for (int i = 0; i < 4; i++) { - struct qreg vpm = { QFILE_VPM, 0 }; - (void)qir_MOV(c, vpm); - c->num_inputs++; - } + c->vattr_sizes[0] = 4; + struct qreg vpm = { QFILE_VPM, 0 }; + (void)qir_MOV(c, vpm); + c->num_inputs++; } static void @@ -2275,6 +2275,15 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, } } else { shader->num_inputs = c->num_inputs; + + shader->vattr_offsets[0] = 0; + for (int i = 0; i < 8; i++) { + shader->vattr_offsets[i + 1] = + shader->vattr_offsets[i] + c->vattr_sizes[i]; + + if (c->vattr_sizes[i]) + shader->vattrs_live |= (1 << i); + } } copy_uniform_state_to_shader(shader, c); diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index ebec7ccfbe3..d2f89ae9e69 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -312,6 +312,8 @@ struct vc4_compile { struct qreg line_x, point_x, point_y; struct qreg discard; + uint8_t vattr_sizes[8]; + /** * Array of the TGSI semantics of all FS QFILE_VARY reads. * -- 2.30.2