vc4: Pack VPM attr contents according to just the size of the attribute.
author: Eric Anholt <eric@anholt.net>
Fri, 9 Jan 2015 20:56:34 +0000 (12:56 -0800)
committer: Eric Anholt <eric@anholt.net>
Sat, 10 Jan 2015 00:54:12 +0000 (13:54 +1300)
total instructions in shared programs: 40960 -> 39753 (-2.95%)
instructions in affected programs:     20871 -> 19664 (-5.78%)

src/gallium/drivers/vc4/vc4_draw.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.h

index 0d915040c884f34d59c5fc448230bfb11c902671..77e98211c6c7c686d18bb1cba298cb06b71e249e 100644 (file)
@@ -197,6 +197,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
 
         uint32_t max_index = 0xffff;
+        uint32_t vpm_offset = 0;
         for (int i = 0; i < vtx->num_elements; i++) {
                 struct pipe_vertex_element *elem = &vtx->pipe[i];
                 struct pipe_vertex_buffer *vb =
@@ -210,8 +211,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
                 cl_u8(&vc4->shader_rec, elem_size - 1);
                 cl_u8(&vc4->shader_rec, vb->stride);
-                cl_u8(&vc4->shader_rec, i * 16); /* VS VPM offset */
-                cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */
+                cl_u8(&vc4->shader_rec, vpm_offset); /* VS VPM offset */
+                cl_u8(&vc4->shader_rec, vpm_offset); /* CS VPM offset */
+
+                vpm_offset += align(elem_size, 4) / 4;
 
                 if (vb->stride > 0) {
                         max_index = MIN2(max_index,
index 6bad1560b2f197a29b6a1238841b10dc172181f7..e362dcdf6ca5655887e24bf9d4ca3d0e873b9f9c 100644 (file)
@@ -1076,17 +1076,11 @@ static void
 emit_vertex_input(struct vc4_compile *c, int attr)
 {
         enum pipe_format format = c->vs_key->attr_formats[attr];
+        uint32_t attr_size = util_format_get_blocksize(format);
         struct qreg vpm_reads[4];
 
-        /* Right now, we're setting the VPM offsets to be 16 bytes wide every
-         * time, so we always read 4 32-bit VPM entries.
-         */
-        for (int i = 0; i < 4; i++) {
-                vpm_reads[i] = qir_get_temp(c);
-                qir_emit(c, qir_inst(QOP_VPM_READ,
-                                     vpm_reads[i],
-                                     c->undef,
-                                     c->undef));
+        for (int i = 0; i < align(attr_size, 4) / 4; i++) {
+                vpm_reads[i] = qir_VPM_READ(c);
                 c->num_inputs++;
         }
 
index 6dac00fbbd84ded0d8982e4ba73a8f4b3a76a518..d8f9babef4ce4457eb3f212bb8931b2730043e3c 100644 (file)
@@ -495,6 +495,7 @@ QIR_ALU0(FRAG_W)
 QIR_ALU0(FRAG_REV_FLAG)
 QIR_ALU0(TEX_RESULT)
 QIR_ALU0(TLB_COLOR_READ)
+QIR_ALU0(VPM_READ)
 QIR_NODST_1(TLB_Z_WRITE)
 QIR_NODST_1(TLB_DISCARD_SETUP)
 QIR_NODST_1(TLB_STENCIL_SETUP)