cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
uint32_t max_index = 0xffff;
+ uint32_t vpm_offset = 0;
for (int i = 0; i < vtx->num_elements; i++) {
struct pipe_vertex_element *elem = &vtx->pipe[i];
struct pipe_vertex_buffer *vb =
cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
cl_u8(&vc4->shader_rec, elem_size - 1);
cl_u8(&vc4->shader_rec, vb->stride);
- cl_u8(&vc4->shader_rec, i * 16); /* VS VPM offset */
- cl_u8(&vc4->shader_rec, i * 16); /* CS VPM offset */
+ cl_u8(&vc4->shader_rec, vpm_offset); /* VS VPM offset */
+ cl_u8(&vc4->shader_rec, vpm_offset); /* CS VPM offset */
+
+ vpm_offset += align(elem_size, 4) / 4;
if (vb->stride > 0) {
max_index = MIN2(max_index,
emit_vertex_input(struct vc4_compile *c, int attr)
{
enum pipe_format format = c->vs_key->attr_formats[attr];
+ uint32_t attr_size = util_format_get_blocksize(format);
struct qreg vpm_reads[4];
- /* Right now, we're setting the VPM offsets to be 16 bytes wide every
- * time, so we always read 4 32-bit VPM entries.
- */
- for (int i = 0; i < 4; i++) {
- vpm_reads[i] = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_VPM_READ,
- vpm_reads[i],
- c->undef,
- c->undef));
+ for (int i = 0; i < align(attr_size, 4) / 4; i++) {
+ vpm_reads[i] = qir_VPM_READ(c);
c->num_inputs++;
}