desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(vb->stride);
- if (sctx->b.chip_class <= CIK && vb->stride)
+ if (sctx->b.chip_class <= CIK && vb->stride) {
/* Round up by rounding down and adding 1 */
desc[2] = (vb->buffer->width0 - offset -
sctx->vertex_elements->format_size[i]) /
vb->stride + 1;
- else
+ } else {
+ uint32_t size3;
+
desc[2] = vb->buffer->width0 - offset;
+ /* For attributes of size 3 with byte or short
+ * components, we use a 4-component data format.
+ *
+ * As a consequence, we have to round the buffer size
+ * up so that the hardware sees four components as
+ * being inside the buffer if and only if the first
+ * three components are in the buffer.
+ *
+ * Since the offset and stride are guaranteed to be
+ * 4-byte aligned, this alignment will never cross the
+ * winsys buffer boundary.
+ */
+ size3 = (sctx->vertex_elements->fix_size3 >> (2 * i)) & 3;
+ if (vb->stride && size3) {
+ assert(offset % 4 == 0 && vb->stride % 4 == 0);
+ assert(size3 <= 2);
+ desc[2] = align(desc[2], size3 * 2);
+ }
+ }
+
desc[3] = sctx->vertex_elements->rsrc_word3[i];
if (!bound[ve->vertex_buffer_index]) {
v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
}
}
+
+ /* We work around the fact that 8_8_8 and 16_16_16 data formats
+ * do not exist by using the corresponding 4-component formats.
+ * This requires a fixup of the descriptor for bounds checks.
+ */
+ if (desc->block.bits == 3 * 8 ||
+ desc->block.bits == 3 * 16) {
+ v->fix_size3 |= (desc->block.bits / 24) << (2 * i);
+ }
}
memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
{
unsigned count;
uint32_t fix_fetch;
+
+ /* Two bits per attribute indicating the size of each vector component
+ * in bytes if the size-3 workaround must be applied.
+ */
+ uint32_t fix_size3;
+
uint32_t rsrc_word3[SI_MAX_ATTRIBS];
uint32_t format_size[SI_MAX_ATTRIBS];
struct pipe_vertex_element elements[SI_MAX_ATTRIBS];