unsigned int min_index = brw->vb.min_index;
unsigned int max_index = brw->vb.max_index;
int delta, i, j;
+ GLboolean can_merge_uploads = GL_TRUE;
struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0;
const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
if (glarray->BufferObj == other->BufferObj &&
glarray->StrideB == other->StrideB &&
+ glarray->InstanceDivisor == other->InstanceDivisor &&
(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
{
input->buffer = brw->vb.enabled[k]->buffer;
drm_intel_bo_reference(buffer->bo);
buffer->offset += (uintptr_t)glarray->Ptr;
buffer->stride = glarray->StrideB;
+ buffer->step_rate = glarray->InstanceDivisor;
input->buffer = j++;
input->offset = 0;
}
upload[nr_uploads++] = input;
+
total_size = ALIGN(total_size, type_size);
total_size += input->element_size;
+
+ if (glarray->InstanceDivisor != 0) {
+ can_merge_uploads = GL_FALSE;
+ }
}
}
nr_uploads = 0;
}
- else if (total_size < 2048) {
+ else if ((total_size < 2048) && can_merge_uploads) {
/* Upload non-interleaved arrays into a single interleaved array */
struct brw_vertex_buffer *buffer;
int count = MAX2(max_index - min_index + 1, 1);
intel_upload_unmap(&brw->intel, map, offset * count, offset,
&buffer->bo, &buffer->offset);
buffer->stride = offset;
+ buffer->step_rate = 0;
buffer->offset -= delta * offset;
nr_uploads = 0;
/* Upload non-interleaved arrays */
for (i = 0; i < nr_uploads; i++) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
- copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
- buffer, upload[i]->element_size);
+ if (upload[i]->glarray->InstanceDivisor == 0) {
+ copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
+ buffer, upload[i]->element_size);
+ } else {
+ /* This is an instanced attribute, since its InstanceDivisor
+ * is not zero. The attribute advances to its next element only
+ * once every InstanceDivisor instances, so only elements
+ * 0 .. (num_instances - 1) / InstanceDivisor are uploaded below.
+ */
+ uint32_t instanced_attr_max_index =
+ (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
+ copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
+ buffer, upload[i]->element_size);
+ }
buffer->offset -= delta * buffer->stride;
+ buffer->step_rate = upload[i]->glarray->InstanceDivisor;
upload[i]->buffer = j++;
upload[i]->offset = 0;
}
int d;
if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
- brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride)
+ brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
+ brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
break;
d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
uint32_t dw0;
if (intel->gen >= 6) {
- dw0 = GEN6_VB0_ACCESS_VERTEXDATA | (i << GEN6_VB0_INDEX_SHIFT);
+ dw0 = buffer->step_rate
+ ? GEN6_VB0_ACCESS_INSTANCEDATA
+ : GEN6_VB0_ACCESS_VERTEXDATA;
+ dw0 |= i << GEN6_VB0_INDEX_SHIFT;
} else {
- dw0 = BRW_VB0_ACCESS_VERTEXDATA | (i << BRW_VB0_INDEX_SHIFT);
+ dw0 = buffer->step_rate
+ ? BRW_VB0_ACCESS_INSTANCEDATA
+ : BRW_VB0_ACCESS_VERTEXDATA;
+ dw0 |= i << BRW_VB0_INDEX_SHIFT;
}
if (intel->gen >= 7)
OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
} else
OUT_BATCH(0);
- OUT_BATCH(0); /* Instance data step rate */
+ OUT_BATCH(buffer->step_rate);
brw->vb.current_buffers[i].handle = buffer->bo->handle;
brw->vb.current_buffers[i].offset = buffer->offset;
brw->vb.current_buffers[i].stride = buffer->stride;
+ brw->vb.current_buffers[i].step_rate = buffer->step_rate;
}
brw->vb.nr_current_buffers = i;
ADVANCE_BATCH();