From 3588098ed85bbd6bacac5a8dd3774569f1314783 Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Sun, 27 May 2012 10:48:47 -0700 Subject: [PATCH] i965: enable ARB_instanced_arrays extension Set the step_rate value when drawing to implement ARB_instanced_arrays for gen >= 4. Signed-off-by: Jordan Justen Reviewed-by: Eric Anholt --- docs/GL3.txt | 2 +- src/mesa/drivers/dri/i965/brw_context.h | 4 ++ src/mesa/drivers/dri/i965/brw_draw.c | 1 + src/mesa/drivers/dri/i965/brw_draw_upload.c | 43 ++++++++++++++++--- src/mesa/drivers/dri/intel/intel_extensions.c | 1 + 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 91ede50ca19..36d24725d05 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -81,7 +81,7 @@ GL_ARB_shader_bit_encoding DONE GL_ARB_texture_rgb10_a2ui DONE (r600) GL_ARB_texture_swizzle DONE (same as EXT version) (i965, r300, r600, swrast) GL_ARB_timer_query ~60% done (the EXT variant) -GL_ARB_instanced_arrays DONE (r300, r600) +GL_ARB_instanced_arrays DONE (i965, r300, r600) GL_ARB_vertex_type_2_10_10_10_rev DONE (r600) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 69659c4e53c..2c9f5adc771 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -676,6 +676,7 @@ struct brw_vertex_buffer { uint32_t offset; /** Byte stride between elements in the uploaded array */ GLuint stride; + GLuint step_rate; }; struct brw_vertex_element { const struct gl_client_array *glarray; @@ -738,6 +739,7 @@ struct brw_context uint32_t handle; uint32_t offset; uint32_t stride; + uint32_t step_rate; } current_buffers[VERT_ATTRIB_MAX]; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; @@ -1046,6 +1048,8 @@ struct brw_context bool in_progress; bool enable_cut_index; } prim_restart; + + uint32_t num_instances; }; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 16ce9949779..1069a6335fe 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -466,6 +466,7 @@ static bool brw_try_draw_prims( struct gl_context *ctx, intel_batchbuffer_require_space(intel, estimated_max_prim_size, false); intel_batchbuffer_save_state(intel); + brw->num_instances = prim->num_instances; if (intel->gen < 6) brw_set_prim(brw, &prim[i]); else diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index cf7783725d8..82dd81d03c0 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -361,6 +361,7 @@ static void brw_prepare_vertices(struct brw_context *brw) unsigned int min_index = brw->vb.min_index; unsigned int max_index = brw->vb.max_index; int delta, i, j; + GLboolean can_merge_uploads = GL_TRUE; struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -403,6 +404,7 @@ static void brw_prepare_vertices(struct brw_context *brw) const struct gl_client_array *other = brw->vb.enabled[k]->glarray; if (glarray->BufferObj == other->BufferObj && glarray->StrideB == other->StrideB && + glarray->InstanceDivisor == other->InstanceDivisor && (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB) { input->buffer = brw->vb.enabled[k]->buffer; @@ -420,6 +422,7 @@ static void brw_prepare_vertices(struct brw_context *brw) drm_intel_bo_reference(buffer->bo); buffer->offset += (uintptr_t)glarray->Ptr; buffer->stride = glarray->StrideB; + buffer->step_rate = glarray->InstanceDivisor; input->buffer = j++; input->offset = 0; @@ -465,8 +468,13 @@ static void brw_prepare_vertices(struct brw_context *brw) } upload[nr_uploads++] = input; + total_size = ALIGN(total_size, type_size); total_size += input->element_size; + + if (glarray->InstanceDivisor != 0) { + can_merge_uploads = GL_FALSE; + } } } @@ -504,7 +512,7 @@ static void brw_prepare_vertices(struct brw_context *brw) nr_uploads = 0; } - else if (total_size < 2048) { + else if ((total_size < 2048) && can_merge_uploads) { /* Upload non-interleaved arrays into a single interleaved array */ struct brw_vertex_buffer *buffer; int count = MAX2(max_index - min_index + 1, 1); @@ -539,6 +547,7 @@ static void brw_prepare_vertices(struct brw_context *brw) intel_upload_unmap(&brw->intel, map, offset * count, offset, &buffer->bo, &buffer->offset); buffer->stride = offset; + buffer->step_rate = 0; buffer->offset -= delta * offset; nr_uploads = 0; @@ -547,9 +556,21 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Upload non-interleaved arrays */ for (i = 0; i < nr_uploads; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - copy_array_to_vbo_array(brw, upload[i], min_index, max_index, - buffer, upload[i]->element_size); + if (upload[i]->glarray->InstanceDivisor == 0) { + copy_array_to_vbo_array(brw, upload[i], min_index, max_index, + buffer, upload[i]->element_size); + } else { + /* This is an instanced attribute, since its InstanceDivisor + * is not zero. Therefore, its data will be stepped after the + * instanced draw has been run InstanceDivisor times. + */ + uint32_t instanced_attr_max_index = + (brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor; + copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, + buffer, upload[i]->element_size); + } buffer->offset -= delta * buffer->stride; + buffer->step_rate = upload[i]->glarray->InstanceDivisor; upload[i]->buffer = j++; upload[i]->offset = 0; } @@ -561,7 +582,8 @@ static void brw_prepare_vertices(struct brw_context *brw) int d; if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle || - brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride) + brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride || + brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate) break; d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset; @@ -643,9 +665,15 @@ static void brw_emit_vertices(struct brw_context *brw) uint32_t dw0; if (intel->gen >= 6) { - dw0 = GEN6_VB0_ACCESS_VERTEXDATA | (i << GEN6_VB0_INDEX_SHIFT); + dw0 = buffer->step_rate + ? GEN6_VB0_ACCESS_INSTANCEDATA + : GEN6_VB0_ACCESS_VERTEXDATA; + dw0 |= i << GEN6_VB0_INDEX_SHIFT; } else { - dw0 = BRW_VB0_ACCESS_VERTEXDATA | (i << BRW_VB0_INDEX_SHIFT); + dw0 = buffer->step_rate + ? BRW_VB0_ACCESS_INSTANCEDATA + : BRW_VB0_ACCESS_VERTEXDATA; + dw0 |= i << BRW_VB0_INDEX_SHIFT; } if (intel->gen >= 7) @@ -657,11 +685,12 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); } else OUT_BATCH(0); - OUT_BATCH(0); /* Instance data step rate */ + OUT_BATCH(buffer->step_rate); brw->vb.current_buffers[i].handle = buffer->bo->handle; brw->vb.current_buffers[i].offset = buffer->offset; brw->vb.current_buffers[i].stride = buffer->stride; + brw->vb.current_buffers[i].step_rate = buffer->step_rate; } brw->vb.nr_current_buffers = i; ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 29da36ce7bb..953b98389c3 100755 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -115,6 +115,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_depth_buffer_float = true; ctx->Extensions.ARB_depth_clamp = true; ctx->Extensions.ARB_draw_instanced = true; + ctx->Extensions.ARB_instanced_arrays = true; ctx->Extensions.ARB_fragment_coord_conventions = true; ctx->Extensions.ARB_fragment_program_shadow = true; ctx->Extensions.ARB_fragment_shader = true; -- 2.30.2