From 65482f2c2b1c3456d0ca58a38d82c02a5d6d362c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 5 Mar 2011 15:53:46 +0100 Subject: [PATCH] r300g: implement instanced arrays --- src/gallium/drivers/r300/r300_context.h | 2 + src/gallium/drivers/r300/r300_emit.c | 100 ++++++++++++++++++------ src/gallium/drivers/r300/r300_emit.h | 3 +- src/gallium/drivers/r300/r300_render.c | 73 +++++++++++------ src/gallium/drivers/r300/r300_screen.c | 2 +- 5 files changed, 132 insertions(+), 48 deletions(-) diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 58e1094e339..1e28221326d 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -610,6 +610,8 @@ struct r300_context { boolean vertex_arrays_dirty; boolean vertex_arrays_indexed; int vertex_arrays_offset; + int vertex_arrays_instance_id; + boolean instancing_enabled; }; #define foreach_atom(r300, atom) \ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index e3945b72d7a..60f83058569 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -794,7 +794,8 @@ void r300_emit_textures_state(struct r300_context *r300, END_CS; } -void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed) +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, + boolean indexed, int instance_id) { struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer; @@ -804,39 +805,92 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde unsigned vertex_array_count = r300->velems->count; unsigned packet_size = (vertex_array_count * 3 + 1) / 2; struct pipe_vertex_buffer *vb1, *vb2; - unsigned *hw_format_size; - unsigned size1, size2; + unsigned *hw_format_size = r300->velems->format_size; + unsigned size1, size2, offset1, offset2, stride1, stride2; CS_LOCALS(r300); BEGIN_CS(2 + packet_size + vertex_array_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); - hw_format_size = r300->velems->format_size; + if (instance_id == -1) { + /* Non-instanced arrays. This ignores instance_divisor and instance_id. */ + for (i = 0; i < vertex_array_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; + + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); + } - for (i = 0; i < vertex_array_count - 1; i += 2) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - vb2 = &vbuf[velem[i+1].vertex_buffer_index]; - size1 = hw_format_size[i]; - size2 = hw_format_size[i+1]; + if (vertex_array_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | - R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); - } + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + } + + for (i = 0; i < vertex_array_count; i++) { + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); + } + } else { + /* Instanced arrays. */ + for (i = 0; i < vertex_array_count - 1; i += 2) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + vb2 = &vbuf[velem[i+1].vertex_buffer_index]; + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; + + if (velem[i].instance_divisor) { + stride1 = 0; + offset1 = vb1->buffer_offset + velem[i].src_offset + + (instance_id / velem[i].instance_divisor) * vb1->stride; + } else { + stride1 = vb1->stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + } + if (velem[i+1].instance_divisor) { + stride2 = 0; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + + (instance_id / velem[i+1].instance_divisor) * vb2->stride; + } else { + stride2 = vb2->stride; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride; + } - if (vertex_array_count & 1) { - vb1 = &vbuf[velem[i].vertex_buffer_index]; - size1 = hw_format_size[i]; + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2)); + OUT_CS(offset1); + OUT_CS(offset2); + } - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - } + if (vertex_array_count & 1) { + vb1 = &vbuf[velem[i].vertex_buffer_index]; + size1 = hw_format_size[i]; + + if (velem[i].instance_divisor) { + stride1 = 0; + offset1 = vb1->buffer_offset + velem[i].src_offset + + (instance_id / velem[i].instance_divisor) * vb1->stride; + } else { + stride1 = vb1->stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + } - for (i = 0; i < vertex_array_count; i++) { - buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); - OUT_CS_RELOC(buf); + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); + OUT_CS(offset1); + } + + for (i = 0; i < vertex_array_count; i++) { + buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + OUT_CS_RELOC(buf); + } } END_CS; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index acea51d942f..6c1c9d2fb13 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,7 +31,8 @@ struct r300_vertex_program_code; uint32_t pack_float24(float f); -void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed); +void r300_emit_vertex_arrays(struct r300_context* r300, int offset, + boolean indexed, int instance_id); void r300_emit_blend_state(struct r300_context* r300, unsigned size, void* state); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index c402a83541e..300cb86acfe 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -233,13 +233,14 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300, * \param index_buffer The index buffer to validate. The parameter may be NULL. * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. + * \param instance_id Index of instance to render * \return TRUE if rendering should be skipped */ static boolean r300_emit_states(struct r300_context *r300, enum r300_prepare_flags flags, struct pipe_resource *index_buffer, int buffer_offset, - int index_bias) + int index_bias, int instance_id) { boolean first_draw = flags & PREP_EMIT_STATES; boolean emit_vertex_arrays = flags & PREP_EMIT_AOS; @@ -267,12 +268,14 @@ static boolean r300_emit_states(struct r300_context *r300, if (emit_vertex_arrays && (r300->vertex_arrays_dirty || r300->vertex_arrays_indexed != indexed || - r300->vertex_arrays_offset != buffer_offset)) { - r300_emit_vertex_arrays(r300, buffer_offset, indexed); + r300->vertex_arrays_offset != buffer_offset || + r300->vertex_arrays_instance_id != instance_id)) { + r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id); r300->vertex_arrays_dirty = FALSE; r300->vertex_arrays_indexed = indexed; r300->vertex_arrays_offset = buffer_offset; + r300->vertex_arrays_instance_id = instance_id; } if (emit_vertex_arrays_swtcl) @@ -291,6 +294,7 @@ static boolean r300_emit_states(struct r300_context *r300, * \param cs_dwords The number of dwords to reserve in CS. * \param buffer_offset The offset passed to emit_vertex_arrays. * \param index_bias The index bias to emit. + * \param instance_id The instance to render. * \return TRUE if rendering should be skipped */ static boolean r300_prepare_for_rendering(struct r300_context *r300, @@ -298,14 +302,15 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300, struct pipe_resource *index_buffer, unsigned cs_dwords, int buffer_offset, - int index_bias) + int index_bias, + int instance_id) { /* Make sure there is enough space in the command stream and emit states. */ if (r300_reserve_cs_dwords(r300, flags, cs_dwords)) flags |= PREP_EMIT_STATES; return r300_emit_states(r300, flags, index_buffer, buffer_offset, - index_bias); + index_bias, instance_id); } static boolean immd_is_good_idea(struct r300_context *r300, @@ -379,7 +384,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, CS_LOCALS(r300); - if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) return; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ @@ -540,7 +545,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300, /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias)) + PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1)) return; r300_emit_draw_init(r300, info->mode, info->min_index, info->max_index); @@ -612,7 +617,8 @@ static void r300_draw_elements_immediate(struct r300_context *r300, } static void r300_draw_elements(struct r300_context *r300, - const struct pipe_draw_info *info) + const struct pipe_draw_info *info, + int instance_id) { struct pipe_resource *indexBuffer = r300->index_buffer.buffer; unsigned indexSize = r300->index_buffer.index_size; @@ -661,7 +667,8 @@ static void r300_draw_elements(struct r300_context *r300, /* 19 dwords for emit_draw_elements. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | - PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias)) + PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias, + instance_id)) goto done; if (alt_num_verts || count <= 65535) { @@ -686,7 +693,8 @@ static void r300_draw_elements(struct r300_context *r300, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, - indexBuffer, 19, buffer_offset, info->index_bias)) + indexBuffer, 19, buffer_offset, info->index_bias, + instance_id)) goto done; } } while (count); @@ -699,7 +707,8 @@ done: } static void r300_draw_arrays(struct r300_context *r300, - const struct pipe_draw_info *info) + const struct pipe_draw_info *info, + int instance_id) { boolean alt_num_verts = r300->screen->caps.is_r500 && info->count > 65536; @@ -710,7 +719,7 @@ static void r300_draw_arrays(struct r300_context *r300, /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */ if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, - NULL, 9, start, 0)) + NULL, 9, start, 0, instance_id)) return; if (alt_num_verts || count <= 65535) { @@ -727,13 +736,31 @@ static void r300_draw_arrays(struct r300_context *r300, if (count) { if (!r300_prepare_for_rendering(r300, PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, - start, 0)) + start, 0, instance_id)) return; } } while (count); } } +static void r300_draw_arrays_instanced(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + int i; + + for (i = 0; i < info->instance_count; i++) + r300_draw_arrays(r300, info, i); +} + +static void r300_draw_elements_instanced(struct r300_context *r300, + const struct pipe_draw_info *info) +{ + int i; + + for (i = 0; i < info->instance_count; i++) + r300_draw_elements(r300, info, i); +} + static void r300_draw_vbo(struct pipe_context* pipe, const struct pipe_draw_info *dinfo) { @@ -767,20 +794,20 @@ static void r300_draw_vbo(struct pipe_context* pipe, r300_resource(r300->index_buffer.buffer)->b.user_ptr) { r300_draw_elements_immediate(r300, &info); } else { - r300_draw_elements(r300, &info); + r300_draw_elements(r300, &info, -1); } } else { - assert(0); + r300_draw_elements_instanced(r300, &info); } } else { if (info.instance_count <= 1) { if (immd_is_good_idea(r300, info.count)) { r300_draw_arrays_immediate(r300, &info); } else { - r300_draw_arrays(r300, &info); + r300_draw_arrays(r300, &info, -1); } } else { - assert(0); + r300_draw_arrays_instanced(r300, &info); } } @@ -998,12 +1025,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, - NULL, dwords, 0, 0)) + NULL, dwords, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL, - NULL, 0, 0)) + NULL, 0, 0, -1)) return; } @@ -1038,12 +1065,12 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (r300->draw_first_emitted) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + NULL, 256, 0, 0, -1)) return; } else { if (!r300_emit_states(r300, PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 0, 0)) + NULL, 0, 0, -1)) return; } @@ -1080,7 +1107,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, if (count) { if (!r300_prepare_for_rendering(r300, PREP_EMIT_AOS_SWTCL | PREP_INDEXED, - NULL, 256, 0, 0)) + NULL, 256, 0, 0, -1)) return; end_cs_dwords = r300_get_num_cs_end_dwords(r300); @@ -1184,7 +1211,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, r300->clip_state.dirty = FALSE; r300->viewport_state.dirty = FALSE; - if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0)) + if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1)) goto done; DBG(r300, DBG_DRAW, "r300: draw_rectangle\n"); diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 2f82f78125d..6c0dc99dd74 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -112,6 +112,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return 1; case PIPE_CAP_TEXTURE_SWIZZLE: return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; @@ -127,7 +128,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_TGSI_INSTANCEID: - case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: case PIPE_CAP_ARRAY_TEXTURES: return 0; -- 2.30.2