r300g: implement instanced arrays
author Marek Olšák <maraeo@gmail.com>
Sat, 5 Mar 2011 14:53:46 +0000 (15:53 +0100)
committer Marek Olšák <maraeo@gmail.com>
Sat, 5 Mar 2011 16:41:11 +0000 (17:41 +0100)
src/gallium/drivers/r300/r300_context.h
src/gallium/drivers/r300/r300_emit.c
src/gallium/drivers/r300/r300_emit.h
src/gallium/drivers/r300/r300_render.c
src/gallium/drivers/r300/r300_screen.c

index 58e1094e339f8887326232026acd14834fc95573..1e28221326dfabc4a21253733048296f11007c41 100644 (file)
@@ -610,6 +610,8 @@ struct r300_context {
     boolean vertex_arrays_dirty;
     boolean vertex_arrays_indexed;
     int vertex_arrays_offset;
+    int vertex_arrays_instance_id;
+    boolean instancing_enabled;
 };
 
 #define foreach_atom(r300, atom) \
index e3945b72d7a35fce9cda82fda95873cce9d6589a..60f83058569094b936156a9bacc0b553c5794d12 100644 (file)
@@ -794,7 +794,8 @@ void r300_emit_textures_state(struct r300_context *r300,
     END_CS;
 }
 
-void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed)
+void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
+                             boolean indexed, int instance_id)
 {
     struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer;
     struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer;
@@ -804,39 +805,92 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde
     unsigned vertex_array_count = r300->velems->count;
     unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
     struct pipe_vertex_buffer *vb1, *vb2;
-    unsigned *hw_format_size;
-    unsigned size1, size2;
+    unsigned *hw_format_size = r300->velems->format_size;
+    unsigned size1, size2, offset1, offset2, stride1, stride2;
     CS_LOCALS(r300);
 
     BEGIN_CS(2 + packet_size + vertex_array_count * 2);
     OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
     OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
 
-    hw_format_size = r300->velems->format_size;
+    if (instance_id == -1) {
+        /* Non-instanced arrays. This ignores instance_divisor and instance_id. */
+        for (i = 0; i < vertex_array_count - 1; i += 2) {
+            vb1 = &vbuf[velem[i].vertex_buffer_index];
+            vb2 = &vbuf[velem[i+1].vertex_buffer_index];
+            size1 = hw_format_size[i];
+            size2 = hw_format_size[i+1];
+
+            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
+                   R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
+            OUT_CS(vb1->buffer_offset + velem[i].src_offset   + offset * vb1->stride);
+            OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
+        }
 
-    for (i = 0; i < vertex_array_count - 1; i += 2) {
-        vb1 = &vbuf[velem[i].vertex_buffer_index];
-        vb2 = &vbuf[velem[i+1].vertex_buffer_index];
-        size1 = hw_format_size[i];
-        size2 = hw_format_size[i+1];
+        if (vertex_array_count & 1) {
+            vb1 = &vbuf[velem[i].vertex_buffer_index];
+            size1 = hw_format_size[i];
 
-        OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
-               R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
-        OUT_CS(vb1->buffer_offset + velem[i].src_offset   + offset * vb1->stride);
-        OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
-    }
+            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
+            OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+        }
+
+        for (i = 0; i < vertex_array_count; i++) {
+            buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
+            OUT_CS_RELOC(buf);
+        }
+    } else {
+        /* Instanced arrays. */
+        for (i = 0; i < vertex_array_count - 1; i += 2) {
+            vb1 = &vbuf[velem[i].vertex_buffer_index];
+            vb2 = &vbuf[velem[i+1].vertex_buffer_index];
+            size1 = hw_format_size[i];
+            size2 = hw_format_size[i+1];
+
+            if (velem[i].instance_divisor) {
+                stride1 = 0;
+                offset1 = vb1->buffer_offset + velem[i].src_offset +
+                          (instance_id / velem[i].instance_divisor) * vb1->stride;
+            } else {
+                stride1 = vb1->stride;
+                offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
+            }
+            if (velem[i+1].instance_divisor) {
+                stride2 = 0;
+                offset2 = vb2->buffer_offset + velem[i+1].src_offset +
+                          (instance_id / velem[i+1].instance_divisor) * vb2->stride;
+            } else {
+                stride2 = vb2->stride;
+                offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride;
+            }
 
-    if (vertex_array_count & 1) {
-        vb1 = &vbuf[velem[i].vertex_buffer_index];
-        size1 = hw_format_size[i];
+            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) |
+                   R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(stride2));
+            OUT_CS(offset1);
+            OUT_CS(offset2);
+        }
 
-        OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
-        OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
-    }
+        if (vertex_array_count & 1) {
+            vb1 = &vbuf[velem[i].vertex_buffer_index];
+            size1 = hw_format_size[i];
+
+            if (velem[i].instance_divisor) {
+                stride1 = 0;
+                offset1 = vb1->buffer_offset + velem[i].src_offset +
+                          (instance_id / velem[i].instance_divisor) * vb1->stride;
+            } else {
+                stride1 = vb1->stride;
+                offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
+            }
 
-    for (i = 0; i < vertex_array_count; i++) {
-        buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
-        OUT_CS_RELOC(buf);
+            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
+            OUT_CS(offset1);
+        }
+
+        for (i = 0; i < vertex_array_count; i++) {
+            buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
+            OUT_CS_RELOC(buf);
+        }
     }
     END_CS;
 }
index acea51d942f131f7d6b83cd89dc494b6d8334e02..6c1c9d2fb13b791a95c116ea00a79875c1ea1c8e 100644 (file)
@@ -31,7 +31,8 @@ struct r300_vertex_program_code;
 
 uint32_t pack_float24(float f);
 
-void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed);
+void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
+                             boolean indexed, int instance_id);
 
 void r300_emit_blend_state(struct r300_context* r300,
                            unsigned size, void* state);
index c402a83541ed67d74c4869cb987d63fabfb63b53..300cb86acfe6afd429fb9be7c6c36f0cfed447c7 100644 (file)
@@ -233,13 +233,14 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300,
  * \param index_buffer  The index buffer to validate. The parameter may be NULL.
  * \param buffer_offset The offset passed to emit_vertex_arrays.
  * \param index_bias    The index bias to emit.
+ * \param instance_id   Index of the instance to render, or -1 for a non-instanced draw.
  * \return TRUE if rendering should be skipped
  */
 static boolean r300_emit_states(struct r300_context *r300,
                                 enum r300_prepare_flags flags,
                                 struct pipe_resource *index_buffer,
                                 int buffer_offset,
-                                int index_bias)
+                                int index_bias, int instance_id)
 {
     boolean first_draw     = flags & PREP_EMIT_STATES;
     boolean emit_vertex_arrays       = flags & PREP_EMIT_AOS;
@@ -267,12 +268,14 @@ static boolean r300_emit_states(struct r300_context *r300,
         if (emit_vertex_arrays &&
             (r300->vertex_arrays_dirty ||
              r300->vertex_arrays_indexed != indexed ||
-             r300->vertex_arrays_offset != buffer_offset)) {
-            r300_emit_vertex_arrays(r300, buffer_offset, indexed);
+             r300->vertex_arrays_offset != buffer_offset ||
+             r300->vertex_arrays_instance_id != instance_id)) {
+            r300_emit_vertex_arrays(r300, buffer_offset, indexed, instance_id);
 
             r300->vertex_arrays_dirty = FALSE;
             r300->vertex_arrays_indexed = indexed;
             r300->vertex_arrays_offset = buffer_offset;
+            r300->vertex_arrays_instance_id = instance_id;
         }
 
         if (emit_vertex_arrays_swtcl)
@@ -291,6 +294,7 @@ static boolean r300_emit_states(struct r300_context *r300,
  * \param cs_dwords     The number of dwords to reserve in CS.
  * \param buffer_offset The offset passed to emit_vertex_arrays.
  * \param index_bias    The index bias to emit.
+ * \param instance_id   Index of the instance to render, or -1 for a non-instanced draw.
  * \return TRUE if rendering should be skipped
  */
 static boolean r300_prepare_for_rendering(struct r300_context *r300,
@@ -298,14 +302,15 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300,
                                           struct pipe_resource *index_buffer,
                                           unsigned cs_dwords,
                                           int buffer_offset,
-                                          int index_bias)
+                                          int index_bias,
+                                          int instance_id)
 {
     /* Make sure there is enough space in the command stream and emit states. */
     if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
         flags |= PREP_EMIT_STATES;
 
     return r300_emit_states(r300, flags, index_buffer, buffer_offset,
-                            index_bias);
+                            index_bias, instance_id);
 }
 
 static boolean immd_is_good_idea(struct r300_context *r300,
@@ -379,7 +384,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300,
 
     CS_LOCALS(r300);
 
-    if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0))
+    if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
         return;
 
     /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
@@ -540,7 +545,7 @@ static void r300_draw_elements_immediate(struct r300_context *r300,
     /* 19 dwords for r300_draw_elements_immediate. Give up if the function fails. */
     if (!r300_prepare_for_rendering(r300,
             PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
-            PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias))
+            PREP_INDEXED, NULL, 2+count_dwords, 0, info->index_bias, -1))
         return;
 
     r300_emit_draw_init(r300, info->mode, info->min_index, info->max_index);
@@ -612,7 +617,8 @@ static void r300_draw_elements_immediate(struct r300_context *r300,
 }
 
 static void r300_draw_elements(struct r300_context *r300,
-                               const struct pipe_draw_info *info)
+                               const struct pipe_draw_info *info,
+                               int instance_id)
 {
     struct pipe_resource *indexBuffer = r300->index_buffer.buffer;
     unsigned indexSize = r300->index_buffer.index_size;
@@ -661,7 +667,8 @@ static void r300_draw_elements(struct r300_context *r300,
     /* 19 dwords for emit_draw_elements. Give up if the function fails. */
     if (!r300_prepare_for_rendering(r300,
             PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS |
-            PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias))
+            PREP_INDEXED, indexBuffer, 19, buffer_offset, info->index_bias,
+            instance_id))
         goto done;
 
     if (alt_num_verts || count <= 65535) {
@@ -686,7 +693,8 @@ static void r300_draw_elements(struct r300_context *r300,
             if (count) {
                 if (!r300_prepare_for_rendering(r300,
                         PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
-                        indexBuffer, 19, buffer_offset, info->index_bias))
+                        indexBuffer, 19, buffer_offset, info->index_bias,
+                        instance_id))
                     goto done;
             }
         } while (count);
@@ -699,7 +707,8 @@ done:
 }
 
 static void r300_draw_arrays(struct r300_context *r300,
-                             const struct pipe_draw_info *info)
+                             const struct pipe_draw_info *info,
+                             int instance_id)
 {
     boolean alt_num_verts = r300->screen->caps.is_r500 &&
                             info->count > 65536;
@@ -710,7 +719,7 @@ static void r300_draw_arrays(struct r300_context *r300,
     /* 9 spare dwords for emit_draw_arrays. Give up if the function fails. */
     if (!r300_prepare_for_rendering(r300,
                                     PREP_EMIT_STATES | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
-                                    NULL, 9, start, 0))
+                                    NULL, 9, start, 0, instance_id))
         return;
 
     if (alt_num_verts || count <= 65535) {
@@ -727,13 +736,31 @@ static void r300_draw_arrays(struct r300_context *r300,
             if (count) {
                 if (!r300_prepare_for_rendering(r300,
                                                 PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
-                                                start, 0))
+                                                start, 0, instance_id))
                     return;
             }
         } while (count);
     }
 }
 
+static void r300_draw_arrays_instanced(struct r300_context *r300,
+                                       const struct pipe_draw_info *info)
+{
+    int i;
+    /* One r300_draw_arrays call per instance; i is passed as instance_id. */
+    for (i = 0; i < info->instance_count; i++)
+        r300_draw_arrays(r300, info, i);
+}
+
+static void r300_draw_elements_instanced(struct r300_context *r300,
+                                         const struct pipe_draw_info *info)
+{
+    int i;
+    /* One r300_draw_elements call per instance; i is passed as instance_id. */
+    for (i = 0; i < info->instance_count; i++)
+        r300_draw_elements(r300, info, i);
+}
+
 static void r300_draw_vbo(struct pipe_context* pipe,
                           const struct pipe_draw_info *dinfo)
 {
@@ -767,20 +794,20 @@ static void r300_draw_vbo(struct pipe_context* pipe,
                 r300_resource(r300->index_buffer.buffer)->b.user_ptr) {
                 r300_draw_elements_immediate(r300, &info);
             } else {
-                r300_draw_elements(r300, &info);
+                r300_draw_elements(r300, &info, -1);
             }
         } else {
-            assert(0);
+            r300_draw_elements_instanced(r300, &info);
         }
     } else {
         if (info.instance_count <= 1) {
             if (immd_is_good_idea(r300, info.count)) {
                 r300_draw_arrays_immediate(r300, &info);
             } else {
-                r300_draw_arrays(r300, &info);
+                r300_draw_arrays(r300, &info, -1);
             }
         } else {
-            assert(0);
+            r300_draw_arrays_instanced(r300, &info);
         }
     }
 
@@ -998,12 +1025,12 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
     if (r300->draw_first_emitted) {
         if (!r300_prepare_for_rendering(r300,
                 PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL,
-                NULL, dwords, 0, 0))
+                NULL, dwords, 0, 0, -1))
             return;
     } else {
         if (!r300_emit_states(r300,
                 PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL,
-                NULL, 0, 0))
+                NULL, 0, 0, -1))
             return;
     }
 
@@ -1038,12 +1065,12 @@ static void r300_render_draw_elements(struct vbuf_render* render,
     if (r300->draw_first_emitted) {
         if (!r300_prepare_for_rendering(r300,
                 PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
-                NULL, 256, 0, 0))
+                NULL, 256, 0, 0, -1))
             return;
     } else {
         if (!r300_emit_states(r300,
                 PREP_EMIT_STATES | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
-                NULL, 0, 0))
+                NULL, 0, 0, -1))
             return;
     }
 
@@ -1080,7 +1107,7 @@ static void r300_render_draw_elements(struct vbuf_render* render,
         if (count) {
             if (!r300_prepare_for_rendering(r300,
                     PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
-                    NULL, 256, 0, 0))
+                    NULL, 256, 0, 0, -1))
                 return;
 
             end_cs_dwords = r300_get_num_cs_end_dwords(r300);
@@ -1184,7 +1211,7 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter,
     r300->clip_state.dirty = FALSE;
     r300->viewport_state.dirty = FALSE;
 
-    if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0))
+    if (!r300_prepare_for_rendering(r300, PREP_EMIT_STATES, NULL, dwords, 0, 0, -1))
         goto done;
 
     DBG(r300, DBG_DRAW, "r300: draw_rectangle\n");
index 2f82f78125d17cb0e366b8ae2b36a8afea9a6603..6c0dc99dd74c1e2d1c7f8ad080148499481b625e 100644 (file)
@@ -112,6 +112,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
         case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
         case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
         case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+        case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
             return 1;
         case PIPE_CAP_TEXTURE_SWIZZLE:
             return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1;
@@ -127,7 +128,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
         case PIPE_CAP_STREAM_OUTPUT:
         case PIPE_CAP_PRIMITIVE_RESTART:
         case PIPE_CAP_TGSI_INSTANCEID:
-        case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
         case PIPE_CAP_ARRAY_TEXTURES:
             return 0;