swr: fix range computation for instanced client-side arrays
authorIlia Mirkin <imirkin@alum.mit.edu>
Sat, 26 Nov 2016 22:11:50 +0000 (17:11 -0500)
committerIlia Mirkin <imirkin@alum.mit.edu>
Thu, 1 Dec 2016 01:35:33 +0000 (20:35 -0500)
We need to take the instance divisor and number of instances into
account for instanced client-side arrays, rather than the vertex
parameters.

Loosely based on the comparable nvc0 logic.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/swr_state.cpp
src/gallium/drivers/swr/swr_state.h

index f1cf4ae2e52a456e339da6ba472fd96102cc2737..9f6b5b0d9153f64efa828223741839a0e24918e0 100644 (file)
@@ -494,6 +494,15 @@ swr_create_vertex_elements_state(struct pipe_context *pipe,
          const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
             mesa_to_swr_format(attribs[i].src_format));
          velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
+
+         if (attribs[i].instance_divisor != 0) {
+            velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
+            uint32_t *min_instance_div =
+               &velems->min_instance_div[attribs[i].vertex_buffer_index];
+            if (!*min_instance_div ||
+                attribs[i].instance_divisor < *min_instance_div)
+               *min_instance_div = attribs[i].instance_divisor;
+         }
       }
    }
 
@@ -870,6 +879,34 @@ swr_change_rt(struct swr_context *ctx,
    return need_fence;
 }
 
+static inline void
+swr_user_vbuf_range(const struct pipe_draw_info *info,
+                    const struct swr_vertex_element_state *velems,
+                    const struct pipe_vertex_buffer *vb,
+                    uint32_t i,
+                    uint32_t *totelems,
+                    uint32_t *base,
+                    uint32_t *size)
+{
+   /* FIXME: The size is too large - we don't access the full extra stride. */
+   unsigned elems;
+   if (velems->instanced_bufs & (1U << i)) {
+      elems = info->instance_count / velems->min_instance_div[i] + 1;
+      *totelems = info->start_instance + elems;
+      *base = info->start_instance * vb->stride;
+      *size = elems * vb->stride;
+   } else if (vb->stride) {
+      elems = info->max_index - info->min_index + 1;
+      *totelems = info->max_index + 1;
+      *base = info->min_index * vb->stride;
+      *size = elems * vb->stride;
+   } else {
+      *totelems = 1;
+      *base = 0;
+      *size = velems->stream_pitch[i];
+   }
+}
+
 void
 swr_update_derived(struct pipe_context *pipe,
                    const struct pipe_draw_info *p_draw_info)
@@ -1039,8 +1076,7 @@ swr_update_derived(struct pipe_context *pipe,
    /* Set vertex & index buffers */
    /* (using draw info if called by swr_draw_vbo) */
    if (ctx->dirty & SWR_NEW_VERTEX) {
-      uint32_t size, pitch, max_vertex, partial_inbounds, scratch_total;
-      const uint8_t *p_data;
+      uint32_t scratch_total;
       uint8_t *scratch = NULL;
 
       /* If being called by swr_draw_vbo, copy draw details */
@@ -1056,14 +1092,8 @@ swr_update_derived(struct pipe_context *pipe,
          if (!vb->user_buffer)
             continue;
 
-         if (vb->stride) {
-            size = (info.max_index - info.min_index + 1) * vb->stride;
-         } else {
-            /* pitch = 0, means constant value
-             * set size to 1 vertex */
-            size = ctx->velems->stream_pitch[i];
-         }
-
+         uint32_t elems, base, size;
+         swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
          scratch_total += AlignUp(size, 4);
       }
 
@@ -1075,6 +1105,8 @@ swr_update_derived(struct pipe_context *pipe,
       /* vertex buffers */
       SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
       for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
+         uint32_t size, pitch, elems, partial_inbounds;
+         const uint8_t *p_data;
          struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
 
          pitch = vb->stride;
@@ -1083,7 +1115,7 @@ swr_update_derived(struct pipe_context *pipe,
              * size is based on buffer->width0 rather than info.max_index
              * to prevent having to validate VBO on each draw */
             size = vb->buffer->width0;
-            max_vertex = size / pitch;
+            elems = size / pitch;
             partial_inbounds = size % pitch;
 
             p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
@@ -1093,25 +1125,17 @@ swr_update_derived(struct pipe_context *pipe,
              * revalidate on each draw */
             post_update_dirty_flags |= SWR_NEW_VERTEX;
 
-            if (pitch) {
-               size = (info.max_index - info.min_index + 1) * pitch;
-            } else {
-               /* pitch = 0, means constant value
-                * set size to 1 vertex */
-               size = ctx->velems->stream_pitch[i];
-            }
-
-            max_vertex = info.max_index + 1;
+            uint32_t base;
+            swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
             partial_inbounds = 0;
 
             /* Copy only needed vertices to scratch space */
             size = AlignUp(size, 4);
-            const void *ptr = (const uint8_t *) vb->user_buffer
-               + info.min_index * pitch;
+            const void *ptr = (const uint8_t *) vb->user_buffer + base;
             memcpy(scratch, ptr, size);
             ptr = scratch;
             scratch += size;
-            p_data = (const uint8_t *)ptr - info.min_index * pitch;
+            p_data = (const uint8_t *)ptr - base;
          }
 
          swrVertexBuffers[i] = {0};
@@ -1119,7 +1143,7 @@ swr_update_derived(struct pipe_context *pipe,
          swrVertexBuffers[i].pitch = pitch;
          swrVertexBuffers[i].pData = p_data;
          swrVertexBuffers[i].size = size;
-         swrVertexBuffers[i].maxVertex = max_vertex;
+         swrVertexBuffers[i].maxVertex = elems;
          swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
       }
 
@@ -1129,6 +1153,8 @@ swr_update_derived(struct pipe_context *pipe,
       /* index buffer, if required (info passed in by swr_draw_vbo) */
       SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
       if (info.indexed) {
+         const uint8_t *p_data;
+         uint32_t size, pitch;
          struct pipe_index_buffer *ib = &ctx->index_buffer;
 
          pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
index 17d487326c99705e99a98511747278a4b6fa3c49..1960abddfa6adf019a1c548132fa006921634b88 100644 (file)
@@ -72,6 +72,8 @@ struct swr_vertex_element_state {
    FETCH_COMPILE_STATE fsState;
    PFN_FETCH_FUNC fsFunc;
    uint32_t stream_pitch[PIPE_MAX_ATTRIBS];
+   uint32_t min_instance_div[PIPE_MAX_ATTRIBS];
+   uint32_t instanced_bufs;
 };
 
 struct swr_blend_state {