gallium/svga: Upload only parts of user-buffers that we actually use
author Thomas Hellstrom <thellstrom@vmware.com>
Thu, 17 Mar 2011 20:09:38 +0000 (21:09 +0100)
committer Thomas Hellstrom <thellstrom@vmware.com>
Fri, 1 Jul 2011 11:30:38 +0000 (13:30 +0200)
Stream user buffer contents rather than trying to maintain persistent
host / hardware copies.
Resulting negative array offsets are not allowed by the hardware
(well, at least not according to the header files), so adjust the index
bias to make all array offsets positive.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
src/gallium/drivers/svga/svga_draw.c
src/gallium/drivers/svga/svga_draw.h
src/gallium/drivers/svga/svga_draw_private.h
src/gallium/drivers/svga/svga_pipe_draw.c
src/gallium/drivers/svga/svga_resource_buffer.h
src/gallium/drivers/svga/svga_state_vdecl.c

index 28ba470d8c7cb94344fe2500a9ef705aecac94bc..aa0966928882c790a83436b85772fc40b53b889f 100644 (file)
@@ -242,6 +242,11 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
 }
 
 
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+                               int index_bias) /* signed: may be negative */
+{
+   hwtnl->index_bias = index_bias; /* svga_hwtnl_prim() adds this to each queued range's indexBias */
+}
 
 
 
@@ -265,15 +270,16 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
          unsigned size = vb ? vb->width0 : 0;
          unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
          unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
-         unsigned index_bias = range->indexBias;
+         int index_bias = (int) range->indexBias + hwtnl->index_bias;
          unsigned width;
 
          assert(vb);
          assert(size);
          assert(offset < size);
-         assert(index_bias >= 0);
          assert(min_index <= max_index);
-         assert(offset + index_bias*stride < size);
+         if (index_bias >= 0) {
+            assert(offset + index_bias*stride < size);
+         }
          if (min_index != ~0) {
             assert(offset + (index_bias + min_index) * stride < size);
          }
@@ -394,6 +400,7 @@ enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
    hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
 
    hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+   hwtnl->cmd.prim[hwtnl->cmd.prim_count].indexBias += hwtnl->index_bias;
 
    pipe_resource_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
    hwtnl->cmd.prim_count++;
index a2403d802bebe46293498c9983127746de046632..1dac17421e1d06cb7e9a0e3ff41a75e2a2e69715 100644 (file)
@@ -79,5 +79,8 @@ svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
 enum pipe_error
 svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
 
+void svga_hwtnl_set_index_bias( struct svga_hwtnl *hwtnl,
+                                int index_bias);
+
 
 #endif /* SVGA_DRAW_H_ */
index ca658ac6745dafacaa65401032811356704ce424..8126f7ee23c8e9da5dc90d117d39d34a91c8f85b 100644 (file)
@@ -116,6 +116,13 @@ struct draw_cmd {
 struct svga_hwtnl {
    struct svga_context *svga;
    struct u_upload_mgr *upload_ib;
+
+   /* Additional negative index bias due to partial buffer uploads
+    * This is compensated for in the offset associated with all
+    * vertex buffers.
+    */
+
+   int index_bias;
    
    /* Flatshade information:
     */
index a632fb12c94ec220bb68d05154d297add4b073e4..8e1c764ef5f60f5024d8beafd6bd7ec081453bb3 100644 (file)
 #include "svga_state.h"
 #include "svga_swtnl.h"
 #include "svga_debug.h"
+#include "svga_resource_buffer.h"
+#include "util/u_upload_mgr.h"
+
+/**
+ * svga_upload_user_buffers - upload parts of user buffers
+ *
+ * For every current vertex element that is sourced from a user buffer,
+ * this function streams the part of that buffer needed by the upcoming
+ * draw to hw through svga->upload_vb. It sets svga_buffer::source_offset
+ * to the first byte uploaded and the vertex buffer's buffer_offset to the
+ * offset of the copy inside the upload buffer. After upload
+ * also svga_buffer::uploaded::buffer is set to !NULL
+ *
+ * start          - first vertex referenced by the draw
+ * count          - number of consecutive vertices referenced by the draw
+ * instance_count - number of instances; only used for elements with a
+ *                  non-zero instance divisor
+ *
+ * Returns PIPE_OK on success, or the error returned by u_upload_buffer().
+ *
+ * Note that the loop is per vertex element, so a user buffer referenced
+ * by several elements is uploaded once per element; the values from the
+ * last upload are the ones left in the buffer.
+ */
+
+static int
+svga_upload_user_buffers(struct svga_context *svga,
+                         unsigned start,
+                         unsigned count,
+                         unsigned instance_count)
+{
+   const struct pipe_vertex_element *ve = svga->curr.velems->velem;
+   unsigned i;
+   int ret;
+
+   for (i=0; i < svga->curr.velems->count; i++) {
+      struct pipe_vertex_buffer *vb =
+         &svga->curr.vb[ve[i].vertex_buffer_index];
+
+      if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+         struct svga_buffer *buffer = svga_buffer(vb->buffer);
+         unsigned first, size;
+         boolean flushed;
+         unsigned instance_div = ve[i].instance_divisor;
+
+         /* buffer_offset changes below, so the vertex buffer state
+          * has to be re-emitted.
+          */
+         svga->dirty |= SVGA_NEW_VBUFFER;
+
+         if (instance_div) {
+            /* Per-instance data: one element for every instance_div
+             * instances, rounded up. The inner parentheses are needed:
+             * "stride * (n + div - 1) / div" multiplies before it
+             * divides and can yield more bytes than
+             * stride * ceil(n / div), i.e. more than the draw reads.
+             */
+            first = 0;
+            size = vb->stride *
+               ((instance_count + instance_div - 1) / instance_div);
+         } else if (vb->stride) {
+            /* Per-vertex data: just the [start, start + count) range. */
+            first = vb->stride * start;
+            size = vb->stride * count;
+         } else {
+            /* Only a single vertex!
+             * Upload with the largest vertex size the hw supports,
+             * if possible.
+             */
+            first = 0;
+            size = MIN2(16, vb->buffer->width0);
+         }
+
+         ret = u_upload_buffer( svga->upload_vb,
+                                0, first, size,
+                                &buffer->b.b,
+                                &buffer->uploaded.offset,
+                                &buffer->uploaded.buffer,
+                                &flushed);
+
+         if (ret)
+            return ret;
+
+         if (0)
+            debug_printf("%s: %u: orig buf %p upl buf %p ofs %u sofs %u"
+                         " sz %u\n",
+                         __FUNCTION__,
+                         i,
+                         (void *) buffer,
+                         (void *) buffer->uploaded.buffer,
+                         buffer->uploaded.offset,
+                         first,
+                         size);
+
+         /* The vertex buffer now points at the uploaded copy; remember
+          * which source byte that copy starts at.
+          */
+         vb->buffer_offset = buffer->uploaded.offset;
+         buffer->source_offset = first;
+      }
+   }
+
+   return PIPE_OK;
+}
+
+/**
+ * svga_release_user_upl_buffers - release uploaded parts of user buffers
+ *
+ * This function releases the hw copy of the uploaded fraction of the
+ * user-buffer. It's important to do this as soon as all draw calls
+ * affecting the uploaded fraction are issued, as this allows for
+ * efficient reuse of the hardware surface backing the uploaded fraction.
+ *
+ * svga_buffer::source_offset is set to 0, and svga_buffer::uploaded::buffer
+ * is set to 0.
+ *
+ * All svga->curr.num_vertex_buffers currently bound vertex buffers are
+ * visited (not just those referenced by a vertex element); buffers that
+ * are unbound or are not user buffers are left untouched.
+ */
+
+static void
+svga_release_user_upl_buffers(struct svga_context *svga)
+{
+   unsigned i;
+   unsigned nr;
+
+   nr = svga->curr.num_vertex_buffers;
+
+   for (i = 0; i < nr; ++i) {
+      struct pipe_vertex_buffer *vb = &svga->curr.vb[i];
+
+      if (vb->buffer && svga_buffer_is_user_buffer(vb->buffer)) {
+         struct svga_buffer *buffer = svga_buffer(vb->buffer);
+
+         buffer->source_offset = 0; /* no partial upload is active any more */
+         if (buffer->uploaded.buffer)
+            pipe_resource_reference(&buffer->uploaded.buffer, NULL); /* drop our reference to the hw copy */
+      }
+   }
+}
 
 
 
@@ -50,6 +160,7 @@ retry_draw_range_elements( struct svga_context *svga,
                            unsigned prim, 
                            unsigned start, 
                            unsigned count,
+                           unsigned instance_count,
                            boolean do_retry )
 {
    enum pipe_error ret = 0;
@@ -61,6 +172,10 @@ retry_draw_range_elements( struct svga_context *svga,
                              svga->curr.rast->templ.flatshade,
                              svga->curr.rast->templ.flatshade_first );
 
+   ret = svga_upload_user_buffers( svga, min_index + index_bias,
+                                   max_index - min_index + 1, instance_count );
+   if (ret != PIPE_OK)
+      goto retry;
 
    ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
    if (ret)
@@ -84,7 +199,7 @@ retry:
                                         index_buffer, index_size, index_bias,
                                         min_index, max_index,
                                         prim, start, count,
-                                        FALSE );
+                                        instance_count, FALSE );
    }
 
    return ret;
@@ -96,6 +211,7 @@ retry_draw_arrays( struct svga_context *svga,
                    unsigned prim, 
                    unsigned start, 
                    unsigned count,
+                   unsigned instance_count,
                    boolean do_retry )
 {
    enum pipe_error ret;
@@ -107,6 +223,11 @@ retry_draw_arrays( struct svga_context *svga,
                              svga->curr.rast->templ.flatshade,
                              svga->curr.rast->templ.flatshade_first );
 
+   ret = svga_upload_user_buffers( svga, start, count, instance_count );
+
+   if (ret != PIPE_OK)
+      goto retry;
+
    ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
    if (ret)
       goto retry;
@@ -127,6 +248,7 @@ retry:
                                 prim,
                                 start,
                                 count,
+                                instance_count,
                                 FALSE );
    }
 
@@ -183,6 +305,8 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
          svga_context_flush(svga, NULL);
       }
 
+      /* Avoid leaking the previous hwtnl bias to swtnl */
+      svga_hwtnl_set_index_bias( svga->hwtnl, 0 );
       ret = svga_swtnl_draw_vbo( svga, info );
    }
    else {
@@ -201,6 +325,7 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                           info->mode,
                                           info->start + offset,
                                           info->count,
+                                          info->instance_count,
                                           TRUE );
       }
       else {
@@ -208,10 +333,13 @@ svga_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                   info->mode,
                                   info->start,
                                   info->count,
+                                  info->instance_count,
                                   TRUE );
       }
    }
 
+   svga_release_user_upl_buffers( svga );
+
    if (SVGA_DEBUG & DEBUG_FLUSH) {
       svga_hwtnl_flush_retry( svga );
       svga_context_flush(svga, NULL);
index 95032213fa50767ddc3c982add1a5ac7a6dff9c1..2ae44d2a5e954ddc5feace08f64ab229862905d8 100644 (file)
@@ -131,6 +131,13 @@ struct svga_buffer
       unsigned offset;
    } uploaded;
 
+   /**
+    * For user buffers, this is the offset to the data about to be
+    * referenced by the next draw command, and hence the data that needs
+    * to be uploaded.
+    */
+   unsigned source_offset;
+
    /**
     * DMA'ble memory.
     *
index 7c393a1da8debabb777cd88fb94ad6722a69c096..2375a022f97a7195d95df036a3dbf44f9350c810 100644 (file)
 #include "svga_hw_reg.h"
 
 
-static int
-upload_user_buffers( struct svga_context *svga )
-{
-   enum pipe_error ret = PIPE_OK;
-   int i;
-   int nr;
-
-   if (0) 
-      debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);
-
-   nr = svga->curr.num_vertex_buffers;
-
-   for (i = 0; i < nr; i++) 
-   {
-      if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer))
-      {
-         struct svga_buffer *buffer = svga_buffer(svga->curr.vb[i].buffer);
-
-         if (!buffer->uploaded.buffer) {
-            boolean flushed;
-            ret = u_upload_buffer( svga->upload_vb,
-                                   0, 0,
-                                   buffer->b.b.width0,
-                                   &buffer->b.b,
-                                   &buffer->uploaded.offset,
-                                   &buffer->uploaded.buffer,
-                                   &flushed);
-            if (ret)
-               return ret;
-
-            if (0)
-               debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n",
-                            __FUNCTION__,
-                            i,
-                            buffer,
-                            buffer->uploaded.buffer,
-                            buffer->uploaded.offset,
-                            buffer->b.b.width0);
-         }
-
-         svga->curr.vb[i].buffer_offset = buffer->uploaded.offset;
-      }
-   }
-
-   if (0)
-      debug_printf("%s: DONE\n", __FUNCTION__);
-
-   return ret;
-}
-
-
 /***********************************************************************
  */
 
@@ -99,6 +48,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
    const struct pipe_vertex_element *ve = svga->curr.velems->velem;
    SVGA3dVertexDecl decl;
    unsigned i;
+   unsigned neg_bias = 0;
 
    assert(svga->curr.velems->count >=
           svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
@@ -106,12 +56,50 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
    svga_hwtnl_reset_vdecl( svga->hwtnl, 
                            svga->curr.velems->count );
 
+   /**
+    * We can't set the VDECL offset to something negative, so we
+    * must calculate a common negative additional index bias, and modify
+    * the VDECL offsets accordingly so they *all* end up positive.
+    *
+    * Note that the exact value of the negative index bias is not that
+    * important, since we compensate for it when we calculate the vertex
+    * buffer offset below. The important thing is that all vertex buffer
+    * offsets remain positive.
+    *
+    * Note that we use a negative bias variable in order to make the
+    * rounding maths more easy to follow, and to avoid int / unsigned
+    * confusion.
+    */
+
    for (i = 0; i < svga->curr.velems->count; i++) {
-      const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
+      const struct pipe_vertex_buffer *vb =
+         &svga->curr.vb[ve[i].vertex_buffer_index];
+      struct svga_buffer *buffer;
+      unsigned int offset = vb->buffer_offset + ve[i].src_offset;
+      unsigned tmp_neg_bias = 0;
+
+      if (!vb->buffer)
+         continue;
+
+      buffer = svga_buffer(vb->buffer);
+      if (buffer->source_offset > offset) {
+         tmp_neg_bias = buffer->source_offset - offset;
+         if (vb->stride)
+            tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride;
+         neg_bias = MAX2(neg_bias, tmp_neg_bias);
+      }
+   }
+
+   for (i = 0; i < svga->curr.velems->count; i++) {
+      const struct pipe_vertex_buffer *vb =
+         &svga->curr.vb[ve[i].vertex_buffer_index];
       unsigned usage, index;
-      struct svga_buffer *buffer = svga_buffer(vb->buffer);
+      struct svga_buffer *buffer;
 
+      if (!vb->buffer)
+         continue;
 
+      buffer= svga_buffer(vb->buffer);
       svga_generate_vdecl_semantics( i, &usage, &index );
 
       /* SVGA_NEW_VELEMENT
@@ -121,8 +109,16 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
       decl.identity.usage = usage;
       decl.identity.usageIndex = index;
       decl.array.stride = vb->stride;
-      decl.array.offset = (vb->buffer_offset +
-                           ve[i].src_offset);
+
+      /* Compensate for partially uploaded vbo, and
+       * for the negative index bias.
+       */
+      decl.array.offset = (vb->buffer_offset
+                           + ve[i].src_offset
+                          + neg_bias * vb->stride
+                          - buffer->source_offset);
+
+      assert(decl.array.offset >= 0);
 
       svga_hwtnl_vdecl( svga->hwtnl,
                         i,
@@ -131,6 +127,7 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
                         vb->buffer );
    }
 
+   svga_hwtnl_set_index_bias( svga->hwtnl, -neg_bias );
    return 0;
 }
 
@@ -138,23 +135,11 @@ static int emit_hw_vs_vdecl( struct svga_context *svga,
 static int emit_hw_vdecl( struct svga_context *svga,
                           unsigned dirty )
 {
-   int ret = 0;
-
    /* SVGA_NEW_NEED_SWTNL
     */
    if (svga->state.sw.need_swtnl)
       return 0; /* Do not emit during swtnl */
 
-   /* If we get to here, we know that we're going to draw.  Upload
-    * userbuffers now and try to combine multiple userbuffers from
-    * multiple draw calls into a single host buffer for performance.
-    */
-   if (svga->curr.any_user_vertex_buffers) {
-      ret = upload_user_buffers( svga );
-      if (ret)
-         return ret;
-   }
-
    return emit_hw_vs_vdecl( svga, dirty );
 }